From c9f98622b1daba55477dcb330de1739fb2f02ce6 Mon Sep 17 00:00:00 2001 From: Jeffrey Morgan Date: Mon, 6 May 2024 14:22:24 -0700 Subject: [PATCH] Skip scheduling cancelled requests, always reload unloaded runners (#4189) --- server/sched.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/server/sched.go b/server/sched.go index 164814a3..c4a071c1 100644 --- a/server/sched.go +++ b/server/sched.go @@ -100,6 +100,12 @@ func (s *Scheduler) processPending(ctx context.Context) { return case pending := <-s.pendingReqCh: // Block other requests until we get this pending request running + + if pending.ctx.Err() != nil { + slog.Debug("pending request cancelled or timed out, skipping scheduling") + continue + } + for { var runnerToExpire *runnerRef s.loadedMu.Lock() @@ -435,6 +441,10 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool timeout = 2 * time.Minute // Initial load can take a long time for big models on slow systems... } + if runner.Options == nil { + return true + } + // Don't reload runner if num_gpu=-1 was provided optsExisting := runner.Options.Runner optsNew := req.opts.Runner