diff --git a/server/sched.go b/server/sched.go index 3fe6d7fc..9d8c4144 100644 --- a/server/sched.go +++ b/server/sched.go @@ -736,8 +736,8 @@ func pickBestFullFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoL func pickBestPartialFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList, numParallel *int) gpu.GpuInfoList { if *numParallel <= 0 { *numParallel = 1 - req.opts.NumCtx = req.origNumCtx - } + req.opts.NumCtx = req.origNumCtx + } byLibrary := gpus.ByLibrary() if len(byLibrary) <= 1 { return gpus