From 25906d72d1482bc9dc2e4300a42c8db4823ee1a3 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Sun, 11 Aug 2024 11:30:20 -0700 Subject: [PATCH] llm: prevent loading too large models on windows (#5926) Don't allow loading models that would lead to memory exhaustion (across vram, system memory and disk paging). This check was already applied on Linux but should be applied on Windows as well. --- llm/server.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llm/server.go b/llm/server.go index 41736068..0bd94f35 100644 --- a/llm/server.go +++ b/llm/server.go @@ -125,8 +125,9 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr } } - // On linux, over-allocating CPU memory will almost always result in an error - if runtime.GOOS == "linux" { + // On linux and windows, over-allocating CPU memory will almost always result in an error + // Darwin has fully dynamic swap so has no direct concept of free swap space + if runtime.GOOS != "darwin" { systemMemoryRequired := estimate.TotalSize - estimate.VRAMSize available := systemFreeMemory + systemSwapFreeMemory if systemMemoryRequired > available {