From 0f92b19bec97198b035a7801eda14e3d48149033 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Sat, 24 Aug 2024 17:24:50 -0700 Subject: [PATCH] Only enable numa on CPUs (#6484) The numa flag may be hurting performance on multi-socket systems running GPU workloads, so only enable it when running on the CPU --- llm/server.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llm/server.go b/llm/server.go index 9347a458..4e5dac28 100644 --- a/llm/server.go +++ b/llm/server.go @@ -258,7 +258,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr params = append(params, "--mlock") } - if gpu.IsNUMA() { + if gpu.IsNUMA() && gpus[0].Library == "cpu" { numaMode := "distribute" if runtime.GOOS == "linux" { if _, err := exec.LookPath("numactl"); err == nil {