From 56318fb365be38253fc9abeeabc850d01be2521a Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Fri, 6 Sep 2024 08:29:36 -0700 Subject: [PATCH] Improve logging on GPU too small (#6666) When we determine a GPU is too small for any layers, it's not always clear why. This will help troubleshoot those scenarios. --- llm/memory.go | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/llm/memory.go b/llm/memory.go index 185bc8fb..99db7629 100644 --- a/llm/memory.go +++ b/llm/memory.go @@ -167,7 +167,21 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts } // Only include GPUs that can fit the graph, gpu minimum, the layer buffer and at least more layer if (gpus[i].FreeMemory - overhead) < gzo+max(graphPartialOffload, graphFullOffload)+gpus[i].MinimumMemory+2*layerSize { - slog.Debug("gpu has too little memory to allocate any layers", "gpu", gpus[i]) + slog.Debug("gpu has too little memory to allocate any layers", + "id", gpus[i].ID, + "library", gpus[i].Library, + "variant", gpus[i].Variant, + "compute", gpus[i].Compute, + "driver", fmt.Sprintf("%d.%d", gpus[i].DriverMajor, gpus[i].DriverMinor), + "name", gpus[i].Name, + "total", format.HumanBytes2(gpus[i].TotalMemory), + "available", format.HumanBytes2(gpus[i].FreeMemory), + "minimum_memory", gpus[i].MinimumMemory, + "layer_size", format.HumanBytes2(layerSize), + "gpu_zer_overhead", format.HumanBytes2(gzo), + "partial_offload", format.HumanBytes2(graphPartialOffload), + "full_offload", format.HumanBytes2(graphFullOffload), + ) continue } gpusWithSpace = append(gpusWithSpace, gs{i, &gpus[i]})