From 359b15a59785809465ddffbaffd8be0ae3afcd5a Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Tue, 18 Jun 2024 11:05:34 -0700
Subject: [PATCH] Handle models with divergent layer sizes

The recent refactoring of the memory prediction assumed all layers
are the same size, but for some models (like deepseek-coder-v2) this
is not the case, so our predictions were significantly off.
---
 llm/memory.go | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/llm/memory.go b/llm/memory.go
index 3bf18c3f..b8b862bd 100644
--- a/llm/memory.go
+++ b/llm/memory.go
@@ -1,6 +1,7 @@
 package llm
 
 import (
+	"fmt"
 	"log/slog"
 	"strconv"
 	"strings"
@@ -179,6 +180,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
 
 	// For all the layers, find where they can fit on the GPU(s)
 	for i := range int(ggml.KV().BlockCount()) {
+		// Some models have inconsistent layer sizes
+		if blk, ok := layers[fmt.Sprintf("blk.%d", i)]; ok {
+			layerSize = blk.size()
+			layerSize += kv / ggml.KV().BlockCount()
+		}
 		memoryWeights += layerSize
 
 		if opts.NumGPU >= 0 && layerCount >= opts.NumGPU {