From 1d359e737e6342d03553b7a49a9e5f588e9e611f Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Mon, 13 May 2024 14:14:10 -0700 Subject: [PATCH] typo --- llm/memory.go | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/llm/memory.go b/llm/memory.go index 7c6192ca..acc2dd0b 100644 --- a/llm/memory.go +++ b/llm/memory.go @@ -54,8 +54,10 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts } layers := ggml.Tensors().Layers() - // add one layer worth of memorr as a buffer - memoryMinimum += layers["blk.0"].size() + // add one layer worth of memory as a buffer + if blk0, ok := layers["blk.0"]; ok { + memoryMinimum += blk0.size() + } // fp16 k,v = (1 (k) + 1 (v)) * sizeof(float16) * n_ctx * n_layer * n_embd / n_head * n_head_kv var kv uint64 = 2 * 2 * uint64(opts.NumCtx) * ggml.KV().BlockCount() * ggml.KV().EmbeddingLength() / ggml.KV().HeadCount() * ggml.KV().HeadCountKV() @@ -102,15 +104,17 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts var layerCount int for i := 0; i < int(ggml.KV().BlockCount()); i++ { - memoryLayer := layers[fmt.Sprintf("blk.%d", i)].size() + if blk, ok := layers[fmt.Sprintf("blk.%d", i)]; ok { + memoryLayer := blk.size() - // KV is proportional to the number of layers - memoryLayer += kv / ggml.KV().BlockCount() + // KV is proportional to the number of layers + memoryLayer += kv / ggml.KV().BlockCount() - memoryRequiredTotal += memoryLayer - if (opts.NumGPU >= 0 && layerCount+1 <= opts.NumGPU) || (opts.NumGPU < 0 && memoryAvailable > memoryRequiredPartial+memoryLayer) { - memoryRequiredPartial += memoryLayer - layerCount++ + memoryRequiredTotal += memoryLayer + if (opts.NumGPU >= 0 && layerCount+1 <= opts.NumGPU) || (opts.NumGPU < 0 && memoryAvailable > memoryRequiredPartial+memoryLayer) { + memoryRequiredPartial += memoryLayer + layerCount++ + } } }