From 58ce2d8273daef8668a950be9eeeb5bb1bb21b97 Mon Sep 17 00:00:00 2001 From: Jeffrey Morgan Date: Mon, 8 Jan 2024 21:32:44 -0500 Subject: [PATCH] better estimate scratch buffer size --- llm/llm.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llm/llm.go b/llm/llm.go index 3f1c0e2d..1026debc 100644 --- a/llm/llm.go +++ b/llm/llm.go @@ -62,8 +62,8 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options) // this amount is the overhead + tensors in memory // TODO: get this from the llama.cpp's graph calcluations instead of - // guessing it's ~1/7th of the kv cache times gqa - requiredAlloc := int64(ggml.NumGQA()) * requiredKv / 7 + // estimating it's 1/6 * kv_cache_size * num_gqa + requiredAlloc := int64(ggml.NumGQA()) * requiredKv / 6 requiredTotal := requiredModel + requiredKv + requiredAlloc