From c4904161891cb077834155798099410af2bbfed9 Mon Sep 17 00:00:00 2001
From: Michael Yang
Date: Thu, 20 Jul 2023 09:29:43 -0700
Subject: [PATCH] lock on llm.lock(); decrease batch size

---
 api/types.go   |  2 +-
 llama/llama.go | 16 ++++++++++------
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/api/types.go b/api/types.go
index 24666462..fc00adb1 100644
--- a/api/types.go
+++ b/api/types.go
@@ -177,7 +177,7 @@ func DefaultOptions() Options {
 		UseNUMA: false,
 
 		NumCtx:   2048,
-		NumBatch: 512,
+		NumBatch: 32,
 		NumGPU:   1,
 		LowVRAM:  false,
 		F16KV:    true,
diff --git a/llama/llama.go b/llama/llama.go
index 5919b4bd..07dd8a13 100644
--- a/llama/llama.go
+++ b/llama/llama.go
@@ -172,9 +172,6 @@ func (llm *LLM) Close() {
 }
 
 func (llm *LLM) Predict(ctx []int, prompt string, fn func(api.GenerateResponse)) error {
-	llm.mu.Lock()
-	defer llm.mu.Unlock()
-
 	C.llama_reset_timings(llm.ctx)
 
 	tokens := make([]C.llama_token, len(ctx))
@@ -193,12 +190,12 @@ func (llm *LLM) Predict(ctx []int, prompt string, fn func(api.GenerateResponse))
 	var b bytes.Buffer
 	for {
 		token, err := llm.next()
-		if errors.Is(err, io.EOF) {
+		if llm.gc {
+			return nil
+		} else if errors.Is(err, io.EOF) {
 			break
 		} else if err != nil {
 			return err
-		} else if llm.gc {
-			return io.EOF
 		}
 
 		b.WriteString(llm.detokenize(token))
@@ -293,6 +290,9 @@ func (llm *LLM) detokenize(tokens ...C.llama_token) string {
 }
 
 func (llm *LLM) next() (C.llama_token, error) {
+	llm.mu.Lock()
+	defer llm.mu.Unlock()
+
 	if len(llm.embd) >= llm.NumCtx {
 		numLeft := (llm.NumCtx - llm.NumKeep) / 2
 		truncated := llm.embd[:llm.NumKeep]
@@ -304,6 +304,10 @@ func (llm *LLM) next() (C.llama_token, error) {
 	}
 
 	for {
+		if llm.gc {
+			return 0, io.EOF
+		}
+
 		if llm.cursor >= len(llm.embd) {
 			break
 		}
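
For context: the diff moves the mutex from wrapping all of Predict into next(), so the lock is taken and released once per token, and next() checks llm.gc under that lock. A concurrent caller can then acquire the lock between tokens to set the flag and interrupt generation, instead of blocking until the whole Predict call returns. Below is a minimal, self-contained Go sketch of that pattern; the generator type, errInterrupted, predict, and stop are hypothetical stand-ins, not ollama APIs, and unlike the patch (which reads llm.gc directly in the Predict loop) the sketch only inspects the flag while holding the lock, to stay race-free.

	package main

	import (
		"errors"
		"fmt"
		"io"
		"sync"
	)

	// errInterrupted signals that a concurrent caller requested a stop
	// (hypothetical sentinel; the patch reuses io.EOF for this).
	var errInterrupted = errors.New("generation interrupted")

	// generator mirrors the locking pattern in the patch: the mutex is
	// held only inside next(), one token at a time, rather than across
	// the whole predict loop.
	type generator struct {
		mu     sync.Mutex
		gc     bool
		tokens []string
		cursor int
	}

	func (g *generator) next() (string, error) {
		g.mu.Lock()
		defer g.mu.Unlock()

		if g.gc {
			return "", errInterrupted
		}
		if g.cursor >= len(g.tokens) {
			return "", io.EOF
		}
		t := g.tokens[g.cursor]
		g.cursor++
		return t, nil
	}

	func (g *generator) predict(fn func(string)) error {
		for {
			t, err := g.next()
			if errors.Is(err, errInterrupted) {
				return nil // stopped early by a concurrent caller
			} else if errors.Is(err, io.EOF) {
				return nil // natural end of generation
			} else if err != nil {
				return err
			}
			fn(t)
		}
	}

	// stop can acquire the lock between tokens; with the lock held for
	// the whole predict loop, it would block until generation finished.
	func (g *generator) stop() {
		g.mu.Lock()
		defer g.mu.Unlock()
		g.gc = true
	}

	func main() {
		g := &generator{tokens: []string{"hello", " ", "world"}}
		done := make(chan struct{})
		go func() {
			defer close(done)
			_ = g.predict(func(t string) { fmt.Print(t) })
		}()
		g.stop()
		<-done
		fmt.Println()
	}

The same reasoning explains the control-flow change in Predict: an interrupt returns nil (the caller asked for the stop, so it is not an error), while a genuine io.EOF still breaks out to the post-generation handling.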