diff --git a/api/client.go b/api/client.go
index 87975a9f..99c1daa1 100644
--- a/api/client.go
+++ b/api/client.go
@@ -18,9 +18,7 @@ import (
 
 const DefaultHost = "127.0.0.1:11434"
 
-var (
-	envHost = os.Getenv("OLLAMA_HOST")
-)
+var envHost = os.Getenv("OLLAMA_HOST")
 
 type Client struct {
 	Base url.URL
@@ -123,6 +121,8 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
 	return nil
 }
 
+const maxBufferSize = 512 * 1024 // 512KB
+
 func (c *Client) stream(ctx context.Context, method, path string, data any, fn func([]byte) error) error {
 	var buf *bytes.Buffer
 	if data != nil {
@@ -151,6 +151,9 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
 	defer response.Body.Close()
 
 	scanner := bufio.NewScanner(response.Body)
+	// increase the buffer size to avoid running out of space
+	scanBuf := make([]byte, 0, maxBufferSize)
+	scanner.Buffer(scanBuf, maxBufferSize)
 	for scanner.Scan() {
 		var errorResponse struct {
 			Error string `json:"error,omitempty"`
diff --git a/llm/llama.go b/llm/llama.go
index f9bb451d..0d9e8d5d 100644
--- a/llm/llama.go
+++ b/llm/llama.go
@@ -438,6 +438,8 @@ type PredictRequest struct {
 	Stop             []string `json:"stop,omitempty"`
 }
 
+const maxBufferSize = 512 * 1024 // 512KB
+
 func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, fn func(api.GenerateResponse)) error {
 	prevConvo, err := llm.Decode(ctx, prevContext)
 	if err != nil {
@@ -498,6 +500,9 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string,
 	}
 
 	scanner := bufio.NewScanner(resp.Body)
+	// increase the buffer size to avoid running out of space
+	buf := make([]byte, 0, maxBufferSize)
+	scanner.Buffer(buf, maxBufferSize)
 	for scanner.Scan() {
 		select {
 		case <-ctx.Done():