From 56497663c8bc7de73f4ff8d53235664a044c9d95 Mon Sep 17 00:00:00 2001 From: Bruce MacDonald Date: Thu, 12 Oct 2023 11:16:37 -0400 Subject: [PATCH] relay model runner error message to client (#720) * give direction to user when runner fails * also relay errors from timeout * increase timeout to 3 minutes --- cmd/cmd.go | 13 +------------ llm/llama.go | 44 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 42 insertions(+), 15 deletions(-) diff --git a/cmd/cmd.go b/cmd/cmd.go index f3fdd793..e3718c98 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -479,18 +479,7 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool) error { } if err := client.Generate(cancelCtx, &request, fn); err != nil { - if strings.Contains(err.Error(), "failed to load model") { - // tell the user to check the server log, if it exists locally - home, nestedErr := os.UserHomeDir() - if nestedErr != nil { - // return the original error - return err - } - logPath := filepath.Join(home, ".ollama", "logs", "server.log") - if _, nestedErr := os.Stat(logPath); nestedErr == nil { - err = fmt.Errorf("%w\nFor more details, check the error logs at %s", err, logPath) - } - } else if strings.Contains(err.Error(), "context canceled") && abort { + if strings.Contains(err.Error(), "context canceled") && abort { spinner.Finish() return nil } diff --git a/llm/llama.go b/llm/llama.go index 33468cba..30b7f298 100644 --- a/llm/llama.go +++ b/llm/llama.go @@ -248,6 +248,25 @@ func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int { return 1 } +// StatusWriter is a writer that captures error messages from the llama runner process +type StatusWriter struct { + ErrCh chan error +} + +func NewStatusWriter() *StatusWriter { + return &StatusWriter{ + ErrCh: make(chan error, 1), + } +} + +func (w *StatusWriter) Write(b []byte) (int, error) { + if _, after, ok := bytes.Cut(b, []byte("error:")); ok { + err := fmt.Errorf("llama runner: %s", after) + w.ErrCh <- err + } + return os.Stderr.Write(b) +} + func newLlama(model string, adapters []string, runners []ModelRunner, numLayers int64, opts api.Options) (*llama, error) { fileInfo, err := os.Stat(model) if err != nil { @@ -294,6 +313,8 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers params = append(params, "--numa") } + var runnerErr error + // start the llama.cpp server with a retry in case the port is already in use for _, runner := range runners { if _, err := os.Stat(runner.Path); err != nil { @@ -310,7 +331,8 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers ) cmd.Env = append(os.Environ(), fmt.Sprintf("LD_LIBRARY_PATH=%s", filepath.Dir(runner.Path))) cmd.Stdout = os.Stderr - cmd.Stderr = os.Stderr + statusWriter := NewStatusWriter() + cmd.Stderr = statusWriter llm := &llama{Options: opts, Running: Running{Port: port, Cmd: cmd, Cancel: cancel, exitCh: make(chan error)}} @@ -333,6 +355,17 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers if err := waitForServer(llm); err != nil { log.Printf("error starting llama runner: %v", err) llm.Close() + + // default the runnerErr to the error returned by the most recent llama runner process + runnerErr = err + + // capture the error directly from the runner process, if any + select { + case runnerErr = <-statusWriter.ErrCh: + default: + // the runner process probably timed out + } + // try again continue } @@ -341,12 +374,17 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers return llm, nil } + if runnerErr != nil { + // this is the error returned from the llama runner process that failed most recently + return nil, runnerErr + } + return nil, fmt.Errorf("failed to start a llama runner") } func waitForServer(llm *llama) error { start := time.Now() - expiresAt := time.Now().Add(2 * time.Minute) // be generous with timeout, large models can take a while to load + expiresAt := time.Now().Add(3 * time.Minute) // be generous with timeout, large models can take a while to load ticker := time.NewTicker(200 * time.Millisecond) defer ticker.Stop() @@ -359,7 +397,7 @@ func waitForServer(llm *llama) error { case <-ticker.C: if time.Now().After(expiresAt) { // timeout - return fmt.Errorf("llama runner did not start within allotted time, retrying") + return fmt.Errorf("timed out waiting for llama runner to start") } if err := llm.Ping(context.Background()); err == nil {