From 2aa91a937ba199ae5832c71ecc10221cc6420fa8 Mon Sep 17 00:00:00 2001 From: Blake Mizerany Date: Mon, 24 Jun 2024 20:14:03 -0700 Subject: [PATCH] cmd: defer stating model info until necessary (#5248) This commit changes the 'ollama run' command to defer fetching model information until it really needs it. That is, when in interactive mode. It also removes one such case where the model information is fetched in duplicate, just before calling generateInteractive and then again, first thing, in generateInteractive. This positively impacts the performance of the command: ; time ./before run llama3 'hi' Hi! It's nice to meet you. Is there something I can help you with, or would you like to chat? ./before run llama3 'hi' 0.02s user 0.01s system 2% cpu 1.168 total ; time ./before run llama3 'hi' Hi! It's nice to meet you. Is there something I can help you with, or would you like to chat? ./before run llama3 'hi' 0.02s user 0.01s system 2% cpu 1.220 total ; time ./before run llama3 'hi' Hi! It's nice to meet you. Is there something I can help you with, or would you like to chat? ./before run llama3 'hi' 0.02s user 0.01s system 2% cpu 1.217 total ; time ./after run llama3 'hi' Hi! It's nice to meet you. Is there something I can help you with, or would you like to chat? ./after run llama3 'hi' 0.02s user 0.01s system 4% cpu 0.652 total ; time ./after run llama3 'hi' Hi! It's nice to meet you. Is there something I can help you with, or would you like to chat? ./after run llama3 'hi' 0.01s user 0.01s system 5% cpu 0.498 total ; time ./after run llama3 'hi' Hi! It's nice to meet you. Is there something I can help you with or would you like to chat? ./after run llama3 'hi' 0.01s user 0.01s system 3% cpu 0.479 total ; time ./after run llama3 'hi' Hi! It's nice to meet you. Is there something I can help you with, or would you like to chat? ./after run llama3 'hi' 0.02s user 0.01s system 5% cpu 0.507 total ; time ./after run llama3 'hi' Hi! It's nice to meet you. 
Is there something I can help you with, or would you like to chat? ./after run llama3 'hi' 0.02s user 0.01s system 5% cpu 0.507 total --- cmd/cmd.go | 65 +++++++++++++++++++++++----------------------- cmd/interactive.go | 51 ++++++++++-------------------------- 2 files changed, 46 insertions(+), 70 deletions(-) diff --git a/cmd/cmd.go b/cmd/cmd.go index 68197f72..89b551f4 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -287,38 +287,12 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, er } func RunHandler(cmd *cobra.Command, args []string) error { - client, err := api.ClientFromEnvironment() - if err != nil { - return err - } - - name := args[0] - - // check if the model exists on the server - show, err := client.Show(cmd.Context(), &api.ShowRequest{Name: name}) - var statusError api.StatusError - switch { - case errors.As(err, &statusError) && statusError.StatusCode == http.StatusNotFound: - if err := PullHandler(cmd, []string{name}); err != nil { - return err - } - - show, err = client.Show(cmd.Context(), &api.ShowRequest{Name: name}) - if err != nil { - return err - } - case err != nil: - return err - } - interactive := true opts := runOptions{ - Model: args[0], - WordWrap: os.Getenv("TERM") == "xterm-256color", - Options: map[string]interface{}{}, - MultiModal: slices.Contains(show.Details.Families, "clip"), - ParentModel: show.Details.ParentModel, + Model: args[0], + WordWrap: os.Getenv("TERM") == "xterm-256color", + Options: map[string]interface{}{}, } format, err := cmd.Flags().GetString("format") @@ -362,11 +336,38 @@ func RunHandler(cmd *cobra.Command, args []string) error { } opts.WordWrap = !nowrap - if !interactive { - return generate(cmd, opts) + // Fill out the rest of the options based on information about the + // model. 
+ client, err := api.ClientFromEnvironment() + if err != nil { + return err } - return generateInteractive(cmd, opts) + name := args[0] + info, err := func() (*api.ShowResponse, error) { + showReq := &api.ShowRequest{Name: name} + info, err := client.Show(cmd.Context(), showReq) + var se api.StatusError + if errors.As(err, &se) && se.StatusCode == http.StatusNotFound { + if err := PullHandler(cmd, []string{name}); err != nil { + return nil, err + } + return client.Show(cmd.Context(), &api.ShowRequest{Name: name}) + } + return info, err + }() + if err != nil { + return err + } + + opts.MultiModal = slices.Contains(info.Details.Families, "clip") + opts.ParentModel = info.Details.ParentModel + opts.Messages = append(opts.Messages, info.Messages...) + + if interactive { + return generateInteractive(cmd, opts) + } + return generate(cmd, opts) } func errFromUnknownKey(unknownKeyErr error) error { diff --git a/cmd/interactive.go b/cmd/interactive.go index 80a91547..0a2f429b 100644 --- a/cmd/interactive.go +++ b/cmd/interactive.go @@ -31,65 +31,40 @@ const ( ) func loadModel(cmd *cobra.Command, opts *runOptions) error { - client, err := api.ClientFromEnvironment() - if err != nil { - return err - } - p := progress.NewProgress(os.Stderr) defer p.StopAndClear() spinner := progress.NewSpinner("") p.Add("", spinner) - showReq := api.ShowRequest{Name: opts.Model} - showResp, err := client.Show(cmd.Context(), &showReq) + client, err := api.ClientFromEnvironment() if err != nil { return err } - opts.MultiModal = slices.Contains(showResp.Details.Families, "clip") - opts.ParentModel = showResp.Details.ParentModel - - if len(showResp.Messages) > 0 { - opts.Messages = append(opts.Messages, showResp.Messages...) 
- } chatReq := &api.ChatRequest{ - Model: opts.Model, - Messages: []api.Message{}, + Model: opts.Model, + KeepAlive: opts.KeepAlive, } - if opts.KeepAlive != nil { - chatReq.KeepAlive = opts.KeepAlive - } - - err = client.Chat(cmd.Context(), chatReq, func(resp api.ChatResponse) error { + return client.Chat(cmd.Context(), chatReq, func(resp api.ChatResponse) error { p.StopAndClear() - if len(opts.Messages) > 0 { - for _, msg := range opts.Messages { - switch msg.Role { - case "user": - fmt.Printf(">>> %s\n", msg.Content) - case "assistant": - state := &displayResponseState{} - displayResponse(msg.Content, opts.WordWrap, state) - fmt.Println() - fmt.Println() - } + for _, msg := range opts.Messages { + switch msg.Role { + case "user": + fmt.Printf(">>> %s\n", msg.Content) + case "assistant": + state := &displayResponseState{} + displayResponse(msg.Content, opts.WordWrap, state) + fmt.Println() + fmt.Println() } } return nil }) - if err != nil { - return err - } - - return nil } func generateInteractive(cmd *cobra.Command, opts runOptions) error { - opts.Messages = make([]api.Message, 0) - err := loadModel(cmd, &opts) if err != nil { return err