diff --git a/api/types.go b/api/types.go index 42b0c470..dccfbf7a 100644 --- a/api/types.go +++ b/api/types.go @@ -91,6 +91,7 @@ type GenerateResponse struct { Context []int `json:"context,omitempty"` TotalDuration time.Duration `json:"total_duration,omitempty"` + LoadDuration time.Duration `json:"load_duration,omitempty"` PromptEvalCount int `json:"prompt_eval_count,omitempty"` PromptEvalDuration time.Duration `json:"prompt_eval_duration,omitempty"` EvalCount int `json:"eval_count,omitempty"` @@ -102,6 +103,10 @@ func (r *GenerateResponse) Summary() { fmt.Fprintf(os.Stderr, "total duration: %v\n", r.TotalDuration) } + if r.LoadDuration > 0 { + fmt.Fprintf(os.Stderr, "load duration: %v\n", r.LoadDuration) + } + if r.PromptEvalCount > 0 { fmt.Fprintf(os.Stderr, "prompt eval count: %d token(s)\n", r.PromptEvalCount) } diff --git a/server/routes.go b/server/routes.go index 93a04cd7..c3f27ec8 100644 --- a/server/routes.go +++ b/server/routes.go @@ -33,7 +33,7 @@ func GenerateHandler(c *gin.Context) { mu.Lock() defer mu.Unlock() - start := time.Now() + checkpointStart := time.Now() var req api.GenerateRequest if err := c.ShouldBindJSON(&req); err != nil { @@ -74,6 +74,8 @@ func GenerateHandler(c *gin.Context) { activeSession.LLM = llm } + checkpointLoaded := time.Now() + prompt, err := model.Prompt(req) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) @@ -88,7 +90,8 @@ func GenerateHandler(c *gin.Context) { r.CreatedAt = time.Now().UTC() r.SessionID = activeSession.ID if r.Done { - r.TotalDuration = time.Since(start) + r.TotalDuration = time.Since(checkpointStart) + r.LoadDuration = checkpointLoaded.Sub(checkpointStart) } ch <- r