add load duration

This commit is contained in:
Michael Yang 2023-07-18 12:02:02 -07:00
parent 35af37a2cb
commit 32aec66e6a
2 changed files with 10 additions and 2 deletions

View file

@@ -91,6 +91,7 @@ type GenerateResponse struct {
Context []int `json:"context,omitempty"` Context []int `json:"context,omitempty"`
TotalDuration time.Duration `json:"total_duration,omitempty"` TotalDuration time.Duration `json:"total_duration,omitempty"`
LoadDuration time.Duration `json:"load_duration,omitempty"`
PromptEvalCount int `json:"prompt_eval_count,omitempty"` PromptEvalCount int `json:"prompt_eval_count,omitempty"`
PromptEvalDuration time.Duration `json:"prompt_eval_duration,omitempty"` PromptEvalDuration time.Duration `json:"prompt_eval_duration,omitempty"`
EvalCount int `json:"eval_count,omitempty"` EvalCount int `json:"eval_count,omitempty"`
@@ -102,6 +103,10 @@ func (r *GenerateResponse) Summary() {
fmt.Fprintf(os.Stderr, "total duration: %v\n", r.TotalDuration) fmt.Fprintf(os.Stderr, "total duration: %v\n", r.TotalDuration)
} }
if r.LoadDuration > 0 {
fmt.Fprintf(os.Stderr, "load duration: %v\n", r.LoadDuration)
}
if r.PromptEvalCount > 0 { if r.PromptEvalCount > 0 {
fmt.Fprintf(os.Stderr, "prompt eval count: %d token(s)\n", r.PromptEvalCount) fmt.Fprintf(os.Stderr, "prompt eval count: %d token(s)\n", r.PromptEvalCount)
} }

View file

@@ -33,7 +33,7 @@ func GenerateHandler(c *gin.Context) {
mu.Lock() mu.Lock()
defer mu.Unlock() defer mu.Unlock()
start := time.Now() checkpointStart := time.Now()
var req api.GenerateRequest var req api.GenerateRequest
if err := c.ShouldBindJSON(&req); err != nil { if err := c.ShouldBindJSON(&req); err != nil {
@@ -74,6 +74,8 @@ func GenerateHandler(c *gin.Context) {
activeSession.LLM = llm activeSession.LLM = llm
} }
checkpointLoaded := time.Now()
prompt, err := model.Prompt(req) prompt, err := model.Prompt(req)
if err != nil { if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
@@ -88,7 +90,8 @@ func GenerateHandler(c *gin.Context) {
r.CreatedAt = time.Now().UTC() r.CreatedAt = time.Now().UTC()
r.SessionID = activeSession.ID r.SessionID = activeSession.ID
if r.Done { if r.Done {
r.TotalDuration = time.Since(start) r.TotalDuration = time.Since(checkpointStart)
r.LoadDuration = checkpointLoaded.Sub(checkpointStart)
} }
ch <- r ch <- r