add load duration
This commit is contained in:
parent
35af37a2cb
commit
32aec66e6a
|
@ -91,6 +91,7 @@ type GenerateResponse struct {
|
||||||
Context []int `json:"context,omitempty"`
|
Context []int `json:"context,omitempty"`
|
||||||
|
|
||||||
TotalDuration time.Duration `json:"total_duration,omitempty"`
|
TotalDuration time.Duration `json:"total_duration,omitempty"`
|
||||||
|
LoadDuration time.Duration `json:"load_duration,omitempty"`
|
||||||
PromptEvalCount int `json:"prompt_eval_count,omitempty"`
|
PromptEvalCount int `json:"prompt_eval_count,omitempty"`
|
||||||
PromptEvalDuration time.Duration `json:"prompt_eval_duration,omitempty"`
|
PromptEvalDuration time.Duration `json:"prompt_eval_duration,omitempty"`
|
||||||
EvalCount int `json:"eval_count,omitempty"`
|
EvalCount int `json:"eval_count,omitempty"`
|
||||||
|
@ -102,6 +103,10 @@ func (r *GenerateResponse) Summary() {
|
||||||
fmt.Fprintf(os.Stderr, "total duration: %v\n", r.TotalDuration)
|
fmt.Fprintf(os.Stderr, "total duration: %v\n", r.TotalDuration)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if r.LoadDuration > 0 {
|
||||||
|
fmt.Fprintf(os.Stderr, "load duration: %v\n", r.LoadDuration)
|
||||||
|
}
|
||||||
|
|
||||||
if r.PromptEvalCount > 0 {
|
if r.PromptEvalCount > 0 {
|
||||||
fmt.Fprintf(os.Stderr, "prompt eval count: %d token(s)\n", r.PromptEvalCount)
|
fmt.Fprintf(os.Stderr, "prompt eval count: %d token(s)\n", r.PromptEvalCount)
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,7 +33,7 @@ func GenerateHandler(c *gin.Context) {
|
||||||
mu.Lock()
|
mu.Lock()
|
||||||
defer mu.Unlock()
|
defer mu.Unlock()
|
||||||
|
|
||||||
start := time.Now()
|
checkpointStart := time.Now()
|
||||||
|
|
||||||
var req api.GenerateRequest
|
var req api.GenerateRequest
|
||||||
if err := c.ShouldBindJSON(&req); err != nil {
|
if err := c.ShouldBindJSON(&req); err != nil {
|
||||||
|
@ -74,6 +74,8 @@ func GenerateHandler(c *gin.Context) {
|
||||||
activeSession.LLM = llm
|
activeSession.LLM = llm
|
||||||
}
|
}
|
||||||
|
|
||||||
|
checkpointLoaded := time.Now()
|
||||||
|
|
||||||
prompt, err := model.Prompt(req)
|
prompt, err := model.Prompt(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||||
|
@ -88,7 +90,8 @@ func GenerateHandler(c *gin.Context) {
|
||||||
r.CreatedAt = time.Now().UTC()
|
r.CreatedAt = time.Now().UTC()
|
||||||
r.SessionID = activeSession.ID
|
r.SessionID = activeSession.ID
|
||||||
if r.Done {
|
if r.Done {
|
||||||
r.TotalDuration = time.Since(start)
|
r.TotalDuration = time.Since(checkpointStart)
|
||||||
|
r.LoadDuration = checkpointLoaded.Sub(checkpointStart)
|
||||||
}
|
}
|
||||||
|
|
||||||
ch <- r
|
ch <- r
|
||||||
|
|
Loading…
Reference in a new issue