llm: limit generation to 10x context size to avoid run on generations (#3918)
* llm: limit generation to 10x context size to avoid run on generations * add comment * simplify condition statement
This commit is contained in:
parent
5f73c08729
commit
993cf8bf55
|
@ -397,7 +397,9 @@ func DefaultOptions() Options {
|
||||||
return Options{
|
return Options{
|
||||||
// options set on request to runner
|
// options set on request to runner
|
||||||
NumPredict: -1,
|
NumPredict: -1,
|
||||||
NumKeep: 0,
|
|
||||||
|
// set a minimal num_keep to avoid issues on context shifts
|
||||||
|
NumKeep: 4,
|
||||||
Temperature: 0.8,
|
Temperature: 0.8,
|
||||||
TopK: 40,
|
TopK: 40,
|
||||||
TopP: 0.9,
|
TopP: 0.9,
|
||||||
|
|
|
@ -560,6 +560,13 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
defer s.sem.Release(1)
|
defer s.sem.Release(1)
|
||||||
|
|
||||||
|
// only allow maximum 10 "context shifts" to avoid infinite generation
|
||||||
|
if req.Options.NumPredict < 0 || req.Options.NumPredict > 10*s.options.NumCtx {
|
||||||
|
req.Options.NumPredict = 10 * s.options.NumCtx
|
||||||
|
slog.Debug("setting token limit to 10x num_ctx", "num_ctx", s.options.NumCtx, "num_predict", req.Options.NumPredict)
|
||||||
|
}
|
||||||
|
|
||||||
request := map[string]any{
|
request := map[string]any{
|
||||||
"prompt": req.Prompt,
|
"prompt": req.Prompt,
|
||||||
"stream": true,
|
"stream": true,
|
||||||
|
|
Loading…
Reference in a new issue