llm: dont cap context window limit to training context window (#3988)

This commit is contained in:
Jeffrey Morgan 2024-04-29 10:07:30 -04:00 committed by GitHub
parent 7e432cdfac
commit 7aa08a77ca
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -73,8 +73,7 @@ func LoadModel(model string) (*GGML, error) {
 func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, projectors []string, opts api.Options) (LlamaServer, error) {
 	var err error
 	if opts.NumCtx > int(ggml.KV().ContextLength()) {
-		slog.Warn("requested context length is greater than model max context length", "requested", opts.NumCtx, "model", ggml.KV().ContextLength())
-		opts.NumCtx = int(ggml.KV().ContextLength())
+		slog.Warn("requested context length is greater than the model's training context window size", "requested", opts.NumCtx, "training size", ggml.KV().ContextLength())
 	}
 	if opts.NumCtx < 4 {