llm: dont cap context window limit to training context window (#3988)
parent 7e432cdfac
commit 7aa08a77ca
@@ -73,8 +73,7 @@ func LoadModel(model string) (*GGML, error) {
 func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, projectors []string, opts api.Options) (LlamaServer, error) {
 	var err error
 	if opts.NumCtx > int(ggml.KV().ContextLength()) {
-		slog.Warn("requested context length is greater than model max context length", "requested", opts.NumCtx, "model", ggml.KV().ContextLength())
-		opts.NumCtx = int(ggml.KV().ContextLength())
+		slog.Warn("requested context length is greater than the model's training context window size", "requested", opts.NumCtx, "training size", ggml.KV().ContextLength())
 	}

 	if opts.NumCtx < 4 {
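
In effect, requesting a context length above the model's training context window now only emits a warning instead of silently clamping NumCtx down to the training size. A minimal standalone sketch of the new behavior, using hypothetical values rather than the actual ollama server path:

// Sketch of the behavior after this change (hypothetical values).
// Before: NumCtx was capped to the training context length.
// After: the oversized request is logged, then used as-is.
package main

import "log/slog"

func main() {
	numCtx := 16384     // hypothetical requested context length
	trainingCtx := 8192 // hypothetical model training context length

	if numCtx > trainingCtx {
		slog.Warn("requested context length is greater than the model's training context window size",
			"requested", numCtx, "training size", trainingCtx)
		// The removed line additionally did: numCtx = trainingCtx
	}

	slog.Info("context length in use", "num_ctx", numCtx) // still 16384
}

Presumably this is to let users deliberately run with contexts beyond the training window (e.g. extended-context configurations), accepting the warning instead of being capped.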