diff --git a/api/types.go b/api/types.go index 48380d3c..fcc27585 100644 --- a/api/types.go +++ b/api/types.go @@ -42,6 +42,50 @@ type GenerateRequest struct { Options map[string]interface{} `json:"options"` } +// Options specified in GenerateRequest. If you add a new option here, add it to the API docs as well. +type Options struct { + Runner + + // Predict options used at runtime + NumKeep int `json:"num_keep,omitempty"` + Seed int `json:"seed,omitempty"` + NumPredict int `json:"num_predict,omitempty"` + TopK int `json:"top_k,omitempty"` + TopP float32 `json:"top_p,omitempty"` + TFSZ float32 `json:"tfs_z,omitempty"` + TypicalP float32 `json:"typical_p,omitempty"` + RepeatLastN int `json:"repeat_last_n,omitempty"` + Temperature float32 `json:"temperature,omitempty"` + RepeatPenalty float32 `json:"repeat_penalty,omitempty"` + PresencePenalty float32 `json:"presence_penalty,omitempty"` + FrequencyPenalty float32 `json:"frequency_penalty,omitempty"` + Mirostat int `json:"mirostat,omitempty"` + MirostatTau float32 `json:"mirostat_tau,omitempty"` + MirostatEta float32 `json:"mirostat_eta,omitempty"` + PenalizeNewline bool `json:"penalize_newline,omitempty"` + Stop []string `json:"stop,omitempty"` +} + +// Runner options which must be set when the model is loaded into memory +type Runner struct { + UseNUMA bool `json:"numa,omitempty"` + NumCtx int `json:"num_ctx,omitempty"` + NumBatch int `json:"num_batch,omitempty"` + NumGQA int `json:"num_gqa,omitempty"` + NumGPU int `json:"num_gpu,omitempty"` + MainGPU int `json:"main_gpu,omitempty"` + LowVRAM bool `json:"low_vram,omitempty"` + F16KV bool `json:"f16_kv,omitempty"` + LogitsAll bool `json:"logits_all,omitempty"` + VocabOnly bool `json:"vocab_only,omitempty"` + UseMMap bool `json:"use_mmap,omitempty"` + UseMLock bool `json:"use_mlock,omitempty"` + EmbeddingOnly bool `json:"embedding_only,omitempty"` + RopeFrequencyBase float32 `json:"rope_frequency_base,omitempty"` + RopeFrequencyScale float32 
`json:"rope_frequency_scale,omitempty"` + NumThread int `json:"num_thread,omitempty"` +} + type EmbeddingRequest struct { Model string `json:"model"` Prompt string `json:"prompt"` @@ -162,49 +206,6 @@ func (r *GenerateResponse) Summary() { } } -// Runner options which must be set when the model is loaded into memory -type Runner struct { - UseNUMA bool `json:"numa,omitempty"` - NumCtx int `json:"num_ctx,omitempty"` - NumBatch int `json:"num_batch,omitempty"` - NumGQA int `json:"num_gqa,omitempty"` - NumGPU int `json:"num_gpu,omitempty"` - MainGPU int `json:"main_gpu,omitempty"` - LowVRAM bool `json:"low_vram,omitempty"` - F16KV bool `json:"f16_kv,omitempty"` - LogitsAll bool `json:"logits_all,omitempty"` - VocabOnly bool `json:"vocab_only,omitempty"` - UseMMap bool `json:"use_mmap,omitempty"` - UseMLock bool `json:"use_mlock,omitempty"` - EmbeddingOnly bool `json:"embedding_only,omitempty"` - RopeFrequencyBase float32 `json:"rope_frequency_base,omitempty"` - RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"` - NumThread int `json:"num_thread,omitempty"` -} - -type Options struct { - Runner - - // Predict options used at runtime - NumKeep int `json:"num_keep,omitempty"` - Seed int `json:"seed,omitempty"` - NumPredict int `json:"num_predict,omitempty"` - TopK int `json:"top_k,omitempty"` - TopP float32 `json:"top_p,omitempty"` - TFSZ float32 `json:"tfs_z,omitempty"` - TypicalP float32 `json:"typical_p,omitempty"` - RepeatLastN int `json:"repeat_last_n,omitempty"` - Temperature float32 `json:"temperature,omitempty"` - RepeatPenalty float32 `json:"repeat_penalty,omitempty"` - PresencePenalty float32 `json:"presence_penalty,omitempty"` - FrequencyPenalty float32 `json:"frequency_penalty,omitempty"` - Mirostat int `json:"mirostat,omitempty"` - MirostatTau float32 `json:"mirostat_tau,omitempty"` - MirostatEta float32 `json:"mirostat_eta,omitempty"` - PenalizeNewline bool `json:"penalize_newline,omitempty"` - Stop []string `json:"stop,omitempty"` -} - var 
ErrInvalidOpts = fmt.Errorf("invalid options") func (opts *Options) FromMap(m map[string]interface{}) error { diff --git a/docs/api.md b/docs/api.md index 7b136e9d..99c36bf4 100644 --- a/docs/api.md +++ b/docs/api.md @@ -167,6 +167,73 @@ curl -X POST http://localhost:11434/api/generate -d '{ } ``` +#### Request + +If you want to set custom options for the model at runtime rather than in the Modelfile, you can do so with the `options` parameter. This example sets every available option, but you can set any of them individually and omit the ones you do not want to override. + +```shell +curl -X POST http://localhost:11434/api/generate -d '{ + "model": "llama2:7b", + "prompt": "Why is the sky blue?", + "stream": false, + "options": { + "num_keep": 5, + "seed": 42, + "num_predict": 100, + "top_k": 20, + "top_p": 0.9, + "tfs_z": 0.5, + "typical_p": 0.7, + "repeat_last_n": 33, + "temperature": 0.8, + "repeat_penalty": 1.2, + "presence_penalty": 1.5, + "frequency_penalty": 1.0, + "mirostat": 1, + "mirostat_tau": 0.8, + "mirostat_eta": 0.6, + "penalize_newline": true, + "stop": ["\n", "user:"], + "numa": false, + "num_ctx": 4, + "num_batch": 2, + "num_gqa": 1, + "num_gpu": 1, + "main_gpu": 0, + "low_vram": false, + "f16_kv": true, + "logits_all": false, + "vocab_only": false, + "use_mmap": true, + "use_mlock": false, + "embedding_only": false, + "rope_frequency_base": 1.1, + "rope_frequency_scale": 0.8, + "num_thread": 8 + } +}' +``` + +#### Response + +```json +{ + "model": "llama2:7b", + "created_at": "2023-08-04T19:22:45.499127Z", + "response": "The sky is blue because it is the color of the sky.", + "context": [1, 2, 3], + "done": true, + "total_duration": 5589157167, + "load_duration": 3013701500, + "sample_count": 114, + "sample_duration": 81442000, + "prompt_eval_count": 46, + "prompt_eval_duration": 1160282000, + "eval_count": 13, + "eval_duration": 1325948000 +} +``` + ## Create a Model ```shell