diff --git a/api/types.go b/api/types.go index 65a99c76..35121813 100644 --- a/api/types.go +++ b/api/types.go @@ -209,6 +209,7 @@ type Options struct { NumPredict int `json:"num_predict,omitempty"` TopK int `json:"top_k,omitempty"` TopP float32 `json:"top_p,omitempty"` + MinP float32 `json:"min_p,omitempty"` TFSZ float32 `json:"tfs_z,omitempty"` TypicalP float32 `json:"typical_p,omitempty"` RepeatLastN int `json:"repeat_last_n,omitempty"` diff --git a/cmd/interactive.go b/cmd/interactive.go index adbc3e9f..c3cdf629 100644 --- a/cmd/interactive.go +++ b/cmd/interactive.go @@ -138,6 +138,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error { fmt.Fprintln(os.Stderr, " /set parameter num_predict Max number of tokens to predict") fmt.Fprintln(os.Stderr, " /set parameter top_k Pick from top k num of tokens") fmt.Fprintln(os.Stderr, " /set parameter top_p Pick token based on sum of probabilities") + fmt.Fprintln(os.Stderr, " /set parameter min_p Pick token based on top token probability * min_p") fmt.Fprintln(os.Stderr, " /set parameter num_ctx Set the context size") fmt.Fprintln(os.Stderr, " /set parameter temperature Set creativity level") fmt.Fprintln(os.Stderr, " /set parameter repeat_penalty How strongly to penalize repetitions") diff --git a/docs/api.md b/docs/api.md index 2d4fe28f..90b41f3e 100644 --- a/docs/api.md +++ b/docs/api.md @@ -336,6 +336,7 @@ curl http://localhost:11434/api/generate -d '{ "num_predict": 100, "top_k": 20, "top_p": 0.9, + "min_p": 0.0, "tfs_z": 0.5, "typical_p": 0.7, "repeat_last_n": 33, diff --git a/docs/modelfile.md b/docs/modelfile.md index c3645b06..852bf96c 100644 --- a/docs/modelfile.md +++ b/docs/modelfile.md @@ -141,6 +141,7 @@ PARAMETER | num_predict | Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context) | int | num_predict 42 | | top_k | Reduces the probability of generating nonsense. A higher value (e.g. 
100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) | int | top_k 40 | | top_p | Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9) | float | top_p 0.9 | +| min_p | An alternative to top_p that aims to ensure a balance of quality and variety. The parameter *p* represents the minimum probability for a token to be considered, relative to the probability of the most likely token. For example, with *p*=0.05 and the most likely token having a probability of 0.9, tokens with a probability less than 0.045 are filtered out. (Default: 0.0) | float | min_p 0.05 | ### TEMPLATE diff --git a/llm/server.go b/llm/server.go index 55732773..8127960f 100644 --- a/llm/server.go +++ b/llm/server.go @@ -727,6 +727,7 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu "temperature": req.Options.Temperature, "top_k": req.Options.TopK, "top_p": req.Options.TopP, + "min_p": req.Options.MinP, "tfs_z": req.Options.TFSZ, "typical_p": req.Options.TypicalP, "repeat_last_n": req.Options.RepeatLastN, diff --git a/parser/parser_test.go b/parser/parser_test.go index 2b5c4c88..48044bc0 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -451,6 +451,7 @@ func TestParseFileParameters(t *testing.T) { "num_predict 1": {"num_predict", "1"}, "top_k 1": {"top_k", "1"}, "top_p 1.0": {"top_p", "1.0"}, + "min_p 0.05": {"min_p", "0.05"}, "tfs_z 1.0": {"tfs_z", "1.0"}, "typical_p 1.0": {"typical_p", "1.0"}, "repeat_last_n 1": {"repeat_last_n", "1"},