diff --git a/api/types.go b/api/types.go index e861ee3a..48380d3c 100644 --- a/api/types.go +++ b/api/types.go @@ -37,6 +37,7 @@ type GenerateRequest struct { Template string `json:"template"` Context []int `json:"context,omitempty"` Stream *bool `json:"stream,omitempty"` + Raw bool `json:"raw,omitempty"` Options map[string]interface{} `json:"options"` } diff --git a/docs/api.md b/docs/api.md index 2b5938ba..7b136e9d 100644 --- a/docs/api.md +++ b/docs/api.md @@ -46,6 +46,7 @@ Advanced parameters (optional): - `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`) - `context`: the context parameter returned from a previous request to `/generate`, this can be used to keep a short conversational memory - `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects +- `raw`: if `true` no formatting will be applied to the prompt and no context will be returned. You may choose to use the `raw` parameter if you are specifying a full templated prompt in your request to the API, and are managing history yourself. ### Examples @@ -136,6 +137,36 @@ If `stream` is set to `false`, the response will be a single JSON object: } ``` +#### Request + +In some cases you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable formatting and context. + +```shell +curl -X POST http://localhost:11434/api/generate -d '{ + "model": "mistral", + "prompt": "[INST] why is the sky blue? [/INST]", + "raw": true, + "stream": false +}' +``` + +#### Response + +```json +{ + "model": "mistral", + "created_at": "2023-11-03T15:36:02.583064Z", + "response": " The sky appears blue because of a phenomenon called Rayleigh scattering.", + "done": true, + "total_duration": 14648695333, + "load_duration": 3302671417, + "prompt_eval_count": 14, + "prompt_eval_duration": 286243000, + "eval_count": 129, + "eval_duration": 10931424000 +} +``` + ## Create a Model ```shell diff --git a/server/routes.go b/server/routes.go index 0de81abd..9884afbf 100644 --- a/server/routes.go +++ b/server/routes.go @@ -158,9 +158,14 @@ func GenerateHandler(c *gin.Context) { return } - if req.Model == "" { + // validate the request + switch { + case req.Model == "": c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"}) return + case req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0): + c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"}) + return } model, err := GetModel(req.Model) @@ -189,10 +194,13 @@ func GenerateHandler(c *gin.Context) { checkpointLoaded := time.Now() - prompt, err := model.Prompt(req) - if err != nil { - c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) - return + prompt := req.Prompt + if !req.Raw { + prompt, err = model.Prompt(req) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return + } } ch := make(chan any) @@ -215,6 +223,11 @@ func GenerateHandler(c *gin.Context) { r.LoadDuration = checkpointLoaded.Sub(checkpointStart) } + if req.Raw { + // in raw mode the client must manage history on their own + r.Context = nil + } + ch <- r }