diff --git a/api/types.go b/api/types.go
index d1810726..e98a8d56 100644
--- a/api/types.go
+++ b/api/types.go
@@ -34,3 +34,14 @@ type GenerateRequest struct {
 type GenerateResponse struct {
 	Response string `json:"response"`
 }
+
+// TokenResponse is the JSON object emitted for each generated token.
+type TokenResponse struct {
+	// Choices holds the generated text fragments carried by this response.
+	Choices []TokenResponseChoice `json:"choices"`
+}
+
+// TokenResponseChoice is a single piece of generated text within a TokenResponse.
+type TokenResponseChoice struct {
+	Text string `json:"text"`
+}
diff --git a/go.mod b/go.mod
index 335a60c2..ece81ae2 100644
--- a/go.mod
+++ b/go.mod
@@ -21,6 +21,7 @@ require (
 	github.com/json-iterator/go v1.1.12 // indirect
 	github.com/klauspost/cpuid/v2 v2.2.4 // indirect
 	github.com/leodido/go-urn v1.2.4 // indirect
+	github.com/lithammer/fuzzysearch v1.1.8
 	github.com/mattn/go-isatty v0.0.19 // indirect
 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
 	github.com/modern-go/reflect2 v1.0.2 // indirect
diff --git a/go.sum b/go.sum
index 155236b3..ef8f65ec 100644
--- a/go.sum
+++ b/go.sum
@@ -37,6 +37,8 @@ github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZX
 github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
 github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q=
 github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4=
+github.com/lithammer/fuzzysearch v1.1.8 h1:/HIuJnjHuXS8bKaiTMeeDlW2/AyIWk2brx1V8LFgLN4=
+github.com/lithammer/fuzzysearch v1.1.8/go.mod h1:IdqeyBClc3FFqSzYq/MXESsS4S0FsZ5ajtkr5xPLts4=
 github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
 github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
@@ -68,19 +70,50 @@ github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS
 github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k= golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.10.0 h1:LKqV2xt9+kDzSTfOhx4FrkEBcMrAgHSYgzywV9zcGmM= golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys 
v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s= golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.10.0 h1:UpjohKhiEgNc0CSauXmwYftY1+LlaC75SJwh0SgCX58= golang.org/x/text v0.10.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod 
h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
 google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=
diff --git a/server/routes.go b/server/routes.go
index 5eb2a048..f92fc0b5 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -1,25 +1,33 @@
 package server
 
 import (
+	"encoding/json"
 	"fmt"
 	"io"
 	"log"
 	"net"
 	"net/http"
+	"path"
 	"runtime"
+	"strings"
+	"text/template"
 
 	"github.com/gin-gonic/gin"
-	llama "github.com/jmorganca/ollama/llama"
+	"github.com/lithammer/fuzzysearch/fuzzy"
 
 	"github.com/jmorganca/ollama/api"
+	"github.com/jmorganca/ollama/llama"
 )
 
+// templates holds every prompt template matching templates/*.prompt, parsed once at start-up.
+// The glob is resolved against the working directory and Must panics if parsing fails.
+var templates = template.Must(template.ParseGlob("templates/*.prompt"))
+
 func generate(c *gin.Context) {
 	// TODO: these should be request parameters
 	gpulayers := 1
 	tokens := 512
 	threads := runtime.NumCPU()
-	// TODO: set prompt from template
 
 	var req api.GenerateRequest
 	if err := c.ShouldBindJSON(&req); err != nil {
@@ -33,6 +41,25 @@ func generate(c *gin.Context) {
 		return
 	}
+
+	templateNames := make([]string, 0, len(templates.Templates()))
+	for _, tmpl := range templates.Templates() {
+		templateNames = append(templateNames, tmpl.Name())
+	}
+
+	// NOTE(review): this matches the prompt text against template names; presumably the model name was intended - confirm.
+	match, _ := matchRankOne(path.Base(req.Prompt), templateNames)
+	if tmpl := templates.Lookup(match); tmpl != nil {
+		var sb strings.Builder
+		if err := tmpl.Execute(&sb, req); err != nil {
+			fmt.Println("Prompt template failed:", err.Error())
+			// Report the failure instead of returning an empty 200 response.
+			c.AbortWithStatus(http.StatusInternalServerError)
+			return
+		}
+
+		req.Prompt = sb.String()
+	}
 
 	ch := make(chan string)
 
 	go func() {
@@ -47,11 +74,29 @@ func generate(c *gin.Context) {
 	}()
 
 	c.Stream(func(w io.Writer) bool {
-		tok, ok := <-ch
+		token, ok := <-ch
 		if !ok {
 			return false
 		}
-		c.SSEvent("token", tok)
+
+		resp := api.TokenResponse{
+			Choices: []api.TokenResponseChoice{
+				{
+					Text: token,
+				},
+			},
+		}
+
+		bts, err := json.Marshal(resp)
+		if err != nil {
+			return false
+		}
+
+		bts = append(bts, '\n')
+		if _, err := w.Write(bts); err != nil {
+			return false
+		}
+
 		return true
 	})
 }
@@ -94,3 +139,20 @@ func Serve(ln net.Listener) error {
 
 	return s.Serve(ln)
 }
+
+// matchRankOne returns the entry of targets closest to source, measured by
+// Levenshtein distance, together with that distance. A smaller distance is a
+// better match; ties keep the earliest target. It returns "" and -1 when
+// targets is empty.
+func matchRankOne(source string, targets []string) (bestMatch string, bestRank int) {
+	bestRank = -1
+	for _, target := range targets {
+		// Keep the smallest distance seen so far; bestRank < 0 means nothing has been ranked yet.
+		if rank := fuzzy.LevenshteinDistance(source, target); bestRank < 0 || rank < bestRank {
+			bestRank = rank
+			bestMatch = target
+		}
+	}
+
+	return bestMatch, bestRank
+}
diff --git a/templates/alpaca.prompt b/templates/alpaca.prompt
index e875dd8b..e0574d25 100644
--- a/templates/alpaca.prompt
+++ b/templates/alpaca.prompt
@@ -1,7 +1,7 @@
 Below is an instruction that describes a task. Write a response that appropriately completes the request.
 
 ### Instruction:
-{{ prompt }}
+{{ .Prompt }}
 
 ### Response:
 
diff --git a/templates/falcon.prompt b/templates/falcon.prompt
index a16fedd9..b0aaf3d7 100644
--- a/templates/falcon.prompt
+++ b/templates/falcon.prompt
@@ -1,3 +1,3 @@
 A helpful assistant who helps the user with any questions asked.
-User: {{ prompt }} +User: {{ .Prompt }} Assistant: diff --git a/templates/gpt4.prompt b/templates/gpt4.prompt index fd611936..0bf3e5f4 100644 --- a/templates/gpt4.prompt +++ b/templates/gpt4.prompt @@ -1,5 +1,5 @@ ### Instruction: -{{ prompt }} +{{ .Prompt }} ### Response: diff --git a/templates/hermes.prompt b/templates/hermes.prompt index fd611936..0bf3e5f4 100644 --- a/templates/hermes.prompt +++ b/templates/hermes.prompt @@ -1,5 +1,5 @@ ### Instruction: -{{ prompt }} +{{ .Prompt }} ### Response: diff --git a/templates/mpt.prompt b/templates/mpt.prompt index 3bbc09fc..4955ee3a 100644 --- a/templates/mpt.prompt +++ b/templates/mpt.prompt @@ -1,4 +1,4 @@ Below is an instruction that describes a task. Write a response that appropriately completes the request. Be concise. Once the request is completed, include no other text. ### Instruction: -{{ prompt }} +{{ .Prompt }} ### Response: diff --git a/templates/oasst.prompt b/templates/oasst.prompt index c4f94382..ed37be46 100644 --- a/templates/oasst.prompt +++ b/templates/oasst.prompt @@ -1 +1 @@ -{{ prompt }} +{{ .Prompt }} diff --git a/templates/orca.prompt b/templates/orca.prompt index f9b5616e..3908fcde 100644 --- a/templates/orca.prompt +++ b/templates/orca.prompt @@ -2,6 +2,6 @@ You are an AI assistant that follows instruction extremely well. Help as much as you can. 
### User: -{{ prompt }} +{{ .Prompt }} ### Response: diff --git a/templates/qlora.prompt b/templates/qlora.prompt index ca6d4a5b..3f47bd55 100644 --- a/templates/qlora.prompt +++ b/templates/qlora.prompt @@ -1,2 +1,2 @@ -### Human: {{ prompt }} +### Human: {{ .Prompt }} ### Assistant: diff --git a/templates/tulu.prompt b/templates/tulu.prompt index 09cfa1ad..1c4d9c82 100644 --- a/templates/tulu.prompt +++ b/templates/tulu.prompt @@ -1,4 +1,4 @@ -{{ prompt }} +{{ .Prompt }} diff --git a/templates/ultralm.prompt b/templates/ultralm.prompt index a1d9ebac..2b692517 100644 --- a/templates/ultralm.prompt +++ b/templates/ultralm.prompt @@ -1,2 +1,2 @@ -USER: {{ prompt }} +USER: {{ .Prompt }} ASSISTANT: diff --git a/templates/vicuna.prompt b/templates/vicuna.prompt index f4a6bc6c..835d5023 100644 --- a/templates/vicuna.prompt +++ b/templates/vicuna.prompt @@ -1,4 +1,4 @@ A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. -USER: {{ prompt }} +USER: {{ .Prompt }} ASSISTANT: diff --git a/templates/wizardcoder.prompt b/templates/wizardcoder.prompt index ab3dcdab..263c4440 100644 --- a/templates/wizardcoder.prompt +++ b/templates/wizardcoder.prompt @@ -1,5 +1,5 @@ Below is an instruction that describes a task. Write a response that appropriately completes the request -### Instruction: {{ prompt }} +### Instruction: {{ .Prompt }} ### Response: diff --git a/templates/wizardlm.prompt b/templates/wizardlm.prompt index 66b76a10..a0574e46 100644 --- a/templates/wizardlm.prompt +++ b/templates/wizardlm.prompt @@ -1,2 +1,2 @@ -{{ prompt }} +{{ .Prompt }} ### Response: