deprecate modelfile embed command (#759)

This commit is contained in:
Bruce MacDonald 2023-10-16 11:07:37 -04:00 committed by GitHub
parent 06bcfbd629
commit a0c3e989de
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 19 additions and 301 deletions

View file

@ -78,18 +78,12 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
spinner.Stop() spinner.Stop()
} }
currentDigest = resp.Digest currentDigest = resp.Digest
switch { // pulling
case strings.Contains(resp.Status, "embeddings"): bar = progressbar.DefaultBytes(
bar = progressbar.Default(resp.Total, resp.Status) resp.Total,
bar.Set64(resp.Completed) resp.Status,
default: )
// pulling bar.Set64(resp.Completed)
bar = progressbar.DefaultBytes(
resp.Total,
resp.Status,
)
bar.Set64(resp.Completed)
}
} else if resp.Digest == currentDigest && resp.Digest != "" { } else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set64(resp.Completed) bar.Set64(resp.Completed)
} else { } else {

View file

@ -12,7 +12,6 @@ A model file is the blueprint to create and share models with Ollama.
- [FROM (Required)](#from-required) - [FROM (Required)](#from-required)
- [Build from llama2](#build-from-llama2) - [Build from llama2](#build-from-llama2)
- [Build from a bin file](#build-from-a-bin-file) - [Build from a bin file](#build-from-a-bin-file)
- [EMBED](#embed)
- [PARAMETER](#parameter) - [PARAMETER](#parameter)
- [Valid Parameters and Values](#valid-parameters-and-values) - [Valid Parameters and Values](#valid-parameters-and-values)
- [TEMPLATE](#template) - [TEMPLATE](#template)
@ -91,17 +90,6 @@ FROM ./ollama-model.bin
This bin file location should be specified as an absolute path or relative to the `Modelfile` location. This bin file location should be specified as an absolute path or relative to the `Modelfile` location.
### EMBED
The `EMBED` instruction is used to add embeddings of files to a model. This is useful for adding custom data that the model can reference when generating an answer. Note that currently only text files are supported, formatted with each line as one embedding.
```modelfile
FROM <model name>:<tag>
EMBED <file path>.txt
EMBED <different file path>.txt
EMBED <path to directory>/*.txt
```
### PARAMETER ### PARAMETER
The `PARAMETER` instruction defines a parameter that can be set when the model is run. The `PARAMETER` instruction defines a parameter that can be set when the model is run.

1
go.mod
View file

@ -45,7 +45,6 @@ require (
golang.org/x/sys v0.11.0 // indirect golang.org/x/sys v0.11.0 // indirect
golang.org/x/term v0.10.0 golang.org/x/term v0.10.0
golang.org/x/text v0.10.0 // indirect golang.org/x/text v0.10.0 // indirect
gonum.org/v1/gonum v0.13.0
google.golang.org/protobuf v1.30.0 // indirect google.golang.org/protobuf v1.30.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect
) )

2
go.sum
View file

@ -145,8 +145,6 @@ golang.org/x/text v0.10.0 h1:UpjohKhiEgNc0CSauXmwYftY1+LlaC75SJwh0SgCX58=
golang.org/x/text v0.10.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/text v0.10.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gonum.org/v1/gonum v0.13.0 h1:a0T3bh+7fhRyqeNbiC3qVHYmkiQgit3wnNan/2c0HMM=
gonum.org/v1/gonum v0.13.0/go.mod h1:/WPYRckkfWrhWefxyYTfrTtQR0KH4iyHNuzxqXAKyAU=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=

View file

@ -40,7 +40,7 @@ func Parse(reader io.Reader) ([]Command, error) {
command.Args = string(fields[1]) command.Args = string(fields[1])
// copy command for validation // copy command for validation
modelCommand = command modelCommand = command
case "LICENSE", "TEMPLATE", "SYSTEM", "PROMPT", "EMBED", "ADAPTER": case "LICENSE", "TEMPLATE", "SYSTEM", "PROMPT", "ADAPTER":
command.Name = string(bytes.ToLower(fields[0])) command.Name = string(bytes.ToLower(fields[0]))
command.Args = string(fields[1]) command.Args = string(fields[1])
case "PARAMETER": case "PARAMETER":
@ -51,6 +51,8 @@ func Parse(reader io.Reader) ([]Command, error) {
command.Name = string(fields[0]) command.Name = string(fields[0])
command.Args = string(fields[1]) command.Args = string(fields[1])
case "EMBED":
return nil, fmt.Errorf("deprecated command: EMBED is no longer supported, use the /embed API endpoint instead")
default: default:
if !bytes.HasPrefix(fields[0], []byte("#")) { if !bytes.HasPrefix(fields[0], []byte("#")) {
// log a warning for unknown commands // log a warning for unknown commands

View file

@ -1,7 +1,6 @@
package server package server
import ( import (
"bufio"
"bytes" "bytes"
"context" "context"
"crypto/sha256" "crypto/sha256"
@ -26,7 +25,6 @@ import (
"github.com/jmorganca/ollama/api" "github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/llm" "github.com/jmorganca/ollama/llm"
"github.com/jmorganca/ollama/parser" "github.com/jmorganca/ollama/parser"
"github.com/jmorganca/ollama/vector"
"github.com/jmorganca/ollama/version" "github.com/jmorganca/ollama/version"
) )
@ -49,10 +47,9 @@ type Model struct {
Digest string Digest string
ConfigDigest string ConfigDigest string
Options map[string]interface{} Options map[string]interface{}
Embeddings []vector.Embedding
} }
func (m *Model) Prompt(request api.GenerateRequest, embedding string) (string, error) { func (m *Model) Prompt(request api.GenerateRequest) (string, error) {
t := m.Template t := m.Template
if request.Template != "" { if request.Template != "" {
t = request.Template t = request.Template
@ -67,7 +64,6 @@ func (m *Model) Prompt(request api.GenerateRequest, embedding string) (string, e
First bool First bool
System string System string
Prompt string Prompt string
Embed string
// deprecated: versions <= 0.0.7 used this to omit the system prompt // deprecated: versions <= 0.0.7 used this to omit the system prompt
Context []int Context []int
@ -77,7 +73,6 @@ func (m *Model) Prompt(request api.GenerateRequest, embedding string) (string, e
vars.System = m.System vars.System = m.System
vars.Prompt = request.Prompt vars.Prompt = request.Prompt
vars.Context = request.Context vars.Context = request.Context
vars.Embed = embedding
if request.System != "" { if request.System != "" {
vars.System = request.System vars.System = request.System
@ -190,15 +185,9 @@ func GetModel(name string) (*Model, error) {
model.ModelPath = filename model.ModelPath = filename
model.OriginalModel = layer.From model.OriginalModel = layer.From
case "application/vnd.ollama.image.embed": case "application/vnd.ollama.image.embed":
file, err := os.Open(filename) // Deprecated in versions > 0.1.2
if err != nil { // TODO: remove this warning in a future version
return nil, fmt.Errorf("failed to open file: %s", filename) log.Print("WARNING: model contains embeddings, but embeddings in modelfiles have been deprecated and will be ignored.")
}
defer file.Close()
if err = json.NewDecoder(file).Decode(&model.Embeddings); err != nil {
return nil, err
}
case "application/vnd.ollama.image.adapter": case "application/vnd.ollama.image.adapter":
model.AdapterPaths = append(model.AdapterPaths, filename) model.AdapterPaths = append(model.AdapterPaths, filename)
case "application/vnd.ollama.image.template": case "application/vnd.ollama.image.template":
@ -310,13 +299,11 @@ func CreateModel(ctx context.Context, workDir, name string, path string, fn func
var layers []*LayerReader var layers []*LayerReader
params := make(map[string][]string) params := make(map[string][]string)
var sourceParams map[string]any var sourceParams map[string]any
embed := EmbeddingParams{fn: fn}
for _, c := range commands { for _, c := range commands {
log.Printf("[%s] - %s\n", c.Name, c.Args) log.Printf("[%s] - %s\n", c.Name, c.Args)
switch c.Name { switch c.Name {
case "model": case "model":
fn(api.ProgressResponse{Status: "looking for model"}) fn(api.ProgressResponse{Status: "looking for model"})
embed.model = c.Args
mp := ParseModelPath(c.Args) mp := ParseModelPath(c.Args)
mf, _, err := GetManifest(mp) mf, _, err := GetManifest(mp)
@ -340,7 +327,6 @@ func CreateModel(ctx context.Context, workDir, name string, path string, fn func
return err return err
} }
} else { } else {
embed.model = modelFile
// create a model from this specified file // create a model from this specified file
fn(api.ProgressResponse{Status: "creating model layer"}) fn(api.ProgressResponse{Status: "creating model layer"})
file, err := os.Open(modelFile) file, err := os.Open(modelFile)
@ -421,12 +407,6 @@ func CreateModel(ctx context.Context, workDir, name string, path string, fn func
layers = append(layers, newLayer) layers = append(layers, newLayer)
} }
} }
case "embed":
embedFilePath, err := filenameWithPath(path, c.Args)
if err != nil {
return err
}
embed.files = append(embed.files, embedFilePath)
case "adapter": case "adapter":
fn(api.ProgressResponse{Status: fmt.Sprintf("creating model %s layer", c.Name)}) fn(api.ProgressResponse{Status: fmt.Sprintf("creating model %s layer", c.Name)})
@ -517,18 +497,8 @@ func CreateModel(ctx context.Context, workDir, name string, path string, fn func
} }
l.MediaType = "application/vnd.ollama.image.params" l.MediaType = "application/vnd.ollama.image.params"
layers = append(layers, l) layers = append(layers, l)
// apply these parameters to the embedding options, in case embeddings need to be generated using this model
embed.opts = formattedParams
} }
// generate the embedding layers
embeddingLayers, err := embeddingLayers(workDir, embed)
if err != nil {
return err
}
layers = append(layers, embeddingLayers...)
digests, err := getLayerDigests(layers) digests, err := getLayerDigests(layers)
if err != nil { if err != nil {
return err return err
@ -572,146 +542,6 @@ func CreateModel(ctx context.Context, workDir, name string, path string, fn func
return nil return nil
} }
type EmbeddingParams struct {
model string
opts map[string]interface{}
files []string // paths to files to embed
fn func(resp api.ProgressResponse)
}
// embeddingLayers loads the associated LLM and generates the embeddings to be stored from an input file
func embeddingLayers(workDir string, e EmbeddingParams) ([]*LayerReader, error) {
layers := []*LayerReader{}
if len(e.files) > 0 {
// check if the model is a file path or a model name
model, err := GetModel(e.model)
if err != nil {
if !strings.Contains(err.Error(), "couldn't open file") {
return nil, fmt.Errorf("unexpected error opening model to generate embeddings: %v", err)
}
// the model may be a file path, create a model from this file
model = &Model{ModelPath: e.model}
}
if err := load(context.Background(), workDir, model, e.opts, defaultSessionDuration); err != nil {
return nil, fmt.Errorf("load model to generate embeddings: %v", err)
}
// this will be used to check if we already have embeddings for a file
modelInfo, err := os.Stat(model.ModelPath)
if err != nil {
return nil, fmt.Errorf("failed to get model file info: %v", err)
}
addedFiles := make(map[string]bool) // keep track of files that have already been added
for _, filePattern := range e.files {
matchingFiles, err := filepath.Glob(filePattern)
if err != nil {
return nil, fmt.Errorf("could not find files with pattern %s: %w", filePattern, err)
}
for _, filePath := range matchingFiles {
if addedFiles[filePath] {
continue
}
addedFiles[filePath] = true
// check if we already have embeddings for this file path
layerIdentifier := fmt.Sprintf("%s:%s:%s:%d", filePath, e.model, modelInfo.ModTime().Format("2006-01-02 15:04:05"), modelInfo.Size())
digest, _ := GetSHA256Digest(strings.NewReader(layerIdentifier))
existing, err := existingFileEmbeddings(digest)
if err != nil {
return nil, fmt.Errorf("failed to check existing embeddings for file %s: %v", filePath, err)
}
// TODO: check file type
f, err := os.Open(filePath)
if err != nil {
return nil, fmt.Errorf("could not open embed file: %w", err)
}
scanner := bufio.NewScanner(f)
scanner.Split(bufio.ScanLines)
data := []string{}
for scanner.Scan() {
data = append(data, scanner.Text())
}
f.Close()
// the digest of the file is set here so that the client knows a new operation is in progress
fileDigest, _ := GetSHA256Digest(bytes.NewReader([]byte(filePath)))
embeddings := []vector.Embedding{}
for i, d := range data {
if strings.TrimSpace(d) == "" {
continue
}
e.fn(api.ProgressResponse{
Status: fmt.Sprintf("creating embeddings for file %s", filePath),
Digest: fileDigest,
Total: int64(len(data) - 1),
Completed: int64(i),
})
if len(existing[d]) > 0 {
// already have an embedding for this line
embeddings = append(embeddings, vector.Embedding{Data: d, Vector: existing[d]})
continue
}
embed, err := loaded.llm.Embedding(context.Background(), d)
if err != nil {
log.Printf("failed to generate embedding for '%s' line %d: %v", filePath, i+1, err)
continue
}
embeddings = append(embeddings, vector.Embedding{Data: d, Vector: embed})
}
b, err := json.Marshal(embeddings)
if err != nil {
return nil, fmt.Errorf("failed to encode embeddings: %w", err)
}
r := bytes.NewReader(b)
layer := &LayerReader{
Layer: Layer{
MediaType: "application/vnd.ollama.image.embed",
Digest: digest,
Size: r.Size(),
},
Reader: r,
}
layers = append(layers, layer)
}
}
}
return layers, nil
}
// existingFileEmbeddings checks if we already have embeddings for a file and loads them into a look-up map
func existingFileEmbeddings(digest string) (map[string][]float64, error) {
path, err := GetBlobsPath(digest)
if err != nil {
return nil, fmt.Errorf("embeddings blobs path: %w", err)
}
existingFileEmbeddings := make(map[string][]float64)
if _, err := os.Stat(path); err == nil {
// already have some embeddings for this file, load embeddings previously generated
file, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("failed to open existing embedding file: %s", err)
}
defer file.Close()
existing := []vector.Embedding{}
if err = json.NewDecoder(file).Decode(&existing); err != nil {
return nil, err
}
for _, e := range existing {
existingFileEmbeddings[e.Data] = e.Vector
}
}
return existingFileEmbeddings, nil
}
func removeLayerFromLayers(layers []*LayerReader, mediaType string) []*LayerReader { func removeLayerFromLayers(layers []*LayerReader, mediaType string) []*LayerReader {
return slices.DeleteFunc(layers, func(layer *LayerReader) bool { return slices.DeleteFunc(layers, func(layer *LayerReader) bool {
return layer.MediaType == mediaType return layer.MediaType == mediaType
@ -727,8 +557,7 @@ func SaveLayers(layers []*LayerReader, fn func(resp api.ProgressResponse), force
} }
_, err = os.Stat(fp) _, err = os.Stat(fp)
// note: embed layers are always written since their digest doesnt indicate anything about the contents if os.IsNotExist(err) || force {
if os.IsNotExist(err) || force || layer.MediaType == "application/vnd.ollama.image.embed" {
fn(api.ProgressResponse{Status: fmt.Sprintf("writing layer %s", layer.Digest)}) fn(api.ProgressResponse{Status: fmt.Sprintf("writing layer %s", layer.Digest)})
out, err := os.Create(fp) out, err := os.Create(fp)

View file

@ -12,7 +12,7 @@ func TestModelPrompt(t *testing.T) {
Template: "a{{ .Prompt }}b", Template: "a{{ .Prompt }}b",
Prompt: "<h1>", Prompt: "<h1>",
} }
s, err := m.Prompt(req, "") s, err := m.Prompt(req)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }

View file

@ -23,11 +23,9 @@ import (
"github.com/gin-contrib/cors" "github.com/gin-contrib/cors"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"gonum.org/v1/gonum/mat"
"github.com/jmorganca/ollama/api" "github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/llm" "github.com/jmorganca/ollama/llm"
"github.com/jmorganca/ollama/vector"
) )
var mode string = gin.DebugMode var mode string = gin.DebugMode
@ -47,8 +45,7 @@ func init() {
var loaded struct { var loaded struct {
mu sync.Mutex mu sync.Mutex
llm llm.LLM llm llm.LLM
Embeddings []vector.Embedding
expireAt time.Time expireAt time.Time
expireTimer *time.Timer expireTimer *time.Timer
@ -90,11 +87,6 @@ func load(ctx context.Context, workDir string, model *Model, reqOpts map[string]
loaded.digest = "" loaded.digest = ""
} }
if model.Embeddings != nil && len(model.Embeddings) > 0 {
opts.EmbeddingOnly = true // this is requried to generate embeddings, completions will still work
loaded.Embeddings = model.Embeddings
}
llmModel, err := llm.New(workDir, model.ModelPath, model.AdapterPaths, opts) llmModel, err := llm.New(workDir, model.ModelPath, model.AdapterPaths, opts)
if err != nil { if err != nil {
return err return err
@ -106,12 +98,12 @@ func load(ctx context.Context, workDir string, model *Model, reqOpts map[string]
loaded.options = opts loaded.options = opts
if opts.NumKeep < 0 { if opts.NumKeep < 0 {
promptWithSystem, err := model.Prompt(api.GenerateRequest{}, "") promptWithSystem, err := model.Prompt(api.GenerateRequest{})
if err != nil { if err != nil {
return err return err
} }
promptNoSystem, err := model.Prompt(api.GenerateRequest{Context: []int{0}}, "") promptNoSystem, err := model.Prompt(api.GenerateRequest{Context: []int{0}})
if err != nil { if err != nil {
return err return err
} }
@ -195,22 +187,7 @@ func GenerateHandler(c *gin.Context) {
checkpointLoaded := time.Now() checkpointLoaded := time.Now()
embedding := "" prompt, err := model.Prompt(req)
if model.Embeddings != nil && len(model.Embeddings) > 0 {
promptEmbed, err := loaded.llm.Embedding(c.Request.Context(), req.Prompt)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
// TODO: set embed_top from specified parameters in modelfile
embed_top := 3
topK := vector.TopK(embed_top, mat.NewVecDense(len(promptEmbed), promptEmbed), loaded.Embeddings)
for _, e := range topK {
embedding = fmt.Sprintf("%s %s", embedding, e.Embedding.Data)
}
}
prompt, err := model.Prompt(req, embedding)
if err != nil { if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return return

View file

@ -1,69 +0,0 @@
package vector
import (
"container/heap"
"sort"
"gonum.org/v1/gonum/mat"
)
type Embedding struct {
Vector []float64 // the embedding vector
Data string // the data represted by the embedding
}
type EmbeddingSimilarity struct {
Embedding Embedding // the embedding that was used to calculate the similarity
Similarity float64 // the similarity between the embedding and the query
}
type Heap []EmbeddingSimilarity
func (h Heap) Len() int { return len(h) }
func (h Heap) Less(i, j int) bool { return h[i].Similarity < h[j].Similarity }
func (h Heap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
func (h *Heap) Push(e any) {
*h = append(*h, e.(EmbeddingSimilarity))
}
func (h *Heap) Pop() interface{} {
old := *h
n := len(old)
x := old[n-1]
*h = old[0 : n-1]
return x
}
// cosineSimilarity is a measure that calculates the cosine of the angle between two vectors.
// This value will range from -1 to 1, where 1 means the vectors are identical.
func cosineSimilarity(vec1, vec2 *mat.VecDense) float64 {
dotProduct := mat.Dot(vec1, vec2)
norms := mat.Norm(vec1, 2) * mat.Norm(vec2, 2)
if norms == 0 {
return 0
}
return dotProduct / norms
}
func TopK(k int, query *mat.VecDense, embeddings []Embedding) []EmbeddingSimilarity {
h := &Heap{}
heap.Init(h)
for _, emb := range embeddings {
similarity := cosineSimilarity(query, mat.NewVecDense(len(emb.Vector), emb.Vector))
heap.Push(h, EmbeddingSimilarity{Embedding: emb, Similarity: similarity})
if h.Len() > k {
heap.Pop(h)
}
}
topK := make([]EmbeddingSimilarity, 0, h.Len())
for h.Len() > 0 {
topK = append(topK, heap.Pop(h).(EmbeddingSimilarity))
}
sort.Slice(topK, func(i, j int) bool {
return topK[i].Similarity > topK[j].Similarity
})
return topK
}