From 2cb0fa7d40dcc781a69ea1aebd474fe37153d935 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Fri, 24 Nov 2023 11:57:20 -0800 Subject: [PATCH 1/4] split from into one or more models --- llm/ggml.go | 65 ++++++++++++++-------- llm/gguf.go | 137 ++++++++++++++++++++++++++++++++++++----------- server/images.go | 48 +++++++++++------ 3 files changed, 179 insertions(+), 71 deletions(-) diff --git a/llm/ggml.go b/llm/ggml.go index 608085d0..18ae4bd6 100644 --- a/llm/ggml.go +++ b/llm/ggml.go @@ -7,9 +7,10 @@ import ( ) type GGML struct { - magic uint32 container model + + Size int64 } const ( @@ -82,7 +83,7 @@ type model interface { type container interface { Name() string - Decode(io.Reader) (model, error) + Decode(*readOffset) (model, error) } type containerGGML struct{} @@ -91,7 +92,7 @@ func (c *containerGGML) Name() string { return "ggml" } -func (c *containerGGML) Decode(r io.Reader) (model, error) { +func (c *containerGGML) Decode(ro *readOffset) (model, error) { return nil, nil } @@ -103,9 +104,9 @@ func (c *containerGGMF) Name() string { return "ggmf" } -func (c *containerGGMF) Decode(r io.Reader) (model, error) { +func (c *containerGGMF) Decode(ro *readOffset) (model, error) { var version uint32 - binary.Read(r, binary.LittleEndian, &version) + binary.Read(ro, binary.LittleEndian, &version) switch version { case 1: @@ -125,9 +126,9 @@ func (c *containerGGJT) Name() string { return "ggjt" } -func (c *containerGGJT) Decode(r io.Reader) (model, error) { +func (c *containerGGJT) Decode(ro *readOffset) (model, error) { var version uint32 - binary.Read(r, binary.LittleEndian, &version) + binary.Read(ro, binary.LittleEndian, &version) switch version { case 1, 2, 3: @@ -139,7 +140,7 @@ func (c *containerGGJT) Decode(r io.Reader) (model, error) { // different model types may have different layouts for hyperparameters var llama llamaModel - binary.Read(r, binary.LittleEndian, &llama.hyperparameters) + binary.Read(ro, binary.LittleEndian, &llama.hyperparameters) return &llama, nil } @@ -151,9 +152,9 @@ func (c *containerLORA) Name() string { return "ggla" } -func (c *containerLORA) Decode(r io.Reader) (model, error) { +func (c *containerLORA) Decode(ro *readOffset) (model, error) { var version uint32 - binary.Read(r, binary.LittleEndian, &version) + binary.Read(ro, binary.LittleEndian, &version) switch version { case 1: @@ -180,33 +181,51 @@ const ( ) func DecodeGGML(r io.Reader) (*GGML, error) { - var ggml GGML - binary.Read(r, binary.LittleEndian, &ggml.magic) + ro := readOffset{Reader: r} - switch ggml.magic { + var magic uint32 + if err := binary.Read(&ro, binary.LittleEndian, &magic); err != nil { + return nil, err + } + + var c container + switch magic { case FILE_MAGIC_GGML: - ggml.container = &containerGGML{} + c = &containerGGML{} case FILE_MAGIC_GGMF: - ggml.container = &containerGGMF{} + c = &containerGGMF{} case FILE_MAGIC_GGJT: - ggml.container = &containerGGJT{} + c = &containerGGJT{} case FILE_MAGIC_GGLA: - ggml.container = &containerLORA{} + c = &containerLORA{} case FILE_MAGIC_GGUF_LE: - ggml.container = &containerGGUF{bo: binary.LittleEndian} + c = &containerGGUF{bo: binary.LittleEndian} case FILE_MAGIC_GGUF_BE: - ggml.container = &containerGGUF{bo: binary.BigEndian} + c = &containerGGUF{bo: binary.BigEndian} default: return nil, errors.New("invalid file magic") } - model, err := ggml.Decode(r) + model, err := c.Decode(&ro) if err != nil { return nil, err } - ggml.model = model - // final model type - return &ggml, nil + return &GGML{ + container: c, + model: model, + Size: ro.offset, + }, nil +} + +type readOffset struct { + io.Reader + offset int64 +} + +func (r *readOffset) Read(p []byte) (int, error) { + n, err := r.Reader.Read(p) + r.offset += int64(n) + return n, err } diff --git a/llm/gguf.go b/llm/gguf.go index 12f98abe..dc883187 100644 --- a/llm/gguf.go +++ b/llm/gguf.go @@ -23,26 +23,24 @@ type containerGGUF struct { NumTensor uint64 NumKV uint64 } - - parameters uint64 } func (c *containerGGUF) Name() string { return "gguf" } -func (c *containerGGUF) Decode(r io.Reader) (model, error) { - binary.Read(r, c.bo, &c.Version) +func (c *containerGGUF) Decode(ro *readOffset) (model, error) { + binary.Read(ro, c.bo, &c.Version) switch c.Version { case 1: - binary.Read(r, c.bo, &c.V1) + binary.Read(ro, c.bo, &c.V1) default: - binary.Read(r, c.bo, &c.V2) + binary.Read(ro, c.bo, &c.V2) } model := newGGUFModel(c) - if err := model.Decode(r); err != nil { + if err := model.Decode(ro); err != nil { return nil, err } @@ -67,9 +65,23 @@ const ( type kv map[string]any +type tensor struct { + name string + kind uint32 + offset uint64 + size uint64 + + // shape is the number of elements in each dimension + shape [4]uint64 +} + type ggufModel struct { *containerGGUF + kv + tensors []tensor + + parameters uint64 } func newGGUFModel(container *containerGGUF) *ggufModel { @@ -142,49 +154,49 @@ func (llm *ggufModel) FileType() string { return "unknown" } -func (llm *ggufModel) Decode(r io.Reader) error { +func (llm *ggufModel) Decode(ro *readOffset) error { // decode key-values for i := 0; uint64(i) < llm.NumKV(); i++ { - k, err := llm.readString(r) + k, err := llm.readString(ro) if err != nil { return err } - vtype := llm.readU32(r) + vtype := llm.readU32(ro) var v any switch vtype { case ggufTypeUint8: - v = llm.readU8(r) + v = llm.readU8(ro) case ggufTypeInt8: - v = llm.readI8(r) + v = llm.readI8(ro) case ggufTypeUint16: - v = llm.readU16(r) + v = llm.readU16(ro) case ggufTypeInt16: - v = llm.readI16(r) + v = llm.readI16(ro) case ggufTypeUint32: - v = llm.readU32(r) + v = llm.readU32(ro) case ggufTypeInt32: - v = llm.readI32(r) + v = llm.readI32(ro) case ggufTypeUint64: - v = llm.readU64(r) + v = llm.readU64(ro) case ggufTypeInt64: - v = llm.readI64(r) + v = llm.readI64(ro) case ggufTypeFloat32: - v = llm.readF32(r) + v = llm.readF32(ro) case ggufTypeFloat64: - v = llm.readF64(r) + v = llm.readF64(ro) case ggufTypeBool: - v = llm.readBool(r) + v = llm.readBool(ro) case ggufTypeString: - s, err := llm.readString(r) + s, err := llm.readString(ro) if err != nil { return err } v = s case ggufTypeArray: - a, err := llm.readArray(r) + a, err := llm.readArray(ro) if err != nil { return err } @@ -199,21 +211,84 @@ func (llm *ggufModel) Decode(r io.Reader) error { // decode tensors for i := 0; uint64(i) < llm.NumTensor(); i++ { - if _, err := llm.readString(r); err != nil { + name, err := llm.readString(ro) + if err != nil { return err } - dimensions := llm.readU32(r) + dims := llm.readU32(ro) - var elements uint64 = 1 - for i := 0; uint32(i) < dimensions; i++ { - elements *= llm.readU64(r) + shape := [4]uint64{1, 1, 1, 1} + for i := 0; uint32(i) < dims; i++ { + shape[i] = llm.readU64(ro) } - llm.readU32(r) // type - llm.readU64(r) // offset + kind := llm.readU32(ro) + offset := llm.readU64(ro) - llm.parameters += elements + var blockSize uint64 + switch { + case kind < 2: + blockSize = 1 + case kind < 10: + blockSize = 32 + default: + blockSize = 256 + } + + var typeSize uint64 + switch kind { + case 0: // FP32 + typeSize = 4 + case 1: // FP16 + typeSize = 2 + case 2: // Q4_0 + typeSize = 2 + blockSize/2 + case 3: // Q4_1 + typeSize = 2 + 2 + blockSize/2 + case 6: // Q5_0 + typeSize = 2 + 4 + blockSize/2 + case 7: // Q5_1 + typeSize = 2 + 2 + 4 + blockSize/2 + case 8: // Q8_0 + typeSize = 2 + blockSize + case 9: // Q8_1 + typeSize = 4 + 4 + blockSize + case 10: // Q2_K + typeSize = blockSize/16 + blockSize/4 + 2 + 2 + case 11: // Q3_K + typeSize = blockSize/8 + blockSize/4 + 12 + 2 + case 12: // Q4_K + typeSize = 2 + 2 + 12 + blockSize/2 + case 13: // Q5_K + typeSize = 2 + 2 + 12 + blockSize/8 + blockSize/2 + case 14: // Q6_K + typeSize = blockSize/2 + blockSize/4 + blockSize/16 + 2 + } + + parameters := shape[0] * shape[1] * shape[2] * shape[3] + size := parameters * typeSize / blockSize + + llm.tensors = append(llm.tensors, tensor{ + name: name, + kind: kind, + offset: offset, + size: size, + shape: shape, + }) + + llm.parameters += parameters + } + + alignment, ok := llm.kv["general.alignment"].(uint32) + if !ok { + alignment = 32 + } + + io.CopyN(io.Discard, ro, int64(alignment)-ro.offset%int64(alignment)) + for _, tensor := range llm.tensors { + padded := (int64(tensor.size) + int64(alignment) - 1) & ^(int64(alignment) - 1) + io.CopyN(io.Discard, ro, padded) } return nil diff --git a/server/images.go b/server/images.go index 9995fe62..6eb569a3 100644 --- a/server/images.go +++ b/server/images.go @@ -388,24 +388,38 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars } defer bin.Close() - fn(api.ProgressResponse{Status: "creating model layer"}) - ggml, err := llm.DecodeGGML(bin) - if err != nil { - return err + var offset int64 + for { + fn(api.ProgressResponse{Status: "creating model layer"}) + + bin.Seek(offset, io.SeekStart) + ggml, err := llm.DecodeGGML(bin) + if errors.Is(err, io.EOF) { + break + } else if err != nil { + return err + } + + config.ModelFormat = ggml.Name() + config.ModelFamily = ggml.ModelFamily() + config.ModelType = ggml.ModelType() + config.FileType = ggml.FileType() + + mediatype := mediatype + if ggml.ModelFamily() == "clip" { + mediatype = "application/vnd.ollama.image.projector" + } + + sr := io.NewSectionReader(bin, offset, ggml.Size) + layer, err := NewLayer(sr, mediatype) + if err != nil { + return err + } + + layers.Add(layer) + + offset += ggml.Size } - - config.ModelFormat = ggml.Name() - config.ModelFamily = ggml.ModelFamily() - config.ModelType = ggml.ModelType() - config.FileType = ggml.FileType() - - bin.Seek(0, io.SeekStart) - layer, err := NewLayer(bin, mediatype) - if err != nil { - return err - } - - layers.Add(layer) case "adapter": if strings.HasPrefix(c.Args, "@") { blobPath, err := GetBlobsPath(strings.TrimPrefix(c.Args, "@")) From 998f1785b636b591147f73aef7cf70894771bfd4 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Wed, 29 Nov 2023 11:11:42 -0800 Subject: [PATCH 2/4] add modelfamilies --- server/images.go | 59 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 13 deletions(-) diff --git a/server/images.go b/server/images.go index 6eb569a3..071375e3 100644 --- a/server/images.go +++ b/server/images.go @@ -19,6 +19,8 @@ import ( "strings" "text/template" + "golang.org/x/exp/slices" + "github.com/jmorganca/ollama/api" "github.com/jmorganca/ollama/llm" "github.com/jmorganca/ollama/parser" @@ -131,17 +133,48 @@ type ManifestV2 struct { } type ConfigV2 struct { - ModelFormat string `json:"model_format"` - ModelFamily string `json:"model_family"` - ModelType string `json:"model_type"` - FileType string `json:"file_type"` - RootFS RootFS `json:"rootfs"` + ModelFormat string `json:"model_format"` + ModelFamily string `json:"model_family"` + ModelFamilies []string `json:"model_families"` + ModelType string `json:"model_type"` + FileType string `json:"file_type"` + RootFS RootFS `json:"rootfs"` // required by spec Architecture string `json:"architecture"` OS string `json:"os"` } +func (c *ConfigV2) SetModelFormat(format string) { + if c.ModelFormat == "" { + c.ModelFormat = format + } +} + +func (c *ConfigV2) SetModelFamily(families ...string) { + for _, family := range families { + if c.ModelFamily == "" { + c.ModelFamily = family + } + + if !slices.Contains(c.ModelFamilies, family) { + c.ModelFamilies = append(c.ModelFamilies, family) + } + } +} + +func (c *ConfigV2) SetModelType(modelType string) { + if c.ModelType == "" { + c.ModelType = modelType + } +} + +func (c *ConfigV2) SetFileType(fileType string) { + if c.FileType == "" { + c.FileType = fileType + } +} + type RootFS struct { Type string `json:"type"` DiffIDs []string `json:"diff_ids"` @@ -351,10 +384,10 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars return err } - config.ModelFormat = fromConfig.ModelFormat - config.ModelFamily = fromConfig.ModelFamily - config.ModelType = fromConfig.ModelType - config.FileType = fromConfig.FileType + config.SetModelFormat(fromConfig.ModelFormat) + config.SetModelFamily(append(fromConfig.ModelFamilies, fromConfig.ModelFamily)...) + config.SetModelType(fromConfig.ModelType) + config.SetFileType(fromConfig.FileType) for _, layer := range manifest.Layers { deleteMap[layer.Digest] = struct{}{} @@ -400,10 +433,10 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars return err } - config.ModelFormat = ggml.Name() - config.ModelFamily = ggml.ModelFamily() - config.ModelType = ggml.ModelType() - config.FileType = ggml.FileType() + config.SetModelFormat(ggml.Name()) + config.SetModelFamily(ggml.ModelFamily()) + config.SetModelType(ggml.ModelType()) + config.SetFileType(ggml.FileType()) mediatype := mediatype if ggml.ModelFamily() == "clip" { From 72e7a49aa92c2c727df95f33bfd9b15e9caaae77 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Wed, 29 Nov 2023 10:31:58 -0800 Subject: [PATCH 3/4] seek instead of copyn --- llm/ggml.go | 34 ++++++++++++++++++++------------ llm/gguf.go | 56 ++++++++++++++++++++++++++--------------------------- 2 files changed, 50 insertions(+), 40 deletions(-) diff --git a/llm/ggml.go b/llm/ggml.go index 18ae4bd6..c3e71b88 100644 --- a/llm/ggml.go +++ b/llm/ggml.go @@ -83,7 +83,7 @@ type model interface { type container interface { Name() string - Decode(*readOffset) (model, error) + Decode(*readSeekOffset) (model, error) } type containerGGML struct{} @@ -92,7 +92,7 @@ func (c *containerGGML) Name() string { return "ggml" } -func (c *containerGGML) Decode(ro *readOffset) (model, error) { +func (c *containerGGML) Decode(ro *readSeekOffset) (model, error) { return nil, nil } @@ -104,7 +104,7 @@ func (c *containerGGMF) Name() string { return "ggmf" } -func (c *containerGGMF) Decode(ro *readOffset) (model, error) { +func (c *containerGGMF) Decode(ro *readSeekOffset) (model, error) { var version uint32 binary.Read(ro, binary.LittleEndian, &version) @@ -126,7 +126,7 @@ func (c *containerGGJT) Name() string { return "ggjt" } -func (c *containerGGJT) Decode(ro *readOffset) (model, error) { +func (c *containerGGJT) Decode(ro *readSeekOffset) (model, error) { var version uint32 binary.Read(ro, binary.LittleEndian, &version) @@ -152,7 +152,7 @@ func (c *containerLORA) Name() string { return "ggla" } -func (c *containerLORA) Decode(ro *readOffset) (model, error) { +func (c *containerLORA) Decode(ro *readSeekOffset) (model, error) { var version uint32 binary.Read(ro, binary.LittleEndian, &version) @@ -180,8 +180,8 @@ const ( FILE_MAGIC_GGUF_BE = 0x47475546 ) -func DecodeGGML(r io.Reader) (*GGML, error) { - ro := readOffset{Reader: r} +func DecodeGGML(r io.ReadSeeker) (*GGML, error) { + ro := readSeekOffset{ReadSeeker: r} var magic uint32 if err := binary.Read(&ro, binary.LittleEndian, &magic); err != nil { @@ -219,13 +219,23 @@ func DecodeGGML(r io.Reader) (*GGML, error) { }, nil } -type readOffset struct { - io.Reader +type readSeekOffset struct { + io.ReadSeeker offset int64 } -func (r *readOffset) Read(p []byte) (int, error) { - n, err := r.Reader.Read(p) - r.offset += int64(n) +func (rso *readSeekOffset) Seek(offset int64, whence int) (int64, error) { + offset, err := rso.ReadSeeker.Seek(offset, whence) + if err != nil { + return 0, err + } + + rso.offset = offset + return offset, nil +} + +func (rso *readSeekOffset) Read(p []byte) (int, error) { + n, err := rso.ReadSeeker.Read(p) + rso.offset += int64(n) return n, err } diff --git a/llm/gguf.go b/llm/gguf.go index dc883187..f68b87b2 100644 --- a/llm/gguf.go +++ b/llm/gguf.go @@ -29,18 +29,18 @@ func (c *containerGGUF) Name() string { return "gguf" } -func (c *containerGGUF) Decode(ro *readOffset) (model, error) { - binary.Read(ro, c.bo, &c.Version) +func (c *containerGGUF) Decode(rso *readSeekOffset) (model, error) { + binary.Read(rso, c.bo, &c.Version) switch c.Version { case 1: - binary.Read(ro, c.bo, &c.V1) + binary.Read(rso, c.bo, &c.V1) default: - binary.Read(ro, c.bo, &c.V2) + binary.Read(rso, c.bo, &c.V2) } model := newGGUFModel(c) - if err := model.Decode(ro); err != nil { + if err := model.Decode(rso); err != nil { return nil, err } @@ -154,49 +154,49 @@ func (llm *ggufModel) FileType() string { return "unknown" } -func (llm *ggufModel) Decode(ro *readOffset) error { +func (llm *ggufModel) Decode(rso *readSeekOffset) error { // decode key-values for i := 0; uint64(i) < llm.NumKV(); i++ { - k, err := llm.readString(ro) + k, err := llm.readString(rso) if err != nil { return err } - vtype := llm.readU32(ro) + vtype := llm.readU32(rso) var v any switch vtype { case ggufTypeUint8: - v = llm.readU8(ro) + v = llm.readU8(rso) case ggufTypeInt8: - v = llm.readI8(ro) + v = llm.readI8(rso) case ggufTypeUint16: - v = llm.readU16(ro) + v = llm.readU16(rso) case ggufTypeInt16: - v = llm.readI16(ro) + v = llm.readI16(rso) case ggufTypeUint32: - v = llm.readU32(ro) + v = llm.readU32(rso) case ggufTypeInt32: - v = llm.readI32(ro) + v = llm.readI32(rso) case ggufTypeUint64: - v = llm.readU64(ro) + v = llm.readU64(rso) case ggufTypeInt64: - v = llm.readI64(ro) + v = llm.readI64(rso) case ggufTypeFloat32: - v = llm.readF32(ro) + v = llm.readF32(rso) case ggufTypeFloat64: - v = llm.readF64(ro) + v = llm.readF64(rso) case ggufTypeBool: - v = llm.readBool(ro) + v = llm.readBool(rso) case ggufTypeString: - s, err := llm.readString(ro) + s, err := llm.readString(rso) if err != nil { return err } v = s case ggufTypeArray: - a, err := llm.readArray(ro) + a, err := llm.readArray(rso) if err != nil { return err } @@ -211,20 +211,20 @@ func (llm *ggufModel) Decode(ro *readOffset) error { // decode tensors for i := 0; uint64(i) < llm.NumTensor(); i++ { - name, err := llm.readString(ro) + name, err := llm.readString(rso) if err != nil { return err } - dims := llm.readU32(ro) + dims := llm.readU32(rso) shape := [4]uint64{1, 1, 1, 1} for i := 0; uint32(i) < dims; i++ { - shape[i] = llm.readU64(ro) + shape[i] = llm.readU64(rso) } - kind := llm.readU32(ro) - offset := llm.readU64(ro) + kind := llm.readU32(rso) + offset := llm.readU64(rso) var blockSize uint64 switch { @@ -285,10 +285,10 @@ func (llm *ggufModel) Decode(ro *readOffset) error { alignment = 32 } - io.CopyN(io.Discard, ro, int64(alignment)-ro.offset%int64(alignment)) + rso.Seek(int64(alignment)-rso.offset%int64(alignment), io.SeekCurrent) for _, tensor := range llm.tensors { padded := (int64(tensor.size) + int64(alignment) - 1) & ^(int64(alignment) - 1) - io.CopyN(io.Discard, ro, padded) + rso.Seek(padded, io.SeekCurrent) } return nil From 5a5dca13b2e94e693d445840cb196928878a4af9 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Wed, 29 Nov 2023 10:54:23 -0800 Subject: [PATCH 4/4] comments --- llm/gguf.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llm/gguf.go b/llm/gguf.go index f68b87b2..29270e51 100644 --- a/llm/gguf.go +++ b/llm/gguf.go @@ -108,8 +108,7 @@ func (llm *ggufModel) NumKV() uint64 { } func (llm *ggufModel) ModelFamily() string { - t, ok := llm.kv["general.architecture"].(string) - if ok { + if t, ok := llm.kv["general.architecture"].(string); ok { return t } @@ -146,8 +145,7 @@ func (llm *ggufModel) ModelType() string { } func (llm *ggufModel) FileType() string { - t, ok := llm.kv["general.file_type"].(uint32) - if ok { + if t, ok := llm.kv["general.file_type"].(uint32); ok { return fileType(t) } @@ -216,6 +214,7 @@ func (llm *ggufModel) Decode(rso *readSeekOffset) error { return err } + // dims is the number of dimensions in the tensor dims := llm.readU32(rso) shape := [4]uint64{1, 1, 1, 1}