diff --git a/llm/ggml.go b/llm/ggml.go index 608085d0..c3e71b88 100644 --- a/llm/ggml.go +++ b/llm/ggml.go @@ -7,9 +7,10 @@ import ( ) type GGML struct { - magic uint32 container model + + Size int64 } const ( @@ -82,7 +83,7 @@ type model interface { type container interface { Name() string - Decode(io.Reader) (model, error) + Decode(*readSeekOffset) (model, error) } type containerGGML struct{} @@ -91,7 +92,7 @@ func (c *containerGGML) Name() string { return "ggml" } -func (c *containerGGML) Decode(r io.Reader) (model, error) { +func (c *containerGGML) Decode(ro *readSeekOffset) (model, error) { return nil, nil } @@ -103,9 +104,9 @@ func (c *containerGGMF) Name() string { return "ggmf" } -func (c *containerGGMF) Decode(r io.Reader) (model, error) { +func (c *containerGGMF) Decode(ro *readSeekOffset) (model, error) { var version uint32 - binary.Read(r, binary.LittleEndian, &version) + binary.Read(ro, binary.LittleEndian, &version) switch version { case 1: @@ -125,9 +126,9 @@ func (c *containerGGJT) Name() string { return "ggjt" } -func (c *containerGGJT) Decode(r io.Reader) (model, error) { +func (c *containerGGJT) Decode(ro *readSeekOffset) (model, error) { var version uint32 - binary.Read(r, binary.LittleEndian, &version) + binary.Read(ro, binary.LittleEndian, &version) switch version { case 1, 2, 3: @@ -139,7 +140,7 @@ func (c *containerGGJT) Decode(r io.Reader) (model, error) { // different model types may have different layouts for hyperparameters var llama llamaModel - binary.Read(r, binary.LittleEndian, &llama.hyperparameters) + binary.Read(ro, binary.LittleEndian, &llama.hyperparameters) return &llama, nil } @@ -151,9 +152,9 @@ func (c *containerLORA) Name() string { return "ggla" } -func (c *containerLORA) Decode(r io.Reader) (model, error) { +func (c *containerLORA) Decode(ro *readSeekOffset) (model, error) { var version uint32 - binary.Read(r, binary.LittleEndian, &version) + binary.Read(ro, binary.LittleEndian, &version) switch version { case 1: @@ -179,34 +180,62 @@ const ( FILE_MAGIC_GGUF_BE = 0x47475546 ) -func DecodeGGML(r io.Reader) (*GGML, error) { - var ggml GGML - binary.Read(r, binary.LittleEndian, &ggml.magic) +func DecodeGGML(r io.ReadSeeker) (*GGML, error) { + ro := readSeekOffset{ReadSeeker: r} - switch ggml.magic { + var magic uint32 + if err := binary.Read(&ro, binary.LittleEndian, &magic); err != nil { + return nil, err + } + + var c container + switch magic { case FILE_MAGIC_GGML: - ggml.container = &containerGGML{} + c = &containerGGML{} case FILE_MAGIC_GGMF: - ggml.container = &containerGGMF{} + c = &containerGGMF{} case FILE_MAGIC_GGJT: - ggml.container = &containerGGJT{} + c = &containerGGJT{} case FILE_MAGIC_GGLA: - ggml.container = &containerLORA{} + c = &containerLORA{} case FILE_MAGIC_GGUF_LE: - ggml.container = &containerGGUF{bo: binary.LittleEndian} + c = &containerGGUF{bo: binary.LittleEndian} case FILE_MAGIC_GGUF_BE: - ggml.container = &containerGGUF{bo: binary.BigEndian} + c = &containerGGUF{bo: binary.BigEndian} default: return nil, errors.New("invalid file magic") } - model, err := ggml.Decode(r) + model, err := c.Decode(&ro) if err != nil { return nil, err } - ggml.model = model - // final model type - return &ggml, nil + return &GGML{ + container: c, + model: model, + Size: ro.offset, + }, nil +} + +type readSeekOffset struct { + io.ReadSeeker + offset int64 +} + +func (rso *readSeekOffset) Seek(offset int64, whence int) (int64, error) { + offset, err := rso.ReadSeeker.Seek(offset, whence) + if err != nil { + return 0, err + } + + rso.offset = offset + return offset, nil +} + +func (rso *readSeekOffset) Read(p []byte) (int, error) { + n, err := rso.ReadSeeker.Read(p) + rso.offset += int64(n) + return n, err } diff --git a/llm/gguf.go b/llm/gguf.go index 12f98abe..29270e51 100644 --- a/llm/gguf.go +++ b/llm/gguf.go @@ -23,26 +23,24 @@ type containerGGUF struct { NumTensor uint64 NumKV uint64 } - - parameters uint64 } func (c *containerGGUF) Name() string { return "gguf" } -func (c *containerGGUF) Decode(r io.Reader) (model, error) { - binary.Read(r, c.bo, &c.Version) +func (c *containerGGUF) Decode(rso *readSeekOffset) (model, error) { + binary.Read(rso, c.bo, &c.Version) switch c.Version { case 1: - binary.Read(r, c.bo, &c.V1) + binary.Read(rso, c.bo, &c.V1) default: - binary.Read(r, c.bo, &c.V2) + binary.Read(rso, c.bo, &c.V2) } model := newGGUFModel(c) - if err := model.Decode(r); err != nil { + if err := model.Decode(rso); err != nil { return nil, err } @@ -67,9 +65,23 @@ const ( type kv map[string]any +type tensor struct { + name string + kind uint32 + offset uint64 + size uint64 + + // shape is the number of elements in each dimension + shape [4]uint64 +} + type ggufModel struct { *containerGGUF + kv + tensors []tensor + + parameters uint64 } func newGGUFModel(container *containerGGUF) *ggufModel { @@ -96,8 +108,7 @@ func (llm *ggufModel) NumKV() uint64 { } func (llm *ggufModel) ModelFamily() string { - t, ok := llm.kv["general.architecture"].(string) - if ok { + if t, ok := llm.kv["general.architecture"].(string); ok { return t } @@ -134,57 +145,56 @@ func (llm *ggufModel) ModelType() string { } func (llm *ggufModel) FileType() string { - t, ok := llm.kv["general.file_type"].(uint32) - if ok { + if t, ok := llm.kv["general.file_type"].(uint32); ok { return fileType(t) } return "unknown" } -func (llm *ggufModel) Decode(r io.Reader) error { +func (llm *ggufModel) Decode(rso *readSeekOffset) error { // decode key-values for i := 0; uint64(i) < llm.NumKV(); i++ { - k, err := llm.readString(r) + k, err := llm.readString(rso) if err != nil { return err } - vtype := llm.readU32(r) + vtype := llm.readU32(rso) var v any switch vtype { case ggufTypeUint8: - v = llm.readU8(r) + v = llm.readU8(rso) case ggufTypeInt8: - v = llm.readI8(r) + v = llm.readI8(rso) case ggufTypeUint16: - v = llm.readU16(r) + v = llm.readU16(rso) case ggufTypeInt16: - v = llm.readI16(r) + v = llm.readI16(rso) case ggufTypeUint32: - v = llm.readU32(r) + v = llm.readU32(rso) case ggufTypeInt32: - v = llm.readI32(r) + v = llm.readI32(rso) case ggufTypeUint64: - v = llm.readU64(r) + v = llm.readU64(rso) case ggufTypeInt64: - v = llm.readI64(r) + v = llm.readI64(rso) case ggufTypeFloat32: - v = llm.readF32(r) + v = llm.readF32(rso) case ggufTypeFloat64: - v = llm.readF64(r) + v = llm.readF64(rso) case ggufTypeBool: - v = llm.readBool(r) + v = llm.readBool(rso) case ggufTypeString: - s, err := llm.readString(r) + s, err := llm.readString(rso) if err != nil { return err } v = s case ggufTypeArray: - a, err := llm.readArray(r) + a, err := llm.readArray(rso) if err != nil { return err } @@ -199,21 +209,85 @@ func (llm *ggufModel) Decode(r io.Reader) error { // decode tensors for i := 0; uint64(i) < llm.NumTensor(); i++ { - if _, err := llm.readString(r); err != nil { + name, err := llm.readString(rso) + if err != nil { return err } - dimensions := llm.readU32(r) + // dims is the number of dimensions in the tensor + dims := llm.readU32(rso) - var elements uint64 = 1 - for i := 0; uint32(i) < dimensions; i++ { - elements *= llm.readU64(r) + shape := [4]uint64{1, 1, 1, 1} + for i := 0; uint32(i) < dims; i++ { + shape[i] = llm.readU64(rso) } - llm.readU32(r) // type - llm.readU64(r) // offset + kind := llm.readU32(rso) + offset := llm.readU64(rso) - llm.parameters += elements + var blockSize uint64 + switch { + case kind < 2: + blockSize = 1 + case kind < 10: + blockSize = 32 + default: + blockSize = 256 + } + + var typeSize uint64 + switch kind { + case 0: // FP32 + typeSize = 4 + case 1: // FP16 + typeSize = 2 + case 2: // Q4_0 + typeSize = 2 + blockSize/2 + case 3: // Q4_1 + typeSize = 2 + 2 + blockSize/2 + case 6: // Q5_0 + typeSize = 2 + 4 + blockSize/2 + case 7: // Q5_1 + typeSize = 2 + 2 + 4 + blockSize/2 + case 8: // Q8_0 + typeSize = 2 + blockSize + case 9: // Q8_1 + typeSize = 4 + 4 + blockSize + case 10: // Q2_K + typeSize = blockSize/16 + blockSize/4 + 2 + 2 + case 11: // Q3_K + typeSize = blockSize/8 + blockSize/4 + 12 + 2 + case 12: // Q4_K + typeSize = 2 + 2 + 12 + blockSize/2 + case 13: // Q5_K + typeSize = 2 + 2 + 12 + blockSize/8 + blockSize/2 + case 14: // Q6_K + typeSize = blockSize/2 + blockSize/4 + blockSize/16 + 2 + } + + parameters := shape[0] * shape[1] * shape[2] * shape[3] + size := parameters * typeSize / blockSize + + llm.tensors = append(llm.tensors, tensor{ + name: name, + kind: kind, + offset: offset, + size: size, + shape: shape, + }) + + llm.parameters += parameters + } + + alignment, ok := llm.kv["general.alignment"].(uint32) + if !ok { + alignment = 32 + } + + rso.Seek(int64(alignment)-rso.offset%int64(alignment), io.SeekCurrent) + for _, tensor := range llm.tensors { + padded := (int64(tensor.size) + int64(alignment) - 1) & ^(int64(alignment) - 1) + rso.Seek(padded, io.SeekCurrent) } return nil diff --git a/server/images.go b/server/images.go index 24bd2576..b85cf55b 100644 --- a/server/images.go +++ b/server/images.go @@ -19,6 +19,8 @@ import ( "strings" "text/template" + "golang.org/x/exp/slices" + "github.com/jmorganca/ollama/api" "github.com/jmorganca/ollama/llm" "github.com/jmorganca/ollama/parser" @@ -134,17 +136,48 @@ type ManifestV2 struct { } type ConfigV2 struct { - ModelFormat string `json:"model_format"` - ModelFamily string `json:"model_family"` - ModelType string `json:"model_type"` - FileType string `json:"file_type"` - RootFS RootFS `json:"rootfs"` + ModelFormat string `json:"model_format"` + ModelFamily string `json:"model_family"` + ModelFamilies []string `json:"model_families"` + ModelType string `json:"model_type"` + FileType string `json:"file_type"` + RootFS RootFS `json:"rootfs"` // required by spec Architecture string `json:"architecture"` OS string `json:"os"` } +func (c *ConfigV2) SetModelFormat(format string) { + if c.ModelFormat == "" { + c.ModelFormat = format + } +} + +func (c *ConfigV2) SetModelFamily(families ...string) { + for _, family := range families { + if c.ModelFamily == "" { + c.ModelFamily = family + } + + if !slices.Contains(c.ModelFamilies, family) { + c.ModelFamilies = append(c.ModelFamilies, family) + } + } +} + +func (c *ConfigV2) SetModelType(modelType string) { + if c.ModelType == "" { + c.ModelType = modelType + } +} + +func (c *ConfigV2) SetFileType(fileType string) { + if c.FileType == "" { + c.FileType = fileType + } +} + type RootFS struct { Type string `json:"type"` DiffIDs []string `json:"diff_ids"` @@ -354,10 +387,10 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars return err } - config.ModelFormat = fromConfig.ModelFormat - config.ModelFamily = fromConfig.ModelFamily - config.ModelType = fromConfig.ModelType - config.FileType = fromConfig.FileType + config.SetModelFormat(fromConfig.ModelFormat) + config.SetModelFamily(append(fromConfig.ModelFamilies, fromConfig.ModelFamily)...) + config.SetModelType(fromConfig.ModelType) + config.SetFileType(fromConfig.FileType) for _, layer := range manifest.Layers { deleteMap[layer.Digest] = struct{}{} @@ -391,24 +424,38 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars } defer bin.Close() - fn(api.ProgressResponse{Status: "creating model layer"}) - ggml, err := llm.DecodeGGML(bin) - if err != nil { - return err + var offset int64 + for { + fn(api.ProgressResponse{Status: "creating model layer"}) + + bin.Seek(offset, io.SeekStart) + ggml, err := llm.DecodeGGML(bin) + if errors.Is(err, io.EOF) { + break + } else if err != nil { + return err + } + + config.SetModelFormat(ggml.Name()) + config.SetModelFamily(ggml.ModelFamily()) + config.SetModelType(ggml.ModelType()) + config.SetFileType(ggml.FileType()) + + mediatype := mediatype + if ggml.ModelFamily() == "clip" { + mediatype = "application/vnd.ollama.image.projector" + } + + sr := io.NewSectionReader(bin, offset, ggml.Size) + layer, err := NewLayer(sr, mediatype) + if err != nil { + return err + } + + layers.Add(layer) + + offset += ggml.Size } - - config.ModelFormat = ggml.Name() - config.ModelFamily = ggml.ModelFamily() - config.ModelType = ggml.ModelType() - config.FileType = ggml.FileType() - - bin.Seek(0, io.SeekStart) - layer, err := NewLayer(bin, mediatype) - if err != nil { - return err - } - - layers.Add(layer) case "adapter": if strings.HasPrefix(c.Args, "@") { blobPath, err := GetBlobsPath(strings.TrimPrefix(c.Args, "@"))