refactor model parsing
parent 011bb67351
commit d338d70492
llm/ggla.go (62 changed lines)

@@ -35,7 +35,7 @@ type ggla struct {
 	*containerGGLA

 	kv KV
-	tensors []Tensor
+	tensors []*Tensor
 }

 func newGGLA(container *containerGGLA) *ggla {
@@ -45,18 +45,26 @@ func newGGLA(container *containerGGLA) *ggla {
 	}
 }

-func (m *ggla) decode(rs io.ReadSeeker) error {
+func (llm *ggla) KV() KV {
+	return llm.kv
+}
+
+func (llm *ggla) Tensors() []*Tensor {
+	return llm.tensors
+}
+
+func (llm *ggla) decode(rs io.ReadSeeker) error {
 	var r uint32
 	if err := binary.Read(rs, binary.LittleEndian, &r); err != nil {
 		return err
 	}
-	m.kv["r"] = r
+	llm.kv["r"] = r

 	var alpha uint32
 	if err := binary.Read(rs, binary.LittleEndian, &alpha); err != nil {
 		return err
 	}
-	m.kv["alpha"] = alpha
+	llm.kv["alpha"] = alpha

 	for {
 		var dims uint32
@@ -115,50 +123,6 @@ func (m *ggla) decode(rs io.ReadSeeker) error {
 			return err
 		}

-		m.tensors = append(m.tensors, t)
+		llm.tensors = append(llm.tensors, &t)
 	}
 }
-
-func (m *ggla) KV() KV {
-	return m.kv
-}
-
-func (m *ggla) Tensor() []Tensor {
-	return m.tensors
-}
-
-func (*ggla) ModelFamily() string {
-	return "ggla"
-}
-
-func (*ggla) ModelType() string {
-	panic("not implemented")
-}
-
-func (*ggla) FileType() string {
-	panic("not implemented")
-}
-
-func (*ggla) NumLayers() uint32 {
-	panic("not implemented")
-}
-
-func (*ggla) NumGQA() uint32 {
-	panic("not implemented")
-}
-
-func (*ggla) NumEmbed() uint32 {
-	panic("not implemented")
-}
-
-func (*ggla) NumHead() uint32 {
-	panic("not implemented")
-}
-
-func (*ggla) NumHeadKv() uint32 {
-	panic("not implemented")
-}
-
-func (*ggla) NumCtx() uint32 {
-	panic("not implemented")
-}
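With the per-format getter methods gone, the LoRA hyperparameters that decode stores ("r" and "alpha") are read back through the generic KV() accessor. A minimal caller sketch under that assumption; the adapter path is illustrative and error handling is shortened:

    package main

    import (
        "fmt"
        "os"

        "github.com/ollama/ollama/llm"
    )

    func main() {
        f, err := os.Open("adapter.ggla") // illustrative path
        if err != nil {
            panic(err)
        }
        defer f.Close()

        // DecodeGGML dispatches on file magic, so a ggla adapter comes
        // back through the same entry point as a gguf model.
        ggml, _, err := llm.DecodeGGML(f)
        if err != nil {
            panic(err)
        }

        // decode stored the LoRA hyperparameters as plain KV entries
        kv := ggml.KV()
        fmt.Println("r:", kv["r"], "alpha:", kv["alpha"])
    }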
llm/ggml.go (100 changed lines)

@@ -3,14 +3,13 @@ package llm
 import (
 	"encoding/binary"
 	"errors"
+	"fmt"
 	"io"
 )

 type GGML struct {
 	container
 	model
-
-	Size int64
 }

 const (
@@ -90,28 +89,82 @@ func fileType(fileType uint32) string {
 }

 type model interface {
-	ModelFamily() string
-	ModelType() string
-	FileType() string
-	NumLayers() uint32
-	NumGQA() uint32
-	NumEmbed() uint32
-	NumHead() uint32
-	NumHeadKv() uint32
-	NumCtx() uint32
+	KV() KV
+	Tensors() []*Tensor
 }

 type KV map[string]any

+func (kv KV) u64(key string) uint64 {
+	switch v := kv[key].(type) {
+	case uint64:
+		return v
+	case uint32:
+		return uint64(v)
+	case float64:
+		return uint64(v)
+	default:
+		return 0
+	}
+}
+
+func (kv KV) Architecture() string {
+	if s, ok := kv["general.architecture"].(string); ok {
+		return s
+	}
+
+	return "unknown"
+}
+
+func (kv KV) ParameterCount() uint64 {
+	return kv.u64("general.parameter_count")
+}
+
+func (kv KV) FileType() string {
+	if u64 := kv.u64("general.file_type"); u64 > 0 {
+		return fileType(uint32(u64))
+	}
+
+	return "unknown"
+}
+
+func (kv KV) BlockCount() uint64 {
+	return kv.u64(fmt.Sprintf("%s.block_count", kv.Architecture()))
+}
+
+func (kv KV) HeadCount() uint64 {
+	return kv.u64(fmt.Sprintf("%s.attention.head_count", kv.Architecture()))
+}
+
+func (kv KV) HeadCountKV() uint64 {
+	return kv.u64(fmt.Sprintf("%s.attention.head_count_kv", kv.Architecture()))
+}
+
+func (kv KV) GQA() uint64 {
+	if headCountKV := kv.HeadCountKV(); headCountKV > 0 {
+		return kv.HeadCount() / headCountKV
+	}
+
+	return 0
+}
+
+func (kv KV) EmbeddingLength() uint64 {
+	return kv.u64(fmt.Sprintf("%s.embedding_length", kv.Architecture()))
+}
+
+func (kv KV) ContextLength() uint64 {
+	return kv.u64(fmt.Sprintf("%s.context_length", kv.Architecture()))
+}
+
 type Tensor struct {
-	Name string
-	Kind uint32
-	Offset uint64
+	Name   string `json:"name"`
+	Kind   uint32 `json:"kind"`
+	Offset uint64 `json:"-"`

 	// Shape is the number of elements in each dimension
-	Shape []uint64
+	Shape []uint64 `json:"shape"`

-	io.WriterTo
+	io.WriterTo `json:"-"`
 }

 func (t Tensor) blockSize() uint64 {
@@ -201,16 +254,16 @@ const (

 var ErrUnsupportedFormat = errors.New("unsupported model format")

-func DecodeGGML(rs io.ReadSeeker) (*GGML, error) {
+func DecodeGGML(rs io.ReadSeeker) (*GGML, int64, error) {
 	var magic uint32
 	if err := binary.Read(rs, binary.LittleEndian, &magic); err != nil {
-		return nil, err
+		return nil, 0, err
 	}

 	var c container
 	switch magic {
 	case FILE_MAGIC_GGML, FILE_MAGIC_GGMF, FILE_MAGIC_GGJT:
-		return nil, ErrUnsupportedFormat
+		return nil, 0, ErrUnsupportedFormat
 	case FILE_MAGIC_GGLA:
 		c = &containerGGLA{}
 	case FILE_MAGIC_GGUF_LE:
@@ -218,25 +271,24 @@ func DecodeGGML(rs io.ReadSeeker) (*GGML, error) {
 	case FILE_MAGIC_GGUF_BE:
 		c = &containerGGUF{ByteOrder: binary.BigEndian}
 	default:
-		return nil, errors.New("invalid file magic")
+		return nil, 0, errors.New("invalid file magic")
 	}

 	model, err := c.Decode(rs)
 	if errors.Is(err, io.EOF) {
 		// noop
 	} else if err != nil {
-		return nil, err
+		return nil, 0, err
 	}

 	offset, err := rs.Seek(0, io.SeekCurrent)
 	if err != nil {
-		return nil, err
+		return nil, 0, err
 	}

 	// final model type
 	return &GGML{
 		container: c,
 		model:     model,
-		Size:      offset,
-	}, nil
+	}, offset, nil
 }
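Since KV is an exported map type, the new helpers can be exercised directly: they key off general.architecture to build the per-architecture field names, and u64 coerces the uint32/uint64/float64 value types that GGUF metadata arrives as. A small sketch with made-up llama-style values (all numbers illustrative):

    package main

    import (
        "fmt"

        "github.com/ollama/ollama/llm"
    )

    func main() {
        kv := llm.KV{
            "general.architecture":          "llama",
            "general.parameter_count":       uint64(6738415616),
            "llama.block_count":             uint32(32), // u64() coerces uint32 too
            "llama.context_length":          uint32(4096),
            "llama.embedding_length":        uint32(4096),
            "llama.attention.head_count":    uint32(32),
            "llama.attention.head_count_kv": uint32(32),
        }

        fmt.Println(kv.Architecture())   // llama
        fmt.Println(kv.BlockCount())     // 32
        fmt.Println(kv.ContextLength())  // 4096
        fmt.Println(kv.GQA())            // 1 (head_count / head_count_kv)
        fmt.Println(kv.ParameterCount()) // 6738415616
    }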
llm/gguf.go (105 changed lines)

@@ -6,8 +6,6 @@ import (
 	"fmt"
 	"io"
 	"strings"
-
-	"github.com/ollama/ollama/format"
 )

 type containerGGUF struct {
@@ -90,8 +88,8 @@ const (
 type gguf struct {
 	*containerGGUF

-	KV
-	Tensors []Tensor
+	kv      KV
+	tensors []*Tensor

 	parameters uint64
 }
@@ -99,7 +97,7 @@ type gguf struct {
 func newGGUF(container *containerGGUF) *gguf {
 	return &gguf{
 		containerGGUF: container,
-		KV:            make(KV),
+		kv:            make(KV),
 	}
 }

@@ -107,6 +105,14 @@ func NewGGUFV3(bo binary.ByteOrder) *gguf {
 	return newGGUF(&containerGGUF{ByteOrder: bo, Version: 3})
 }

+func (llm *gguf) KV() KV {
+	return llm.kv
+}
+
+func (llm *gguf) Tensors() []*Tensor {
+	return llm.tensors
+}
+
 func (llm *gguf) numTensor() uint64 {
 	switch llm.Version {
 	case 1:
@@ -129,30 +135,6 @@ func (llm *gguf) numKV() uint64 {
 	}
 }

-func (llm *gguf) ModelFamily() string {
-	if t, ok := llm.KV["general.architecture"].(string); ok {
-		return t
-	}
-
-	return "unknown"
-}
-
-func (llm *gguf) ModelType() string {
-	if llm.parameters > 0 {
-		return format.HumanNumber(llm.parameters)
-	}
-
-	return "unknown"
-}
-
-func (llm *gguf) FileType() string {
-	if t, ok := llm.KV["general.file_type"].(uint32); ok {
-		return fileType(t)
-	}
-
-	return "unknown"
-}
-
 func (llm *gguf) Decode(rs io.ReadSeeker) error {
 	// decode key-values
 	for i := 0; uint64(i) < llm.numKV(); i++ {
@@ -202,7 +184,7 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
 			return err
 		}

-		llm.KV[k] = v
+		llm.kv[k] = v
 	}

 	// decode tensors
@@ -243,11 +225,14 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
 			Shape:  shape[:],
 		}

-		llm.Tensors = append(llm.Tensors, tensor)
+		llm.tensors = append(llm.tensors, &tensor)
 		llm.parameters += tensor.parameters()
 	}

-	alignment, ok := llm.KV["general.alignment"].(uint32)
+	// patch KV with parameter count
+	llm.kv["general.parameter_count"] = llm.parameters
+
+	alignment, ok := llm.kv["general.alignment"].(uint32)
 	if !ok {
 		alignment = 32
 	}
@@ -262,7 +247,7 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
 		return err
 	}

-	for _, tensor := range llm.Tensors {
+	for _, tensor := range llm.tensors {
 		padded := (int64(tensor.size()) + int64(alignment) - 1) & ^(int64(alignment) - 1)
 		if _, err := rs.Seek(padded, io.SeekCurrent); err != nil {
 			return err
@@ -272,60 +257,6 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
 	return nil
 }

-func (llm *gguf) NumLayers() uint32 {
-	value, exists := llm.KV[fmt.Sprintf("%s.block_count", llm.ModelFamily())]
-	if !exists {
-		return 0
-	}
-
-	return value.(uint32)
-}
-
-func (llm *gguf) NumHead() uint32 {
-	value, exists := llm.KV[fmt.Sprintf("%s.attention.head_count", llm.ModelFamily())]
-	if !exists {
-		return 0
-	}
-
-	return value.(uint32)
-}
-
-func (llm *gguf) NumEmbed() uint32 {
-	value, exists := llm.KV[fmt.Sprintf("%s.embedding_length", llm.ModelFamily())]
-	if !exists {
-		return 0
-	}
-
-	return value.(uint32)
-}
-
-func (llm *gguf) NumHeadKv() uint32 {
-	value, exists := llm.KV[fmt.Sprintf("%s.attention.head_count_kv", llm.ModelFamily())]
-	if !exists {
-		return 0
-	}
-
-	return value.(uint32)
-}
-
-func (llm *gguf) NumCtx() uint32 {
-	value, exists := llm.KV[fmt.Sprintf("%s.context_length", llm.ModelFamily())]
-	if !exists {
-		return 0
-	}
-
-	return value.(uint32)
-}
-
-func (llm *gguf) NumGQA() uint32 {
-	numHeadKv := llm.NumHeadKv()
-	if numHeadKv == 0 {
-		return 0
-	}
-
-	return llm.NumHead() / numHeadKv
-}
-
 func readGGUF[T any](llm *gguf, r io.Reader) (T, error) {
 	var t T
 	err := binary.Read(r, llm.ByteOrder, &t)
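A subtle behavioural change rides along here: Decode now writes the summed tensor parameter count into the KV map under general.parameter_count, which is what lets the generic kv.ParameterCount() helper in llm/ggml.go replace the gguf-specific ModelType(). Call sites format that count themselves; a tiny sketch with an illustrative total:

    package main

    import (
        "fmt"

        "github.com/ollama/ollama/format"
    )

    func main() {
        // Decode stores the accumulated tensor parameter count in the KV
        // map, so this is what ggml.KV().ParameterCount() would return.
        count := uint64(6738415616) // illustrative 7B-class total
        fmt.Println(format.HumanNumber(count)) // "7B"
    }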
llm/llm.go (18 changed lines)

@@ -35,14 +35,14 @@ func New(model string, adapters, projectors []string, opts api.Options) (LLM, er
 	}
 	defer f.Close()

-	ggml, err := DecodeGGML(f)
+	ggml, size, err := DecodeGGML(f)
 	if err != nil {
 		return nil, err
 	}

-	if opts.NumCtx > int(ggml.NumCtx()) {
-		slog.Warn(fmt.Sprintf("requested context length is greater than model's max context length (%d > %d), using %d instead", opts.NumCtx, ggml.NumCtx(), ggml.NumCtx()))
-		opts.NumCtx = int(ggml.NumCtx())
+	if opts.NumCtx > int(ggml.KV().ContextLength()) {
+		slog.Warn("requested context length is greater than model max context length", "requested", opts.NumCtx, "model", ggml.KV().ContextLength())
+		opts.NumCtx = int(ggml.KV().ContextLength())
 	}

 	if opts.NumCtx < 4 {
@@ -50,18 +50,16 @@ func New(model string, adapters, projectors []string, opts api.Options) (LLM, er
 	}

 	vram, _ := gpu.CheckVRAM()
-	size := ggml.Size

 	// fp16 k,v matrices require = n_ctx * n_layer * n_embd / n_head * n_head_kv * 2 bytes each * 2 key and value
-	kv := 2 * 2 * int64(opts.NumCtx) * int64(ggml.NumLayers()) * int64(ggml.NumEmbed()) * int64(ggml.NumHeadKv()) / int64(max(ggml.NumHead(), 1))
+	kv := 2 * 2 * int64(opts.NumCtx) * int64(ggml.KV().BlockCount()) * int64(ggml.KV().EmbeddingLength()) * int64(ggml.KV().HeadCountKV()) / int64(ggml.KV().HeadCount())

 	// this amount is the overhead + tensors in memory
 	// TODO: get this from the llama.cpp's graph calculations instead of
 	// estimating it's 1/6 * kv_cache_size * num_gqa
-	graph := int64(ggml.NumGQA()) * kv / 6
+	graph := int64(ggml.KV().GQA()) * kv / 6

-	// certain model architectures don't support gpu inference yet
-	if slices.Contains(cpuOnlyFamilies, ggml.ModelFamily()) {
+	if slices.Contains(cpuOnlyFamilies, ggml.KV().Architecture()) {
 		opts.NumGPU = 0
 	}

@@ -105,7 +103,7 @@ func New(model string, adapters, projectors []string, opts api.Options) (LLM, er
 	// 2. the proportional kv cache for all devices (kv * % layers)
 	// 3. the proportional model (size * % layers / # devices)
 	// This estimates the number of layers
-	maxlayers := int64(ggml.NumLayers()) + 1
+	maxlayers := int64(ggml.KV().BlockCount()) + 1
 	devices := int64(info.DeviceCount)
 	avg := vram / devices
 	layers := maxlayers * (avg - graph) / (kv + size/devices)
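The memory estimate in New is easier to sanity-check with numbers plugged in. A worked sketch of the same arithmetic for assumed 7B-class llama-style hyperparameters (values illustrative, not taken from the commit):

    package main

    import "fmt"

    func main() {
        // illustrative 7B-class hyperparameters
        numCtx := int64(2048)
        blockCount := int64(32)        // n_layer
        embeddingLength := int64(4096) // n_embd
        headCount := int64(32)         // n_head
        headCountKV := int64(32)       // n_head_kv (no GQA on this model)

        // fp16 k,v matrices: 2 bytes per element, times 2 for k and v
        kv := 2 * 2 * numCtx * blockCount * embeddingLength * headCountKV / headCount

        // the 1/6 * kv_cache_size * num_gqa overhead estimate
        graph := (headCount / headCountKV) * kv / 6

        fmt.Println(kv)    // 1073741824 -> a 1 GiB kv cache at 2048 ctx
        fmt.Println(graph) // 178956970  -> roughly 171 MiB estimated overhead
    }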
server/images.go

@@ -26,6 +26,7 @@ import (

 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/convert"
+	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/parser"
 	"github.com/ollama/ollama/version"
@@ -420,37 +421,32 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
 		defer bin.Close()

 		var offset int64
-	CREATE:
 		for {
 			fn(api.ProgressResponse{Status: "creating model layer"})
 			if _, err := bin.Seek(offset, io.SeekStart); err != nil {
 				return err
 			}

-			ggml, err := llm.DecodeGGML(bin)
-			if err != nil {
-				slog.Error(fmt.Sprintf("error decoding gguf file: %q", err))
-				switch {
-				case errors.Is(err, io.EOF):
-					break CREATE
-				case errors.Is(err, llm.ErrUnsupportedFormat):
-					return fmt.Errorf("model binary specified in FROM field is not a valid gguf format model, %w", err)
-				default:
-					return err
-				}
-			}
+			ggml, size, err := llm.DecodeGGML(bin)
+			if errors.Is(err, io.EOF) {
+				break
+			} else if errors.Is(err, llm.ErrUnsupportedFormat) {
+				return fmt.Errorf("model binary specified in FROM field is not a valid gguf format model, %w", err)
+			} else if err != nil {
+				return err
+			}

 			config.SetModelFormat(ggml.Name())
-			config.SetModelFamily(ggml.ModelFamily())
-			config.SetModelType(ggml.ModelType())
-			config.SetFileType(ggml.FileType())
+			config.SetModelFamily(ggml.KV().Architecture())
+			config.SetModelType(format.HumanNumber(ggml.KV().ParameterCount()))
+			config.SetFileType(ggml.KV().FileType())

 			mediatype := mediatype
-			if ggml.ModelFamily() == "clip" {
+			if ggml.KV().Architecture() == "clip" {
 				mediatype = "application/vnd.ollama.image.projector"
 			}

-			sr := io.NewSectionReader(bin, offset, ggml.Size)
+			sr := io.NewSectionReader(bin, offset, size)
 			layer, err := NewLayer(sr, mediatype)
 			if err != nil {
 				return err
@@ -458,7 +454,7 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars

 			layers.Add(layer)

-			offset += ggml.Size
+			offset += size
 		}
 	case "adapter":
 		if strings.HasPrefix(c.Args, "@") {
@@ -477,12 +473,12 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
 			}
 			defer bin.Close()

-			ggml, err := llm.DecodeGGML(bin)
+			_, size, err := llm.DecodeGGML(bin)
 			if err != nil {
 				return err
 			}

-			sr := io.NewSectionReader(bin, 0, ggml.Size)
+			sr := io.NewSectionReader(bin, 0, size)
 			layer, err := NewLayer(sr, mediatype)
 			if err != nil {
 				return err
@@ -554,13 +550,6 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
 		}
 	}

-	// xxx - can this be removed?
-	if config.ModelType == "65B" {
-		if gqa, ok := formattedParams["gqa"].(int); ok && gqa == 8 {
-			config.ModelType = "70B"
-		}
-	}
-
 	var b bytes.Buffer
 	if err := json.NewEncoder(&b).Encode(formattedParams); err != nil {
 		return err
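The reworked loop is also why DecodeGGML grew its size return value: a FROM blob may hold several concatenated GGUF payloads, and each iteration needs to know where the current one ends to position the next read and the section reader. A reduced sketch of that pattern, with the per-model layer creation abstracted behind a hypothetical process callback:

    import (
        "errors"
        "io"
        "os"

        "github.com/ollama/ollama/llm"
    )

    // decodeAll steps through a file that may contain several
    // concatenated GGUF payloads; process is a hypothetical callback.
    func decodeAll(bin *os.File, process func(*llm.GGML, *io.SectionReader) error) error {
        var offset int64
        for {
            if _, err := bin.Seek(offset, io.SeekStart); err != nil {
                return err
            }

            ggml, size, err := llm.DecodeGGML(bin)
            if errors.Is(err, io.EOF) {
                break // no more models in the file
            } else if err != nil {
                return err
            }

            // the current model occupies [offset, offset+size)
            if err := process(ggml, io.NewSectionReader(bin, offset, size)); err != nil {
                return err
            }
            offset += size
        }

        return nil
    }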