diff --git a/convert/convert.go b/convert/convert.go
index e9c2ef2d..e71a0ff3 100644
--- a/convert/convert.go
+++ b/convert/convert.go
@@ -18,6 +18,16 @@ import (
 	"github.com/ollama/ollama/llm"
 )
 
+const (
+	_ int32 = iota
+	tokenTypeNormal
+	tokenTypeUnknown
+	tokenTypeControl
+	tokenTypeUserDefined
+	tokenTypeUnused
+	tokenTypeByte
+)
+
 type Params struct {
 	Architectures []string `json:"architectures"`
 	VocabSize     int      `json:"vocab_size"`
@@ -172,7 +182,7 @@ func LoadSentencePieceTokens(dirpath string, params *Params) (*Vocab, error) {
 		}
 		v.Tokens = append(v.Tokens, t.key)
 		v.Scores = append(v.Scores, -1000.0)
-		v.Types = append(v.Types, int32(llm.GGUFTokenUserDefined))
+		v.Types = append(v.Types, tokenTypeUserDefined)
 	}
 
 	slog.Info(fmt.Sprintf("vocab size w/ extra tokens: %d", len(v.Tokens)))
@@ -182,7 +192,7 @@ func LoadSentencePieceTokens(dirpath string, params *Params) (*Vocab, error) {
 		for cnt := 0; cnt < missingTokens; cnt++ {
 			v.Tokens = append(v.Tokens, fmt.Sprintf("<dummy%05d>", cnt+1))
 			v.Scores = append(v.Scores, -1)
-			v.Types = append(v.Types, int32(llm.GGUFTokenUserDefined))
+			v.Types = append(v.Types, tokenTypeUserDefined)
 		}
 	}
 
diff --git a/convert/gemma.go b/convert/gemma.go
index 88abe646..e24b8ec5 100644
--- a/convert/gemma.go
+++ b/convert/gemma.go
@@ -71,8 +71,6 @@ func (m *GemmaModel) GetTensors() error {
 	}
 
 	slog.Debug(fmt.Sprintf("Total tensors: %d", len(t)))
-
-	m.Tensors = []llm.Tensor{}
 	for _, l := range t {
 		if strings.HasSuffix(l.Name, "norm.weight") {
 			wt := l.WriterTo.(safetensorWriterTo)
diff --git a/convert/llama.go b/convert/llama.go
index 83d942cb..a10670e6 100644
--- a/convert/llama.go
+++ b/convert/llama.go
@@ -105,8 +105,6 @@ func (m *LlamaModel) GetTensors() error {
 		return err
 	}
 
-	m.Tensors = []llm.Tensor{}
-
 	pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
 	re, err := regexp.Compile(pattern)
 	if err != nil {
@@ -133,30 +131,22 @@ func (m *LlamaModel) GetTensors() error {
 	return nil
 }
 
-func (m *LlamaModel) LoadVocab() error {
-	v := &Vocab{}
-
-	tokpath := filepath.Join(m.Path, "tokenizer.json")
-	pre, ts, merges, err := parseTokens(tokpath)
+func (m *LlamaModel) LoadVocab() (err error) {
+	pre, ts, merges, err := parseTokens(filepath.Join(m.Path, "tokenizer.json"))
 	if errors.Is(err, os.ErrNotExist) {
-		v, err = LoadSentencePieceTokens(m.Path, m.Params)
-		if err != nil {
-			return err
-		}
+		return nil
 	} else if err != nil {
 		return err
-	} else {
-		for _, t := range ts {
-			v.Tokens = append(v.Tokens, t.Content)
-			v.Types = append(v.Types, t.Type())
-		}
-
-		m.Params.PreTokenizer = pre
-		v.Merges = merges
 	}
 
-	m.Vocab = v
+	m.Vocab = &Vocab{}
+	for _, t := range ts {
+		m.Vocab.Tokens = append(m.Vocab.Tokens, t.Content)
+		m.Vocab.Types = append(m.Vocab.Types, t.Type())
+	}
+	m.Vocab.Merges = merges
+	m.Params.PreTokenizer = pre
 
 	return nil
 }
 
@@ -174,7 +164,7 @@ func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {
 		"llama.attention.head_count":             uint32(m.Params.AttentionHeads),
 		"llama.attention.head_count_kv":          uint32(m.Params.KeyValHeads),
 		"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
-		"general.file_type":                      uint32(2),
+		"general.file_type":                      uint32(1),
 		"tokenizer.ggml.model":                   "gpt2",
 		"tokenizer.ggml.pre":                     m.Params.PreTokenizer,
 
diff --git a/convert/mistral.go b/convert/mistral.go
index f88de12b..89d2e084 100644
--- a/convert/mistral.go
+++ b/convert/mistral.go
@@ -102,8 +102,6 @@ func (m *MistralModel) GetTensors() error {
 		return err
 	}
 
-	m.Tensors = []llm.Tensor{}
-
 	pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
 	re, err := regexp.Compile(pattern)
 	if err != nil {
diff --git a/convert/mixtral.go b/convert/mixtral.go
index 940df55d..66546fd7 100644
--- a/convert/mixtral.go
+++ b/convert/mixtral.go
@@ -17,8 +17,6 @@ func (m *MixtralModel) GetTensors() error {
 		return err
 	}
 
-	m.Tensors = []llm.Tensor{}
-
 	pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
 	re, err := regexp.Compile(pattern)
 	if err != nil {
diff --git a/convert/safetensors.go b/convert/safetensors.go
index b52a048d..2107ae81 100644
--- a/convert/safetensors.go
+++ b/convert/safetensors.go
@@ -11,6 +11,7 @@ import (
 	"path/filepath"
 	"regexp"
 	"slices"
+	"strings"
 
 	"github.com/d4l3k/go-bfloat16"
 	"github.com/mitchellh/mapstructure"
@@ -97,6 +98,10 @@ func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params)
 
 	var tensors []llm.Tensor
 	for _, k := range keys {
+		if strings.HasSuffix(k, "self_attn.rotary_emb.inv_freq") {
+			continue
+		}
+
 		vals := parsed[k].(map[string]interface{})
 		var data tensorMetaData
 		if err = mapstructure.Decode(vals, &data); err != nil {
diff --git a/convert/tokenizer.go b/convert/tokenizer.go
index a847a84c..e0fe0bb7 100644
--- a/convert/tokenizer.go
+++ b/convert/tokenizer.go
@@ -44,11 +44,11 @@ type Token struct {
 
 func (t *Token) Type() int32 {
 	switch {
 	case t.Special:
-		return 3
+		return tokenTypeControl
 	case t.UserDefined:
-		return 4
+		return tokenTypeUserDefined
 	default:
-		return 1
+		return tokenTypeNormal
 	}
 }
diff --git a/convert/torch.go b/convert/torch.go
index 803827ba..cb8d74b0 100644
--- a/convert/torch.go
+++ b/convert/torch.go
@@ -34,18 +34,13 @@ func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor,
 	slog.Debug("getting torch tensors")
 
 	var files []string
-	var err error
-	files, err = filepath.Glob(filepath.Join(dirpath, "consolidated.*.pth"))
-	if err != nil {
-		files, err = filepath.Glob(filepath.Join(dirpath, "pytorch_model-*.bin"))
-		if err != nil {
-			slog.Error("didn't find any torch files")
-			return nil, err
-		}
+	if pt, _ := filepath.Glob(filepath.Join(dirpath, "consolidated*.pth")); len(pt) > 0 {
+		files = append(files, pt...)
+	} else if pt, _ := filepath.Glob(filepath.Join(dirpath, "pytorch_model*.pth")); len(pt) > 0 {
+		files = append(files, pt...)
 	}
 
 	var offset uint64
-	var tensors []llm.Tensor
 	for _, fn := range files {
 		m, err := pytorch.Load(fn)
 
diff --git a/llm/gguf.go b/llm/gguf.go
index 179b3255..eb7d7b75 100644
--- a/llm/gguf.go
+++ b/llm/gguf.go
@@ -62,16 +62,6 @@ func (c *containerGGUF) Decode(rs io.ReadSeeker) (model, error) {
 	return model, nil
 }
 
-const (
-	_ uint32 = iota
-	GGUFTokenNormal
-	GGUFTokenUnknown
-	GGUFTokenControl
-	GGUFTokenUserDefined
-	GGUFTokenUnused
-	GGUFTokenByte
-)
-
 const (
 	ggufTypeUint8 uint32 = iota
 	ggufTypeInt8