diff --git a/convert/convert.go b/convert/convert.go
index f4210e50..dbc26da1 100644
--- a/convert/convert.go
+++ b/convert/convert.go
@@ -77,7 +77,7 @@ func GetModelFormat(dirname string) (ModelFormat, error) {
 		slog.Debug(fmt.Sprintf("file = %s", fn))
 		if strings.HasSuffix(fn, ".safetensors") {
 			return &SafetensorFormat{}, nil
-		} else if strings.HasSuffix(fn, ".bin") {
+		} else if strings.HasSuffix(fn, ".pth") {
 			slog.Debug("model is torch")
 			return &TorchFormat{}, nil
 		}
diff --git a/convert/torch.go b/convert/torch.go
index 92c58872..0ad10c0e 100644
--- a/convert/torch.go
+++ b/convert/torch.go
@@ -33,7 +33,7 @@ type TorchFormat struct{}
 
 func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
 	slog.Debug("getting torch tensors")
-	files, err := filepath.Glob(filepath.Join(dirpath, "pytorch_model-*.bin"))
+	files, err := filepath.Glob(filepath.Join(dirpath, "consolidated.*.pth"))
 	if err != nil {
 		slog.Error("didn't find any torch files")
 		return nil, err
@@ -120,7 +121,7 @@ func getAltParams(dirpath string) (*Params, error) {
 		AttentionHeads int     `json:"n_heads"`
 		KeyValHeads    int     `json:"n_kv_heads"`
 		HiddenLayers   int     `json:"n_layers"`
-		RopeTheta      int     `json:"rope_theta"`
+		RopeTheta      float64 `json:"rope_theta"`
 		NormEPS        float64 `json:"norm_eps"`
 	}
 
@@ -133,6 +134,7 @@ func getAltParams(dirpath string) (*Params, error) {
 	}
 
 	params := &Params{
+		Architectures:  []string{"LlamaForCausalLM"},
 		HiddenSize:     tparams.HiddenSize,
 		AttentionHeads: tparams.AttentionHeads,
 		KeyValHeads:    tparams.KeyValHeads,