Add the IQ*/BF16 file types and switch Tensor.blockSize/typeSize to explicit
per-kind tables.

Review notes on the ggml.go hunks:
  * Tensor.blockSize: BF16 is tensor kind 30, not 31. The typeSize cases in
    this same patch number I8..IQ1_M as 24..29, so BF16 follows at 30. With
    31, BF16 tensors fell into the default branch (block size 256).
  * Tensor.typeSize: add the BF16 case (2 bytes per element) so the kind that
    blockSize handles no longer falls through to the default of 0.

diff --git a/llm/filetype.go b/llm/filetype.go
index e5e9410d..7a8e9f69 100644
--- a/llm/filetype.go
+++ b/llm/filetype.go
@@ -27,8 +27,16 @@ const (
 	fileTypeIQ2_XXS
 	fileTypeIQ2_XS
 	fileTypeQ2_K_S
-	fileTypeQ3_K_XS
+	fileTypeIQ3_XS
 	fileTypeIQ3_XXS
+	fileTypeIQ1_S
+	fileTypeIQ4_NL
+	fileTypeIQ3_S
+	fileTypeIQ2_S
+	fileTypeIQ4_XS
+	fileTypeIQ2_M
+	fileTypeIQ1_M
+	fileTypeBF16
 	fileTypeUnknown
 )
 
@@ -75,10 +83,26 @@ func ParseFileType(s string) (fileType, error) {
 		return fileTypeIQ2_XS, nil
 	case "Q2_K_S":
 		return fileTypeQ2_K_S, nil
-	case "Q3_K_XS":
-		return fileTypeQ3_K_XS, nil
+	case "IQ3_XS":
+		return fileTypeIQ3_XS, nil
 	case "IQ3_XXS":
 		return fileTypeIQ3_XXS, nil
+	case "IQ1_S":
+		return fileTypeIQ1_S, nil
+	case "IQ4_NL":
+		return fileTypeIQ4_NL, nil
+	case "IQ3_S":
+		return fileTypeIQ3_S, nil
+	case "IQ2_S":
+		return fileTypeIQ2_S, nil
+	case "IQ4_XS":
+		return fileTypeIQ4_XS, nil
+	case "IQ2_M":
+		return fileTypeIQ2_M, nil
+	case "IQ1_M":
+		return fileTypeIQ1_M, nil
+	case "BF16":
+		return fileTypeBF16, nil
 	default:
 		return fileTypeUnknown, fmt.Errorf("unknown fileType: %s", s)
 	}
@@ -126,10 +150,26 @@ func (t fileType) String() string {
 		return "IQ2_XS"
 	case fileTypeQ2_K_S:
 		return "Q2_K_S"
-	case fileTypeQ3_K_XS:
-		return "Q3_K_XS"
+	case fileTypeIQ3_XS:
+		return "IQ3_XS"
 	case fileTypeIQ3_XXS:
 		return "IQ3_XXS"
+	case fileTypeIQ1_S:
+		return "IQ1_S"
+	case fileTypeIQ4_NL:
+		return "IQ4_NL"
+	case fileTypeIQ3_S:
+		return "IQ3_S"
+	case fileTypeIQ2_S:
+		return "IQ2_S"
+	case fileTypeIQ4_XS:
+		return "IQ4_XS"
+	case fileTypeIQ2_M:
+		return "IQ2_M"
+	case fileTypeIQ1_M:
+		return "IQ1_M"
+	case fileTypeBF16:
+		return "BF16"
 	default:
 		return "unknown"
 	}
diff --git a/llm/ggml.go b/llm/ggml.go
index 48b69f51..9b6da425 100644
--- a/llm/ggml.go
+++ b/llm/ggml.go
@@ -124,12 +124,12 @@ type Tensor struct {
 }
 
 func (t Tensor) blockSize() uint64 {
-	switch {
-	case t.Kind < 2:
+	switch t.Kind {
+	case 0, 1, 24, 25, 26, 27, 28, 30: // F32, F16, I8, I16, I32, I64, F64, BF16
 		return 1
-	case t.Kind < 10:
+	case 2, 3, 8, 9, 20: // Q4_0, Q4_1, Q8_0, Q8_1, IQ4_NL
 		return 32
-	default:
+	default: // All others
 		return 256
 	}
 }
@@ -171,7 +171,31 @@ func (t Tensor) typeSize() uint64 {
 	case 17: // IQ2_XS
 		return 2 + 2*blockSize/8 + blockSize/32
 	case 18: // IQ3_XXS
-		return 2 + 3*blockSize/8
+		return 2 + blockSize/4 + blockSize/8
+	case 19: // IQ1_S
+		return 2 + blockSize/8 + blockSize/16
+	case 20: // IQ4_NL
+		return 2 + blockSize/2
+	case 21: // IQ3_S
+		return 2 + blockSize/4 + blockSize/8 + blockSize/32 + 4
+	case 22: // IQ2_S
+		return 2 + blockSize/4 + blockSize/16
+	case 23: // IQ4_XS
+		return 2 + 2 + blockSize/2 + blockSize/64
+	case 24: // I8
+		return 1
+	case 25: // I16
+		return 2
+	case 26: // I32
+		return 4
+	case 27: // I64
+		return 8
+	case 28: // F64
+		return 8
+	case 29: // IQ1_M
+		return blockSize/8 + blockSize/16 + blockSize/32
+	case 30: // BF16
+		return 2
 	default:
 		return 0
 	}