seek instead of copyn

This commit is contained in:
Michael Yang 2023-11-29 10:31:58 -08:00
parent 998f1785b6
commit 72e7a49aa9
2 changed files with 50 additions and 40 deletions

View file

@@ -83,7 +83,7 @@ type model interface {
type container interface { type container interface {
Name() string Name() string
Decode(*readOffset) (model, error) Decode(*readSeekOffset) (model, error)
} }
type containerGGML struct{} type containerGGML struct{}
@@ -92,7 +92,7 @@ func (c *containerGGML) Name() string {
return "ggml" return "ggml"
} }
func (c *containerGGML) Decode(ro *readOffset) (model, error) { func (c *containerGGML) Decode(ro *readSeekOffset) (model, error) {
return nil, nil return nil, nil
} }
@@ -104,7 +104,7 @@ func (c *containerGGMF) Name() string {
return "ggmf" return "ggmf"
} }
func (c *containerGGMF) Decode(ro *readOffset) (model, error) { func (c *containerGGMF) Decode(ro *readSeekOffset) (model, error) {
var version uint32 var version uint32
binary.Read(ro, binary.LittleEndian, &version) binary.Read(ro, binary.LittleEndian, &version)
@@ -126,7 +126,7 @@ func (c *containerGGJT) Name() string {
return "ggjt" return "ggjt"
} }
func (c *containerGGJT) Decode(ro *readOffset) (model, error) { func (c *containerGGJT) Decode(ro *readSeekOffset) (model, error) {
var version uint32 var version uint32
binary.Read(ro, binary.LittleEndian, &version) binary.Read(ro, binary.LittleEndian, &version)
@@ -152,7 +152,7 @@ func (c *containerLORA) Name() string {
return "ggla" return "ggla"
} }
func (c *containerLORA) Decode(ro *readOffset) (model, error) { func (c *containerLORA) Decode(ro *readSeekOffset) (model, error) {
var version uint32 var version uint32
binary.Read(ro, binary.LittleEndian, &version) binary.Read(ro, binary.LittleEndian, &version)
@@ -180,8 +180,8 @@ const (
FILE_MAGIC_GGUF_BE = 0x47475546 FILE_MAGIC_GGUF_BE = 0x47475546
) )
func DecodeGGML(r io.Reader) (*GGML, error) { func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
ro := readOffset{Reader: r} ro := readSeekOffset{ReadSeeker: r}
var magic uint32 var magic uint32
if err := binary.Read(&ro, binary.LittleEndian, &magic); err != nil { if err := binary.Read(&ro, binary.LittleEndian, &magic); err != nil {
@@ -219,13 +219,23 @@ func DecodeGGML(r io.Reader) (*GGML, error) {
}, nil }, nil
} }
type readOffset struct { type readSeekOffset struct {
io.Reader io.ReadSeeker
offset int64 offset int64
} }
func (r *readOffset) Read(p []byte) (int, error) { func (rso *readSeekOffset) Seek(offset int64, whence int) (int64, error) {
n, err := r.Reader.Read(p) offset, err := rso.ReadSeeker.Seek(offset, whence)
r.offset += int64(n) if err != nil {
return 0, err
}
rso.offset = offset
return offset, nil
}
func (rso *readSeekOffset) Read(p []byte) (int, error) {
n, err := rso.ReadSeeker.Read(p)
rso.offset += int64(n)
return n, err return n, err
} }

View file

@@ -29,18 +29,18 @@ func (c *containerGGUF) Name() string {
return "gguf" return "gguf"
} }
func (c *containerGGUF) Decode(ro *readOffset) (model, error) { func (c *containerGGUF) Decode(rso *readSeekOffset) (model, error) {
binary.Read(ro, c.bo, &c.Version) binary.Read(rso, c.bo, &c.Version)
switch c.Version { switch c.Version {
case 1: case 1:
binary.Read(ro, c.bo, &c.V1) binary.Read(rso, c.bo, &c.V1)
default: default:
binary.Read(ro, c.bo, &c.V2) binary.Read(rso, c.bo, &c.V2)
} }
model := newGGUFModel(c) model := newGGUFModel(c)
if err := model.Decode(ro); err != nil { if err := model.Decode(rso); err != nil {
return nil, err return nil, err
} }
@@ -154,49 +154,49 @@ func (llm *ggufModel) FileType() string {
return "unknown" return "unknown"
} }
func (llm *ggufModel) Decode(ro *readOffset) error { func (llm *ggufModel) Decode(rso *readSeekOffset) error {
// decode key-values // decode key-values
for i := 0; uint64(i) < llm.NumKV(); i++ { for i := 0; uint64(i) < llm.NumKV(); i++ {
k, err := llm.readString(ro) k, err := llm.readString(rso)
if err != nil { if err != nil {
return err return err
} }
vtype := llm.readU32(ro) vtype := llm.readU32(rso)
var v any var v any
switch vtype { switch vtype {
case ggufTypeUint8: case ggufTypeUint8:
v = llm.readU8(ro) v = llm.readU8(rso)
case ggufTypeInt8: case ggufTypeInt8:
v = llm.readI8(ro) v = llm.readI8(rso)
case ggufTypeUint16: case ggufTypeUint16:
v = llm.readU16(ro) v = llm.readU16(rso)
case ggufTypeInt16: case ggufTypeInt16:
v = llm.readI16(ro) v = llm.readI16(rso)
case ggufTypeUint32: case ggufTypeUint32:
v = llm.readU32(ro) v = llm.readU32(rso)
case ggufTypeInt32: case ggufTypeInt32:
v = llm.readI32(ro) v = llm.readI32(rso)
case ggufTypeUint64: case ggufTypeUint64:
v = llm.readU64(ro) v = llm.readU64(rso)
case ggufTypeInt64: case ggufTypeInt64:
v = llm.readI64(ro) v = llm.readI64(rso)
case ggufTypeFloat32: case ggufTypeFloat32:
v = llm.readF32(ro) v = llm.readF32(rso)
case ggufTypeFloat64: case ggufTypeFloat64:
v = llm.readF64(ro) v = llm.readF64(rso)
case ggufTypeBool: case ggufTypeBool:
v = llm.readBool(ro) v = llm.readBool(rso)
case ggufTypeString: case ggufTypeString:
s, err := llm.readString(ro) s, err := llm.readString(rso)
if err != nil { if err != nil {
return err return err
} }
v = s v = s
case ggufTypeArray: case ggufTypeArray:
a, err := llm.readArray(ro) a, err := llm.readArray(rso)
if err != nil { if err != nil {
return err return err
} }
@@ -211,20 +211,20 @@ func (llm *ggufModel) Decode(ro *readOffset) error {
// decode tensors // decode tensors
for i := 0; uint64(i) < llm.NumTensor(); i++ { for i := 0; uint64(i) < llm.NumTensor(); i++ {
name, err := llm.readString(ro) name, err := llm.readString(rso)
if err != nil { if err != nil {
return err return err
} }
dims := llm.readU32(ro) dims := llm.readU32(rso)
shape := [4]uint64{1, 1, 1, 1} shape := [4]uint64{1, 1, 1, 1}
for i := 0; uint32(i) < dims; i++ { for i := 0; uint32(i) < dims; i++ {
shape[i] = llm.readU64(ro) shape[i] = llm.readU64(rso)
} }
kind := llm.readU32(ro) kind := llm.readU32(rso)
offset := llm.readU64(ro) offset := llm.readU64(rso)
var blockSize uint64 var blockSize uint64
switch { switch {
@@ -285,10 +285,10 @@ func (llm *ggufModel) Decode(ro *readOffset) error {
alignment = 32 alignment = 32
} }
io.CopyN(io.Discard, ro, int64(alignment)-ro.offset%int64(alignment)) rso.Seek(int64(alignment)-rso.offset%int64(alignment), io.SeekCurrent)
for _, tensor := range llm.tensors { for _, tensor := range llm.tensors {
padded := (int64(tensor.size) + int64(alignment) - 1) & ^(int64(alignment) - 1) padded := (int64(tensor.size) + int64(alignment) - 1) & ^(int64(alignment) - 1)
io.CopyN(io.Discard, ro, padded) rso.Seek(padded, io.SeekCurrent)
} }
return nil return nil