Merge pull request #5117 from dhiltgen/fix_prediction
Handle models with divergent layer sizes
This commit is contained in:
commit
26d0bf9236
|
@ -1,6 +1,7 @@
|
||||||
package llm
|
package llm
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
@ -179,6 +180,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
|
||||||
|
|
||||||
// For all the layers, find where they can fit on the GPU(s)
|
// For all the layers, find where they can fit on the GPU(s)
|
||||||
for i := range int(ggml.KV().BlockCount()) {
|
for i := range int(ggml.KV().BlockCount()) {
|
||||||
|
// Some models have inconsistent layer sizes
|
||||||
|
if blk, ok := layers[fmt.Sprintf("blk.%d", i)]; ok {
|
||||||
|
layerSize = blk.size()
|
||||||
|
layerSize += kv / ggml.KV().BlockCount()
|
||||||
|
}
|
||||||
memoryWeights += layerSize
|
memoryWeights += layerSize
|
||||||
|
|
||||||
if opts.NumGPU >= 0 && layerCount >= opts.NumGPU {
|
if opts.NumGPU >= 0 && layerCount >= opts.NumGPU {
|
||||||
|
|
Loading…
Reference in a new issue