diff --git a/llama/ggml-cuda.cu b/llama/ggml-cuda.cu index e41ed50f..439f9777 100644 --- a/llama/ggml-cuda.cu +++ b/llama/ggml-cuda.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/ggml-cuda.h b/llama/ggml-cuda.h index 27aae6dc..2c481bbe 100644 --- a/llama/ggml-cuda.h +++ b/llama/ggml-cuda.h @@ -1,5 +1,5 @@ /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/ggml-metal.h b/llama/ggml-metal.h index 57264feb..bb93cde3 100644 --- a/llama/ggml-metal.h +++ b/llama/ggml-metal.h @@ -1,7 +1,7 @@ //go:build darwin /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/ggml-metal.m b/llama/ggml-metal.m index 709feea4..5fbf5254 100644 --- a/llama/ggml-metal.m +++ b/llama/ggml-metal.m @@ -1,7 +1,7 @@ //go:build darwin /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/ggml-metal.metal b/llama/ggml-metal.metal index b8397d9f..0009f091 100644 --- a/llama/ggml-metal.metal +++ b/llama/ggml-metal.metal @@ -1,7 +1,7 @@ //go:build darwin /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/ggml-mpi.c b/llama/ggml-mpi.c index 82b93bb3..2ba3727a 100644 --- a/llama/ggml-mpi.c +++ b/llama/ggml-mpi.c @@ -1,7 +1,7 @@ //go:build mpi /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/ggml-mpi.h b/llama/ggml-mpi.h index 61862740..414fa1f7 100644 --- a/llama/ggml-mpi.h +++ b/llama/ggml-mpi.h @@ -1,7 +1,7 @@ //go:build mpi /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/ggml-opencl.cpp b/llama/ggml-opencl.cpp index b392f0b3..56650fce 100644 --- a/llama/ggml-opencl.cpp +++ b/llama/ggml-opencl.cpp @@ -1,7 +1,7 @@ //go:build opencl /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/ggml-opencl.h b/llama/ggml-opencl.h index 94043893..b05ac6f5 100644 --- a/llama/ggml-opencl.h +++ b/llama/ggml-opencl.h @@ -1,7 +1,7 @@ //go:build opencl /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/ggml.c b/llama/ggml.c index cc9c594f..854606c6 100644 --- a/llama/ggml.c +++ b/llama/ggml.c @@ -1,5 +1,5 @@ /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/ggml.h b/llama/ggml.h index 3069ae58..0c2c6e91 100644 --- a/llama/ggml.h +++ b/llama/ggml.h @@ -1,5 +1,5 @@ /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/k_quants.c b/llama/k_quants.c index e6bd7889..85649755 100644 --- a/llama/k_quants.c +++ b/llama/k_quants.c @@ -1,5 +1,5 @@ /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/k_quants.h b/llama/k_quants.h index 177de2d3..88a9d439 100644 --- a/llama/k_quants.h +++ b/llama/k_quants.h @@ -1,5 +1,5 @@ /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/llama-util.h b/llama/llama-util.h index 0424ed65..0f910783 100644 --- a/llama/llama-util.h +++ b/llama/llama-util.h @@ -1,5 +1,5 @@ /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/llama.cpp b/llama/llama.cpp index 25c29e79..c5f15d9a 100644 --- a/llama/llama.cpp +++ b/llama/llama.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * @@ -3689,7 +3689,7 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) { const auto & kv_self = ctx->kv_self; const auto & hparams = ctx->model.hparams; const int n_layer = hparams.n_layer; - const int n_embd = hparams.n_embd; + const int n_embd = hparams.n_embd_gqa(); const int n_ctx = hparams.n_ctx; const size_t kv_size = kv_self.buf.size; @@ -3792,7 +3792,7 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) { const auto & kv_self = ctx->kv_self; const auto & hparams = ctx->model.hparams; const int n_layer = hparams.n_layer; - const int n_embd = hparams.n_embd; + const int n_embd = hparams.n_embd_gqa(); const int n_ctx = hparams.n_ctx; size_t kv_size; diff --git a/llama/llama.h b/llama/llama.h index 4a92566f..d3de28f3 100644 --- a/llama/llama.h +++ b/llama/llama.h @@ -1,5 +1,5 @@ /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/update-llama-cpp.sh b/llama/update-llama-cpp.sh old mode 100644 new mode 100755