diff --git a/llama/ggml-cuda.cu b/llama/ggml-cuda.cu
index e41ed50f..439f9777 100644
--- a/llama/ggml-cuda.cu
+++ b/llama/ggml-cuda.cu
@@ -1,5 +1,5 @@
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/ggml-cuda.h b/llama/ggml-cuda.h
index 27aae6dc..2c481bbe 100644
--- a/llama/ggml-cuda.h
+++ b/llama/ggml-cuda.h
@@ -1,5 +1,5 @@
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/ggml-metal.h b/llama/ggml-metal.h
index 57264feb..bb93cde3 100644
--- a/llama/ggml-metal.h
+++ b/llama/ggml-metal.h
@@ -1,7 +1,7 @@
 //go:build darwin
 
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/ggml-metal.m b/llama/ggml-metal.m
index 709feea4..5fbf5254 100644
--- a/llama/ggml-metal.m
+++ b/llama/ggml-metal.m
@@ -1,7 +1,7 @@
 //go:build darwin
 
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/ggml-metal.metal b/llama/ggml-metal.metal
index b8397d9f..0009f091 100644
--- a/llama/ggml-metal.metal
+++ b/llama/ggml-metal.metal
@@ -1,7 +1,7 @@
 //go:build darwin
 
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/ggml-mpi.c b/llama/ggml-mpi.c
index 82b93bb3..2ba3727a 100644
--- a/llama/ggml-mpi.c
+++ b/llama/ggml-mpi.c
@@ -1,7 +1,7 @@
 //go:build mpi
 
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/ggml-mpi.h b/llama/ggml-mpi.h
index 61862740..414fa1f7 100644
--- a/llama/ggml-mpi.h
+++ b/llama/ggml-mpi.h
@@ -1,7 +1,7 @@
 //go:build mpi
 
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/ggml-opencl.cpp b/llama/ggml-opencl.cpp
index b392f0b3..56650fce 100644
--- a/llama/ggml-opencl.cpp
+++ b/llama/ggml-opencl.cpp
@@ -1,7 +1,7 @@
 //go:build opencl
 
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/ggml-opencl.h b/llama/ggml-opencl.h
index 94043893..b05ac6f5 100644
--- a/llama/ggml-opencl.h
+++ b/llama/ggml-opencl.h
@@ -1,7 +1,7 @@
 //go:build opencl
 
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/ggml.c b/llama/ggml.c
index cc9c594f..854606c6 100644
--- a/llama/ggml.c
+++ b/llama/ggml.c
@@ -1,5 +1,5 @@
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/ggml.h b/llama/ggml.h
index 3069ae58..0c2c6e91 100644
--- a/llama/ggml.h
+++ b/llama/ggml.h
@@ -1,5 +1,5 @@
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/k_quants.c b/llama/k_quants.c
index e6bd7889..85649755 100644
--- a/llama/k_quants.c
+++ b/llama/k_quants.c
@@ -1,5 +1,5 @@
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/k_quants.h b/llama/k_quants.h
index 177de2d3..88a9d439 100644
--- a/llama/k_quants.h
+++ b/llama/k_quants.h
@@ -1,5 +1,5 @@
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/llama-util.h b/llama/llama-util.h
index 0424ed65..0f910783 100644
--- a/llama/llama-util.h
+++ b/llama/llama-util.h
@@ -1,5 +1,5 @@
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/llama.cpp b/llama/llama.cpp
index 25c29e79..c5f15d9a 100644
--- a/llama/llama.cpp
+++ b/llama/llama.cpp
@@ -1,5 +1,5 @@
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
@@ -3689,7 +3689,7 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) {
         const auto & kv_self = ctx->kv_self;
         const auto & hparams = ctx->model.hparams;
         const int    n_layer = hparams.n_layer;
-        const int    n_embd  = hparams.n_embd;
+        const int    n_embd  = hparams.n_embd_gqa();
         const int    n_ctx   = hparams.n_ctx;
 
         const size_t kv_size = kv_self.buf.size;
@@ -3792,7 +3792,7 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
         const auto & kv_self = ctx->kv_self;
         const auto & hparams = ctx->model.hparams;
         const int    n_layer = hparams.n_layer;
-        const int    n_embd  = hparams.n_embd;
+        const int    n_embd  = hparams.n_embd_gqa();
         const int    n_ctx   = hparams.n_ctx;
 
         size_t kv_size;
diff --git a/llama/llama.h b/llama/llama.h
index 4a92566f..d3de28f3 100644
--- a/llama/llama.h
+++ b/llama/llama.h
@@ -1,5 +1,5 @@
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/update-llama-cpp.sh b/llama/update-llama-cpp.sh
old mode 100644
new mode 100755