From 7bd7b02712beeedad23a95b29c1faf6137f57487 Mon Sep 17 00:00:00 2001
From: Michael Yang
Date: Mon, 16 Sep 2024 15:58:55 -0700
Subject: [PATCH] make patches git am-able

raw diffs can be applied using `git apply` but not with `git am`. git
patches, e.g. through `git format-patch` are both apply-able and am-able
---
 llm/generate/gen_common.sh                    | 20 ++++------------
 llm/generate/gen_windows.ps1                  | 24 ++-----------------
 llm/patches/0000-cmakelist.patch              | 22 +++++++++++++++++
 ...progress.diff => 0001-load-progress.patch} | 21 ++++++++++++----
 .../{02-clip-log.diff => 0002-clip-log.patch} | 14 ++++++++++-
 ...ception.diff => 0003-load_exception.patch} | 18 +++++++++++---
 .../{04-metal.diff => 0004-metal.patch}       | 16 +++++++++++--
 ...r.diff => 0005-default-pretokenizer.patch} | 14 ++++++++++-
 ...-embeddings.diff => 0006-embeddings.patch} | 18 +++++++++++---
 ...p-unicode.diff => 0007-clip-unicode.patch} | 18 +++++++++++---
 10 files changed, 130 insertions(+), 55 deletions(-)
 create mode 100644 llm/patches/0000-cmakelist.patch
 rename llm/patches/{01-load-progress.diff => 0001-load-progress.patch} (74%)
 rename llm/patches/{02-clip-log.diff => 0002-clip-log.patch} (60%)
 rename llm/patches/{03-load_exception.diff => 0003-load_exception.patch} (74%)
 rename llm/patches/{04-metal.diff => 0004-metal.patch} (87%)
 rename llm/patches/{05-default-pretokenizer.diff => 0005-default-pretokenizer.patch} (85%)
 rename llm/patches/{06-embeddings.diff => 0006-embeddings.patch} (79%)
 rename llm/patches/{07-clip-unicode.diff => 0007-clip-unicode.patch} (73%)

diff --git a/llm/generate/gen_common.sh b/llm/generate/gen_common.sh
index ab5d7612..3825c155 100644
--- a/llm/generate/gen_common.sh
+++ b/llm/generate/gen_common.sh
@@ -69,22 +69,10 @@ git_module_setup() {
 }
 
 apply_patches() {
-    # Wire up our CMakefile
-    if ! grep ollama ${LLAMACPP_DIR}/CMakeLists.txt; then
-        echo 'add_subdirectory(../ext_server ext_server) # ollama' >>${LLAMACPP_DIR}/CMakeLists.txt
-    fi
-
-    if [ -n "$(ls -A ../patches/*.diff)" ]; then
-        # apply temporary patches until fix is upstream
-        for patch in ../patches/*.diff; do
-            for file in $(grep "^+++ " ${patch} | cut -f2 -d' ' | cut -f2- -d/); do
-                (cd ${LLAMACPP_DIR}; git checkout ${file})
-            done
-        done
-        for patch in ../patches/*.diff; do
-            (cd ${LLAMACPP_DIR} && git apply ${patch})
-        done
-    fi
+    # apply temporary patches until fix is upstream
+    for patch in ../patches/*.patch; do
+        git -c 'user.name=nobody' -c 'user.email=<>' -C ${LLAMACPP_DIR} am ${patch}
+    done
 }
 
 build() {
diff --git a/llm/generate/gen_windows.ps1 b/llm/generate/gen_windows.ps1
index 7179c1bc..22538851 100644
--- a/llm/generate/gen_windows.ps1
+++ b/llm/generate/gen_windows.ps1
@@ -83,29 +83,9 @@ function git_module_setup {
 }
 
 function apply_patches {
-    # Wire up our CMakefile
-    if (!(Select-String -Path "${script:llamacppDir}/CMakeLists.txt" -Pattern 'ollama')) {
-        Add-Content -Path "${script:llamacppDir}/CMakeLists.txt" -Value 'add_subdirectory(../ext_server ext_server) # ollama'
-    }
-    # Apply temporary patches until fix is upstream
-    $patches = Get-ChildItem "../patches/*.diff"
-    foreach ($patch in $patches) {
-        # Extract file paths from the patch file
-        $filePaths = Get-Content $patch.FullName | Where-Object { $_ -match '^\+\+\+ ' } | ForEach-Object {
-            $parts = $_ -split ' '
-            ($parts[1] -split '/', 2)[1]
-        }
-
-        # Checkout each file
-        foreach ($file in $filePaths) {
-            git -C "${script:llamacppDir}" checkout $file
-        }
-    }
-
-    # Apply each patch
-    foreach ($patch in $patches) {
-        git -C "${script:llamacppDir}" apply $patch.FullName
+    foreach ($patch in $(Get-ChildItem "../patches/*.patch")) {
+        git -c 'user.name=nobody' -c 'user.email=<>' -C "${script:llamacppDir}" am $patch.FullName
     }
 }
 
diff --git a/llm/patches/0000-cmakelist.patch b/llm/patches/0000-cmakelist.patch
new file mode 100644
index 00000000..54e9b602
--- /dev/null
+++ b/llm/patches/0000-cmakelist.patch
@@ -0,0 +1,22 @@
+From 8b8d83ffca775840acc5dc700f3b3703e9f5cfe4 Mon Sep 17 00:00:00 2001
+From: Michael Yang
+Date: Fri, 23 Aug 2024 11:27:48 -0700
+Subject: [PATCH] patch cmakelist
+
+---
+ CMakeLists.txt | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index a3132063..6a2a9912 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -199,3 +199,5 @@ if (LLAMA_BUILD_EXAMPLES)
+     add_subdirectory(examples)
+     add_subdirectory(pocs)
+ endif()
++
++add_subdirectory(../ext_server ext_server) # ollama
+-- 
+2.45.2
+
diff --git a/llm/patches/01-load-progress.diff b/llm/patches/0001-load-progress.patch
similarity index 74%
rename from llm/patches/01-load-progress.diff
rename to llm/patches/0001-load-progress.patch
index a053c1c2..0ddabc80 100644
--- a/llm/patches/01-load-progress.diff
+++ b/llm/patches/0001-load-progress.patch
@@ -1,8 +1,18 @@
+From 2cfaa0a04faa9c87ba8f1ac8527eb953e69c6cde Mon Sep 17 00:00:00 2001
+From: Michael Yang
+Date: Mon, 16 Sep 2024 15:53:10 -0700
+Subject: [PATCH] 01-load-progress.diff
+
+---
+ common/common.cpp | 2 ++
+ common/common.h   | 7 +++++++
+ 2 files changed, 9 insertions(+)
+
 diff --git a/common/common.cpp b/common/common.cpp
-index 2c05a4d4..927f0e3d 100644
+index 9fa18472..48ff41e9 100644
 --- a/common/common.cpp
 +++ b/common/common.cpp
-@@ -2093,6 +2093,8 @@ struct llama_model_params llama_model_params_from_gpt_params(const gpt_params &
+@@ -2573,6 +2573,8 @@ struct 
llama_model_params llama_model_params_from_gpt_params(const gpt_params & mparams.use_mmap = params.use_mmap; mparams.use_mlock = params.use_mlock; mparams.check_tensors = params.check_tensors; @@ -12,10 +22,10 @@ index 2c05a4d4..927f0e3d 100644 mparams.kv_overrides = NULL; } else { diff --git a/common/common.h b/common/common.h -index 65c0ef81..ebca2c77 100644 +index cb5e7f6d..d8f043f7 100644 --- a/common/common.h +++ b/common/common.h -@@ -184,6 +184,13 @@ struct gpt_params { +@@ -204,6 +204,13 @@ struct gpt_params { std::string mmproj = ""; // path to multimodal projector std::vector image; // path to image file(s) @@ -29,3 +39,6 @@ index 65c0ef81..ebca2c77 100644 // embedding bool embedding = false; // get only sentence embedding int32_t embd_normalize = 2; // normalisation for embendings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm) +-- +2.46.0 + diff --git a/llm/patches/02-clip-log.diff b/llm/patches/0002-clip-log.patch similarity index 60% rename from llm/patches/02-clip-log.diff rename to llm/patches/0002-clip-log.patch index 34a018e8..8df0da17 100644 --- a/llm/patches/02-clip-log.diff +++ b/llm/patches/0002-clip-log.patch @@ -1,5 +1,14 @@ +From ba4bba80a744f76ac67b8234451c259a3c5da83b Mon Sep 17 00:00:00 2001 +From: Michael Yang +Date: Mon, 16 Sep 2024 15:53:11 -0700 +Subject: [PATCH] 02-clip-log.diff + +--- + examples/llava/clip.cpp | 1 + + 1 file changed, 1 insertion(+) + diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp -index e431c7f7..f077e688 100644 +index 9b890571..cb51793d 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp @@ -3,6 +3,7 @@ @@ -10,3 +19,6 @@ index e431c7f7..f077e688 100644 #include "log.h" #include "ggml.h" #include "ggml-alloc.h" +-- +2.46.0 + diff --git a/llm/patches/03-load_exception.diff b/llm/patches/0003-load_exception.patch similarity index 74% rename from llm/patches/03-load_exception.diff rename to llm/patches/0003-load_exception.patch index 02666196..3d858ebb 100644 --- a/llm/patches/03-load_exception.diff +++ b/llm/patches/0003-load_exception.patch @@ -1,8 +1,17 @@ +From e43bfd3f607a6dfcaba2d490d35f412a52e55e30 Mon Sep 17 00:00:00 2001 +From: Michael Yang +Date: Mon, 16 Sep 2024 15:53:12 -0700 +Subject: [PATCH] 03-load_exception.diff + +--- + src/llama.cpp | 25 ++++++++++++++++--------- + 1 file changed, 16 insertions(+), 9 deletions(-) + diff --git a/src/llama.cpp b/src/llama.cpp -index 73f52435..58a00fb1 100644 +index 88355971..926bb71a 100644 --- a/src/llama.cpp +++ b/src/llama.cpp -@@ -7241,7 +7241,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam +@@ -8635,7 +8635,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam } } catch (const std::exception & err) { LLAMA_LOG_ERROR("%s: error loading model: %s\n", __func__, err.what()); @@ -11,7 +20,7 @@ index 73f52435..58a00fb1 100644 } return 0; -@@ -17564,16 +17564,23 @@ struct llama_model * llama_load_model_from_file( +@@ -18022,16 +18022,23 @@ struct llama_model * llama_load_model_from_file( } model->rpc_servers.push_back(servers); } @@ -43,3 +52,6 @@ index 73f52435..58a00fb1 100644 } return model; +-- +2.46.0 + diff --git a/llm/patches/04-metal.diff b/llm/patches/0004-metal.patch similarity index 87% rename from llm/patches/04-metal.diff rename to llm/patches/0004-metal.patch index e63732e7..4cfa407e 100644 --- a/llm/patches/04-metal.diff +++ b/llm/patches/0004-metal.patch @@ -1,8 +1,17 @@ +From 29411d9a9d2b6a0af6425ffe88498f17f71f7d5d Mon Sep 17 00:00:00 2001 +From: Michael Yang 
+Date: Mon, 16 Sep 2024 15:53:12 -0700 +Subject: [PATCH] 04-metal.diff + +--- + ggml/src/ggml-metal.m | 30 +++++++++++++----------------- + 1 file changed, 13 insertions(+), 17 deletions(-) + diff --git a/ggml/src/ggml-metal.m b/ggml/src/ggml-metal.m -index 0207b787..b5e9884b 100644 +index 91b5e61b..9cfa72ac 100644 --- a/ggml/src/ggml-metal.m +++ b/ggml/src/ggml-metal.m -@@ -1396,27 +1396,23 @@ static enum ggml_status ggml_metal_graph_compute( +@@ -1734,27 +1734,23 @@ static enum ggml_status ggml_metal_graph_compute( // to the matrix-vector kernel int ne11_mm_min = 1; @@ -43,3 +52,6 @@ index 0207b787..b5e9884b 100644 // for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs // AMD GPU and older A-chips will reuse matrix-vector multiplication kernel +-- +2.46.0 + diff --git a/llm/patches/05-default-pretokenizer.diff b/llm/patches/0005-default-pretokenizer.patch similarity index 85% rename from llm/patches/05-default-pretokenizer.diff rename to llm/patches/0005-default-pretokenizer.patch index 351bcaef..6ad0ee97 100644 --- a/llm/patches/05-default-pretokenizer.diff +++ b/llm/patches/0005-default-pretokenizer.patch @@ -1,5 +1,14 @@ +From b298ac8614d1e38da28f760eb1d2ae8af0fbbe62 Mon Sep 17 00:00:00 2001 +From: Michael Yang +Date: Mon, 16 Sep 2024 15:53:13 -0700 +Subject: [PATCH] 05-default-pretokenizer.diff + +--- + src/llama.cpp | 14 +++----------- + 1 file changed, 3 insertions(+), 11 deletions(-) + diff --git a/src/llama.cpp b/src/llama.cpp -index 88355971..dd7d41ed 100644 +index 926bb71a..d1e959fc 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -6083,16 +6083,7 @@ static void llm_load_vocab( @@ -30,3 +39,6 @@ index 88355971..dd7d41ed 100644 } } else if (vocab.type == LLAMA_VOCAB_TYPE_SPM) { vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT; +-- +2.46.0 + diff --git a/llm/patches/06-embeddings.diff b/llm/patches/0006-embeddings.patch similarity index 79% rename from llm/patches/06-embeddings.diff rename to llm/patches/0006-embeddings.patch index f3c071cb..8f89ffeb 100644 --- a/llm/patches/06-embeddings.diff +++ b/llm/patches/0006-embeddings.patch @@ -1,8 +1,17 @@ +From c9a6ca9fc039233dee746a4da9705762cd9e515d Mon Sep 17 00:00:00 2001 +From: Michael Yang +Date: Mon, 16 Sep 2024 15:53:14 -0700 +Subject: [PATCH] 06-embeddings.diff + +--- + src/llama.cpp | 17 ++++++++++------- + 1 file changed, 10 insertions(+), 7 deletions(-) + diff --git a/src/llama.cpp b/src/llama.cpp -index 88355971..d7db689b 100644 +index d1e959fc..f79bd782 100644 --- a/src/llama.cpp +++ b/src/llama.cpp -@@ -15906,7 +15906,7 @@ static size_t llama_output_reserve(llama_context & lctx, size_t n_outputs) { +@@ -15898,7 +15898,7 @@ static size_t llama_output_reserve(llama_context & lctx, size_t n_outputs) { const auto n_embd = hparams.n_embd; // TODO: use a per-batch flag for logits presence instead @@ -11,7 +20,7 @@ index 88355971..d7db689b 100644 const bool has_embd = cparams.embeddings && (cparams.pooling_type == LLAMA_POOLING_TYPE_NONE); const size_t logits_size = has_logits ? 
n_vocab*n_outputs_max : 0; -@@ -16175,20 +16175,23 @@ static int llama_decode_internal( +@@ -16167,20 +16167,23 @@ static int llama_decode_internal( // no output res = nullptr; embd = nullptr; @@ -41,3 +50,6 @@ index 88355971..d7db689b 100644 // LLAMA_LOG_INFO("graph build time: %.3f ms (%d nodes, %d leafs)\n", (ggml_time_us() - t_start_us)/1000.0, gf->n_nodes, gf->n_leafs); ggml_backend_sched_alloc_graph(lctx.sched, gf); +-- +2.46.0 + diff --git a/llm/patches/07-clip-unicode.diff b/llm/patches/0007-clip-unicode.patch similarity index 73% rename from llm/patches/07-clip-unicode.diff rename to llm/patches/0007-clip-unicode.patch index 53e5ee11..72c061cb 100644 --- a/llm/patches/07-clip-unicode.diff +++ b/llm/patches/0007-clip-unicode.patch @@ -1,8 +1,17 @@ +From ae2b188a679c83ce105aa1e823499441dfab3c57 Mon Sep 17 00:00:00 2001 +From: Michael Yang +Date: Mon, 16 Sep 2024 15:53:15 -0700 +Subject: [PATCH] 07-clip-unicode.diff + +--- + examples/llava/clip.cpp | 23 +++++++++++++++++++++++ + 1 file changed, 23 insertions(+) + diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp -index 95fbe3d0..5a02a6ec 100644 +index cb51793d..8716472b 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp -@@ -32,6 +33,14 @@ +@@ -41,6 +41,14 @@ #include #include @@ -17,7 +26,7 @@ index 95fbe3d0..5a02a6ec 100644 //#define CLIP_DEBUG_FUNCTIONS // RGB uint8 image -@@ -1055,7 +1064,22 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { +@@ -1223,7 +1231,22 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { return nullptr; } @@ -40,3 +49,6 @@ index 95fbe3d0..5a02a6ec 100644 if (!fin) { LOG_TEE("cannot open model file for loading tensors\n"); clip_free(new_clip); +-- +2.46.0 +
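
A brief usage sketch of the workflow this commit switches to, following the `git am` invocation in gen_common.sh above. The relative paths (running from llm/generate/ with the vendored llama.cpp checkout at ../llama.cpp) and the BASE placeholder are illustrative assumptions, not taken from this patch:

    # replay the series as real commits, matching what apply_patches now does
    for patch in ../patches/*.patch; do
        git -c 'user.name=nobody' -c 'user.email=<>' -C ../llama.cpp am ${patch}
    done

    # after committing a fix inside the checkout, regenerate the am-able series;
    # BASE stands in for the upstream llama.cpp commit the series sits on top of
    git -C ../llama.cpp format-patch -o ../patches/ ${BASE}..HEAD

Unlike `git apply`, which only edits the working tree, `git am` records each patch as a commit with its author, date, and message, which is why every file under llm/patches/ now carries the mail headers (From/Date/Subject) that `git format-patch` emits and raw diffs lack.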