diff --git a/.gitmodules b/.gitmodules index e2901f45..49a54fa9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -6,4 +6,5 @@ [submodule "llm/llama.cpp/gguf"] path = llm/llama.cpp/gguf url = https://github.com/ggerganov/llama.cpp.git + ignore = dirty shallow = true diff --git a/llm/llama.cpp/generate_darwin_amd64.go b/llm/llama.cpp/generate_darwin_amd64.go index 9b782db3..1a1d3317 100644 --- a/llm/llama.cpp/generate_darwin_amd64.go +++ b/llm/llama.cpp/generate_darwin_amd64.go @@ -11,5 +11,6 @@ package llm //go:generate cmake --build ggml/build/cpu --target server --config Release //go:generate git submodule update --force gguf +//go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch //go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 //go:generate cmake --build gguf/build/cpu --target server --config Release diff --git a/llm/llama.cpp/generate_darwin_arm64.go b/llm/llama.cpp/generate_darwin_arm64.go index 72d175ef..ffd1fa85 100644 --- a/llm/llama.cpp/generate_darwin_arm64.go +++ b/llm/llama.cpp/generate_darwin_arm64.go @@ -11,5 +11,6 @@ package llm //go:generate cmake --build ggml/build/metal --target server --config Release //go:generate git submodule update --force gguf +//go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch //go:generate cmake -S gguf -B gguf/build/metal -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 //go:generate cmake --build gguf/build/metal --target server --config Release diff --git a/llm/llama.cpp/generate_linux.go b/llm/llama.cpp/generate_linux.go index 76be15d5..4cae3845 100644 --- a/llm/llama.cpp/generate_linux.go +++ b/llm/llama.cpp/generate_linux.go @@ -12,6 +12,7 @@ package llm //go:generate git submodule update --force gguf //go:generate git -C gguf 
apply ../patches/0001-copy-cuda-runtime-libraries.patch +//go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch //go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on //go:generate cmake --build gguf/build/cpu --target server --config Release diff --git a/llm/llama.cpp/generate_windows.go b/llm/llama.cpp/generate_windows.go index 0d8cd411..3f69a39d 100644 --- a/llm/llama.cpp/generate_windows.go +++ b/llm/llama.cpp/generate_windows.go @@ -9,5 +9,6 @@ package llm //go:generate cmake --build ggml/build/cpu --target server --config Release //go:generate git submodule update --force gguf +//go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch //go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on //go:generate cmake --build gguf/build/cpu --target server --config Release diff --git a/llm/llama.cpp/patches/0001-remove-warm-up-logging.patch b/llm/llama.cpp/patches/0001-remove-warm-up-logging.patch new file mode 100644 index 00000000..04aa4802 --- /dev/null +++ b/llm/llama.cpp/patches/0001-remove-warm-up-logging.patch @@ -0,0 +1,25 @@ +From 07993bdc35345b67b27aa649a7c099ad42d80c4c Mon Sep 17 00:00:00 2001 +From: Michael Yang +Date: Thu, 21 Sep 2023 14:43:21 -0700 +Subject: [PATCH] remove warm up logging + +--- + common/common.cpp | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/common/common.cpp b/common/common.cpp +index 2597ba0..b56549b 100644 +--- a/common/common.cpp ++++ b/common/common.cpp +@@ -780,8 +780,6 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par + } + + { +- LOG("warming up the model with an empty run\n"); +- + const std::vector<llama_token> tmp = { llama_token_bos(lctx), llama_token_eos(lctx), }; + llama_eval(lctx, tmp.data(), std::min(tmp.size(), (size_t) params.n_batch), 0, params.n_threads); + llama_reset_timings(lctx); +-- +2.42.0 +