From 5d22319a2c7f8509d5eb8f325a7ff9decd1ccda0 Mon Sep 17 00:00:00 2001 From: Bruce MacDonald Date: Fri, 6 Oct 2023 10:15:42 -0400 Subject: [PATCH] rename server subprocess (#700) - this makes it easier to see that the subprocess is associated with ollama --- llm/llama.cpp/generate_darwin_amd64.go | 2 ++ llm/llama.cpp/generate_darwin_arm64.go | 2 ++ llm/llama.cpp/generate_linux.go | 4 ++++ llm/llama.cpp/generate_windows.go | 2 ++ llm/llama.go | 12 ++++++------ 5 files changed, 16 insertions(+), 6 deletions(-) diff --git a/llm/llama.cpp/generate_darwin_amd64.go b/llm/llama.cpp/generate_darwin_amd64.go index 1a1d3317..ea976cb8 100644 --- a/llm/llama.cpp/generate_darwin_amd64.go +++ b/llm/llama.cpp/generate_darwin_amd64.go @@ -9,8 +9,10 @@ package llm //go:generate git -C ggml apply ../patches/0004-metal-add-missing-barriers-for-mul-mat-2699.patch //go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 //go:generate cmake --build ggml/build/cpu --target server --config Release +//go:generate mv ggml/build/cpu/bin/server ggml/build/cpu/bin/ollama-runner //go:generate git submodule update --force gguf //go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch //go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 //go:generate cmake --build gguf/build/cpu --target server --config Release +//go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner diff --git a/llm/llama.cpp/generate_darwin_arm64.go b/llm/llama.cpp/generate_darwin_arm64.go index ffd1fa85..3413850d 100644 --- a/llm/llama.cpp/generate_darwin_arm64.go +++ b/llm/llama.cpp/generate_darwin_arm64.go @@ -9,8 +9,10 @@ package llm //go:generate git -C ggml apply 
../patches/0004-metal-add-missing-barriers-for-mul-mat-2699.patch //go:generate cmake -S ggml -B ggml/build/metal -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 //go:generate cmake --build ggml/build/metal --target server --config Release +//go:generate mv ggml/build/metal/bin/server ggml/build/metal/bin/ollama-runner //go:generate git submodule update --force gguf //go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch //go:generate cmake -S gguf -B gguf/build/metal -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 //go:generate cmake --build gguf/build/metal --target server --config Release +//go:generate mv gguf/build/metal/bin/server gguf/build/metal/bin/ollama-runner diff --git a/llm/llama.cpp/generate_linux.go b/llm/llama.cpp/generate_linux.go index 4cae3845..07849f85 100644 --- a/llm/llama.cpp/generate_linux.go +++ b/llm/llama.cpp/generate_linux.go @@ -9,14 +9,18 @@ package llm //go:generate git -C ggml apply ../patches/0001-copy-cuda-runtime-libraries.patch //go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_K_QUANTS=on //go:generate cmake --build ggml/build/cpu --target server --config Release +//go:generate mv ggml/build/cpu/bin/server ggml/build/cpu/bin/ollama-runner //go:generate git submodule update --force gguf //go:generate git -C gguf apply ../patches/0001-copy-cuda-runtime-libraries.patch //go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch //go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on //go:generate cmake --build gguf/build/cpu --target server --config Release +//go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner //go:generate cmake -S ggml -B ggml/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on //go:generate cmake --build 
ggml/build/cuda --target server --config Release +//go:generate mv ggml/build/cuda/bin/server ggml/build/cuda/bin/ollama-runner //go:generate cmake -S gguf -B gguf/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on //go:generate cmake --build gguf/build/cuda --target server --config Release +//go:generate mv gguf/build/cuda/bin/server gguf/build/cuda/bin/ollama-runner diff --git a/llm/llama.cpp/generate_windows.go b/llm/llama.cpp/generate_windows.go index 3f69a39d..20353c2c 100644 --- a/llm/llama.cpp/generate_windows.go +++ b/llm/llama.cpp/generate_windows.go @@ -7,8 +7,10 @@ package llm //go:generate git -C ggml apply ../patches/0002-34B-model-support.patch //go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_K_QUANTS=on //go:generate cmake --build ggml/build/cpu --target server --config Release +//go:generate cmd /c move ggml\build\cpu\bin\Release\server.exe ggml\build\cpu\bin\Release\ollama-runner.exe //go:generate git submodule update --force gguf //go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch //go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on //go:generate cmake --build gguf/build/cpu --target server --config Release +//go:generate cmd /c move gguf\build\cpu\bin\Release\server.exe gguf\build\cpu\bin\Release\ollama-runner.exe diff --git a/llm/llama.go b/llm/llama.go index 0d9e8d5d..79cd8f7e 100644 --- a/llm/llama.go +++ b/llm/llama.go @@ -41,23 +41,23 @@ func chooseRunners(workDir, runnerType string) []ModelRunner { switch runtime.GOOS { case "darwin": runners = []string{ - path.Join(buildPath, "metal", "bin", "server"), - path.Join(buildPath, "cpu", "bin", "server"), + path.Join(buildPath, "metal", "bin", "ollama-runner"), + path.Join(buildPath, "cpu", "bin", "ollama-runner"), } case "linux": runners = []string{ - path.Join(buildPath, "cuda", "bin", "server"), - path.Join(buildPath, "cpu", "bin", "server"), + path.Join(buildPath, "cuda", "bin", "ollama-runner"), + path.Join(buildPath, "cpu", 
"bin", "ollama-runner"), } case "windows": // TODO: select windows GPU runner here when available runners = []string{ - path.Join(buildPath, "cpu", "bin", "Release", "server.exe"), + path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe"), } default: log.Printf("unknown OS, running on CPU: %s", runtime.GOOS) runners = []string{ - path.Join(buildPath, "cpu", "bin", "server"), + path.Join(buildPath, "cpu", "bin", "ollama-runner"), } }