rename server subprocess (#700)
- this makes it easier to see that the subprocess is associated with ollama
This commit is contained in:
parent
2130c0708b
commit
5d22319a2c
|
@ -9,8 +9,10 @@ package llm
|
||||||
//go:generate git -C ggml apply ../patches/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
|
//go:generate git -C ggml apply ../patches/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
|
||||||
//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
|
//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
|
||||||
//go:generate cmake --build ggml/build/cpu --target server --config Release
|
//go:generate cmake --build ggml/build/cpu --target server --config Release
|
||||||
|
//go:generate mv ggml/build/cpu/bin/server ggml/build/cpu/bin/ollama-runner
|
||||||
|
|
||||||
//go:generate git submodule update --force gguf
|
//go:generate git submodule update --force gguf
|
||||||
//go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch
|
//go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch
|
||||||
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
|
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
|
||||||
//go:generate cmake --build gguf/build/cpu --target server --config Release
|
//go:generate cmake --build gguf/build/cpu --target server --config Release
|
||||||
|
//go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner
|
||||||
|
|
|
@ -9,8 +9,10 @@ package llm
|
||||||
//go:generate git -C ggml apply ../patches/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
|
//go:generate git -C ggml apply ../patches/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
|
||||||
//go:generate cmake -S ggml -B ggml/build/metal -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
|
//go:generate cmake -S ggml -B ggml/build/metal -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
|
||||||
//go:generate cmake --build ggml/build/metal --target server --config Release
|
//go:generate cmake --build ggml/build/metal --target server --config Release
|
||||||
|
//go:generate mv ggml/build/metal/bin/server ggml/build/metal/bin/ollama-runner
|
||||||
|
|
||||||
//go:generate git submodule update --force gguf
|
//go:generate git submodule update --force gguf
|
||||||
//go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch
|
//go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch
|
||||||
//go:generate cmake -S gguf -B gguf/build/metal -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
|
//go:generate cmake -S gguf -B gguf/build/metal -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
|
||||||
//go:generate cmake --build gguf/build/metal --target server --config Release
|
//go:generate cmake --build gguf/build/metal --target server --config Release
|
||||||
|
//go:generate mv gguf/build/metal/bin/server gguf/build/metal/bin/ollama-runner
|
||||||
|
|
|
@ -9,14 +9,18 @@ package llm
|
||||||
//go:generate git -C ggml apply ../patches/0001-copy-cuda-runtime-libraries.patch
|
//go:generate git -C ggml apply ../patches/0001-copy-cuda-runtime-libraries.patch
|
||||||
//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_K_QUANTS=on
|
//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_K_QUANTS=on
|
||||||
//go:generate cmake --build ggml/build/cpu --target server --config Release
|
//go:generate cmake --build ggml/build/cpu --target server --config Release
|
||||||
|
//go:generate mv ggml/build/cpu/bin/server ggml/build/cpu/bin/ollama-runner
|
||||||
|
|
||||||
//go:generate git submodule update --force gguf
|
//go:generate git submodule update --force gguf
|
||||||
//go:generate git -C gguf apply ../patches/0001-copy-cuda-runtime-libraries.patch
|
//go:generate git -C gguf apply ../patches/0001-copy-cuda-runtime-libraries.patch
|
||||||
//go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch
|
//go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch
|
||||||
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on
|
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on
|
||||||
//go:generate cmake --build gguf/build/cpu --target server --config Release
|
//go:generate cmake --build gguf/build/cpu --target server --config Release
|
||||||
|
//go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner
|
||||||
|
|
||||||
//go:generate cmake -S ggml -B ggml/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
|
//go:generate cmake -S ggml -B ggml/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
|
||||||
//go:generate cmake --build ggml/build/cuda --target server --config Release
|
//go:generate cmake --build ggml/build/cuda --target server --config Release
|
||||||
|
//go:generate mv ggml/build/cuda/bin/server ggml/build/cuda/bin/ollama-runner
|
||||||
//go:generate cmake -S gguf -B gguf/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
|
//go:generate cmake -S gguf -B gguf/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
|
||||||
//go:generate cmake --build gguf/build/cuda --target server --config Release
|
//go:generate cmake --build gguf/build/cuda --target server --config Release
|
||||||
|
//go:generate mv gguf/build/cuda/bin/server gguf/build/cuda/bin/ollama-runner
|
||||||
|
|
|
@ -7,8 +7,10 @@ package llm
|
||||||
//go:generate git -C ggml apply ../patches/0002-34B-model-support.patch
|
//go:generate git -C ggml apply ../patches/0002-34B-model-support.patch
|
||||||
//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_K_QUANTS=on
|
//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_K_QUANTS=on
|
||||||
//go:generate cmake --build ggml/build/cpu --target server --config Release
|
//go:generate cmake --build ggml/build/cpu --target server --config Release
|
||||||
|
//go:generate cmd /c move ggml\build\cpu\bin\Release\server.exe ggml\build\cpu\bin\Release\ollama-runner.exe
|
||||||
|
|
||||||
//go:generate git submodule update --force gguf
|
//go:generate git submodule update --force gguf
|
||||||
//go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch
|
//go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch
|
||||||
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on
|
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on
|
||||||
//go:generate cmake --build gguf/build/cpu --target server --config Release
|
//go:generate cmake --build gguf/build/cpu --target server --config Release
|
||||||
|
//go:generate cmd /c move gguf\build\cpu\bin\Release\server.exe gguf\build\cpu\bin\Release\ollama-runner.exe
|
||||||
|
|
12
llm/llama.go
12
llm/llama.go
|
@ -41,23 +41,23 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
|
||||||
switch runtime.GOOS {
|
switch runtime.GOOS {
|
||||||
case "darwin":
|
case "darwin":
|
||||||
runners = []string{
|
runners = []string{
|
||||||
path.Join(buildPath, "metal", "bin", "server"),
|
path.Join(buildPath, "metal", "bin", "ollama-runner"),
|
||||||
path.Join(buildPath, "cpu", "bin", "server"),
|
path.Join(buildPath, "cpu", "bin", "ollama-runner"),
|
||||||
}
|
}
|
||||||
case "linux":
|
case "linux":
|
||||||
runners = []string{
|
runners = []string{
|
||||||
path.Join(buildPath, "cuda", "bin", "server"),
|
path.Join(buildPath, "cuda", "bin", "ollama-runner"),
|
||||||
path.Join(buildPath, "cpu", "bin", "server"),
|
path.Join(buildPath, "cpu", "bin", "ollama-runner"),
|
||||||
}
|
}
|
||||||
case "windows":
|
case "windows":
|
||||||
// TODO: select windows GPU runner here when available
|
// TODO: select windows GPU runner here when available
|
||||||
runners = []string{
|
runners = []string{
|
||||||
path.Join(buildPath, "cpu", "bin", "Release", "server.exe"),
|
path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe"),
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
log.Printf("unknown OS, running on CPU: %s", runtime.GOOS)
|
log.Printf("unknown OS, running on CPU: %s", runtime.GOOS)
|
||||||
runners = []string{
|
runners = []string{
|
||||||
path.Join(buildPath, "cpu", "bin", "server"),
|
path.Join(buildPath, "cpu", "bin", "ollama-runner"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue