From 1255bc9b45686e50795f2bd7a3f312cac2536bca Mon Sep 17 00:00:00 2001 From: Bruce MacDonald Date: Wed, 20 Sep 2023 20:00:41 +0100 Subject: [PATCH] only package 11.8 runner --- docs/development.md | 2 +- llm/llama.cpp/generate_linux.go | 24 +++++++------- llm/llama.go | 58 ++------------------------------- server/routes.go | 2 +- 4 files changed, 17 insertions(+), 69 deletions(-) diff --git a/docs/development.md b/docs/development.md index 803e5135..98a81b4b 100644 --- a/docs/development.md +++ b/docs/development.md @@ -35,5 +35,5 @@ Now you can run `ollama`: ## Building on Linux with GPU support - Install cmake and nvidia-cuda-toolkit -- run `CUDA_VERSION=11 CUDA_PATH=/path/to/libcuda.so CUBLAS_PATH=/path/to/libcublas.so CUDART_PATH=/path/to/libcudart.so CUBLASLT_PATH=/path/to/libcublasLt.so go generate ./...` +- run `CUDA_PATH=/path/to/libcuda.so CUBLAS_PATH=/path/to/libcublas.so CUDART_PATH=/path/to/libcudart.so CUBLASLT_PATH=/path/to/libcublasLt.so go generate ./...` - run `go build .` diff --git a/llm/llama.cpp/generate_linux.go b/llm/llama.cpp/generate_linux.go index c06cda3a..c9303c4f 100644 --- a/llm/llama.cpp/generate_linux.go +++ b/llm/llama.cpp/generate_linux.go @@ -15,15 +15,15 @@ package llm //go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on //go:generate cmake --build gguf/build/cpu --target server --config Release -//go:generate cmake -S ggml -B ggml/build/cuda-${CUDA_VERSION} -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -//go:generate cmake --build ggml/build/cuda-${CUDA_VERSION} --target server --config Release -//go:generate cmake -S gguf -B gguf/build/cuda-${CUDA_VERSION} -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -//go:generate cmake --build gguf/build/cuda-${CUDA_VERSION} --target server --config Release -//go:generate cp --dereference ${CUDA_PATH} ggml/build/cuda-${CUDA_VERSION}/bin/libcuda.so -//go:generate cp --dereference ${CUDA_PATH} gguf/build/cuda-${CUDA_VERSION}/bin/libcuda.so 
-//go:generate cp --dereference ${CUBLAS_PATH} ggml/build/cuda-${CUDA_VERSION}/bin/libcublas.so.${CUDA_VERSION} -//go:generate cp --dereference ${CUBLAS_PATH} gguf/build/cuda-${CUDA_VERSION}/bin/libcublas.so.${CUDA_VERSION} -//go:generate cp --dereference ${CUDART_PATH} ggml/build/cuda-${CUDA_VERSION}/bin/libcudart.so.${CUDA_VERSION}.0 -//go:generate cp --dereference ${CUDART_PATH} gguf/build/cuda-${CUDA_VERSION}/bin/libcudart.so.${CUDA_VERSION}.0 -//go:generate cp --dereference ${CUBLASLT_PATH} ggml/build/cuda-${CUDA_VERSION}/bin/libcublasLt.so.${CUDA_VERSION} -//go:generate cp --dereference ${CUBLASLT_PATH} gguf/build/cuda-${CUDA_VERSION}/bin/libcublasLt.so.${CUDA_VERSION} +//go:generate cmake -S ggml -B ggml/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on +//go:generate cmake --build ggml/build/cuda --target server --config Release +//go:generate cmake -S gguf -B gguf/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on +//go:generate cmake --build gguf/build/cuda --target server --config Release +//go:generate cp --dereference ${CUDA_PATH} ggml/build/cuda/bin/libcuda.so +//go:generate cp --dereference ${CUDA_PATH} gguf/build/cuda/bin/libcuda.so +//go:generate cp --dereference ${CUBLAS_PATH} ggml/build/cuda/bin/libcublas.so.11 +//go:generate cp --dereference ${CUBLAS_PATH} gguf/build/cuda/bin/libcublas.so.11 +//go:generate cp --dereference ${CUDART_PATH} ggml/build/cuda/bin/libcudart.so.11.0 +//go:generate cp --dereference ${CUDART_PATH} gguf/build/cuda/bin/libcudart.so.11.0 +//go:generate cp --dereference ${CUBLASLT_PATH} ggml/build/cuda/bin/libcublasLt.so.11 +//go:generate cp --dereference ${CUBLASLT_PATH} gguf/build/cuda/bin/libcublasLt.so.11 diff --git a/llm/llama.go b/llm/llama.go index eaf89ef9..9118da2a 100644 --- a/llm/llama.go +++ b/llm/llama.go @@ -17,7 +17,6 @@ import ( "os/exec" "path" "path/filepath" - "regexp" "runtime" "strconv" "strings" @@ -29,46 +28,6 @@ import ( //go:embed llama.cpp/*/build/*/bin/* var 
llamaCppEmbed embed.FS -func cudaVersion() int { - // first try nvcc, it gives the most accurate version if available - cmd := exec.Command("nvcc", "--version") - output, err := cmd.CombinedOutput() - if err == nil { - // regex to match the CUDA version line in nvcc --version output - re := regexp.MustCompile(`release (\d+\.\d+),`) - matches := re.FindStringSubmatch(string(output)) - if len(matches) >= 2 { - cudaVersion := matches[1] - cudaVersionParts := strings.Split(cudaVersion, ".") - cudaMajorVersion, err := strconv.Atoi(cudaVersionParts[0]) - if err == nil { - return cudaMajorVersion - } - } - } - - // fallback to nvidia-smi - cmd = exec.Command("nvidia-smi") - output, err = cmd.CombinedOutput() - if err != nil { - return -1 - } - - re := regexp.MustCompile(`CUDA Version: (\d+\.\d+)`) - matches := re.FindStringSubmatch(string(output)) - if len(matches) < 2 { - return -1 - } - - cudaVersion := matches[1] - cudaVersionParts := strings.Split(cudaVersion, ".") - cudaMajorVersion, err := strconv.Atoi(cudaVersionParts[0]) - if err != nil { - return -1 - } - return cudaMajorVersion -} - type ModelRunner struct { Path string // path to the model runner executable } @@ -86,20 +45,9 @@ func chooseRunners(runnerType string) []ModelRunner { path.Join(buildPath, "cpu", "bin", "server"), } case "linux": - cuda := cudaVersion() - if cuda == 11 { - // prioritize CUDA 11 runner - runners = []string{ - path.Join(buildPath, "cuda-11", "bin", "server"), - path.Join(buildPath, "cuda-12", "bin", "server"), - path.Join(buildPath, "cpu", "bin", "server"), - } - } else { - runners = []string{ - path.Join(buildPath, "cuda-12", "bin", "server"), - path.Join(buildPath, "cuda-11", "bin", "server"), - path.Join(buildPath, "cpu", "bin", "server"), - } + runners = []string{ + path.Join(buildPath, "cuda", "bin", "server"), + path.Join(buildPath, "cpu", "bin", "server"), } case "windows": // TODO: select windows GPU runner here when available diff --git a/server/routes.go b/server/routes.go 
index d3d3d11c..79d2ee72 100644 --- a/server/routes.go +++ b/server/routes.go @@ -556,7 +556,7 @@ func Serve(ln net.Listener, origins []string) error { if runtime.GOOS == "linux" { // check compatibility to log warnings if _, err := llm.CheckVRAM(); err != nil { - log.Printf("Warning: GPU support not enabled, you may need to install GPU drivers: %v", err) + log.Printf("Warning: GPU support may not be enabled, check that you have installed GPU drivers: %v", err) } }