Add a CUDA-accelerated runner for Windows.

* .gitignore: ignore *.exe files.
* llm/llama.cpp/generate_windows.go: add go:generate steps that configure
  (-DLLAMA_CUBLAS=on), build, and rename server.exe to ollama-runner.exe
  under ggml/build/cuda and gguf/build/cuda.
* llm/llama.go: in chooseRunners, the "windows" case now lists the cuda
  runner (Accelerated: true) ahead of the cpu runner, and the TODO that
  asked for a Windows GPU runner is replaced by a comment describing the
  list. In NumGPU, the runtime.GOOS check that guards the CheckVRAM() call
  now also matches "windows".

diff --git a/.gitignore b/.gitignore
index feb68d6c..a10a0da8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,4 @@ dist
 ollama
 ggml-metal.metal
 .cache
+*.exe
diff --git a/llm/llama.cpp/generate_windows.go b/llm/llama.cpp/generate_windows.go
index 326e83d5..2fb4c39f 100644
--- a/llm/llama.cpp/generate_windows.go
+++ b/llm/llama.cpp/generate_windows.go
@@ -14,3 +14,11 @@ package llm
 //go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
 //go:generate cmake --build gguf/build/cpu --target server --config Release
 //go:generate cmd /c move gguf\build\cpu\bin\Release\server.exe gguf\build\cpu\bin\Release\ollama-runner.exe
+
+//go:generate cmake -S ggml -B ggml/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
+//go:generate cmake --build ggml/build/cuda --target server --config Release
+//go:generate cmd /c move ggml\build\cuda\bin\Release\server.exe ggml\build\cuda\bin\Release\ollama-runner.exe
+
+//go:generate cmake -S gguf -B gguf/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
+//go:generate cmake --build gguf/build/cuda --target server --config Release
+//go:generate cmd /c move gguf\build\cuda\bin\Release\server.exe gguf\build\cuda\bin\Release\ollama-runner.exe
diff --git a/llm/llama.go b/llm/llama.go
index 7172f91e..fc033258 100644
--- a/llm/llama.go
+++ b/llm/llama.go
@@ -84,6 +84,7 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
 	case "windows":
-		// TODO: select windows GPU runner here when available
+		// the CUDA (accelerated) runner is listed ahead of the CPU runner
 		runners = []ModelRunner{
+			{Path: path.Join(buildPath, "cuda", "bin", "Release", "ollama-runner.exe"), Accelerated: true},
 			{Path: path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe")},
 		}
 	default:
@@ -269,7 +270,7 @@ func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
 	if opts.NumGPU != -1 {
 		return opts.NumGPU
 	}
-	if runtime.GOOS == "linux" {
+	if runtime.GOOS == "linux" || runtime.GOOS == "windows" {
 		freeBytes, err := CheckVRAM()
 		if err != nil {
 			if !errors.Is(err, errNvidiaSMI) {