diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index ffb2cf9d..f14305a4 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -103,6 +103,7 @@ jobs: path: | llm/build/**/bin/* llm/build/**/*.a + dist/windows-amd64/** # ROCm generation step generate-windows-rocm: @@ -173,7 +174,9 @@ jobs: - uses: actions/upload-artifact@v4 with: name: generate-windows-rocm - path: llm/build/**/bin/* + path: | + llm/build/**/bin/* + dist/windows-amd64/** - uses: actions/upload-artifact@v4 with: name: windows-rocm-deps @@ -253,7 +256,9 @@ jobs: - uses: actions/upload-artifact@v4 with: name: generate-windows-cuda - path: llm/build/**/bin/* + path: | + llm/build/**/bin/* + dist/windows-amd64/** - uses: actions/upload-artifact@v4 with: name: windows-cuda-deps @@ -306,11 +311,15 @@ jobs: - uses: actions/download-artifact@v4 with: name: generate-windows-cpu - path: llm/build + path: | + llm/build + dist/windows-amd64 - uses: actions/download-artifact@v4 with: name: generate-windows-cuda - path: llm/build + path: | + llm/build + dist/windows-amd64 - uses: actions/download-artifact@v4 with: name: windows-cuda-deps @@ -322,7 +331,9 @@ jobs: - uses: actions/download-artifact@v4 with: name: generate-windows-rocm - path: llm/build + path: | + llm/build + dist/windows-amd64 - run: dir llm/build - run: | $gopath=(get-command go).source | split-path -parent @@ -337,7 +348,9 @@ jobs: - uses: actions/upload-artifact@v4 with: name: dist-windows - path: dist/*.exe + path: | + dist/OllamaSetup.exe + dist/ollama-windows-*.zip # Linux x86 assets built using the container based build build-linux-amd64: diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 497773eb..57867950 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -103,7 +103,9 @@ jobs: - uses: actions/upload-artifact@v4 with: name: cuda-${{ matrix.cuda-version }}-libraries - path: llm/build/**/bin/* + path: | + llm/build/**/bin/* + dist/windows-amd64/** generate-rocm: needs: [changes] if: ${{ needs.changes.outputs.GENERATE_ROCM == 'True' }} @@ -134,7 +136,9 @@ jobs: - uses: actions/upload-artifact@v4 with: name: rocm-${{ matrix.rocm-version }}-libraries - path: llm/build/**/bin/* + path: | + llm/build/**/bin/* + dist/windows-amd64/** # ROCm generation step generate-windows-rocm: @@ -253,11 +257,6 @@ jobs: mkdir -p llm/build/darwin/$ARCH/stub/bin touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server if: ${{ startsWith(matrix.os, 'macos-') }} - - run: | - mkdir -p llm/build/windows/$ARCH/stub/bin - touch llm/build/windows/$ARCH/stub/bin/ollama_llama_server - if: ${{ startsWith(matrix.os, 'windows-') }} - shell: bash - uses: golangci/golangci-lint-action@v4 with: args: --timeout 8m0s -v @@ -299,10 +298,6 @@ jobs: mkdir -p llm/build/darwin/$ARCH/stub/bin touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server if: ${{ startsWith(matrix.os, 'macos-') }} - - run: | - mkdir -p llm/build/windows/$ARCH/stub/bin - touch llm/build/windows/$ARCH/stub/bin/ollama_llama_server - if: ${{ startsWith(matrix.os, 'windows-') }} shell: bash - run: go generate ./... - run: go build diff --git a/app/ollama.iss b/app/ollama.iss index 8f46223b..f77742dc 100644 --- a/app/ollama.iss +++ b/app/ollama.iss @@ -88,7 +88,8 @@ DialogFontSize=12 [Files] Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit -Source: "..\dist\windeps\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64bit +Source: "..\dist\windows-amd64\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64bit +Source: "..\dist\windows-amd64\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion ; Assumes v5.7, may need adjustments for v6 diff --git a/gpu/assets.go b/gpu/assets.go index 4915471b..ea11570a 100644 --- a/gpu/assets.go +++ b/gpu/assets.go @@ -24,6 +24,35 @@ func PayloadsDir() (string, error) { defer lock.Unlock() var err error if payloadsDir == "" { + runnersDir := os.Getenv("OLLAMA_RUNNERS_DIR") + // On Windows we do not carry the payloads inside the main executable + if runtime.GOOS == "windows" && runnersDir == "" { + appExe, err := os.Executable() + if err != nil { + slog.Error("failed to lookup executable path", "error", err) + return "", err + } + // Try a few variations to improve developer experience when building from source in the local tree + for _, d := range []string{".", "windows-" + runtime.GOARCH, "dist\\windows-" + runtime.GOARCH} { + candidate := filepath.Join(filepath.Dir(appExe), d, "ollama_runners") + _, err := os.Stat(candidate) + if err == nil { + runnersDir = candidate + break + } + } + if runnersDir == "" { + err = fmt.Errorf("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'") + slog.Error("incomplete distribution", "error", err) + return "", err + } + } + if runnersDir != "" { + payloadsDir = runnersDir + return payloadsDir, nil + } + + // The remainder only applies on non-windows where we still carry payloads in the main executable cleanupTmpDirs() tmpDir := os.Getenv("OLLAMA_TMPDIR") if tmpDir == "" { @@ -88,7 +117,8 @@ func cleanupTmpDirs() { func Cleanup() { lock.Lock() defer lock.Unlock() - if payloadsDir != "" { + runnersDir := os.Getenv("OLLAMA_RUNNERS_DIR") + if payloadsDir != "" && runnersDir == "" && runtime.GOOS != "windows" { // We want to fully clean up the tmpdir parent of the payloads dir tmpDir := filepath.Clean(filepath.Join(payloadsDir, "..")) slog.Debug("cleaning up", "dir", tmpDir) diff --git a/llm/generate/gen_windows.ps1 b/llm/generate/gen_windows.ps1 index 5071e09f..737cbbee 100644 --- a/llm/generate/gen_windows.ps1 +++ b/llm/generate/gen_windows.ps1 @@ -35,6 +35,7 @@ function init_vars { ) $script:cmakeTargets = @("ollama_llama_server") $script:ARCH = "amd64" # arm not yet supported. + $script:DIST_BASE = "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_runners" if ($env:CGO_CFLAGS -contains "-g") { $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on", "-DCMAKE_BUILD_TYPE=RelWithDebInfo") $script:config = "RelWithDebInfo" @@ -55,7 +56,6 @@ function init_vars { } else { $script:CUDA_LIB_DIR=$env:CUDA_LIB_DIR } - $script:GZIP=(get-command -ea 'silentlycontinue' gzip).path $script:DUMPBIN=(get-command -ea 'silentlycontinue' dumpbin).path if ($null -eq $env:CMAKE_CUDA_ARCHITECTURES) { $script:CMAKE_CUDA_ARCHITECTURES="50;52;61;70;75;80" @@ -134,21 +134,18 @@ function sign { } } -function compress { - if ($script:GZIP -eq $null) { - write-host "gzip not installed, not compressing files" - return - } - write-host "Compressing binaries..." +function install { + write-host "Installing binaries to dist dir ${script:distDir}" + mkdir ${script:distDir} -ErrorAction SilentlyContinue $binaries = dir "${script:buildDir}/bin/*.exe" foreach ($file in $binaries) { - & "$script:GZIP" --best -f $file + copy-item -Path $file -Destination ${script:distDir} -Force } - write-host "Compressing dlls..." + write-host "Installing dlls to dist dir ${script:distDir}" $dlls = dir "${script:buildDir}/bin/*.dll" foreach ($file in $dlls) { - & "$script:GZIP" --best -f $file + copy-item -Path $file -Destination ${script:distDir} -Force } } @@ -209,26 +206,29 @@ build init_vars $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs $script:buildDir="../build/windows/${script:ARCH}/cpu" + $script:distDir="$script:DIST_BASE\cpu" write-host "Building LCD CPU" build sign - compress + install init_vars $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs $script:buildDir="../build/windows/${script:ARCH}/cpu_avx" + $script:distDir="$script:DIST_BASE\cpu_avx" write-host "Building AVX CPU" build sign - compress + install init_vars $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs $script:buildDir="../build/windows/${script:ARCH}/cpu_avx2" + $script:distDir="$script:DIST_BASE\cpu_avx2" write-host "Building AVX2 CPU" build sign - compress + install } else { write-host "Skipping CPU generation step as requested" } @@ -242,6 +242,7 @@ if ($null -ne $script:CUDA_LIB_DIR) { } init_vars $script:buildDir="../build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT" + $script:distDir="$script:DIST_BASE\cuda$script:CUDA_VARIANT" $script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUDA=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}") if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) { write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`"" @@ -250,7 +251,7 @@ if ($null -ne $script:CUDA_LIB_DIR) { } build sign - compress + install } if ($null -ne $env:HIP_PATH) { @@ -261,6 +262,7 @@ if ($null -ne $env:HIP_PATH) { init_vars $script:buildDir="../build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT" + $script:distDir="$script:DIST_BASE\rocm$script:ROCM_VARIANT" $script:cmakeDefs += @( "-G", "Ninja", "-DCMAKE_C_COMPILER=clang.exe", @@ -292,9 +294,9 @@ if ($null -ne $env:HIP_PATH) { & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | select-string ".dll" } sign - compress + install } cleanup -write-host "`ngo generate completed. LLM runners: $(get-childitem -path ${script:SRC_DIR}\llm\build\windows\${script:ARCH})" +write-host "`ngo generate completed. LLM runners: $(get-childitem -path $script:DIST_BASE)" diff --git a/llm/llm_windows.go b/llm/llm_windows.go index 17967b4e..e44f4b95 100644 --- a/llm/llm_windows.go +++ b/llm/llm_windows.go @@ -2,5 +2,5 @@ package llm import "embed" -//go:embed build/windows/*/*/bin/* +// unused on windows var libEmbed embed.FS diff --git a/llm/payload.go b/llm/payload.go index c81c2784..abe3d263 100644 --- a/llm/payload.go +++ b/llm/payload.go @@ -26,13 +26,15 @@ func Init() error { return err } - slog.Info("extracting embedded files", "dir", payloadsDir) - binGlob := "build/*/*/*/bin/*" + if runtime.GOOS != "windows" { + slog.Info("extracting embedded files", "dir", payloadsDir) + binGlob := "build/*/*/*/bin/*" - // extract server libraries - err = extractFiles(payloadsDir, binGlob) - if err != nil { - return fmt.Errorf("extract binaries: %v", err) + // extract server libraries + err = extractFiles(payloadsDir, binGlob) + if err != nil { + return fmt.Errorf("extract binaries: %v", err) + } } var variants []string diff --git a/scripts/build_windows.ps1 b/scripts/build_windows.ps1 index 1a89045a..a9aade8c 100644 --- a/scripts/build_windows.ps1 +++ b/scripts/build_windows.ps1 @@ -30,7 +30,7 @@ function checkEnv() { $script:INNO_SETUP_DIR=(get-item "C:\Program Files*\Inno Setup*\")[0] - $script:DEPS_DIR="${script:SRC_DIR}\dist\windeps" + $script:DEPS_DIR="${script:SRC_DIR}\dist\windows-amd64" $env:CGO_ENABLED="1" echo "Checking version" if (!$env:VERSION) { @@ -81,8 +81,8 @@ function buildOllama() { /csp "Google Cloud KMS Provider" /kc ${env:KEY_CONTAINER} ollama.exe if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} } - New-Item -ItemType Directory -Path .\dist -Force - cp .\ollama.exe .\dist\ollama-windows-amd64.exe + New-Item -ItemType Directory -Path .\dist\windows-amd64\ -Force + cp .\ollama.exe .\dist\windows-amd64\ollama-windows-amd64.exe } function buildApp() { @@ -101,7 +101,6 @@ function buildApp() { function gatherDependencies() { write-host "Gathering runtime dependencies" cd "${script:SRC_DIR}" - rm -ea 0 -recurse -force -path "${script:DEPS_DIR}" md "${script:DEPS_DIR}" -ea 0 > $null # TODO - this varies based on host build system and MSVC version - drive from dumpbin output @@ -124,7 +123,15 @@ function gatherDependencies() { if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} } } - + if ($null -ne $env:HIP_PATH) { + # Assumes v5.7, may need adjustments for v6 + rm -ea 0 -recurse -force -path "${script:DEPS_DIR}\rocm\" + md "${script:DEPS_DIR}\rocm\rocblas\library\" -ea 0 > $null + cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:DEPS_DIR}\rocm\" + cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:DEPS_DIR}\rocm\" + # amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs + cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:DEPS_DIR}\rocm\rocblas\library\" + } } function buildInstaller() { @@ -139,12 +146,18 @@ function buildInstaller() { if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} } +function distZip() { + write-host "Generating stand-alone distribution zip file ${script:SRC_DIR}\dist\ollama-windows-amd64.zip" + Compress-Archive -Path "${script:SRC_DIR}\dist\windows-amd64\*" -DestinationPath "${script:SRC_DIR}\dist\ollama-windows-amd64.zip" -Force +} + try { checkEnv buildOllama buildApp gatherDependencies buildInstaller + distZip } catch { write-host "Build Failed" write-host $_ diff --git a/server/sched_test.go b/server/sched_test.go index b5117631..27d64a9b 100644 --- a/server/sched_test.go +++ b/server/sched_test.go @@ -92,6 +92,7 @@ func TestLoad(t *testing.T) { runner := s.loaded["dummy_model_path"] require.NotNil(t, runner) require.Equal(t, uint(0), runner.refCount) + time.Sleep(1 * time.Millisecond) require.Len(t, s.expiredCh, 1) }