From 74d45f010276c2f2653f3ca8c4f76cb0552fb46e Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Mon, 8 Jul 2024 12:50:11 -0700 Subject: [PATCH 01/10] Refactor linux packaging This adjusts linux to follow a similar model to windows with a discrete archive (zip/tgz) to cary the primary executable, and dependent libraries. Runners are still carried as payloads inside the main binary Darwin retain the payload model where the go binary is fully self contained. --- .github/workflows/release.yaml | 1 - Dockerfile | 29 ++++++------ app/ollama.iss | 11 +---- envconfig/config.go | 4 +- gpu/amd_common.go | 2 +- gpu/amd_windows.go | 2 +- gpu/gpu.go | 50 ++++++++++++++------- gpu/gpu_linux.go | 2 +- llm/ext_server/CMakeLists.txt | 3 +- llm/generate/gen_common.sh | 17 ++++++- llm/generate/gen_linux.sh | 81 ++++++++++++++++------------------ llm/generate/gen_windows.ps1 | 43 +++++++++--------- llm/server.go | 12 +++-- scripts/build_linux.sh | 10 ++--- scripts/build_windows.ps1 | 12 ++--- scripts/install.sh | 31 ++++++++++--- 16 files changed, 171 insertions(+), 139 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 5ae630c3..9287f6f7 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -363,7 +363,6 @@ jobs: - run: | ./scripts/build_linux.sh ./scripts/build_docker.sh - mv dist/deps/* dist/ - uses: actions/upload-artifact@v4 with: name: dist-linux-amd64 diff --git a/Dockerfile b/Dockerfile index c8efdd8a..120ddc21 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,6 +18,7 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH COPY --from=llm-code / /go/src/github.com/ollama/ollama/ WORKDIR /go/src/github.com/ollama/ollama/llm/generate ARG CGO_CFLAGS +ENV GOARCH amd64 RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64 @@ -28,6 +29,7 @@ ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH COPY --from=llm-code / /go/src/github.com/ollama/ollama/ WORKDIR /go/src/github.com/ollama/ollama/llm/generate ARG CGO_CFLAGS +ENV GOARCH arm64 RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64 @@ -40,15 +42,10 @@ COPY --from=llm-code / /go/src/github.com/ollama/ollama/ WORKDIR /go/src/github.com/ollama/ollama/llm/generate ARG CGO_CFLAGS ARG AMDGPU_TARGETS +ENV GOARCH amd64 RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh -RUN mkdir /tmp/scratch && \ - for dep in $(zcat /go/src/github.com/ollama/ollama/llm/build/linux/x86_64/rocm*/bin/deps.txt.gz) ; do \ - cp ${dep} /tmp/scratch/ || exit 1 ; \ - done && \ - (cd /opt/rocm/lib && tar cf - rocblas/library) | (cd /tmp/scratch/ && tar xf - ) && \ - mkdir -p /go/src/github.com/ollama/ollama/dist/deps/ && \ - (cd /tmp/scratch/ && tar czvf /go/src/github.com/ollama/ollama/dist/deps/ollama-linux-amd64-rocm.tgz . ) - +RUN mkdir -p ../../dist/linux-amd64/ollama_libs && \ + (cd /opt/rocm/lib && tar cf - rocblas/library) | (cd ../../dist/linux-amd64/ollama_libs && tar xf - ) FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64 ARG CMAKE_VERSION @@ -59,6 +56,7 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH COPY --from=llm-code / /go/src/github.com/ollama/ollama/ ARG OLLAMA_CUSTOM_CPU_DEFS ARG CGO_CFLAGS +ENV GOARCH amd64 WORKDIR /go/src/github.com/ollama/ollama/llm/generate FROM --platform=linux/amd64 cpu-builder-amd64 AS static-build-amd64 @@ -79,6 +77,7 @@ ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH COPY --from=llm-code / /go/src/github.com/ollama/ollama/ ARG OLLAMA_CUSTOM_CPU_DEFS ARG CGO_CFLAGS +ENV GOARCH arm64 WORKDIR /go/src/github.com/ollama/ollama/llm/generate FROM --platform=linux/arm64 cpu-builder-arm64 AS static-build-arm64 @@ -95,12 +94,13 @@ COPY . . COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ +COPY --from=cuda-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/ COPY --from=cuda-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ +COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/ COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ -COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/deps/ ./dist/deps/ ARG GOFLAGS ARG CGO_CFLAGS -RUN go build -trimpath . +RUN go build -trimpath -o dist/linux-amd64/ollama . # Intermediate stage used for ./scripts/build_linux.sh FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64 @@ -109,23 +109,24 @@ ARG GOLANG_VERSION WORKDIR /go/src/github.com/ollama/ollama COPY . . COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ +COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ ARG GOFLAGS ARG CGO_CFLAGS -RUN go build -trimpath . +RUN go build -trimpath -o dist/linux-arm64/ollama . # Runtime stages FROM --platform=linux/amd64 ubuntu:22.04 as runtime-amd64 RUN apt-get update && apt-get install -y ca-certificates -COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama +COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/ollama /bin/ollama FROM --platform=linux/arm64 ubuntu:22.04 as runtime-arm64 RUN apt-get update && apt-get install -y ca-certificates -COPY --from=build-arm64 /go/src/github.com/ollama/ollama/ollama /bin/ollama +COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/ollama /bin/ollama # Radeon images are much larger so we keep it distinct from the CPU/CUDA image FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm RUN update-pciids -COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama +COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/ollama /bin/ollama EXPOSE 11434 ENV OLLAMA_HOST 0.0.0.0 diff --git a/app/ollama.iss b/app/ollama.iss index dc6178f7..e9cf48ec 100644 --- a/app/ollama.iss +++ b/app/ollama.iss @@ -91,16 +91,7 @@ Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit Source: "..\dist\windows-{#ARCH}\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion -#if DirExists("..\dist\windows-amd64\cuda") - Source: "..\dist\windows-amd64\cuda\*"; DestDir: "{app}\cuda\"; Flags: ignoreversion recursesubdirs -#endif -#if DirExists("..\dist\windows-amd64\oneapi") - Source: "..\dist\windows-amd64\oneapi\*"; DestDir: "{app}\oneapi\"; Flags: ignoreversion recursesubdirs -#endif -#if DirExists("..\dist\windows-amd64\rocm") - Source: "..\dist\windows-amd64\rocm\*"; DestDir: "{app}\rocm\"; Flags: ignoreversion recursesubdirs -#endif - +Source: "..\dist\windows-amd64\ollama_libs\*"; DestDir: "{app}\ollama_libs\"; Flags: ignoreversion recursesubdirs [Icons] Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico" diff --git a/envconfig/config.go b/envconfig/config.go index b82b773d..7f0976c0 100644 --- a/envconfig/config.go +++ b/envconfig/config.go @@ -193,8 +193,8 @@ func RunnersDir() (p string) { for _, root := range []string{filepath.Dir(exe), cwd} { paths = append(paths, root, - filepath.Join(root, "windows-"+runtime.GOARCH), - filepath.Join(root, "dist", "windows-"+runtime.GOARCH), + filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH), + filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH), ) } diff --git a/gpu/amd_common.go b/gpu/amd_common.go index 2839cb7c..05747208 100644 --- a/gpu/amd_common.go +++ b/gpu/amd_common.go @@ -54,7 +54,7 @@ func commonAMDValidateLibDir() (string, error) { // Installer payload location if we're running the installed binary exe, err := os.Executable() if err == nil { - rocmTargetDir := filepath.Join(filepath.Dir(exe), "rocm") + rocmTargetDir := filepath.Join(filepath.Dir(exe), "ollama_libs") if rocmLibUsable(rocmTargetDir) { slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir) return rocmTargetDir, nil diff --git a/gpu/amd_windows.go b/gpu/amd_windows.go index edabeb43..5d25a966 100644 --- a/gpu/amd_windows.go +++ b/gpu/amd_windows.go @@ -153,7 +153,7 @@ func AMDValidateLibDir() (string, error) { // Installer payload (if we're running from some other location) localAppData := os.Getenv("LOCALAPPDATA") appDir := filepath.Join(localAppData, "Programs", "Ollama") - rocmTargetDir := filepath.Join(appDir, "rocm") + rocmTargetDir := filepath.Join(appDir, "ollama_libs") if rocmLibUsable(rocmTargetDir) { slog.Debug("detected ollama installed ROCm at " + rocmTargetDir) return rocmTargetDir, nil diff --git a/gpu/gpu.go b/gpu/gpu.go index dc124a3e..d0ae0f34 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -229,11 +229,7 @@ func GetGPUInfo() GpuInfoList { return GpuInfoList{cpus[0].GpuInfo} } - // On windows we bundle the nvidia library one level above the runner dir - depPath := "" - if runtime.GOOS == "windows" && envconfig.RunnersDir() != "" { - depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir()), "cuda") - } + depPath := GetDepDir() // Load ALL libraries cHandles = initCudaHandles() @@ -306,13 +302,6 @@ func GetGPUInfo() GpuInfoList { if envconfig.IntelGPU() { oHandles = initOneAPIHandles() if oHandles != nil && oHandles.oneapi != nil { - - // On windows we bundle the oneapi library one level above the runner dir - depPath = "" - if runtime.GOOS == "windows" && envconfig.RunnersDir() != "" { - depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir()), "oneapi") - } - for d := range oHandles.oneapi.num_drivers { if oHandles.oneapi == nil { // shouldn't happen @@ -467,10 +456,12 @@ func GetGPUInfo() GpuInfoList { func FindGPULibs(baseLibName string, defaultPatterns []string) []string { // Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them var ldPaths []string - var patterns []string gpuLibPaths := []string{} slog.Debug("Searching for GPU library", "name", baseLibName) + // Start with our bundled libraries + patterns := []string{filepath.Join(GetDepDir(), baseLibName)} + switch runtime.GOOS { case "windows": ldPaths = strings.Split(os.Getenv("PATH"), ";") @@ -479,13 +470,14 @@ func FindGPULibs(baseLibName string, defaultPatterns []string) []string { default: return gpuLibPaths } - // Start with whatever we find in the PATH/LD_LIBRARY_PATH + + // Then with whatever we find in the PATH/LD_LIBRARY_PATH for _, ldPath := range ldPaths { d, err := filepath.Abs(ldPath) if err != nil { continue } - patterns = append(patterns, filepath.Join(d, baseLibName+"*")) + patterns = append(patterns, filepath.Join(d, baseLibName)) } patterns = append(patterns, defaultPatterns...) slog.Debug("gpu library search", "globs", patterns) @@ -641,3 +633,31 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) { return "", "" } } + +func GetDepDir() string { + // On Windows/linux we bundle the dependencies at the same level as the executable + appExe, err := os.Executable() + if err != nil { + slog.Warn("failed to lookup executable path", "error", err) + } + cwd, err := os.Getwd() + if err != nil { + slog.Warn("failed to lookup working directory", "error", err) + } + // Scan for any of our dependeices, and pick first match + for _, root := range []string{filepath.Dir(appExe), cwd} { + libDep := "ollama_libs" + if _, err := os.Stat(filepath.Join(root, libDep)); err == nil { + return filepath.Join(root, libDep) + } + // Developer mode, local build + if _, err := os.Stat(filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil { + return filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep) + } + if _, err := os.Stat(filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil { + return filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep) + } + } + slog.Warn("unable to locate gpu dependency libraries") + return "" +} diff --git a/gpu/gpu_linux.go b/gpu/gpu_linux.go index d6d2675c..d4d20bc4 100644 --- a/gpu/gpu_linux.go +++ b/gpu/gpu_linux.go @@ -47,7 +47,7 @@ var ( CudartMgmtName = "libcudart.so*" NvcudaMgmtName = "libcuda.so*" NvmlMgmtName = "" // not currently wired on linux - OneapiMgmtName = "libze_intel_gpu.so" + OneapiMgmtName = "libze_intel_gpu.so*" ) func GetCPUMem() (memInfo, error) { diff --git a/llm/ext_server/CMakeLists.txt b/llm/ext_server/CMakeLists.txt index bfc97c63..90fd0ef2 100644 --- a/llm/ext_server/CMakeLists.txt +++ b/llm/ext_server/CMakeLists.txt @@ -1,12 +1,13 @@ set(TARGET ollama_llama_server) option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON) +set(LLAMA_SERVER_LDFLAGS $ENV{LLAMA_SERVER_LDFLAGS}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}) add_executable(${TARGET} server.cpp utils.hpp json.hpp httplib.h) install(TARGETS ${TARGET} RUNTIME) target_compile_definitions(${TARGET} PRIVATE SERVER_VERBOSE=$ ) -target_link_libraries(${TARGET} PRIVATE ggml llama common llava ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE ggml llama common llava ${CMAKE_THREAD_LIBS_INIT} ${LLAMA_SERVER_LDFLAGS}) if (WIN32) TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32) endif() diff --git a/llm/generate/gen_common.sh b/llm/generate/gen_common.sh index da1b0688..f1541f2a 100644 --- a/llm/generate/gen_common.sh +++ b/llm/generate/gen_common.sh @@ -9,11 +9,14 @@ init_vars() { ARCH="arm64" ;; *) - ARCH=$(uname -m | sed -e "s/aarch64/arm64/g") + echo "GOARCH must be set" + echo "this script is meant to be run from within go generate" + exit 1 + ;; esac LLAMACPP_DIR=../llama.cpp - CMAKE_DEFS="" + CMAKE_DEFS="-DCMAKE_SKIP_RPATH=on" CMAKE_TARGETS="--target ollama_llama_server" if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on ${CMAKE_DEFS}" @@ -27,6 +30,7 @@ init_vars() { WHOLE_ARCHIVE="-Wl,-force_load" NO_WHOLE_ARCHIVE="" GCC_ARCH="-arch ${ARCH}" + DIST_BASE=../../dist/darwin-${GOARCH}/ ;; "Linux") LIB_EXT="so" @@ -35,6 +39,7 @@ init_vars() { # Cross compiling not supported on linux - Use docker GCC_ARCH="" + DIST_BASE=../../dist/linux-${GOARCH}/ ;; *) ;; @@ -105,6 +110,14 @@ compress() { echo "Finished compression" } +install() { + echo "Installing libraries to bin dir ${BUILD_DIR}/bin/" + for lib in $(find ${BUILD_DIR} -name \*.${LIB_EXT}); do + rm -f "${BUILD_DIR}/bin/$(basename ${lib})" + cp -af "${lib}" "${BUILD_DIR}/bin/" + done +} + # Keep the local tree clean after we're done with the build cleanup() { (cd ${LLAMACPP_DIR}/ && git checkout CMakeLists.txt) diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh index db2c6c30..70fc0313 100755 --- a/llm/generate/gen_linux.sh +++ b/llm/generate/gen_linux.sh @@ -51,7 +51,7 @@ if [ -z "${CUDACXX}" ]; then export CUDACXX=$(command -v nvcc) fi fi -COMMON_CMAKE_DEFS="-DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off" +COMMON_CMAKE_DEFS="-DCMAKE_SKIP_RPATH=on -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off" source $(dirname $0)/gen_common.sh init_vars git_module_setup @@ -77,10 +77,11 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then init_vars echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\"" - CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}" + CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}" BUILD_DIR="../build/linux/${ARCH}/cpu" echo "Building custom CPU" build + install compress else # Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512 @@ -93,7 +94,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then # -DGGML_AVX512_VBMI -- 2018 Intel Cannon Lake # -DGGML_AVX512_VNNI -- 2021 Intel Alder Lake - COMMON_CPU_DEFS="-DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off" + COMMON_CPU_DEFS="-DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off" if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu" ]; then # # CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta) @@ -103,6 +104,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then BUILD_DIR="../build/linux/${ARCH}/cpu" echo "Building LCD CPU" build + install compress fi @@ -120,6 +122,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then BUILD_DIR="../build/linux/${ARCH}/cpu_avx" echo "Building AVX CPU" build + install compress fi @@ -133,6 +136,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then BUILD_DIR="../build/linux/${ARCH}/cpu_avx2" echo "Building AVX2 CPU" build + install compress fi fi @@ -178,29 +182,18 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES} ${OLLAMA_CUSTOM_CUDA_DEFS}" echo "Building custom CUDA GPU" else - CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_FLAGS=-t8 -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}" + CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}" fi - CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS}" + export CUDAFLAGS="-t8" + CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS} -DGGML_STATIC=off" BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}" - EXTRA_LIBS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda" + export LLAMA_SERVER_LDFLAGS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda" + CUDA_DIST_DIR="${DIST_BASE}/ollama_libs" build - - # Carry the CUDA libs as payloads to help reduce dependency burden on users - # - # TODO - in the future we may shift to packaging these separately and conditionally - # downloading them in the install script. - DEPS="$(ldd ${BUILD_DIR}/bin/ollama_llama_server )" - for lib in libcudart.so libcublas.so libcublasLt.so ; do - DEP=$(echo "${DEPS}" | grep ${lib} | cut -f1 -d' ' | xargs || true) - if [ -n "${DEP}" -a -e "${CUDA_LIB_DIR}/${DEP}" ]; then - cp "${CUDA_LIB_DIR}/${DEP}" "${BUILD_DIR}/bin/" - elif [ -e "${CUDA_LIB_DIR}/${lib}.${CUDA_MAJOR}" ]; then - cp "${CUDA_LIB_DIR}/${lib}.${CUDA_MAJOR}" "${BUILD_DIR}/bin/" - elif [ -e "${CUDART_LIB_DIR}/${lib}" ]; then - cp -d ${CUDART_LIB_DIR}/${lib}* "${BUILD_DIR}/bin/" - else - cp -d "${CUDA_LIB_DIR}/${lib}*" "${BUILD_DIR}/bin/" - fi + install + mkdir -p "${CUDA_DIST_DIR}" + for lib in ${CUDA_LIB_DIR}/libcudart.so* ${CUDA_LIB_DIR}/libcublas.so* ${CUDA_LIB_DIR}/libcublasLt.so* ; do + cp -a "${lib}" "${CUDA_DIST_DIR}" done compress @@ -218,21 +211,24 @@ if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then CC=icx CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL=ON -DGGML_SYCL_F16=OFF" BUILD_DIR="../build/linux/${ARCH}/oneapi" - EXTRA_LIBS="-fsycl -Wl,-rpath,${ONEAPI_ROOT}/compiler/latest/lib,-rpath,${ONEAPI_ROOT}/mkl/latest/lib,-rpath,${ONEAPI_ROOT}/tbb/latest/lib,-rpath,${ONEAPI_ROOT}/compiler/latest/opt/oclfpga/linux64/lib -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb" + ONEAPI_DIST_DIR="${DIST_BASE}/ollama_libs" + export LLAMA_SERVER_LDFLAGS="-fsycl -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb" DEBUG_FLAGS="" # icx compiles with -O0 if we pass -g, so we must remove it build # copy oneAPI dependencies + mkdir -p "${ONEAPI_DIST_DIR}" for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e sycl -e mkl -e tbb); do - cp "${dep}" "${BUILD_DIR}/bin/" + cp -a "${dep}" "${ONEAPI_DIST_DIR}" done - cp "${ONEAPI_ROOT}/compiler/latest/lib/libOpenCL.so" "${BUILD_DIR}/bin/" - cp "${ONEAPI_ROOT}/compiler/latest/lib/libimf.so" "${BUILD_DIR}/bin/" - cp "${ONEAPI_ROOT}/compiler/latest/lib/libintlc.so.5" "${BUILD_DIR}/bin/" - cp "${ONEAPI_ROOT}/compiler/latest/lib/libirng.so" "${BUILD_DIR}/bin/" - cp "${ONEAPI_ROOT}/compiler/latest/lib/libpi_level_zero.so" "${BUILD_DIR}/bin/" - cp "${ONEAPI_ROOT}/compiler/latest/lib/libsvml.so" "${BUILD_DIR}/bin/" - cp "${ONEAPI_ROOT}/compiler/latest/lib/libur_loader.so.0" "${BUILD_DIR}/bin/" + cp "${ONEAPI_ROOT}/compiler/latest/lib/libOpenCL.so" "${ONEAPI_DIST_DIR}" + cp "${ONEAPI_ROOT}/compiler/latest/lib/libimf.so" "${ONEAPI_DIST_DIR}" + cp "${ONEAPI_ROOT}/compiler/latest/lib/libintlc.so.5" "${ONEAPI_DIST_DIR}" + cp "${ONEAPI_ROOT}/compiler/latest/lib/libirng.so" "${ONEAPI_DIST_DIR}" + cp "${ONEAPI_ROOT}/compiler/latest/lib/libpi_level_zero.so" "${ONEAPI_DIST_DIR}" + cp "${ONEAPI_ROOT}/compiler/latest/lib/libsvml.so" "${ONEAPI_DIST_DIR}" + cp "${ONEAPI_ROOT}/compiler/latest/lib/libur_loader.so.0" "${ONEAPI_DIST_DIR}" + install compress fi @@ -262,21 +258,18 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then echo "Building custom ROCM GPU" fi BUILD_DIR="../build/linux/${ARCH}/rocm${ROCM_VARIANT}" - EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,\$ORIGIN/../../rocm/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu" + ROCM_DIST_DIR="${DIST_BASE}/ollama_libs" + # TODO figure out how to disable runpath (rpath) + # export CMAKE_HIP_FLAGS="-fno-rtlib-add-rpath" # doesn't work + export LLAMA_SERVER_LDFLAGS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu" build - # Record the ROCM dependencies - rm -f "${BUILD_DIR}/bin/deps.txt" - touch "${BUILD_DIR}/bin/deps.txt" - for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo ); do - echo "${dep}" >> "${BUILD_DIR}/bin/deps.txt" + # copy the ROCM dependencies + mkdir -p "${ROCM_DIST_DIR}" + for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -v "${ARCH}/rocm${ROCM_VARIANT}" | grep -e rocm -e amdgpu -e libtinfo ); do + cp -a "${dep}"* "${ROCM_DIST_DIR}" done - # bomb out if for some reason we didn't get a few deps - if [ $(cat "${BUILD_DIR}/bin/deps.txt" | wc -l ) -lt 8 ] ; then - cat "${BUILD_DIR}/bin/deps.txt" - echo "ERROR: deps file short" - exit 1 - fi + install compress fi diff --git a/llm/generate/gen_windows.ps1 b/llm/generate/gen_windows.ps1 index d8bce92d..1f8c96d8 100644 --- a/llm/generate/gen_windows.ps1 +++ b/llm/generate/gen_windows.ps1 @@ -286,12 +286,11 @@ function build_cuda() { sign install - rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\" - md "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\" -ea 0 > $null - write-host "copying CUDA dependencies to ${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\" - cp "${script:CUDA_LIB_DIR}\cudart64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\" - cp "${script:CUDA_LIB_DIR}\cublas64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\" - cp "${script:CUDA_LIB_DIR}\cublasLt64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\" + md "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" -ea 0 > $null + write-host "copying CUDA dependencies to ${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${script:CUDA_LIB_DIR}\cudart64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${script:CUDA_LIB_DIR}\cublas64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${script:CUDA_LIB_DIR}\cublasLt64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" } else { write-host "Skipping CUDA generation step" } @@ -325,18 +324,17 @@ function build_oneapi() { sign install - rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" - md "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" -ea 0 > $null - cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libirngmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" - cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libmmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" - cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_level_zero.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" - cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_unified_runtime.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" - cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_win_proxy_loader.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" - cp "${env:ONEAPI_ROOT}\compiler\latest\bin\svml_dispmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" - cp "${env:ONEAPI_ROOT}\compiler\latest\bin\sycl7.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" - cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_core.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" - cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_sycl_blas.4.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" - cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_tbb_thread.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" + md "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" -ea 0 > $null + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libirngmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libmmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_level_zero.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_unified_runtime.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_win_proxy_loader.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\svml_dispmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\sycl7.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_core.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_sycl_blas.4.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_tbb_thread.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" } else { Write-Host "Skipping oneAPI generation step" } @@ -386,12 +384,11 @@ function build_rocm() { sign install - rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\" - md "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\" -ea 0 > $null - cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\" - cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\" + md "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\rocblas\library\" -ea 0 > $null + cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" # amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs - cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\" + cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\rocblas\library\" } else { write-host "Skipping ROCm generation step" } diff --git a/llm/server.go b/llm/server.go index d2b8db9b..9347a458 100644 --- a/llm/server.go +++ b/llm/server.go @@ -306,20 +306,18 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr if runtime.GOOS == "windows" { pathEnv = "PATH" } - // prepend the server directory to LD_LIBRARY_PATH/PATH and the parent dir for common dependencies - libraryPaths := []string{dir, filepath.Dir(dir)} + // Start with the server directory for the LD_LIBRARY_PATH/PATH + libraryPaths := []string{dir} if libraryPath, ok := os.LookupEnv(pathEnv); ok { - // Append our runner directory to the path - // This will favor system libraries over our bundled library dependencies + // favor our bundled library dependencies over system libraries libraryPaths = append(libraryPaths, filepath.SplitList(libraryPath)...) } // Note: we always put the dependency path first - // since this was the exact version we verified for AMD GPUs - // and we favor what the user had in their path + // since this was the exact version we compiled/linked against if gpus[0].DependencyPath != "" { - // TODO refine for multi-gpu support + // assume gpus from the same library have the same dependency path libraryPaths = append([]string{gpus[0].DependencyPath}, libraryPaths...) } diff --git a/scripts/build_linux.sh b/scripts/build_linux.sh index 27c4ff1f..4ea51229 100755 --- a/scripts/build_linux.sh +++ b/scripts/build_linux.sh @@ -21,11 +21,9 @@ for TARGETARCH in ${BUILD_ARCH}; do -t builder:$TARGETARCH \ . docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH - docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/ollama ./dist/ollama-linux-$TARGETARCH - - if [ "$TARGETARCH" = "amd64" ]; then - docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/deps/ ./dist/ - fi - + docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/linux-$TARGETARCH ./dist docker rm builder-$TARGETARCH + echo "Compressing final linux bundle..." + rm -f ./dist/ollama-linux-$TARGETARCH.tgz + (cd dist/linux-$TARGETARCH && tar cf - . | gzip --best > ../ollama-linux-$TARGETARCH.tgz ) done diff --git a/scripts/build_windows.ps1 b/scripts/build_windows.ps1 index edc73759..e8d851f4 100644 --- a/scripts/build_windows.ps1 +++ b/scripts/build_windows.ps1 @@ -103,22 +103,22 @@ function buildApp() { function gatherDependencies() { write-host "Gathering runtime dependencies" cd "${script:SRC_DIR}" - md "${script:DEPS_DIR}\ollama_runners" -ea 0 > $null + md "${script:DEPS_DIR}\ollama_libs" -ea 0 > $null # TODO - this varies based on host build system and MSVC version - drive from dumpbin output # currently works for Win11 + MSVC 2019 + Cuda V11 - cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\msvcp140*.dll" "${script:DEPS_DIR}\ollama_runners\" - cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140.dll" "${script:DEPS_DIR}\ollama_runners\" - cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140_1.dll" "${script:DEPS_DIR}\ollama_runners\" + cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\msvcp140*.dll" "${script:DEPS_DIR}\ollama_libs\" + cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140.dll" "${script:DEPS_DIR}\ollama_libs\" + cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140_1.dll" "${script:DEPS_DIR}\ollama_libs\" foreach ($part in $("runtime", "stdio", "filesystem", "math", "convert", "heap", "string", "time", "locale", "environment")) { - cp "$env:VCToolsRedistDir\..\..\..\Tools\Llvm\x64\bin\api-ms-win-crt-${part}*.dll" "${script:DEPS_DIR}\ollama_runners\" + cp "$env:VCToolsRedistDir\..\..\..\Tools\Llvm\x64\bin\api-ms-win-crt-${part}*.dll" "${script:DEPS_DIR}\ollama_libs\" } cp "${script:SRC_DIR}\app\ollama_welcome.ps1" "${script:SRC_DIR}\dist\" if ("${env:KEY_CONTAINER}") { write-host "about to sign" - foreach ($file in (get-childitem "${script:DEPS_DIR}\cuda\cu*.dll") + @("${script:SRC_DIR}\dist\ollama_welcome.ps1")){ + foreach ($file in (get-childitem "${script:DEPS_DIR}\ollama_libs\cu*.dll") + @("${script:SRC_DIR}\dist\ollama_welcome.ps1")){ write-host "signing $file" & "${script:SignTool}" sign /v /fd sha256 /t http://timestamp.digicert.com /f "${script:OLLAMA_CERT}" ` /csp "Google Cloud KMS Provider" /kc ${env:KEY_CONTAINER} $file diff --git a/scripts/install.sh b/scripts/install.sh index 03af5a69..f0439b00 100644 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -63,16 +63,32 @@ if [ -n "$NEEDS" ]; then exit 1 fi -status "Downloading ollama..." -curl --fail --show-error --location --progress-bar -o $TEMP_DIR/ollama "https://ollama.com/download/ollama-linux-${ARCH}${VER_PARAM}" - for BINDIR in /usr/local/bin /usr/bin /bin; do echo $PATH | grep -q $BINDIR && break || continue done +OLLAMA_INSTALL_DIR=${OLLAMA_INSTALL_DIR:-${BINDIR}} -status "Installing ollama to $BINDIR..." +status "Installing ollama to $OLLAMA_INSTALL_DIR" $SUDO install -o0 -g0 -m755 -d $BINDIR -$SUDO install -o0 -g0 -m755 $TEMP_DIR/ollama $BINDIR/ollama +$SUDO install -o0 -g0 -m755 -d "$OLLAMA_INSTALL_DIR" +if curl -I --silent --fail --location "https://ollama.com/download/ollama-linux-${ARCH}.tgz${VER_PARAM}" >/dev/null ; then + status "Downloading Linux ${ARCH} bundle" + curl --fail --show-error --location --progress-bar \ + "https://ollama.com/download/ollama-linux-${ARCH}.tgz${VER_PARAM}" | \ + $SUDO tar -xzf - -C "$OLLAMA_INSTALL_DIR" + BUNDLE=1 +else + status "Downloading Linux ${ARCH} CLI" + curl --fail --show-error --location --progress-bar -o "$TEMP_DIR/ollama"\ + "https://ollama.com/download/ollama-linux-${ARCH}${VER_PARAM}" + $SUDO install -o0 -g0 -m755 $TEMP_DIR/ollama $OLLAMA_INSTALL_DIR/ollama + BUNDLE=0 +fi + +if [ "$OLLAMA_INSTALL_DIR/ollama" != "$BINDIR/ollama" ] ; then + status "Making ollama accessible in the PATH in $BINDIR" + $SUDO ln -sf "$OLLAMA_INSTALL_DIR/ollama" "$BINDIR/ollama" +fi install_success() { status 'The Ollama API is now available at 127.0.0.1:11434.' @@ -178,6 +194,11 @@ if ! check_gpu lspci nvidia && ! check_gpu lshw nvidia && ! check_gpu lspci amdg fi if check_gpu lspci amdgpu || check_gpu lshw amdgpu; then + if [ $BUNDLE -ne 0 ]; then + install_success + status "AMD GPU ready." + exit 0 + fi # Look for pre-existing ROCm v6 before downloading the dependencies for search in "${HIP_PATH:-''}" "${ROCM_PATH:-''}" "/opt/rocm" "/usr/lib64"; do if [ -n "${search}" ] && [ -e "${search}/libhipblas.so.2" -o -e "${search}/lib/libhipblas.so.2" ]; then From c7bcb0031965e33531358639620a11516d101b54 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Fri, 9 Aug 2024 07:21:40 -0700 Subject: [PATCH 02/10] Wire up ccache and pigz in the docker based build This should help speed things up a little --- Dockerfile | 37 ++++++++++++++++++++++++++----------- llm/generate/gen_common.sh | 15 +++++++++------ llm/generate/gen_darwin.sh | 2 ++ llm/generate/gen_linux.sh | 2 ++ scripts/build_linux.sh | 3 ++- scripts/rh_linux_deps.sh | 14 ++++++++++++-- 6 files changed, 53 insertions(+), 20 deletions(-) diff --git a/Dockerfile b/Dockerfile index 120ddc21..8eb90057 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,7 +19,8 @@ COPY --from=llm-code / /go/src/github.com/ollama/ollama/ WORKDIR /go/src/github.com/ollama/ollama/llm/generate ARG CGO_CFLAGS ENV GOARCH amd64 -RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh +RUN --mount=type=cache,target=/root/.ccache \ + OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64 ARG CMAKE_VERSION @@ -30,7 +31,12 @@ COPY --from=llm-code / /go/src/github.com/ollama/ollama/ WORKDIR /go/src/github.com/ollama/ollama/llm/generate ARG CGO_CFLAGS ENV GOARCH arm64 -RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh +RUN --mount=type=cache,target=/root/.ccache \ + OLLAMA_SKIP_STATIC_GENERATE=1 \ + OLLAMA_SKIP_CPU_GENERATE=1 \ + CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \ + CUDA_VARIANT="_v11" \ + bash gen_linux.sh FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64 ARG CMAKE_VERSION @@ -43,7 +49,8 @@ WORKDIR /go/src/github.com/ollama/ollama/llm/generate ARG CGO_CFLAGS ARG AMDGPU_TARGETS ENV GOARCH amd64 -RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh +RUN --mount=type=cache,target=/root/.ccache \ + OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh RUN mkdir -p ../../dist/linux-amd64/ollama_libs && \ (cd /opt/rocm/lib && tar cf - rocblas/library) | (cd ../../dist/linux-amd64/ollama_libs && tar xf - ) @@ -60,13 +67,17 @@ ENV GOARCH amd64 WORKDIR /go/src/github.com/ollama/ollama/llm/generate FROM --platform=linux/amd64 cpu-builder-amd64 AS static-build-amd64 -RUN OLLAMA_CPU_TARGET="static" sh gen_linux.sh +RUN --mount=type=cache,target=/root/.ccache \ + OLLAMA_CPU_TARGET="static" bash gen_linux.sh FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64 -RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh +RUN --mount=type=cache,target=/root/.ccache \ + OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64 -RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx" sh gen_linux.sh +RUN --mount=type=cache,target=/root/.ccache \ + OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx" bash gen_linux.sh FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64 -RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" sh gen_linux.sh +RUN --mount=type=cache,target=/root/.ccache \ + OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" bash gen_linux.sh FROM --platform=linux/arm64 rockylinux:8 AS cpu-builder-arm64 ARG CMAKE_VERSION @@ -81,9 +92,11 @@ ENV GOARCH arm64 WORKDIR /go/src/github.com/ollama/ollama/llm/generate FROM --platform=linux/arm64 cpu-builder-arm64 AS static-build-arm64 -RUN OLLAMA_CPU_TARGET="static" sh gen_linux.sh +RUN --mount=type=cache,target=/root/.ccache \ + OLLAMA_CPU_TARGET="static" bash gen_linux.sh FROM --platform=linux/arm64 cpu-builder-arm64 AS cpu-build-arm64 -RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh +RUN --mount=type=cache,target=/root/.ccache \ + OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh # Intermediate stage used for ./scripts/build_linux.sh @@ -100,7 +113,8 @@ COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/ COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ ARG GOFLAGS ARG CGO_CFLAGS -RUN go build -trimpath -o dist/linux-amd64/ollama . +RUN --mount=type=cache,target=/root/.ccache \ + go build -trimpath -o dist/linux-amd64/ollama . # Intermediate stage used for ./scripts/build_linux.sh FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64 @@ -113,7 +127,8 @@ COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ ARG GOFLAGS ARG CGO_CFLAGS -RUN go build -trimpath -o dist/linux-arm64/ollama . +RUN --mount=type=cache,target=/root/.ccache \ + go build -trimpath -o dist/linux-arm64/ollama . # Runtime stages FROM --platform=linux/amd64 ubuntu:22.04 as runtime-amd64 diff --git a/llm/generate/gen_common.sh b/llm/generate/gen_common.sh index f1541f2a..40115936 100644 --- a/llm/generate/gen_common.sh +++ b/llm/generate/gen_common.sh @@ -47,6 +47,7 @@ init_vars() { if [ -z "${CMAKE_CUDA_ARCHITECTURES}" ] ; then CMAKE_CUDA_ARCHITECTURES="50;52;61;70;75;80" fi + GZIP=$(which pigz 2>/dev/null || echo "gzip") } git_module_setup() { @@ -90,21 +91,23 @@ build() { compress() { echo "Compressing payloads to reduce overall binary size..." - pids="" rm -rf ${BUILD_DIR}/bin/*.gz for f in ${BUILD_DIR}/bin/* ; do - gzip -n --best -f ${f} & - pids+=" $!" + ${GZIP} -n --best -f ${f} & + compress_pids+=" $!" done # check for lib directory if [ -d ${BUILD_DIR}/lib ]; then for f in ${BUILD_DIR}/lib/* ; do - gzip -n --best -f ${f} & - pids+=" $!" + ${GZIP} -n --best -f ${f} & + compress_pids+=" $!" done fi echo - for pid in ${pids}; do +} + +wait_for_compress() { + for pid in ${compress_pids}; do wait $pid done echo "Finished compression" diff --git a/llm/generate/gen_darwin.sh b/llm/generate/gen_darwin.sh index 6c0b62cb..f22c0f8e 100755 --- a/llm/generate/gen_darwin.sh +++ b/llm/generate/gen_darwin.sh @@ -6,6 +6,7 @@ set -ex set -o pipefail +compress_pids="" echo "Starting darwin generate script" source $(dirname $0)/gen_common.sh init_vars @@ -98,4 +99,5 @@ case "${GOARCH}" in esac cleanup +wait_for_compress echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)" diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh index 70fc0313..1365d07d 100755 --- a/llm/generate/gen_linux.sh +++ b/llm/generate/gen_linux.sh @@ -13,6 +13,7 @@ set -ex set -o pipefail +compress_pids="" # See https://llvm.org/docs/AMDGPUUsage.html#processors for reference amdGPUs() { @@ -274,4 +275,5 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then fi cleanup +wait_for_compress echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)" diff --git a/scripts/build_linux.sh b/scripts/build_linux.sh index 4ea51229..ebb60c5a 100755 --- a/scripts/build_linux.sh +++ b/scripts/build_linux.sh @@ -4,6 +4,7 @@ set -eu export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")} export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'" +GZIP=$(which pigz 2>/dev/null || echo "gzip") BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"} export AMDGPU_TARGETS=${AMDGPU_TARGETS:=""} @@ -25,5 +26,5 @@ for TARGETARCH in ${BUILD_ARCH}; do docker rm builder-$TARGETARCH echo "Compressing final linux bundle..." rm -f ./dist/ollama-linux-$TARGETARCH.tgz - (cd dist/linux-$TARGETARCH && tar cf - . | gzip --best > ../ollama-linux-$TARGETARCH.tgz ) + (cd dist/linux-$TARGETARCH && tar cf - . | ${GZIP} --best > ../ollama-linux-$TARGETARCH.tgz ) done diff --git a/scripts/rh_linux_deps.sh b/scripts/rh_linux_deps.sh index 81648d68..b4c9afd6 100644 --- a/scripts/rh_linux_deps.sh +++ b/scripts/rh_linux_deps.sh @@ -3,6 +3,7 @@ # Script for common Dockerfile dependency installation in redhat linux based images set -ex +set -o pipefail MACHINE=$(uname -m) if grep -i "centos" /etc/system-release >/dev/null; then @@ -29,7 +30,7 @@ if grep -i "centos" /etc/system-release >/dev/null; then dnf install -y rh-git227-git ln -s /opt/rh/rh-git227/root/usr/bin/git /usr/local/bin/git fi - dnf install -y devtoolset-10-gcc devtoolset-10-gcc-c++ + dnf install -y devtoolset-10-gcc devtoolset-10-gcc-c++ pigz elif grep -i "rocky" /etc/system-release >/dev/null; then # Temporary workaround until rocky 8 AppStream ships GCC 10.4 (10.3 is incompatible with NVCC) cat << EOF > /etc/yum.repos.d/Rocky-Vault.repo @@ -43,12 +44,21 @@ gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-rockyofficial EOF dnf install -y git \ gcc-toolset-10-gcc-10.2.1-8.2.el8 \ - gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 + gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 \ + pigz else echo "ERROR Unexpected distro" exit 1 fi +if [ "${MACHINE}" = "x86_64" ] ; then + curl -s -L https://github.com/ccache/ccache/releases/download/v4.10.2/ccache-4.10.2-linux-x86_64.tar.xz | tar -Jx -C /tmp --strip-components 1 && \ + mv /tmp/ccache /usr/local/bin/ +else + yum -y install epel-release + yum install -y ccache +fi + if [ -n "${CMAKE_VERSION}" ]; then curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1 fi From d470ebe78bc76c098bc378f98f08f7094063ab4d Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Thu, 30 May 2024 21:54:07 -0700 Subject: [PATCH 03/10] Add Jetson cuda variants for arm This adds new variants for arm64 specific to Jetson platforms --- Dockerfile | 48 +++++++++++++++++++++++++++++++++++---- gpu/gpu.go | 44 +++++++++++++++++++++++++++++++++-- gpu/gpu_darwin.go | 4 ++-- gpu/types.go | 6 ++--- llm/generate/gen_linux.sh | 5 ++-- llm/payload.go | 4 ++-- scripts/build_linux.sh | 1 + 7 files changed, 96 insertions(+), 16 deletions(-) diff --git a/Dockerfile b/Dockerfile index 8eb90057..79b2a696 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,6 +3,9 @@ ARG CMAKE_VERSION=3.22.1 # this CUDA_VERSION corresponds with the one specified in docs/gpu.md ARG CUDA_VERSION=11.3.1 ARG ROCM_VERSION=6.1.2 +ARG JETPACK_6=r36.2.0 +ARG JETPACK_5=r35.4.1 +ARG JETPACK_4=r32.7.1 # Copy the minimal context we need to run the generate scripts FROM scratch AS llm-code @@ -22,7 +25,7 @@ ENV GOARCH amd64 RUN --mount=type=cache,target=/root/.ccache \ OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh -FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64 +FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-server-arm64 ARG CMAKE_VERSION COPY ./scripts/rh_linux_deps.sh / RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh @@ -31,11 +34,40 @@ COPY --from=llm-code / /go/src/github.com/ollama/ollama/ WORKDIR /go/src/github.com/ollama/ollama/llm/generate ARG CGO_CFLAGS ENV GOARCH arm64 +RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh + +FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6} AS cuda-build-jetpack6-arm64 +ARG CMAKE_VERSION +RUN apt-get update && apt-get install -y git curl && \ + curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1 +COPY --from=llm-code / /go/src/github.com/ollama/ollama/ +WORKDIR /go/src/github.com/ollama/ollama/llm/generate +ARG CGO_CFLAGS +ENV GOARCH arm64 +ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs RUN --mount=type=cache,target=/root/.ccache \ OLLAMA_SKIP_STATIC_GENERATE=1 \ OLLAMA_SKIP_CPU_GENERATE=1 \ - CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \ - CUDA_VARIANT="_v11" \ + CUDA_VARIANT="_jetpack6" \ + CUDA_DIST_DIR="/go/src/github.com/ollama/ollama/dist/linux-arm64/ollama_libs/cuda_jetpack6" \ + CMAKE_CUDA_ARCHITECTURES="87" \ + bash gen_linux.sh + +FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5} AS cuda-build-jetpack5-arm64 +ARG CMAKE_VERSION +RUN apt-get update && apt-get install -y git curl && \ + curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1 +COPY --from=llm-code / /go/src/github.com/ollama/ollama/ +WORKDIR /go/src/github.com/ollama/ollama/llm/generate +ARG CGO_CFLAGS +ENV GOARCH arm64 +ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs +RUN --mount=type=cache,target=/root/.ccache \ + OLLAMA_SKIP_STATIC_GENERATE=1 \ + OLLAMA_SKIP_CPU_GENERATE=1 \ + CUDA_VARIANT="_jetpack5" \ + CUDA_DIST_DIR="/go/src/github.com/ollama/ollama/dist/linux-arm64/ollama_libs/cuda_jetpack5" \ + CMAKE_CUDA_ARCHITECTURES="72;87" \ bash gen_linux.sh FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64 @@ -123,8 +155,14 @@ ARG GOLANG_VERSION WORKDIR /go/src/github.com/ollama/ollama COPY . . COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ -COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ -COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ +COPY --from=cuda-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ +COPY --from=cuda-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ +## arm binary += 381M +COPY --from=cuda-build-jetpack6-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ +COPY --from=cuda-build-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ +## arm binary += 330M +COPY --from=cuda-build-jetpack5-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ +COPY --from=cuda-build-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ ARG GOFLAGS ARG CGO_CFLAGS RUN --mount=type=cache,target=/root/.ccache \ diff --git a/gpu/gpu.go b/gpu/gpu.go index d0ae0f34..22461922 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -15,7 +15,9 @@ import ( "log/slog" "os" "path/filepath" + "regexp" "runtime" + "strconv" "strings" "sync" "unsafe" @@ -215,7 +217,7 @@ func GetGPUInfo() GpuInfoList { GpuInfo: GpuInfo{ memInfo: mem, Library: "cpu", - Variant: cpuCapability, + Variant: cpuCapability.String(), ID: "0", }, }, @@ -231,6 +233,35 @@ func GetGPUInfo() GpuInfoList { depPath := GetDepDir() + var cudaVariant string + if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" { + if CudaTegra != "" { + ver := strings.Split(CudaTegra, ".") + if len(ver) > 0 { + cudaVariant = "jetpack" + ver[0] + } + } else if data, err := os.ReadFile("/etc/nv_tegra_release"); err == nil { + r := regexp.MustCompile(` R(\d+) `) + m := r.FindSubmatch(data) + if len(m) != 2 { + slog.Info("Unexpected format for /etc/nv_tegra_release. Set JETSON_JETPACK to select version") + } else { + if l4t, err := strconv.Atoi(string(m[1])); err == nil { + // Note: mapping from L4t -> JP is inconsistent (can't just subtract 30) + // https://developer.nvidia.com/embedded/jetpack-archive + switch l4t { + case 35: + cudaVariant = "jetpack5" + case 36: + cudaVariant = "jetpack6" + default: + slog.Info("unsupported L4T version", "nv_tegra_release", string(data)) + } + } + } + } + } + // Load ALL libraries cHandles = initCudaHandles() @@ -240,6 +271,7 @@ func GetGPUInfo() GpuInfoList { gpuInfo := CudaGPUInfo{ GpuInfo: GpuInfo{ Library: "cuda", + Variant: cudaVariant, }, index: i, } @@ -266,7 +298,15 @@ func GetGPUInfo() GpuInfoList { gpuInfo.ID = C.GoString(&memInfo.gpu_id[0]) gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor) gpuInfo.MinimumMemory = cudaMinimumMemory - gpuInfo.DependencyPath = depPath + if depPath != "" { + gpuInfo.DependencyPath = depPath + // Check for variant specific directory + if cudaVariant != "" { + if _, err := os.Stat(filepath.Join(depPath, "cuda_"+cudaVariant)); err == nil { + gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+cudaVariant) + } + } + } gpuInfo.Name = C.GoString(&memInfo.gpu_name[0]) gpuInfo.DriverMajor = driverMajor gpuInfo.DriverMinor = driverMinor diff --git a/gpu/gpu_darwin.go b/gpu/gpu_darwin.go index 9d9fd84e..417b48df 100644 --- a/gpu/gpu_darwin.go +++ b/gpu/gpu_darwin.go @@ -25,7 +25,7 @@ func GetGPUInfo() GpuInfoList { return []GpuInfo{ { Library: "cpu", - Variant: GetCPUCapability(), + Variant: GetCPUCapability().String(), memInfo: mem, }, } @@ -48,7 +48,7 @@ func GetCPUInfo() GpuInfoList { return []GpuInfo{ { Library: "cpu", - Variant: GetCPUCapability(), + Variant: GetCPUCapability().String(), memInfo: mem, }, } diff --git a/gpu/types.go b/gpu/types.go index 8d22b06b..fc628d47 100644 --- a/gpu/types.go +++ b/gpu/types.go @@ -19,7 +19,7 @@ type GpuInfo struct { Library string `json:"library,omitempty"` // Optional variant to select (e.g. versions, cpu feature flags) - Variant CPUCapability `json:"variant"` + Variant string `json:"variant"` // MinimumMemory represents the minimum memory required to use the GPU MinimumMemory uint64 `json:"-"` @@ -81,8 +81,8 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList { for _, info := range l { found := false requested := info.Library - if info.Variant != CPUCapabilityNone { - requested += "_" + info.Variant.String() + if info.Variant != CPUCapabilityNone.String() { + requested += "_" + info.Variant } for i, lib := range libs { if lib == requested { diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh index 1365d07d..dc9dda5a 100755 --- a/llm/generate/gen_linux.sh +++ b/llm/generate/gen_linux.sh @@ -165,7 +165,7 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then echo "CUDA libraries detected - building dynamic CUDA library" init_vars CUDA_MAJOR=$(ls "${CUDA_LIB_DIR}"/libcudart.so.* | head -1 | cut -f3 -d. || true) - if [ -n "${CUDA_MAJOR}" ]; then + if [ -n "${CUDA_MAJOR}" -a -z "${CUDA_VARIANT}" ]; then CUDA_VARIANT=_v${CUDA_MAJOR} fi if [ "${ARCH}" == "arm64" ]; then @@ -189,9 +189,10 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS} -DGGML_STATIC=off" BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}" export LLAMA_SERVER_LDFLAGS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda" - CUDA_DIST_DIR="${DIST_BASE}/ollama_libs" + CUDA_DIST_DIR="${CUDA_DIST_DIR:-${DIST_BASE}/ollama_libs}" build install + echo "Installing CUDA dependencies in ${CUDA_DIST_DIR}" mkdir -p "${CUDA_DIST_DIR}" for lib in ${CUDA_LIB_DIR}/libcudart.so* ${CUDA_LIB_DIR}/libcublas.so* ${CUDA_LIB_DIR}/libcublasLt.so* ; do cp -a "${lib}" "${CUDA_DIST_DIR}" diff --git a/llm/payload.go b/llm/payload.go index b402e1f2..963b3295 100644 --- a/llm/payload.go +++ b/llm/payload.go @@ -82,8 +82,8 @@ func serversForGpu(info gpu.GpuInfo) []string { // glob workDir for files that start with ollama_ availableServers := getAvailableServers() requested := info.Library - if info.Variant != gpu.CPUCapabilityNone { - requested += "_" + info.Variant.String() + if info.Variant != gpu.CPUCapabilityNone.String() { + requested += "_" + info.Variant } servers := []string{} diff --git a/scripts/build_linux.sh b/scripts/build_linux.sh index ebb60c5a..adda2ad7 100755 --- a/scripts/build_linux.sh +++ b/scripts/build_linux.sh @@ -22,6 +22,7 @@ for TARGETARCH in ${BUILD_ARCH}; do -t builder:$TARGETARCH \ . docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH + rm -rf ./dist/linux-$TARGETARCH docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/linux-$TARGETARCH ./dist docker rm builder-$TARGETARCH echo "Compressing final linux bundle..." From fc3b4cda89f468f923e2e6095c6a62a5c3c336ff Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Wed, 19 Jun 2024 09:36:30 -0700 Subject: [PATCH 04/10] Report GPU variant in log --- gpu/types.go | 1 + 1 file changed, 1 insertion(+) diff --git a/gpu/types.go b/gpu/types.go index fc628d47..88539078 100644 --- a/gpu/types.go +++ b/gpu/types.go @@ -105,6 +105,7 @@ func (l GpuInfoList) LogDetails() { slog.Info("inference compute", "id", g.ID, "library", g.Library, + "variant", g.Variant, "compute", g.Compute, "driver", fmt.Sprintf("%d.%d", g.DriverMajor, g.DriverMinor), "name", g.Name, From 4fe3a556faf790ba993223cfdda16e281b6cb76d Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Thu, 13 Jun 2024 20:46:14 -0700 Subject: [PATCH 05/10] Add cuda v12 variant and selection logic Based on compute capability and driver version, pick v12 or v11 cuda variants. --- Dockerfile | 43 +++++++++++++++++++++++++++++++++---------- gpu/cuda_common.go | 43 +++++++++++++++++++++++++++++++++++++++++++ gpu/gpu.go | 40 ++++------------------------------------ gpu/types.go | 6 ++++-- 4 files changed, 84 insertions(+), 48 deletions(-) diff --git a/Dockerfile b/Dockerfile index 79b2a696..e200f5d4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ ARG GOLANG_VERSION=1.22.5 ARG CMAKE_VERSION=3.22.1 -# this CUDA_VERSION corresponds with the one specified in docs/gpu.md -ARG CUDA_VERSION=11.3.1 +ARG CUDA_VERSION_11=11.3.1 +ARG CUDA_VERSION_12=12.4.0 ARG ROCM_VERSION=6.1.2 ARG JETPACK_6=r36.2.0 ARG JETPACK_5=r35.4.1 @@ -13,7 +13,7 @@ COPY .git .git COPY .gitmodules .gitmodules COPY llm llm -FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64 +FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_11-devel-centos7 AS cuda-11-build-amd64 ARG CMAKE_VERSION COPY ./scripts/rh_linux_deps.sh / RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh @@ -23,9 +23,29 @@ WORKDIR /go/src/github.com/ollama/ollama/llm/generate ARG CGO_CFLAGS ENV GOARCH amd64 RUN --mount=type=cache,target=/root/.ccache \ - OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh + OLLAMA_SKIP_STATIC_GENERATE=1 \ + OLLAMA_SKIP_CPU_GENERATE=1 \ + CMAKE_CUDA_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86" \ + CUDA_VARIANT="_v11" \ + bash gen_linux.sh -FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-server-arm64 +FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_12-devel-centos7 AS cuda-12-build-amd64 +ARG CMAKE_VERSION +COPY ./scripts/rh_linux_deps.sh / +RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh +ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH +COPY --from=llm-code / /go/src/github.com/ollama/ollama/ +WORKDIR /go/src/github.com/ollama/ollama/llm/generate +ARG CGO_CFLAGS +ENV GOARCH amd64 +RUN --mount=type=cache,target=/root/.ccache \ + OLLAMA_SKIP_STATIC_GENERATE=1 \ + OLLAMA_SKIP_CPU_GENERATE=1 \ + CMAKE_CUDA_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a" \ + CUDA_VARIANT="_v12" \ + bash gen_linux.sh + +FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-server-arm64 ARG CMAKE_VERSION COPY ./scripts/rh_linux_deps.sh / RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh @@ -34,7 +54,8 @@ COPY --from=llm-code / /go/src/github.com/ollama/ollama/ WORKDIR /go/src/github.com/ollama/ollama/llm/generate ARG CGO_CFLAGS ENV GOARCH arm64 -RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh +RUN --mount=type=cache,target=/root/.ccache \ + OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6} AS cuda-build-jetpack6-arm64 ARG CMAKE_VERSION @@ -139,8 +160,10 @@ COPY . . COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ -COPY --from=cuda-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/ -COPY --from=cuda-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ +COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/ +COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ +COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/ +COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/ COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ ARG GOFLAGS @@ -155,8 +178,8 @@ ARG GOLANG_VERSION WORKDIR /go/src/github.com/ollama/ollama COPY . . COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ -COPY --from=cuda-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ -COPY --from=cuda-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ +COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ +COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ ## arm binary += 381M COPY --from=cuda-build-jetpack6-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=cuda-build-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ diff --git a/gpu/cuda_common.go b/gpu/cuda_common.go index c90a644c..defaa60a 100644 --- a/gpu/cuda_common.go +++ b/gpu/cuda_common.go @@ -4,9 +4,17 @@ package gpu import ( "log/slog" + "os" + "regexp" + "runtime" + "strconv" "strings" ) +// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed. +// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices. +var CudaTegra string = os.Getenv("JETSON_JETPACK") + func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) { ids := []string{} for _, info := range gpuInfo { @@ -19,3 +27,38 @@ func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) { } return "CUDA_VISIBLE_DEVICES", strings.Join(ids, ",") } + +func cudaGetVariant(gpuInfo CudaGPUInfo) string { + if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" { + if CudaTegra != "" { + ver := strings.Split(CudaTegra, ".") + if len(ver) > 0 { + return "jetpack" + ver[0] + } + } else if data, err := os.ReadFile("/etc/nv_tegra_release"); err == nil { + r := regexp.MustCompile(` R(\d+) `) + m := r.FindSubmatch(data) + if len(m) != 2 { + slog.Info("Unexpected format for /etc/nv_tegra_release. Set JETSON_JETPACK to select version") + } else { + if l4t, err := strconv.Atoi(string(m[1])); err == nil { + // Note: mapping from L4t -> JP is inconsistent (can't just subtract 30) + // https://developer.nvidia.com/embedded/jetpack-archive + switch l4t { + case 35: + return "jetpack5" + case 36: + return "jetpack6" + default: + slog.Info("unsupported L4T version", "nv_tegra_release", string(data)) + } + } + } + } + } + + if gpuInfo.computeMajor < 6 || gpuInfo.DriverMajor < 12 { + return "v11" + } + return "v12" +} diff --git a/gpu/gpu.go b/gpu/gpu.go index 22461922..eb87807a 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -15,9 +15,7 @@ import ( "log/slog" "os" "path/filepath" - "regexp" "runtime" - "strconv" "strings" "sync" "unsafe" @@ -66,10 +64,6 @@ var RocmComputeMin = 9 // TODO find a better way to detect iGPU instead of minimum memory const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU -// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed. -// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices. -var CudaTegra string = os.Getenv("JETSON_JETPACK") - // Note: gpuMutex must already be held func initCudaHandles() *cudaHandles { // TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing @@ -233,35 +227,6 @@ func GetGPUInfo() GpuInfoList { depPath := GetDepDir() - var cudaVariant string - if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" { - if CudaTegra != "" { - ver := strings.Split(CudaTegra, ".") - if len(ver) > 0 { - cudaVariant = "jetpack" + ver[0] - } - } else if data, err := os.ReadFile("/etc/nv_tegra_release"); err == nil { - r := regexp.MustCompile(` R(\d+) `) - m := r.FindSubmatch(data) - if len(m) != 2 { - slog.Info("Unexpected format for /etc/nv_tegra_release. Set JETSON_JETPACK to select version") - } else { - if l4t, err := strconv.Atoi(string(m[1])); err == nil { - // Note: mapping from L4t -> JP is inconsistent (can't just subtract 30) - // https://developer.nvidia.com/embedded/jetpack-archive - switch l4t { - case 35: - cudaVariant = "jetpack5" - case 36: - cudaVariant = "jetpack6" - default: - slog.Info("unsupported L4T version", "nv_tegra_release", string(data)) - } - } - } - } - } - // Load ALL libraries cHandles = initCudaHandles() @@ -271,7 +236,6 @@ func GetGPUInfo() GpuInfoList { gpuInfo := CudaGPUInfo{ GpuInfo: GpuInfo{ Library: "cuda", - Variant: cudaVariant, }, index: i, } @@ -297,7 +261,10 @@ func GetGPUInfo() GpuInfoList { gpuInfo.FreeMemory = uint64(memInfo.free) gpuInfo.ID = C.GoString(&memInfo.gpu_id[0]) gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor) + gpuInfo.computeMajor = int(memInfo.major) + gpuInfo.computeMinor = int(memInfo.minor) gpuInfo.MinimumMemory = cudaMinimumMemory + cudaVariant := cudaGetVariant(gpuInfo) if depPath != "" { gpuInfo.DependencyPath = depPath // Check for variant specific directory @@ -310,6 +277,7 @@ func GetGPUInfo() GpuInfoList { gpuInfo.Name = C.GoString(&memInfo.gpu_name[0]) gpuInfo.DriverMajor = driverMajor gpuInfo.DriverMinor = driverMinor + gpuInfo.Variant = cudaGetVariant(gpuInfo) // query the management library as well so we can record any skew between the two // which represents overhead on the GPU we must set aside on subsequent updates diff --git a/gpu/types.go b/gpu/types.go index 88539078..4cbbeb84 100644 --- a/gpu/types.go +++ b/gpu/types.go @@ -53,8 +53,10 @@ type CPUInfo struct { type CudaGPUInfo struct { GpuInfo - OSOverhead uint64 // Memory overhead between the driver library and management library - index int //nolint:unused,nolintlint + OSOverhead uint64 // Memory overhead between the driver library and management library + index int //nolint:unused,nolintlint + computeMajor int //nolint:unused,nolintlint + computeMinor int //nolint:unused,nolintlint } type CudaGPUInfoList []CudaGPUInfo From f6c811b32075cb3b7633d7d4213251d474a77682 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Fri, 12 Jul 2024 11:35:41 -0700 Subject: [PATCH 06/10] Enable cuda v12 flags --- Dockerfile | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index e200f5d4..e83a266a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,9 @@ ARG GOLANG_VERSION=1.22.5 ARG CMAKE_VERSION=3.22.1 ARG CUDA_VERSION_11=11.3.1 +ARG CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86" ARG CUDA_VERSION_12=12.4.0 +ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a" ARG ROCM_VERSION=6.1.2 ARG JETPACK_6=r36.2.0 ARG JETPACK_5=r35.4.1 @@ -21,11 +23,12 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH COPY --from=llm-code / /go/src/github.com/ollama/ollama/ WORKDIR /go/src/github.com/ollama/ollama/llm/generate ARG CGO_CFLAGS +ARG CUDA_V11_ARCHITECTURES ENV GOARCH amd64 RUN --mount=type=cache,target=/root/.ccache \ OLLAMA_SKIP_STATIC_GENERATE=1 \ OLLAMA_SKIP_CPU_GENERATE=1 \ - CMAKE_CUDA_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86" \ + CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \ CUDA_VARIANT="_v11" \ bash gen_linux.sh @@ -37,12 +40,14 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH COPY --from=llm-code / /go/src/github.com/ollama/ollama/ WORKDIR /go/src/github.com/ollama/ollama/llm/generate ARG CGO_CFLAGS +ARG CUDA_V12_ARCHITECTURES ENV GOARCH amd64 RUN --mount=type=cache,target=/root/.ccache \ OLLAMA_SKIP_STATIC_GENERATE=1 \ OLLAMA_SKIP_CPU_GENERATE=1 \ - CMAKE_CUDA_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a" \ + CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \ CUDA_VARIANT="_v12" \ + OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \ bash gen_linux.sh FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-server-arm64 @@ -53,9 +58,31 @@ ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH COPY --from=llm-code / /go/src/github.com/ollama/ollama/ WORKDIR /go/src/github.com/ollama/ollama/llm/generate ARG CGO_CFLAGS +ARG CUDA_V11_ARCHITECTURES +ENV GOARCH arm64 +RUN OLLAMA_SKIP_STATIC_GENERATE=1 \ + OLLAMA_SKIP_CPU_GENERATE=1 \ + CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \ + CUDA_VARIANT="_v11" \ + bash gen_linux.sh + +FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_12-devel-rockylinux8 AS cuda-12-build-server-arm64 +ARG CMAKE_VERSION +COPY ./scripts/rh_linux_deps.sh / +RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh +ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH +COPY --from=llm-code / /go/src/github.com/ollama/ollama/ +WORKDIR /go/src/github.com/ollama/ollama/llm/generate +ARG CGO_CFLAGS +ARG CUDA_V12_ARCHITECTURES ENV GOARCH arm64 RUN --mount=type=cache,target=/root/.ccache \ - OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh + OLLAMA_SKIP_STATIC_GENERATE=1 \ + OLLAMA_SKIP_CPU_GENERATE=1 \ + CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \ + CUDA_VARIANT="_v12" \ + OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \ + bash gen_linux.sh FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6} AS cuda-build-jetpack6-arm64 ARG CMAKE_VERSION @@ -180,6 +207,8 @@ COPY . . COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ +COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ +COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ ## arm binary += 381M COPY --from=cuda-build-jetpack6-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=cuda-build-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ From 927d98a6cde43ffee3ef269cf013df5e96cbe767 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Fri, 12 Jul 2024 14:33:13 -0700 Subject: [PATCH 07/10] Add windows cuda v12 + v11 support --- .github/workflows/release.yaml | 93 ++++++++++++++++++++++++++++++++-- llm/generate/gen_windows.ps1 | 6 +-- scripts/build_windows.ps1 | 63 ++++++++++++++++++----- 3 files changed, 142 insertions(+), 20 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 9287f6f7..4bd68455 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -183,8 +183,8 @@ jobs: name: windows-rocm-deps path: dist/deps/* - # CUDA generation step - generate-windows-cuda: + # CUDA v11 generation step + generate-windows-cuda-v11: environment: release runs-on: windows env: @@ -256,7 +256,89 @@ jobs: cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\" - uses: actions/upload-artifact@v4 with: - name: generate-windows-cuda + name: generate-windows-cuda-v11 + path: | + llm/build/**/bin/* + dist/windows-amd64/** + - uses: actions/upload-artifact@v4 + with: + name: windows-cuda-deps + path: dist/deps/* + + # CUDA v12 generation step + generate-windows-cuda-v12: + environment: release + runs-on: windows + env: + KEY_CONTAINER: ${{ vars.KEY_CONTAINER }} + steps: + - uses: actions/checkout@v4 + - name: Set Version + shell: bash + run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV + - uses: 'google-github-actions/auth@v2' + with: + project_id: 'ollama' + credentials_json: '${{ secrets.GOOGLE_SIGNING_CREDENTIALS }}' + - run: echo "${{ vars.OLLAMA_CERT }}" > ollama_inc.crt + - name: install Windows SDK 8.1 to get signtool + run: | + $ErrorActionPreference = "Stop" + write-host "downloading SDK" + Invoke-WebRequest -Uri "https://go.microsoft.com/fwlink/p/?LinkId=323507" -OutFile "${env:RUNNER_TEMP}\sdksetup.exe" + Start-Process "${env:RUNNER_TEMP}\sdksetup.exe" -ArgumentList @("/q") -NoNewWindow -Wait + write-host "Win SDK 8.1 installed" + gci -path 'C:\Program Files (x86)\Windows Kits\' -r -fi 'signtool.exe' + - name: install signing plugin + run: | + $ErrorActionPreference = "Stop" + write-host "downloading plugin" + Invoke-WebRequest -Uri "https://github.com/GoogleCloudPlatform/kms-integrations/releases/download/cng-v1.0/kmscng-1.0-windows-amd64.zip" -OutFile "${env:RUNNER_TEMP}\plugin.zip" + Expand-Archive -Path "${env:RUNNER_TEMP}\plugin.zip" -DestinationPath ${env:RUNNER_TEMP}\plugin\ + write-host "Installing plugin" + & "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet + write-host "plugin installed" + - uses: actions/setup-go@v5 + with: + go-version-file: go.mod + cache: true + - name: 'Install CUDA' + run: | + $ErrorActionPreference = "Stop" + write-host "downloading CUDA Installer" + Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe" + write-host "Installing CUDA" + Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait + write-host "Completed CUDA" + $cudaPath=((resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0].path | split-path | split-path) + $cudaVer=($cudaPath | split-path -leaf ) -replace 'v(\d+).(\d+)', '$1_$2' + echo "$cudaPath\bin" >> $env:GITHUB_PATH + echo "CUDA_PATH=$cudaPath" >> $env:GITHUB_ENV + echo "CUDA_PATH_V${cudaVer}=$cudaPath" >> $env:GITHUB_ENV + echo "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVer}" >> $env:GITHUB_ENV + - name: 'Verify CUDA' + run: nvcc -V + - run: go get ./... + - name: go generate + run: | + $gopath=(get-command go).source | split-path -parent + $cudabin=(get-command nvcc).source | split-path + & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1" + cd $env:GITHUB_WORKSPACE + $env:CMAKE_SYSTEM_VERSION="10.0.22621.0" + $env:PATH="$gopath;$cudabin;$env:PATH" + $env:OLLAMA_SKIP_CPU_GENERATE="1" + go generate -x ./... + - name: 'gather cuda dependencies' + run: | + $NVIDIA_DIR=(resolve-path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*\bin\')[0] + md "dist\deps" + cp "${NVIDIA_DIR}\cudart64_*.dll" "dist\deps\" + cp "${NVIDIA_DIR}\cublas64_*.dll" "dist\deps\" + cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\" + - uses: actions/upload-artifact@v4 + with: + name: generate-windows-cuda-v12 path: | llm/build/**/bin/* dist/windows-amd64/** @@ -270,7 +352,8 @@ jobs: environment: release runs-on: windows needs: - - generate-windows-cuda + - generate-windows-cuda-v11 + - generate-windows-cuda-v12 - generate-windows-rocm - generate-windows-cpu env: @@ -314,7 +397,7 @@ jobs: name: generate-windows-cpu - uses: actions/download-artifact@v4 with: - name: generate-windows-cuda + name: generate-windows-cuda-v11 - uses: actions/download-artifact@v4 with: name: windows-cuda-deps diff --git a/llm/generate/gen_windows.ps1 b/llm/generate/gen_windows.ps1 index 1f8c96d8..42708d3e 100644 --- a/llm/generate/gen_windows.ps1 +++ b/llm/generate/gen_windows.ps1 @@ -261,7 +261,7 @@ function build_cuda() { if ((-not "${env:OLLAMA_SKIP_CUDA_GENERATE}") -and ("${script:CUDA_LIB_DIR}")) { # Then build cuda as a dynamically loaded library $nvcc = "$script:CUDA_LIB_DIR\nvcc.exe" - $script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename + $script:CUDA_VERSION=((get-item ($nvcc | split-path | split-path)).Basename -Split "\.")[0] if ($null -ne $script:CUDA_VERSION) { $script:CUDA_VARIANT="_"+$script:CUDA_VERSION } @@ -273,9 +273,9 @@ function build_cuda() { "-DGGML_CUDA=ON", "-DGGML_AVX=on", "-DGGML_AVX2=off", - "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_FLAGS=-t8", - "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}" + "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}", + "-DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=$env:CUDA_PATH" ) if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) { write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`"" diff --git a/scripts/build_windows.ps1 b/scripts/build_windows.ps1 index e8d851f4..50b60230 100644 --- a/scripts/build_windows.ps1 +++ b/scripts/build_windows.ps1 @@ -7,6 +7,7 @@ $ErrorActionPreference = "Stop" function checkEnv() { + $script:ARCH = $Env:PROCESSOR_ARCHITECTURE.ToLower() $script:TARGET_ARCH=$Env:PROCESSOR_ARCHITECTURE.ToLower() Write-host "Building for ${script:TARGET_ARCH}" write-host "Locating required tools and paths" @@ -15,26 +16,23 @@ function checkEnv() { $MSVC_INSTALL=(Get-CimInstance MSFT_VSInstance -Namespace root/cimv2/vs)[0].InstallLocation $env:VCToolsRedistDir=(get-item "${MSVC_INSTALL}\VC\Redist\MSVC\*")[0] } - # Try to find the CUDA dir - if ($null -eq $env:NVIDIA_DIR) { + # Locate CUDA versions + # Note: this assumes every version found will be built + $cudaList=(get-item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\" -ea 'silentlycontinue') + if ($cudaList.length -eq 0) { $d=(get-command -ea 'silentlycontinue' nvcc).path - if ($d -ne $null) { - $script:NVIDIA_DIR=($d| split-path -parent) - } else { - $cudaList=(get-item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\" -ea 'silentlycontinue') - if ($cudaList.length > 0) { - $script:NVIDIA_DIR=$cudaList[0] - } + if ($null -ne $d) { + $script:CUDA_DIRS=@($d| split-path -parent) } } else { - $script:NVIDIA_DIR=$env:NVIDIA_DIR + $script:CUDA_DIRS=$cudaList } $script:INNO_SETUP_DIR=(get-item "C:\Program Files*\Inno Setup*\")[0] $script:DEPS_DIR="${script:SRC_DIR}\dist\windows-${script:TARGET_ARCH}" $env:CGO_ENABLED="1" - echo "Checking version" + Write-Output "Checking version" if (!$env:VERSION) { $data=(git describe --tags --first-parent --abbrev=7 --long --dirty --always) $pattern="v(.+)" @@ -71,7 +69,48 @@ function checkEnv() { function buildOllama() { write-host "Building ollama CLI" if ($null -eq ${env:OLLAMA_SKIP_GENERATE}) { - & go generate ./... + Remove-Item -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}" + + # TODO - consider trying to parallelize this with Start-ThreadJob, but env vars can't be used to toggle + # which targets to build + + # Start by skipping CUDA to build everything else + pwsh -Command { $env:OLLAMA_SKIP_CUDA_GENERATE="1"; & go generate ./... } + if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} + + # Then skip everyhting else and build all the CUDA variants + foreach ($env:CUDA_LIB_DIR in $script:CUDA_DIRS) { + write-host "Building CUDA ${env:CUDA_LIB_DIR}" + + if ($env:CUDA_LIB_DIR.Contains("v12")) { + pwsh -Command { + $env:OLLAMA_SKIP_CUDA_GENERATE="" + $env:OLLAMA_SKIP_STATIC_GENERATE="1" + $env:OLLAMA_SKIP_CPU_GENERATE="1" + $env:OLLAMA_SKIP_ONEAPI_GENERATE="1" + $env:OLLAMA_SKIP_ROCM_GENERATE="1" + $env:CMAKE_CUDA_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a" + $env:OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" + $env:CUDA_PATH=split-path -path $env:CUDA_LIB_DIR -parent + $env:PATH="$envs:CUDA_LIB_DIR;$env:PATH" + & go generate ./... + } + } else { + pwsh -Command { + $env:OLLAMA_SKIP_CUDA_GENERATE="" + $env:OLLAMA_SKIP_STATIC_GENERATE="1" + $env:OLLAMA_SKIP_CPU_GENERATE="1" + $env:OLLAMA_SKIP_ONEAPI_GENERATE="1" + $env:OLLAMA_SKIP_ROCM_GENERATE="1" + $env:CMAKE_CUDA_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86" + $env:OLLAMA_CUSTOM_CUDA_DEFS="" + $env:CUDA_PATH=split-path -path $env:CUDA_LIB_DIR -parent + $env:PATH="$envs:CUDA_LIB_DIR;$env:PATH" + & go generate ./... + } + } + if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} + } if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} } else { write-host "Skipping generate step with OLLAMA_SKIP_GENERATE set" From 3b19cdba2a090772b2e886dbfbf712992fafe0cd Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Tue, 13 Aug 2024 13:30:28 -0700 Subject: [PATCH 08/10] Remove Jetpack --- Dockerfile | 42 ------------------------------------------ 1 file changed, 42 deletions(-) diff --git a/Dockerfile b/Dockerfile index e83a266a..99ba5b65 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,9 +5,6 @@ ARG CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86" ARG CUDA_VERSION_12=12.4.0 ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a" ARG ROCM_VERSION=6.1.2 -ARG JETPACK_6=r36.2.0 -ARG JETPACK_5=r35.4.1 -ARG JETPACK_4=r32.7.1 # Copy the minimal context we need to run the generate scripts FROM scratch AS llm-code @@ -84,39 +81,6 @@ RUN --mount=type=cache,target=/root/.ccache \ OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \ bash gen_linux.sh -FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6} AS cuda-build-jetpack6-arm64 -ARG CMAKE_VERSION -RUN apt-get update && apt-get install -y git curl && \ - curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1 -COPY --from=llm-code / /go/src/github.com/ollama/ollama/ -WORKDIR /go/src/github.com/ollama/ollama/llm/generate -ARG CGO_CFLAGS -ENV GOARCH arm64 -ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs -RUN --mount=type=cache,target=/root/.ccache \ - OLLAMA_SKIP_STATIC_GENERATE=1 \ - OLLAMA_SKIP_CPU_GENERATE=1 \ - CUDA_VARIANT="_jetpack6" \ - CUDA_DIST_DIR="/go/src/github.com/ollama/ollama/dist/linux-arm64/ollama_libs/cuda_jetpack6" \ - CMAKE_CUDA_ARCHITECTURES="87" \ - bash gen_linux.sh - -FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5} AS cuda-build-jetpack5-arm64 -ARG CMAKE_VERSION -RUN apt-get update && apt-get install -y git curl && \ - curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1 -COPY --from=llm-code / /go/src/github.com/ollama/ollama/ -WORKDIR /go/src/github.com/ollama/ollama/llm/generate -ARG CGO_CFLAGS -ENV GOARCH arm64 -ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs -RUN --mount=type=cache,target=/root/.ccache \ - OLLAMA_SKIP_STATIC_GENERATE=1 \ - OLLAMA_SKIP_CPU_GENERATE=1 \ - CUDA_VARIANT="_jetpack5" \ - CUDA_DIST_DIR="/go/src/github.com/ollama/ollama/dist/linux-arm64/ollama_libs/cuda_jetpack5" \ - CMAKE_CUDA_ARCHITECTURES="72;87" \ - bash gen_linux.sh FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64 ARG CMAKE_VERSION @@ -209,12 +173,6 @@ COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ di COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ -## arm binary += 381M -COPY --from=cuda-build-jetpack6-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ -COPY --from=cuda-build-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ -## arm binary += 330M -COPY --from=cuda-build-jetpack5-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ -COPY --from=cuda-build-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ ARG GOFLAGS ARG CGO_CFLAGS RUN --mount=type=cache,target=/root/.ccache \ From 88bb9e332877dfbba40030c19570fdbe00f41a21 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Wed, 14 Aug 2024 16:32:57 -0700 Subject: [PATCH 09/10] Adjust layout to bin+lib/ollama --- Dockerfile | 23 ++++++++++++++------ app/ollama.iss | 12 +++++------ docs/linux.md | 10 ++++----- envconfig/config.go | 6 +++--- gpu/amd_common.go | 2 +- gpu/amd_windows.go | 2 +- gpu/gpu.go | 4 ++-- llm/generate/gen_linux.sh | 6 +++--- llm/generate/gen_windows.ps1 | 42 ++++++++++++++++++------------------ scripts/build_windows.ps1 | 16 +++++++------- scripts/install.sh | 14 +++++++----- 11 files changed, 74 insertions(+), 63 deletions(-) diff --git a/Dockerfile b/Dockerfile index 99ba5b65..d4b86918 100644 --- a/Dockerfile +++ b/Dockerfile @@ -95,8 +95,8 @@ ARG AMDGPU_TARGETS ENV GOARCH amd64 RUN --mount=type=cache,target=/root/.ccache \ OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh -RUN mkdir -p ../../dist/linux-amd64/ollama_libs && \ - (cd /opt/rocm/lib && tar cf - rocblas/library) | (cd ../../dist/linux-amd64/ollama_libs && tar xf - ) +RUN mkdir -p ../../dist/linux-amd64/lib/ollama && \ + (cd /opt/rocm/lib && tar cf - rocblas/library) | (cd ../../dist/linux-amd64/lib/ollama && tar xf - ) FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64 ARG CMAKE_VERSION @@ -160,7 +160,7 @@ COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ l ARG GOFLAGS ARG CGO_CFLAGS RUN --mount=type=cache,target=/root/.ccache \ - go build -trimpath -o dist/linux-amd64/ollama . + go build -trimpath -o dist/linux-amd64/bin/ollama . # Intermediate stage used for ./scripts/build_linux.sh FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64 @@ -176,20 +176,29 @@ COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/llm/buil ARG GOFLAGS ARG CGO_CFLAGS RUN --mount=type=cache,target=/root/.ccache \ - go build -trimpath -o dist/linux-arm64/ollama . + go build -trimpath -o dist/linux-arm64/bin/ollama . + +# Strip out ROCm dependencies to keep the primary image lean +FROM --platform=linux/amd64 ubuntu:22.04 as amd64-libs-without-rocm +COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /scratch/ +RUN cd /scratch/ollama/ && rm -rf rocblas libamd* libdrm* libroc* libhip* libhsa* # Runtime stages FROM --platform=linux/amd64 ubuntu:22.04 as runtime-amd64 +COPY --from=amd64-libs-without-rocm /scratch/ /lib/ RUN apt-get update && apt-get install -y ca-certificates -COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/ollama /bin/ollama +COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/ + FROM --platform=linux/arm64 ubuntu:22.04 as runtime-arm64 +COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/ RUN apt-get update && apt-get install -y ca-certificates -COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/ollama /bin/ollama +COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/ # Radeon images are much larger so we keep it distinct from the CPU/CUDA image FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm RUN update-pciids -COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/ollama /bin/ollama +COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/ +RUN ln -s /opt/rocm/lib /lib/ollama EXPOSE 11434 ENV OLLAMA_HOST 0.0.0.0 diff --git a/app/ollama.iss b/app/ollama.iss index e9cf48ec..bce0a337 100644 --- a/app/ollama.iss +++ b/app/ollama.iss @@ -87,11 +87,11 @@ DialogFontSize=12 [Files] Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit -Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit -Source: "..\dist\windows-{#ARCH}\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs +Source: "..\ollama.exe"; DestDir: "{app}\bin"; Flags: ignoreversion 64bit +Source: "..\dist\windows-{#ARCH}\lib\ollama\runners\*"; DestDir: "{app}\lib\ollama\runners"; Flags: ignoreversion 64bit recursesubdirs Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion -Source: "..\dist\windows-amd64\ollama_libs\*"; DestDir: "{app}\ollama_libs\"; Flags: ignoreversion recursesubdirs +Source: "..\dist\windows-amd64\lib\ollama\*"; DestDir: "{app}\lib\ollama\"; Flags: ignoreversion recursesubdirs [Icons] Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico" @@ -99,7 +99,7 @@ Name: "{userstartup}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilen Name: "{userprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico" [Run] -Filename: "{cmd}"; Parameters: "/C set PATH={app};%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden +Filename: "{cmd}"; Parameters: "/C set PATH={app}\bin;%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden [UninstallRun] ; Filename: "{cmd}"; Parameters: "/C ""taskkill /im ''{#MyAppExeName}'' /f /t"; Flags: runhidden @@ -134,8 +134,8 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi [Registry] Root: HKCU; Subkey: "Environment"; \ - ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}"; \ - Check: NeedsAddPath('{app}') + ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}\bin"; \ + Check: NeedsAddPath('{app}\bin') [Code] diff --git a/docs/linux.md b/docs/linux.md index ec730656..3ed2bed0 100644 --- a/docs/linux.md +++ b/docs/linux.md @@ -20,13 +20,12 @@ GPU. ## Manual install -### Download the `ollama` binary +### Download the `ollama` tar file -Ollama is distributed as a self-contained binary. Download it to a directory in your PATH: +Ollama is distributed as a tar file including GPU library dependencies. ```bash -sudo curl -L https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama -sudo chmod +x /usr/bin/ollama +curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar -C /usr -zxf - ``` ### Adding Ollama as a startup service (recommended) @@ -96,8 +95,7 @@ curl -fsSL https://ollama.com/install.sh | sh Or by downloading the ollama binary: ```bash -sudo curl -L https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama -sudo chmod +x /usr/bin/ollama +curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar -C /usr -zxf - ``` ## Installing specific versions diff --git a/envconfig/config.go b/envconfig/config.go index 7f0976c0..7e45a4f5 100644 --- a/envconfig/config.go +++ b/envconfig/config.go @@ -174,7 +174,7 @@ func RunnersDir() (p string) { defer func() { if p == "" { - slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'") + slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama/runners'") } }() @@ -190,7 +190,7 @@ func RunnersDir() (p string) { } var paths []string - for _, root := range []string{filepath.Dir(exe), cwd} { + for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), ".."), cwd} { paths = append(paths, root, filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH), @@ -200,7 +200,7 @@ func RunnersDir() (p string) { // Try a few variations to improve developer experience when building from source in the local tree for _, path := range paths { - candidate := filepath.Join(path, "ollama_runners") + candidate := filepath.Join(path, "lib", "ollama", "runners") if _, err := os.Stat(candidate); err == nil { p = candidate break diff --git a/gpu/amd_common.go b/gpu/amd_common.go index 05747208..72d204f7 100644 --- a/gpu/amd_common.go +++ b/gpu/amd_common.go @@ -54,7 +54,7 @@ func commonAMDValidateLibDir() (string, error) { // Installer payload location if we're running the installed binary exe, err := os.Executable() if err == nil { - rocmTargetDir := filepath.Join(filepath.Dir(exe), "ollama_libs") + rocmTargetDir := filepath.Join(filepath.Dir(exe), "..", "lib", "ollama") if rocmLibUsable(rocmTargetDir) { slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir) return rocmTargetDir, nil diff --git a/gpu/amd_windows.go b/gpu/amd_windows.go index 5d25a966..a0ae7c96 100644 --- a/gpu/amd_windows.go +++ b/gpu/amd_windows.go @@ -153,7 +153,7 @@ func AMDValidateLibDir() (string, error) { // Installer payload (if we're running from some other location) localAppData := os.Getenv("LOCALAPPDATA") appDir := filepath.Join(localAppData, "Programs", "Ollama") - rocmTargetDir := filepath.Join(appDir, "ollama_libs") + rocmTargetDir := filepath.Join(appDir, "..", "lib", "ollama") if rocmLibUsable(rocmTargetDir) { slog.Debug("detected ollama installed ROCm at " + rocmTargetDir) return rocmTargetDir, nil diff --git a/gpu/gpu.go b/gpu/gpu.go index eb87807a..391c98a8 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -653,8 +653,8 @@ func GetDepDir() string { slog.Warn("failed to lookup working directory", "error", err) } // Scan for any of our dependeices, and pick first match - for _, root := range []string{filepath.Dir(appExe), cwd} { - libDep := "ollama_libs" + for _, root := range []string{filepath.Dir(appExe), filepath.Join(filepath.Dir(appExe), ".."), cwd} { + libDep := filepath.Join("lib", "ollama") if _, err := os.Stat(filepath.Join(root, libDep)); err == nil { return filepath.Join(root, libDep) } diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh index dc9dda5a..aef03f9a 100755 --- a/llm/generate/gen_linux.sh +++ b/llm/generate/gen_linux.sh @@ -189,7 +189,7 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS} -DGGML_STATIC=off" BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}" export LLAMA_SERVER_LDFLAGS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda" - CUDA_DIST_DIR="${CUDA_DIST_DIR:-${DIST_BASE}/ollama_libs}" + CUDA_DIST_DIR="${CUDA_DIST_DIR:-${DIST_BASE}/lib/ollama}" build install echo "Installing CUDA dependencies in ${CUDA_DIST_DIR}" @@ -213,7 +213,7 @@ if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then CC=icx CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL=ON -DGGML_SYCL_F16=OFF" BUILD_DIR="../build/linux/${ARCH}/oneapi" - ONEAPI_DIST_DIR="${DIST_BASE}/ollama_libs" + ONEAPI_DIST_DIR="${DIST_BASE}/lib/ollama" export LLAMA_SERVER_LDFLAGS="-fsycl -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb" DEBUG_FLAGS="" # icx compiles with -O0 if we pass -g, so we must remove it build @@ -260,7 +260,7 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then echo "Building custom ROCM GPU" fi BUILD_DIR="../build/linux/${ARCH}/rocm${ROCM_VARIANT}" - ROCM_DIST_DIR="${DIST_BASE}/ollama_libs" + ROCM_DIST_DIR="${DIST_BASE}/lib/ollama" # TODO figure out how to disable runpath (rpath) # export CMAKE_HIP_FLAGS="-fno-rtlib-add-rpath" # doesn't work export LLAMA_SERVER_LDFLAGS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu" diff --git a/llm/generate/gen_windows.ps1 b/llm/generate/gen_windows.ps1 index 42708d3e..4d43c9e2 100644 --- a/llm/generate/gen_windows.ps1 +++ b/llm/generate/gen_windows.ps1 @@ -35,7 +35,7 @@ function init_vars { ) $script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on") $script:ARCH = $Env:PROCESSOR_ARCHITECTURE.ToLower() - $script:DIST_BASE = "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_runners" + $script:DIST_BASE = "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\runners" md "$script:DIST_BASE" -ea 0 > $null if ($env:CGO_CFLAGS -contains "-g") { $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on", "-DCMAKE_BUILD_TYPE=RelWithDebInfo") @@ -286,11 +286,11 @@ function build_cuda() { sign install - md "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" -ea 0 > $null - write-host "copying CUDA dependencies to ${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" - cp "${script:CUDA_LIB_DIR}\cudart64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" - cp "${script:CUDA_LIB_DIR}\cublas64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" - cp "${script:CUDA_LIB_DIR}\cublasLt64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + md "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" -ea 0 > $null + write-host "copying CUDA dependencies to ${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" + cp "${script:CUDA_LIB_DIR}\cudart64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" + cp "${script:CUDA_LIB_DIR}\cublas64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" + cp "${script:CUDA_LIB_DIR}\cublasLt64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" } else { write-host "Skipping CUDA generation step" } @@ -324,17 +324,17 @@ function build_oneapi() { sign install - md "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" -ea 0 > $null - cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libirngmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" - cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libmmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" - cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_level_zero.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" - cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_unified_runtime.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" - cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_win_proxy_loader.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" - cp "${env:ONEAPI_ROOT}\compiler\latest\bin\svml_dispmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" - cp "${env:ONEAPI_ROOT}\compiler\latest\bin\sycl7.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" - cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_core.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" - cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_sycl_blas.4.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" - cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_tbb_thread.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + md "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" -ea 0 > $null + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libirngmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libmmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_level_zero.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_unified_runtime.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_win_proxy_loader.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\svml_dispmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\sycl7.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" + cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_core.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" + cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_sycl_blas.4.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" + cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_tbb_thread.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" } else { Write-Host "Skipping oneAPI generation step" } @@ -384,11 +384,11 @@ function build_rocm() { sign install - md "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\rocblas\library\" -ea 0 > $null - cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" - cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + md "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\rocblas\library\" -ea 0 > $null + cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" + cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" # amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs - cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\rocblas\library\" + cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\rocblas\library\" } else { write-host "Skipping ROCm generation step" } diff --git a/scripts/build_windows.ps1 b/scripts/build_windows.ps1 index 50b60230..9cebf1f4 100644 --- a/scripts/build_windows.ps1 +++ b/scripts/build_windows.ps1 @@ -122,8 +122,8 @@ function buildOllama() { /csp "Google Cloud KMS Provider" /kc ${env:KEY_CONTAINER} ollama.exe if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} } - New-Item -ItemType Directory -Path .\dist\windows-${script:TARGET_ARCH}\ -Force - cp .\ollama.exe .\dist\windows-${script:TARGET_ARCH}\ + New-Item -ItemType Directory -Path .\dist\windows-${script:TARGET_ARCH}\bin\ -Force + cp .\ollama.exe .\dist\windows-${script:TARGET_ARCH}\bin\ } function buildApp() { @@ -142,22 +142,22 @@ function buildApp() { function gatherDependencies() { write-host "Gathering runtime dependencies" cd "${script:SRC_DIR}" - md "${script:DEPS_DIR}\ollama_libs" -ea 0 > $null + md "${script:DEPS_DIR}\lib\ollama" -ea 0 > $null # TODO - this varies based on host build system and MSVC version - drive from dumpbin output # currently works for Win11 + MSVC 2019 + Cuda V11 - cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\msvcp140*.dll" "${script:DEPS_DIR}\ollama_libs\" - cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140.dll" "${script:DEPS_DIR}\ollama_libs\" - cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140_1.dll" "${script:DEPS_DIR}\ollama_libs\" + cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\msvcp140*.dll" "${script:DEPS_DIR}\lib\ollama\" + cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140.dll" "${script:DEPS_DIR}\lib\ollama\" + cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140_1.dll" "${script:DEPS_DIR}\lib\ollama\" foreach ($part in $("runtime", "stdio", "filesystem", "math", "convert", "heap", "string", "time", "locale", "environment")) { - cp "$env:VCToolsRedistDir\..\..\..\Tools\Llvm\x64\bin\api-ms-win-crt-${part}*.dll" "${script:DEPS_DIR}\ollama_libs\" + cp "$env:VCToolsRedistDir\..\..\..\Tools\Llvm\x64\bin\api-ms-win-crt-${part}*.dll" "${script:DEPS_DIR}\lib\ollama\" } cp "${script:SRC_DIR}\app\ollama_welcome.ps1" "${script:SRC_DIR}\dist\" if ("${env:KEY_CONTAINER}") { write-host "about to sign" - foreach ($file in (get-childitem "${script:DEPS_DIR}\ollama_libs\cu*.dll") + @("${script:SRC_DIR}\dist\ollama_welcome.ps1")){ + foreach ($file in (get-childitem "${script:DEPS_DIR}\lib\ollama\cu*.dll") + @("${script:SRC_DIR}\dist\ollama_welcome.ps1")){ write-host "signing $file" & "${script:SignTool}" sign /v /fd sha256 /t http://timestamp.digicert.com /f "${script:OLLAMA_CERT}" ` /csp "Google Cloud KMS Provider" /kc ${env:KEY_CONTAINER} $file diff --git a/scripts/install.sh b/scripts/install.sh index f0439b00..a02a0675 100644 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -66,7 +66,7 @@ fi for BINDIR in /usr/local/bin /usr/bin /bin; do echo $PATH | grep -q $BINDIR && break || continue done -OLLAMA_INSTALL_DIR=${OLLAMA_INSTALL_DIR:-${BINDIR}} +OLLAMA_INSTALL_DIR=$(dirname ${BINDIR}) status "Installing ollama to $OLLAMA_INSTALL_DIR" $SUDO install -o0 -g0 -m755 -d $BINDIR @@ -77,18 +77,22 @@ if curl -I --silent --fail --location "https://ollama.com/download/ollama-linux- "https://ollama.com/download/ollama-linux-${ARCH}.tgz${VER_PARAM}" | \ $SUDO tar -xzf - -C "$OLLAMA_INSTALL_DIR" BUNDLE=1 + if [ "$OLLAMA_INSTALL_DIR/bin/ollama" != "$BINDIR/ollama" ] ; then + status "Making ollama accessible in the PATH in $BINDIR" + $SUDO ln -sf "$OLLAMA_INSTALL_DIR/ollama" "$BINDIR/ollama" + fi else status "Downloading Linux ${ARCH} CLI" curl --fail --show-error --location --progress-bar -o "$TEMP_DIR/ollama"\ "https://ollama.com/download/ollama-linux-${ARCH}${VER_PARAM}" $SUDO install -o0 -g0 -m755 $TEMP_DIR/ollama $OLLAMA_INSTALL_DIR/ollama BUNDLE=0 + if [ "$OLLAMA_INSTALL_DIR/ollama" != "$BINDIR/ollama" ] ; then + status "Making ollama accessible in the PATH in $BINDIR" + $SUDO ln -sf "$OLLAMA_INSTALL_DIR/ollama" "$BINDIR/ollama" + fi fi -if [ "$OLLAMA_INSTALL_DIR/ollama" != "$BINDIR/ollama" ] ; then - status "Making ollama accessible in the PATH in $BINDIR" - $SUDO ln -sf "$OLLAMA_INSTALL_DIR/ollama" "$BINDIR/ollama" -fi install_success() { status 'The Ollama API is now available at 127.0.0.1:11434.' From f9e31da9463092d7b3661594788c259d6d55b3d9 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Thu, 15 Aug 2024 14:38:14 -0700 Subject: [PATCH 10/10] Review comments --- .github/workflows/release.yaml | 106 ++++++--------------------------- docs/linux.md | 8 +-- gpu/cuda_common.go | 2 +- gpu/gpu.go | 16 ++--- llm/generate/gen_windows.ps1 | 4 +- 5 files changed, 32 insertions(+), 104 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 4bd68455..508fbb35 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -183,10 +183,17 @@ jobs: name: windows-rocm-deps path: dist/deps/* - # CUDA v11 generation step - generate-windows-cuda-v11: + # CUDA generation step + generate-windows-cuda: environment: release runs-on: windows + strategy: + matrix: + cuda: + - version: "11" + url: 'https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe' + - version: "12" + url: 'https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe' env: KEY_CONTAINER: ${{ vars.KEY_CONTAINER }} steps: @@ -220,11 +227,11 @@ jobs: with: go-version-file: go.mod cache: true - - name: 'Install CUDA' + - name: 'Install CUDA ${{ matrix.cuda.version }}' run: | $ErrorActionPreference = "Stop" write-host "downloading CUDA Installer" - Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe" + Invoke-WebRequest -Uri "${{ matrix.cuda.url }}" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe" write-host "Installing CUDA" Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait write-host "Completed CUDA" @@ -256,7 +263,7 @@ jobs: cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\" - uses: actions/upload-artifact@v4 with: - name: generate-windows-cuda-v11 + name: generate-windows-cuda-${{ matrix.cuda.version }} path: | llm/build/**/bin/* dist/windows-amd64/** @@ -265,95 +272,13 @@ jobs: name: windows-cuda-deps path: dist/deps/* - # CUDA v12 generation step - generate-windows-cuda-v12: - environment: release - runs-on: windows - env: - KEY_CONTAINER: ${{ vars.KEY_CONTAINER }} - steps: - - uses: actions/checkout@v4 - - name: Set Version - shell: bash - run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV - - uses: 'google-github-actions/auth@v2' - with: - project_id: 'ollama' - credentials_json: '${{ secrets.GOOGLE_SIGNING_CREDENTIALS }}' - - run: echo "${{ vars.OLLAMA_CERT }}" > ollama_inc.crt - - name: install Windows SDK 8.1 to get signtool - run: | - $ErrorActionPreference = "Stop" - write-host "downloading SDK" - Invoke-WebRequest -Uri "https://go.microsoft.com/fwlink/p/?LinkId=323507" -OutFile "${env:RUNNER_TEMP}\sdksetup.exe" - Start-Process "${env:RUNNER_TEMP}\sdksetup.exe" -ArgumentList @("/q") -NoNewWindow -Wait - write-host "Win SDK 8.1 installed" - gci -path 'C:\Program Files (x86)\Windows Kits\' -r -fi 'signtool.exe' - - name: install signing plugin - run: | - $ErrorActionPreference = "Stop" - write-host "downloading plugin" - Invoke-WebRequest -Uri "https://github.com/GoogleCloudPlatform/kms-integrations/releases/download/cng-v1.0/kmscng-1.0-windows-amd64.zip" -OutFile "${env:RUNNER_TEMP}\plugin.zip" - Expand-Archive -Path "${env:RUNNER_TEMP}\plugin.zip" -DestinationPath ${env:RUNNER_TEMP}\plugin\ - write-host "Installing plugin" - & "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet - write-host "plugin installed" - - uses: actions/setup-go@v5 - with: - go-version-file: go.mod - cache: true - - name: 'Install CUDA' - run: | - $ErrorActionPreference = "Stop" - write-host "downloading CUDA Installer" - Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe" - write-host "Installing CUDA" - Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait - write-host "Completed CUDA" - $cudaPath=((resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0].path | split-path | split-path) - $cudaVer=($cudaPath | split-path -leaf ) -replace 'v(\d+).(\d+)', '$1_$2' - echo "$cudaPath\bin" >> $env:GITHUB_PATH - echo "CUDA_PATH=$cudaPath" >> $env:GITHUB_ENV - echo "CUDA_PATH_V${cudaVer}=$cudaPath" >> $env:GITHUB_ENV - echo "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVer}" >> $env:GITHUB_ENV - - name: 'Verify CUDA' - run: nvcc -V - - run: go get ./... - - name: go generate - run: | - $gopath=(get-command go).source | split-path -parent - $cudabin=(get-command nvcc).source | split-path - & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1" - cd $env:GITHUB_WORKSPACE - $env:CMAKE_SYSTEM_VERSION="10.0.22621.0" - $env:PATH="$gopath;$cudabin;$env:PATH" - $env:OLLAMA_SKIP_CPU_GENERATE="1" - go generate -x ./... - - name: 'gather cuda dependencies' - run: | - $NVIDIA_DIR=(resolve-path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*\bin\')[0] - md "dist\deps" - cp "${NVIDIA_DIR}\cudart64_*.dll" "dist\deps\" - cp "${NVIDIA_DIR}\cublas64_*.dll" "dist\deps\" - cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\" - - uses: actions/upload-artifact@v4 - with: - name: generate-windows-cuda-v12 - path: | - llm/build/**/bin/* - dist/windows-amd64/** - - uses: actions/upload-artifact@v4 - with: - name: windows-cuda-deps - path: dist/deps/* # Import the prior generation steps and build the final windows assets build-windows: environment: release runs-on: windows needs: - - generate-windows-cuda-v11 - - generate-windows-cuda-v12 + - generate-windows-cuda - generate-windows-rocm - generate-windows-cpu env: @@ -397,7 +322,10 @@ jobs: name: generate-windows-cpu - uses: actions/download-artifact@v4 with: - name: generate-windows-cuda-v11 + name: generate-windows-cuda-11 + - uses: actions/download-artifact@v4 + with: + name: generate-windows-cuda-12 - uses: actions/download-artifact@v4 with: name: windows-cuda-deps diff --git a/docs/linux.md b/docs/linux.md index 3ed2bed0..d1d5892c 100644 --- a/docs/linux.md +++ b/docs/linux.md @@ -20,12 +20,12 @@ GPU. ## Manual install -### Download the `ollama` tar file +### Download `ollama` -Ollama is distributed as a tar file including GPU library dependencies. +Download and extract the Linux package: ```bash -curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar -C /usr -zxf - +curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr ``` ### Adding Ollama as a startup service (recommended) @@ -95,7 +95,7 @@ curl -fsSL https://ollama.com/install.sh | sh Or by downloading the ollama binary: ```bash -curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar -C /usr -zxf - +curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr ``` ## Installing specific versions diff --git a/gpu/cuda_common.go b/gpu/cuda_common.go index defaa60a..827cc9b4 100644 --- a/gpu/cuda_common.go +++ b/gpu/cuda_common.go @@ -28,7 +28,7 @@ func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) { return "CUDA_VISIBLE_DEVICES", strings.Join(ids, ",") } -func cudaGetVariant(gpuInfo CudaGPUInfo) string { +func cudaVariant(gpuInfo CudaGPUInfo) string { if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" { if CudaTegra != "" { ver := strings.Split(CudaTegra, ".") diff --git a/gpu/gpu.go b/gpu/gpu.go index 391c98a8..72d237a6 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -225,7 +225,7 @@ func GetGPUInfo() GpuInfoList { return GpuInfoList{cpus[0].GpuInfo} } - depPath := GetDepDir() + depPath := LibraryDir() // Load ALL libraries cHandles = initCudaHandles() @@ -264,20 +264,20 @@ func GetGPUInfo() GpuInfoList { gpuInfo.computeMajor = int(memInfo.major) gpuInfo.computeMinor = int(memInfo.minor) gpuInfo.MinimumMemory = cudaMinimumMemory - cudaVariant := cudaGetVariant(gpuInfo) + variant := cudaVariant(gpuInfo) if depPath != "" { gpuInfo.DependencyPath = depPath // Check for variant specific directory - if cudaVariant != "" { - if _, err := os.Stat(filepath.Join(depPath, "cuda_"+cudaVariant)); err == nil { - gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+cudaVariant) + if variant != "" { + if _, err := os.Stat(filepath.Join(depPath, "cuda_"+variant)); err == nil { + gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+variant) } } } gpuInfo.Name = C.GoString(&memInfo.gpu_name[0]) gpuInfo.DriverMajor = driverMajor gpuInfo.DriverMinor = driverMinor - gpuInfo.Variant = cudaGetVariant(gpuInfo) + gpuInfo.Variant = variant // query the management library as well so we can record any skew between the two // which represents overhead on the GPU we must set aside on subsequent updates @@ -468,7 +468,7 @@ func FindGPULibs(baseLibName string, defaultPatterns []string) []string { slog.Debug("Searching for GPU library", "name", baseLibName) // Start with our bundled libraries - patterns := []string{filepath.Join(GetDepDir(), baseLibName)} + patterns := []string{filepath.Join(LibraryDir(), baseLibName)} switch runtime.GOOS { case "windows": @@ -642,7 +642,7 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) { } } -func GetDepDir() string { +func LibraryDir() string { // On Windows/linux we bundle the dependencies at the same level as the executable appExe, err := os.Executable() if err != nil { diff --git a/llm/generate/gen_windows.ps1 b/llm/generate/gen_windows.ps1 index 4d43c9e2..cbdfd09f 100644 --- a/llm/generate/gen_windows.ps1 +++ b/llm/generate/gen_windows.ps1 @@ -117,7 +117,7 @@ function build { if ($cmakeDefs -contains "-G") { $extra=@("-j8") } else { - $extra= @("--", "/p:CL_MPcount=8") + $extra= @("--", "/maxCpuCount:8") } write-host "building with: cmake --build $script:buildDir --config $script:config $($script:cmakeTargets | ForEach-Object { `"--target`", $_ }) $extra" & cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ }) $extra @@ -273,7 +273,7 @@ function build_cuda() { "-DGGML_CUDA=ON", "-DGGML_AVX=on", "-DGGML_AVX2=off", - "-DCMAKE_CUDA_FLAGS=-t8", + "-DCMAKE_CUDA_FLAGS=-t6", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}", "-DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=$env:CUDA_PATH" )