From 74d45f010276c2f2653f3ca8c4f76cb0552fb46e Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Mon, 8 Jul 2024 12:50:11 -0700 Subject: [PATCH] Refactor linux packaging This adjusts linux to follow a similar model to windows with a discrete archive (zip/tgz) to carry the primary executable and dependent libraries. Runners are still carried as payloads inside the main binary. Darwin retains the payload model where the go binary is fully self-contained. --- .github/workflows/release.yaml | 1 - Dockerfile | 29 ++++++------ app/ollama.iss | 11 +---- envconfig/config.go | 4 +- gpu/amd_common.go | 2 +- gpu/amd_windows.go | 2 +- gpu/gpu.go | 50 ++++++++++++++------- gpu/gpu_linux.go | 2 +- llm/ext_server/CMakeLists.txt | 3 +- llm/generate/gen_common.sh | 17 ++++++- llm/generate/gen_linux.sh | 81 ++++++++++++++++------------------ llm/generate/gen_windows.ps1 | 43 +++++++++--------- llm/server.go | 12 +++-- scripts/build_linux.sh | 10 ++--- scripts/build_windows.ps1 | 12 ++--- scripts/install.sh | 31 ++++++++++--- 16 files changed, 171 insertions(+), 139 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 5ae630c3..9287f6f7 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -363,7 +363,6 @@ jobs: - run: | ./scripts/build_linux.sh ./scripts/build_docker.sh - mv dist/deps/* dist/ - uses: actions/upload-artifact@v4 with: name: dist-linux-amd64 diff --git a/Dockerfile b/Dockerfile index c8efdd8a..120ddc21 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,6 +18,7 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH COPY --from=llm-code / /go/src/github.com/ollama/ollama/ WORKDIR /go/src/github.com/ollama/ollama/llm/generate ARG CGO_CFLAGS +ENV GOARCH amd64 RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64 @@ -28,6 +29,7 @@ ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH COPY 
--from=llm-code / /go/src/github.com/ollama/ollama/ WORKDIR /go/src/github.com/ollama/ollama/llm/generate ARG CGO_CFLAGS +ENV GOARCH arm64 RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64 @@ -40,15 +42,10 @@ COPY --from=llm-code / /go/src/github.com/ollama/ollama/ WORKDIR /go/src/github.com/ollama/ollama/llm/generate ARG CGO_CFLAGS ARG AMDGPU_TARGETS +ENV GOARCH amd64 RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh -RUN mkdir /tmp/scratch && \ - for dep in $(zcat /go/src/github.com/ollama/ollama/llm/build/linux/x86_64/rocm*/bin/deps.txt.gz) ; do \ - cp ${dep} /tmp/scratch/ || exit 1 ; \ - done && \ - (cd /opt/rocm/lib && tar cf - rocblas/library) | (cd /tmp/scratch/ && tar xf - ) && \ - mkdir -p /go/src/github.com/ollama/ollama/dist/deps/ && \ - (cd /tmp/scratch/ && tar czvf /go/src/github.com/ollama/ollama/dist/deps/ollama-linux-amd64-rocm.tgz . ) - +RUN mkdir -p ../../dist/linux-amd64/ollama_libs && \ + (cd /opt/rocm/lib && tar cf - rocblas/library) | (cd ../../dist/linux-amd64/ollama_libs && tar xf - ) FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64 ARG CMAKE_VERSION @@ -59,6 +56,7 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH COPY --from=llm-code / /go/src/github.com/ollama/ollama/ ARG OLLAMA_CUSTOM_CPU_DEFS ARG CGO_CFLAGS +ENV GOARCH amd64 WORKDIR /go/src/github.com/ollama/ollama/llm/generate FROM --platform=linux/amd64 cpu-builder-amd64 AS static-build-amd64 @@ -79,6 +77,7 @@ ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH COPY --from=llm-code / /go/src/github.com/ollama/ollama/ ARG OLLAMA_CUSTOM_CPU_DEFS ARG CGO_CFLAGS +ENV GOARCH arm64 WORKDIR /go/src/github.com/ollama/ollama/llm/generate FROM --platform=linux/arm64 cpu-builder-arm64 AS static-build-arm64 @@ -95,12 +94,13 @@ COPY . . 
COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ +COPY --from=cuda-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/ COPY --from=cuda-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ +COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/ COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ -COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/deps/ ./dist/deps/ ARG GOFLAGS ARG CGO_CFLAGS -RUN go build -trimpath . +RUN go build -trimpath -o dist/linux-amd64/ollama . # Intermediate stage used for ./scripts/build_linux.sh FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64 @@ -109,23 +109,24 @@ ARG GOLANG_VERSION WORKDIR /go/src/github.com/ollama/ollama COPY . . COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ +COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ ARG GOFLAGS ARG CGO_CFLAGS -RUN go build -trimpath . +RUN go build -trimpath -o dist/linux-arm64/ollama . 
# Runtime stages FROM --platform=linux/amd64 ubuntu:22.04 as runtime-amd64 RUN apt-get update && apt-get install -y ca-certificates -COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama +COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/ollama /bin/ollama FROM --platform=linux/arm64 ubuntu:22.04 as runtime-arm64 RUN apt-get update && apt-get install -y ca-certificates -COPY --from=build-arm64 /go/src/github.com/ollama/ollama/ollama /bin/ollama +COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/ollama /bin/ollama # Radeon images are much larger so we keep it distinct from the CPU/CUDA image FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm RUN update-pciids -COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama +COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/ollama /bin/ollama EXPOSE 11434 ENV OLLAMA_HOST 0.0.0.0 diff --git a/app/ollama.iss b/app/ollama.iss index dc6178f7..e9cf48ec 100644 --- a/app/ollama.iss +++ b/app/ollama.iss @@ -91,16 +91,7 @@ Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit Source: "..\dist\windows-{#ARCH}\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion -#if DirExists("..\dist\windows-amd64\cuda") - Source: "..\dist\windows-amd64\cuda\*"; DestDir: "{app}\cuda\"; Flags: ignoreversion recursesubdirs -#endif -#if DirExists("..\dist\windows-amd64\oneapi") - Source: "..\dist\windows-amd64\oneapi\*"; DestDir: "{app}\oneapi\"; Flags: ignoreversion recursesubdirs -#endif -#if DirExists("..\dist\windows-amd64\rocm") - Source: "..\dist\windows-amd64\rocm\*"; DestDir: "{app}\rocm\"; Flags: ignoreversion recursesubdirs -#endif - +Source: "..\dist\windows-amd64\ollama_libs\*"; DestDir: 
"{app}\ollama_libs\"; Flags: ignoreversion recursesubdirs [Icons] Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico" diff --git a/envconfig/config.go b/envconfig/config.go index b82b773d..7f0976c0 100644 --- a/envconfig/config.go +++ b/envconfig/config.go @@ -193,8 +193,8 @@ func RunnersDir() (p string) { for _, root := range []string{filepath.Dir(exe), cwd} { paths = append(paths, root, - filepath.Join(root, "windows-"+runtime.GOARCH), - filepath.Join(root, "dist", "windows-"+runtime.GOARCH), + filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH), + filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH), ) } diff --git a/gpu/amd_common.go b/gpu/amd_common.go index 2839cb7c..05747208 100644 --- a/gpu/amd_common.go +++ b/gpu/amd_common.go @@ -54,7 +54,7 @@ func commonAMDValidateLibDir() (string, error) { // Installer payload location if we're running the installed binary exe, err := os.Executable() if err == nil { - rocmTargetDir := filepath.Join(filepath.Dir(exe), "rocm") + rocmTargetDir := filepath.Join(filepath.Dir(exe), "ollama_libs") if rocmLibUsable(rocmTargetDir) { slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir) return rocmTargetDir, nil diff --git a/gpu/amd_windows.go b/gpu/amd_windows.go index edabeb43..5d25a966 100644 --- a/gpu/amd_windows.go +++ b/gpu/amd_windows.go @@ -153,7 +153,7 @@ func AMDValidateLibDir() (string, error) { // Installer payload (if we're running from some other location) localAppData := os.Getenv("LOCALAPPDATA") appDir := filepath.Join(localAppData, "Programs", "Ollama") - rocmTargetDir := filepath.Join(appDir, "rocm") + rocmTargetDir := filepath.Join(appDir, "ollama_libs") if rocmLibUsable(rocmTargetDir) { slog.Debug("detected ollama installed ROCm at " + rocmTargetDir) return rocmTargetDir, nil diff --git a/gpu/gpu.go b/gpu/gpu.go index dc124a3e..d0ae0f34 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -229,11 +229,7 @@ func GetGPUInfo() GpuInfoList { return 
GpuInfoList{cpus[0].GpuInfo} } - // On windows we bundle the nvidia library one level above the runner dir - depPath := "" - if runtime.GOOS == "windows" && envconfig.RunnersDir() != "" { - depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir()), "cuda") - } + depPath := GetDepDir() // Load ALL libraries cHandles = initCudaHandles() @@ -306,13 +302,6 @@ func GetGPUInfo() GpuInfoList { if envconfig.IntelGPU() { oHandles = initOneAPIHandles() if oHandles != nil && oHandles.oneapi != nil { - - // On windows we bundle the oneapi library one level above the runner dir - depPath = "" - if runtime.GOOS == "windows" && envconfig.RunnersDir() != "" { - depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir()), "oneapi") - } - for d := range oHandles.oneapi.num_drivers { if oHandles.oneapi == nil { // shouldn't happen @@ -467,10 +456,12 @@ func GetGPUInfo() GpuInfoList { func FindGPULibs(baseLibName string, defaultPatterns []string) []string { // Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them var ldPaths []string - var patterns []string gpuLibPaths := []string{} slog.Debug("Searching for GPU library", "name", baseLibName) + // Start with our bundled libraries + patterns := []string{filepath.Join(GetDepDir(), baseLibName)} + switch runtime.GOOS { case "windows": ldPaths = strings.Split(os.Getenv("PATH"), ";") @@ -479,13 +470,14 @@ func FindGPULibs(baseLibName string, defaultPatterns []string) []string { default: return gpuLibPaths } - // Start with whatever we find in the PATH/LD_LIBRARY_PATH + + // Then with whatever we find in the PATH/LD_LIBRARY_PATH for _, ldPath := range ldPaths { d, err := filepath.Abs(ldPath) if err != nil { continue } - patterns = append(patterns, filepath.Join(d, baseLibName+"*")) + patterns = append(patterns, filepath.Join(d, baseLibName)) } patterns = append(patterns, defaultPatterns...) 
slog.Debug("gpu library search", "globs", patterns) @@ -641,3 +633,31 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) { return "", "" } } + +func GetDepDir() string { + // On Windows/linux we bundle the dependencies at the same level as the executable + appExe, err := os.Executable() + if err != nil { + slog.Warn("failed to lookup executable path", "error", err) + } + cwd, err := os.Getwd() + if err != nil { + slog.Warn("failed to lookup working directory", "error", err) + } + // Scan for any of our dependeices, and pick first match + for _, root := range []string{filepath.Dir(appExe), cwd} { + libDep := "ollama_libs" + if _, err := os.Stat(filepath.Join(root, libDep)); err == nil { + return filepath.Join(root, libDep) + } + // Developer mode, local build + if _, err := os.Stat(filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil { + return filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep) + } + if _, err := os.Stat(filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil { + return filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep) + } + } + slog.Warn("unable to locate gpu dependency libraries") + return "" +} diff --git a/gpu/gpu_linux.go b/gpu/gpu_linux.go index d6d2675c..d4d20bc4 100644 --- a/gpu/gpu_linux.go +++ b/gpu/gpu_linux.go @@ -47,7 +47,7 @@ var ( CudartMgmtName = "libcudart.so*" NvcudaMgmtName = "libcuda.so*" NvmlMgmtName = "" // not currently wired on linux - OneapiMgmtName = "libze_intel_gpu.so" + OneapiMgmtName = "libze_intel_gpu.so*" ) func GetCPUMem() (memInfo, error) { diff --git a/llm/ext_server/CMakeLists.txt b/llm/ext_server/CMakeLists.txt index bfc97c63..90fd0ef2 100644 --- a/llm/ext_server/CMakeLists.txt +++ b/llm/ext_server/CMakeLists.txt @@ -1,12 +1,13 @@ set(TARGET ollama_llama_server) option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON) +set(LLAMA_SERVER_LDFLAGS $ENV{LLAMA_SERVER_LDFLAGS}) 
include_directories(${CMAKE_CURRENT_SOURCE_DIR}) add_executable(${TARGET} server.cpp utils.hpp json.hpp httplib.h) install(TARGETS ${TARGET} RUNTIME) target_compile_definitions(${TARGET} PRIVATE SERVER_VERBOSE=$ ) -target_link_libraries(${TARGET} PRIVATE ggml llama common llava ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE ggml llama common llava ${CMAKE_THREAD_LIBS_INIT} ${LLAMA_SERVER_LDFLAGS}) if (WIN32) TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32) endif() diff --git a/llm/generate/gen_common.sh b/llm/generate/gen_common.sh index da1b0688..f1541f2a 100644 --- a/llm/generate/gen_common.sh +++ b/llm/generate/gen_common.sh @@ -9,11 +9,14 @@ init_vars() { ARCH="arm64" ;; *) - ARCH=$(uname -m | sed -e "s/aarch64/arm64/g") + echo "GOARCH must be set" + echo "this script is meant to be run from within go generate" + exit 1 + ;; esac LLAMACPP_DIR=../llama.cpp - CMAKE_DEFS="" + CMAKE_DEFS="-DCMAKE_SKIP_RPATH=on" CMAKE_TARGETS="--target ollama_llama_server" if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on ${CMAKE_DEFS}" @@ -27,6 +30,7 @@ init_vars() { WHOLE_ARCHIVE="-Wl,-force_load" NO_WHOLE_ARCHIVE="" GCC_ARCH="-arch ${ARCH}" + DIST_BASE=../../dist/darwin-${GOARCH}/ ;; "Linux") LIB_EXT="so" @@ -35,6 +39,7 @@ init_vars() { # Cross compiling not supported on linux - Use docker GCC_ARCH="" + DIST_BASE=../../dist/linux-${GOARCH}/ ;; *) ;; @@ -105,6 +110,14 @@ compress() { echo "Finished compression" } +install() { + echo "Installing libraries to bin dir ${BUILD_DIR}/bin/" + for lib in $(find ${BUILD_DIR} -name \*.${LIB_EXT}); do + rm -f "${BUILD_DIR}/bin/$(basename ${lib})" + cp -af "${lib}" "${BUILD_DIR}/bin/" + done +} + # Keep the local tree clean after we're done with the build cleanup() { (cd ${LLAMACPP_DIR}/ && git checkout CMakeLists.txt) diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh index 
db2c6c30..70fc0313 100755 --- a/llm/generate/gen_linux.sh +++ b/llm/generate/gen_linux.sh @@ -51,7 +51,7 @@ if [ -z "${CUDACXX}" ]; then export CUDACXX=$(command -v nvcc) fi fi -COMMON_CMAKE_DEFS="-DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off" +COMMON_CMAKE_DEFS="-DCMAKE_SKIP_RPATH=on -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off" source $(dirname $0)/gen_common.sh init_vars git_module_setup @@ -77,10 +77,11 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then init_vars echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\"" - CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}" + CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}" BUILD_DIR="../build/linux/${ARCH}/cpu" echo "Building custom CPU" build + install compress else # Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512 @@ -93,7 +94,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then # -DGGML_AVX512_VBMI -- 2018 Intel Cannon Lake # -DGGML_AVX512_VNNI -- 2021 Intel Alder Lake - COMMON_CPU_DEFS="-DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off" + COMMON_CPU_DEFS="-DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off" if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu" ]; then # # CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta) @@ -103,6 +104,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then BUILD_DIR="../build/linux/${ARCH}/cpu" echo "Building LCD CPU" build + install compress fi @@ -120,6 +122,7 @@ if [ -z 
"${OLLAMA_SKIP_CPU_GENERATE}" ]; then BUILD_DIR="../build/linux/${ARCH}/cpu_avx" echo "Building AVX CPU" build + install compress fi @@ -133,6 +136,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then BUILD_DIR="../build/linux/${ARCH}/cpu_avx2" echo "Building AVX2 CPU" build + install compress fi fi @@ -178,29 +182,18 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES} ${OLLAMA_CUSTOM_CUDA_DEFS}" echo "Building custom CUDA GPU" else - CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_FLAGS=-t8 -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}" + CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}" fi - CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS}" + export CUDAFLAGS="-t8" + CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS} -DGGML_STATIC=off" BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}" - EXTRA_LIBS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda" + export LLAMA_SERVER_LDFLAGS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda" + CUDA_DIST_DIR="${DIST_BASE}/ollama_libs" build - - # Carry the CUDA libs as payloads to help reduce dependency burden on users - # - # TODO - in the future we may shift to packaging these separately and conditionally - # downloading them in the install script. 
- DEPS="$(ldd ${BUILD_DIR}/bin/ollama_llama_server )" - for lib in libcudart.so libcublas.so libcublasLt.so ; do - DEP=$(echo "${DEPS}" | grep ${lib} | cut -f1 -d' ' | xargs || true) - if [ -n "${DEP}" -a -e "${CUDA_LIB_DIR}/${DEP}" ]; then - cp "${CUDA_LIB_DIR}/${DEP}" "${BUILD_DIR}/bin/" - elif [ -e "${CUDA_LIB_DIR}/${lib}.${CUDA_MAJOR}" ]; then - cp "${CUDA_LIB_DIR}/${lib}.${CUDA_MAJOR}" "${BUILD_DIR}/bin/" - elif [ -e "${CUDART_LIB_DIR}/${lib}" ]; then - cp -d ${CUDART_LIB_DIR}/${lib}* "${BUILD_DIR}/bin/" - else - cp -d "${CUDA_LIB_DIR}/${lib}*" "${BUILD_DIR}/bin/" - fi + install + mkdir -p "${CUDA_DIST_DIR}" + for lib in ${CUDA_LIB_DIR}/libcudart.so* ${CUDA_LIB_DIR}/libcublas.so* ${CUDA_LIB_DIR}/libcublasLt.so* ; do + cp -a "${lib}" "${CUDA_DIST_DIR}" done compress @@ -218,21 +211,24 @@ if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then CC=icx CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL=ON -DGGML_SYCL_F16=OFF" BUILD_DIR="../build/linux/${ARCH}/oneapi" - EXTRA_LIBS="-fsycl -Wl,-rpath,${ONEAPI_ROOT}/compiler/latest/lib,-rpath,${ONEAPI_ROOT}/mkl/latest/lib,-rpath,${ONEAPI_ROOT}/tbb/latest/lib,-rpath,${ONEAPI_ROOT}/compiler/latest/opt/oclfpga/linux64/lib -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb" + ONEAPI_DIST_DIR="${DIST_BASE}/ollama_libs" + export LLAMA_SERVER_LDFLAGS="-fsycl -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb" DEBUG_FLAGS="" # icx compiles with -O0 if we pass -g, so we must remove it build # copy oneAPI dependencies + mkdir -p "${ONEAPI_DIST_DIR}" for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e sycl -e mkl -e tbb); do - cp "${dep}" "${BUILD_DIR}/bin/" + cp -a "${dep}" "${ONEAPI_DIST_DIR}" done - cp "${ONEAPI_ROOT}/compiler/latest/lib/libOpenCL.so" "${BUILD_DIR}/bin/" - cp "${ONEAPI_ROOT}/compiler/latest/lib/libimf.so" "${BUILD_DIR}/bin/" - cp 
"${ONEAPI_ROOT}/compiler/latest/lib/libintlc.so.5" "${BUILD_DIR}/bin/" - cp "${ONEAPI_ROOT}/compiler/latest/lib/libirng.so" "${BUILD_DIR}/bin/" - cp "${ONEAPI_ROOT}/compiler/latest/lib/libpi_level_zero.so" "${BUILD_DIR}/bin/" - cp "${ONEAPI_ROOT}/compiler/latest/lib/libsvml.so" "${BUILD_DIR}/bin/" - cp "${ONEAPI_ROOT}/compiler/latest/lib/libur_loader.so.0" "${BUILD_DIR}/bin/" + cp "${ONEAPI_ROOT}/compiler/latest/lib/libOpenCL.so" "${ONEAPI_DIST_DIR}" + cp "${ONEAPI_ROOT}/compiler/latest/lib/libimf.so" "${ONEAPI_DIST_DIR}" + cp "${ONEAPI_ROOT}/compiler/latest/lib/libintlc.so.5" "${ONEAPI_DIST_DIR}" + cp "${ONEAPI_ROOT}/compiler/latest/lib/libirng.so" "${ONEAPI_DIST_DIR}" + cp "${ONEAPI_ROOT}/compiler/latest/lib/libpi_level_zero.so" "${ONEAPI_DIST_DIR}" + cp "${ONEAPI_ROOT}/compiler/latest/lib/libsvml.so" "${ONEAPI_DIST_DIR}" + cp "${ONEAPI_ROOT}/compiler/latest/lib/libur_loader.so.0" "${ONEAPI_DIST_DIR}" + install compress fi @@ -262,21 +258,18 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then echo "Building custom ROCM GPU" fi BUILD_DIR="../build/linux/${ARCH}/rocm${ROCM_VARIANT}" - EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,\$ORIGIN/../../rocm/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu" + ROCM_DIST_DIR="${DIST_BASE}/ollama_libs" + # TODO figure out how to disable runpath (rpath) + # export CMAKE_HIP_FLAGS="-fno-rtlib-add-rpath" # doesn't work + export LLAMA_SERVER_LDFLAGS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu" build - # Record the ROCM dependencies - rm -f "${BUILD_DIR}/bin/deps.txt" - touch "${BUILD_DIR}/bin/deps.txt" - for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo ); do - echo "${dep}" >> "${BUILD_DIR}/bin/deps.txt" + # copy the ROCM 
dependencies + mkdir -p "${ROCM_DIST_DIR}" + for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -v "${ARCH}/rocm${ROCM_VARIANT}" | grep -e rocm -e amdgpu -e libtinfo ); do + cp -a "${dep}"* "${ROCM_DIST_DIR}" done - # bomb out if for some reason we didn't get a few deps - if [ $(cat "${BUILD_DIR}/bin/deps.txt" | wc -l ) -lt 8 ] ; then - cat "${BUILD_DIR}/bin/deps.txt" - echo "ERROR: deps file short" - exit 1 - fi + install compress fi diff --git a/llm/generate/gen_windows.ps1 b/llm/generate/gen_windows.ps1 index d8bce92d..1f8c96d8 100644 --- a/llm/generate/gen_windows.ps1 +++ b/llm/generate/gen_windows.ps1 @@ -286,12 +286,11 @@ function build_cuda() { sign install - rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\" - md "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\" -ea 0 > $null - write-host "copying CUDA dependencies to ${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\" - cp "${script:CUDA_LIB_DIR}\cudart64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\" - cp "${script:CUDA_LIB_DIR}\cublas64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\" - cp "${script:CUDA_LIB_DIR}\cublasLt64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\" + md "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" -ea 0 > $null + write-host "copying CUDA dependencies to ${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${script:CUDA_LIB_DIR}\cudart64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${script:CUDA_LIB_DIR}\cublas64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${script:CUDA_LIB_DIR}\cublasLt64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" } else { write-host "Skipping CUDA generation step" } @@ -325,18 +324,17 @@ function build_oneapi() { sign install - rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" - md 
"${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" -ea 0 > $null - cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libirngmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" - cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libmmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" - cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_level_zero.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" - cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_unified_runtime.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" - cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_win_proxy_loader.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" - cp "${env:ONEAPI_ROOT}\compiler\latest\bin\svml_dispmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" - cp "${env:ONEAPI_ROOT}\compiler\latest\bin\sycl7.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" - cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_core.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" - cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_sycl_blas.4.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" - cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_tbb_thread.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" + md "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" -ea 0 > $null + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libirngmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libmmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_level_zero.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_unified_runtime.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_win_proxy_loader.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp 
"${env:ONEAPI_ROOT}\compiler\latest\bin\svml_dispmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\sycl7.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_core.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_sycl_blas.4.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_tbb_thread.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" } else { Write-Host "Skipping oneAPI generation step" } @@ -386,12 +384,11 @@ function build_rocm() { sign install - rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\" - md "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\" -ea 0 > $null - cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\" - cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\" + md "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\rocblas\library\" -ea 0 > $null + cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" + cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\" # amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs - cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\" + cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_libs\rocblas\library\" } else { write-host "Skipping ROCm generation step" } diff --git a/llm/server.go b/llm/server.go index d2b8db9b..9347a458 100644 --- a/llm/server.go +++ b/llm/server.go @@ -306,20 +306,18 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr if runtime.GOOS == 
"windows" { pathEnv = "PATH" } - // prepend the server directory to LD_LIBRARY_PATH/PATH and the parent dir for common dependencies - libraryPaths := []string{dir, filepath.Dir(dir)} + // Start with the server directory for the LD_LIBRARY_PATH/PATH + libraryPaths := []string{dir} if libraryPath, ok := os.LookupEnv(pathEnv); ok { - // Append our runner directory to the path - // This will favor system libraries over our bundled library dependencies + // favor our bundled library dependencies over system libraries libraryPaths = append(libraryPaths, filepath.SplitList(libraryPath)...) } // Note: we always put the dependency path first - // since this was the exact version we verified for AMD GPUs - // and we favor what the user had in their path + // since this was the exact version we compiled/linked against if gpus[0].DependencyPath != "" { - // TODO refine for multi-gpu support + // assume gpus from the same library have the same dependency path libraryPaths = append([]string{gpus[0].DependencyPath}, libraryPaths...) } diff --git a/scripts/build_linux.sh b/scripts/build_linux.sh index 27c4ff1f..4ea51229 100755 --- a/scripts/build_linux.sh +++ b/scripts/build_linux.sh @@ -21,11 +21,9 @@ for TARGETARCH in ${BUILD_ARCH}; do -t builder:$TARGETARCH \ . docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH - docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/ollama ./dist/ollama-linux-$TARGETARCH - - if [ "$TARGETARCH" = "amd64" ]; then - docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/deps/ ./dist/ - fi - + docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/linux-$TARGETARCH ./dist docker rm builder-$TARGETARCH + echo "Compressing final linux bundle..." + rm -f ./dist/ollama-linux-$TARGETARCH.tgz + (cd dist/linux-$TARGETARCH && tar cf - . 
| gzip --best > ../ollama-linux-$TARGETARCH.tgz ) done diff --git a/scripts/build_windows.ps1 b/scripts/build_windows.ps1 index edc73759..e8d851f4 100644 --- a/scripts/build_windows.ps1 +++ b/scripts/build_windows.ps1 @@ -103,22 +103,22 @@ function buildApp() { function gatherDependencies() { write-host "Gathering runtime dependencies" cd "${script:SRC_DIR}" - md "${script:DEPS_DIR}\ollama_runners" -ea 0 > $null + md "${script:DEPS_DIR}\ollama_libs" -ea 0 > $null # TODO - this varies based on host build system and MSVC version - drive from dumpbin output # currently works for Win11 + MSVC 2019 + Cuda V11 - cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\msvcp140*.dll" "${script:DEPS_DIR}\ollama_runners\" - cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140.dll" "${script:DEPS_DIR}\ollama_runners\" - cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140_1.dll" "${script:DEPS_DIR}\ollama_runners\" + cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\msvcp140*.dll" "${script:DEPS_DIR}\ollama_libs\" + cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140.dll" "${script:DEPS_DIR}\ollama_libs\" + cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140_1.dll" "${script:DEPS_DIR}\ollama_libs\" foreach ($part in $("runtime", "stdio", "filesystem", "math", "convert", "heap", "string", "time", "locale", "environment")) { - cp "$env:VCToolsRedistDir\..\..\..\Tools\Llvm\x64\bin\api-ms-win-crt-${part}*.dll" "${script:DEPS_DIR}\ollama_runners\" + cp "$env:VCToolsRedistDir\..\..\..\Tools\Llvm\x64\bin\api-ms-win-crt-${part}*.dll" "${script:DEPS_DIR}\ollama_libs\" } cp "${script:SRC_DIR}\app\ollama_welcome.ps1" "${script:SRC_DIR}\dist\" if ("${env:KEY_CONTAINER}") { write-host "about to sign" - foreach ($file in (get-childitem "${script:DEPS_DIR}\cuda\cu*.dll") + @("${script:SRC_DIR}\dist\ollama_welcome.ps1")){ + foreach ($file in (get-childitem "${script:DEPS_DIR}\ollama_libs\cu*.dll") + @("${script:SRC_DIR}\dist\ollama_welcome.ps1")){ 
write-host "signing $file" & "${script:SignTool}" sign /v /fd sha256 /t http://timestamp.digicert.com /f "${script:OLLAMA_CERT}" ` /csp "Google Cloud KMS Provider" /kc ${env:KEY_CONTAINER} $file diff --git a/scripts/install.sh b/scripts/install.sh index 03af5a69..f0439b00 100644 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -63,16 +63,32 @@ if [ -n "$NEEDS" ]; then exit 1 fi -status "Downloading ollama..." -curl --fail --show-error --location --progress-bar -o $TEMP_DIR/ollama "https://ollama.com/download/ollama-linux-${ARCH}${VER_PARAM}" - for BINDIR in /usr/local/bin /usr/bin /bin; do echo $PATH | grep -q $BINDIR && break || continue done +OLLAMA_INSTALL_DIR=${OLLAMA_INSTALL_DIR:-${BINDIR}} -status "Installing ollama to $BINDIR..." +status "Installing ollama to $OLLAMA_INSTALL_DIR" $SUDO install -o0 -g0 -m755 -d $BINDIR -$SUDO install -o0 -g0 -m755 $TEMP_DIR/ollama $BINDIR/ollama +$SUDO install -o0 -g0 -m755 -d "$OLLAMA_INSTALL_DIR" +if curl -I --silent --fail --location "https://ollama.com/download/ollama-linux-${ARCH}.tgz${VER_PARAM}" >/dev/null ; then + status "Downloading Linux ${ARCH} bundle" + curl --fail --show-error --location --progress-bar \ + "https://ollama.com/download/ollama-linux-${ARCH}.tgz${VER_PARAM}" | \ + $SUDO tar -xzf - -C "$OLLAMA_INSTALL_DIR" + BUNDLE=1 +else + status "Downloading Linux ${ARCH} CLI" + curl --fail --show-error --location --progress-bar -o "$TEMP_DIR/ollama"\ + "https://ollama.com/download/ollama-linux-${ARCH}${VER_PARAM}" + $SUDO install -o0 -g0 -m755 $TEMP_DIR/ollama $OLLAMA_INSTALL_DIR/ollama + BUNDLE=0 +fi + +if [ "$OLLAMA_INSTALL_DIR/ollama" != "$BINDIR/ollama" ] ; then + status "Making ollama accessible in the PATH in $BINDIR" + $SUDO ln -sf "$OLLAMA_INSTALL_DIR/ollama" "$BINDIR/ollama" +fi install_success() { status 'The Ollama API is now available at 127.0.0.1:11434.' @@ -178,6 +194,11 @@ if ! check_gpu lspci nvidia && ! check_gpu lshw nvidia && ! 
check_gpu lspci amdg fi if check_gpu lspci amdgpu || check_gpu lshw amdgpu; then + if [ $BUNDLE -ne 0 ]; then + install_success + status "AMD GPU ready." + exit 0 + fi # Look for pre-existing ROCm v6 before downloading the dependencies for search in "${HIP_PATH:-''}" "${ROCM_PATH:-''}" "/opt/rocm" "/usr/lib64"; do if [ -n "${search}" ] && [ -e "${search}/libhipblas.so.2" -o -e "${search}/lib/libhipblas.so.2" ]; then