diff --git a/.dockerignore b/.dockerignore index 43f2e07d..fada7a9b 100644 --- a/.dockerignore +++ b/.dockerignore @@ -7,3 +7,5 @@ llm/llama.cpp .env .cache test_data +llm/build +llama/build diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 9c1e3e13..02b5f8e6 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -102,8 +102,8 @@ jobs: with: name: generate-windows-cpu path: | - llm/build/**/bin/* - llm/build/**/*.a + build/**/* + build/**/*.a dist/windows-amd64/** # ROCm generation step @@ -176,7 +176,7 @@ jobs: with: name: generate-windows-rocm path: | - llm/build/**/bin/* + build/**/* dist/windows-amd64/** - uses: actions/upload-artifact@v4 with: @@ -265,7 +265,7 @@ jobs: with: name: generate-windows-cuda-${{ matrix.cuda.version }} path: | - llm/build/**/bin/* + build/**/* dist/windows-amd64/** - uses: actions/upload-artifact@v4 with: @@ -338,7 +338,7 @@ jobs: - uses: actions/download-artifact@v4 with: name: generate-windows-rocm - - run: dir llm/build + - run: dir build - run: | $gopath=(get-command go).source | split-path -parent & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1" @@ -359,9 +359,7 @@ jobs: environment: release runs-on: linux env: - OLLAMA_SKIP_MANIFEST_CREATE: '1' BUILD_ARCH: amd64 - PUSH: '1' steps: - uses: actions/checkout@v4 with: @@ -369,14 +367,8 @@ jobs: - name: Set Version shell: bash run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV - - name: Login to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ vars.DOCKER_USER }} - password: ${{ secrets.DOCKER_ACCESS_TOKEN }} - run: | ./scripts/build_linux.sh - ./scripts/build_docker.sh - uses: actions/upload-artifact@v4 with: name: dist-linux-amd64 @@ -390,9 +382,7 @@ jobs: environment: release runs-on: linux-arm64 env: - OLLAMA_SKIP_MANIFEST_CREATE: '1' BUILD_ARCH: arm64 - PUSH: '1' steps: - uses: actions/checkout@v4 with: @@ -421,14 +411,8 @@ jobs: sudo usermod -aG docker $USER sudo apt-get install acl sudo setfacl --modify user:$USER:rw /var/run/docker.sock - - name: Login to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ vars.DOCKER_USER }} - password: ${{ secrets.DOCKER_ACCESS_TOKEN }} - run: | ./scripts/build_linux.sh - ./scripts/build_docker.sh - uses: actions/upload-artifact@v4 with: name: dist-linux-arm64 @@ -436,6 +420,181 @@ jobs: dist/*linux* !dist/*-cov + # Container image build + build-linux: + environment: release + strategy: + matrix: + runner: + - linux + - linux-arm64 + runs-on: ${{ matrix.runner }} + env: + FINAL_IMAGE_REPO: ollama/ollama + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: 'Install Docker' + if: ${{ startsWith(matrix.runner, 'linux-arm64') }} + run: | + sudo apt-get update + sudo apt-get install -y ca-certificates curl + sudo install -m 0755 -d /etc/apt/keyrings + sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc + sudo chmod a+r /etc/apt/keyrings/docker.asc + echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ + $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + sudo apt-get update + sudo apt-get install -y docker-ce docker-ce-cli containerd.io + sudo usermod -aG docker $USER + sudo apt-get install acl + sudo setfacl --modify user:$USER:rw /var/run/docker.sock + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.FINAL_IMAGE_REPO }} + flavor: | + latest=false + tags: | + type=ref,event=tag + type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr + type=semver,pattern={{version}} + - name: Set Version + shell: bash + run: | + machine=$(uname -m) + case ${machine} in + x86_64) echo ARCH=amd64; echo PLATFORM_PAIR=linux-amd64 ;; + aarch64) echo ARCH=arm64; echo PLATFORM_PAIR=linux-arm64 ;; + esac >>$GITHUB_ENV + echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ vars.DOCKER_USER }} + password: ${{ secrets.DOCKER_ACCESS_TOKEN }} + - name: Build and push by digest + id: build + uses: docker/build-push-action@v6 + with: + context: "." + platforms: linux/${{ env.ARCH }} + build-args: | + GOFLAGS + outputs: type=image,name=${{ env.FINAL_IMAGE_REPO }},push-by-digest=true,name-canonical=true,push=true + - name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + - name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: digests-${{ env.PLATFORM_PAIR }} + path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 + merge: + environment: release + runs-on: linux + needs: + - build-linux + env: + FINAL_IMAGE_REPO: ollama/ollama + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Download digests + uses: actions/download-artifact@v4 + with: + path: /tmp/digests + pattern: digests-* + merge-multiple: true + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.FINAL_IMAGE_REPO }} + flavor: | + latest=false + tags: | + type=ref,event=tag + type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr + type=semver,pattern={{version}} + - name: Set Version + shell: bash + run: | + machine=$(uname -m) + case ${machine} in + x86_64) echo ARCH=amd64; echo PLATFORM_PAIR=linux-amd64 ;; + aarch64) echo ARCH=arm64; echo PLATFORM_PAIR=linux-arm64 ;; + esac >>$GITHUB_ENV + echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ vars.DOCKER_USER }} + password: ${{ secrets.DOCKER_ACCESS_TOKEN }} + - name: Create manifest list and push + working-directory: /tmp/digests + run: | + docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ + $(printf '${{ env.FINAL_IMAGE_REPO }}@sha256:%s ' *) + - name: Inspect image + run: | + docker buildx imagetools inspect ${{ env.FINAL_IMAGE_REPO }}:${{ steps.meta.outputs.version }} + build-linux-rocm: + environment: release + runs-on: linux + env: + FINAL_IMAGE_REPO: ollama/ollama + ARCH: amd64 + PLATFORM_PAIR: linux-amd64 + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.FINAL_IMAGE_REPO }} + flavor: | + latest=false + tags: | + type=ref,event=tag + type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr + type=semver,pattern={{version}} + - name: Set Version + shell: bash + run: | + echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ vars.DOCKER_USER }} + password: ${{ secrets.DOCKER_ACCESS_TOKEN }} + - name: Build and push by digest + id: build + uses: docker/build-push-action@v6 + with: + context: "." + target: runtime-rocm + build-args: | + GOFLAGS + tags: ${{ env.FINAL_IMAGE_REPO }}:${{ env.DOCKER_METADATA_OUTPUT_VERSION}}-rocm,${{ env.FINAL_IMAGE_REPO }}:rocm + push: true + # Aggregate all the assets and ship a release release: needs: @@ -448,8 +607,6 @@ jobs: permissions: contents: write env: - OLLAMA_SKIP_IMAGE_BUILD: '1' - PUSH: '1' GH_TOKEN: ${{ github.token }} steps: - uses: actions/checkout@v4 @@ -458,12 +615,6 @@ jobs: run: | echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV echo "RELEASE_VERSION=$(echo ${GITHUB_REF_NAME} | cut -f1 -d-)" >> $GITHUB_ENV - - name: Login to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ vars.DOCKER_USER }} - password: ${{ secrets.DOCKER_ACCESS_TOKEN }} - - run: ./scripts/build_docker.sh - name: Retrieve built artifact uses: actions/download-artifact@v4 with: diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 3d58fa3e..26dc732a 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -81,12 +81,6 @@ jobs: if: ${{ ! startsWith(matrix.os, 'windows-') }} name: 'Unix Go Generate' - run: go build . - - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.os }}-${{ matrix.arch }}-libraries - path: | - llm/build/**/bin/* - llm/build/**/*.a generate-cuda: needs: [changes] if: ${{ needs.changes.outputs.GENERATE_CUDA == 'True' }} @@ -114,12 +108,6 @@ jobs: go generate -x ./... env: OLLAMA_SKIP_CPU_GENERATE: '1' - - uses: actions/upload-artifact@v4 - with: - name: cuda-${{ matrix.cuda-version }}-libraries - path: | - llm/build/**/bin/* - dist/windows-amd64/** generate-rocm: needs: [changes] if: ${{ needs.changes.outputs.GENERATE_ROCM == 'True' }} @@ -147,12 +135,6 @@ jobs: go generate -x ./... env: OLLAMA_SKIP_CPU_GENERATE: '1' - - uses: actions/upload-artifact@v4 - with: - name: rocm-${{ matrix.rocm-version }}-libraries - path: | - llm/build/**/bin/* - dist/windows-amd64/** # ROCm generation step generate-windows-rocm: @@ -189,7 +171,6 @@ jobs: name: go generate env: OLLAMA_SKIP_CPU_GENERATE: '1' - # TODO - do we need any artifacts? # CUDA generation step generate-windows-cuda: @@ -231,7 +212,6 @@ jobs: go generate -x ./... env: OLLAMA_SKIP_CPU_GENERATE: '1' - # TODO - do we need any artifacts? lint: strategy: @@ -263,14 +243,6 @@ jobs: arm64) echo ARCH=arm64 ;; esac >>$GITHUB_ENV shell: bash - - run: | - mkdir -p llm/build/linux/$ARCH/stub/bin - touch llm/build/linux/$ARCH/stub/bin/ollama_llama_server - if: ${{ startsWith(matrix.os, 'ubuntu-') }} - - run: | - mkdir -p llm/build/darwin/$ARCH/stub/bin - touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server - if: ${{ startsWith(matrix.os, 'macos-') }} - uses: golangci/golangci-lint-action@v6 with: args: --timeout 8m0s -v @@ -301,23 +273,10 @@ jobs: cache: true - run: | case ${{ matrix.arch }} in - amd64) echo ARCH=x86_64 ;; + amd64) echo ARCH=amd64 ;; arm64) echo ARCH=arm64 ;; esac >>$GITHUB_ENV shell: bash - - run: | - mkdir -p llm/build/linux/$ARCH/stub/bin - touch llm/build/linux/$ARCH/stub/bin/ollama_llama_server - if: ${{ startsWith(matrix.os, 'ubuntu-') }} - - run: | - mkdir -p llm/build/darwin/$ARCH/stub/bin - touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server - if: ${{ startsWith(matrix.os, 'macos-') }} - shell: bash - run: go generate ./... - run: go build - run: go test -v ./... - - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.os }}-binaries - path: ollama diff --git a/.gitignore b/.gitignore index 0d826ab6..87f8b007 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,7 @@ ggml-metal.metal test_data *.crt llm/build +build/*/*/* +!build/**/placeholder +llama/build __debug_bin* \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 655f1081..0f43e618 100644 --- a/Dockerfile +++ b/Dockerfile @@ -47,7 +47,7 @@ RUN --mount=type=cache,target=/root/.ccache \ OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \ bash gen_linux.sh -FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-server-arm64 +FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-runner-arm64 ARG CMAKE_VERSION COPY ./scripts/rh_linux_deps.sh / RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh @@ -63,7 +63,7 @@ RUN OLLAMA_SKIP_STATIC_GENERATE=1 \ CUDA_VARIANT="_v11" \ bash gen_linux.sh -FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_12-devel-rockylinux8 AS cuda-12-build-server-arm64 +FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_12-devel-rockylinux8 AS cuda-12-build-runner-arm64 ARG CMAKE_VERSION COPY ./scripts/rh_linux_deps.sh / RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh @@ -143,64 +143,103 @@ RUN --mount=type=cache,target=/root/.ccache \ OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh -# Intermediate stage used for ./scripts/build_linux.sh +# Intermediate stages used for ./scripts/build_linux.sh FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64 ENV CGO_ENABLED=1 WORKDIR /go/src/github.com/ollama/ollama COPY . . -COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ -COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ -COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ +COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/ llm/build/ +COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/build/ build/ +COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/build/ build/ COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/ -COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ +COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/build/ build/ COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/ -COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ +COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/build/ build/ COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/ -COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ +COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/build/ build/ ARG GOFLAGS ARG CGO_CFLAGS RUN --mount=type=cache,target=/root/.ccache \ go build -trimpath -o dist/linux-amd64/bin/ollama . +RUN cd dist/linux-$GOARCH && \ + tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz +RUN cd dist/linux-$GOARCH-rocm && \ + tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-rocm.tgz -# Intermediate stage used for ./scripts/build_linux.sh FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64 ENV CGO_ENABLED=1 ARG GOLANG_VERSION WORKDIR /go/src/github.com/ollama/ollama COPY . . -COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ -COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ -COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ -COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ -COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ +COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/ llm/build/ +COPY --from=cuda-11-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ +COPY --from=cuda-11-build-runner-arm64 /go/src/github.com/ollama/ollama/build/ build/ +COPY --from=cuda-12-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ +COPY --from=cuda-12-build-runner-arm64 /go/src/github.com/ollama/ollama/build/ build/ +ARG GOFLAGS +ARG CGO_CFLAGS +RUN --mount=type=cache,target=/root/.ccache \ + go build -trimpath -o dist/linux-arm64/bin/ollama . +RUN cd dist/linux-$GOARCH && \ + tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz + +FROM --platform=linux/amd64 scratch AS dist-amd64 +COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz / +FROM --platform=linux/arm64 scratch AS dist-arm64 +COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz / +FROM dist-$TARGETARCH as dist + + +# Optimized container images do not cary nested payloads +FROM --platform=linux/amd64 static-build-amd64 AS container-build-amd64 +WORKDIR /go/src/github.com/ollama/ollama +COPY . . +ARG GOFLAGS +ARG CGO_CFLAGS +RUN --mount=type=cache,target=/root/.ccache \ + go build -trimpath -o dist/linux-amd64/bin/ollama . + +FROM --platform=linux/arm64 static-build-arm64 AS container-build-arm64 +WORKDIR /go/src/github.com/ollama/ollama +COPY . . ARG GOFLAGS ARG CGO_CFLAGS RUN --mount=type=cache,target=/root/.ccache \ go build -trimpath -o dist/linux-arm64/bin/ollama . -# Strip out ROCm dependencies to keep the primary image lean -FROM --platform=linux/amd64 ubuntu:22.04 AS amd64-libs-without-rocm -COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /scratch/ -RUN cd /scratch/ollama/ && rm -rf rocblas libamd* libdrm* libroc* libhip* libhsa* - -# Runtime stages FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64 -COPY --from=amd64-libs-without-rocm /scratch/ /lib/ -RUN apt-get update && apt-get install -y ca-certificates && \ +RUN apt-get update && \ + apt-get install -y ca-certificates && \ apt-get clean && rm -rf /var/lib/apt/lists/* -COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/ +COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/ +COPY --from=cpu-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/ +COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/ +COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/ +COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/ +COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/ FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64 -COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/ -RUN apt-get update && apt-get install -y ca-certificates && \ +RUN apt-get update && \ + apt-get install -y ca-certificates && \ apt-get clean && rm -rf /var/lib/apt/lists/* -COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/ +COPY --from=container-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/ +COPY --from=cpu-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/ +COPY --from=cuda-11-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/ +COPY --from=cuda-12-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/ -# Radeon images are much larger so we keep it distinct from the CPU/CUDA image -FROM rocm/dev-centos-7:${ROCM_VERSION}-complete AS runtime-rocm -RUN update-pciids -COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/ -RUN ln -s /opt/rocm/lib /lib/ollama +# ROCm libraries larger so we keep it distinct from the CPU/CUDA image +FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm +# Frontload the rocm libraries which are large, and rarely change to increase chance of a common layer +# across releases +COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/lib/ /lib/ +RUN apt-get update && \ + apt-get install -y ca-certificates && \ + apt-get clean && rm -rf /var/lib/apt/lists/* +COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/ +COPY --from=cpu-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/ +COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/ +COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/ +COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/ EXPOSE 11434 ENV OLLAMA_HOST=0.0.0.0 diff --git a/build/darwin/amd64/placeholder b/build/darwin/amd64/placeholder new file mode 100644 index 00000000..87dc2738 --- /dev/null +++ b/build/darwin/amd64/placeholder @@ -0,0 +1 @@ +This is here to make sure the build/ directory exists for the go:embed command diff --git a/build/darwin/arm64/placeholder b/build/darwin/arm64/placeholder new file mode 100644 index 00000000..87dc2738 --- /dev/null +++ b/build/darwin/arm64/placeholder @@ -0,0 +1 @@ +This is here to make sure the build/ directory exists for the go:embed command diff --git a/build/embed_darwin_amd64.go b/build/embed_darwin_amd64.go new file mode 100644 index 00000000..af1458ea --- /dev/null +++ b/build/embed_darwin_amd64.go @@ -0,0 +1,8 @@ +package build + +import "embed" + +// Darwin payloads separated by architecture to avoid duplicate payloads when cross compiling + +//go:embed darwin/amd64/* +var EmbedFS embed.FS diff --git a/build/embed_darwin_arm64.go b/build/embed_darwin_arm64.go new file mode 100644 index 00000000..d885365d --- /dev/null +++ b/build/embed_darwin_arm64.go @@ -0,0 +1,8 @@ +package build + +import "embed" + +// Darwin payloads separated by architecture to avoid duplicate payloads when cross compiling + +//go:embed darwin/arm64/* +var EmbedFS embed.FS diff --git a/build/embed_linux.go b/build/embed_linux.go new file mode 100644 index 00000000..4cf7be4c --- /dev/null +++ b/build/embed_linux.go @@ -0,0 +1,6 @@ +package build + +import "embed" + +//go:embed linux/* +var EmbedFS embed.FS diff --git a/build/embed_unused.go b/build/embed_unused.go new file mode 100644 index 00000000..00fbe02e --- /dev/null +++ b/build/embed_unused.go @@ -0,0 +1,8 @@ +//go:build !linux && !darwin + +package build + +import "embed" + +// unused on windows +var EmbedFS embed.FS diff --git a/build/linux/amd64/placeholder b/build/linux/amd64/placeholder new file mode 100644 index 00000000..87dc2738 --- /dev/null +++ b/build/linux/amd64/placeholder @@ -0,0 +1 @@ +This is here to make sure the build/ directory exists for the go:embed command diff --git a/build/linux/arm64/placeholder b/build/linux/arm64/placeholder new file mode 100644 index 00000000..87dc2738 --- /dev/null +++ b/build/linux/arm64/placeholder @@ -0,0 +1 @@ +This is here to make sure the build/ directory exists for the go:embed command diff --git a/envconfig/config.go b/envconfig/config.go index 2c4393fe..9c1490a9 100644 --- a/envconfig/config.go +++ b/envconfig/config.go @@ -179,53 +179,6 @@ var ( HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION") ) -func RunnersDir() (p string) { - if p := Var("OLLAMA_RUNNERS_DIR"); p != "" { - return p - } - - if runtime.GOOS != "windows" { - return - } - - defer func() { - if p == "" { - slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama/runners'") - } - }() - - // On Windows we do not carry the payloads inside the main executable - exe, err := os.Executable() - if err != nil { - return - } - - cwd, err := os.Getwd() - if err != nil { - return - } - - var paths []string - for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), LibRelativeToExe()), cwd} { - paths = append(paths, - root, - filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH), - filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH), - ) - } - - // Try a few variations to improve developer experience when building from source in the local tree - for _, path := range paths { - candidate := filepath.Join(path, "lib", "ollama", "runners") - if _, err := os.Stat(candidate); err == nil { - p = candidate - break - } - } - - return p -} - func Uint(key string, defaultValue uint) func() uint { return func() uint { if s := Var(key); s != "" { @@ -290,7 +243,6 @@ func AsMap() map[string]EnvVar { "OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"}, "OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"}, "OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"}, - "OLLAMA_RUNNERS_DIR": {"OLLAMA_RUNNERS_DIR", RunnersDir(), "Location for runners"}, "OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"}, "OLLAMA_TMPDIR": {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"}, diff --git a/gpu/assets.go b/gpu/assets.go deleted file mode 100644 index 6d62d0dc..00000000 --- a/gpu/assets.go +++ /dev/null @@ -1,148 +0,0 @@ -package gpu - -import ( - "errors" - "fmt" - "log/slog" - "os" - "path/filepath" - "runtime" - "strconv" - "strings" - "sync" - "syscall" - "time" - - "github.com/ollama/ollama/envconfig" -) - -var ( - lock sync.Mutex - payloadsDir = "" -) - -func PayloadsDir() (string, error) { - lock.Lock() - defer lock.Unlock() - var err error - if payloadsDir == "" { - runnersDir := envconfig.RunnersDir() - - if runnersDir != "" { - payloadsDir = runnersDir - return payloadsDir, nil - } - - // The remainder only applies on non-windows where we still carry payloads in the main executable - cleanupTmpDirs() - tmpDir := envconfig.TmpDir() - if tmpDir == "" { - tmpDir, err = os.MkdirTemp("", "ollama") - if err != nil { - return "", fmt.Errorf("failed to generate tmp dir: %w", err) - } - } else { - err = os.MkdirAll(tmpDir, 0o755) - if err != nil { - return "", fmt.Errorf("failed to generate tmp dir %s: %w", tmpDir, err) - } - } - - // Track our pid so we can clean up orphaned tmpdirs - n := filepath.Join(tmpDir, "ollama.pid") - if err := os.WriteFile(n, []byte(strconv.Itoa(os.Getpid())), 0o644); err != nil { - return "", fmt.Errorf("failed to write pid file %s: %w", n, err) - } - - // We create a distinct subdirectory for payloads within the tmpdir - // This will typically look like /tmp/ollama3208993108/runners on linux - payloadsDir = filepath.Join(tmpDir, "runners") - } - return payloadsDir, nil -} - -// Best effort to clean up prior tmpdirs -func cleanupTmpDirs() { - matches, err := filepath.Glob(filepath.Join(os.TempDir(), "ollama*", "ollama.pid")) - if err != nil { - return - } - - for _, match := range matches { - raw, err := os.ReadFile(match) - if errors.Is(err, os.ErrNotExist) { - slog.Debug("not a ollama runtime directory, skipping", "path", match) - continue - } else if err != nil { - slog.Warn("could not read ollama.pid, skipping", "path", match, "error", err) - continue - } - - pid, err := strconv.Atoi(string(raw)) - if err != nil { - slog.Warn("invalid pid, skipping", "path", match, "error", err) - continue - } - - p, err := os.FindProcess(pid) - if err == nil && !errors.Is(p.Signal(syscall.Signal(0)), os.ErrProcessDone) { - slog.Warn("process still running, skipping", "pid", pid, "path", match) - continue - } - - if err := os.Remove(match); err != nil { - slog.Warn("could not cleanup stale pidfile", "path", match, "error", err) - } - - runners := filepath.Join(filepath.Dir(match), "runners") - if err := os.RemoveAll(runners); err != nil { - slog.Warn("could not cleanup stale runners", "path", runners, "error", err) - } - - if err := os.Remove(filepath.Dir(match)); err != nil { - slog.Warn("could not cleanup stale tmpdir", "path", filepath.Dir(match), "error", err) - } - } -} - -func Cleanup() { - lock.Lock() - defer lock.Unlock() - runnersDir := envconfig.RunnersDir() - if payloadsDir != "" && runnersDir == "" && runtime.GOOS != "windows" { - // We want to fully clean up the tmpdir parent of the payloads dir - tmpDir := filepath.Clean(filepath.Join(payloadsDir, "..")) - slog.Debug("cleaning up", "dir", tmpDir) - err := os.RemoveAll(tmpDir) - if err != nil { - // On windows, if we remove too quickly the llama.dll may still be in-use and fail to remove - time.Sleep(1000 * time.Millisecond) - err = os.RemoveAll(tmpDir) - if err != nil { - slog.Warn("failed to clean up", "dir", tmpDir, "err", err) - } - } - } -} - -func UpdatePath(dir string) { - if runtime.GOOS == "windows" { - tmpDir := filepath.Dir(dir) - pathComponents := strings.Split(os.Getenv("PATH"), ";") - i := 0 - for _, comp := range pathComponents { - if strings.EqualFold(comp, dir) { - return - } - // Remove any other prior paths to our temp dir - if !strings.HasPrefix(strings.ToLower(comp), strings.ToLower(tmpDir)) { - pathComponents[i] = comp - i++ - } - } - newPath := strings.Join(append([]string{dir}, pathComponents...), ";") - slog.Info("updating", "PATH", newPath) - os.Setenv("PATH", newPath) - } - // linux and darwin rely on rpath -} diff --git a/gpu/gpu.go b/gpu/gpu.go index 3de93f7f..1fa941dd 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -93,10 +93,9 @@ func initCudaHandles() *cudaHandles { localAppData := os.Getenv("LOCALAPPDATA") cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", CudartMgmtName)} } - tmpDir, _ := PayloadsDir() - if tmpDir != "" { - // TODO - add "payloads" for subprocess - cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", CudartMgmtName)} + libDir := LibraryDir() + if libDir != "" { + cudartMgmtPatterns = []string{filepath.Join(libDir, CudartMgmtName)} } cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...) diff --git a/llm/generate/gen_common.sh b/llm/generate/gen_common.sh index cef68ea1..9fe47529 100644 --- a/llm/generate/gen_common.sh +++ b/llm/generate/gen_common.sh @@ -31,6 +31,7 @@ init_vars() { NO_WHOLE_ARCHIVE="" GCC_ARCH="-arch ${ARCH}" DIST_BASE=../../dist/darwin-${GOARCH}/ + PAYLOAD_BASE=../../build/darwin/${GOARCH} ;; "Linux") LIB_EXT="so" @@ -40,6 +41,7 @@ init_vars() { # Cross compiling not supported on linux - Use docker GCC_ARCH="" DIST_BASE=../../dist/linux-${GOARCH}/ + PAYLOAD_BASE=../../build/linux/${GOARCH} ;; *) ;; @@ -47,7 +49,8 @@ init_vars() { if [ -z "${CMAKE_CUDA_ARCHITECTURES}" ] ; then CMAKE_CUDA_ARCHITECTURES="50;52;61;70;75;80" fi - GZIP=$(which pigz 2>/dev/null || echo "gzip") + GZIP=$(command -v pigz 2>/dev/null || echo "gzip") + RUNNER_BASE="${DIST_BASE}/lib/ollama/runners" } git_module_setup() { @@ -91,17 +94,34 @@ build() { rm -f ${BUILD_DIR}/bin/ggml-common.h ${BUILD_DIR}/bin/ggml-metal.metal } -compress() { - echo "Compressing payloads to reduce overall binary size..." - rm -rf ${BUILD_DIR}/bin/*.gz +dist() { + [ -z "${RUNNER}" ] && exit 1 + mkdir -p ${RUNNER_BASE}/${RUNNER}/ for f in ${BUILD_DIR}/bin/* ; do - ${GZIP} -n --best -f ${f} & + cp ${f} ${RUNNER_BASE}/${RUNNER}/ + done + # check for lib directory + if [ -d ${BUILD_DIR}/lib ]; then + for f in ${BUILD_DIR}/lib/* ; do + cp ${f} ${RUNNER_BASE}/${RUNNER}/ + done + fi +} + +# Compress from the build $BUILD_DIR into the $PAYLOAD_BASE/$RUNNER dir +compress() { + [ -z "${RUNNER}" ] && exit 1 + echo "Compressing payloads with ${GZIP} to reduce overall binary size..." + rm -rf "${PAYLOAD_BASE}/${RUNNER}/" + mkdir -p "${PAYLOAD_BASE}/${RUNNER}/" + for f in ${BUILD_DIR}/bin/* ; do + ${GZIP} -c --best ${f} > "${PAYLOAD_BASE}/${RUNNER}/$(basename ${f}).gz" & compress_pids+=" $!" done # check for lib directory if [ -d ${BUILD_DIR}/lib ]; then for f in ${BUILD_DIR}/lib/* ; do - ${GZIP} -n --best -f ${f} & + ${GZIP} -c --best ${f} > "${PAYLOAD_BASE}/${RUNNER}/$(basename ${f}).gz" & compress_pids+=" $!" done fi diff --git a/llm/generate/gen_darwin.sh b/llm/generate/gen_darwin.sh index acea9c8d..49c67125 100755 --- a/llm/generate/gen_darwin.sh +++ b/llm/generate/gen_darwin.sh @@ -39,7 +39,8 @@ case "${GOARCH}" in # init_vars CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_ACCELERATE=off -DGGML_BLAS=off -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}" - BUILD_DIR="../build/darwin/${ARCH}/cpu" + RUNNER=cpu + BUILD_DIR="../build/darwin/${ARCH}/${RUNNER}" echo "Building LCD CPU" build sign ${BUILD_DIR}/bin/ollama_llama_server @@ -51,7 +52,8 @@ case "${GOARCH}" in # init_vars CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_ACCELERATE=off -DGGML_BLAS=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}" - BUILD_DIR="../build/darwin/${ARCH}/cpu_avx" + RUNNER=cpu_avx + BUILD_DIR="../build/darwin/${ARCH}/${RUNNER}" echo "Building AVX CPU" build sign ${BUILD_DIR}/bin/ollama_llama_server @@ -63,7 +65,8 @@ case "${GOARCH}" in # init_vars CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_ACCELERATE=on -DGGML_BLAS=off -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on ${CMAKE_DEFS}" - BUILD_DIR="../build/darwin/${ARCH}/cpu_avx2" + RUNNER=cpu_avx2 + BUILD_DIR="../build/darwin/${ARCH}/${RUNNER}" echo "Building AVX2 CPU" EXTRA_LIBS="${EXTRA_LIBS} -framework Accelerate -framework Foundation" build @@ -84,7 +87,8 @@ case "${GOARCH}" in if [ -z "$OLLAMA_SKIP_METAL_GENERATE" ]; then init_vars CMAKE_DEFS="${COMMON_DARWIN_DEFS} -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} ${CMAKE_DEFS}" - BUILD_DIR="../build/darwin/${ARCH}/metal" + RUNNER="metal" + BUILD_DIR="../build/darwin/${ARCH}/${RUNNER}" EXTRA_LIBS="${EXTRA_LIBS} -framework Accelerate -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders" build sign ${BUILD_DIR}/bin/ollama_llama_server diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh index 1f702ca2..eb7fa786 100755 --- a/llm/generate/gen_linux.sh +++ b/llm/generate/gen_linux.sh @@ -79,10 +79,12 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then init_vars echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\"" CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}" - BUILD_DIR="../build/linux/${ARCH}/cpu" + RUNNER="cpu" + BUILD_DIR="../build/linux/${ARCH}/${RUNNER}" echo "Building custom CPU" build install + dist compress else # Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512 @@ -102,10 +104,12 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then # init_vars CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}" - BUILD_DIR="../build/linux/${ARCH}/cpu" + RUNNER=cpu + BUILD_DIR="../build/linux/${ARCH}/${RUNNER}" echo "Building LCD CPU" build install + dist compress fi @@ -120,10 +124,12 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then # init_vars CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}" - BUILD_DIR="../build/linux/${ARCH}/cpu_avx" + RUNNER=cpu_avx + BUILD_DIR="../build/linux/${ARCH}/${RUNNER}" echo "Building AVX CPU" build install + dist compress fi @@ -134,10 +140,12 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then # init_vars CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on ${CMAKE_DEFS}" - BUILD_DIR="../build/linux/${ARCH}/cpu_avx2" + RUNNER=cpu_avx2 + BUILD_DIR="../build/linux/${ARCH}/${RUNNER}" echo "Building AVX2 CPU" build install + dist compress fi fi @@ -187,11 +195,13 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then fi export CUDAFLAGS="-t8" CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS} -DGGML_STATIC=off" - BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}" + RUNNER=cuda${CUDA_VARIANT} + BUILD_DIR="../build/linux/${ARCH}/${RUNNER}" export LLAMA_SERVER_LDFLAGS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda" CUDA_DIST_DIR="${CUDA_DIST_DIR:-${DIST_BASE}/lib/ollama}" build install + dist echo "Installing CUDA dependencies in ${CUDA_DIST_DIR}" mkdir -p "${CUDA_DIST_DIR}" for lib in ${CUDA_LIB_DIR}/libcudart.so* ${CUDA_LIB_DIR}/libcublas.so* ${CUDA_LIB_DIR}/libcublasLt.so* ; do @@ -212,7 +222,8 @@ if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then source ${ONEAPI_ROOT}/setvars.sh --force # set up environment variables for oneAPI CC=icx CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL=ON -DGGML_SYCL_F16=OFF" - BUILD_DIR="../build/linux/${ARCH}/oneapi" + RUNNER=oneapi + BUILD_DIR="../build/linux/${ARCH}/${RUNNER}" ONEAPI_DIST_DIR="${DIST_BASE}/lib/ollama" export LLAMA_SERVER_LDFLAGS="-fsycl -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb" DEBUG_FLAGS="" # icx compiles with -O0 if we pass -g, so we must remove it @@ -231,6 +242,7 @@ if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then cp "${ONEAPI_ROOT}/compiler/latest/lib/libsvml.so" "${ONEAPI_DIST_DIR}" cp "${ONEAPI_ROOT}/compiler/latest/lib/libur_loader.so.0" "${ONEAPI_DIST_DIR}" install + dist compress fi @@ -259,7 +271,8 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then CMAKE_DEFS="${CMAKE_DEFS} ${OLLAMA_CUSTOM_ROCM_DEFS}" echo "Building custom ROCM GPU" fi - BUILD_DIR="../build/linux/${ARCH}/rocm${ROCM_VARIANT}" + RUNNER=rocm${ROCM_VARIANT} + BUILD_DIR="../build/linux/${ARCH}/${RUNNER}" # ROCm dependencies are too large to fit into a unified bundle ROCM_DIST_DIR="${DIST_BASE}/../linux-${GOARCH}-rocm/lib/ollama" # TODO figure out how to disable runpath (rpath) @@ -269,13 +282,17 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then # copy the ROCM dependencies mkdir -p "${ROCM_DIST_DIR}" - for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -v "${ARCH}/rocm${ROCM_VARIANT}" | grep -e rocm -e amdgpu -e libtinfo ); do + for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -v "${ARCH}/rocm${ROCM_VARIANT}" | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf ); do cp -a "${dep}"* "${ROCM_DIST_DIR}" + if [ $(readlink -f "${dep}") != "${dep}" ] ; then + cp $(readlink -f "${dep}") "${ROCM_DIST_DIR}" + fi done install + dist compress fi cleanup wait_for_compress -echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)" +echo "go generate completed. LLM runners: $(cd ${PAYLOAD_BASE}; echo *)" diff --git a/llm/llm_darwin_arm64.go b/llm/llm_darwin.go similarity index 55% rename from llm/llm_darwin_arm64.go rename to llm/llm_darwin.go index 20ce8552..60837ed0 100644 --- a/llm/llm_darwin_arm64.go +++ b/llm/llm_darwin.go @@ -1,11 +1,7 @@ package llm import ( - "embed" "syscall" ) -//go:embed build/darwin/arm64/*/bin/* -var libEmbed embed.FS - var LlamaServerSysProcAttr = &syscall.SysProcAttr{} diff --git a/llm/llm_darwin_amd64.go b/llm/llm_darwin_amd64.go deleted file mode 100644 index 60eed719..00000000 --- a/llm/llm_darwin_amd64.go +++ /dev/null @@ -1,11 +0,0 @@ -package llm - -import ( - "embed" - "syscall" -) - -//go:embed build/darwin/x86_64/*/bin/* -var libEmbed embed.FS - -var LlamaServerSysProcAttr = &syscall.SysProcAttr{} diff --git a/llm/llm_linux.go b/llm/llm_linux.go index 928b4e79..60837ed0 100644 --- a/llm/llm_linux.go +++ b/llm/llm_linux.go @@ -1,11 +1,7 @@ package llm import ( - "embed" "syscall" ) -//go:embed build/linux/*/*/bin/* -var libEmbed embed.FS - var LlamaServerSysProcAttr = &syscall.SysProcAttr{} diff --git a/llm/llm_windows.go b/llm/llm_windows.go index 763cccf9..74a735c2 100644 --- a/llm/llm_windows.go +++ b/llm/llm_windows.go @@ -1,13 +1,9 @@ package llm import ( - "embed" "syscall" ) -// unused on windows -var libEmbed embed.FS - const CREATE_DEFAULT_ERROR_MODE = 0x04000000 var LlamaServerSysProcAttr = &syscall.SysProcAttr{ diff --git a/llm/payload.go b/llm/payload.go deleted file mode 100644 index 963b3295..00000000 --- a/llm/payload.go +++ /dev/null @@ -1,233 +0,0 @@ -package llm - -import ( - "compress/gzip" - "errors" - "fmt" - "io" - "io/fs" - "log/slog" - "os" - "path/filepath" - "runtime" - "slices" - "strings" - - "golang.org/x/sync/errgroup" - - "github.com/ollama/ollama/gpu" -) - -var errPayloadMissing = errors.New("expected payloads not included in this build of ollama") - -func Init() error { - payloadsDir, err := gpu.PayloadsDir() - if err != nil { - return err - } - - if runtime.GOOS != "windows" { - slog.Info("extracting embedded files", "dir", payloadsDir) - binGlob := "build/*/*/*/bin/*" - - // extract server libraries - err = extractFiles(payloadsDir, binGlob) - if err != nil { - return fmt.Errorf("extract binaries: %v", err) - } - } - - var variants []string - for v := range getAvailableServers() { - variants = append(variants, v) - } - slog.Info(fmt.Sprintf("Dynamic LLM libraries %v", variants)) - slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY") - - return nil -} - -// binary names may contain an optional variant separated by '_' -// For example, "ollama_rocm_v6" and "ollama_rocm_v5" or "ollama_cpu" and "ollama_cpu_avx2" -// Any library without a variant is the lowest common denominator -func getAvailableServers() map[string]string { - payloadsDir, err := gpu.PayloadsDir() - if err != nil { - slog.Error("payload lookup error", "error", err) - return nil - } - - // glob payloadsDir for files that start with ollama_ - pattern := filepath.Join(payloadsDir, "*", "ollama_*") - - files, err := filepath.Glob(pattern) - if err != nil { - slog.Debug("could not glob", "pattern", pattern, "error", err) - return nil - } - - servers := make(map[string]string) - for _, file := range files { - slog.Debug("availableServers : found", "file", file) - servers[filepath.Base(filepath.Dir(file))] = filepath.Dir(file) - } - - return servers -} - -// serversForGpu returns a list of compatible servers give the provided GPU -// info, ordered by performance. assumes Init() has been called -// TODO - switch to metadata based mapping -func serversForGpu(info gpu.GpuInfo) []string { - // glob workDir for files that start with ollama_ - availableServers := getAvailableServers() - requested := info.Library - if info.Variant != gpu.CPUCapabilityNone.String() { - requested += "_" + info.Variant - } - - servers := []string{} - - // exact match first - for a := range availableServers { - if a == requested { - servers = []string{a} - - if a == "metal" { - return servers - } - - break - } - } - - alt := []string{} - - // Then for GPUs load alternates and sort the list for consistent load ordering - if info.Library != "cpu" { - for a := range availableServers { - if info.Library == strings.Split(a, "_")[0] && a != requested { - alt = append(alt, a) - } - } - - slices.Sort(alt) - servers = append(servers, alt...) - } - - if !(runtime.GOOS == "darwin" && runtime.GOARCH == "arm64") { - // Load up the best CPU variant if not primary requested - if info.Library != "cpu" { - variant := gpu.GetCPUCapability() - // If no variant, then we fall back to default - // If we have a variant, try that if we find an exact match - // Attempting to run the wrong CPU instructions will panic the - // process - if variant != gpu.CPUCapabilityNone { - for cmp := range availableServers { - if cmp == "cpu_"+variant.String() { - servers = append(servers, cmp) - break - } - } - } else { - servers = append(servers, "cpu") - } - } - - if len(servers) == 0 { - servers = []string{"cpu"} - } - } - - return servers -} - -// Return the optimal server for this CPU architecture -func serverForCpu() string { - if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" { - return "metal" - } - variant := gpu.GetCPUCapability() - availableServers := getAvailableServers() - if variant != gpu.CPUCapabilityNone { - for cmp := range availableServers { - if cmp == "cpu_"+variant.String() { - return cmp - } - } - } - return "cpu" -} - -// extract extracts the embedded files to the target directory -func extractFiles(targetDir string, glob string) error { - files, err := fs.Glob(libEmbed, glob) - if err != nil || len(files) == 0 { - return errPayloadMissing - } - - if err := os.MkdirAll(targetDir, 0o755); err != nil { - return fmt.Errorf("extractFiles could not mkdir %s: %v", targetDir, err) - } - - g := new(errgroup.Group) - - // build/$OS/$GOARCH/$VARIANT/{bin,lib}/$FILE - for _, file := range files { - filename := file - - variant := filepath.Base(filepath.Dir(filepath.Dir(filename))) - - slog.Debug("extracting", "variant", variant, "file", filename) - - g.Go(func() error { - srcf, err := libEmbed.Open(filename) - if err != nil { - return err - } - defer srcf.Close() - - src := io.Reader(srcf) - if strings.HasSuffix(filename, ".gz") { - src, err = gzip.NewReader(src) - if err != nil { - return fmt.Errorf("decompress payload %s: %v", filename, err) - } - filename = strings.TrimSuffix(filename, ".gz") - } - - variantDir := filepath.Join(targetDir, variant) - if err := os.MkdirAll(variantDir, 0o755); err != nil { - return fmt.Errorf("extractFiles could not mkdir %s: %v", variantDir, err) - } - - base := filepath.Base(filename) - destFilename := filepath.Join(variantDir, base) - - _, err = os.Stat(destFilename) - switch { - case errors.Is(err, os.ErrNotExist): - destFile, err := os.OpenFile(destFilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755) - if err != nil { - return fmt.Errorf("write payload %s: %v", filename, err) - } - defer destFile.Close() - if _, err := io.Copy(destFile, src); err != nil { - return fmt.Errorf("copy payload %s: %v", filename, err) - } - case err != nil: - return fmt.Errorf("stat payload %s: %v", filename, err) - } - return nil - }) - } - - err = g.Wait() - if err != nil { - // If we fail to extract, the payload dir is unusable, so cleanup whatever we extracted - gpu.Cleanup() - return err - } - return nil -} diff --git a/llm/server.go b/llm/server.go index 5d5b8c4f..6c504f14 100644 --- a/llm/server.go +++ b/llm/server.go @@ -24,9 +24,11 @@ import ( "golang.org/x/sync/semaphore" "github.com/ollama/ollama/api" + "github.com/ollama/ollama/build" "github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/format" "github.com/ollama/ollama/gpu" + "github.com/ollama/ollama/runners" ) type LlamaServer interface { @@ -106,7 +108,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr gpus = gpu.GetCPUInfo() } if len(gpus) == 1 && gpus[0].Library == "cpu" { - cpuRunner = serverForCpu() + cpuRunner = runners.ServerForCpu() estimate = EstimateGPULayers(gpus, ggml, projectors, opts) } else { estimate = EstimateGPULayers(gpus, ggml, projectors, opts) @@ -118,7 +120,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr opts.NumGPU = 0 case gpus[0].Library != "metal" && estimate.Layers == 0: // Don't bother loading into the GPU if no layers can fit - cpuRunner = serverForCpu() + cpuRunner = runners.ServerForCpu() gpus = gpu.GetCPUInfo() case opts.NumGPU < 0 && estimate.Layers > 0 && gpus[0].Library != "cpu": opts.NumGPU = estimate.Layers @@ -145,25 +147,20 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr return nil, errors.New("ollama supports only one lora adapter, but multiple were provided") } - availableServers := getAvailableServers() + rDir, err := runners.Refresh(build.EmbedFS) + if err != nil { + return nil, err + } + + availableServers := runners.GetAvailableServers(rDir) if len(availableServers) == 0 { - if runtime.GOOS != "windows" { - slog.Warn("llama server binary disappeared, reinitializing payloads") - err = Init() - if err != nil { - slog.Warn("failed to reinitialize payloads", "error", err) - return nil, err - } - availableServers = getAvailableServers() - } else { - return nil, finalErr - } + return nil, finalErr } var servers []string if cpuRunner != "" { servers = []string{cpuRunner} } else { - servers = serversForGpu(gpus[0]) // All GPUs in the list are matching Library and Variant + servers = runners.ServersForGpu(gpus[0]) // All GPUs in the list are matching Library and Variant } demandLib := envconfig.LLMLibrary() if demandLib != "" { @@ -330,7 +327,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr _, err := os.Stat(server) if errors.Is(err, os.ErrNotExist) { slog.Warn("llama server disappeared, reinitializing payloads", "path", server, "error", err) - err = Init() + _, err = runners.Refresh(build.EmbedFS) if err != nil { slog.Warn("failed to reinitialize payloads", "error", err) return nil, err diff --git a/runners/common.go b/runners/common.go new file mode 100644 index 00000000..681c397b --- /dev/null +++ b/runners/common.go @@ -0,0 +1,384 @@ +package runners + +import ( + "compress/gzip" + "errors" + "fmt" + "io" + "io/fs" + "log/slog" + "os" + "path/filepath" + "runtime" + "slices" + "strconv" + "strings" + "sync" + "syscall" + + "golang.org/x/sync/errgroup" + + "github.com/ollama/ollama/envconfig" + "github.com/ollama/ollama/gpu" +) + +const ( + binGlob = "*/*/*/*" +) + +var ( + lock sync.Mutex + runnersDir = "" +) + +// Return the location where runners are stored +// If runners are payloads, this will either extract them +// or refresh them if any have disappeared due to tmp cleaners +func Refresh(payloadFS fs.FS) (string, error) { + lock.Lock() + defer lock.Unlock() + var err error + + // Wire up extra logging on our first load + if runnersDir == "" { + defer func() { + var runners []string + for v := range GetAvailableServers(runnersDir) { + runners = append(runners, v) + } + slog.Info("Dynamic LLM libraries", "runners", runners) + slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY") + }() + } + + if hasPayloads(payloadFS) { + if runnersDir == "" { + runnersDir, err = extractRunners(payloadFS) + } else { + err = refreshRunners(payloadFS, runnersDir) + } + } else if runnersDir == "" { + runnersDir, err = locateRunners() + } + + return runnersDir, err +} + +func Cleanup(payloadFS fs.FS) { + lock.Lock() + defer lock.Unlock() + if hasPayloads(payloadFS) && runnersDir != "" { + // We want to fully clean up the tmpdir parent of the payloads dir + tmpDir := filepath.Clean(filepath.Join(runnersDir, "..")) + slog.Debug("cleaning up", "dir", tmpDir) + err := os.RemoveAll(tmpDir) + if err != nil { + slog.Warn("failed to clean up", "dir", tmpDir, "err", err) + } + } +} + +func locateRunners() (string, error) { + exe, err := os.Executable() + if err != nil { + return "", err + } + + cwd, err := os.Getwd() + if err != nil { + return "", err + } + + var paths []string + for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe()), cwd} { + paths = append(paths, + root, + filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH), + filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH), + ) + } + + // Try a few variations to improve developer experience when building from source in the local tree + for _, path := range paths { + candidate := filepath.Join(path, "lib", "ollama", "runners") + if _, err := os.Stat(candidate); err == nil { + return candidate, nil + } + } + return "", fmt.Errorf("unable to locate runners in any search path %v", paths) +} + +// Return true if we're carying nested payloads for the runners +func hasPayloads(payloadFS fs.FS) bool { + files, err := fs.Glob(payloadFS, binGlob) + if err != nil || len(files) == 0 || (len(files) == 1 && strings.Contains(files[0], "placeholder")) { + return false + } + return true +} + +func extractRunners(payloadFS fs.FS) (string, error) { + cleanupTmpDirs() + tmpDir, err := os.MkdirTemp(envconfig.TmpDir(), "ollama") + if err != nil { + return "", fmt.Errorf("failed to generate tmp dir: %w", err) + } + // Track our pid so we can clean up orphaned tmpdirs + n := filepath.Join(tmpDir, "ollama.pid") + if err := os.WriteFile(n, []byte(strconv.Itoa(os.Getpid())), 0o644); err != nil { + slog.Warn("failed to write pid file", "file", n, "error", err) + } + // We create a distinct subdirectory for payloads within the tmpdir + // This will typically look like /tmp/ollama3208993108/runners on linux + rDir := filepath.Join(tmpDir, "runners") + + slog.Info("extracting embedded files", "dir", rDir) + return rDir, refreshRunners(payloadFS, rDir) +} + +func refreshRunners(payloadFS fs.FS, rDir string) error { + // extract or refresh server libraries + err := extractFiles(payloadFS, rDir, binGlob) + if err != nil { + return fmt.Errorf("extract binaries: %v", err) + } + return nil +} + +// extract extracts the embedded files to the target directory +func extractFiles(payloadFS fs.FS, targetDir string, glob string) error { + files, err := fs.Glob(payloadFS, glob) + if err != nil || len(files) == 0 { + // Should not happen + return fmt.Errorf("extractFiles called without payload present") + } + + if err := os.MkdirAll(targetDir, 0o755); err != nil { + return fmt.Errorf("extractFiles could not mkdir %s: %v", targetDir, err) + } + + g := new(errgroup.Group) + + // $OS/$GOARCH/$RUNNER/$FILE + for _, file := range files { + filename := file + + runner := filepath.Base(filepath.Dir(filename)) + + slog.Debug("extracting", "runner", runner, "payload", filename) + + g.Go(func() error { + srcf, err := payloadFS.Open(filename) + if err != nil { + return err + } + defer srcf.Close() + + src := io.Reader(srcf) + if strings.HasSuffix(filename, ".gz") { + src, err = gzip.NewReader(src) + if err != nil { + return fmt.Errorf("decompress payload %s: %v", filename, err) + } + filename = strings.TrimSuffix(filename, ".gz") + } + + runnerDir := filepath.Join(targetDir, runner) + if err := os.MkdirAll(runnerDir, 0o755); err != nil { + return fmt.Errorf("extractFiles could not mkdir %s: %v", runnerDir, err) + } + + base := filepath.Base(filename) + destFilename := filepath.Join(runnerDir, base) + + _, err = os.Stat(destFilename) + switch { + case errors.Is(err, os.ErrNotExist): + destFile, err := os.OpenFile(destFilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755) + if err != nil { + return fmt.Errorf("write payload %s: %v", filename, err) + } + defer destFile.Close() + if _, err := io.Copy(destFile, src); err != nil { + return fmt.Errorf("copy payload %s: %v", filename, err) + } + case err != nil: + return fmt.Errorf("stat payload %s: %v", filename, err) + } + return nil + }) + } + + err = g.Wait() + if err != nil { + slog.Error("failed to extract files", "error", err) + // If we fail to extract, the payload dir is most likely unusable, so cleanup whatever we extracted + err := os.RemoveAll(targetDir) + if err != nil { + slog.Warn("failed to cleanup incomplete payload dir", "dir", targetDir, "error", err) + } + return err + } + return nil +} + +// Best effort to clean up prior tmpdirs +func cleanupTmpDirs() { + tmpDir := envconfig.TmpDir() + if tmpDir == "" { + tmpDir = os.TempDir() + } + matches, err := filepath.Glob(filepath.Join(tmpDir, "ollama*", "ollama.pid")) + if err != nil { + return + } + + for _, match := range matches { + raw, err := os.ReadFile(match) + if errors.Is(err, os.ErrNotExist) { + slog.Debug("not a ollama runtime directory, skipping", "path", match) + continue + } else if err != nil { + slog.Warn("could not read ollama.pid, skipping", "path", match, "error", err) + continue + } + + pid, err := strconv.Atoi(string(raw)) + if err != nil { + slog.Warn("invalid pid, skipping", "path", match, "error", err) + continue + } + + p, err := os.FindProcess(pid) + if err == nil && !errors.Is(p.Signal(syscall.Signal(0)), os.ErrProcessDone) { + slog.Warn("process still running, skipping", "pid", pid, "path", match) + continue + } + + if err := os.Remove(match); err != nil { + slog.Warn("could not cleanup stale pidfile", "path", match, "error", err) + } + + runners := filepath.Join(filepath.Dir(match), "runners") + if err := os.RemoveAll(runners); err != nil { + slog.Warn("could not cleanup stale runners", "path", runners, "error", err) + } + + if err := os.Remove(filepath.Dir(match)); err != nil { + slog.Warn("could not cleanup stale tmpdir", "path", filepath.Dir(match), "error", err) + } + } +} + +// directory names are the name of the runner and may contain an optional +// variant prefixed with '_' as the separator. For example, "cuda_v11" and +// "cuda_v12" or "cpu" and "cpu_avx2". Any library without a variant is the +// lowest common denominator +func GetAvailableServers(payloadsDir string) map[string]string { + if payloadsDir == "" { + slog.Error("empty runner dir") + return nil + } + + // glob payloadsDir for files that start with ollama_ + pattern := filepath.Join(payloadsDir, "*", "ollama_*") + + files, err := filepath.Glob(pattern) + if err != nil { + slog.Debug("could not glob", "pattern", pattern, "error", err) + return nil + } + + servers := make(map[string]string) + for _, file := range files { + slog.Debug("availableServers : found", "file", file) + servers[filepath.Base(filepath.Dir(file))] = filepath.Dir(file) + } + + return servers +} + +// serversForGpu returns a list of compatible servers give the provided GPU +// info, ordered by performance. assumes Init() has been called +// TODO - switch to metadata based mapping +func ServersForGpu(info gpu.GpuInfo) []string { + // glob workDir for files that start with ollama_ + availableServers := GetAvailableServers(runnersDir) + requested := info.Library + if info.Variant != gpu.CPUCapabilityNone.String() { + requested += "_" + info.Variant + } + + servers := []string{} + + // exact match first + for a := range availableServers { + if a == requested { + servers = []string{a} + + if a == "metal" { + return servers + } + + break + } + } + + alt := []string{} + + // Then for GPUs load alternates and sort the list for consistent load ordering + if info.Library != "cpu" { + for a := range availableServers { + if info.Library == strings.Split(a, "_")[0] && a != requested { + alt = append(alt, a) + } + } + + slices.Sort(alt) + servers = append(servers, alt...) + } + + if !(runtime.GOOS == "darwin" && runtime.GOARCH == "arm64") { + // Load up the best CPU variant if not primary requested + if info.Library != "cpu" { + variant := gpu.GetCPUCapability() + // If no variant, then we fall back to default + // If we have a variant, try that if we find an exact match + // Attempting to run the wrong CPU instructions will panic the + // process + if variant != gpu.CPUCapabilityNone { + for cmp := range availableServers { + if cmp == "cpu_"+variant.String() { + servers = append(servers, cmp) + break + } + } + } else { + servers = append(servers, "cpu") + } + } + + if len(servers) == 0 { + servers = []string{"cpu"} + } + } + + return servers +} + +// Return the optimal server for this CPU architecture +func ServerForCpu() string { + if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" { + return "metal" + } + variant := gpu.GetCPUCapability() + availableServers := GetAvailableServers(runnersDir) + if variant != gpu.CPUCapabilityNone { + for cmp := range availableServers { + if cmp == "cpu_"+variant.String() { + return cmp + } + } + } + return "cpu" +} diff --git a/runners/runners_test.go b/runners/runners_test.go new file mode 100644 index 00000000..e6439448 --- /dev/null +++ b/runners/runners_test.go @@ -0,0 +1,50 @@ +package runners + +import ( + "log/slog" + "os" + "path" + "runtime" + "strings" + "testing" + "testing/fstest" +) + +func TestRefreshRunners(t *testing.T) { + slog.SetLogLoggerLevel(slog.LevelDebug) + + payloadFS := fstest.MapFS{ + path.Join(runtime.GOOS, runtime.GOARCH, "foo", "ollama_llama_server"): {Data: []byte("hello, world\n")}, + } + tmpDir, err := os.MkdirTemp("", "testing") + if err != nil { + t.Fatalf("failed to make tmp dir %s", err) + } + t.Setenv("OLLAMA_TMPDIR", tmpDir) + rDir, err := Refresh(payloadFS) + if err != nil { + t.Fatalf("failed to extract to %s %s", tmpDir, err) + } + if !strings.Contains(rDir, tmpDir) { + t.Fatalf("runner dir %s was not in tmp dir %s", rDir, tmpDir) + } + + // spot check results + servers := GetAvailableServers(rDir) + if len(servers) < 1 { + t.Fatalf("expected at least 1 server") + } + + // Refresh contents + rDir, err = extractRunners(payloadFS) + if err != nil { + t.Fatalf("failed to extract to %s %s", tmpDir, err) + } + if !strings.Contains(rDir, tmpDir) { + t.Fatalf("runner dir %s was not in tmp dir %s", rDir, tmpDir) + } + + cleanupTmpDirs() + + Cleanup(payloadFS) +} diff --git a/scripts/build_darwin.sh b/scripts/build_darwin.sh index a2f76af2..17ac0b94 100755 --- a/scripts/build_darwin.sh +++ b/scripts/build_darwin.sh @@ -2,8 +2,7 @@ set -e -export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")} -export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'" +. $(dirname $0)/env.sh mkdir -p dist diff --git a/scripts/build_docker.sh b/scripts/build_docker.sh index e91c56ed..567eb7c7 100755 --- a/scripts/build_docker.sh +++ b/scripts/build_docker.sh @@ -2,76 +2,34 @@ set -eu -export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")} -export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'" - -# We use 2 different image repositories to handle combining architecture images into multiarch manifest -# (The ROCm image is x86 only and is not a multiarch manifest) -# For developers, you can override the DOCKER_ORG to generate multiarch manifests -# DOCKER_ORG=jdoe PUSH=1 ./scripts/build_docker.sh -DOCKER_ORG=${DOCKER_ORG:-"ollama"} -RELEASE_IMAGE_REPO=${RELEASE_IMAGE_REPO:-"${DOCKER_ORG}/release"} -FINAL_IMAGE_REPO=${FINAL_IMAGE_REPO:-"${DOCKER_ORG}/ollama"} - -BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"} +. $(dirname $0)/env.sh # Set PUSH to a non-empty string to trigger push instead of load PUSH=${PUSH:-""} -# In CI mode, we break things down -OLLAMA_SKIP_MANIFEST_CREATE=${OLLAMA_SKIP_MANIFEST_CREATE:-""} -OLLAMA_SKIP_IMAGE_BUILD=${OLLAMA_SKIP_IMAGE_BUILD:-""} - if [ -z "${PUSH}" ] ; then + echo "Building ${FINAL_IMAGE_REPO}:$VERSION locally. set PUSH=1 to push" LOAD_OR_PUSH="--load" else - echo "Will be pushing ${RELEASE_IMAGE_REPO}:$VERSION for ${BUILD_ARCH}" + echo "Will be pushing ${FINAL_IMAGE_REPO}:$VERSION" LOAD_OR_PUSH="--push" fi -if [ -z "${OLLAMA_SKIP_IMAGE_BUILD}" ]; then - for TARGETARCH in ${BUILD_ARCH}; do - docker build \ - ${LOAD_OR_PUSH} \ - --platform=linux/${TARGETARCH} \ - --build-arg=VERSION \ - --build-arg=GOFLAGS \ - -f Dockerfile \ - -t ${RELEASE_IMAGE_REPO}:$VERSION-${TARGETARCH} \ - . - done +docker buildx build \ + ${LOAD_OR_PUSH} \ + --platform=${PLATFORM} \ + ${OLLAMA_COMMON_BUILD_ARGS} \ + -f Dockerfile \ + -t ${FINAL_IMAGE_REPO}:$VERSION \ + . - if echo ${BUILD_ARCH} | grep "amd64" > /dev/null; then - docker build \ - ${LOAD_OR_PUSH} \ - --platform=linux/amd64 \ - --build-arg=VERSION \ - --build-arg=GOFLAGS \ - --target runtime-rocm \ - -f Dockerfile \ - -t ${RELEASE_IMAGE_REPO}:$VERSION-rocm \ - . - fi -fi - -if [ -z "${OLLAMA_SKIP_MANIFEST_CREATE}" ]; then - if [ -n "${PUSH}" ]; then - docker manifest create ${FINAL_IMAGE_REPO}:$VERSION \ - ${RELEASE_IMAGE_REPO}:$VERSION-amd64 \ - ${RELEASE_IMAGE_REPO}:$VERSION-arm64 - docker manifest push ${FINAL_IMAGE_REPO}:$VERSION - - # For symmetry, tag/push the rocm image - if [ "${RELEASE_IMAGE_REPO}" != "${FINAL_IMAGE_REPO}" ]; then - echo "Tagging and pushing rocm image" - docker pull ${RELEASE_IMAGE_REPO}:$VERSION-rocm - docker tag ${RELEASE_IMAGE_REPO}:$VERSION-rocm ${FINAL_IMAGE_REPO}:$VERSION-rocm - docker push ${FINAL_IMAGE_REPO}:$VERSION-rocm - fi - else - echo "Skipping manifest generation when not pushing images are available locally as " - echo " ${RELEASE_IMAGE_REPO}:$VERSION-amd64" - echo " ${RELEASE_IMAGE_REPO}:$VERSION-arm64" - echo " ${RELEASE_IMAGE_REPO}:$VERSION-rocm" - fi -fi +if echo $PLATFORM | grep "amd64" > /dev/null; then + docker buildx build \ + ${LOAD_OR_PUSH} \ + --platform=linux/amd64 \ + ${OLLAMA_COMMON_BUILD_ARGS} \ + --target runtime-rocm \ + -f Dockerfile \ + -t ${FINAL_IMAGE_REPO}:$VERSION-rocm \ + . +fi \ No newline at end of file diff --git a/scripts/build_linux.sh b/scripts/build_linux.sh index 6cb0d0cd..894d9dd2 100755 --- a/scripts/build_linux.sh +++ b/scripts/build_linux.sh @@ -1,37 +1,29 @@ #!/bin/sh +# +# Mac ARM users, rosetta can be flaky, so to use a remote x86 builder +# +# docker context create amd64 --docker host=ssh://mybuildhost +# docker buildx create --name mybuilder amd64 --platform linux/amd64 +# docker buildx create --name mybuilder --append desktop-linux --platform linux/arm64 +# docker buildx use mybuilder + set -eu -export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")} -export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'" -GZIP=$(which pigz 2>/dev/null || echo "gzip") +. $(dirname $0)/env.sh -BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"} -export AMDGPU_TARGETS=${AMDGPU_TARGETS:=""} mkdir -p dist -for TARGETARCH in ${BUILD_ARCH}; do - docker build \ - --platform=linux/$TARGETARCH \ - --build-arg=GOFLAGS \ - --build-arg=CGO_CFLAGS \ - --build-arg=OLLAMA_CUSTOM_CPU_DEFS \ - --build-arg=AMDGPU_TARGETS \ - --target build-$TARGETARCH \ +docker buildx build \ + --output type=local,dest=./dist/ \ + --platform=${PLATFORM} \ + ${OLLAMA_COMMON_BUILD_ARGS} \ + --target dist \ -f Dockerfile \ - -t builder:$TARGETARCH \ . - docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH - rm -rf ./dist/linux-$TARGETARCH - docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/linux-$TARGETARCH ./dist - if echo ${TARGETARCH} | grep "amd64" > /dev/null; then - docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/linux-$TARGETARCH-rocm ./dist - fi - docker rm builder-$TARGETARCH - echo "Compressing final linux bundle..." - rm -f ./dist/ollama-linux-$TARGETARCH.tgz - (cd dist/linux-$TARGETARCH && tar cf - . | ${GZIP} --best > ../ollama-linux-$TARGETARCH.tgz ) - if [ -d dist/linux-$TARGETARCH-rocm ]; then - (cd dist/linux-$TARGETARCH-rocm && tar cf - . | ${GZIP} --best > ../ollama-linux-$TARGETARCH-rocm.tgz ) - fi -done + +# buildx behavior changes for single vs. multiplatform +if echo $PLATFORM | grep "," > /dev/null ; then + mv -f ./dist/linux_*64/ollama* ./dist/ + rmdir ./dist/linux_*64 +fi \ No newline at end of file diff --git a/scripts/env.sh b/scripts/env.sh new file mode 100644 index 00000000..d3ca05d7 --- /dev/null +++ b/scripts/env.sh @@ -0,0 +1,14 @@ +# Common environment setup across build*.sh scripts + +export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")} +export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'" +# TODO - consider `docker buildx ls --format=json` to autodiscover platform capability +PLATFORM=${PLATFORM:-"linux/arm64,linux/amd64"} +DOCKER_ORG=${DOCKER_ORG:-"ollama"} +RELEASE_IMAGE_REPO=${RELEASE_IMAGE_REPO:-"${DOCKER_ORG}/release"} +FINAL_IMAGE_REPO=${FINAL_IMAGE_REPO:-"${DOCKER_ORG}/ollama"} +OLLAMA_COMMON_BUILD_ARGS="--build-arg=VERSION --build-arg=GOFLAGS --build-arg=OLLAMA_CUSTOM_CPU_DEFS --build-arg=AMDGPU_TARGETS" + +echo "Building Ollama" +echo "VERSION=$VERSION" +echo "PLATFORM=$PLATFORM" \ No newline at end of file diff --git a/server/routes.go b/server/routes.go index f202973e..6bd3a93f 100644 --- a/server/routes.go +++ b/server/routes.go @@ -26,11 +26,13 @@ import ( "golang.org/x/sync/errgroup" "github.com/ollama/ollama/api" + "github.com/ollama/ollama/build" "github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/gpu" "github.com/ollama/ollama/llm" "github.com/ollama/ollama/openai" "github.com/ollama/ollama/parser" + "github.com/ollama/ollama/runners" "github.com/ollama/ollama/template" "github.com/ollama/ollama/types/errtypes" "github.com/ollama/ollama/types/model" @@ -1216,12 +1218,12 @@ func Serve(ln net.Listener) error { srvr.Close() schedDone() sched.unloadAllRunners() - gpu.Cleanup() + runners.Cleanup(build.EmbedFS) done() }() - if err := llm.Init(); err != nil { - return fmt.Errorf("unable to initialize llm library %w", err) + if _, err := runners.Refresh(build.EmbedFS); err != nil { + return fmt.Errorf("unable to initialize llm runners %w", err) } s.sched.Run(schedCtx)