diff --git a/Dockerfile b/Dockerfile index 6743866a..e0f94b57 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ ARG CUDA_VERSION_11=11.3.1 ARG CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86" ARG CUDA_VERSION_12=12.4.0 ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a" -ARG ROCM_VERSION=6.1.2 +ARG ROCM_VERSION=5.7.1 # Copy the minimal context we need to run the generate scripts FROM scratch AS llm-code @@ -47,39 +47,39 @@ RUN --mount=type=cache,target=/root/.ccache \ OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \ bash gen_linux.sh -FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-server-arm64 -ARG CMAKE_VERSION -COPY ./scripts/rh_linux_deps.sh / -RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh -ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH -COPY --from=llm-code / /go/src/github.com/ollama/ollama/ -WORKDIR /go/src/github.com/ollama/ollama/llm/generate -ARG CGO_CFLAGS -ARG CUDA_V11_ARCHITECTURES -ENV GOARCH arm64 -RUN OLLAMA_SKIP_STATIC_GENERATE=1 \ - OLLAMA_SKIP_CPU_GENERATE=1 \ - CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \ - CUDA_VARIANT="_v11" \ - bash gen_linux.sh +#FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-server-arm64 +#ARG CMAKE_VERSION +#COPY ./scripts/rh_linux_deps.sh / +#RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh +#ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH +#COPY --from=llm-code / /go/src/github.com/ollama/ollama/ +#WORKDIR /go/src/github.com/ollama/ollama/llm/generate +#ARG CGO_CFLAGS +#ARG CUDA_V11_ARCHITECTURES +#ENV GOARCH arm64 +#RUN OLLAMA_SKIP_STATIC_GENERATE=1 \ +# OLLAMA_SKIP_CPU_GENERATE=1 \ +# CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \ +# CUDA_VARIANT="_v11" \ +# bash gen_linux.sh -FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_12-devel-rockylinux8 AS cuda-12-build-server-arm64 -ARG CMAKE_VERSION -COPY ./scripts/rh_linux_deps.sh / -RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh -ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH -COPY --from=llm-code / /go/src/github.com/ollama/ollama/ -WORKDIR /go/src/github.com/ollama/ollama/llm/generate -ARG CGO_CFLAGS -ARG CUDA_V12_ARCHITECTURES -ENV GOARCH arm64 -RUN --mount=type=cache,target=/root/.ccache \ - OLLAMA_SKIP_STATIC_GENERATE=1 \ - OLLAMA_SKIP_CPU_GENERATE=1 \ - CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \ - CUDA_VARIANT="_v12" \ - OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \ - bash gen_linux.sh +#FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_12-devel-rockylinux8 AS cuda-12-build-server-arm64 +#ARG CMAKE_VERSION +#COPY ./scripts/rh_linux_deps.sh / +#RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh +#ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH +#COPY --from=llm-code / /go/src/github.com/ollama/ollama/ +#WORKDIR /go/src/github.com/ollama/ollama/llm/generate +#ARG CGO_CFLAGS +#ARG CUDA_V12_ARCHITECTURES +#ENV GOARCH arm64 +#RUN --mount=type=cache,target=/root/.ccache \ +# OLLAMA_SKIP_STATIC_GENERATE=1 \ +# OLLAMA_SKIP_CPU_GENERATE=1 \ +# CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \ +# CUDA_VARIANT="_v12" \ +# OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \ +# bash gen_linux.sh FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64 @@ -123,24 +123,24 @@ FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64 RUN --mount=type=cache,target=/root/.ccache \ OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" bash gen_linux.sh -FROM --platform=linux/arm64 rockylinux:8 AS cpu-builder-arm64 -ARG CMAKE_VERSION -ARG GOLANG_VERSION -COPY ./scripts/rh_linux_deps.sh / -RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh -ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH -COPY --from=llm-code / /go/src/github.com/ollama/ollama/ -ARG OLLAMA_CUSTOM_CPU_DEFS -ARG CGO_CFLAGS -ENV GOARCH arm64 -WORKDIR /go/src/github.com/ollama/ollama/llm/generate +#FROM --platform=linux/arm64 rockylinux:8 AS cpu-builder-arm64 +#ARG CMAKE_VERSION +#ARG GOLANG_VERSION +#COPY ./scripts/rh_linux_deps.sh / +#RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh +#ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH +#COPY --from=llm-code / /go/src/github.com/ollama/ollama/ +#ARG OLLAMA_CUSTOM_CPU_DEFS +#ARG CGO_CFLAGS +#ENV GOARCH arm64 +#WORKDIR /go/src/github.com/ollama/ollama/llm/generate -FROM --platform=linux/arm64 cpu-builder-arm64 AS static-build-arm64 -RUN --mount=type=cache,target=/root/.ccache \ - OLLAMA_CPU_TARGET="static" bash gen_linux.sh -FROM --platform=linux/arm64 cpu-builder-arm64 AS cpu-build-arm64 -RUN --mount=type=cache,target=/root/.ccache \ - OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh +#FROM --platform=linux/arm64 cpu-builder-arm64 AS static-build-arm64 +#RUN --mount=type=cache,target=/root/.ccache \ +# OLLAMA_CPU_TARGET="static" bash gen_linux.sh +#FROM --platform=linux/arm64 cpu-builder-arm64 AS cpu-build-arm64 +#RUN --mount=type=cache,target=/root/.ccache \ +# OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh # Intermediate stage used for ./scripts/build_linux.sh @@ -163,20 +163,20 @@ RUN --mount=type=cache,target=/root/.ccache \ go build -trimpath -o dist/linux-amd64/bin/ollama . # Intermediate stage used for ./scripts/build_linux.sh -FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64 -ENV CGO_ENABLED 1 -ARG GOLANG_VERSION -WORKDIR /go/src/github.com/ollama/ollama -COPY . . -COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ -COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ -COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ -COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ -COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ -ARG GOFLAGS -ARG CGO_CFLAGS -RUN --mount=type=cache,target=/root/.ccache \ - go build -trimpath -o dist/linux-arm64/bin/ollama . +#FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64 +#ENV CGO_ENABLED 1 +#ARG GOLANG_VERSION +#WORKDIR /go/src/github.com/ollama/ollama +#COPY . . +#COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ +#COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ +#COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ +#COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ +#COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ +#ARG GOFLAGS +#ARG CGO_CFLAGS +#RUN --mount=type=cache,target=/root/.ccache \ +# go build -trimpath -o dist/linux-arm64/bin/ollama . # Strip out ROCm dependencies to keep the primary image lean FROM --platform=linux/amd64 ubuntu:22.04 as amd64-libs-without-rocm @@ -190,11 +190,11 @@ RUN apt-get update && apt-get install -y ca-certificates && \ apt-get clean && rm -rf /var/lib/apt/lists/* COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/ -FROM --platform=linux/arm64 ubuntu:22.04 as runtime-arm64 -COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/ -RUN apt-get update && apt-get install -y ca-certificates && \ - apt-get clean && rm -rf /var/lib/apt/lists/* -COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/ +#FROM --platform=linux/arm64 ubuntu:22.04 as runtime-arm64 +#COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/ +#RUN apt-get update && apt-get install -y ca-certificates && \ +# apt-get clean && rm -rf /var/lib/apt/lists/* +#COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/ # Radeon images are much larger so we keep it distinct from the CPU/CUDA image FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm diff --git a/gpu/amd_linux.go b/gpu/amd_linux.go index aab67efe..33300eb6 100644 --- a/gpu/amd_linux.go +++ b/gpu/amd_linux.go @@ -41,7 +41,7 @@ const ( var ( // Used to validate if the given ROCm lib is usable - ROCmLibGlobs = []string{"libhipblas.so.2*", "rocblas"} // TODO - probably include more coverage of files here... + ROCmLibGlobs = []string{"libhipblas.so.1*", "rocblas"} // TODO - probably include more coverage of files here... RocmStandardLocations = []string{"/opt/rocm/lib", "/usr/lib64"} ) @@ -386,7 +386,7 @@ func AMDValidateLibDir() (string, error) { } // Well known ollama installer path - installedRocmDir := "/usr/share/ollama/lib/rocm" + installedRocmDir := "/opt/rocm-5.7.1" if rocmLibUsable(installedRocmDir) { return installedRocmDir, nil } diff --git a/gpu/gpu.go b/gpu/gpu.go index 3de93f7f..7b2bd810 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -59,7 +59,7 @@ var ( // With our current CUDA compile flags, older than 5.0 will not work properly var CudaComputeMin = [2]C.int{5, 0} -var RocmComputeMin = 9 +var RocmComputeMin = 8 // TODO find a better way to detect iGPU instead of minimum memory const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh index 1f702ca2..fae3785c 100755 --- a/llm/generate/gen_linux.sh +++ b/llm/generate/gen_linux.sh @@ -22,6 +22,7 @@ amdGPUs() { return fi GPU_LIST=( + "gfx803" "gfx900" "gfx906:xnack-" "gfx908:xnack-" diff --git a/scripts/build_docker.sh b/scripts/build_docker.sh index e91c56ed..f70624e6 100755 --- a/scripts/build_docker.sh +++ b/scripts/build_docker.sh @@ -13,7 +13,7 @@ DOCKER_ORG=${DOCKER_ORG:-"ollama"} RELEASE_IMAGE_REPO=${RELEASE_IMAGE_REPO:-"${DOCKER_ORG}/release"} FINAL_IMAGE_REPO=${FINAL_IMAGE_REPO:-"${DOCKER_ORG}/ollama"} -BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"} +BUILD_ARCH=${BUILD_ARCH:-"amd64"} # Set PUSH to a non-empty string to trigger push instead of load PUSH=${PUSH:-""} diff --git a/scripts/build_linux.sh b/scripts/build_linux.sh index 6cb0d0cd..290b99c9 100755 --- a/scripts/build_linux.sh +++ b/scripts/build_linux.sh @@ -6,7 +6,8 @@ export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'" GZIP=$(which pigz 2>/dev/null || echo "gzip") -BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"} +#BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"} +BUILD_ARCH=${BUILD_ARCH:-"amd64"} export AMDGPU_TARGETS=${AMDGPU_TARGETS:=""} mkdir -p dist