Merge pull request #2130 from dhiltgen/more_faster
Make CPU builds parallel and customizable AMD GPUs
This commit is contained in:
commit
5576bb2348
41
Dockerfile
41
Dockerfile
|
@ -10,91 +10,102 @@ COPY llm llm
|
||||||
|
|
||||||
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
|
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
ARG CGO_CFLAGS
|
|
||||||
COPY ./scripts/rh_linux_deps.sh /
|
COPY ./scripts/rh_linux_deps.sh /
|
||||||
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
||||||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||||
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
||||||
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
||||||
|
ARG CGO_CFLAGS
|
||||||
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
||||||
|
|
||||||
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
|
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
ARG CGO_CFLAGS
|
|
||||||
COPY ./scripts/rh_linux_deps.sh /
|
COPY ./scripts/rh_linux_deps.sh /
|
||||||
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
||||||
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
|
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
|
||||||
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
||||||
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
||||||
|
ARG CGO_CFLAGS
|
||||||
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
||||||
|
|
||||||
FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete AS rocm-5-build-amd64
|
FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete AS rocm-5-build-amd64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
ARG CGO_CFLAGS
|
|
||||||
COPY ./scripts/rh_linux_deps.sh /
|
COPY ./scripts/rh_linux_deps.sh /
|
||||||
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
||||||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||||
ENV LIBRARY_PATH /opt/amdgpu/lib64
|
ENV LIBRARY_PATH /opt/amdgpu/lib64
|
||||||
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
||||||
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
||||||
|
ARG CGO_CFLAGS
|
||||||
|
ARG AMDGPU_TARGETS
|
||||||
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
||||||
|
|
||||||
FROM --platform=linux/amd64 rocm/dev-centos-7:6.0-complete AS rocm-6-build-amd64
|
FROM --platform=linux/amd64 rocm/dev-centos-7:6.0-complete AS rocm-6-build-amd64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
ARG CGO_CFLAGS
|
|
||||||
COPY ./scripts/rh_linux_deps.sh /
|
COPY ./scripts/rh_linux_deps.sh /
|
||||||
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
|
||||||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||||
ENV LIBRARY_PATH /opt/amdgpu/lib64
|
ENV LIBRARY_PATH /opt/amdgpu/lib64
|
||||||
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
||||||
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
||||||
|
ARG CGO_CFLAGS
|
||||||
|
ARG AMDGPU_TARGETS
|
||||||
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
|
||||||
|
|
||||||
FROM --platform=linux/amd64 centos:7 AS cpu-build-amd64
|
FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
ARG GOLANG_VERSION
|
ARG GOLANG_VERSION
|
||||||
ARG OLLAMA_CUSTOM_CPU_DEFS
|
|
||||||
ARG CGO_CFLAGS
|
|
||||||
COPY ./scripts/rh_linux_deps.sh /
|
COPY ./scripts/rh_linux_deps.sh /
|
||||||
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
|
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
|
||||||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||||
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
||||||
|
ARG OLLAMA_CUSTOM_CPU_DEFS
|
||||||
|
ARG CGO_CFLAGS
|
||||||
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
||||||
RUN sh gen_linux.sh
|
|
||||||
|
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64
|
||||||
|
RUN OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
|
||||||
|
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64
|
||||||
|
RUN OLLAMA_CPU_TARGET="cpu_avx" sh gen_linux.sh
|
||||||
|
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
|
||||||
|
RUN OLLAMA_CPU_TARGET="cpu_avx2" sh gen_linux.sh
|
||||||
|
|
||||||
FROM --platform=linux/arm64 centos:7 AS cpu-build-arm64
|
FROM --platform=linux/arm64 centos:7 AS cpu-build-arm64
|
||||||
ARG CMAKE_VERSION
|
ARG CMAKE_VERSION
|
||||||
ARG GOLANG_VERSION
|
ARG GOLANG_VERSION
|
||||||
ARG OLLAMA_CUSTOM_CPU_DEFS
|
|
||||||
ARG CGO_CFLAGS
|
|
||||||
COPY ./scripts/rh_linux_deps.sh /
|
COPY ./scripts/rh_linux_deps.sh /
|
||||||
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
|
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
|
||||||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||||
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
|
||||||
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
|
||||||
RUN sh gen_linux.sh
|
# Note, we only build the "base" CPU variant on arm since avx/avx2 are x86 features
|
||||||
|
ARG OLLAMA_CUSTOM_CPU_DEFS
|
||||||
|
ARG CGO_CFLAGS
|
||||||
|
RUN OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
|
||||||
|
|
||||||
# Intermediate stage used for ./scripts/build_linux.sh
|
# Intermediate stage used for ./scripts/build_linux.sh
|
||||||
FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
|
FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
|
||||||
ENV CGO_ENABLED 1
|
ENV CGO_ENABLED 1
|
||||||
ARG GOFLAGS
|
|
||||||
ARG CGO_CFLAGS
|
|
||||||
WORKDIR /go/src/github.com/jmorganca/ollama
|
WORKDIR /go/src/github.com/jmorganca/ollama
|
||||||
COPY . .
|
COPY . .
|
||||||
|
COPY --from=cpu_avx-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
||||||
|
COPY --from=cpu_avx2-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
||||||
COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
||||||
COPY --from=rocm-5-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
COPY --from=rocm-5-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
||||||
COPY --from=rocm-6-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
COPY --from=rocm-6-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
||||||
|
ARG GOFLAGS
|
||||||
|
ARG CGO_CFLAGS
|
||||||
RUN go build .
|
RUN go build .
|
||||||
|
|
||||||
# Intermediate stage used for ./scripts/build_linux.sh
|
# Intermediate stage used for ./scripts/build_linux.sh
|
||||||
FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
|
FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
|
||||||
ENV CGO_ENABLED 1
|
ENV CGO_ENABLED 1
|
||||||
ARG GOLANG_VERSION
|
ARG GOLANG_VERSION
|
||||||
ARG GOFLAGS
|
|
||||||
ARG CGO_CFLAGS
|
|
||||||
WORKDIR /go/src/github.com/jmorganca/ollama
|
WORKDIR /go/src/github.com/jmorganca/ollama
|
||||||
COPY . .
|
COPY . .
|
||||||
COPY --from=cuda-build-arm64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
COPY --from=cuda-build-arm64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
|
||||||
|
ARG GOFLAGS
|
||||||
|
ARG CGO_CFLAGS
|
||||||
RUN go build .
|
RUN go build .
|
||||||
|
|
||||||
# Runtime stages
|
# Runtime stages
|
||||||
|
|
|
@ -74,7 +74,8 @@ Typically the build scripts will auto-detect ROCm, however, if your Linux distro
|
||||||
or installation approach uses unusual paths, you can specify the location by
|
or installation approach uses unusual paths, you can specify the location by
|
||||||
specifying an environment variable `ROCM_PATH` to the location of the ROCm
|
specifying an environment variable `ROCM_PATH` to the location of the ROCm
|
||||||
install (typically `/opt/rocm`), and `CLBlast_DIR` to the location of the
|
install (typically `/opt/rocm`), and `CLBlast_DIR` to the location of the
|
||||||
CLBlast install (typically `/usr/lib/cmake/CLBlast`).
|
CLBlast install (typically `/usr/lib/cmake/CLBlast`). You can also customize
|
||||||
|
the AMD GPU targets by setting `AMDGPU_TARGETS` (e.g. `AMDGPU_TARGETS="gfx1101;gfx1102"`).
|
||||||
|
|
||||||
```
|
```
|
||||||
go generate ./...
|
go generate ./...
|
||||||
|
|
|
@ -16,6 +16,10 @@ set -o pipefail
|
||||||
|
|
||||||
# See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
|
# See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
|
||||||
amdGPUs() {
|
amdGPUs() {
|
||||||
|
if [ -n "${AMDGPU_TARGETS}" ]; then
|
||||||
|
echo "${AMDGPU_TARGETS}"
|
||||||
|
return
|
||||||
|
fi
|
||||||
GPU_LIST=(
|
GPU_LIST=(
|
||||||
"gfx803"
|
"gfx803"
|
||||||
"gfx900"
|
"gfx900"
|
||||||
|
@ -73,36 +77,42 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
|
||||||
# -DLLAMA_AVX512_VNNI -- 2021 Intel Alder Lake
|
# -DLLAMA_AVX512_VNNI -- 2021 Intel Alder Lake
|
||||||
|
|
||||||
COMMON_CPU_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off"
|
COMMON_CPU_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off"
|
||||||
#
|
if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu" ]; then
|
||||||
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
|
#
|
||||||
#
|
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
|
||||||
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
|
#
|
||||||
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu"
|
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
|
||||||
echo "Building LCD CPU"
|
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu"
|
||||||
build
|
echo "Building LCD CPU"
|
||||||
compress_libs
|
build
|
||||||
|
compress_libs
|
||||||
|
fi
|
||||||
|
|
||||||
#
|
if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu_avx" ]; then
|
||||||
# ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
|
#
|
||||||
# Approximately 400% faster than LCD on same CPU
|
# ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
|
||||||
#
|
# Approximately 400% faster than LCD on same CPU
|
||||||
init_vars
|
#
|
||||||
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
|
init_vars
|
||||||
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx"
|
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
|
||||||
echo "Building AVX CPU"
|
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx"
|
||||||
build
|
echo "Building AVX CPU"
|
||||||
compress_libs
|
build
|
||||||
|
compress_libs
|
||||||
|
fi
|
||||||
|
|
||||||
#
|
if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu_avx2" ]; then
|
||||||
# ~2013 CPU Dynamic library
|
#
|
||||||
# Approximately 10% faster than AVX on same CPU
|
# ~2013 CPU Dynamic library
|
||||||
#
|
# Approximately 10% faster than AVX on same CPU
|
||||||
init_vars
|
#
|
||||||
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"
|
init_vars
|
||||||
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx2"
|
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"
|
||||||
echo "Building AVX2 CPU"
|
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx2"
|
||||||
build
|
echo "Building AVX2 CPU"
|
||||||
compress_libs
|
build
|
||||||
|
compress_libs
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
echo "Skipping CPU generation step as requested"
|
echo "Skipping CPU generation step as requested"
|
||||||
|
|
|
@ -6,6 +6,7 @@ export VERSION=${VERSION:-0.0.0}
|
||||||
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'"
|
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'"
|
||||||
|
|
||||||
BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"}
|
BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"}
|
||||||
|
export AMDGPU_TARGETS=${AMDGPU_TARGETS:=""}
|
||||||
mkdir -p dist
|
mkdir -p dist
|
||||||
|
|
||||||
for TARGETARCH in ${BUILD_ARCH}; do
|
for TARGETARCH in ${BUILD_ARCH}; do
|
||||||
|
@ -14,6 +15,7 @@ for TARGETARCH in ${BUILD_ARCH}; do
|
||||||
--build-arg=GOFLAGS \
|
--build-arg=GOFLAGS \
|
||||||
--build-arg=CGO_CFLAGS \
|
--build-arg=CGO_CFLAGS \
|
||||||
--build-arg=OLLAMA_CUSTOM_CPU_DEFS \
|
--build-arg=OLLAMA_CUSTOM_CPU_DEFS \
|
||||||
|
--build-arg=AMDGPU_TARGETS \
|
||||||
--target build-$TARGETARCH \
|
--target build-$TARGETARCH \
|
||||||
-f Dockerfile \
|
-f Dockerfile \
|
||||||
-t builder:$TARGETARCH \
|
-t builder:$TARGETARCH \
|
||||||
|
|
Loading…
Reference in a new issue