From 0bacb300071ba4baa928075b142633f2e85281ab Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Fri, 5 Jul 2024 12:46:28 -0700 Subject: [PATCH] Workaround broken ROCm p2p copy Enable the build flag for llama.cpp to use CPU copy for multi-GPU scenarios. --- llm/generate/gen_linux.sh | 2 +- llm/generate/gen_windows.ps1 | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh index d3e2d13b..304eadbd 100755 --- a/llm/generate/gen_linux.sh +++ b/llm/generate/gen_linux.sh @@ -254,7 +254,7 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocblas.so.*.*.????? | cut -f5 -d. || true) fi init_vars - CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DGGML_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)" + CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DGGML_HIPBLAS=on -DLLAMA_CUDA_NO_PEER_COPY=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)" # Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp if [ -n "${OLLAMA_CUSTOM_ROCM_DEFS}" ]; then echo "OLLAMA_CUSTOM_ROCM_DEFS=\"${OLLAMA_CUSTOM_ROCM_DEFS}\"" diff --git a/llm/generate/gen_windows.ps1 b/llm/generate/gen_windows.ps1 index 5c694350..26bc4fa3 100644 --- a/llm/generate/gen_windows.ps1 +++ b/llm/generate/gen_windows.ps1 @@ -366,6 +366,7 @@ function build_rocm() { "-DCMAKE_C_COMPILER=clang.exe", "-DCMAKE_CXX_COMPILER=clang++.exe", "-DGGML_HIPBLAS=on", + "-DLLAMA_CUDA_NO_PEER_COPY=on", "-DHIP_PLATFORM=amd", "-DGGML_AVX=on", "-DGGML_AVX2=off",