From 1b7183c5a1c124050f29013c60968e1ff55d41c1 Mon Sep 17 00:00:00 2001
From: Michael Yang
Date: Wed, 5 Jul 2023 17:45:11 -0700
Subject: [PATCH] enable metal gpu acceleration

ggml-metal.metal must be in the same directory as the ollama binary;
otherwise llama.cpp will not be able to find and load it.

1. go generate llama/llama_metal.go
2. go build .
3. ./ollama serve
---
 llama/CMakeLists.txt | 23 +++++++++--------------
 llama/llama_metal.go |  5 +++++
 server/routes.go     |  2 +-
 3 files changed, 15 insertions(+), 15 deletions(-)
 create mode 100644 llama/llama_metal.go

diff --git a/llama/CMakeLists.txt b/llama/CMakeLists.txt
index dd1450dd..3ea66d7e 100644
--- a/llama/CMakeLists.txt
+++ b/llama/CMakeLists.txt
@@ -1,4 +1,6 @@
-cmake_minimum_required(VERSION 3.10)
+cmake_minimum_required(VERSION 3.12)
+project(binding)
+
 include(FetchContent)
 
 FetchContent_Declare(
@@ -9,20 +11,13 @@ FetchContent_Declare(
 
 FetchContent_MakeAvailable(llama_cpp)
 
-if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
-    set(LLAMA_METAL ON)
-    add_compile_definitions(GGML_USE_METAL)
-endif()
-
-project(binding)
-
 add_library(binding ${CMAKE_CURRENT_SOURCE_DIR}/binding/binding.cpp ${llama_cpp_SOURCE_DIR}/examples/common.cpp)
-target_compile_features(binding PRIVATE cxx_std_11)
-target_include_directories(binding PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
-target_include_directories(binding PRIVATE ${llama_cpp_SOURCE_DIR})
 target_include_directories(binding PRIVATE ${llama_cpp_SOURCE_DIR}/examples)
 target_link_libraries(binding llama ggml_static)
 
-configure_file(${llama_cpp_SOURCE_DIR}/ggml-metal.metal ${CMAKE_CURRENT_BINARY_DIR}/ggml-metal.metal COPYONLY)
-add_custom_target(copy_libllama ALL COMMAND ${CMAKE_COMMAND} -E copy_if_different ${llama_cpp_BINARY_DIR}/libllama.a ${CMAKE_CURRENT_BINARY_DIR})
-add_custom_target(copy_libggml_static ALL COMMAND ${CMAKE_COMMAND} -E copy_if_different ${llama_cpp_BINARY_DIR}/libggml_static.a ${CMAKE_CURRENT_BINARY_DIR})
\ No newline at end of file
+if (LLAMA_METAL)
+    configure_file(${llama_cpp_SOURCE_DIR}/ggml-metal.metal ${CMAKE_CURRENT_BINARY_DIR}/../../ggml-metal.metal COPYONLY)
+endif()
+
+add_custom_target(copy_libllama ALL COMMAND ${CMAKE_COMMAND} -E copy_if_different $<TARGET_FILE:llama> ${CMAKE_CURRENT_BINARY_DIR})
+add_custom_target(copy_libggml_static ALL COMMAND ${CMAKE_COMMAND} -E copy_if_different $<TARGET_FILE:ggml_static> ${CMAKE_CURRENT_BINARY_DIR})
diff --git a/llama/llama_metal.go b/llama/llama_metal.go
new file mode 100644
index 00000000..111cc5ff
--- /dev/null
+++ b/llama/llama_metal.go
@@ -0,0 +1,5 @@
+//go:build metal
+package llama
+
+//go:generate cmake -S . -B build --fresh -DLLAMA_METAL=on
+//go:generate cmake --build build
diff --git a/server/routes.go b/server/routes.go
index 8796a37b..cacd36a0 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -22,7 +22,7 @@ func pull(c *gin.Context) {
 
 func generate(c *gin.Context) {
 	// TODO: these should be request parameters
-	gpulayers := 0
+	gpulayers := 1
 	tokens := 512
 	threads := runtime.NumCPU()
 	// TODO: set prompt from template
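
Notes (illustrative sketches, not part of the patch):

The configure_file() rule copies ggml-metal.metal to
${CMAKE_CURRENT_BINARY_DIR}/../../. Because go generate runs in the
package directory, cmake -S . -B build puts the binary dir at
llama/build, so ../../ resolves to the repository root, which is where
`go build .` leaves the ollama binary. Since llama.cpp of this era
loads the Metal shader source at runtime rather than compiling it in,
a deployment can be sanity-checked by confirming the file sits beside
the executable. A minimal sketch, standard library only:

    // checkmetal.go: verify ggml-metal.metal is next to the running binary,
    // which is the constraint the commit message describes.
    package main

    import (
        "fmt"
        "os"
        "path/filepath"
    )

    func main() {
        exe, err := os.Executable()
        if err != nil {
            fmt.Fprintln(os.Stderr, "cannot locate executable:", err)
            os.Exit(1)
        }
        // If this file is missing, Metal initialization fails at model load.
        shader := filepath.Join(filepath.Dir(exe), "ggml-metal.metal")
        if _, err := os.Stat(shader); err != nil {
            fmt.Fprintln(os.Stderr, "missing:", shader)
            os.Exit(1)
        }
        fmt.Println("found:", shader)
    }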
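
The new llama/llama_metal.go exists only to carry go:generate directives
behind the metal build tag: running `go generate llama/llama_metal.go`
(step 1 above) configures and builds the CMake project with
-DLLAMA_METAL=on, which is what makes the LLAMA_METAL branch in
CMakeLists.txt fire. A hypothetical non-Metal counterpart (not in this
patch) would follow the same pattern with the tag negated:

    //go:build !metal

    package llama

    // Default (CPU-only) build: the same cmake invocation, without Metal.
    //go:generate cmake -S . -B build --fresh
    //go:generate cmake --build build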
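
The routes.go change flips the default gpulayers from 0 to 1. With the
Metal backend in llama.cpp of this vintage that is effectively an on/off
switch: any nonzero layer count initializes Metal and evaluation runs on
the GPU. The TODO in generate() says gpulayers, tokens, and threads
should become request parameters; one possible shape, with hypothetical
type and field names (not part of this patch), is:

    package server

    import (
        "runtime"

        "github.com/gin-gonic/gin"
    )

    // GenerateRequest is a hypothetical request body for generate().
    // Pointer fields distinguish "not set" from an explicit zero.
    type GenerateRequest struct {
        Prompt    string `json:"prompt"`
        GPULayers *int   `json:"gpu_layers"`
        Tokens    *int   `json:"tokens"`
        Threads   *int   `json:"threads"`
    }

    // generateOptions applies the defaults hard-coded in routes.go after
    // this patch, then lets the request override them.
    func generateOptions(c *gin.Context) (gpulayers, tokens, threads int, prompt string, err error) {
        gpulayers, tokens, threads = 1, 512, runtime.NumCPU()

        var req GenerateRequest
        if err = c.ShouldBindJSON(&req); err != nil {
            return
        }
        if req.GPULayers != nil {
            gpulayers = *req.GPULayers
        }
        if req.Tokens != nil {
            tokens = *req.Tokens
        }
        if req.Threads != nil {
            threads = *req.Threads
        }
        prompt = req.Prompt
        return
    }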