diff --git a/gpu/gpu.go b/gpu/gpu.go index 6937de7a..57b04da1 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -13,7 +13,10 @@ import "C" import ( "fmt" "log" + "os" + "path/filepath" "runtime" + "strings" "sync" "unsafe" ) @@ -29,31 +32,79 @@ var gpuHandles *handles = nil // With our current CUDA compile flags, 5.2 and older will not work properly const CudaComputeMajorMin = 6 +// Possible locations for the nvidia-ml library +var CudaLinuxGlobs = []string{ + "/usr/local/cuda/lib64/libnvidia-ml.so*", + "/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-ml.so*", + "/usr/lib/x86_64-linux-gnu/libnvidia-ml.so*", + "/usr/lib/wsl/lib/libnvidia-ml.so*", + "/opt/cuda/lib64/libnvidia-ml.so*", + "/usr/lib*/libnvidia-ml.so*", + "/usr/local/lib*/libnvidia-ml.so*", + "/usr/lib/aarch64-linux-gnu/nvidia/current/libnvidia-ml.so*", + "/usr/lib/aarch64-linux-gnu/libnvidia-ml.so*", +} + +var CudaWindowsGlobs = []string{ + "c:\\Windows\\System32\\nvml.dll", +} + +var RocmLinuxGlobs = []string{ + "/opt/rocm*/lib*/librocm_smi64.so*", +} + +var RocmWindowsGlobs = []string{ + "c:\\Windows\\System32\\rocm_smi64.dll", +} + // Note: gpuMutex must already be held func initGPUHandles() { + // TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing + + var cudaMgmtName string + var cudaMgmtPatterns []string + var rocmMgmtName string + var rocmMgmtPatterns []string + switch runtime.GOOS { + case "windows": + cudaMgmtName = "nvml.dll" + cudaMgmtPatterns = make([]string, len(CudaWindowsGlobs)) + copy(cudaMgmtPatterns, CudaWindowsGlobs) + rocmMgmtName = "rocm_smi64.dll" + rocmMgmtPatterns = make([]string, len(RocmWindowsGlobs)) + copy(rocmMgmtPatterns, RocmWindowsGlobs) + case "linux": + cudaMgmtName = "libnvidia-ml.so" + cudaMgmtPatterns = make([]string, len(CudaLinuxGlobs)) + copy(cudaMgmtPatterns, CudaLinuxGlobs) + rocmMgmtName = "librocm_smi64.so" + rocmMgmtPatterns = make([]string, len(RocmLinuxGlobs)) + copy(rocmMgmtPatterns, RocmLinuxGlobs) + default: + return + } + log.Printf("Detecting GPU type") gpuHandles = &handles{nil, nil} - var resp C.cuda_init_resp_t - C.cuda_init(&resp) - if resp.err != nil { - log.Printf("CUDA not detected: %s", C.GoString(resp.err)) - C.free(unsafe.Pointer(resp.err)) - - var resp C.rocm_init_resp_t - C.rocm_init(&resp) - if resp.err != nil { - log.Printf("ROCm not detected: %s", C.GoString(resp.err)) - C.free(unsafe.Pointer(resp.err)) - } else { - log.Printf("Radeon GPU detected") - rocm := resp.rh - gpuHandles.rocm = &rocm + cudaLibPaths := FindGPULibs(cudaMgmtName, cudaMgmtPatterns) + if len(cudaLibPaths) > 0 { + cuda := LoadCUDAMgmt(cudaLibPaths) + if cuda != nil { + log.Printf("Nvidia GPU detected") + gpuHandles.cuda = cuda + return + } + } + + rocmLibPaths := FindGPULibs(rocmMgmtName, rocmMgmtPatterns) + if len(rocmLibPaths) > 0 { + rocm := LoadROCMMgmt(rocmLibPaths) + if rocm != nil { + log.Printf("Radeon GPU detected") + gpuHandles.rocm = rocm + return } - } else { - log.Printf("Nvidia GPU detected") - cuda := resp.ch - gpuHandles.cuda = &cuda } } @@ -143,3 +194,88 @@ func CheckVRAM() (int64, error) { return 0, fmt.Errorf("no GPU detected") // TODO - better handling of CPU based memory determiniation } + +func FindGPULibs(baseLibName string, patterns []string) []string { + // Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them + var ldPaths []string + gpuLibPaths := []string{} + log.Printf("Searching for GPU management library %s", baseLibName) + + switch runtime.GOOS { + case "windows": + ldPaths = strings.Split(os.Getenv("PATH"), ";") + case "linux": + ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":") + default: + return gpuLibPaths + } + // Start with whatever we find in the PATH/LD_LIBRARY_PATH + for _, ldPath := range ldPaths { + d, err := filepath.Abs(ldPath) + if err != nil { + continue + } + patterns = append(patterns, filepath.Join(d, baseLibName+"*")) + } + for _, pattern := range patterns { + // Ignore glob discovery errors + matches, _ := filepath.Glob(pattern) + for _, match := range matches { + // Resolve any links so we don't try the same lib multiple times + // and weed out any dups across globs + libPath := match + tmp := match + var err error + for ; err == nil; tmp, err = os.Readlink(libPath) { + if !filepath.IsAbs(tmp) { + tmp = filepath.Join(filepath.Dir(libPath), tmp) + } + libPath = tmp + } + new := true + for _, cmp := range gpuLibPaths { + if cmp == libPath { + new = false + break + } + } + if new { + gpuLibPaths = append(gpuLibPaths, libPath) + } + } + } + log.Printf("Discovered GPU libraries: %v", gpuLibPaths) + return gpuLibPaths +} + +func LoadCUDAMgmt(cudaLibPaths []string) *C.cuda_handle_t { + var resp C.cuda_init_resp_t + for _, libPath := range cudaLibPaths { + lib := C.CString(libPath) + defer C.free(unsafe.Pointer(lib)) + C.cuda_init(lib, &resp) + if resp.err != nil { + log.Printf("Unable to load CUDA management library %s: %s", libPath, C.GoString(resp.err)) + C.free(unsafe.Pointer(resp.err)) + } else { + return &resp.ch + } + } + return nil +} + +func LoadROCMMgmt(rocmLibPaths []string) *C.rocm_handle_t { + var resp C.rocm_init_resp_t + for _, libPath := range rocmLibPaths { + lib := C.CString(libPath) + defer C.free(unsafe.Pointer(lib)) + C.rocm_init(lib, &resp) + if resp.err != nil { + log.Printf("Unable to load ROCm management library %s: %s", libPath, C.GoString(resp.err)) + C.free(unsafe.Pointer(resp.err)) + } else { + return &resp.rh + } + } + return nil +} diff --git a/gpu/gpu_info_cuda.c b/gpu/gpu_info_cuda.c index 9e76b791..4ee0d8f9 100644 --- a/gpu/gpu_info_cuda.c +++ b/gpu/gpu_info_cuda.c @@ -4,26 +4,9 @@ #include -#ifndef _WIN32 -const char *cuda_lib_paths[] = { - "libnvidia-ml.so", - "/usr/local/cuda/lib64/libnvidia-ml.so", - "/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-ml.so", - "/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1", - "/usr/lib/wsl/lib/libnvidia-ml.so.1", // TODO Maybe glob? - NULL, -}; -#else -const char *cuda_lib_paths[] = { - "nvml.dll", - "", - NULL, -}; -#endif - #define CUDA_LOOKUP_SIZE 6 -void cuda_init(cuda_init_resp_t *resp) { +void cuda_init(char *cuda_lib_path, cuda_init_resp_t *resp) { nvmlReturn_t ret; resp->err = NULL; const int buflen = 256; @@ -42,16 +25,12 @@ void cuda_init(cuda_init_resp_t *resp) { {"nvmlDeviceGetCudaComputeCapability", (void *)&resp->ch.getComputeCapability}, }; - for (i = 0; cuda_lib_paths[i] != NULL && resp->ch.handle == NULL; i++) { - resp->ch.handle = LOAD_LIBRARY(cuda_lib_paths[i], RTLD_LAZY); - } + resp->ch.handle = LOAD_LIBRARY(cuda_lib_path, RTLD_LAZY); if (!resp->ch.handle) { - // TODO improve error message, as the LOAD_ERR will have typically have the - // final path that was checked which might be confusing. char *msg = LOAD_ERR(); snprintf(buf, buflen, "Unable to load %s library to query for Nvidia GPUs: %s", - cuda_lib_paths[0], msg); + cuda_lib_path, msg); free(msg); resp->err = strdup(buf); return; @@ -73,6 +52,8 @@ void cuda_init(cuda_init_resp_t *resp) { ret = (*resp->ch.initFn)(); if (ret != NVML_SUCCESS) { + UNLOAD_LIBRARY(resp->ch.handle); + resp->ch.handle = NULL; snprintf(buf, buflen, "nvml vram init failure: %d", ret); resp->err = strdup(buf); } diff --git a/gpu/gpu_info_cuda.h b/gpu/gpu_info_cuda.h index 81995ab2..61c48012 100644 --- a/gpu/gpu_info_cuda.h +++ b/gpu/gpu_info_cuda.h @@ -36,7 +36,7 @@ typedef struct cuda_compute_capability { int minor; } cuda_compute_capability_t; -void cuda_init(cuda_init_resp_t *resp); +void cuda_init(char *cuda_lib_path, cuda_init_resp_t *resp); void cuda_check_vram(cuda_handle_t ch, mem_info_t *resp); void cuda_compute_capability(cuda_handle_t ch, cuda_compute_capability_t *cc); diff --git a/gpu/gpu_info_rocm.c b/gpu/gpu_info_rocm.c index 9901172b..58d1c973 100644 --- a/gpu/gpu_info_rocm.c +++ b/gpu/gpu_info_rocm.c @@ -4,22 +4,7 @@ #include -#ifndef _WIN32 -const char *rocm_lib_paths[] = { - "librocm_smi64.so", - "/opt/rocm/lib/librocm_smi64.so", - NULL, -}; -#else -// TODO untested -const char *rocm_lib_paths[] = { - "rocm_smi64.dll", - "/opt/rocm/lib/rocm_smi64.dll", - NULL, -}; -#endif - -void rocm_init(rocm_init_resp_t *resp) { +void rocm_init(char *rocm_lib_path, rocm_init_resp_t *resp) { rsmi_status_t ret; resp->err = NULL; const int buflen = 256; @@ -36,14 +21,12 @@ void rocm_init(rocm_init_resp_t *resp) { // { "rsmi_dev_id_get", (void*)&resp->rh.getHandle }, }; - for (i = 0; rocm_lib_paths[i] != NULL && resp->rh.handle == NULL; i++) { - resp->rh.handle = LOAD_LIBRARY(rocm_lib_paths[i], RTLD_LAZY); - } + resp->rh.handle = LOAD_LIBRARY(rocm_lib_path, RTLD_LAZY); if (!resp->rh.handle) { char *msg = LOAD_ERR(); snprintf(buf, buflen, "Unable to load %s library to query for Radeon GPUs: %s\n", - rocm_lib_paths[0], msg); + rocm_lib_path, msg); free(msg); resp->err = strdup(buf); return; @@ -53,6 +36,7 @@ void rocm_init(rocm_init_resp_t *resp) { *l[i].p = LOAD_SYMBOL(resp->rh.handle, l[i].s); if (!l[i].p) { UNLOAD_LIBRARY(resp->rh.handle); + resp->rh.handle = NULL; char *msg = LOAD_ERR(); snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s, msg); @@ -64,6 +48,8 @@ void rocm_init(rocm_init_resp_t *resp) { ret = (*resp->rh.initFn)(0); if (ret != RSMI_STATUS_SUCCESS) { + UNLOAD_LIBRARY(resp->rh.handle); + resp->rh.handle = NULL; snprintf(buf, buflen, "rocm vram init failure: %d", ret); resp->err = strdup(buf); } @@ -83,7 +69,7 @@ void rocm_check_vram(rocm_handle_t h, mem_info_t *resp) { int i; if (h.handle == NULL) { - resp->err = strdup("nvml handle sn't initialized"); + resp->err = strdup("rocm handle not initialized"); return; } diff --git a/gpu/gpu_info_rocm.h b/gpu/gpu_info_rocm.h index 8d7a04ae..1f74713b 100644 --- a/gpu/gpu_info_rocm.h +++ b/gpu/gpu_info_rocm.h @@ -29,7 +29,7 @@ typedef struct rocm_init_resp { rocm_handle_t rh; } rocm_init_resp_t; -void rocm_init(rocm_init_resp_t *resp); +void rocm_init(char *rocm_lib_path, rocm_init_resp_t *resp); void rocm_check_vram(rocm_handle_t rh, mem_info_t *resp); #endif // __GPU_INFO_ROCM_H__