//go:build linux || windows

package gpu

/*
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -lpthread

#include "gpu_info.h"
*/
import "C"
import (
	"bufio"
	"bytes"
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
	"runtime"
	"strings"
	"sync"
	"unsafe"

	"github.com/ollama/ollama/envconfig"
	"github.com/ollama/ollama/format"
)

// cudaHandles tracks the NVIDIA management libraries we were able to load,
// along with the device count they reported.
type cudaHandles struct {
	deviceCount int
	cudart      *C.cudart_handle_t
	nvcuda      *C.nvcuda_handle_t
	nvml        *C.nvml_handle_t
}

// oneapiHandles tracks the Intel oneAPI (Level Zero) management library, if loaded.
type oneapiHandles struct {
	oneapi      *C.oneapi_handle_t
	deviceCount int
}

const (
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
	// TODO OneAPI minimum memory
)

var (
	gpuMutex      sync.Mutex
	bootstrapped  bool
	cpuCapability CPUCapability
	cpus          []CPUInfo
	cudaGPUs      []CudaGPUInfo
	nvcudaLibPath string
	cudartLibPath string
	oneapiLibPath string
	nvmlLibPath   string
	rocmGPUs      []RocmGPUInfo
	oneapiGPUs    []OneapiGPUInfo
)

// With our current CUDA compile flags, GPUs older than compute capability 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0}

var RocmComputeMin = 9

// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512M is what they typically report, so anything less than 1G must be iGPU
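
// A sketch of how this threshold is assumed to be applied by the per-vendor
// discovery code (illustrative, not the actual call site):
//
//	if gpuInfo.TotalMemory < uint64(IGPUMemLimit) {
//		// report as an integrated GPU rather than dedicated VRAM
//	}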

var CudartLinuxGlobs = []string{
	"/usr/local/cuda/lib64/libcudart.so*",
	"/usr/lib/x86_64-linux-gnu/nvidia/current/libcudart.so*",
	"/usr/lib/x86_64-linux-gnu/libcudart.so*",
	"/usr/lib/wsl/lib/libcudart.so*",
	"/usr/lib/wsl/drivers/*/libcudart.so*",
	"/opt/cuda/lib64/libcudart.so*",
	"/usr/local/cuda*/targets/aarch64-linux/lib/libcudart.so*",
	"/usr/lib/aarch64-linux-gnu/nvidia/current/libcudart.so*",
	"/usr/lib/aarch64-linux-gnu/libcudart.so*",
	"/usr/local/cuda/lib*/libcudart.so*",
	"/usr/lib*/libcudart.so*",
	"/usr/local/lib*/libcudart.so*",
}

var CudartWindowsGlobs = []string{
	"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
}

var NvmlWindowsGlobs = []string{
	"c:\\Windows\\System32\\nvml.dll",
}

var NvcudaLinuxGlobs = []string{
	"/usr/local/cuda*/targets/*/lib/libcuda.so*",
	"/usr/lib/*-linux-gnu/nvidia/current/libcuda.so*",
	"/usr/lib/*-linux-gnu/libcuda.so*",
	"/usr/lib/wsl/lib/libcuda.so*",
	"/usr/lib/wsl/drivers/*/libcuda.so*",
	"/opt/cuda/lib*/libcuda.so*",
	"/usr/local/cuda/lib*/libcuda.so*",
	"/usr/lib*/libcuda.so*",
	"/usr/local/lib*/libcuda.so*",
}

var NvcudaWindowsGlobs = []string{
	"c:\\windows\\system*\\nvcuda.dll",
}

var OneapiWindowsGlobs = []string{
	"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
}

var OneapiLinuxGlobs = []string{
	"/usr/lib/x86_64-linux-gnu/libze_intel_gpu.so*",
	"/usr/lib*/libze_intel_gpu.so*",
}

// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra string = os.Getenv("JETSON_JETPACK")
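
// A hypothetical consumer of CudaTegra (the actual overhead adjustments live
// in the memory-prediction code, not here):
//
//	if CudaTegra != "" {
//		// Jetson/L4T: CPU and GPU share memory, so reserve less overhead
//	}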

// initCudaHandles loads whichever NVIDIA management libraries are available,
// preferring a previously discovered library path, then NVML (Windows only,
// used to refresh free memory), then the driver API (nvcuda), then the
// runtime (cudart).
// Note: gpuMutex must already be held
func initCudaHandles() *cudaHandles {
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing

	cHandles := &cudaHandles{}
	// Short Circuit if we already know which library to use
	if nvmlLibPath != "" {
		cHandles.nvml, _ = LoadNVMLMgmt([]string{nvmlLibPath})
		return cHandles
	}
	if nvcudaLibPath != "" {
		cHandles.deviceCount, cHandles.nvcuda, _ = LoadNVCUDAMgmt([]string{nvcudaLibPath})
		return cHandles
	}
	if cudartLibPath != "" {
		cHandles.deviceCount, cHandles.cudart, _ = LoadCUDARTMgmt([]string{cudartLibPath})
		return cHandles
	}

	slog.Debug("searching for GPU discovery libraries for NVIDIA")
	var cudartMgmtName string
	var cudartMgmtPatterns []string
	var nvcudaMgmtName string
	var nvcudaMgmtPatterns []string
	var nvmlMgmtName string
	var nvmlMgmtPatterns []string

	tmpDir, _ := PayloadsDir()
	switch runtime.GOOS {
	case "windows":
		cudartMgmtName = "cudart64_*.dll"
		localAppData := os.Getenv("LOCALAPPDATA")
		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", cudartMgmtName)}
		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartWindowsGlobs...)

		// Aligned with driver, we can't carry as payloads
		nvcudaMgmtName = "nvcuda.dll"
		nvcudaMgmtPatterns = NvcudaWindowsGlobs

		// Use nvml to refresh free memory on windows only
		nvmlMgmtName = "nvml.dll"
		nvmlMgmtPatterns = make([]string, len(NvmlWindowsGlobs))
		copy(nvmlMgmtPatterns, NvmlWindowsGlobs)
	case "linux":
		cudartMgmtName = "libcudart.so*"
		if tmpDir != "" {
			// TODO - add "payloads" for subprocess
			cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", cudartMgmtName)}
		}
		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)

		// Aligned with driver, we can't carry as payloads
		nvcudaMgmtName = "libcuda.so*"
		nvcudaMgmtPatterns = NvcudaLinuxGlobs

		// nvml omitted on linux
	default:
		return cHandles
	}

	if len(nvmlMgmtPatterns) > 0 {
		nvmlLibPaths := FindGPULibs(nvmlMgmtName, nvmlMgmtPatterns)
		if len(nvmlLibPaths) > 0 {
			nvml, libPath := LoadNVMLMgmt(nvmlLibPaths)
			if nvml != nil {
				slog.Debug("nvidia-ml loaded", "library", libPath)
				cHandles.nvml = nvml
				nvmlLibPath = libPath
			}
		}
	}

	nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
	if len(nvcudaLibPaths) > 0 {
		deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
		if nvcuda != nil {
			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
			cHandles.nvcuda = nvcuda
			cHandles.deviceCount = deviceCount
			nvcudaLibPath = libPath
			return cHandles
		}
	}

	cudartLibPaths := FindGPULibs(cudartMgmtName, cudartMgmtPatterns)
	if len(cudartLibPaths) > 0 {
		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
		if cudart != nil {
			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
			cHandles.cudart = cudart
			cHandles.deviceCount = deviceCount
			cudartLibPath = libPath
			return cHandles
		}
	}

	return cHandles
}

// Note: gpuMutex must already be held
func initOneAPIHandles() *oneapiHandles {
	oHandles := &oneapiHandles{}
	var oneapiMgmtName string
	var oneapiMgmtPatterns []string

	// Short Circuit if we already know which library to use
	if oneapiLibPath != "" {
		oHandles.deviceCount, oHandles.oneapi, _ = LoadOneapiMgmt([]string{oneapiLibPath})
		return oHandles
	}

	switch runtime.GOOS {
	case "windows":
		oneapiMgmtName = "ze_intel_gpu64.dll"
		oneapiMgmtPatterns = OneapiWindowsGlobs
	case "linux":
		oneapiMgmtName = "libze_intel_gpu.so"
		oneapiMgmtPatterns = OneapiLinuxGlobs
	default:
		return oHandles
	}

	oneapiLibPaths := FindGPULibs(oneapiMgmtName, oneapiMgmtPatterns)
	if len(oneapiLibPaths) > 0 {
		oHandles.deviceCount, oHandles.oneapi, oneapiLibPath = LoadOneapiMgmt(oneapiLibPaths)
	}
	return oHandles
}

// GetCPUInfo returns the CPU as a GpuInfoList entry, bootstrapping full
// discovery first if it hasn't happened yet.
func GetCPUInfo() GpuInfoList {
	gpuMutex.Lock()
	if !bootstrapped {
		gpuMutex.Unlock()
		GetGPUInfo()
	} else {
		gpuMutex.Unlock()
	}
	return GpuInfoList{cpus[0].GpuInfo}
}

// GetGPUInfo discovers supported GPUs (bootstrapping library handles on the
// first call) and refreshes free-memory data on subsequent calls.
func GetGPUInfo() GpuInfoList {
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()

	needRefresh := true
	var cHandles *cudaHandles
	var oHandles *oneapiHandles
	defer func() {
		if cHandles != nil {
			if cHandles.cudart != nil {
				C.cudart_release(*cHandles.cudart)
			}
			if cHandles.nvcuda != nil {
				C.nvcuda_release(*cHandles.nvcuda)
			}
			if cHandles.nvml != nil {
				C.nvml_release(*cHandles.nvml)
			}
		}
		if oHandles != nil {
			if oHandles.oneapi != nil {
				// TODO - is this needed?
				C.oneapi_release(*oHandles.oneapi)
			}
		}
	}()

	if !bootstrapped {
		slog.Debug("Detecting GPUs")
		needRefresh = false
		cpuCapability = getCPUCapability()
		var memInfo C.mem_info_t
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		}
		cpus = []CPUInfo{
			{
				GpuInfo: GpuInfo{
					memInfo: mem,
					Library: "cpu",
					Variant: cpuCapability.ToVariant(),
					ID:      "0",
				},
			},
		}

		// Fallback to CPU mode if we're lacking required vector extensions on x86
		if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
			slog.Warn("CPU does not have minimum vector extensions, GPU inference disabled", "required", GPURunnerCPUCapability.ToString(), "detected", cpuCapability.ToString())
			bootstrapped = true
			// No need to do any GPU discovery, since we can't run on them
			return GpuInfoList{cpus[0].GpuInfo}
		}

		// On windows we bundle the nvidia library one level above the runner dir
		depPath := ""
		if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
			depPath = filepath.Dir(envconfig.RunnersDir)
		}

		// Load ALL libraries
		cHandles = initCudaHandles()

		// NVIDIA
		for i := range cHandles.deviceCount {
			if cHandles.cudart != nil || cHandles.nvcuda != nil {
				gpuInfo := CudaGPUInfo{
					GpuInfo: GpuInfo{
						Library: "cuda",
					},
					index: i,
				}
				var driverMajor int
				var driverMinor int
				if cHandles.cudart != nil {
					C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
				} else {
					C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
					driverMajor = int(cHandles.nvcuda.driver_major)
					driverMinor = int(cHandles.nvcuda.driver_minor)
				}
				if memInfo.err != nil {
					slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
					C.free(unsafe.Pointer(memInfo.err))
					continue
				}
				if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
					slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
					continue
				}
				gpuInfo.TotalMemory = uint64(memInfo.total)
				gpuInfo.FreeMemory = uint64(memInfo.free)
				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
				gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
				gpuInfo.MinimumMemory = cudaMinimumMemory
				gpuInfo.DependencyPath = depPath
				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
				gpuInfo.DriverMajor = driverMajor
				gpuInfo.DriverMinor = driverMinor
				// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
				cudaGPUs = append(cudaGPUs, gpuInfo)
			}
		}

		// Intel
		oHandles = initOneAPIHandles()
		for d := 0; oHandles.oneapi != nil && d < int(oHandles.oneapi.num_drivers); d++ {
			if oHandles.oneapi == nil {
				// shouldn't happen
				slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
				continue
			}
			devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
			for i := 0; i < int(devCount); i++ {
				gpuInfo := OneapiGPUInfo{
					GpuInfo: GpuInfo{
						Library: "oneapi",
					},
					driverIndex: d,
					gpuIndex:    i,
				}
				// TODO - split bootstrapping from updating free memory
				C.oneapi_check_vram(*oHandles.oneapi, C.int(d), C.int(i), &memInfo)
				// TODO - convert this to MinimumMemory based on testing...
				totalFreeMem := float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
				memInfo.free = C.uint64_t(totalFreeMem)
				gpuInfo.TotalMemory = uint64(memInfo.total)
				gpuInfo.FreeMemory = uint64(memInfo.free)
				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
				// TODO dependency path?
				oneapiGPUs = append(oneapiGPUs, gpuInfo)
			}
		}

		rocmGPUs = AMDGetGPUInfo()
		bootstrapped = true
	}

	// For detected GPUs, load library if not loaded

	// Refresh free memory usage
	if needRefresh {
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		} else {
			slog.Debug("updating system memory data",
				slog.Group(
					"before",
					"total", format.HumanBytes2(cpus[0].TotalMemory),
					"free", format.HumanBytes2(cpus[0].FreeMemory),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(mem.TotalMemory),
					"free", format.HumanBytes2(mem.FreeMemory),
				),
			)
			cpus[0].FreeMemory = mem.FreeMemory
		}

		var memInfo C.mem_info_t
		if cHandles == nil && len(cudaGPUs) > 0 {
			cHandles = initCudaHandles()
		}
		for i, gpu := range cudaGPUs {
			if cHandles.nvml != nil {
				C.nvml_get_free(*cHandles.nvml, C.int(gpu.index), &memInfo.free, &memInfo.total, &memInfo.used)
			} else if cHandles.cudart != nil {
				C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
			} else if cHandles.nvcuda != nil {
				C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
				memInfo.used = memInfo.total - memInfo.free
			} else {
				// shouldn't happen
				slog.Warn("no valid cuda library loaded to refresh vram usage")
				break
			}
			if memInfo.err != nil {
				slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
				C.free(unsafe.Pointer(memInfo.err))
				continue
			}
			if memInfo.free == 0 {
				slog.Warn("error looking up nvidia GPU memory")
				continue
			}
			slog.Debug("updating cuda memory data",
				"gpu", gpu.ID,
				"name", gpu.Name,
				slog.Group(
					"before",
					"total", format.HumanBytes2(gpu.TotalMemory),
					"free", format.HumanBytes2(gpu.FreeMemory),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(uint64(memInfo.total)),
					"free", format.HumanBytes2(uint64(memInfo.free)),
					"used", format.HumanBytes2(uint64(memInfo.used)),
				),
			)
			cudaGPUs[i].FreeMemory = uint64(memInfo.free)
		}

		if oHandles == nil && len(oneapiGPUs) > 0 {
			oHandles = initOneAPIHandles()
		}
		for i, gpu := range oneapiGPUs {
			if oHandles.oneapi == nil {
				// shouldn't happen
				slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
				continue
			}
			C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
			// TODO - convert this to MinimumMemory based on testing...
			totalFreeMem := float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
			memInfo.free = C.uint64_t(totalFreeMem)
			oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
		}

		err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
		if err != nil {
			slog.Debug("problem refreshing ROCm free memory", "error", err)
		}
	}

	resp := []GpuInfo{}
	for _, gpu := range cudaGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range rocmGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range oneapiGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	if len(resp) == 0 {
		resp = append(resp, cpus[0].GpuInfo)
	}
	return resp
}
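
// Example of inspecting the discovery results (an illustrative caller, not
// part of this package's API):
//
//	for _, g := range GetGPUInfo() {
//		slog.Info("gpu", "library", g.Library, "id", g.ID,
//			"total", format.HumanBytes2(g.TotalMemory),
//			"free", format.HumanBytes2(g.FreeMemory))
//	}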

// GetCPUMem reports total and free system memory. On Linux it parses
// /proc/meminfo; elsewhere it falls back to the C helper.
func GetCPUMem() (memInfo, error) {
	if runtime.GOOS == "linux" {
		return GetLinuxMemInfo()
	}
	var ret memInfo
	var info C.mem_info_t
	C.cpu_check_ram(&info)
	if info.err != nil {
		defer C.free(unsafe.Pointer(info.err))
		return ret, fmt.Errorf("%s", C.GoString(info.err))
	}
	ret.FreeMemory = uint64(info.free)
	ret.TotalMemory = uint64(info.total)
	return ret, nil
}

// FindGPULibs searches PATH/LD_LIBRARY_PATH plus the supplied default globs
// for candidate management libraries, resolving symlinks and de-duplicating.
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
	var ldPaths []string
	var patterns []string
	gpuLibPaths := []string{}
	slog.Debug("Searching for GPU library", "name", baseLibName)

	switch runtime.GOOS {
	case "windows":
		ldPaths = strings.Split(os.Getenv("PATH"), ";")
	case "linux":
		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
	default:
		return gpuLibPaths
	}

	// Start with whatever we find in the PATH/LD_LIBRARY_PATH
	for _, ldPath := range ldPaths {
		d, err := filepath.Abs(ldPath)
		if err != nil {
			continue
		}
		patterns = append(patterns, filepath.Join(d, baseLibName+"*"))
	}
	patterns = append(patterns, defaultPatterns...)
	slog.Debug("gpu library search", "globs", patterns)
	for _, pattern := range patterns {
		// Nvidia PhysX known to return bogus results
		if strings.Contains(pattern, "PhysX") {
			slog.Debug("skipping PhysX cuda library path", "path", pattern)
			continue
		}
		// Ignore glob discovery errors
		matches, _ := filepath.Glob(pattern)
		for _, match := range matches {
			// Resolve any links so we don't try the same lib multiple times
			// and weed out any dups across globs
			libPath := match
			tmp := match
			var err error
			for ; err == nil; tmp, err = os.Readlink(libPath) {
				if !filepath.IsAbs(tmp) {
					tmp = filepath.Join(filepath.Dir(libPath), tmp)
				}
				libPath = tmp
			}
			isNew := true
			for _, cmp := range gpuLibPaths {
				if cmp == libPath {
					isNew = false
					break
				}
			}
			if isNew {
				gpuLibPaths = append(gpuLibPaths, libPath)
			}
		}
	}
	slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
	return gpuLibPaths
}
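
// For example (a hypothetical caller; the init functions above use the
// platform-specific glob lists the same way):
//
//	paths := FindGPULibs("libcudart.so*", CudartLinuxGlobs)
//	// paths holds resolved, de-duplicated candidates to try in priority order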

// LoadCUDARTMgmt tries each candidate cudart library in order, returning the
// device count, a handle, and the library path on the first success.
func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
	var resp C.cudart_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range cudartLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.cudart_init(lib, &resp)
		if resp.err != nil {
			slog.Debug("Unable to load cudart", "library", libPath, "error", C.GoString(resp.err))
			C.free(unsafe.Pointer(resp.err))
		} else {
			return int(resp.num_devices), &resp.ch, libPath
		}
	}
	return 0, nil, ""
}

// LoadNVCUDAMgmt tries each candidate CUDA driver library in order, returning
// the device count, a handle, and the library path on the first success.
func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
	var resp C.nvcuda_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvcudaLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.nvcuda_init(lib, &resp)
		if resp.err != nil {
			slog.Debug("Unable to load nvcuda", "library", libPath, "error", C.GoString(resp.err))
			C.free(unsafe.Pointer(resp.err))
		} else {
			return int(resp.num_devices), &resp.ch, libPath
		}
	}
	return 0, nil, ""
}

// LoadNVMLMgmt tries each candidate NVML library in order, returning a handle
// and the library path on the first success.
func LoadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string) {
	var resp C.nvml_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvmlLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.nvml_init(lib, &resp)
		if resp.err != nil {
			slog.Info(fmt.Sprintf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err)))
			C.free(unsafe.Pointer(resp.err))
		} else {
			return &resp.ch, libPath
		}
	}
	return nil, ""
}

// LoadOneapiMgmt tries each candidate Level Zero library in order, summing
// device counts across all drivers on the first success.
func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
	var resp C.oneapi_init_resp_t
	numDevices := 0
	resp.oh.verbose = getVerboseState()
	for _, libPath := range oneapiLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.oneapi_init(lib, &resp)
		if resp.err != nil {
			slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err))
			C.free(unsafe.Pointer(resp.err))
		} else {
			for i := 0; i < int(resp.oh.num_drivers); i++ {
				numDevices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
			}
			return numDevices, &resp.oh, libPath
		}
	}
	return 0, nil, ""
}

func getVerboseState() C.uint16_t {
	if envconfig.Debug {
		return C.uint16_t(1)
	}
	return C.uint16_t(0)
}

// Given the list of GPUs this instantiation is targeted for,
// figure out the visible devices environment variable
//
// If different libraries are detected, the first one is what we use
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
	if len(l) == 0 {
		return "", ""
	}
	switch l[0].Library {
	case "cuda":
		return cudaGetVisibleDevicesEnv(l)
	case "rocm":
		return rocmGetVisibleDevicesEnv(l)
	case "oneapi":
		return oneapiGetVisibleDevicesEnv(l)
	default:
		slog.Debug("no filter required for library " + l[0].Library)
		return "", ""
	}
}
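
// A sketch of how a caller is assumed to apply the result when spawning a
// runner subprocess (cmd is a hypothetical exec.Cmd):
//
//	if key, val := gpus.GetVisibleDevicesEnv(); key != "" {
//		cmd.Env = append(cmd.Env, key+"="+val)
//	}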

// GetLinuxMemInfo parses /proc/meminfo. It prefers MemAvailable when the
// kernel provides it, otherwise it approximates with MemFree + Buffers + Cached.
func GetLinuxMemInfo() (memInfo, error) {
	var mem memInfo
	var total, available, free, buffers, cached uint64
	f, err := os.Open("/proc/meminfo")
	if err != nil {
		return mem, err
	}
	defer f.Close()
	s := bufio.NewScanner(f)
	for s.Scan() {
		switch {
		case bytes.HasPrefix(s.Bytes(), []byte("MemTotal:")):
			_, err = fmt.Sscanf(s.Text(), "MemTotal:%d", &total)
		case bytes.HasPrefix(s.Bytes(), []byte("MemAvailable:")):
			_, err = fmt.Sscanf(s.Text(), "MemAvailable:%d", &available)
		case bytes.HasPrefix(s.Bytes(), []byte("MemFree:")):
			_, err = fmt.Sscanf(s.Text(), "MemFree:%d", &free)
		case bytes.HasPrefix(s.Bytes(), []byte("Buffers:")):
			_, err = fmt.Sscanf(s.Text(), "Buffers:%d", &buffers)
		case bytes.HasPrefix(s.Bytes(), []byte("Cached:")):
			_, err = fmt.Sscanf(s.Text(), "Cached:%d", &cached)
		default:
			continue
		}
		if err != nil {
			return mem, err
		}
		if total > 0 && available > 0 {
			mem.TotalMemory = total * 1024
			mem.FreeMemory = available * 1024
			return mem, nil
		}
	}
	mem.TotalMemory = total * 1024
	mem.FreeMemory = (free + buffers + cached) * 1024
	return mem, nil
}
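
// For reference, the lines being parsed look like this (values are in KiB;
// the numbers below are illustrative):
//
//	MemTotal:       32658884 kB
//	MemAvailable:   24343852 kB
//
// On kernels reporting MemAvailable (3.14+) the function returns early with
// TotalMemory = MemTotal*1024 and FreeMemory = MemAvailable*1024; otherwise
// it falls back to (MemFree + Buffers + Cached) * 1024.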