diff --git a/gpu/gpu.go b/gpu/gpu.go index 52da39ec..b9f6e4e0 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -66,7 +66,7 @@ func GetGPUInfo() GpuInfo { } var memInfo C.mem_info_t - resp := GpuInfo{"", 0, 0} + resp := GpuInfo{} if gpuHandles.cuda != nil { C.cuda_check_vram(*gpuHandles.cuda, &memInfo) if memInfo.err != nil { @@ -103,6 +103,22 @@ func GetGPUInfo() GpuInfo { return resp } +// getCPUMem queries total and free system memory through the C helper cpu_check_ram. +// It returns a zero memInfo and a non-nil error when the helper reports a failure. +func getCPUMem() (memInfo, error) { + var ret memInfo + var info C.mem_info_t + C.cpu_check_ram(&info) + if info.err != nil { + defer C.free(unsafe.Pointer(info.err)) + // Pass the C message as an argument, never as the format string. + return ret, fmt.Errorf("%s", C.GoString(info.err)) + } + ret.FreeMemory = uint64(info.free) + ret.TotalMemory = uint64(info.total) + return ret, nil +} + func CheckVRAM() (int64, error) { gpuInfo := GetGPUInfo() if gpuInfo.FreeMemory > 0 && (gpuInfo.Library == "cuda" || gpuInfo.Library == "rocm") { diff --git a/gpu/gpu_darwin.go b/gpu/gpu_darwin.go index 3a7319b4..ecca3b7b 100644 --- a/gpu/gpu_darwin.go +++ b/gpu/gpu_darwin.go @@ -18,12 +18,19 @@ func CheckVRAM() (int64, error) { func GetGPUInfo() GpuInfo { // TODO - Metal vs. x86 macs... 
- + mem, _ := getCPUMem() return GpuInfo{ - Library: "default", + Library: "default", + memInfo: mem, + } +} + +// getCPUMem is a placeholder on darwin: it always reports zero total and free memory and a nil error. +func getCPUMem() (memInfo, error) { + return memInfo{ TotalMemory: 0, FreeMemory: 0, - } + }, nil } func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int { diff --git a/gpu/gpu_info.h b/gpu/gpu_info.h index 7de36465..3b2edc70 100644 --- a/gpu/gpu_info.h +++ b/gpu/gpu_info.h @@ -9,20 +9,21 @@ #include <dlfcn.h> #define LOAD_LIBRARY(lib, flags) dlopen(lib, flags) #define LOAD_SYMBOL(handle, sym) dlsym(handle, sym) -#define LOAD_ERR() dlerror() +#define LOAD_ERR() ({ const char *dlErr = dlerror(); strdup(dlErr != NULL ? dlErr : "unknown error"); }) /* dlerror() returns NULL when no error is pending; the result is heap-allocated and must be freed by the caller */ #define UNLOAD_LIBRARY(handle) dlclose(handle) #else #include <windows.h> #define LOAD_LIBRARY(lib, flags) LoadLibrary(lib) #define LOAD_SYMBOL(handle, sym) GetProcAddress(handle, sym) #define UNLOAD_LIBRARY(handle) FreeLibrary(handle) - -// TODO - refactor this with proper error message handling on windows -inline static char *LOAD_ERR() { - static char errbuf[8]; - snprintf(errbuf, 8, "0x%lx", GetLastError()); - return errbuf; -} +#define LOAD_ERR() ({\ + LPSTR messageBuffer = NULL; \ + size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, \ + NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&messageBuffer, 0, NULL); \ + char *loadErrMsg = strdup((size > 0 && messageBuffer != NULL) ? messageBuffer : "unknown error"); /* FormatMessageA returns 0 on failure, in which case no buffer was allocated */ \ + LocalFree(messageBuffer); \ + loadErrMsg; \ +}) #endif diff --git a/gpu/gpu_info_cpu.c b/gpu/gpu_info_cpu.c index a7987cd4..38e2a563 100644 --- a/gpu/gpu_info_cpu.c +++ b/gpu/gpu_info_cpu.c @@ -6,11 +6,12 @@ void cpu_check_ram(mem_info_t *resp) { resp->err = NULL; MEMORYSTATUSEX info; + info.dwLength = sizeof(info); if (GlobalMemoryStatusEx(&info) != 0) { resp->total = info.ullTotalPhys; resp->free = info.ullAvailPhys; } else { - resp->err = strdup(LOAD_ERR()); + resp->err = LOAD_ERR(); } return; } diff --git a/gpu/gpu_info_cuda.c b/gpu/gpu_info_cuda.c index 20055ed6..52738710 100644 --- a/gpu/gpu_info_cuda.c +++ b/gpu/gpu_info_cuda.c @@ -43,9 +43,11 
@@ void cuda_init(cuda_init_resp_t *resp) { if (!resp->ch.handle) { // TODO improve error message, as the LOAD_ERR will have typically have the // final path that was checked which might be confusing. + char *msg = LOAD_ERR(); snprintf(buf, buflen, "Unable to load %s library to query for Nvidia GPUs: %s", - cuda_lib_paths[0], LOAD_ERR()); + cuda_lib_paths[0], msg); + free(msg); resp->err = strdup(buf); return; } @@ -55,8 +57,10 @@ void cuda_init(cuda_init_resp_t *resp) { if (!l[i].p) { + char *msg = LOAD_ERR(); /* grab the loader error first: unloading the library may clear or overwrite it */ UNLOAD_LIBRARY(resp->ch.handle); resp->ch.handle = NULL; snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s, - LOAD_ERR()); + msg); + free(msg); resp->err = strdup(buf); return; } diff --git a/gpu/gpu_info_rocm.c b/gpu/gpu_info_rocm.c index e69d5cba..367d11fd 100644 --- a/gpu/gpu_info_rocm.c +++ b/gpu/gpu_info_rocm.c @@ -40,9 +40,11 @@ void rocm_init(rocm_init_resp_t *resp) { resp->rh.handle = LOAD_LIBRARY(rocm_lib_paths[i], RTLD_LAZY); } if (!resp->rh.handle) { + char *msg = LOAD_ERR(); snprintf(buf, buflen, "Unable to load %s library to query for Radeon GPUs: %s\n", - rocm_lib_paths[0], LOAD_ERR()); + rocm_lib_paths[0], msg); + free(msg); resp->err = strdup(buf); return; } @@ -51,8 +53,10 @@ void rocm_init(rocm_init_resp_t *resp) { *l[i].p = LOAD_SYMBOL(resp->rh.handle, l[i].s); - if (!l[i].p) { + if (!*l[i].p) { /* test the symbol just stored through the slot, not the slot's own address */ + char *msg = LOAD_ERR(); /* grab the loader error first: unloading the library may clear or overwrite it */ UNLOAD_LIBRARY(resp->rh.handle); snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s, - LOAD_ERR()); + msg); + free(msg); resp->err = strdup(buf); return; } diff --git a/gpu/gpu_test.go b/gpu/gpu_test.go index a5b5c892..d5585d3c 100644 --- a/gpu/gpu_test.go +++ b/gpu/gpu_test.go @@ -23,4 +23,20 @@ func TestBasicGetGPUInfo(t *testing.T) { } } +// TestCPUMemInfo checks that getCPUMem succeeds and, on linux and windows, reports non-zero total and free memory. +func TestCPUMemInfo(t *testing.T) { + info, err := getCPUMem() + assert.NoError(t, err) + switch runtime.GOOS { + case "darwin": + t.Skip("CPU memory not populated on darwin") + case "linux", "windows": + assert.Greater(t, info.TotalMemory, uint64(0)) + assert.Greater(t, 
info.FreeMemory, uint64(0)) + default: + return + } + +} + // TODO - add some logic to figure out card type through other means and actually verify we got back what we expected diff --git a/gpu/types.go b/gpu/types.go index 637e32e6..c3c39210 100644 --- a/gpu/types.go +++ b/gpu/types.go @@ -1,10 +1,15 @@ package gpu +// memInfo holds the total and free memory figures returned by a memory query. +type memInfo struct { + TotalMemory uint64 `json:"total_memory,omitempty"` + FreeMemory uint64 `json:"free_memory,omitempty"` +} + // Beginning of an `ollama info` command type GpuInfo struct { - Library string `json:"library,omitempty"` - TotalMemory uint64 `json:"total_memory,omitempty"` - FreeMemory uint64 `json:"free_memory,omitempty"` + memInfo + Library string `json:"library,omitempty"` // TODO add other useful attributes about the card here for discovery information }