From 380378cc80f3cffd703d5946e87a5ac990df273c Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Sun, 5 May 2024 17:45:43 -0700
Subject: [PATCH] Use our libraries first

Trying to live off the land for CUDA libraries was not the right
strategy. We need to use the version we compiled against to ensure
things work properly.
---
 gpu/gpu.go    |  7 +++++++
 llm/server.go | 43 ++++++++++++++++++++++++++-----------------
 2 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/gpu/gpu.go b/gpu/gpu.go
index 30c25bfc..21666c8d 100644
--- a/gpu/gpu.go
+++ b/gpu/gpu.go
@@ -166,6 +166,12 @@ func GetGPUInfo() GpuInfoList {
 		slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.")
 	}
 
+	// On Windows we bundle the NVIDIA library one level above the runner dir
+	depPath := ""
+	if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
+		depPath = filepath.Dir(envconfig.RunnersDir)
+	}
+
 	var memInfo C.mem_info_t
 	resp := []GpuInfo{}
 
@@ -198,6 +204,7 @@ func GetGPUInfo() GpuInfoList {
 			gpuInfo.Major = int(memInfo.major)
 			gpuInfo.Minor = int(memInfo.minor)
 			gpuInfo.MinimumMemory = cudaMinimumMemory
+			gpuInfo.DependencyPath = depPath
 
 			// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
 			resp = append(resp, gpuInfo)
diff --git a/llm/server.go b/llm/server.go
index db1b0e23..e2402256 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -233,13 +233,13 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 	if runtime.GOOS == "windows" {
 		pathEnv = "PATH"
 	}
-	// append the server directory to LD_LIBRARY_PATH/PATH
+	// prepend the server directory to LD_LIBRARY_PATH/PATH
 	libraryPaths := []string{dir}
 
 	if libraryPath, ok := os.LookupEnv(pathEnv); ok {
-		// Append our runner directory to the path
-		// This will favor system libraries over our bundled library dependencies
-		libraryPaths = append(filepath.SplitList(libraryPath), libraryPaths...)
+		// Append the existing path entries after our runner directory
+		// This will favor our bundled library dependencies over system libraries
+		libraryPaths = append(libraryPaths, filepath.SplitList(libraryPath)...)
 	}
 
 	// Note: we always put the dependency path first
@@ -275,15 +275,31 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		sem:     semaphore.NewWeighted(int64(numParallel)),
 	}
 
-	libEnv := fmt.Sprintf("%s=%s", pathEnv, strings.Join(libraryPaths, string(filepath.ListSeparator)))
-	s.cmd.Env = append(os.Environ(), libEnv)
+	s.cmd.Env = os.Environ()
 	s.cmd.Stdout = os.Stdout
 	s.cmd.Stderr = s.status
 
-	// TODO - multiple GPU selection logic...
-	key, val := gpu.GpuInfoList(gpus).GetVisibleDevicesEnv()
-	if key != "" {
-		s.cmd.Env = append(s.cmd.Env, key+"="+val)
+	visibleDevicesEnv, visibleDevicesEnvVal := gpu.GpuInfoList(gpus).GetVisibleDevicesEnv()
+	pathEnvVal := strings.Join(libraryPaths, string(filepath.ListSeparator))
+
+	// Update or add the path and visible devices variable with our adjusted version
+	pathNeeded := true
+	devicesNeeded := visibleDevicesEnv != ""
+	for i := range s.cmd.Env {
+		cmp := strings.SplitN(s.cmd.Env[i], "=", 2)
+		if strings.EqualFold(cmp[0], pathEnv) {
+			s.cmd.Env[i] = pathEnv + "=" + pathEnvVal
+			pathNeeded = false
+		} else if devicesNeeded && strings.EqualFold(cmp[0], visibleDevicesEnv) {
+			s.cmd.Env[i] = visibleDevicesEnv + "=" + visibleDevicesEnvVal
+			devicesNeeded = false
+		}
+	}
+	if pathNeeded {
+		s.cmd.Env = append(s.cmd.Env, pathEnv+"="+pathEnvVal)
+	}
+	if devicesNeeded {
+		s.cmd.Env = append(s.cmd.Env, visibleDevicesEnv+"="+visibleDevicesEnvVal)
 	}
 
 	slog.Info("starting llama server", "cmd", s.cmd.String())
@@ -300,13 +316,6 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 			continue
 		}
 
-		// TODO - make sure this is all wired up correctly
-		// if err = s.WaitUntilRunning(); err != nil {
-		// 	slog.Error("error starting llama server", "server", servers[i], "error", err)
-		// 	s.Close()
-		// 	finalErr = err
-		// 	continue
-		// }
 		return s, nil
 	}
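
Notes on the approach (the sketches below are illustrative, not part of the patch):

The gpu.go hunk derives the dependency path as the parent of the runners
directory, and only on Windows, where the NVIDIA libraries are bundled one
level above the runners. A minimal standalone sketch of that derivation, with
runnersDir standing in for envconfig.RunnersDir and a hypothetical install
layout in main:

package main

import (
	"fmt"
	"path/filepath"
	"runtime"
)

// bundledDepDir mirrors the patch's gpu.go logic: on Windows the NVIDIA
// libraries ship one level above the runner directory, so the dependency
// path is the parent of the runners dir; elsewhere it stays empty and the
// normal loader search paths apply.
func bundledDepDir(runnersDir string) string {
	if runtime.GOOS == "windows" && runnersDir != "" {
		return filepath.Dir(runnersDir)
	}
	return ""
}

func main() {
	// Hypothetical layout: DLLs in C:\Program Files\Ollama, runners below it.
	fmt.Println(bundledDepDir(`C:\Program Files\Ollama\runners`))
}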
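
The first llm/server.go hunk flips the merge order so the runner directory
comes before any pre-existing entries. Both the dynamic loader's
LD_LIBRARY_PATH lookup and the Windows PATH-based DLL search walk entries left
to right and take the first hit, so prepending is what makes the bundled CUDA
libraries shadow system copies. A sketch of the resulting ordering, assuming a
hypothetical runner directory:

package main

import (
	"fmt"
	"os"
	"path/filepath"
	"runtime"
	"strings"
)

func main() {
	// LD_LIBRARY_PATH on Linux; PATH on Windows, which also governs DLL lookup.
	pathEnv := "LD_LIBRARY_PATH"
	if runtime.GOOS == "windows" {
		pathEnv = "PATH"
	}

	dir := "/tmp/ollama/runners/cuda_v11" // hypothetical runner directory

	// Prepend as the patch does: our dir first, then any pre-existing entries,
	// so the loader resolves bundled CUDA libraries before system copies.
	libraryPaths := []string{dir}
	if existing, ok := os.LookupEnv(pathEnv); ok {
		libraryPaths = append(libraryPaths, filepath.SplitList(existing)...)
	}

	fmt.Println(pathEnv + "=" + strings.Join(libraryPaths, string(filepath.ListSeparator)))
}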
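
The second llm/server.go hunk swaps the old blind append for update-or-add
semantics: an existing variable is rewritten in place, and a new entry is
appended only if nothing matched. The case-insensitive comparison matters on
Windows, where the path variable may appear as Path rather than PATH, and
rewriting in place avoids duplicate keys, whose precedence is
platform-dependent. The same pattern as a generic helper; setEnv is a
hypothetical name, not something the patch introduces:

package main

import (
	"fmt"
	"strings"
)

// setEnv rewrites the first "name=value" entry whose key matches name
// case-insensitively, or appends one if no entry matches. This keeps the
// environment free of duplicate keys, so the adjusted value reliably wins.
func setEnv(env []string, name, value string) []string {
	for i := range env {
		kv := strings.SplitN(env[i], "=", 2)
		if strings.EqualFold(kv[0], name) {
			env[i] = name + "=" + value
			return env
		}
	}
	return append(env, name+"="+value)
}

func main() {
	env := []string{"HOME=/home/me", "Path=C:\\Windows\\system32"}
	env = setEnv(env, "PATH", `C:\Program Files\Ollama;C:\Windows\system32`)
	env = setEnv(env, "CUDA_VISIBLE_DEVICES", "0")
	for _, kv := range env {
		fmt.Println(kv)
	}
}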