diff --git a/integration/concurrency_test.go b/integration/concurrency_test.go index 81d0b587..42e9d074 100644 --- a/integration/concurrency_test.go +++ b/integration/concurrency_test.go @@ -5,6 +5,7 @@ package integration import ( "context" "log/slog" + "os" "strconv" "sync" "testing" @@ -13,7 +14,6 @@ import ( "github.com/stretchr/testify/require" "github.com/ollama/ollama/api" - "github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/format" ) @@ -41,8 +41,8 @@ func TestMultiModelConcurrency(t *testing.T) { }, } resp = [2][]string{ - []string{"sunlight"}, - []string{"england", "english", "massachusetts", "pilgrims", "british"}, + {"sunlight"}, + {"england", "english", "massachusetts", "pilgrims", "british"}, } ) var wg sync.WaitGroup @@ -71,12 +71,11 @@ func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) { reqLimit := len(req) iterLimit := 5 - vram := os.Getenv("OLLAMA_MAX_VRAM") // TODO - discover actual VRAM - if vram != "" { - max, err := strconv.ParseUint(vram, 10, 64) + if s := os.Getenv("OLLAMA_MAX_VRAM"); s != "" { + maxVram, err := strconv.ParseUint(s, 10, 64) require.NoError(t, err) // Don't hammer on small VRAM cards... - if max < 4*1024*1024*1024 { + if maxVram < 4*format.GibiByte { reqLimit = min(reqLimit, 2) iterLimit = 2 } @@ -233,12 +232,12 @@ func TestMultiModelStress(t *testing.T) { consumed := uint64(256 * format.MebiByte) // Assume some baseline usage for i := 0; i < len(req); i++ { // Always get at least 2 models, but dont' overshoot VRAM too much or we'll take too long - if i > 1 && consumed > vram { - slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(vram), "models", format.HumanBytes2(consumed)) + if i > 1 && consumed > maxVram { + slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed)) break } consumed += chosenModels[i].size - slog.Info("target vram", "count", i, "vram", format.HumanBytes2(vram), "models", format.HumanBytes2(consumed)) + slog.Info("target vram", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed)) wg.Add(1) go func(i int) { diff --git a/integration/llm_test.go b/integration/llm_test.go index 4952b072..398e0a03 100644 --- a/integration/llm_test.go +++ b/integration/llm_test.go @@ -35,8 +35,8 @@ var ( }, } resp = [2][]string{ - []string{"sunlight"}, - []string{"england", "english", "massachusetts", "pilgrims"}, + {"sunlight"}, + {"england", "english", "massachusetts", "pilgrims"}, } ) diff --git a/integration/max_queue_test.go b/integration/max_queue_test.go index b06197e1..ec9e085a 100644 --- a/integration/max_queue_test.go +++ b/integration/max_queue_test.go @@ -29,7 +29,7 @@ func TestMaxQueue(t *testing.T) { // Also note that by default Darwin can't sustain > ~128 connections without adjusting limits threadCount := 32 if maxQueue := envconfig.MaxQueue(); maxQueue != 0 { - threadCount = maxQueue + threadCount = int(maxQueue) } else { t.Setenv("OLLAMA_MAX_QUEUE", strconv.Itoa(threadCount)) } diff --git a/integration/utils_test.go b/integration/utils_test.go index c2b27ee9..a6010995 100644 --- a/integration/utils_test.go +++ b/integration/utils_test.go @@ -334,10 +334,10 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) { }, }, [][]string{ - []string{"sunlight"}, - []string{"soil", "organic", "earth", "black", "tan"}, - []string{"england", "english", "massachusetts", "pilgrims", "british"}, - []string{"fourth", "july", "declaration", "independence"}, - []string{"nitrogen", "oxygen", "carbon", "dioxide"}, + {"sunlight"}, + {"soil", "organic", "earth", "black", "tan"}, + {"england", "english", "massachusetts", "pilgrims", "british"}, + {"fourth", "july", "declaration", "independence"}, + {"nitrogen", "oxygen", "carbon", "dioxide"}, } }