diff --git a/api/client_test.go b/api/client_test.go
index fe9fd74f..23fe9334 100644
--- a/api/client_test.go
+++ b/api/client_test.go
@@ -2,8 +2,6 @@ package api

 import (
 	"testing"
-
-	"github.com/ollama/ollama/envconfig"
 )

 func TestClientFromEnvironment(t *testing.T) {
@@ -33,7 +31,6 @@ func TestClientFromEnvironment(t *testing.T) {
 	for k, v := range testCases {
 		t.Run(k, func(t *testing.T) {
 			t.Setenv("OLLAMA_HOST", v.value)
-			envconfig.LoadConfig()

 			client, err := ClientFromEnvironment()
 			if err != v.err {
diff --git a/integration/concurrency_test.go b/integration/concurrency_test.go
index 8593285b..81d0b587 100644
--- a/integration/concurrency_test.go
+++ b/integration/concurrency_test.go
@@ -5,14 +5,16 @@
 import (
 	"context"
 	"log/slog"
 	"os"
 	"strconv"
 	"sync"
 	"testing"
 	"time"

-	"github.com/ollama/ollama/api"
 	"github.com/stretchr/testify/require"
+
+	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/format"
 )

 func TestMultiModelConcurrency(t *testing.T) {
@@ -106,13 +108,16 @@ func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {

 // Stress the system if we know how much VRAM it has, and attempt to load more models than will fit
 func TestMultiModelStress(t *testing.T) {
-	vram := os.Getenv("OLLAMA_MAX_VRAM") // TODO - discover actual VRAM
-	if vram == "" {
+	s := os.Getenv("OLLAMA_MAX_VRAM") // TODO - discover actual VRAM
+	if s == "" {
 		t.Skip("OLLAMA_MAX_VRAM not specified, can't pick the right models for the stress test")
 	}
-	max, err := strconv.ParseUint(vram, 10, 64)
-	require.NoError(t, err)
-	const MB = uint64(1024 * 1024)
+
+	maxVram, err := strconv.ParseUint(s, 10, 64)
+	if err != nil {
+		t.Fatal(err)
+	}
+
 	type model struct {
 		name string
 		size uint64 // Approximate amount of VRAM they typically use when fully loaded in VRAM
@@ -121,83 +126,82 @@ func TestMultiModelStress(t *testing.T) {
 	smallModels := []model{
 		{
 			name: "orca-mini",
-			size: 2992 * MB,
+			size: 2992 * format.MebiByte,
 		},
 		{
 			name: "phi",
-			size: 2616 * MB,
+			size: 2616 * format.MebiByte,
 		},
 		{
 			name: "gemma:2b",
-			size: 2364 * MB,
+			size: 2364 * format.MebiByte,
 		},
 		{
 			name: "stable-code:3b",
-			size: 2608 * MB,
+			size: 2608 * format.MebiByte,
 		},
 		{
 			name: "starcoder2:3b",
-			size: 2166 * MB,
+			size: 2166 * format.MebiByte,
 		},
 	}
 	mediumModels := []model{
 		{
 			name: "llama2",
-			size: 5118 * MB,
+			size: 5118 * format.MebiByte,
 		},
 		{
 			name: "mistral",
-			size: 4620 * MB,
+			size: 4620 * format.MebiByte,
 		},
 		{
 			name: "orca-mini:7b",
-			size: 5118 * MB,
+			size: 5118 * format.MebiByte,
 		},
 		{
 			name: "dolphin-mistral",
-			size: 4620 * MB,
+			size: 4620 * format.MebiByte,
 		},
 		{
 			name: "gemma:7b",
-			size: 5000 * MB,
+			size: 5000 * format.MebiByte,
+		},
+		{
+			name: "codellama:7b",
+			size: 5118 * format.MebiByte,
 		},
-		// TODO - uncomment this once #3565 is merged and this is rebased on it
-		// {
-		// 	name: "codellama:7b",
-		// 	size: 5118 * MB,
-		// },
 	}

 	// These seem to be too slow to be useful...
 	// largeModels := []model{
 	// 	{
 	// 		name: "llama2:13b",
-	// 		size: 7400 * MB,
+	// 		size: 7400 * format.MebiByte,
 	// 	},
 	// 	{
 	// 		name: "codellama:13b",
-	// 		size: 7400 * MB,
+	// 		size: 7400 * format.MebiByte,
 	// 	},
 	// 	{
 	// 		name: "orca-mini:13b",
-	// 		size: 7400 * MB,
+	// 		size: 7400 * format.MebiByte,
 	// 	},
 	// 	{
 	// 		name: "gemma:7b",
-	// 		size: 5000 * MB,
+	// 		size: 5000 * format.MebiByte,
 	// 	},
 	// 	{
 	// 		name: "starcoder2:15b",
-	// 		size: 9100 * MB,
+	// 		size: 9100 * format.MebiByte,
 	// 	},
 	// }

 	var chosenModels []model
 	switch {
-	case max < 10000*MB:
+	case maxVram < 10000*format.MebiByte:
 		slog.Info("selecting small models")
 		chosenModels = smallModels
-	// case max < 30000*MB:
+	// case maxVram < 30000*format.MebiByte:
 	default:
 		slog.Info("selecting medium models")
 		chosenModels = mediumModels
@@ -226,15 +230,15 @@ func TestMultiModelStress(t *testing.T) {
 	}

 	var wg sync.WaitGroup
-	consumed := uint64(256 * MB) // Assume some baseline usage
+	consumed := uint64(256 * format.MebiByte) // Assume some baseline usage
 	for i := 0; i < len(req); i++ {
 		// Always get at least 2 models, but dont' overshoot VRAM too much or we'll take too long
-		if i > 1 && consumed > max {
-			slog.Info("achieved target vram exhaustion", "count", i, "vramMB", max/1024/1024, "modelsMB", consumed/1024/1024)
+		if i > 1 && consumed > maxVram {
+			slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))
 			break
 		}
 		consumed += chosenModels[i].size
-		slog.Info("target vram", "count", i, "vramMB", max/1024/1024, "modelsMB", consumed/1024/1024)
+		slog.Info("target vram", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))

 		wg.Add(1)
 		go func(i int) {
diff --git a/server/manifest_test.go b/server/manifest_test.go
index ca6c3d2e..a4af5d5e 100644
--- a/server/manifest_test.go
+++ b/server/manifest_test.go
@@ -7,7 +7,6 @@ import (
 	"slices"
 	"testing"

-	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/types/model"
 )

@@ -108,7 +107,6 @@ func TestManifests(t *testing.T) {
 		t.Run(n, func(t *testing.T) {
 			d := t.TempDir()
 			t.Setenv("OLLAMA_MODELS", d)
-			envconfig.LoadConfig()

 			for _, p := range wants.ps {
 				createManifest(t, d, p)
diff --git a/server/modelpath_test.go b/server/modelpath_test.go
index 6c4dfbee..849e0fa7 100644
--- a/server/modelpath_test.go
+++ b/server/modelpath_test.go
@@ -7,8 +7,6 @@ import (

 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
-
-	"github.com/ollama/ollama/envconfig"
 )

 func TestGetBlobsPath(t *testing.T) {
@@ -63,7 +61,6 @@ func TestGetBlobsPath(t *testing.T) {
 	for _, tc := range tests {
 		t.Run(tc.name, func(t *testing.T) {
 			t.Setenv("OLLAMA_MODELS", dir)
-			envconfig.LoadConfig()

 			got, err := GetBlobsPath(tc.digest)

diff --git a/server/routes_create_test.go b/server/routes_create_test.go
index 3234ea5e..c853a9e9 100644
--- a/server/routes_create_test.go
+++ b/server/routes_create_test.go
@@ -15,7 +15,6 @@ import (
 	"github.com/gin-gonic/gin"

 	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/llm"
 )

@@ -89,7 +88,6 @@ func TestCreateFromBin(t *testing.T) {

 	p := t.TempDir()
 	t.Setenv("OLLAMA_MODELS", p)
-	envconfig.LoadConfig()

 	var s Server
 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
@@ -117,7 +115,6 @@ func TestCreateFromModel(t *testing.T) {

 	p := t.TempDir()
 	t.Setenv("OLLAMA_MODELS", p)
-	envconfig.LoadConfig()

 	var s Server
 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
@@ -160,7 +157,6 @@ func TestCreateRemovesLayers(t *testing.T) {

 	p := t.TempDir()
 	t.Setenv("OLLAMA_MODELS", p)
-	envconfig.LoadConfig()

 	var s Server
 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
@@ -209,7 +205,6 @@ func TestCreateUnsetsSystem(t *testing.T) {

 	p := t.TempDir()
 	t.Setenv("OLLAMA_MODELS", p)
-	envconfig.LoadConfig()

 	var s Server
 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
@@ -267,7 +262,6 @@ func TestCreateMergeParameters(t *testing.T) {

 	p := t.TempDir()
 	t.Setenv("OLLAMA_MODELS", p)
-	envconfig.LoadConfig()

 	var s Server
 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
@@ -372,7 +366,6 @@ func TestCreateReplacesMessages(t *testing.T) {

 	p := t.TempDir()
 	t.Setenv("OLLAMA_MODELS", p)
-	envconfig.LoadConfig()

 	var s Server
 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
@@ -450,7 +443,6 @@ func TestCreateTemplateSystem(t *testing.T) {

 	p := t.TempDir()
 	t.Setenv("OLLAMA_MODELS", p)
-	envconfig.LoadConfig()

 	var s Server
 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
@@ -534,7 +526,6 @@ func TestCreateLicenses(t *testing.T) {

 	p := t.TempDir()
 	t.Setenv("OLLAMA_MODELS", p)
-	envconfig.LoadConfig()

 	var s Server
 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
@@ -582,7 +573,6 @@ func TestCreateDetectTemplate(t *testing.T) {

 	p := t.TempDir()
 	t.Setenv("OLLAMA_MODELS", p)
-	envconfig.LoadConfig()

 	var s Server
 	t.Run("matched", func(t *testing.T) {
diff --git a/server/routes_delete_test.go b/server/routes_delete_test.go
index 33a97a73..2354d730 100644
--- a/server/routes_delete_test.go
+++ b/server/routes_delete_test.go
@@ -10,7 +10,6 @@ import (
 	"github.com/gin-gonic/gin"

 	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/types/model"
 )

@@ -19,7 +18,6 @@ func TestDelete(t *testing.T) {

 	p := t.TempDir()
 	t.Setenv("OLLAMA_MODELS", p)
-	envconfig.LoadConfig()

 	var s Server

diff --git a/server/routes_list_test.go b/server/routes_list_test.go
index c2d9c113..29e3214c 100644
--- a/server/routes_list_test.go
+++ b/server/routes_list_test.go
@@ -9,14 +9,12 @@ import (
 	"github.com/gin-gonic/gin"

 	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/envconfig"
 )

 func TestList(t *testing.T) {
 	gin.SetMode(gin.TestMode)

 	t.Setenv("OLLAMA_MODELS", t.TempDir())
-	envconfig.LoadConfig()

 	expectNames := []string{
 		"mistral:7b-instruct-q4_0",
diff --git a/server/routes_test.go b/server/routes_test.go
index 97786ba2..17da2305 100644
--- a/server/routes_test.go
+++ b/server/routes_test.go
@@ -19,7 +19,6 @@ import (
 	"github.com/stretchr/testify/require"

 	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/openai"
 	"github.com/ollama/ollama/parser"
@@ -347,7 +346,6 @@ func Test_Routes(t *testing.T) {
 	}

 	t.Setenv("OLLAMA_MODELS", t.TempDir())
-	envconfig.LoadConfig()

 	s := &Server{}
 	router := s.GenerateRoutes()
@@ -378,7 +376,6 @@ func Test_Routes(t *testing.T) {

 func TestCase(t *testing.T) {
 	t.Setenv("OLLAMA_MODELS", t.TempDir())
-	envconfig.LoadConfig()

 	cases := []string{
 		"mistral",
@@ -458,7 +455,6 @@ func TestCase(t *testing.T) {

 func TestShow(t *testing.T) {
 	t.Setenv("OLLAMA_MODELS", t.TempDir())
-	envconfig.LoadConfig()

 	var s Server