diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 4ffab937..441a66e2 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -8,7 +8,15 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] + arch: [amd64, arm64] + exclude: + - os: ubuntu-latest + arch: arm64 + - os: windows-latest + arch: arm64 runs-on: ${{ matrix.os }} + env: + GOARCH: ${{ matrix.arch }} steps: - uses: actions/checkout@v4 - uses: actions/setup-go@v4 @@ -33,7 +41,7 @@ jobs: - run: go generate -x ./... - uses: actions/upload-artifact@v4 with: - name: ${{ matrix.os }}-libraries + name: ${{ matrix.os }}-${{ matrix.arch }}-libraries path: | llm/llama.cpp/build/**/lib/* lint: @@ -41,7 +49,18 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] + arch: [amd64, arm64] + exclude: + - os: ubuntu-latest + arch: arm64 + - os: windows-latest + arch: arm64 + - os: macos-latest + arch: amd64 runs-on: ${{ matrix.os }} + env: + GOARCH: ${{ matrix.arch }} + CGO_ENABLED: "1" steps: - uses: actions/checkout@v4 with: @@ -52,7 +71,7 @@ jobs: cache: false - uses: actions/download-artifact@v4 with: - name: ${{ matrix.os }}-libraries + name: ${{ matrix.os }}-${{ matrix.arch }}-libraries path: llm/llama.cpp/build - uses: golangci/golangci-lint-action@v3 test: @@ -60,6 +79,12 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] + arch: [amd64, arm64] + exclude: + - os: ubuntu-latest + arch: arm64 + - os: windows-latest + arch: arm64 runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 @@ -72,7 +97,7 @@ jobs: - run: go get - uses: actions/download-artifact@v4 with: - name: ${{ matrix.os }}-libraries + name: ${{ matrix.os }}-${{ matrix.arch }}-libraries path: llm/llama.cpp/build - run: go build - run: go test -v ./... diff --git a/README.md b/README.md index 1f7318a3..9bf9b8ae 100644 --- a/README.md +++ b/README.md @@ -248,6 +248,10 @@ curl http://localhost:11434/api/chat -d '{ See the [API documentation](./docs/api.md) for all endpoints. +## Integrations + +- [ollama-python](https://github.com/jmorganca/ollama-python) + ## Community Integrations ### Web & Desktop diff --git a/api/client.py b/api/client.py deleted file mode 100644 index a2eb50e8..00000000 --- a/api/client.py +++ /dev/null @@ -1,284 +0,0 @@ -import os -import json -import requests -import os -import hashlib -import json -from pathlib import Path - -BASE_URL = os.environ.get('OLLAMA_HOST', 'http://localhost:11434') - -# Generate a response for a given prompt with a provided model. This is a streaming endpoint, so will be a series of responses. -# The final response object will include statistics and additional data from the request. Use the callback function to override -# the default handler. -def generate(model_name, prompt, system=None, template=None, format="", context=None, options=None, callback=None): - try: - url = f"{BASE_URL}/api/generate" - payload = { - "model": model_name, - "prompt": prompt, - "system": system, - "template": template, - "context": context, - "options": options, - "format": format, - } - - # Remove keys with None values - payload = {k: v for k, v in payload.items() if v is not None} - - with requests.post(url, json=payload, stream=True) as response: - response.raise_for_status() - - # Creating a variable to hold the context history of the final chunk - final_context = None - - # Variable to hold concatenated response strings if no callback is provided - full_response = "" - - # Iterating over the response line by line and displaying the details - for line in response.iter_lines(): - if line: - # Parsing each line (JSON chunk) and extracting the details - chunk = json.loads(line) - - # If a callback function is provided, call it with the chunk - if callback: - callback(chunk) - else: - # If this is not the last chunk, add the "response" field value to full_response and print it - if not chunk.get("done"): - response_piece = chunk.get("response", "") - full_response += response_piece - print(response_piece, end="", flush=True) - - # Check if it's the last chunk (done is true) - if chunk.get("done"): - final_context = chunk.get("context") - - # Return the full response and the final context - return full_response, final_context - except requests.exceptions.RequestException as e: - print(f"An error occurred: {e}") - return None, None - - -# Create a blob file on the server if it doesn't exist. -def create_blob(digest, file_path): - url = f"{BASE_URL}/api/blobs/{digest}" - - # Check if the blob exists - response = requests.head(url) - if response.status_code != 404: - return # Blob already exists, no need to upload - response.raise_for_status() - - # Upload the blob - with open(file_path, 'rb') as file_data: - requests.post(url, data=file_data) - - -# Create a model from a Modelfile. Use the callback function to override the default handler. -def create(model_name, filename, callback=None): - try: - file_path = Path(filename).expanduser().resolve() - processed_lines = [] - - # Read and process the modelfile - with open(file_path, 'r') as f: - for line in f: - # Skip empty or whitespace-only lines - if not line.strip(): - continue - - command, args = line.split(maxsplit=1) - - if command.upper() in ["FROM", "ADAPTER"]: - path = Path(args.strip()).expanduser() - - # Check if path is relative and resolve it - if not path.is_absolute(): - path = (file_path.parent / path) - - # Skip if file does not exist for "model", this is handled by the server - if not path.exists(): - processed_lines.append(line) - continue - - # Calculate SHA-256 hash - with open(path, 'rb') as bin_file: - hash = hashlib.sha256() - hash.update(bin_file.read()) - blob = f"sha256:{hash.hexdigest()}" - - # Add the file to the remote server - create_blob(blob, path) - - # Replace path with digest in the line - line = f"{command} @{blob}\n" - - processed_lines.append(line) - - # Combine processed lines back into a single string - modelfile_content = '\n'.join(processed_lines) - - url = f"{BASE_URL}/api/create" - payload = {"name": model_name, "modelfile": modelfile_content} - - # Making a POST request with the stream parameter set to True to handle streaming responses - with requests.post(url, json=payload, stream=True) as response: - response.raise_for_status() - # Iterating over the response line by line and displaying the status - for line in response.iter_lines(): - if line: - chunk = json.loads(line) - if callback: - callback(chunk) - else: - print(f"Status: {chunk.get('status')}") - - except Exception as e: - print(f"An error occurred: {e}") - - -# Pull a model from a the model registry. Cancelled pulls are resumed from where they left off, and multiple -# calls to will share the same download progress. Use the callback function to override the default handler. -def pull(model_name, insecure=False, callback=None): - try: - url = f"{BASE_URL}/api/pull" - payload = { - "name": model_name, - "insecure": insecure - } - - # Making a POST request with the stream parameter set to True to handle streaming responses - with requests.post(url, json=payload, stream=True) as response: - response.raise_for_status() - - # Iterating over the response line by line and displaying the details - for line in response.iter_lines(): - if line: - # Parsing each line (JSON chunk) and extracting the details - chunk = json.loads(line) - - # If a callback function is provided, call it with the chunk - if callback: - callback(chunk) - else: - # Print the status message directly to the console - print(chunk.get('status', ''), end='', flush=True) - - # If there's layer data, you might also want to print that (adjust as necessary) - if 'digest' in chunk: - print(f" - Digest: {chunk['digest']}", end='', flush=True) - print(f" - Total: {chunk['total']}", end='', flush=True) - print(f" - Completed: {chunk['completed']}", end='\n', flush=True) - else: - print() - except requests.exceptions.RequestException as e: - print(f"An error occurred: {e}") - -# Push a model to the model registry. Use the callback function to override the default handler. -def push(model_name, insecure=False, callback=None): - try: - url = f"{BASE_URL}/api/push" - payload = { - "name": model_name, - "insecure": insecure - } - - # Making a POST request with the stream parameter set to True to handle streaming responses - with requests.post(url, json=payload, stream=True) as response: - response.raise_for_status() - - # Iterating over the response line by line and displaying the details - for line in response.iter_lines(): - if line: - # Parsing each line (JSON chunk) and extracting the details - chunk = json.loads(line) - - # If a callback function is provided, call it with the chunk - if callback: - callback(chunk) - else: - # Print the status message directly to the console - print(chunk.get('status', ''), end='', flush=True) - - # If there's layer data, you might also want to print that (adjust as necessary) - if 'digest' in chunk: - print(f" - Digest: {chunk['digest']}", end='', flush=True) - print(f" - Total: {chunk['total']}", end='', flush=True) - print(f" - Completed: {chunk['completed']}", end='\n', flush=True) - else: - print() - except requests.exceptions.RequestException as e: - print(f"An error occurred: {e}") - -# List models that are available locally. -def list(): - try: - response = requests.get(f"{BASE_URL}/api/tags") - response.raise_for_status() - data = response.json() - models = data.get('models', []) - return models - - except requests.exceptions.RequestException as e: - print(f"An error occurred: {e}") - return None - -# Copy a model. Creates a model with another name from an existing model. -def copy(source, destination): - try: - # Create the JSON payload - payload = { - "source": source, - "destination": destination - } - - response = requests.post(f"{BASE_URL}/api/copy", json=payload) - response.raise_for_status() - - # If the request was successful, return a message indicating that the copy was successful - return "Copy successful" - - except requests.exceptions.RequestException as e: - print(f"An error occurred: {e}") - return None - -# Delete a model and its data. -def delete(model_name): - try: - url = f"{BASE_URL}/api/delete" - payload = {"name": model_name} - response = requests.delete(url, json=payload) - response.raise_for_status() - return "Delete successful" - except requests.exceptions.RequestException as e: - print(f"An error occurred: {e}") - return None - -# Show info about a model. -def show(model_name): - try: - url = f"{BASE_URL}/api/show" - payload = {"name": model_name} - response = requests.post(url, json=payload) - response.raise_for_status() - - # Parse the JSON response and return it - data = response.json() - return data - except requests.exceptions.RequestException as e: - print(f"An error occurred: {e}") - return None - -def heartbeat(): - try: - url = f"{BASE_URL}/" - response = requests.head(url) - response.raise_for_status() - return "Ollama is running" - except requests.exceptions.RequestException as e: - print(f"An error occurred: {e}") - return "Ollama is not running" diff --git a/llm/dyn_ext_server.go b/llm/dyn_ext_server.go index 105df634..fa0d7750 100644 --- a/llm/dyn_ext_server.go +++ b/llm/dyn_ext_server.go @@ -75,7 +75,7 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts updatePath(filepath.Dir(library)) libPath := C.CString(library) defer C.free(unsafe.Pointer(libPath)) - resp := newExtServerResp(128) + resp := newExtServerResp(512) defer freeExtServerResp(resp) var srv C.struct_dynamic_llama_server C.dyn_init(libPath, &srv, &resp) @@ -181,7 +181,6 @@ func (llm *dynExtServer) Predict(ctx context.Context, predict PredictOpts, fn fu "seed": predict.Options.Seed, "stop": predict.Options.Stop, "image_data": imageData, - "cache_prompt": true, } if predict.Format == "json" { diff --git a/llm/generate/gen_darwin.sh b/llm/generate/gen_darwin.sh index b7f1f684..3a57d0cb 100755 --- a/llm/generate/gen_darwin.sh +++ b/llm/generate/gen_darwin.sh @@ -14,9 +14,11 @@ BUILD_DIR="${LLAMACPP_DIR}/build/darwin/metal" case "${GOARCH}" in "amd64") CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DLLAMA_METAL=off -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}" + ARCH="x86_64" ;; "arm64") CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DLLAMA_METAL=on ${CMAKE_DEFS}" + ARCH="arm64" ;; *) echo "GOARCH must be set" @@ -30,6 +32,7 @@ apply_patches build install gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \ + -arch ${ARCH} \ -Wl,-force_load ${BUILD_DIR}/lib/libext_server.a \ ${BUILD_DIR}/lib/libcommon.a \ ${BUILD_DIR}/lib/libllama.a \ diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh index a6e665c3..e9f8f4be 100755 --- a/llm/generate/gen_linux.sh +++ b/llm/generate/gen_linux.sh @@ -39,8 +39,13 @@ amdGPUs() { } echo "Starting linux generate script" -if [ -z "${CUDACXX}" -a -x /usr/local/cuda/bin/nvcc ]; then - export CUDACXX=/usr/local/cuda/bin/nvcc +if [ -z "${CUDACXX}" ]; then + if [ -x /usr/local/cuda/bin/nvcc ]; then + export CUDACXX=/usr/local/cuda/bin/nvcc + else + # Try the default location in case it exists + export CUDACXX=$(command -v nvcc) + fi fi COMMON_CMAKE_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" source $(dirname $0)/gen_common.sh @@ -109,33 +114,41 @@ else echo "Skipping CPU generation step as requested" fi -for cudalibpath in "/usr/local/cuda/lib64" "/opt/cuda/targets/x86_64-linux/lib"; do - if [ -d "$cudalibpath" ]; then - echo "CUDA libraries detected - building dynamic CUDA library" - init_vars - CUDA_MAJOR=$(find "$cudalibpath" -name 'libcudart.so.*' -print | head -1 | cut -f3 -d. || true) - if [ -n "${CUDA_MAJOR}" ]; then - CUDA_VARIANT="_v${CUDA_MAJOR}" - fi - CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}" - BUILD_DIR="${LLAMACPP_DIR}/build/linux/cuda${CUDA_VARIANT}" - CUDA_LIB_DIR="$cudalibpath" - build - install - gcc -fPIC -g -shared -o "${BUILD_DIR}/lib/libext_server.so" \ - -Wl,--whole-archive \ - "${BUILD_DIR}/lib/libext_server.a" \ - "${BUILD_DIR}/lib/libcommon.a" \ - "${BUILD_DIR}/lib/libllama.a" \ - -Wl,--no-whole-archive \ - "${CUDA_LIB_DIR}/libcudart_static.a" \ - "${CUDA_LIB_DIR}/libcublas_static.a" \ - "${CUDA_LIB_DIR}/libcublasLt_static.a" \ - "${CUDA_LIB_DIR}/libcudadevrt.a" \ - "${CUDA_LIB_DIR}/libculibos.a" \ - -lrt -lpthread -ldl -lstdc++ -lm +# If needed, look for the default CUDA toolkit location +if [ -z "${CUDA_LIB_DIR}" ] && [ -d /usr/local/cuda/lib64 ]; then + CUDA_LIB_DIR=/usr/local/cuda/lib64 +fi + +# If needed, look for CUDA on Arch Linux +if [ -z "${CUDA_LIB_DIR}" ] && [ -d /opt/cuda/targets/x86_64-linux/lib ]; then + CUDA_LIB_DIR=/opt/cuda/targets/x86_64-linux/lib +fi + +if [ -d "${CUDA_LIB_DIR}" ]; then + echo "CUDA libraries detected - building dynamic CUDA library" + init_vars + CUDA_MAJOR=$(ls "${CUDA_LIB_DIR}"/libcudart.so.* | head -1 | cut -f3 -d. || true) + if [ -n "${CUDA_MAJOR}" ]; then + CUDA_VARIANT=_v${CUDA_MAJOR} fi -done + CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}" + BUILD_DIR="${LLAMACPP_DIR}/build/linux/cuda${CUDA_VARIANT}" + build + install + gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \ + -Wl,--whole-archive \ + ${BUILD_DIR}/lib/libext_server.a \ + ${BUILD_DIR}/lib/libcommon.a \ + ${BUILD_DIR}/lib/libllama.a \ + -Wl,--no-whole-archive \ + ${CUDA_LIB_DIR}/libcudart_static.a \ + ${CUDA_LIB_DIR}/libcublas_static.a \ + ${CUDA_LIB_DIR}/libcublasLt_static.a \ + ${CUDA_LIB_DIR}/libcudadevrt.a \ + ${CUDA_LIB_DIR}/libculibos.a \ + -lcuda \ + -lrt -lpthread -ldl -lstdc++ -lm +fi if [ -z "${ROCM_PATH}" ]; then # Try the default location in case it exists diff --git a/llm/llama.cpp b/llm/llama.cpp index 328b83de..584d674b 160000 --- a/llm/llama.cpp +++ b/llm/llama.cpp @@ -1 +1 @@ -Subproject commit 328b83de23b33240e28f4e74900d1d06726f5eb1 +Subproject commit 584d674be622fbf1578694ada6e62eebedbfd377 diff --git a/server/images.go b/server/images.go index 4742a363..2e00cace 100644 --- a/server/images.go +++ b/server/images.go @@ -1132,49 +1132,46 @@ func GetSHA256Digest(r io.Reader) (string, int64) { var errUnauthorized = fmt.Errorf("unauthorized") func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.ReadSeeker, regOpts *RegistryOptions) (*http.Response, error) { - resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts) - if err != nil { - if !errors.Is(err, context.Canceled) { - log.Printf("request failed: %v", err) - } - - return nil, err - } - - switch { - case resp.StatusCode == http.StatusUnauthorized: - // Handle authentication error with one retry - auth := resp.Header.Get("www-authenticate") - authRedir := ParseAuthRedirectString(auth) - token, err := getAuthToken(ctx, authRedir) + for i := 0; i < 2; i++ { + resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts) if err != nil { + if !errors.Is(err, context.Canceled) { + log.Printf("request failed: %v", err) + } + return nil, err } - regOpts.Token = token - if body != nil { - _, err = body.Seek(0, io.SeekStart) + + switch { + case resp.StatusCode == http.StatusUnauthorized: + // Handle authentication error with one retry + auth := resp.Header.Get("www-authenticate") + authRedir := ParseAuthRedirectString(auth) + token, err := getAuthToken(ctx, authRedir) if err != nil { return nil, err } + regOpts.Token = token + if body != nil { + _, err = body.Seek(0, io.SeekStart) + if err != nil { + return nil, err + } + } + case resp.StatusCode == http.StatusNotFound: + return nil, os.ErrNotExist + case resp.StatusCode >= http.StatusBadRequest: + responseBody, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("%d: %s", resp.StatusCode, err) + } + return nil, fmt.Errorf("%d: %s", resp.StatusCode, responseBody) + default: + return resp, nil } - - resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts) - if resp.StatusCode == http.StatusUnauthorized { - return nil, errUnauthorized - } - - return resp, err - case resp.StatusCode == http.StatusNotFound: - return nil, os.ErrNotExist - case resp.StatusCode >= http.StatusBadRequest: - responseBody, err := io.ReadAll(resp.Body) - if err != nil { - return nil, fmt.Errorf("%d: %s", resp.StatusCode, err) - } - return nil, fmt.Errorf("%d: %s", resp.StatusCode, responseBody) } - return resp, nil + return nil, errUnauthorized } func makeRequest(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.Reader, regOpts *RegistryOptions) (*http.Response, error) { diff --git a/server/modelpath.go b/server/modelpath.go index f09ff8e9..af3f36ab 100644 --- a/server/modelpath.go +++ b/server/modelpath.go @@ -46,6 +46,7 @@ func ParseModelPath(name string) ModelPath { name = after } + name = strings.ReplaceAll(name, string(os.PathSeparator), "/") parts := strings.Split(name, "/") switch len(parts) { case 3: diff --git a/server/routes.go b/server/routes.go index 72c0d051..d76d4b4e 100644 --- a/server/routes.go +++ b/server/routes.go @@ -15,7 +15,6 @@ import ( "path/filepath" "reflect" "runtime" - "strconv" "strings" "sync" "syscall" @@ -668,27 +667,12 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) { cs := 30 for k, v := range model.Options { switch val := v.(type) { - case string: - params = append(params, fmt.Sprintf("%-*s %s", cs, k, val)) - case int: - params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.Itoa(val))) - case float64: - params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.FormatFloat(val, 'f', 0, 64))) - case bool: - params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.FormatBool(val))) case []interface{}: for _, nv := range val { - switch nval := nv.(type) { - case string: - params = append(params, fmt.Sprintf("%-*s %s", cs, k, nval)) - case int: - params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.Itoa(nval))) - case float64: - params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.FormatFloat(nval, 'f', 0, 64))) - case bool: - params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.FormatBool(nval))) - } + params = append(params, fmt.Sprintf("%-*s %#v", cs, k, nv)) } + default: + params = append(params, fmt.Sprintf("%-*s %#v", cs, k, v)) } } resp.Parameters = strings.Join(params, "\n") diff --git a/server/routes_test.go b/server/routes_test.go index aa561d98..b2d93958 100644 --- a/server/routes_test.go +++ b/server/routes_test.go @@ -9,6 +9,7 @@ import ( "net/http" "net/http/httptest" "os" + "sort" "strings" "testing" @@ -50,7 +51,7 @@ func Test_Routes(t *testing.T) { createTestModel := func(t *testing.T, name string) { fname := createTestFile(t, "ollama-model") - modelfile := strings.NewReader(fmt.Sprintf("FROM %s", fname)) + modelfile := strings.NewReader(fmt.Sprintf("FROM %s\nPARAMETER seed 42\nPARAMETER top_p 0.9\nPARAMETER stop foo\nPARAMETER stop bar", fname)) commands, err := parser.Parse(modelfile) assert.Nil(t, err) fn := func(resp api.ProgressResponse) { @@ -167,6 +168,42 @@ func Test_Routes(t *testing.T) { assert.Equal(t, "beefsteak:latest", model.ShortName) }, }, + { + Name: "Show Model Handler", + Method: http.MethodPost, + Path: "/api/show", + Setup: func(t *testing.T, req *http.Request) { + createTestModel(t, "show-model") + showReq := api.ShowRequest{Model: "show-model"} + jsonData, err := json.Marshal(showReq) + assert.Nil(t, err) + req.Body = io.NopCloser(bytes.NewReader(jsonData)) + }, + Expected: func(t *testing.T, resp *http.Response) { + contentType := resp.Header.Get("Content-Type") + assert.Equal(t, contentType, "application/json; charset=utf-8") + body, err := io.ReadAll(resp.Body) + assert.Nil(t, err) + + var showResp api.ShowResponse + err = json.Unmarshal(body, &showResp) + assert.Nil(t, err) + + var params []string + paramsSplit := strings.Split(showResp.Parameters, "\n") + for _, p := range paramsSplit { + params = append(params, strings.Join(strings.Fields(p), " ")) + } + sort.Strings(params) + expectedParams := []string{ + "seed 42", + "stop \"bar\"", + "stop \"foo\"", + "top_p 0.9", + } + assert.Equal(t, expectedParams, params) + }, + }, } s, err := setupServer(t)