From 1524f323a3392adc2613b066b2b1c1e46887001d Mon Sep 17 00:00:00 2001
From: Blake Mizerany <blake.mizerany@gmail.com>
Date: Tue, 9 Apr 2024 15:57:45 -0700
Subject: [PATCH] Revert "build.go: introduce a friendlier way to build Ollama
 (#3548)" (#3564)

---
 .github/workflows/release.yaml   |   3 +-
 .github/workflows/test.yaml      |  42 ++-----
 README.md                        |   8 +-
 build.go                         | 192 -------------------------------
 docs/development.md              |  42 ++++---
 llm/generate/gen_darwin.sh       |   8 +-
 llm/generate/gen_linux.sh        |   6 +-
 llm/generate/gen_windows.ps1     |   2 +-
 llm/generate/generate_darwin.go  |   3 +
 llm/generate/generate_linux.go   |   3 +
 llm/generate/generate_windows.go |   3 +
 11 files changed, 61 insertions(+), 251 deletions(-)
 delete mode 100644 build.go
 create mode 100644 llm/generate/generate_darwin.go
 create mode 100644 llm/generate/generate_linux.go
 create mode 100644 llm/generate/generate_windows.go

diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 9694457e..ffb2cf9d 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -95,8 +95,7 @@ jobs:
           cd $env:GITHUB_WORKSPACE
           $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
           $env:PATH="$gopath;$env:PATH"
-          
-          $env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
+          go generate -x ./...
         name: go generate
       - uses: actions/upload-artifact@v4
         with:
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 39b93227..e4242997 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -1,16 +1,5 @@
 name: test
 
-concurrency:
-  # For PRs, later CI runs preempt previous ones. e.g. a force push on a PR
-  # cancels running CI jobs and starts all new ones.
-  #
-  # For non-PR pushes, concurrency.group needs to be unique for every distinct
-  # CI run we want to have happen. Use run_id, which in practice means all
-  # non-PR CI runs will be allowed to run without preempting each other.
-  group: ${{ github.workflow }}-$${{ github.pull_request.number || github.run_id }}
-  cancel-in-progress: true
-
-
 on:
   pull_request:
     paths:
@@ -73,12 +62,10 @@ jobs:
           $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
           $env:PATH="$gopath;$gccpath;$env:PATH"
           echo $env:PATH
-
-          $env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
+          go generate -x ./...
         if: ${{ startsWith(matrix.os, 'windows-') }}
         name: 'Windows Go Generate'
-      - run: |
-          GOARCH= go run build.go -f -d -target=${{ matrix.arch }}
+      - run: go generate -x ./...
         if: ${{ ! startsWith(matrix.os, 'windows-') }}
         name: 'Unix Go Generate'
       - uses: actions/upload-artifact@v4
@@ -111,7 +98,7 @@ jobs:
       - run: go get ./...
       - run: |
           git config --global --add safe.directory /__w/ollama/ollama
-          GOARCH= go run build.go -f -d -target=${{ matrix.arch }}
+          go generate -x ./...
         env:
           OLLAMA_SKIP_CPU_GENERATE: '1'
       - uses: actions/upload-artifact@v4
@@ -142,7 +129,7 @@ jobs:
       - run: go get ./...
       - run: |
           git config --global --add safe.directory /__w/ollama/ollama
-          GOARCH= go run build.go -f -d -target=${{ matrix.arch }}
+          go generate -x ./...
         env:
           OLLAMA_SKIP_CPU_GENERATE: '1'
       - uses: actions/upload-artifact@v4
@@ -181,9 +168,8 @@ jobs:
           $env:PATH="$gopath;$env:PATH"
           $env:OLLAMA_SKIP_CPU_GENERATE="1"
           $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
-
-          $env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
-        name: go run build.go
+          go generate -x ./...
+        name: go generate
         env:
           OLLAMA_SKIP_CPU_GENERATE: '1'
       # TODO - do we need any artifacts?
@@ -216,7 +202,7 @@ jobs:
       - name: 'Verify CUDA'
         run: nvcc -V
       - run: go get ./...
-      - name: go run build.go
+      - name: go generate
         run: |
           $gopath=(get-command go).source | split-path -parent
           $cudabin=(get-command nvcc).source | split-path
@@ -225,8 +211,7 @@ jobs:
           $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
           $env:PATH="$gopath;$cudabin;$env:PATH"
           $env:OLLAMA_SKIP_CPU_GENERATE="1"
-          
-          $env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
+          go generate -x ./...
         env:
           OLLAMA_SKIP_CPU_GENERATE: '1'
       # TODO - do we need any artifacts?
@@ -300,12 +285,6 @@ jobs:
         with:
           go-version-file: go.mod
           cache: true
-      - run: |
-          GOARCH= go run build.go -f -d -target=${{ matrix.arch }}
-        if: ${{ ! startsWith(matrix.os, 'windows-') }}
-      - run: |
-          $env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
-        if: ${{ startsWith(matrix.os, 'windows-') }}
       - run: go get
       - run: |
           case ${{ matrix.arch }} in
@@ -326,8 +305,9 @@ jobs:
           touch llm/build/windows/$ARCH/stub/bin/ollama_llama_server
         if: ${{ startsWith(matrix.os, 'windows-') }}
         shell: bash
-      - run: |
-          go test -v ./...
+      - run: go generate ./...
+      - run: go build
+      - run: go test -v ./...
       - uses: actions/upload-artifact@v4
         with:
           name: ${{ matrix.os }}-binaries
diff --git a/README.md b/README.md
index 9b4015ca..d5e265ff 100644
--- a/README.md
+++ b/README.md
@@ -201,10 +201,16 @@ Install `cmake` and `go`:
 brew install cmake go
 ```
 
+Then generate dependencies:
+
+```
+go generate ./...
+```
+
 Then build the binary:
 
 ```
-go run build.go
+go build .
 ```
 
 More detailed instructions can be found in the [developer guide](https://github.com/ollama/ollama/blob/main/docs/development.md)
diff --git a/build.go b/build.go
deleted file mode 100644
index b7a55fb1..00000000
--- a/build.go
+++ /dev/null
@@ -1,192 +0,0 @@
-//go:build ignore
-
-package main
-
-import (
-	"cmp"
-	"errors"
-	"flag"
-	"log"
-	"os"
-	"os/exec"
-	"path/filepath"
-	"runtime"
-)
-
-// Flags
-var (
-	flagForce     = flag.Bool("f", false, "force re-generation of dependencies")
-	flagSkipBuild = flag.Bool("d", false, "generate dependencies only (e.g. skip 'go build .')")
-
-	// Flags to set GOARCH and GOOS explicitly for cross-platform builds,
-	// e.g., in CI to target a different platform than the build matrix
-	// default. These allows us to run generate without a separate build
-	// step for building the script binary for the host ARCH and then
-	// runing the generate script for the target ARCH. Instead, we can
-	// just run `go run build.go -target=$GOARCH` to generate the
-	// deps.
-	flagGOARCH = flag.String("target", "", "sets GOARCH to use when generating dependencies and building")
-)
-
-func buildEnv() []string {
-	return append(os.Environ(),
-		"GOARCH="+cmp.Or(*flagGOARCH, runtime.GOARCH),
-	)
-}
-
-func main() {
-	log.SetFlags(0)
-	flag.Usage = func() {
-		log.Printf("Usage: go run build.go [flags]")
-		log.Println()
-		log.Println("Flags:")
-		flag.PrintDefaults()
-		log.Println()
-		log.Println("This script builds the Ollama server binary and generates the llama.cpp")
-		log.Println("bindings for the current platform. It assumes that the current working")
-		log.Println("directory is the root directory of the Ollama project.")
-		log.Println()
-		log.Println("If the -d flag is provided, the script will only generate the dependencies")
-		log.Println("and skip building the Ollama server binary.")
-		log.Println()
-		log.Println("If the -f flag is provided, the script will force re-generation of the")
-		log.Println("dependencies.")
-		log.Println()
-		log.Println("If the -target flag is provided, the script will set GOARCH to the value")
-		log.Println("of the flag. This is useful for cross-platform builds.")
-		log.Println()
-		log.Println("The script will check for the required dependencies (cmake, gcc) and")
-		log.Println("print their version.")
-		log.Println()
-		log.Println("The script will also check if it is being run from the root directory of")
-		log.Println("the Ollama project.")
-		log.Println()
-		os.Exit(1)
-	}
-	flag.Parse()
-
-	log.Printf("=== Building Ollama ===")
-	defer func() {
-		log.Printf("=== Done building Ollama ===")
-		log.Println()
-		log.Println("To run the Ollama server, use:")
-		log.Println()
-		log.Println("    ./ollama serve")
-		log.Println()
-	}()
-
-	if flag.NArg() > 0 {
-		flag.Usage()
-	}
-
-	if !inRootDir() {
-		log.Fatalf("Please run this script from the root directory of the Ollama project.")
-	}
-
-	if err := checkDependencies(); err != nil {
-		log.Fatalf("Failed dependency check: %v", err)
-	}
-	if err := buildLlammaCPP(); err != nil {
-		log.Fatalf("Failed to build llama.cpp: %v", err)
-	}
-	if err := goBuildOllama(); err != nil {
-		log.Fatalf("Failed to build ollama Go binary: %v", err)
-	}
-}
-
-// checkDependencies does a quick check to see if the required dependencies are
-// installed on the system and functioning enough to print their version.
-//
-// TODO(bmizerany): Check the actual version of the dependencies? Seems a
-// little daunting given diff versions might print diff things. This should
-// be good enough for now.
-func checkDependencies() error {
-	var err error
-	check := func(name string, args ...string) {
-		log.Printf("=== Checking for %s ===", name)
-		defer log.Printf("=== Done checking for %s ===\n\n", name)
-		cmd := exec.Command(name, args...)
-		cmd.Stdout = os.Stdout
-		cmd.Stderr = os.Stderr
-		err = errors.Join(err, cmd.Run())
-	}
-
-	check("cmake", "--version")
-	check("gcc", "--version")
-	return err
-}
-
-func goBuildOllama() error {
-	log.Println("=== Building Ollama binary ===")
-	defer log.Printf("=== Done building Ollama binary ===\n\n")
-	if *flagSkipBuild {
-		log.Println("Skipping 'go build -o ollama .'")
-		return nil
-	}
-	cmd := exec.Command("go", "build", "-o", "ollama", ".")
-	cmd.Stdout = os.Stdout
-	cmd.Stderr = os.Stderr
-	cmd.Env = buildEnv()
-	return cmd.Run()
-}
-
-// buildLlammaCPP generates the llama.cpp bindings for the current platform.
-//
-// It assumes that the current working directory is the root directory of the
-// Ollama project.
-func buildLlammaCPP() error {
-	log.Println("=== Generating dependencies ===")
-	defer log.Printf("=== Done generating dependencies ===\n\n")
-	if *flagForce {
-		if err := os.RemoveAll(filepath.Join("llm", "build")); err != nil {
-			return err
-		}
-	}
-	if isDirectory(filepath.Join("llm", "build")) {
-		log.Println("llm/build already exists; skipping.  Use -f to force re-generate.")
-		return nil
-	}
-
-	scriptDir, err := filepath.Abs(filepath.Join("llm", "generate"))
-	if err != nil {
-		return err
-	}
-
-	var cmd *exec.Cmd
-	switch runtime.GOOS {
-	case "windows":
-		script := filepath.Join(scriptDir, "gen_windows.ps1")
-		cmd = exec.Command("powershell", "-ExecutionPolicy", "Bypass", "-File", script)
-	case "linux":
-		script := filepath.Join(scriptDir, "gen_linux.sh")
-		cmd = exec.Command("bash", script)
-	case "darwin":
-		script := filepath.Join(scriptDir, "gen_darwin.sh")
-		cmd = exec.Command("bash", script)
-	default:
-		log.Fatalf("Unsupported OS: %s", runtime.GOOS)
-	}
-	cmd.Dir = filepath.Join("llm", "generate")
-	cmd.Stdout = os.Stdout
-	cmd.Stderr = os.Stderr
-	cmd.Env = buildEnv()
-
-	log.Printf("Running GOOS=%s GOARCH=%s %s", runtime.GOOS, runtime.GOARCH, cmd.Args)
-
-	return cmd.Run()
-}
-
-func isDirectory(path string) bool {
-	info, err := os.Stat(path)
-	if err != nil {
-		return false
-	}
-	return info.IsDir()
-}
-
-// inRootDir returns true if the current working directory is the root
-// directory of the Ollama project. It looks for a file named "go.mod".
-func inRootDir() bool {
-	_, err := os.Stat("go.mod")
-	return err == nil
-}
diff --git a/docs/development.md b/docs/development.md
index 178fb439..76936c35 100644
--- a/docs/development.md
+++ b/docs/development.md
@@ -23,7 +23,13 @@ export OLLAMA_DEBUG=1
 Get the required libraries and build the native LLM code:
 
 ```bash
-go run build.go
+go generate ./...
+```
+
+Then build ollama:
+
+```bash
+go build .
 ```
 
 Now you can run `ollama`:
@@ -32,16 +38,6 @@ Now you can run `ollama`:
 ./ollama
 ```
 
-### Rebuilding the native code
-
-If at any point you need to rebuild the native code, you can run the
-build.go script again using the `-f` flag to force a rebuild, and,
-optionally, the `-d` flag to skip building the Go binary:
-
-```bash
-go run build.go -f -d
-```
-
 ### Linux
 
 #### Linux CUDA (NVIDIA)
@@ -57,10 +53,16 @@ specifying an environment variable `CUDA_LIB_DIR` to the location of the shared
 libraries, and `CUDACXX` to the location of the nvcc compiler. You can customize
 set set of target CUDA architectues by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. "50;60;70")
 
+Then generate dependencies:
+
+```
+go generate ./...
+```
+
 Then build the binary:
 
 ```
-go run build.go
+go build .
 ```
 
 #### Linux ROCm (AMD)
@@ -76,17 +78,21 @@ install (typically `/opt/rocm`), and `CLBlast_DIR` to the location of the
 CLBlast install (typically `/usr/lib/cmake/CLBlast`). You can also customize
 the AMD GPU targets by setting AMDGPU_TARGETS (e.g. `AMDGPU_TARGETS="gfx1101;gfx1102"`)
 
+```
+go generate ./...
+```
+
 Then build the binary:
 
 ```
-go run build.go
+go build .
 ```
 
 ROCm requires elevated privileges to access the GPU at runtime. On most distros you can add your user account to the `render` group, or run as root.
 
 #### Advanced CPU Settings
 
-By default, running `go run build.go` will compile a few different variations
+By default, running `go generate ./...` will compile a few different variations
 of the LLM library based on common CPU families and vector math capabilities,
 including a lowest-common-denominator which should run on almost any 64 bit CPU
 somewhat slowly. At runtime, Ollama will auto-detect the optimal variation to
@@ -96,7 +102,8 @@ like to use. For example, to compile an optimized binary for an Intel i9-9880H,
 you might use:
 
 ```
-OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go run build.go
+OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go generate ./...
+go build .
 ```
 
 #### Containerized Linux Build
@@ -117,7 +124,8 @@ Install required tools:
 
 ```powershell
 $env:CGO_ENABLED="1"
-go run build.go
+go generate ./...
+go build .
 ```
 
 #### Windows CUDA (NVIDIA)
@@ -134,4 +142,4 @@ In addition to the common Windows development tools described above, install AMD
 - [AMD HIP](https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html)
 - [Strawberry Perl](https://strawberryperl.com/)
 
-Lastly, add `ninja.exe` included with MSVC to the system path (e.g. `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja`).
+Lastly, add `ninja.exe` included with MSVC to the system path (e.g. `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja`).
\ No newline at end of file
diff --git a/llm/generate/gen_darwin.sh b/llm/generate/gen_darwin.sh
index 81e05d71..f79534cd 100755
--- a/llm/generate/gen_darwin.sh
+++ b/llm/generate/gen_darwin.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
-# This script is intended to run inside the `go run build.go` script, which
-# sets the working directory to the correct location: ./llm/generate/.
+# This script is intended to be run via `go generate`; the
+# working directory must be ./llm/generate/
 
 # TODO - add hardening to detect missing tools (cmake, etc.)
 
@@ -89,10 +89,10 @@ case "${GOARCH}" in
     ;;
 *)
     echo "GOARCH must be set"
-    echo "this script is meant to be run from within 'go run build.go'"
+    echo "this script is meant to be run from within go generate"
     exit 1
     ;;
 esac
 
 cleanup
-echo "code generation completed.  LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
+echo "go generate completed.  LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh
index ec542aca..fd4a6bc0 100755
--- a/llm/generate/gen_linux.sh
+++ b/llm/generate/gen_linux.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
-# This script is intended to run with the `go run build.go` script, which
-# sets the working directory to the correct location: ./llm/generate/.
+# This script is intended to be run via `go generate`; the
+# working directory must be llm/generate/
 
 # First we build one or more CPU based LLM libraries
 #
@@ -237,4 +237,4 @@ if [ -d "${ROCM_PATH}" ]; then
 fi
 
 cleanup
-echo "code generation completed.  LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
+echo "go generate completed.  LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
diff --git a/llm/generate/gen_windows.ps1 b/llm/generate/gen_windows.ps1
index 8880a269..0d2ae57f 100644
--- a/llm/generate/gen_windows.ps1
+++ b/llm/generate/gen_windows.ps1
@@ -288,4 +288,4 @@ if ($null -ne $env:HIP_PATH) {
 
 
 cleanup
-write-host "`ncode generation completed.  LLM runners: $(get-childitem -path ${script:SRC_DIR}\llm\build\windows\${script:ARCH})"
+write-host "`ngo generate completed.  LLM runners: $(get-childitem -path ${script:SRC_DIR}\llm\build\windows\${script:ARCH})"
diff --git a/llm/generate/generate_darwin.go b/llm/generate/generate_darwin.go
new file mode 100644
index 00000000..77685234
--- /dev/null
+++ b/llm/generate/generate_darwin.go
@@ -0,0 +1,3 @@
+package generate
+
+//go:generate bash ./gen_darwin.sh
diff --git a/llm/generate/generate_linux.go b/llm/generate/generate_linux.go
new file mode 100644
index 00000000..2b7e116d
--- /dev/null
+++ b/llm/generate/generate_linux.go
@@ -0,0 +1,3 @@
+package generate
+
+//go:generate bash ./gen_linux.sh
diff --git a/llm/generate/generate_windows.go b/llm/generate/generate_windows.go
new file mode 100644
index 00000000..d2ee5428
--- /dev/null
+++ b/llm/generate/generate_windows.go
@@ -0,0 +1,3 @@
+package generate
+
+//go:generate powershell -ExecutionPolicy Bypass -File ./gen_windows.ps1