From dccac8c8fa8ea7d81f73bce0bcf0ae6b69a391a8 Mon Sep 17 00:00:00 2001
From: Michael Yang
Date: Mon, 30 Oct 2023 17:10:54 -0700
Subject: [PATCH 1/2] k8s example

---
 examples/kubernetes/README.md | 36 ++++++++++++++++++++++
 examples/kubernetes/cpu.yaml  | 42 ++++++++++++++++++++++++++
 examples/kubernetes/gpu.yaml  | 56 +++++++++++++++++++++++++++++++++++
 3 files changed, 134 insertions(+)
 create mode 100644 examples/kubernetes/README.md
 create mode 100644 examples/kubernetes/cpu.yaml
 create mode 100644 examples/kubernetes/gpu.yaml

diff --git a/examples/kubernetes/README.md b/examples/kubernetes/README.md
new file mode 100644
index 00000000..5a888b8c
--- /dev/null
+++ b/examples/kubernetes/README.md
@@ -0,0 +1,36 @@
+# Deploy Ollama to Kubernetes
+
+## Prerequisites
+
+- Ollama: https://ollama.ai/download
+- A Kubernetes cluster. This example uses Google Kubernetes Engine (GKE).
+
+## Steps
+
+1. Create the Ollama namespace, deployment, and service
+
+    ```bash
+    kubectl apply -f cpu.yaml
+    ```
+
+1. Port forward the Ollama service to connect and use it locally
+
+    ```bash
+    kubectl -n ollama port-forward service/ollama 11434:80
+    ```
+
+1. Pull and run `orca-mini:3b`
+
+    ```bash
+    ollama run orca-mini:3b
+    ```
+
+## (Optional) Hardware Acceleration
+
+Hardware acceleration in Kubernetes requires NVIDIA's [`k8s-device-plugin`](https://github.com/NVIDIA/k8s-device-plugin). Follow the link for more details.
+
+Once configured, create a GPU-enabled Ollama deployment.
+
+```bash
+kubectl apply -f gpu.yaml
+```
diff --git a/examples/kubernetes/cpu.yaml b/examples/kubernetes/cpu.yaml
new file mode 100644
index 00000000..b8ddcdde
--- /dev/null
+++ b/examples/kubernetes/cpu.yaml
@@ -0,0 +1,42 @@
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: ollama
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ollama
+  namespace: ollama
+spec:
+  selector:
+    matchLabels:
+      name: ollama
+  template:
+    metadata:
+      labels:
+        name: ollama
+    spec:
+      containers:
+      - name: ollama
+        image: ollama/ollama:latest
+        ports:
+        - name: http
+          containerPort: 11434
+          protocol: TCP
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama
+  namespace: ollama
+spec:
+  type: ClusterIP
+  selector:
+    name: ollama
+  ports:
+  - port: 80
+    name: http
+    targetPort: http
+    protocol: TCP
diff --git a/examples/kubernetes/gpu.yaml b/examples/kubernetes/gpu.yaml
new file mode 100644
index 00000000..4ee5f07d
--- /dev/null
+++ b/examples/kubernetes/gpu.yaml
@@ -0,0 +1,56 @@
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: ollama
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ollama
+  namespace: ollama
+spec:
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      name: ollama
+  template:
+    metadata:
+      labels:
+        name: ollama
+    spec:
+      containers:
+      - name: ollama
+        image: ollama/ollama:latest
+        env:
+        - name: PATH
+          value: /usr/local/nvidia/bin:/usr/local/nvidia/lib64:/usr/bin:/usr/sbin:/bin:/sbin
+        - name: LD_LIBRARY_PATH
+          value: /usr/local/nvidia/lib64
+        ports:
+        - name: http
+          containerPort: 11434
+          protocol: TCP
+        resources:
+          limits:
+            nvidia.com/gpu: 1
+      tolerations:
+      - key: nvidia.com/gpu
+        operator: Exists
+        effect: NoSchedule
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama
+  namespace: ollama
+spec:
+  type: ClusterIP
+  selector:
+    name: ollama
+  ports:
+  - port: 80
+    name: http
+    targetPort: http
+    protocol: TCP

From 145e0608553d61993857a4acb27cbd37eabf6e20 Mon Sep 17 00:00:00 2001
From: Michael Yang
Date: Mon, 6 Nov 2023 11:32:23 -0800
Subject: [PATCH 2/2] Apply suggestions
 from code review

Co-authored-by: Bruce MacDonald
---
 examples/kubernetes/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/kubernetes/README.md b/examples/kubernetes/README.md
index 5a888b8c..cb5f39f9 100644
--- a/examples/kubernetes/README.md
+++ b/examples/kubernetes/README.md
@@ -19,7 +19,7 @@
     kubectl -n ollama port-forward service/ollama 11434:80
     ```
 
-1. Pull and run `orca-mini:3b`
+1. Pull and run a model, for example `orca-mini:3b`
 
     ```bash
     ollama run orca-mini:3b
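Once the manifests above are applied and the port-forward from the README is running, the deployment can also be exercised over Ollama's HTTP API rather than the CLI. A minimal sketch, assuming the forward maps the service to `localhost:11434` and that the `orca-mini:3b` model has already been pulled:

```bash
# Assumes `kubectl -n ollama port-forward service/ollama 11434:80` is active
# and that the orca-mini:3b model has already been pulled.
curl http://localhost:11434/api/generate -d '{
  "model": "orca-mini:3b",
  "prompt": "Why is the sky blue?"
}'
```

For the GPU variant, `kubectl -n ollama exec deploy/ollama -- nvidia-smi` is a quick way to confirm the device is visible inside the pod, assuming the node has the NVIDIA drivers and `k8s-device-plugin` installed.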