From dccac8c8fa8ea7d81f73bce0bcf0ae6b69a391a8 Mon Sep 17 00:00:00 2001
From: Michael Yang
Date: Mon, 30 Oct 2023 17:10:54 -0700
Subject: [PATCH 1/2] k8s example

---
 examples/kubernetes/README.md | 36 ++++++++++++++++++++++
 examples/kubernetes/cpu.yaml  | 42 ++++++++++++++++++++++++++
 examples/kubernetes/gpu.yaml  | 56 +++++++++++++++++++++++++++++++++++
 3 files changed, 134 insertions(+)
 create mode 100644 examples/kubernetes/README.md
 create mode 100644 examples/kubernetes/cpu.yaml
 create mode 100644 examples/kubernetes/gpu.yaml

diff --git a/examples/kubernetes/README.md b/examples/kubernetes/README.md
new file mode 100644
index 00000000..5a888b8c
--- /dev/null
+++ b/examples/kubernetes/README.md
@@ -0,0 +1,36 @@
+# Deploy Ollama to Kubernetes
+
+## Prerequisites
+
+- Ollama: https://ollama.ai/download
+- A Kubernetes cluster. This example uses Google Kubernetes Engine (GKE).
+
+## Steps
+
+1. Create the Ollama namespace, deployment, and service
+
+    ```bash
+    kubectl apply -f cpu.yaml
+    ```
+
+1. Port forward the Ollama service to connect and use it locally
+
+    ```bash
+    kubectl -n ollama port-forward service/ollama 11434:80
+    ```
+
+1. Pull and run `orca-mini:3b`
+
+    ```bash
+    ollama run orca-mini:3b
+    ```
+
+## (Optional) Hardware Acceleration
+
+Hardware acceleration in Kubernetes requires NVIDIA's [`k8s-device-plugin`](https://github.com/NVIDIA/k8s-device-plugin). Follow the link for more details.
+
+Once configured, create a GPU-enabled Ollama deployment.
+
+```bash
+kubectl apply -f gpu.yaml
+```
diff --git a/examples/kubernetes/cpu.yaml b/examples/kubernetes/cpu.yaml
new file mode 100644
index 00000000..b8ddcdde
--- /dev/null
+++ b/examples/kubernetes/cpu.yaml
@@ -0,0 +1,42 @@
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: ollama
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ollama
+  namespace: ollama
+spec:
+  selector:
+    matchLabels:
+      name: ollama
+  template:
+    metadata:
+      labels:
+        name: ollama
+    spec:
+      containers:
+      - name: ollama
+        image: ollama/ollama:latest
+        ports:
+        - name: http
+          containerPort: 11434
+          protocol: TCP
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama
+  namespace: ollama
+spec:
+  type: ClusterIP
+  selector:
+    name: ollama
+  ports:
+  - port: 80
+    name: http
+    targetPort: http
+    protocol: TCP
diff --git a/examples/kubernetes/gpu.yaml b/examples/kubernetes/gpu.yaml
new file mode 100644
index 00000000..4ee5f07d
--- /dev/null
+++ b/examples/kubernetes/gpu.yaml
@@ -0,0 +1,56 @@
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: ollama
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ollama
+  namespace: ollama
+spec:
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      name: ollama
+  template:
+    metadata:
+      labels:
+        name: ollama
+    spec:
+      containers:
+      - name: ollama
+        image: ollama/ollama:latest
+        env:
+        - name: PATH
+          value: /usr/local/nvidia/bin:/usr/local/nvidia/lib64:/usr/bin:/usr/sbin:/bin:/sbin
+        - name: LD_LIBRARY_PATH
+          value: /usr/local/nvidia/lib64
+        ports:
+        - name: http
+          containerPort: 11434
+          protocol: TCP
+        resources:
+          limits:
+            nvidia.com/gpu: 1
+      tolerations:
+      - key: nvidia.com/gpu
+        operator: Exists
+        effect: NoSchedule
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama
+  namespace: ollama
+spec:
+  type: ClusterIP
+  selector:
+    name: ollama
+  ports:
+  - port: 80
+    name: http
+    targetPort: http
+    protocol: TCP

From 145e0608553d61993857a4acb27cbd37eabf6e20 Mon Sep 17 00:00:00 2001
From: Michael Yang
Date: Mon, 6 Nov 2023 11:32:23 -0800
Subject: [PATCH 2/2] Apply suggestions
 from code review

Co-authored-by: Bruce MacDonald
---
 examples/kubernetes/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/kubernetes/README.md b/examples/kubernetes/README.md
index 5a888b8c..cb5f39f9 100644
--- a/examples/kubernetes/README.md
+++ b/examples/kubernetes/README.md
@@ -19,7 +19,7 @@
     kubectl -n ollama port-forward service/ollama 11434:80
     ```
 
-1. Pull and run `orca-mini:3b`
+1. Pull and run a model, for example `orca-mini:3b`
 
     ```bash
     ollama run orca-mini:3b
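Once the manifests above are applied and the port-forward from the README is running, the deployment can also be exercised over Ollama's HTTP API rather than the CLI. A minimal sketch, assuming the forward maps the service to `localhost:11434` and that the `orca-mini:3b` model has already been pulled:

```bash
# Assumes `kubectl -n ollama port-forward service/ollama 11434:80` is active
# and that the orca-mini:3b model has already been pulled.
curl http://localhost:11434/api/generate -d '{
  "model": "orca-mini:3b",
  "prompt": "Why is the sky blue?"
}'
```

For the GPU variant, `kubectl -n ollama exec deploy/ollama -- nvidia-smi` is a quick way to confirm the device is visible inside the pod, assuming the node has the NVIDIA drivers and `k8s-device-plugin` installed.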