diff --git a/examples/kubernetes/README.md b/examples/kubernetes/README.md new file mode 100644 index 00000000..cb5f39f9 --- /dev/null +++ b/examples/kubernetes/README.md @@ -0,0 +1,36 @@ +# Deploy Ollama to Kubernetes + +## Prerequisites + +- Ollama: https://ollama.ai/download +- Kubernetes cluster. This example will use Google Kubernetes Engine. + +## Steps + +1. Create the Ollama namespace, deployment, and service + + ```bash + kubectl apply -f cpu.yaml + ``` + +1. Port forward the Ollama service to connect and use it locally + + ```bash + kubectl -n ollama port-forward service/ollama 11434:80 + ``` + +1. Pull and run a model, for example `orca-mini:3b` + + ```bash + ollama run orca-mini:3b + ``` + +## (Optional) Hardware Acceleration + +Hardware acceleration in Kubernetes requires NVIDIA's [`k8s-device-plugin`](https://github.com/NVIDIA/k8s-device-plugin). Follow the link for more details. + +Once configured, create a GPU-enabled Ollama deployment. + +```bash +kubectl apply -f gpu.yaml +``` diff --git a/examples/kubernetes/cpu.yaml b/examples/kubernetes/cpu.yaml new file mode 100644 index 00000000..b8ddcdde --- /dev/null +++ b/examples/kubernetes/cpu.yaml @@ -0,0 +1,42 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: ollama +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ollama + namespace: ollama +spec: + selector: + matchLabels: + name: ollama + template: + metadata: + labels: + name: ollama + spec: + containers: + - name: ollama + image: ollama/ollama:latest + ports: + - name: http + containerPort: 11434 + protocol: TCP +--- +apiVersion: v1 +kind: Service +metadata: + name: ollama + namespace: ollama +spec: + type: ClusterIP + selector: + name: ollama + ports: + - port: 80 + name: http + targetPort: http + protocol: TCP diff --git a/examples/kubernetes/gpu.yaml b/examples/kubernetes/gpu.yaml new file mode 100644 index 00000000..4ee5f07d --- /dev/null +++ b/examples/kubernetes/gpu.yaml @@ -0,0 +1,56 @@ +--- +apiVersion: v1 +kind: 
Namespace +metadata: + name: ollama +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ollama + namespace: ollama +spec: + strategy: + type: Recreate + selector: + matchLabels: + name: ollama + template: + metadata: + labels: + name: ollama + spec: + containers: + - name: ollama + image: ollama/ollama:latest + env: + - name: PATH + value: /usr/local/nvidia/bin:/usr/local/nvidia/lib64:/usr/bin:/usr/sbin:/bin:/sbin + - name: LD_LIBRARY_PATH + value: /usr/local/nvidia/lib64 + ports: + - name: http + containerPort: 11434 + protocol: TCP + resources: + limits: + nvidia.com/gpu: 1 + tolerations: + - key: nvidia.com/gpu + operator: Exists + effect: NoSchedule +--- +apiVersion: v1 +kind: Service +metadata: + name: ollama + namespace: ollama +spec: + type: ClusterIP + selector: + name: ollama + ports: + - port: 80 + name: http + targetPort: http + protocol: TCP