├── .drone.yml ├── .github ├── FUNDING.yaml ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ └── feature_request.yml ├── pull_request_template.md └── workflows │ ├── cd.yaml │ └── ci.yaml ├── .gitignore ├── .helmignore ├── Chart.yaml ├── LICENSE ├── README.md ├── artifacthub-repo.yml ├── banner.png ├── ci └── test-values.yaml ├── kind-config.yml ├── templates ├── NOTES.txt ├── _helpers.tpl ├── deployment.yaml ├── extra-manifests.yaml ├── hpa.yaml ├── ingress.yaml ├── knative │ └── service.yaml ├── pvc.yaml ├── service.yaml ├── serviceaccount.yaml └── tests │ └── test-connection.yaml └── values.yaml /.drone.yml: -------------------------------------------------------------------------------- 1 | kind: pipeline 2 | type: docker 3 | name: helm-lint 4 | 5 | # Use docker:dind for running kind 6 | services: 7 | - name: kubernetes 8 | image: docker:dind 9 | privileged: true 10 | volumes: 11 | - name: dockersock 12 | path: /var/run 13 | 14 | volumes: 15 | - name: dockersock 16 | temp: {} 17 | - name: kubeconfig 18 | temp: {} 19 | 20 | steps: 21 | 22 | - name: wait for docker 23 | image: alpine 24 | commands: 25 | - sleep 5 26 | 27 | - name: create kind cluster 28 | image: otwld/drone-kind 29 | settings: 30 | verbose: 1 31 | cluster_name: "kind-default" 32 | hostname: "kubernetes" # Use same name as service`s name 33 | config: 'kind-config.yml' 34 | volumes: 35 | - name: dockersock 36 | path: /var/run 37 | - name: kubeconfig 38 | path: /root/.kube 39 | depends_on: 40 | - wait for docker 41 | 42 | - name: lint & install chart 43 | image: quay.io/helmpack/chart-testing 44 | commands: 45 | - ct lint-and-install --chart-dirs . --charts . 
46 | depends_on: 47 | - create kind cluster 48 | volumes: 49 | - name: kubeconfig 50 | path: /root/.kube 51 | 52 | - name: delete kind cluster 53 | image: otwld/drone-kind 54 | settings: 55 | clean_only: true 56 | volumes: 57 | - name: dockersock 58 | path: /var/run 59 | - name: kubeconfig 60 | path: /root/.kube 61 | when: 62 | status: 63 | - success 64 | - failure 65 | depends_on: 66 | - lint & install chart 67 | 68 | trigger: 69 | event: 70 | - push 71 | - pull_request 72 | - custom 73 | branch: 74 | - main 75 | 76 | --- 77 | kind: pipeline 78 | type: docker 79 | name: publish 80 | 81 | 82 | trigger: 83 | event: 84 | - promote 85 | branch: 86 | - main 87 | 88 | steps: 89 | - name: publish helm chart to github pages 90 | image: otwld/drone-chart-releaser-github-pages 91 | settings: 92 | cr_token: 93 | from_secret: github_access_token 94 | skip_existing: true 95 | root_package: true 96 | -------------------------------------------------------------------------------- /.github/FUNDING.yaml: -------------------------------------------------------------------------------- 1 | github: jdetroyes 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: File a bug report. 3 | labels: ["bug", "triage"] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | **Please fill out the details below to help us understand and resolve the issue effectively.** 9 | - type: textarea 10 | id: what-happened 11 | attributes: 12 | label: What happened? 13 | description: Please describe the issue or the goal you are trying to achieve with the Ollama Chart. 14 | placeholder: Tell us what you see! 15 | validations: 16 | required: true 17 | - type: input 18 | id: chart-version 19 | attributes: 20 | label: Chart version 21 | description: Please provide the version of the Ollama Chart you are using. 
22 | placeholder: 0.x.x 23 | validations: 24 | required: true 25 | - type: input 26 | id: Kubernetes-version 27 | attributes: 28 | label: Kubernetes version 29 | description: Please specify the version of Kubernetes you are using. 30 | placeholder: 1.x.x 31 | validations: 32 | required: true 33 | - type: input 34 | id: Kubernetes-distribution 35 | attributes: 36 | label: Kubernetes distribution 37 | description: Please mention the Kubernetes engine you are using (e.g., k8s, k3s, minikube, kind, rancher). 38 | validations: 39 | required: true 40 | - type: textarea 41 | id: logs 42 | attributes: 43 | label: Relevant log output 44 | description: If relevant, please provide the logs from the pods to help diagnose the issue. (Enable OLLAMA_DEBUG for more details) 45 | render: shell 46 | - type: textarea 47 | id: values 48 | attributes: 49 | label: Your values.yaml 50 | description: Please share the contents of your values.yaml file. If sensitive information is included, consider redacting it. 51 | render: yaml 52 | - type: textarea 53 | id: additional-context 54 | attributes: 55 | label: Additional context 56 | description: Add any other context about the problem here. 57 | render: markdown -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: Feature request 2 | description: Suggest an idea for this project. 3 | labels: ["enhancement", "triage"] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | **Please fill out the details below to help us understand.** 9 | - type: textarea 10 | id: request-related 11 | attributes: 12 | label: Is your feature request related to a problem? Please describe 13 | description: A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 
14 | validations: 15 | required: true 16 | - type: textarea 17 | id: solution 18 | attributes: 19 | label: Describe the solution you'd like 20 | description: A clear and concise description of what you want to happen. 21 | validations: 22 | required: true 23 | - type: textarea 24 | id: alternatives 25 | attributes: 26 | label: Describe alternatives you've considered 27 | description: A clear and concise description of any alternative solutions or features you've considered. 28 | - type: textarea 29 | id: additional-context 30 | attributes: 31 | label: Additional context 32 | description: Add any other context or screenshots about the feature request here. 33 | render: markdown -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | **Summary of changes:** 2 | 3 | **Checklist:** 4 | 5 | * [ ] I updated the `artifacthub.io/changes` annotation in _Chart.yml_ according to the [documentation](https://artifacthub.io/docs/topics/annotations/helm/#supported-annotations) 6 | * [ ] Optional: I updated in _README.md_ the [Helm Values](https://github.com/otwld/ollama-helm?tab=readme-ov-file#helm-values) -------------------------------------------------------------------------------- /.github/workflows/cd.yaml: -------------------------------------------------------------------------------- 1 | name: Deploy to registry and Release 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | release: 8 | permissions: 9 | contents: write 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout 13 | uses: actions/checkout@v3 14 | with: 15 | fetch-depth: 0 16 | 17 | - name: Configure Git 18 | run: | 19 | git config user.name "$GITHUB_ACTOR" 20 | git config user.email "$GITHUB_ACTOR@users.noreply.github.com" 21 | 22 | - name: Install Helm 23 | uses: azure/setup-helm@v4 24 | env: 25 | GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" 26 | 27 | - 
name: Run chart-releaser 28 | uses: helm/chart-releaser-action@v1.6.0 29 | with: 30 | install_only: true 31 | 32 | - name: Clean 33 | run: | 34 | rm -rf .cr-release-packages 35 | mkdir -p .cr-release-packages 36 | rm -rf .cr-index 37 | mkdir -p .cr-index 38 | rm -rf .ollama-helm 39 | mkdir -p .ollama-helm 40 | 41 | - name: Copy Ollama chart 42 | run: | 43 | cp -r templates .ollama-helm/ 44 | cp Chart.yaml .ollama-helm/ 45 | cp LICENSE .ollama-helm/ 46 | cp values.yaml .ollama-helm/ 47 | cp .helmignore .ollama-helm/ 48 | cp README.md .ollama-helm/ 49 | 50 | - name: Package chart 51 | run: cr package . 52 | 53 | - name: Upload chart 54 | run: cr upload --owner otwld --git-repo ollama-helm --generate-release-notes -c "$(git rev-parse HEAD)" 55 | env: 56 | CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}" 57 | 58 | - name: Upload chart index 59 | run: cr index --owner otwld --git-repo ollama-helm --push 60 | env: 61 | CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}" 62 | 63 | # Migration to OTWLD helm charts repository 64 | - name: Push to central helm-charts repo 65 | uses: cpina/github-action-push-to-another-repository@v1.7.2 66 | env: 67 | API_TOKEN_GITHUB: ${{ secrets.HELM_CHARTS_REPO_TOKEN }} 68 | with: 69 | source-directory: ".ollama-helm/" 70 | target-directory: "charts/ollama" 71 | destination-github-username: ${{ github.repository_owner }} 72 | destination-repository-name: "helm-charts" 73 | user-email: "$GITHUB_ACTOR@users.noreply.github.com" 74 | target-branch: main -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: Helm Lint and Test 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths-ignore: 8 | - .github/ISSUE_TEMPLATE/** 9 | - .github/workflows/cd.yaml 10 | pull_request: 11 | types: 12 | - opened 13 | - synchronize 14 | - reopened 15 | paths-ignore: 16 | - .github/ISSUE_TEMPLATE/** 17 | - .github/workflows/cd.yaml 18 | 
workflow_dispatch: 19 | 20 | jobs: 21 | helm-lint: 22 | runs-on: ubuntu-latest 23 | steps: 24 | - name: Checkout 25 | uses: actions/checkout@v4 26 | 27 | - name: Set up Helm 28 | uses: azure/setup-helm@v4 29 | 30 | - uses: actions/setup-python@v5 31 | with: 32 | python-version: '3.x' 33 | check-latest: true 34 | 35 | - name: Set up chart-testing 36 | uses: helm/chart-testing-action@v2 37 | 38 | - name: Create kind cluster 39 | uses: helm/kind-action@v1 40 | 41 | - name: Run chart-testing 42 | run: ct lint-and-install --chart-dirs . --charts . -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # See http://help.github.com/ignore-files/ for more about ignoring files. 2 | 3 | # compiled output 4 | dist 5 | tmp 6 | /out-tsc 7 | 8 | # dependencies 9 | node_modules 10 | 11 | # IDEs and editors 12 | /.idea 13 | .cr-release-packages 14 | .project 15 | .classpath 16 | .c9/ 17 | *.launch 18 | .settings/ 19 | *.sublime-workspace 20 | 21 | .env.* 22 | 23 | 24 | # IDE - VSCode 25 | .vscode/* 26 | !.vscode/settings.json 27 | !.vscode/tasks.json 28 | !.vscode/launch.json 29 | !.vscode/extensions.json 30 | 31 | # misc 32 | /.sass-cache 33 | /connect.lock 34 | /coverage 35 | /libpeerconnection.log 36 | npm-debug.log 37 | yarn-error.log 38 | testem.log 39 | /typings 40 | 41 | # System Files 42 | .DS_Store 43 | Thumbs.db 44 | 45 | 46 | venv/ 47 | 48 | *.pyc 49 | __pycache__/ 50 | 51 | instance/ 52 | 53 | .pytest_cache/ 54 | .coverage 55 | htmlcov/ 56 | 57 | build/ 58 | *.egg-info/ -------------------------------------------------------------------------------- /.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | .drone.yml 23 | *.tmproj 24 | .vscode/ 25 | 26 | #others 27 | .github 28 | kind-config.yml 29 | ci/ 30 | 31 | -------------------------------------------------------------------------------- /Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: ollama 3 | description: Get up and running with large language models locally. 4 | 5 | type: application 6 | 7 | version: 1.19.0 8 | 9 | appVersion: "0.9.0" 10 | 11 | annotations: 12 | artifacthub.io/category: ai-machine-learning 13 | artifacthub.io/changes: | 14 | - kind: changed 15 | description: upgrade app version to 0.9.0 16 | links: 17 | - name: Ollama release v0.9.0 18 | url: https://github.com/ollama/ollama/releases/tag/v0.9.0 19 | 20 | kubeVersion: "^1.16.0-0" 21 | home: https://ollama.ai/ 22 | icon: https://ollama.ai/public/ollama.png 23 | keywords: 24 | - ai 25 | - llm 26 | - llama 27 | - mistral 28 | sources: 29 | - https://github.com/ollama/ollama 30 | - https://github.com/otwld/ollama-helm 31 | maintainers: 32 | - name: OTWLD 33 | email: contact@otwld.com 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 OTWLD 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, 
subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![otwld ollama helm chart banner](./banner.png) 2 | 3 | ![GitHub License](https://img.shields.io/github/license/otwld/ollama-helm) 4 | [![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/ollama-helm)](https://artifacthub.io/packages/helm/ollama-helm/ollama) 5 | [![Helm Lint and Test](https://github.com/otwld/ollama-helm/actions/workflows/ci.yaml/badge.svg?branch=main)](https://github.com/otwld/ollama-helm/actions/workflows/ci.yaml) 6 | [![Discord](https://img.shields.io/badge/Discord-OTWLD-blue?logo=discord&logoColor=white)](https://discord.gg/U24mpqTynB) 7 | 8 | [Ollama](https://ollama.ai/), get up and running with large language models, locally. 9 | 10 | This Community Chart is for deploying [Ollama](https://github.com/ollama/ollama). 
11 | 12 | ## Requirements 13 | 14 | - Kubernetes: `>= 1.16.0-0` for **CPU only** 15 | 16 | - Kubernetes: `>= 1.26.0-0` for **GPU** stable support (NVIDIA and AMD) 17 | 18 | *Not all GPUs are currently supported with ollama (especially with AMD)* 19 | 20 | ## Deploying Ollama chart 21 | 22 | To install the `ollama` chart in the `ollama` namespace: 23 | 24 | > [!IMPORTANT] 25 | > We are migrating the registry from https://otwld.github.io/ollama-helm/ url to OTWLD Helm central registry https://helm.otwld.com/ 26 | > Please update your Helm registry accordingly. 27 | 28 | ```console 29 | helm repo add otwld https://helm.otwld.com/ 30 | helm repo update 31 | helm install ollama otwld/ollama --namespace ollama --create-namespace 32 | ``` 33 | 34 | ## Upgrading Ollama chart 35 | 36 | First please read the [release notes](https://github.com/ollama/ollama/releases) of Ollama to make sure there are no 37 | backwards incompatible changes. 38 | 39 | Make adjustments to your values as needed, then run `helm upgrade`: 40 | 41 | ```console 42 | # -- This pulls the latest version of the ollama chart from the repo. 43 | helm repo update 44 | helm upgrade ollama otwld/ollama --namespace ollama --values values.yaml 45 | ``` 46 | 47 | ## Uninstalling Ollama chart 48 | 49 | To uninstall/delete the `ollama` deployment in the `ollama` namespace: 50 | 51 | ```console 52 | helm delete ollama --namespace ollama 53 | ``` 54 | 55 | Substitute your values if they differ from the examples. See `helm delete --help` for a full reference on `delete` 56 | parameters and flags. 
57 | 58 | ## Interact with Ollama 59 | 60 | - **Ollama documentation can be found [HERE](https://github.com/ollama/ollama/tree/main/docs)** 61 | - Interact with RESTful API: [Ollama API](https://github.com/ollama/ollama/blob/main/docs/api.md) 62 | - Interact with official clients libraries: [ollama-js](https://github.com/ollama/ollama-js#custom-client) 63 | and [ollama-python](https://github.com/ollama/ollama-python#custom-client) 64 | - Interact with langchain: [langchain-js](https://github.com/ollama/ollama/blob/main/docs/tutorials/langchainjs.md) 65 | and [langchain-python](https://github.com/ollama/ollama/blob/main/docs/tutorials/langchainpy.md) 66 | 67 | ## Examples 68 | 69 | - **It's highly recommended to run an updated version of Kubernetes for deploying ollama with GPU** 70 | 71 | ### Basic values.yaml example with GPU and two models pulled at startup 72 | 73 | ``` 74 | ollama: 75 | gpu: 76 | # -- Enable GPU integration 77 | enabled: true 78 | 79 | # -- GPU type: 'nvidia' or 'amd' 80 | type: 'nvidia' 81 | 82 | # -- Specify the number of GPU to 1 83 | number: 1 84 | 85 | # -- List of models to pull at container startup 86 | models: 87 | pull: 88 | - mistral 89 | - llama2 90 | ``` 91 | 92 | --- 93 | 94 | ### Basic values.yaml example with Ingress 95 | 96 | ``` 97 | ollama: 98 | models: 99 | pull: 100 | - llama2 101 | 102 | ingress: 103 | enabled: true 104 | hosts: 105 | - host: ollama.domain.lan 106 | paths: 107 | - path: / 108 | pathType: Prefix 109 | ``` 110 | 111 | - *API is now reachable at `ollama.domain.lan`* 112 | 113 | --- 114 | 115 | ### Create and run model from template 116 | 117 | ``` 118 | ollama: 119 | models: 120 | create: 121 | - name: llama3.1-ctx32768 122 | template: | 123 | FROM llama3.1 124 | PARAMETER num_ctx 32768 125 | run: 126 | - llama3.1-ctx32768 127 | ``` 128 | 129 | ## Upgrading from 0.X.X to 1.X.X 130 | 131 | The version 1.X.X introduces the ability to load models in memory at startup, the values have been changed. 
132 | 133 | Please change `ollama.models` to `ollama.models.pull` to avoid errors before upgrading: 134 | 135 | ```yaml 136 | ollama: 137 | models: 138 | - mistral 139 | - llama2 140 | ``` 141 | 142 | To: 143 | 144 | ```yaml 145 | ollama: 146 | models: 147 | pull: 148 | - mistral 149 | - llama2 150 | ``` 151 | 152 | ## Helm Values 153 | 154 | - See [values.yaml](values.yaml) to see the Chart's default values. 155 | 156 | | Key | Type | Default | Description | 157 | |--------------------------------------------|--------|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 158 | | affinity | object | `{}` | Affinity for pod assignment | 159 | | autoscaling.enabled | bool | `false` | Enable autoscaling | 160 | | autoscaling.maxReplicas | int | `100` | Number of maximum replicas | 161 | | autoscaling.minReplicas | int | `1` | Number of minimum replicas | 162 | | autoscaling.targetCPUUtilizationPercentage | int | `80` | CPU usage to target replica | 163 | | deployment.labels | object | `{}` | Labels to add to the deployment | 164 | | extraArgs | list | `[]` | Additional arguments on the output Deployment definition. | 165 | | extraEnv | list | `[]` | Additional environments variables on the output Deployment definition. For extra OLLAMA env, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go | 166 | | extraEnvFrom | list | `[]` | Additionl environment variables from external sources (like ConfigMap) | 167 | | extraObjects | list | `[]` | Extra K8s manifests to deploy | 168 | | fullnameOverride | string | `""` | String to fully override template | 169 | | hostIPC | bool | `false` | Use the host’s ipc namespace. 
| 170 | | hostNetwork | bool | `false` | Use the host's network namespace. | 171 | | hostPID | bool | `false` | Use the host’s pid namespace | 172 | | image.pullPolicy | string | `"IfNotPresent"` | Docker pull policy | 173 | | image.repository | string | `"ollama/ollama"` | Docker image registry | 174 | | image.tag | string | `""` | Docker image tag, overrides the image tag whose default is the chart appVersion. | 175 | | imagePullSecrets | list | `[]` | Docker registry secret names as an array | 176 | | ingress.annotations | object | `{}` | Additional annotations for the Ingress resource. | 177 | | ingress.className | string | `""` | IngressClass that will be used to implement the Ingress (Kubernetes 1.18+) | 178 | | ingress.enabled | bool | `false` | Enable ingress controller resource | 179 | | ingress.hosts[0].host | string | `"ollama.local"` | | 180 | | ingress.hosts[0].paths[0].path | string | `"/"` | | 181 | | ingress.hosts[0].paths[0].pathType | string | `"Prefix"` | | 182 | | ingress.tls | list | `[]` | The tls configuration for hostnames to be covered with this ingress record. 
| 183 | | initContainers | list | `[]` | Init containers to add to the pod | 184 | | knative.annotations | object | `{}` | Knative service annotations | 185 | | knative.containerConcurrency | int | `0` | Knative service container concurrency | 186 | | knative.enabled | bool | `false` | Enable Knative integration | 187 | | knative.idleTimeoutSeconds | int | `300` | Knative service idle timeout seconds | 188 | | knative.responseStartTimeoutSeconds | int | `300` | Knative service response start timeout seconds | 189 | | knative.timeoutSeconds | int | `300` | Knative service timeout seconds | 190 | | lifecycle | object | `{}` | Lifecycle for pod assignment (override ollama.models startup pull/run) | 191 | | livenessProbe.enabled | bool | `true` | Enable livenessProbe | 192 | | livenessProbe.failureThreshold | int | `6` | Failure threshold for livenessProbe | 193 | | livenessProbe.initialDelaySeconds | int | `60` | Initial delay seconds for livenessProbe | 194 | | livenessProbe.path | string | `"/"` | Request path for livenessProbe | 195 | | livenessProbe.periodSeconds | int | `10` | Period seconds for livenessProbe | 196 | | livenessProbe.successThreshold | int | `1` | Success threshold for livenessProbe | 197 | | livenessProbe.timeoutSeconds | int | `5` | Timeout seconds for livenessProbe | 198 | | nameOverride | string | `""` | String to partially override template (will maintain the release name) | 199 | | namespaceOverride | string | `""` | String to fully override namespace | 200 | | nodeSelector | object | `{}` | Node labels for pod assignment. 
| 201 | | ollama.gpu.enabled | bool | `false` | Enable GPU integration | 202 | | ollama.gpu.mig.devices | object | `{}` | Specify the mig devices and the corresponding number | 203 | | ollama.gpu.mig.enabled | bool | `false` | Enable multiple mig devices If enabled you will have to specify the mig devices If enabled is set to false this section is ignored | 204 | | ollama.gpu.number | int | `1` | Specify the number of GPU If you use MIG section below then this parameter is ignored | 205 | | ollama.gpu.nvidiaResource | string | `"nvidia.com/gpu"` | only for nvidia cards; change to (example) 'nvidia.com/mig-1g.10gb' to use MIG slice | 206 | | ollama.gpu.type | string | `"nvidia"` | GPU type: 'nvidia' or 'amd' If 'ollama.gpu.enabled', default value is nvidia If set to 'amd', this will add 'rocm' suffix to image tag if 'image.tag' is not override This is due cause AMD and CPU/CUDA are different images | 207 | | ollama.insecure | bool | `false` | Add insecure flag for pulling at container startup | 208 | | ollama.models.create | list | `[]` | List of models to create at container startup, there are two options 1. Create a raw model 2. Load a model from configMaps, configMaps must be created before and are loaded as volume in "/models" directory. 
create: - name: llama3.1-ctx32768 configMapRef: my-configmap configMapKeyRef: configmap-key - name: llama3.1-ctx32768 template: | FROM llama3.1 PARAMETER num_ctx 32768 | 209 | | ollama.models.pull | list | `[]` | List of models to pull at container startup The more you add, the longer the container will take to start if models are not present pull: - llama2 - mistral | 210 | | ollama.models.run | list | `[]` | List of models to load in memory at container startup run: - llama2 - mistral | 211 | | ollama.mountPath | string | `""` | Override ollama-data volume mount path, default: "/root/.ollama" | 212 | | ollama.port | int | `11434` | | 213 | | persistentVolume.accessModes | list | `["ReadWriteOnce"]` | Ollama server data Persistent Volume access modes Must match those of existing PV or dynamic provisioner Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ | 214 | | persistentVolume.annotations | object | `{}` | Ollama server data Persistent Volume annotations | 215 | | persistentVolume.enabled | bool | `false` | Enable persistence using PVC | 216 | | persistentVolume.existingClaim | string | `""` | If you'd like to bring your own PVC for persisting Ollama state, pass the name of the created + ready PVC here. If set, this Chart will not create the default PVC. Requires server.persistentVolume.enabled: true | 217 | | persistentVolume.size | string | `"30Gi"` | Ollama server data Persistent Volume size | 218 | | persistentVolume.storageClass | string | `""` | Ollama server data Persistent Volume Storage Class If defined, storageClassName: If set to "-", storageClassName: "", which disables dynamic provisioning If undefined (the default) or set to null, no storageClassName spec is set, choosing the default provisioner. 
(gp2 on AWS, standard on GKE, AWS & OpenStack) | 219 | | persistentVolume.subPath | string | `""` | Subdirectory of Ollama server data Persistent Volume to mount Useful if the volume's root directory is not empty | 220 | | persistentVolume.volumeMode | string | `""` | Ollama server data Persistent Volume Binding Mode If defined, volumeMode: If empty (the default) or set to null, no volumeBindingMode spec is set, choosing the default mode. | 221 | | persistentVolume.volumeName | string | `""` | Pre-existing PV to attach this claim to Useful if a CSI auto-provisions a PV for you and you want to always reference the PV moving forward | 222 | | podAnnotations | object | `{}` | Map of annotations to add to the pods | 223 | | podLabels | object | `{}` | Map of labels to add to the pods | 224 | | podSecurityContext | object | `{}` | Pod Security Context | 225 | | readinessProbe.enabled | bool | `true` | Enable readinessProbe | 226 | | readinessProbe.failureThreshold | int | `6` | Failure threshold for readinessProbe | 227 | | readinessProbe.initialDelaySeconds | int | `30` | Initial delay seconds for readinessProbe | 228 | | readinessProbe.path | string | `"/"` | Request path for readinessProbe | 229 | | readinessProbe.periodSeconds | int | `5` | Period seconds for readinessProbe | 230 | | readinessProbe.successThreshold | int | `1` | Success threshold for readinessProbe | 231 | | readinessProbe.timeoutSeconds | int | `3` | Timeout seconds for readinessProbe | 232 | | replicaCount | int | `1` | Number of replicas | 233 | | resources.limits | object | `{}` | Pod limit | 234 | | resources.requests | object | `{}` | Pod requests | 235 | | runtimeClassName | string | `""` | Specify runtime class | 236 | | securityContext | object | `{}` | Container Security Context | 237 | | service.annotations | object | `{}` | Annotations to add to the service | 238 | | service.labels | object | `{}` | Labels to add to the service | 239 | | service.loadBalancerIP | string | `nil` | Load 
Balancer IP address | 240 | | service.nodePort | int | `31434` | Service node port when service type is 'NodePort' | 241 | | service.port | int | `11434` | Service port | 242 | | service.type | string | `"ClusterIP"` | Service type | 243 | | serviceAccount.annotations | object | `{}` | Annotations to add to the service account | 244 | | serviceAccount.automount | bool | `true` | Automatically mount a ServiceAccount's API credentials? | 245 | | serviceAccount.create | bool | `true` | Specifies whether a service account should be created | 246 | | serviceAccount.name | string | `""` | The name of the service account to use. If not set and create is true, a name is generated using the fullname template | 247 | | tolerations | list | `[]` | Tolerations for pod assignment | 248 | | tests.enabled | bool | `true` | Enable or disable test pods | 249 | | tests.labels | object | `{}` | Labels to add to the test pods | 250 | | topologySpreadConstraints | object | `{}` | Topology Spread Constraints for pod assignment | 251 | | updateStrategy.type | string | `"Recreate"` | Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate | 252 | | volumeMounts | list | `[]` | Additional volumeMounts on the output Deployment definition. | 253 | | volumes | list | `[]` | Additional volumes on the output Deployment definition. | 254 | 255 | ---------------------------------------------- 256 | 257 | ## Core team 258 | 259 | 260 | 261 | 285 | 314 | 315 |
262 | Jean Baptiste Detroyes
     Jean Baptiste Detroyes     
270 |
271 | 276 | 283 |
284 |
286 | Nathan Tréhout
     Nathan Tréhout     
294 |
295 | 300 | 305 | 312 |
313 |
316 | 317 | ## Support 318 | 319 | - For questions, suggestions, and discussion about Ollama please refer to 320 | the [Ollama issue page](https://github.com/ollama/ollama/issues) 321 | - For questions, suggestions, and discussion about this chart please 322 | visit [Ollama-Helm issue page](https://github.com/otwld/ollama-helm/issues) or join 323 | our [OTWLD Discord](https://discord.gg/U24mpqTynB) 324 | -------------------------------------------------------------------------------- /artifacthub-repo.yml: -------------------------------------------------------------------------------- 1 | # Artifact Hub repository metadata file 2 | # 3 | # Some settings like the verified publisher flag or the ignored packages won't 4 | # be applied until the next time the repository is processed. Please keep in 5 | # mind that the repository won't be processed if it has not changed since the 6 | # last time it was processed. Depending on the repository kind, this is checked 7 | # in a different way. For Helm http based repositories, we consider it has 8 | # changed if the `index.yaml` file changes. For git based repositories, it does 9 | # when the hash of the last commit in the branch you set up changes. This does 10 | # NOT apply to ownership claim operations, which are processed immediately. 
11 | # 12 | repositoryID: 2ccfd0bd-c123-483e-96f6-eb446fa850ac 13 | owners: 14 | - name: OTWLD 15 | email: tech@otwld.com 16 | - name: Jean-Baptiste DETROYES 17 | email: jean-baptiste@detroyes.fr -------------------------------------------------------------------------------- /banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/otwld/ollama-helm/fdd773f5174bb08e1aee94166e18ba3bdb57d83b/banner.png -------------------------------------------------------------------------------- /ci/test-values.yaml: -------------------------------------------------------------------------------- 1 | ollama: 2 | models: 3 | pull: 4 | - moondream 5 | run: 6 | - moondream 7 | -------------------------------------------------------------------------------- /kind-config.yml: -------------------------------------------------------------------------------- 1 | kind: Cluster 2 | apiVersion: kind.x-k8s.io/v1alpha4 3 | networking: 4 | apiServerAddress: "0.0.0.0" 5 | -------------------------------------------------------------------------------- /templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | 1. Get the application URL by running these commands: 2 | {{- if .Values.knative.enabled }} 3 | export KSERVICE_URL=$(kubectl get ksvc --namespace {{ .Release.Namespace }} {{ include "ollama.fullname" . }} -o jsonpath={.status.url}) 4 | echo "Visit $KSERVICE_URL to use your application" 5 | {{- else if .Values.ingress.enabled }} 6 | {{- range $host := .Values.ingress.hosts }} 7 | {{- range .paths }} 8 | http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }} 9 | {{- end }} 10 | {{- end }} 11 | {{- else if contains "NodePort" .Values.service.type }} 12 | export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "ollama.fullname" . 
}}) 13 | export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") 14 | echo http://$NODE_IP:$NODE_PORT 15 | {{- else if contains "LoadBalancer" .Values.service.type }} 16 | NOTE: It may take a few minutes for the LoadBalancer IP to be available. 17 | You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "ollama.fullname" . }}' 18 | export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "ollama.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") 19 | echo http://$SERVICE_IP:{{ .Values.service.port }} 20 | {{- else if contains "ClusterIP" .Values.service.type }} 21 | export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "ollama.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") 22 | export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") 23 | echo "Visit http://127.0.0.1:8080 to use your application" 24 | kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT 25 | {{- end }} 26 | -------------------------------------------------------------------------------- /templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* 2 | Allow the release namespace to be overridden for multi-namespace deployments in combined charts 3 | */}} 4 | {{- define "ollama.namespace" -}} 5 | {{- if .Values.namespaceOverride -}} 6 | {{- .Values.namespaceOverride -}} 7 | {{- else -}} 8 | {{- .Release.Namespace -}} 9 | {{- end -}} 10 | {{- end -}} 11 | 12 | {{/* 13 | Expand the name of the chart. 
14 | */}} 15 | {{- define "ollama.name" -}} 16 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} 17 | {{- end }} 18 | 19 | {{/* 20 | Create a default fully qualified app name. 21 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 22 | If release name contains chart name it will be used as a full name. 23 | */}} 24 | {{- define "ollama.fullname" -}} 25 | {{- if .Values.fullnameOverride }} 26 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} 27 | {{- else }} 28 | {{- $name := default .Chart.Name .Values.nameOverride }} 29 | {{- if contains $name .Release.Name }} 30 | {{- .Release.Name | trunc 63 | trimSuffix "-" }} 31 | {{- else }} 32 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} 33 | {{- end }} 34 | {{- end }} 35 | {{- end }} 36 | 37 | {{/* 38 | Create chart name and version as used by the chart label. 39 | */}} 40 | {{- define "ollama.chart" -}} 41 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} 42 | {{- end }} 43 | 44 | {{/* 45 | Common labels 46 | */}} 47 | {{- define "ollama.labels" -}} 48 | helm.sh/chart: {{ include "ollama.chart" . }} 49 | {{ include "ollama.selectorLabels" . }} 50 | {{- if .Chart.AppVersion }} 51 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 52 | {{- end }} 53 | app.kubernetes.io/managed-by: {{ .Release.Service }} 54 | {{- end }} 55 | 56 | {{/* 57 | Selector labels 58 | */}} 59 | {{- define "ollama.selectorLabels" -}} 60 | app.kubernetes.io/name: {{ include "ollama.name" . }} 61 | app.kubernetes.io/instance: {{ .Release.Name }} 62 | {{- end }} 63 | 64 | {{/* 65 | Create the name of the service account to use 66 | */}} 67 | {{- define "ollama.serviceAccountName" -}} 68 | {{- if .Values.serviceAccount.create }} 69 | {{- default (include "ollama.fullname" .) 
.Values.serviceAccount.name }} 70 | {{- else }} 71 | {{- default "default" .Values.serviceAccount.name }} 72 | {{- end }} 73 | {{- end }} 74 | 75 | {{/* 76 | Models mount path 77 | */}} 78 | {{- define "ollama.modelsMountPath" -}} 79 | {{- printf "%s/models" ( default "/root/.ollama") }} 80 | {{- end -}} 81 | -------------------------------------------------------------------------------- /templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | {{- if not .Values.knative.enabled }} 3 | apiVersion: apps/v1 4 | kind: Deployment 5 | metadata: 6 | name: {{ include "ollama.fullname" . }} 7 | namespace: {{ include "ollama.namespace" . }} 8 | labels: 9 | {{- include "ollama.labels" . | nindent 4 }} 10 | {{- with .Values.deployment.labels }} 11 | {{- toYaml . | nindent 4 }} 12 | {{- end }} 13 | spec: 14 | {{- if not .Values.autoscaling.enabled }} 15 | replicas: {{ .Values.replicaCount }} 16 | {{- end }} 17 | {{- if or .Values.updateStrategy.type .Values.updateStrategy.rollingUpdate }} 18 | strategy: {{ .Values.updateStrategy | toYaml | nindent 4 }} 19 | {{- end }} 20 | selector: 21 | matchLabels: 22 | {{- include "ollama.selectorLabels" . | nindent 6 }} 23 | template: 24 | metadata: 25 | {{- with .Values.podAnnotations }} 26 | annotations: 27 | {{- toYaml . | nindent 8 }} 28 | {{- end }} 29 | labels: 30 | {{- include "ollama.labels" . | nindent 8 }} 31 | {{- with .Values.podLabels }} 32 | {{- toYaml . | nindent 8 }} 33 | {{- end }} 34 | spec: 35 | {{- if .Values.hostIPC }} 36 | hostIPC: {{ .Values.hostIPC }} 37 | {{- end }} 38 | {{- if .Values.hostPID }} 39 | hostPID: {{ .Values.hostPID }} 40 | {{- end }} 41 | {{- if .Values.hostNetwork }} 42 | hostNetwork: {{ .Values.hostNetwork }} 43 | {{- end }} 44 | {{- with .Values.imagePullSecrets }} 45 | imagePullSecrets: 46 | {{- toYaml . | nindent 8 }} 47 | {{- end }} 48 | serviceAccountName: {{ include "ollama.serviceAccountName" . 
}} 49 | securityContext: 50 | {{- toYaml .Values.podSecurityContext | nindent 8 }} 51 | {{- if .Values.runtimeClassName }} 52 | runtimeClassName: {{ .Values.runtimeClassName | quote }} 53 | {{- end }} 54 | {{- with .Values.initContainers }} 55 | initContainers: 56 | {{- tpl (toYaml . ) $ | nindent 8 }} 57 | {{- end }} 58 | containers: 59 | - name: {{ .Chart.Name }} 60 | securityContext: 61 | {{- toYaml .Values.securityContext | nindent 12 }} 62 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (ternary (printf "%s-rocm" .Chart.AppVersion) (.Chart.AppVersion) (and (.Values.ollama.gpu.enabled) (eq .Values.ollama.gpu.type "amd"))) }}" 63 | imagePullPolicy: {{ .Values.image.pullPolicy }} 64 | ports: 65 | - name: http 66 | containerPort: {{ .Values.ollama.port }} 67 | protocol: TCP 68 | env: 69 | - name: OLLAMA_HOST 70 | value: "0.0.0.0:{{ .Values.ollama.port }}" 71 | {{- if and .Values.ollama.gpu.enabled (or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type))}} 72 | - name: PATH 73 | value: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin 74 | {{- end}} 75 | {{- with .Values.extraEnv }} 76 | {{- toYaml . | nindent 12 }} 77 | {{- end }} 78 | envFrom: 79 | {{- with .Values.extraEnvFrom }} 80 | {{- toYaml . | nindent 12 }} 81 | {{- end }} 82 | args: 83 | {{- with .Values.extraArgs }} 84 | {{- toYaml . 
| nindent 12 }} 85 | {{- end }} 86 | {{- if .Values.resources }} 87 | resources: 88 | {{- $limits := default dict .Values.resources.limits }} 89 | {{- if .Values.ollama.gpu.enabled }} 90 | # If gpu is enabled, it can either be a NVIDIA card or a AMD card 91 | {{- if or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type) }} 92 | # NVIDIA is assumed by default if no value is set and GPU is enabled 93 | # NVIDIA cards can have mig enabled (i.e., the card is sliced into parts 94 | # Therefore, the first case is no migs enabled 95 | {{- if or (not .Values.ollama.gpu.mig) (not .Values.ollama.gpu.mig.enabled ) }} 96 | {{- $gpuLimit := dict (.Values.ollama.gpu.nvidiaResource | default "nvidia.com/gpu") (.Values.ollama.gpu.number | default 1) }} 97 | {{- $limits = merge $limits $gpuLimit }} 98 | # Second case is mig is enabled 99 | {{- else if or (.Values.ollama.gpu.mig.enabled) }} 100 | # Initialize empty dictionary 101 | {{- $migDevices := dict -}} 102 | # Loop over the entries in the mig devices 103 | {{- range $key, $value := .Values.ollama.gpu.mig.devices }} 104 | {{- $migKey := printf "nvidia.com/mig-%s" $key -}} 105 | {{- $migDevices = merge $migDevices (dict $migKey $value) -}} 106 | {{- end }} 107 | {{- $limits = merge $limits $migDevices}} 108 | {{- end }} 109 | {{- end }} 110 | {{- if eq .Values.ollama.gpu.type "amd" }} 111 | {{- $gpuLimit := dict "amd.com/gpu" (.Values.ollama.gpu.number | default 1) }} 112 | {{- $limits = merge $limits $gpuLimit }} 113 | {{- end }} 114 | {{- end }} 115 | {{- $ressources := deepCopy (dict "limits" $limits) | mergeOverwrite .Values.resources }} 116 | {{- toYaml $ressources | nindent 12 }} 117 | {{- end}} 118 | volumeMounts: 119 | - name: ollama-data 120 | mountPath: {{ .Values.ollama.mountPath | default "/root/.ollama" }} 121 | {{- if .Values.persistentVolume.subPath }} 122 | subPath: {{ .Values.persistentVolume.subPath }} 123 | {{- end }} 124 | {{- range .Values.ollama.models.create }} 125 | {{- if .configMapRef 
}} 126 | - name: {{ .name }}-config-model-volume 127 | mountPath: /models 128 | {{- end }} 129 | {{- end }} 130 | {{- with .Values.volumeMounts }} 131 | {{- toYaml . | nindent 12 }} 132 | {{- end }} 133 | {{- if .Values.livenessProbe.enabled }} 134 | livenessProbe: 135 | httpGet: 136 | path: {{ .Values.livenessProbe.path }} 137 | port: http 138 | initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} 139 | periodSeconds: {{ .Values.livenessProbe.periodSeconds }} 140 | timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} 141 | successThreshold: {{ .Values.livenessProbe.successThreshold }} 142 | failureThreshold: {{ .Values.livenessProbe.failureThreshold }} 143 | {{- end }} 144 | {{- if .Values.readinessProbe.enabled }} 145 | readinessProbe: 146 | httpGet: 147 | path: {{ .Values.readinessProbe.path }} 148 | port: http 149 | initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} 150 | periodSeconds: {{ .Values.readinessProbe.periodSeconds }} 151 | timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} 152 | successThreshold: {{ .Values.readinessProbe.successThreshold }} 153 | failureThreshold: {{ .Values.readinessProbe.failureThreshold }} 154 | {{- end }} 155 | {{- with .Values.lifecycle}} 156 | lifecycle: 157 | {{- toYaml . | nindent 12 }} 158 | {{- else }} 159 | {{- if or .Values.ollama.models.pull .Values.ollama.models.run .Values.ollama.models.create }} 160 | lifecycle: 161 | postStart: 162 | exec: 163 | command: 164 | - /bin/sh 165 | - -c 166 | - | 167 | while ! /bin/ollama ps > /dev/null 2>&1; do 168 | sleep 5 169 | done 170 | 171 | {{- if .Values.ollama.models.pull }} 172 | {{- range .Values.ollama.models.pull }} 173 | /bin/ollama pull {{ternary "--insecure" "" $.Values.ollama.insecure | toString }} {{ . 
}} 174 | {{- end }} 175 | {{- end }} 176 | 177 | {{- if .Values.ollama.models.create }} 178 | {{- range .Values.ollama.models.create }} 179 | {{- if .template }} 180 | cat < {{ include "ollama.modelsMountPath" $ }}/{{ .name }} 181 | {{- .template | nindent 20 }} 182 | EOF 183 | /bin/ollama create {{ .name }} -f {{ include "ollama.modelsMountPath" . }}/{{ .name }} 184 | {{- end }} 185 | {{- if .configMapRef }} 186 | /bin/ollama create {{ .name }} -f /models/{{ .name }} 187 | {{- end }} 188 | {{- end }} 189 | {{- end }} 190 | 191 | {{- if .Values.ollama.models.run }} 192 | {{- range .Values.ollama.models.run }} 193 | /bin/ollama run {{ . }} 194 | {{- end }} 195 | {{- end }} 196 | {{- end }} 197 | {{- end }} 198 | volumes: 199 | - name: ollama-data 200 | {{- if .Values.persistentVolume.enabled }} 201 | persistentVolumeClaim: 202 | claimName: {{ .Values.persistentVolume.existingClaim | default (printf "%s" (include "ollama.fullname" .)) }} 203 | {{- else }} 204 | emptyDir: { } 205 | {{- end }} 206 | {{- range .Values.ollama.models.create }} 207 | {{- if .configMapRef }} 208 | - name: {{ .name }}-config-model-volume 209 | configMap: 210 | name: {{ .configMapRef }} 211 | items: 212 | - key: {{ .configMapKeyRef }} 213 | path: {{ .name }} 214 | {{- end }} 215 | {{- end }} 216 | {{- with .Values.volumes }} 217 | {{- toYaml . | nindent 8 }} 218 | {{- end }} 219 | {{- with .Values.nodeSelector }} 220 | nodeSelector: 221 | {{- toYaml . | nindent 8 }} 222 | {{- end }} 223 | {{- with .Values.affinity }} 224 | affinity: 225 | {{- toYaml . | nindent 8 }} 226 | {{- end }} 227 | {{- with .Values.topologySpreadConstraints }} 228 | topologySpreadConstraints: 229 | {{- toYaml . 
| nindent 8 }} 230 | {{- end }} 231 | {{- if or .Values.ollama.gpu.enabled .Values.tolerations }} 232 | tolerations: 233 | {{- if and .Values.ollama.gpu.enabled (and 234 | ( or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type)) 235 | ( or (not .Values.ollama.gpu.mig) (not .Values.ollama.gpu.mig.enabled)) 236 | ) }} 237 | - key: "{{(.Values.ollama.gpu.nvidiaResource | default "nvidia.com/gpu")}}" 238 | operator: Exists 239 | effect: NoSchedule 240 | {{- else if and .Values.ollama.gpu.enabled (and 241 | ( or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type)) 242 | (( .Values.ollama.gpu.mig.enabled)) 243 | ) }} 244 | {{- range $key, $value := .Values.ollama.gpu.mig.devices }} 245 | - key: nvidia.com/mig-{{ $key }} 246 | operator: Exists 247 | effect: NoSchedule 248 | {{- end }} 249 | {{- end }} 250 | {{- with .Values.tolerations }} 251 | {{- toYaml . | nindent 8 }} 252 | {{- end }} 253 | {{- end }} 254 | {{- end }} 255 | -------------------------------------------------------------------------------- /templates/extra-manifests.yaml: -------------------------------------------------------------------------------- 1 | {{ range .Values.extraObjects }} 2 | --- 3 | {{ tpl (toYaml .) $ }} 4 | {{ end }} -------------------------------------------------------------------------------- /templates/hpa.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | {{- if .Values.autoscaling.enabled }} 3 | apiVersion: autoscaling/v2 4 | kind: HorizontalPodAutoscaler 5 | metadata: 6 | name: {{ include "ollama.fullname" . }} 7 | namespace: {{ include "ollama.namespace" . }} 8 | labels: 9 | {{- include "ollama.labels" . | nindent 4 }} 10 | spec: 11 | scaleTargetRef: 12 | apiVersion: apps/v1 13 | kind: Deployment 14 | name: {{ include "ollama.fullname" . 
}} 15 | minReplicas: {{ .Values.autoscaling.minReplicas }} 16 | maxReplicas: {{ .Values.autoscaling.maxReplicas }} 17 | metrics: 18 | {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} 19 | - type: Resource 20 | resource: 21 | name: cpu 22 | target: 23 | type: Utilization 24 | averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} 25 | {{- end }} 26 | {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} 27 | - type: Resource 28 | resource: 29 | name: memory 30 | target: 31 | type: Utilization 32 | averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} 33 | {{- end }} 34 | {{- end }} 35 | -------------------------------------------------------------------------------- /templates/ingress.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.ingress.enabled -}} 2 | {{- $fullName := include "ollama.fullname" . -}} 3 | {{- $svcPort := .Values.service.port -}} 4 | {{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }} 5 | {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }} 6 | {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className }} 7 | {{- end }} 8 | {{- end }} 9 | --- 10 | {{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion }} 11 | apiVersion: networking.k8s.io/v1 12 | {{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion }} 13 | apiVersion: networking.k8s.io/v1beta1 14 | {{- else }} 15 | apiVersion: extensions/v1beta1 16 | {{- end }} 17 | kind: Ingress 18 | metadata: 19 | name: {{ $fullName }} 20 | namespace: {{ include "ollama.namespace" . }} 21 | labels: 22 | {{- include "ollama.labels" . | nindent 4 }} 23 | {{- with .Values.ingress.annotations }} 24 | annotations: 25 | {{- toYaml . 
| nindent 4 }} 26 | {{- end }} 27 | spec: 28 | {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }} 29 | ingressClassName: {{ .Values.ingress.className }} 30 | {{- end }} 31 | {{- if .Values.ingress.tls }} 32 | tls: 33 | {{- range .Values.ingress.tls }} 34 | - hosts: 35 | {{- range .hosts }} 36 | - {{ . | quote }} 37 | {{- end }} 38 | secretName: {{ .secretName }} 39 | {{- end }} 40 | {{- end }} 41 | rules: 42 | {{- range .Values.ingress.hosts }} 43 | - host: {{ .host | quote }} 44 | http: 45 | paths: 46 | {{- range .paths }} 47 | - path: {{ .path }} 48 | {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }} 49 | pathType: {{ .pathType }} 50 | {{- end }} 51 | backend: 52 | {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }} 53 | service: 54 | name: {{ $fullName }} 55 | port: 56 | number: {{ $svcPort }} 57 | {{- else }} 58 | serviceName: {{ $fullName }} 59 | servicePort: {{ $svcPort }} 60 | {{- end }} 61 | {{- end }} 62 | {{- end }} 63 | {{- end }} 64 | -------------------------------------------------------------------------------- /templates/knative/service.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | {{- if .Values.knative.enabled }} 3 | apiVersion: serving.knative.dev/v1 4 | kind: Service 5 | metadata: 6 | name: {{ include "ollama.fullname" . }} 7 | namespace: {{ include "ollama.namespace" . }} 8 | labels: 9 | {{- include "ollama.labels" . | nindent 4 }} 10 | {{- with .Values.knative.annotations }} 11 | annotations: 12 | {{- toYaml . 
| nindent 4 }} 13 | {{- end }} 14 | spec: 15 | template: 16 | spec: 17 | containerConcurrency: {{ .Values.knative.containerConcurrency }} 18 | timeoutSeconds: {{ .Values.knative.timeoutSeconds }} 19 | responseStartTimeoutSeconds: {{ .Values.knative.responseStartTimeoutSeconds }} 20 | idleTimeoutSeconds: {{ .Values.knative.idleTimeoutSeconds }} 21 | {{- with .Values.imagePullSecrets }} 22 | imagePullSecrets: 23 | {{- toYaml . | nindent 8 }} 24 | {{- end }} 25 | serviceAccountName: {{ include "ollama.serviceAccountName" . }} 26 | {{- if .Values.runtimeClassName }} 27 | runtimeClassName: {{ .Values.runtimeClassName | quote }} 28 | {{- end }} 29 | {{- with .Values.initContainers }} 30 | initContainers: 31 | {{- tpl (toYaml . ) $ | nindent 8 }} 32 | {{- end }} 33 | containers: 34 | - image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (ternary (printf "%s-rocm" .Chart.AppVersion) (.Chart.AppVersion) (and (.Values.ollama.gpu.enabled) (eq .Values.ollama.gpu.type "amd"))) }}" 35 | imagePullPolicy: {{ .Values.image.pullPolicy }} 36 | securityContext: 37 | {{- toYaml .Values.securityContext | nindent 12 }} 38 | ports: 39 | - containerPort: {{ .Values.ollama.port }} 40 | env: 41 | - name: OLLAMA_HOST 42 | value: "0.0.0.0:{{ .Values.ollama.port }}" 43 | {{- if and .Values.ollama.gpu.enabled (or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type))}} 44 | - name: PATH 45 | value: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin 46 | {{- end}} 47 | {{- with .Values.extraEnv }} 48 | {{- toYaml . | nindent 12 }} 49 | {{- end }} 50 | envFrom: 51 | {{- with .Values.extraEnvFrom }} 52 | {{- toYaml . | nindent 12 }} 53 | {{- end }} 54 | args: 55 | {{- with .Values.extraArgs }} 56 | {{- toYaml . 
| nindent 12 }} 57 | {{- end }} 58 | {{- if .Values.resources }} 59 | resources: 60 | {{- $limits := default dict .Values.resources.limits }} 61 | {{- if .Values.ollama.gpu.enabled }} 62 | {{- if or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type) }} 63 | {{- $gpuLimit := dict (.Values.ollama.gpu.nvidiaResource | default "nvidia.com/gpu") (.Values.ollama.gpu.number | default 1) }} 64 | {{- $limits = merge $limits $gpuLimit }} 65 | {{- end }} 66 | {{- if eq .Values.ollama.gpu.type "amd" }} 67 | {{- $gpuLimit := dict "amd.com/gpu" (.Values.ollama.gpu.number | default 1) }} 68 | {{- $limits = merge $limits $gpuLimit }} 69 | {{- end }} 70 | {{- end }} 71 | {{- $ressources := deepCopy (dict "limits" $limits) | mergeOverwrite .Values.resources }} 72 | {{- toYaml $ressources | nindent 12 }} 73 | {{- end}} 74 | volumeMounts: 75 | - name: ollama-data 76 | mountPath: {{ .Values.ollama.mountPath | default "/root/.ollama" }} 77 | {{- if .Values.persistentVolume.subPath }} 78 | subPath: {{ .Values.persistentVolume.subPath }} 79 | {{- end }} 80 | {{- range .Values.ollama.models.create }} 81 | {{- if .configMapRef }} 82 | - name: {{ .name }}-config-model-volume 83 | mountPath: /models 84 | {{- end }} 85 | {{- end }} 86 | {{- with .Values.volumeMounts }} 87 | {{- toYaml . 
| nindent 12 }} 88 | {{- end }} 89 | {{- if .Values.livenessProbe.enabled }} 90 | livenessProbe: 91 | httpGet: 92 | path: {{ .Values.livenessProbe.path }} 93 | port: http 94 | initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} 95 | periodSeconds: {{ .Values.livenessProbe.periodSeconds }} 96 | timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} 97 | successThreshold: {{ .Values.livenessProbe.successThreshold }} 98 | failureThreshold: {{ .Values.livenessProbe.failureThreshold }} 99 | {{- end }} 100 | {{- if .Values.readinessProbe.enabled }} 101 | readinessProbe: 102 | httpGet: 103 | path: {{ .Values.readinessProbe.path }} 104 | port: http 105 | initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} 106 | periodSeconds: {{ .Values.readinessProbe.periodSeconds }} 107 | timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} 108 | successThreshold: {{ .Values.readinessProbe.successThreshold }} 109 | failureThreshold: {{ .Values.readinessProbe.failureThreshold }} 110 | {{- end }} 111 | {{- with .Values.lifecycle}} 112 | lifecycle: 113 | {{- toYaml . | nindent 12 }} 114 | {{- else }} 115 | {{- if or .Values.ollama.models.pull .Values.ollama.models.run .Values.ollama.models.create }} 116 | lifecycle: 117 | postStart: 118 | exec: 119 | command: 120 | - /bin/sh 121 | - -c 122 | - | 123 | while ! /bin/ollama ps > /dev/null 2>&1; do 124 | sleep 5 125 | done 126 | {{- if .Values.ollama.models.pull }} 127 | {{- range .Values.ollama.models.pull }} 128 | /bin/ollama pull {{ternary "--insecure" "" $.Values.ollama.insecure | toString }} {{ . }} 129 | {{- end }} 130 | {{- end }} 131 | 132 | {{- if .Values.ollama.models.create }} 133 | {{- range .Values.ollama.models.create }} 134 | {{- if .template }} 135 | cat < {{ include "ollama.modelsMountPath" $ }}/{{ .name }} 136 | {{- .template | nindent 20 }} 137 | EOF 138 | /bin/ollama create {{ .name }} -f {{ include "ollama.modelsMountPath" . 
}}/{{ .name }} 139 | {{- end }} 140 | {{- if .configMapRef }} 141 | /bin/ollama create {{ .name }} -f /models/{{ .name }} 142 | {{- end }} 143 | {{- end }} 144 | {{- end }} 145 | 146 | {{- if .Values.ollama.models.run }} 147 | {{- range .Values.ollama.models.run }} 148 | /bin/ollama run {{ . }} 149 | {{- end }} 150 | {{- end }} 151 | {{- end }} 152 | {{- end }} 153 | volumes: 154 | - name: ollama-data 155 | {{- if .Values.persistentVolume.enabled }} 156 | persistentVolumeClaim: 157 | claimName: {{ .Values.persistentVolume.existingClaim | default (printf "%s" (include "ollama.fullname" .)) }} 158 | {{- else }} 159 | emptyDir: { } 160 | {{- end }} 161 | {{- range .Values.ollama.models.create }} 162 | {{- if .configMapRef }} 163 | - name: {{ .name }}-config-model-volume 164 | configMap: 165 | name: {{ .configMapRef }} 166 | items: 167 | - key: {{ .configMapKeyRef }} 168 | path: {{ .name }} 169 | {{- end }} 170 | {{- end }} 171 | {{- with .Values.volumes }} 172 | {{- toYaml . | nindent 8 }} 173 | {{- end }} 174 | {{- with .Values.nodeSelector }} 175 | nodeSelector: 176 | {{- toYaml . | nindent 8 }} 177 | {{- end }} 178 | {{- with .Values.affinity }} 179 | affinity: 180 | {{- toYaml . | nindent 8 }} 181 | {{- end }} 182 | {{- with .Values.topologySpreadConstraints }} 183 | topologySpreadConstraints: 184 | {{- toYaml . | nindent 8 }} 185 | {{- end }} 186 | {{- if or .Values.ollama.gpu.enabled .Values.tolerations }} 187 | tolerations: 188 | {{- if and .Values.ollama.gpu.enabled (or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type)) }} 189 | - key: "{{(.Values.ollama.gpu.nvidiaResource | default "nvidia.com/gpu")}}" 190 | operator: Exists 191 | effect: NoSchedule 192 | {{- end }} 193 | {{- with .Values.tolerations }} 194 | {{- toYaml . 
| nindent 8 }} 195 | {{- end }} 196 | {{- end }} 197 | {{- end }} 198 | -------------------------------------------------------------------------------- /templates/pvc.yaml: -------------------------------------------------------------------------------- 1 | {{- if (and .Values.persistentVolume.enabled (not .Values.persistentVolume.existingClaim)) -}} 2 | --- 3 | apiVersion: v1 4 | kind: PersistentVolumeClaim 5 | metadata: 6 | {{- if .Values.persistentVolume.annotations }} 7 | annotations: 8 | {{ toYaml .Values.persistentVolume.annotations | indent 4 }} 9 | {{- end }} 10 | labels: 11 | {{- include "ollama.labels" . | nindent 4 }} 12 | name: {{ template "ollama.fullname" . }} 13 | namespace: {{ include "ollama.namespace" . }} 14 | spec: 15 | accessModes: 16 | {{ toYaml .Values.persistentVolume.accessModes | indent 4 }} 17 | {{- if .Values.persistentVolume.storageClass }} 18 | {{- if (eq "-" .Values.persistentVolume.storageClass) }} 19 | storageClassName: "" 20 | {{- else }} 21 | storageClassName: "{{ .Values.persistentVolume.storageClass }}" 22 | {{- end }} 23 | {{- end }} 24 | {{- if .Values.persistentVolume.volumeMode }} 25 | volumeMode: "{{ .Values.persistentVolume.volumeMode }}" 26 | {{- end }} 27 | {{- if .Values.persistentVolume.volumeName }} 28 | volumeName: "{{ .Values.persistentVolume.volumeName }}" 29 | {{- end }} 30 | resources: 31 | requests: 32 | storage: "{{ .Values.persistentVolume.size }}" 33 | {{- end -}} 34 | -------------------------------------------------------------------------------- /templates/service.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | {{- if not .Values.knative.enabled }} 3 | apiVersion: v1 4 | kind: Service 5 | metadata: 6 | name: {{ include "ollama.fullname" . }} 7 | namespace: {{ include "ollama.namespace" . }} 8 | labels: 9 | {{- include "ollama.labels" . | nindent 4 }} 10 | {{- with .Values.service.labels }} 11 | {{- toYaml . 
| nindent 4 }} 12 | {{- end }} 13 | {{- with .Values.service.annotations }} 14 | annotations: 15 | {{- toYaml . | nindent 4 }} 16 | {{- end }} 17 | spec: 18 | type: {{ .Values.service.type }} 19 | ports: 20 | - port: {{ .Values.service.port }} 21 | targetPort: http 22 | protocol: TCP 23 | name: http 24 | {{- if contains "NodePort" .Values.service.type }} 25 | nodePort: {{ .Values.service.nodePort }} 26 | {{- end }} 27 | {{- if .Values.service.loadBalancerIP }} 28 | loadBalancerIP: {{ .Values.service.loadBalancerIP | quote }} 29 | {{- end }} 30 | selector: 31 | {{- include "ollama.selectorLabels" . | nindent 4 }} 32 | {{- end }} 33 | -------------------------------------------------------------------------------- /templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceAccount.create -}} 2 | --- 3 | apiVersion: v1 4 | kind: ServiceAccount 5 | metadata: 6 | name: {{ include "ollama.serviceAccountName" . }} 7 | namespace: {{ include "ollama.namespace" . }} 8 | labels: 9 | {{- include "ollama.labels" . | nindent 4 }} 10 | {{- with .Values.serviceAccount.annotations }} 11 | annotations: 12 | {{- toYaml . | nindent 4 }} 13 | {{- end }} 14 | automountServiceAccountToken: {{ .Values.serviceAccount.automount }} 15 | {{- end }} 16 | -------------------------------------------------------------------------------- /templates/tests/test-connection.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | {{- if .Values.tests.enabled }} 3 | apiVersion: v1 4 | kind: Pod 5 | metadata: 6 | name: "{{ include "ollama.fullname" . }}-test-connection" 7 | namespace: {{ include "ollama.namespace" . }} 8 | labels: 9 | {{- include "ollama.labels" . | nindent 4 }} 10 | {{- with .Values.tests.labels }} 11 | {{- toYaml . 
| nindent 4 }} 12 | {{- end }} 13 | annotations: 14 | "helm.sh/hook": test 15 | spec: 16 | containers: 17 | - name: wget 18 | image: busybox 19 | command: ['wget'] 20 | args: ['{{ include "ollama.fullname" . }}:{{ .Values.service.port }}'] 21 | restartPolicy: Never 22 | {{ end }} 23 | -------------------------------------------------------------------------------- /values.yaml: -------------------------------------------------------------------------------- 1 | # Default values for ollama-helm. 2 | # This is a YAML-formatted file. 3 | # Declare variables to be passed into your templates. 4 | 5 | # -- Number of replicas 6 | replicaCount: 1 7 | 8 | # Knative configuration 9 | knative: 10 | # -- Enable Knative integration 11 | enabled: false 12 | # -- Knative service container concurrency 13 | containerConcurrency: 0 14 | # -- Knative service timeout seconds 15 | timeoutSeconds: 300 16 | # -- Knative service response start timeout seconds 17 | responseStartTimeoutSeconds: 300 18 | # -- Knative service idle timeout seconds 19 | idleTimeoutSeconds: 300 20 | # -- Knative service annotations 21 | annotations: {} 22 | 23 | # Docker image 24 | image: 25 | # -- Docker image registry 26 | repository: ollama/ollama 27 | 28 | # -- Docker pull policy 29 | pullPolicy: IfNotPresent 30 | 31 | # -- Docker image tag, overrides the image tag whose default is the chart appVersion. 
32 | tag: "" 33 | 34 | # -- Docker registry secret names as an array 35 | imagePullSecrets: [] 36 | 37 | # -- String to partially override template (will maintain the release name) 38 | nameOverride: "" 39 | 40 | # -- String to fully override template 41 | fullnameOverride: "" 42 | 43 | # -- String to fully override namespace 44 | namespaceOverride: "" 45 | 46 | # Ollama parameters 47 | ollama: 48 | # Port Ollama is listening on 49 | port: 11434 50 | 51 | gpu: 52 | # -- Enable GPU integration 53 | enabled: false 54 | 55 | # -- GPU type: 'nvidia' or 'amd' 56 | # If 'ollama.gpu.enabled', default value is nvidia 57 | # If set to 'amd', this will add 'rocm' suffix to image tag if 'image.tag' is not override 58 | # This is due cause AMD and CPU/CUDA are different images 59 | type: 'nvidia' 60 | 61 | # -- Specify the number of GPU 62 | # If you use MIG section below then this parameter is ignored 63 | number: 1 64 | 65 | # -- only for nvidia cards; change to (example) 'nvidia.com/mig-1g.10gb' to use MIG slice 66 | nvidiaResource: "nvidia.com/gpu" 67 | # nvidiaResource: "nvidia.com/mig-1g.10gb" # example 68 | # If you want to use more than one NVIDIA MIG you can use the following syntax (then nvidiaResource is ignored and only the configuration in the following MIG section is used) 69 | 70 | mig: 71 | # -- Enable multiple mig devices 72 | # If enabled you will have to specify the mig devices 73 | # If enabled is set to false this section is ignored 74 | enabled: false 75 | 76 | # -- Specify the mig devices and the corresponding number 77 | devices: {} 78 | # 1g.10gb: 1 79 | # 3g.40gb: 1 80 | 81 | models: 82 | # -- List of models to pull at container startup 83 | # The more you add, the longer the container will take to start if models are not present 84 | # pull: 85 | # - llama2 86 | # - mistral 87 | pull: [] 88 | 89 | # -- List of models to load in memory at container startup 90 | # run: 91 | # - llama2 92 | # - mistral 93 | run: [] 94 | 95 | # -- List of models to 
create at container startup, there are two options 96 | # 1. Create a raw model 97 | # 2. Load a model from configMaps, configMaps must be created before and are loaded as volume in "/models" directory. 98 | # create: 99 | # - name: llama3.1-ctx32768 100 | # configMapRef: my-configmap 101 | # configMapKeyRef: configmap-key 102 | # - name: llama3.1-ctx32768 103 | # template: | 104 | # FROM llama3.1 105 | # PARAMETER num_ctx 32768 106 | create: [] 107 | 108 | # -- Add insecure flag for pulling at container startup 109 | insecure: false 110 | 111 | # -- Override ollama-data volume mount path, default: "/root/.ollama" 112 | mountPath: "" 113 | 114 | # Service account 115 | # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ 116 | serviceAccount: 117 | # -- Specifies whether a service account should be created 118 | create: true 119 | 120 | # -- Automatically mount a ServiceAccount's API credentials? 121 | automount: true 122 | 123 | # -- Annotations to add to the service account 124 | annotations: {} 125 | 126 | # -- The name of the service account to use. 
127 | # If not set and create is true, a name is generated using the fullname template 128 | name: "" 129 | 130 | # -- Map of annotations to add to the pods 131 | podAnnotations: {} 132 | 133 | # -- Map of labels to add to the pods 134 | podLabels: {} 135 | 136 | # -- Pod Security Context 137 | podSecurityContext: {} 138 | # fsGroup: 2000 139 | 140 | # -- Container Security Context 141 | securityContext: {} 142 | # capabilities: 143 | # drop: 144 | # - ALL 145 | # readOnlyRootFilesystem: true 146 | # runAsNonRoot: true 147 | # runAsUser: 1000 148 | 149 | # -- Specify runtime class 150 | runtimeClassName: "" 151 | 152 | # Configure Service 153 | service: 154 | 155 | # -- Service type 156 | type: ClusterIP 157 | 158 | # -- Service port 159 | port: 11434 160 | 161 | # -- Service node port when service type is 'NodePort' 162 | nodePort: 31434 163 | 164 | # -- Load Balancer IP address 165 | loadBalancerIP: 166 | 167 | # -- Annotations to add to the service 168 | annotations: {} 169 | 170 | # -- Labels to add to the service 171 | labels: {} 172 | 173 | # Configure Deployment 174 | deployment: 175 | 176 | # -- Labels to add to the deployment 177 | labels: {} 178 | 179 | # Configure the ingress resource that allows you to access the Ollama service 180 | ingress: 181 | # -- Enable ingress controller resource 182 | enabled: false 183 | 184 | # -- IngressClass that will be used to implement the Ingress (Kubernetes 1.18+) 185 | className: "" 186 | 187 | # -- Additional annotations for the Ingress resource. 188 | annotations: {} 189 | # kubernetes.io/ingress.class: traefik 190 | # kubernetes.io/ingress.class: nginx 191 | # kubernetes.io/tls-acme: "true" 192 | 193 | # The list of hostnames to be covered with this ingress record. 194 | hosts: 195 | - host: ollama.local 196 | paths: 197 | - path: / 198 | pathType: Prefix 199 | 200 | # -- The tls configuration for hostnames to be covered with this ingress record. 
201 | tls: [] 202 | # - secretName: chart-example-tls 203 | # hosts: 204 | # - chart-example.local 205 | 206 | # Configure resource requests and limits 207 | # ref: http://kubernetes.io/docs/user-guide/compute-resources/ 208 | resources: 209 | # -- Pod requests 210 | requests: {} 211 | # Memory request 212 | # memory: 4096Mi 213 | 214 | # CPU request 215 | # cpu: 2000m 216 | 217 | # -- Pod limit 218 | limits: {} 219 | # Memory limit 220 | # memory: 8192Mi 221 | 222 | # CPU limit 223 | # cpu: 4000m 224 | 225 | # Configure extra options for liveness probe 226 | # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes 227 | livenessProbe: 228 | # -- Enable livenessProbe 229 | enabled: true 230 | 231 | # -- Request path for livenessProbe 232 | path: / 233 | 234 | # -- Initial delay seconds for livenessProbe 235 | initialDelaySeconds: 60 236 | 237 | # -- Period seconds for livenessProbe 238 | periodSeconds: 10 239 | 240 | # -- Timeout seconds for livenessProbe 241 | timeoutSeconds: 5 242 | 243 | # -- Failure threshold for livenessProbe 244 | failureThreshold: 6 245 | 246 | # -- Success threshold for livenessProbe 247 | successThreshold: 1 248 | 249 | # Configure extra options for readiness probe 250 | # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes 251 | readinessProbe: 252 | # -- Enable readinessProbe 253 | enabled: true 254 | 255 | # -- Request path for readinessProbe 256 | path: / 257 | 258 | # -- Initial delay seconds for readinessProbe 259 | initialDelaySeconds: 30 260 | 261 | # -- Period seconds for readinessProbe 262 | periodSeconds: 5 263 | 264 | # -- Timeout seconds for readinessProbe 265 | timeoutSeconds: 3 266 | 267 | # -- Failure threshold for readinessProbe 268 | failureThreshold: 6 269 | 270 | # -- Success threshold for readinessProbe 271 | successThreshold: 1 272 | 273 | # Configure autoscaling 274 | autoscaling: 275 | # -- 
Enable autoscaling 276 | enabled: false 277 | 278 | # -- Number of minimum replicas 279 | minReplicas: 1 280 | 281 | # -- Number of maximum replicas 282 | maxReplicas: 100 283 | 284 | # -- CPU usage to target per replica 285 | targetCPUUtilizationPercentage: 80 286 | 287 | # -- targetMemoryUtilizationPercentage: 80 288 | 289 | # -- Additional volumes on the output Deployment definition. 290 | volumes: [] 291 | # -- - name: foo 292 | # secret: 293 | # secretName: mysecret 294 | # optional: false 295 | 296 | # -- Additional volumeMounts on the output Deployment definition. 297 | volumeMounts: [] 298 | # -- - name: foo 299 | # mountPath: "/etc/foo" 300 | # readOnly: true 301 | 302 | # -- Additional arguments on the output Deployment definition. 303 | extraArgs: [] 304 | 305 | # -- Additional environment variables on the output Deployment definition. 306 | # For extra OLLAMA env, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go 307 | extraEnv: [] 308 | # - name: OLLAMA_DEBUG 309 | # value: "1" 310 | 311 | # -- Additional environment variables from external sources (like ConfigMap) 312 | extraEnvFrom: [] 313 | # - configMapRef: 314 | # name: my-env-configmap 315 | 316 | # Enable persistence using Persistent Volume Claims 317 | # ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/ 318 | persistentVolume: 319 | # -- Enable persistence using PVC 320 | enabled: false 321 | 322 | # -- Ollama server data Persistent Volume access modes 323 | # Must match those of existing PV or dynamic provisioner 324 | # Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ 325 | accessModes: 326 | - ReadWriteOnce 327 | 328 | # -- Ollama server data Persistent Volume annotations 329 | annotations: {} 330 | 331 | # -- If you'd like to bring your own PVC for persisting Ollama state, pass the name of the 332 | # created + ready PVC here. If set, this Chart will not create the default PVC. 
333 | # Requires server.persistentVolume.enabled: true 334 | existingClaim: "" 335 | 336 | # -- Ollama server data Persistent Volume size 337 | size: 30Gi 338 | 339 | # -- Ollama server data Persistent Volume Storage Class 340 | # If defined, storageClassName: 341 | # If set to "-", storageClassName: "", which disables dynamic provisioning 342 | # If undefined (the default) or set to null, no storageClassName spec is 343 | # set, choosing the default provisioner. (gp2 on AWS, standard on 344 | # GKE, AWS & OpenStack) 345 | storageClass: "" 346 | 347 | # -- Ollama server data Persistent Volume Binding Mode 348 | # If defined, volumeMode: 349 | # If empty (the default) or set to null, no volumeBindingMode spec is 350 | # set, choosing the default mode. 351 | volumeMode: "" 352 | 353 | # -- Subdirectory of Ollama server data Persistent Volume to mount 354 | # Useful if the volume's root directory is not empty 355 | subPath: "" 356 | 357 | # -- Pre-existing PV to attach this claim to 358 | # Useful if a CSI auto-provisions a PV for you and you want to always 359 | # reference the PV moving forward 360 | volumeName: "" 361 | 362 | # -- Node labels for pod assignment. 363 | nodeSelector: {} 364 | 365 | # -- Tolerations for pod assignment 366 | tolerations: [] 367 | 368 | # -- Affinity for pod assignment 369 | affinity: {} 370 | 371 | # -- Lifecycle for pod assignment (override ollama.models startup pull/run) 372 | lifecycle: {} 373 | 374 | # How to replace existing pods 375 | updateStrategy: 376 | # -- Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate 377 | type: "Recreate" 378 | 379 | # -- Topology Spread Constraints for pod assignment 380 | topologySpreadConstraints: {} 381 | 382 | # -- Init containers to add to the pod 383 | initContainers: [] 384 | # - name: startup-tool 385 | # image: alpine:3 386 | # command: [sh, -c] 387 | # args: 388 | # - echo init 389 | 390 | # -- Use the host’s ipc namespace. 
391 | hostIPC: false 392 | 393 | # -- Use the host’s pid namespace 394 | hostPID: false 395 | 396 | # -- Use the host's network namespace. 397 | hostNetwork: false 398 | 399 | # -- Extra K8s manifests to deploy 400 | extraObjects: [] 401 | # - apiVersion: v1 402 | # kind: PersistentVolume 403 | # metadata: 404 | # name: aws-efs 405 | # data: 406 | # key: "value" 407 | # - apiVersion: scheduling.k8s.io/v1 408 | # kind: PriorityClass 409 | # metadata: 410 | # name: high-priority 411 | # value: 1000000 412 | # globalDefault: false 413 | # description: "This priority class should be used for XYZ service pods only." 414 | 415 | # -- Test connection pods 416 | tests: 417 | enabled: true 418 | # -- Labels to add to the tests 419 | labels: {} 420 | --------------------------------------------------------------------------------