├── .github ├── dependabot.yml └── workflows │ ├── integration_test.yml │ └── release.yml ├── LICENSE ├── README.md ├── chart └── kepler │ ├── .helmignore │ ├── Chart.yaml │ ├── README.md │ ├── ci │ └── model-server-enabled-values.yaml │ ├── templates │ ├── _helpers.tpl │ ├── daemonset.yaml │ ├── model-server │ │ ├── _helpers.tpl │ │ ├── deployment.yaml │ │ └── service.yaml │ ├── networkpolicy.yaml │ ├── rolebinding.yaml │ ├── secret-redfish.yaml │ ├── service.yaml │ ├── serviceaccount.yaml │ └── servicemonitor.yaml │ └── values.yaml └── cr.yaml /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # Please see the documentation for all configuration options: 2 | # https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file 3 | 4 | version: 2 5 | updates: 6 | - package-ecosystem: github-actions 7 | directory: / 8 | schedule: 9 | day: monday 10 | interval: weekly 11 | groups: 12 | github-actions: 13 | patterns: 14 | - "*" 15 | -------------------------------------------------------------------------------- /.github/workflows/integration_test.yml: -------------------------------------------------------------------------------- 1 | name: Integration test 2 | 3 | on: 4 | pull_request: 5 | schedule: 6 | - cron: "0 0 * * *" 7 | 8 | jobs: 9 | integration_test: 10 | strategy: 11 | matrix: 12 | testConfig: 13 | - expectModelServer: false 14 | - expectModelServer: true 15 | extraHelmFlags: '-f ci/model-server-enabled-values.yaml' 16 | runs-on: ubuntu-latest 17 | defaults: 18 | run: 19 | working-directory: ./chart/kepler 20 | steps: 21 | - uses: actions/checkout@v4.2.2 22 | 23 | - name: use kepler action for kind cluster build 24 | uses: sustainable-computing-io/kepler-action@v0.0.10 25 | with: 26 | cluster_provider: kind 27 | local_dev_cluster_version: v0.0.9 28 | 29 | - name: install helm 30 | uses: azure/setup-helm@v4 31 | 32 | - name: helm lint 33 | 
run: | 34 | helm lint . --debug 35 | 36 | - name: deploy kepler using helm chart 37 | run: | 38 | tree -a 39 | helm install kepler . --values values.yaml --create-namespace --namespace kepler --dry-run --debug ${{ matrix.testConfig.extraHelmFlags }} 40 | helm install kepler . --values values.yaml --create-namespace --namespace kepler --debug ${{ matrix.testConfig.extraHelmFlags }} 41 | 42 | - name: test if kepler is alive 43 | run: | 44 | echo "Waiting for kepler pods to become ready" 45 | kubectl rollout status daemonset,deployment --namespace kepler --timeout 120s 46 | kubectl logs $(kubectl -n kepler get pods -l app.kubernetes.io/component=exporter -oname) -n kepler 47 | kubectl get all -n kepler 48 | 49 | - name: test model server 50 | if: matrix.testConfig.expectModelServer 51 | run: | 52 | # if the model-server configuration is correct the kepler pods should use the model served through the Estimator Sidecar 53 | kubectl logs $(kubectl -n kepler get pods -l app.kubernetes.io/component=exporter -oname) -n kepler | grep 'Using the EstimatorSidecar/AbsPower Power Model to estimate Node Component Power' 54 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release Charts 2 | on: 3 | push: 4 | branches: 5 | - main 6 | paths: 7 | - 'chart/**' 8 | 9 | jobs: 10 | release: 11 | permissions: 12 | contents: write # needed to write releases 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@85e6279cec87321a52edac9c87bce653a07cf6c2 # v3 18 | with: 19 | fetch-depth: 0 20 | 21 | - name: Configure Git 22 | run: | 23 | git config user.name "$GITHUB_ACTOR" 24 | git config user.email "$GITHUB_ACTOR@users.noreply.github.com" 25 | 26 | - name: Install Helm 27 | uses: azure/setup-helm@b7246b12e77f7134dc2d460a3d5bad15bbe29390 # v4.1.0 28 | with: 29 | version: latest 30 | 31 | - name: 
Prepare keys for signing 32 | env: 33 | SIGNING_KEY_BASE64: ${{ secrets.HELM_SIGNING_PRIVATE_KEY }} 34 | SIGNING_KEY_PASSPHRASE_BASE64: ${{ secrets.HELM_SIGNING_PRIVATE_KEY_PASSPHRASE }} 35 | KEY_PATH: ".gpg-dir" 36 | SIGNING_KEY_PATH: ".gpg-dir/secring.gpg" 37 | SIGNING_KEY_PASSPHRASE_PATH: ".gpg-dir/passphrase" 38 | run: | 39 | mkdir "$KEY_PATH" 40 | base64 -d <<< "$SIGNING_KEY_BASE64" > "$SIGNING_KEY_PATH" 41 | base64 -d <<< "$SIGNING_KEY_PASSPHRASE_BASE64" > "$SIGNING_KEY_PASSPHRASE_PATH" 42 | echo "CR_PASSPHRASE_FILE=$SIGNING_KEY_PASSPHRASE_PATH" >> "$GITHUB_ENV" 43 | echo "CR_KEYRING=$SIGNING_KEY_PATH" >> "$GITHUB_ENV" 44 | 45 | - name: Run chart-releaser 46 | uses: helm/chart-releaser-action@cae68fefc6b5f367a0275617c9f83181ba54714f # v1.7.0 47 | with: 48 | charts_dir: chart 49 | config: cr.yaml 50 | env: 51 | CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}" 52 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2021 Sustainable Computing Collaborators 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![GitHub](https://img.shields.io/github/license/sustainable-computing-io/kepler-helm-chart) [![Contribute](https://img.shields.io/static/v1?label=Contributing&message=guide&color=blue)](https://github.com/sustainable-computing-io/kepler/blob/main/CONTRIBUTING.md) ![Release Charts](https://github.com/sustainable-computing-io/kepler-helm-chart/workflows/Release%20Charts/badge.svg?branch=main) [![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/kepler)](https://artifacthub.io/packages/search?repo=kepler) [![Releases downloads](https://img.shields.io/github/downloads/sustainable-computing-io/kepler-helm-chart/total.svg)](https://github.com/sustainable-computing-io/kepler-helm-chart/releases) 2 | # kepler-helm-chart 3 | 4 | This repository is for the Helm chart for Kepler. We are using `gh-pages` branch to host and index the chart. 
When modifying the chart please bump the version in the [Chart.yaml](/chart/kepler/Chart.yaml) file. 5 | 6 | [Helm](https://helm.sh) must be installed to use the charts. 7 | Please refer to Helm's [documentation](https://helm.sh/docs/) to get started. 8 | 9 | The chart is accessible using the following commands: 10 | 11 | Add the helm repo 12 | 13 | ```bash 14 | helm repo add kepler https://sustainable-computing-io.github.io/kepler-helm-chart 15 | ``` 16 | 17 | You can see the latest version by using the following command: 18 | 19 | ```bash 20 | helm search repo kepler 21 | ``` 22 | 23 | If you would like to test and look at the manifest files before deploying you can run: 24 | 25 | ```bash 26 | helm install kepler kepler/kepler --namespace kepler --create-namespace --dry-run --devel 27 | ``` 28 | 29 | Then to install run the following: 30 | 31 | ```bash 32 | helm install kepler kepler/kepler --namespace kepler --create-namespace 33 | ``` 34 | -------------------------------------------------------------------------------- /chart/kepler/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | *.tar 25 | kubectl 26 | *.gz 27 | *.deb 28 | # CI 29 | KeplerK8SAction 30 | .deb 31 | kubectl 32 | local-dev-cluster 33 | -------------------------------------------------------------------------------- /chart/kepler/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: kepler 3 | description: A Helm chart for kepler (Kubernetes-based Efficient Power Level Exporter) 4 | icon: "https://avatars.githubusercontent.com/u/91567619?s=200&v=4" 5 | home: https://sustainable-computing.io/html/index.html 6 | sources: 7 | - https://github.com/sustainable-computing-io/kepler 8 | keywords: 9 | - cloud-native 10 | - sustainable-computing 11 | - kepler 12 | - ebpf 13 | annotations: 14 | artifacthub.io/links: | 15 | - name: support 16 | url: https://github.com/sustainable-computing-io/kepler/issues/new 17 | - name: docs 18 | url: https://sustainable-computing.io/ 19 | artifacthub.io/license: "Apache-2.0" 20 | artifacthub.io/signKey: | 21 | fingerprint: 91BF31657FB6BB5931CBFCF92A544B84946E3621 22 | url: https://keybase.io/bradmccoydev/pgp_keys.asc 23 | 24 | type: application 25 | version: 0.6.0 26 | appVersion: release-0.8.0 27 | -------------------------------------------------------------------------------- /chart/kepler/README.md: -------------------------------------------------------------------------------- 1 | # Kepler 2 | Kepler (Kubernetes-based Efficient Power Level Exporter) uses eBPF to probe energy related system stats and exports as Prometheus metrics 3 | 4 | ## Parameters 5 | 6 | | Name | Description | Value | 7 | | ---------------------------- | -------------------------------------- | ------------ | 8 | | `nameOverride` | 
overrides the name of the chart | `""` | 9 | | `fullnameOverride` | replaces the generated name | `""` | 10 | | `image.repository` | repository to pull the image from | `"quay.io/sustainable_computing_io/kepler"` | 11 | | `image.tag` | image tag defaults to chart appVersion | `""` | 12 | | `image.pullPolicy` | image pull policy | `Always` | 13 | | `imagePullSecrets` | Secret name for pulling images from private repository | `[]` | 14 | | `podAnnotations` | Additional pod annotations | `{}` | 15 | | `podSecurityContext` | privileges and access control settings for a Pod | `{}` | 16 | | `securityContext.privileged` | privileges and access control settings | `true` | 17 | | `nodeSelector` | node selection constraint | `{}` | 18 | | `tolerations[].effect` | toleration effect | `NoSchedule` | 19 | | `tolerations[].key` | toleration key | `node-role.kubernetes.io/master` | 20 | | `affinity` | affinity rules | `{}` | 21 | 22 | ## Resources 23 | 24 | | Name | Description | Value | 25 | | ---------------------------- | ------------------------------------- | ------------ | 26 | | `resources.requests.cpu` | cpu request | `100m` | 27 | | `resources.requests.memory` | memory request | `200Mi` | 28 | | `resources.limits.cpu` | cpu limit | `100m` | 29 | | `resources.limits.memory` | memory limit | `200Mi` | 30 | 31 | ## Environment Variables 32 | 33 | | Name | Description | Value | 34 | | ---------------------------------------- | ------------------------------ | -------- | 35 | | `extraEnvVars.KEPLER_LOG_LEVEL` | the kepler log level | `"1"` | 36 | | `extraEnvVars.ENABLE_GPU` | enable GPU | `"true"` | 37 | | `extraEnvVars.ENABLE_EBPF_CGROUPID` | enable EBPF CGROUPID | `"true"` | 38 | | `extraEnvVars.EXPOSE_IRQ_COUNTER_METRICS`| expose IRQ Counter metrics | `"true"` | 39 | | `extraEnvVars.EXPOSE_KUBELET_METRICS` | expose kubelet metrics | `"true"` | 40 | | `extraEnvVars.ENABLE_PROCESS_METRICS` | enable process metrics | `"true"` | 41 | | `extraEnvVars.CPU_ARCH_OVERRIDE` | 
override CPU architecture | `""` | 42 | | `extraEnvVars.CGROUP_METRICS` | specify CGROUP Metrics | `"*"` | 43 | 44 | ## Service 45 | 46 | | Name | Description | Value | 47 | | ---------------------------- | -------------------------------------- | ------------ | 48 | | `service.annotations` | annotations for the service | `{}` | 49 | | `service.type` | the service type | `ClusterIP` | 50 | | `service.port` | the service port | `9102` | 51 | 52 | ## Service Account 53 | 54 | | Name | Description | Value | 55 | | ---------------------------- | -------------------------------------- | ------------ | 56 | | `serviceAccount.create` | whether the service account is created | `false` | 57 | | `serviceAccount.annotations` | annotations for the service account | `{}` | 58 | | `serviceAccount.name` | name override | `""` | 59 | 60 | ## Service Monitor 61 | 62 | | Name | Description | Value | 63 | | ----------------------------- | -------------------------------------- | ----------- | 64 | | `serviceMonitor.enabled` | whether the service monitor is enabled | `false` | 65 | | `serviceMonitor.namespace` | which namespace to put it in | `""` | 66 | | `serviceMonitor.interval` | the scrape interval | `30s` | 67 | | `serviceMonitor.scrapeTimeout`| the scrape timeout | `5s` | 68 | | `serviceMonitor.labels` | labels for the service monitor | `{} ` | 69 | 70 | ## Redfish BMC and IPMI 71 | 72 | | Name | Description | Value | 73 | | --------------------- | -------------------------------------- | --------- | 74 | | `redfish.name` | redfish secret name | `redfish` | 75 | | `redfish.enabled` | whether the redfish secret is enabled | `false` | 76 | | `redfish.annotations` | annotations for redfish secret | `{}` | 77 | | `redfish.fileContent` | redfish credentials | `` | 78 | | `redfish.labels` | labels for redfish secret | `{}` | 79 | 80 | ## Model Server & Estimator Sidecar 81 | 82 | | Name | Description | Value | 83 | | --------------------------------- | 
----------------------------------------------------------------------------------------- | --------- | 84 | | `modelServer.enabled` | whether model-server and estimator sidecar should be deployed | `false` | 85 | | `modelServer.modelConfig` | [modelConfig](https://sustainable-computing.io/kepler_model_server/get_started/) contents | `NODE_COMPONENTS_ESTIMATOR=true` | 86 | | `modelServer.nameOverride` | overrides the name-suffix of the model-server deployment and service | `""` | 87 | | `modelServer.fullnameOverride` | replaces the name of the model-server deployment and service | `""` | 88 | | `modelServer.replicas` | replicas of the model-server deployment | `""` | 89 | | `modelServer.image.repository` | repository to pull the model-server image from | `"quay.io/sustainable_computing_io/kepler_model_server"` | 90 | | `modelServer.image.tag` | image tag for the model-server | `"v0.7.12"` | 91 | | `modelServer.image.pullPolicy` | image pull policy for the model-server image | `Always` | 92 | | `modelServer.imagePullSecrets` | Secret name for pulling model-server images from private repository | `[]` | 93 | | `modelServer.podAnnotations` | Additional pod annotations for the model-server pods | `{}` | 94 | | `modelServer.securityContext` | privileges and access control settings for the model-server container | `{}` | 95 | | `modelServer.podSecurityContext` | privileges and access control settings for model-server pods | `{}` | 96 | | `modelServer.resources` | resource limits and requests for the model-server | `{}` | 97 | | `modelServer.sidecarResources` | resource limits and requests for the estimator sidecar | `{}` | 98 | | `modelServer.service.annotations` | annotations for the model-server service | `{}` | 99 | | `modelServer.service.type` | the model-server service type | `ClusterIP` | 100 | | `modelServer.service.port` | the model-server service port | `8100` | 101 | -------------------------------------------------------------------------------- 
/chart/kepler/ci/model-server-enabled-values.yaml: -------------------------------------------------------------------------------- 1 | modelServer: 2 | enabled: true 3 | -------------------------------------------------------------------------------- /chart/kepler/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* 2 | Expand the name of the chart. 3 | */}} 4 | {{- define "kepler.name" -}} 5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} 6 | {{- end }} 7 | 8 | {{/* 9 | Create a default fully qualified app name. 10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 11 | If release name contains chart name it will be used as a full name. 12 | */}} 13 | {{- define "kepler.fullname" -}} 14 | {{- if .Values.fullnameOverride }} 15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} 16 | {{- else }} 17 | {{- $name := default .Chart.Name .Values.nameOverride }} 18 | {{- if contains $name .Release.Name }} 19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }} 20 | {{- else }} 21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} 22 | {{- end }} 23 | {{- end }} 24 | {{- end }} 25 | 26 | {{/* 27 | Create chart name and version as used by the chart label. 28 | */}} 29 | {{- define "kepler.chart" -}} 30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} 31 | {{- end }} 32 | 33 | {{/* 34 | Common labels 35 | */}} 36 | {{- define "kepler.labels" -}} 37 | helm.sh/chart: {{ include "kepler.chart" . }} 38 | {{ include "kepler.selectorLabels" . 
}} 39 | {{- if .Chart.AppVersion }} 40 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 41 | {{- end }} 42 | app.kubernetes.io/managed-by: {{ .Release.Service }} 43 | {{- end }} 44 | 45 | {{/* 46 | Selector labels 47 | */}} 48 | {{- define "kepler.selectorLabels" -}} 49 | app.kubernetes.io/name: {{ include "kepler.name" . }} 50 | app.kubernetes.io/component: exporter 51 | {{- end }} 52 | 53 | {{/* 54 | Create the name of the service account to use 55 | */}} 56 | {{- define "kepler.serviceAccountName" -}} 57 | {{- if .Values.serviceAccount.create }} 58 | {{- default (include "kepler.fullname" .) .Values.serviceAccount.name }} 59 | {{- else }} 60 | {{- default "default" .Values.serviceAccount.name }} 61 | {{- end }} 62 | {{- end }} 63 | -------------------------------------------------------------------------------- /chart/kepler/templates/daemonset.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: DaemonSet 4 | metadata: 5 | name: {{ include "kepler.fullname" . }} 6 | namespace: {{ .Release.Namespace }} 7 | labels: 8 | {{- include "kepler.labels" . | nindent 4 }} 9 | {{- with .Values.annotations }} 10 | annotations: 11 | {{- toYaml . | nindent 4 }} 12 | {{- end }} 13 | spec: 14 | selector: 15 | matchLabels: 16 | {{- include "kepler.selectorLabels" . | nindent 6 }} 17 | template: 18 | metadata: 19 | {{- with .Values.podAnnotations }} 20 | annotations: 21 | {{- toYaml . | nindent 8 }} 22 | {{- end }} 23 | labels: 24 | {{- include "kepler.selectorLabels" . | nindent 8 }} 25 | {{- with .Values.podLabels }} 26 | {{- . | toYaml | nindent 8 }} 27 | {{- end }} 28 | spec: 29 | hostNetwork: true 30 | serviceAccountName: {{ include "kepler.serviceAccountName" . }} 31 | {{- with .Values.imagePullSecrets }} 32 | imagePullSecrets: 33 | {{- toYaml . 
| nindent 8 }} 34 | {{- end }} 35 | {{- if .Values.modelServer.enabled }} 36 | initContainers: 37 | - name: estimator 38 | command: 39 | - python3 40 | args: 41 | - -u 42 | - src/kepler_model/estimate/estimator.py 43 | image: "{{ .Values.modelServer.image.repository }}:{{ .Values.modelServer.image.tag }}" 44 | imagePullPolicy: {{ .Values.modelServer.image.pullPolicy }} 45 | {{- with .Values.modelServer.sidecarResources }} 46 | resources: 47 | {{- toYaml . | nindent 12 }} 48 | {{- end }} 49 | restartPolicy: Always 50 | startupProbe: 51 | exec: 52 | command: 53 | - test 54 | - -S 55 | - /tmp/estimator.sock 56 | volumeMounts: 57 | - mountPath: /tmp 58 | name: estimator-sock 59 | {{- end }} 60 | containers: 61 | - name: kepler-exporter 62 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" 63 | imagePullPolicy: {{ .Values.image.pullPolicy }} 64 | securityContext: 65 | {{- toYaml .Values.securityContext | nindent 12 }} 66 | args: 67 | - -v=$(KEPLER_LOG_LEVEL) 68 | {{- if .Values.redfish.enabled }} 69 | - -redfish-cred-file-path=/etc/redfish/redfish.csv 70 | {{- end }} 71 | env: 72 | - name: NODE_IP 73 | valueFrom: 74 | fieldRef: 75 | fieldPath: status.hostIP 76 | - name: NODE_NAME 77 | valueFrom: 78 | fieldRef: 79 | fieldPath: spec.nodeName 80 | - name: METRIC_PATH 81 | value: "/metrics" 82 | - name: BIND_ADDRESS 83 | value: "0.0.0.0:{{ .Values.service.port }}" 84 | {{- if .Values.modelServer.enabled }} 85 | - name: MODEL_SERVER_ENABLE 86 | value: "true" 87 | - name: MODEL_SERVER_ENDPOINT 88 | value: {{ printf "http://%s:%d/model" (include "modelServer.fullname" .) .Values.modelServer.service.port }} 89 | {{- with .Values.modelServer.modelConfig }} 90 | - name: MODEL_CONFIG 91 | value: | 92 | {{- . 
| nindent 14 }} 93 | {{- end }} 94 | {{- end }} 95 | {{- range $key, $value := .Values.extraEnvVars }} 96 | - name: {{ $key | quote }} 97 | value: {{ $value | quote }} 98 | {{- end }} 99 | ports: 100 | - containerPort: {{ .Values.service.port }} 101 | hostPort: {{ .Values.service.port }} 102 | name: http 103 | livenessProbe: 104 | failureThreshold: 5 105 | httpGet: 106 | path: /healthz 107 | port: http 108 | scheme: HTTP 109 | initialDelaySeconds: 10 110 | periodSeconds: 60 111 | successThreshold: 1 112 | timeoutSeconds: 10 113 | readinessProbe: 114 | httpGet: 115 | path: /healthz 116 | port: http 117 | scheme: HTTP 118 | initialDelaySeconds: 10 119 | startupProbe: 120 | httpGet: 121 | path: /healthz 122 | port: http 123 | scheme: HTTP 124 | initialDelaySeconds: 1 125 | volumeMounts: 126 | - name: lib-modules 127 | mountPath: /lib/modules 128 | - name: tracing 129 | mountPath: /sys 130 | - name: proc 131 | mountPath: /proc 132 | - name: config-dir 133 | mountPath: /etc/kepler 134 | {{- if .Values.canMount.usrSrc }} 135 | - name: usr-src 136 | mountPath: /usr/src 137 | {{- end }} 138 | {{- if .Values.redfish.enabled }} 139 | - name: redfish 140 | mountPath: /etc/redfish 141 | readOnly: true 142 | {{- end }} 143 | {{- if .Values.modelServer.enabled }} 144 | - name: estimator-sock 145 | mountPath: /tmp 146 | {{- end }} 147 | {{- with .Values.resources }} 148 | resources: 149 | {{- toYaml . 
| nindent 12 }} 150 | {{- end }} 151 | volumes: 152 | - name: lib-modules 153 | hostPath: 154 | path: /lib/modules 155 | type: DirectoryOrCreate 156 | - name: tracing 157 | hostPath: 158 | path: /sys 159 | type: Directory 160 | - name: proc 161 | hostPath: 162 | path: /proc 163 | type: Directory 164 | - name: config-dir 165 | emptyDir: 166 | sizeLimit: 100Ki 167 | {{- if .Values.canMount.usrSrc }} 168 | - name: usr-src 169 | hostPath: 170 | path: /usr/src 171 | type: Directory 172 | {{- end }} 173 | {{- if .Values.redfish.enabled }} 174 | - name: redfish 175 | secret: 176 | secretName: {{ .Values.redfish.name }} 177 | {{- end }} 178 | {{- if .Values.modelServer.enabled }} 179 | - name: estimator-sock 180 | emptyDir: {} 181 | {{- end }} 182 | {{- with .Values.podSecurityContext }} 183 | securityContext: 184 | {{- toYaml . | nindent 8 }} 185 | {{- end }} 186 | {{- with .Values.nodeSelector }} 187 | nodeSelector: 188 | {{- toYaml . | nindent 8 }} 189 | {{- end }} 190 | {{- with .Values.affinity }} 191 | affinity: 192 | {{- toYaml . | nindent 8 }} 193 | {{- end }} 194 | {{- with .Values.tolerations }} 195 | tolerations: 196 | {{- toYaml . | nindent 8 }} 197 | {{- end }} 198 | -------------------------------------------------------------------------------- /chart/kepler/templates/model-server/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{- define "modelServer.name" -}} 2 | {{- default "model-server" .Values.modelServer.nameOverride | trunc 63 | trimSuffix "-" }} 3 | {{- end }} 4 | 5 | {{/* 6 | Create a default fully qualified app name. 7 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 8 | If release name contains chart name it will be used as a full name. 
9 | */}} 10 | {{- define "modelServer.fullname" -}} 11 | {{- /* Use the model-server override when set, accepting both the fullnameOverride and fullNameOverride spellings; get returns an empty value instead of failing when a spelling is absent. */ -}} {{- with coalesce (get .Values.modelServer "fullnameOverride") (get .Values.modelServer "fullNameOverride") }} 12 | {{- . | trunc 63 | trimSuffix "-" }} 13 | {{- else }} 14 | {{- $name := default "model-server" .Values.modelServer.nameOverride }} 15 | {{- if contains $name .Release.Name }} 16 | {{- .Release.Name | trunc 63 | trimSuffix "-" }} 17 | {{- else }} 18 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} 19 | {{- end }} 20 | {{- end }} 21 | {{- end }} 22 | 23 | {{/* 24 | Common labels 25 | */}} 26 | {{- define "modelServer.labels" -}} 27 | helm.sh/chart: {{ include "kepler.chart" . }} 28 | {{ include "modelServer.selectorLabels" . }} 29 | {{- if .Chart.AppVersion }} 30 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 31 | {{- end }} 32 | app.kubernetes.io/managed-by: {{ .Release.Service }} 33 | {{- end }} 34 | 35 | {{/* 36 | Selector labels 37 | */}} 38 | {{- define "modelServer.selectorLabels" -}} 39 | app.kubernetes.io/name: {{ include "kepler.name" . }} 40 | app.kubernetes.io/component: model-server 41 | {{- end }} 42 | -------------------------------------------------------------------------------- /chart/kepler/templates/model-server/deployment.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.modelServer.enabled }} 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: {{ include "modelServer.fullname" . }} 6 | labels: 7 | {{- include "modelServer.labels" . | nindent 4 }} 8 | {{- with .Values.modelServer.annotations }} 9 | annotations: 10 | {{- toYaml . | nindent 4 }} 11 | {{- end }} 12 | spec: 13 | replicas: {{ .Values.modelServer.replicas }} 14 | selector: 15 | matchLabels: 16 | {{- include "modelServer.selectorLabels" . | nindent 6 }} 17 | template: 18 | metadata: 19 | {{- with .Values.modelServer.podAnnotations }} 20 | annotations: 21 | {{- toYaml . 
| nindent 8 }} 22 | {{- end }} 23 | labels: 24 | {{- include "modelServer.selectorLabels" . | nindent 8 }} 25 | {{- with .Values.modelServer.podLabels }} 26 | {{- . | toYaml | nindent 8 }} 27 | {{- end }} 28 | spec: 29 | {{- with .Values.imagePullSecrets }} 30 | imagePullSecrets: 31 | {{- toYaml . | nindent 8 }} 32 | {{- end }} 33 | containers: 34 | - name: server-api 35 | args: 36 | - model-server 37 | image: "{{ .Values.modelServer.image.repository }}:{{ .Values.modelServer.image.tag }}" 38 | imagePullPolicy: {{ .Values.modelServer.image.pullPolicy }} 39 | ports: 40 | - containerPort: 8100 41 | name: http 42 | protocol: TCP 43 | volumeMounts: 44 | - mountPath: /mnt 45 | name: mnt 46 | {{- with .Values.modelServer.resources }} 47 | resources: 48 | {{- toYaml . | nindent 12 }} 49 | {{- end }} 50 | {{- with .Values.modelServer.securityContext }} 51 | securityContext: 52 | {{- toYaml . | nindent 12 }} 53 | {{- end }} 54 | startupProbe: 55 | httpGet: 56 | path: /best-models 57 | port: http 58 | initialDelaySeconds: 1 59 | readinessProbe: 60 | httpGet: 61 | path: /best-models 62 | port: http 63 | volumes: 64 | - name: mnt 65 | emptyDir: {} 66 | {{- with .Values.modelServer.podSecurityContext }} 67 | securityContext: 68 | {{- toYaml . | nindent 8 }} 69 | {{- end }} 70 | {{- with .Values.modelServer.nodeSelector }} 71 | nodeSelector: 72 | {{- toYaml . | nindent 8 }} 73 | {{- end }} 74 | {{- with .Values.modelServer.affinity }} 75 | affinity: 76 | {{- toYaml . | nindent 8 }} 77 | {{- end }} 78 | {{- with .Values.modelServer.tolerations }} 79 | tolerations: 80 | {{- toYaml . | nindent 8 }} 81 | {{- end }} 82 | {{- end }} 83 | -------------------------------------------------------------------------------- /chart/kepler/templates/model-server/service.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.modelServer.enabled }} 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: {{ include "modelServer.fullname" . 
}} 6 | labels: 7 | {{- include "modelServer.labels" . | nindent 4 }} 8 | {{- with .Values.modelServer.service.annotations }} 9 | annotations: 10 | {{- toYaml . | nindent 4 }} 11 | {{- end }} 12 | spec: 13 | type: {{ .Values.modelServer.service.type }} 14 | ports: 15 | - name: http 16 | port: {{ .Values.modelServer.service.port }} 17 | targetPort: http 18 | protocol: TCP 19 | selector: 20 | {{- include "modelServer.selectorLabels" . | nindent 4 }} 21 | {{- end }} 22 | -------------------------------------------------------------------------------- /chart/kepler/templates/networkpolicy.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.networkPolicy.enabled }} 2 | apiVersion: networking.k8s.io/v1 3 | kind: NetworkPolicy 4 | metadata: 5 | name: {{ include "kepler.fullname" . }} 6 | namespace: {{ .Release.Namespace }} 7 | labels: 8 | {{- include "kepler.labels" . | nindent 4 }} 9 | {{- with .Values.annotations }} 10 | annotations: 11 | {{- toYaml . | nindent 4 }} 12 | {{- end }} 13 | spec: 14 | {{- with .Values.networkPolicy.ingress }} 15 | ingress: 16 | {{- toYaml . | nindent 4 }} 17 | {{- end }} 18 | {{- with .Values.networkPolicy.egress }} 19 | egress: 20 | {{- toYaml . | nindent 4 }} 21 | {{- end }} 22 | podSelector: 23 | matchLabels: 24 | {{- include "kepler.selectorLabels" . | nindent 6 }} 25 | policyTypes: 26 | - Egress 27 | - Ingress 28 | {{- end }} 29 | -------------------------------------------------------------------------------- /chart/kepler/templates/rolebinding.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.rbac.create -}} 2 | --- 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | kind: ClusterRole 5 | metadata: 6 | name: {{ include "kepler.fullname" . 
}}-clusterrole 7 | rules: 8 | - apiGroups: [""] 9 | resources: 10 | - nodes/metrics # access /metrics/resource 11 | - nodes/proxy 12 | - nodes/stats 13 | - pods 14 | verbs: 15 | - get 16 | - watch 17 | - list 18 | 19 | --- 20 | apiVersion: rbac.authorization.k8s.io/v1 21 | kind: ClusterRoleBinding 22 | metadata: 23 | name: {{ include "kepler.fullname" . }}-clusterrole-binding 24 | roleRef: 25 | kind: ClusterRole 26 | name: {{ include "kepler.fullname" . }}-clusterrole 27 | apiGroup: "rbac.authorization.k8s.io" 28 | subjects: 29 | - kind: ServiceAccount 30 | name: {{ include "kepler.serviceAccountName" . }} 31 | namespace: {{ .Release.Namespace }} 32 | {{- end }} 33 | -------------------------------------------------------------------------------- /chart/kepler/templates/secret-redfish.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.redfish.enabled }} 2 | {{- with .Values.redfish }} 3 | --- 4 | apiVersion: v1 5 | kind: Secret 6 | metadata: 7 | name: {{ .name }} 8 | namespace: {{ $.Release.Namespace }} 9 | labels: 10 | {{- include "kepler.labels" $ | nindent 4 }} 11 | {{- with .labels }} 12 | {{- toYaml . | nindent 4 }} 13 | {{- end }} 14 | {{- with .annotations }} 15 | annotations: 16 | {{- toYaml . | nindent 4 }} 17 | {{- end }} 18 | type: Opaque 19 | stringData: 20 | redfish.csv: {{- .fileContent | toYaml | indent 2 }} 21 | {{- end }} 22 | {{- end }} 23 | -------------------------------------------------------------------------------- /chart/kepler/templates/service.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: {{ include "kepler.fullname" . }} 6 | namespace: {{ .Release.Namespace }} 7 | labels: 8 | {{- include "kepler.labels" . | nindent 4 }} 9 | {{- with .Values.service.annotations }} 10 | annotations: 11 | {{- toYaml . 
| nindent 4 }} 12 | {{- end }} 13 | spec: 14 | type: {{ .Values.service.type }} 15 | ports: 16 | - name: http 17 | port: {{ .Values.service.port }} 18 | targetPort: http 19 | protocol: TCP 20 | selector: 21 | {{- include "kepler.selectorLabels" . | nindent 4 }} 22 | -------------------------------------------------------------------------------- /chart/kepler/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceAccount.create -}} 2 | --- 3 | apiVersion: v1 4 | kind: ServiceAccount 5 | metadata: 6 | name: {{ include "kepler.serviceAccountName" . }} 7 | namespace: {{ .Release.Namespace }} 8 | labels: 9 | {{- include "kepler.labels" . | nindent 4 }} 10 | {{- with .Values.serviceAccount.annotations }} 11 | annotations: 12 | {{- toYaml . | nindent 4 }} 13 | {{- end }} 14 | {{- end }} 15 | -------------------------------------------------------------------------------- /chart/kepler/templates/servicemonitor.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceMonitor.enabled }} 2 | --- 3 | apiVersion: monitoring.coreos.com/v1 4 | kind: ServiceMonitor 5 | metadata: 6 | name: {{ template "kepler.fullname" . }}-prometheus-exporter 7 | namespace: {{ .Values.serviceMonitor.namespace | default .Release.Namespace }} 8 | labels: 9 | {{- include "kepler.labels" . | nindent 4 }} 10 | {{- with .Values.serviceMonitor.labels }} 11 | {{- toYaml . | nindent 4 }} 12 | {{- end }} 13 | spec: 14 | jobLabel: app.kubernetes.io/name 15 | {{- with .Values.serviceMonitor.attachMetadata }} 16 | attachMetadata: 17 | {{- toYaml . | nindent 4 }} 18 | {{- end }} 19 | endpoints: 20 | - port: http 21 | {{- with .Values.serviceMonitor.interval }} 22 | interval: {{ . }} 23 | {{- end }} 24 | {{- with .Values.serviceMonitor.scrapeTimeout }} 25 | scrapeTimeout: {{ . 
}} 26 | {{- end }} 27 | path: /metrics 28 | scheme: http 29 | {{- with .Values.serviceMonitor.relabelings }} 30 | relabelings: 31 | {{- toYaml . | nindent 8 }} 32 | {{- end }} 33 | {{- with .Values.serviceMonitor.metricRelabelings }} 34 | metricRelabelings: 35 | {{- toYaml . | nindent 8 }} 36 | {{- end }} 37 | namespaceSelector: 38 | matchNames: 39 | - {{ .Release.Namespace }} 40 | selector: 41 | matchLabels: 42 | {{- include "kepler.selectorLabels" . | nindent 6 }} 43 | {{- end }} 44 | -------------------------------------------------------------------------------- /chart/kepler/values.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # -- Replaces the name of the chart in the Chart.yaml file 3 | nameOverride: "" 4 | # -- Replaces the generated name 5 | fullnameOverride: "" 6 | 7 | image: 8 | # -- Repository to pull the image from 9 | repository: "quay.io/sustainable_computing_io/kepler" 10 | # -- Image tag, if empty it will get it from the chart's appVersion 11 | tag: "" 12 | # -- Pull policy 13 | pullPolicy: Always 14 | 15 | # -- Secret name for pulling images from private repository 16 | imagePullSecrets: [] 17 | 18 | # -- Additional DaemonSet annotations 19 | annotations: {} 20 | 21 | # -- Additional pod annotations 22 | podAnnotations: {} 23 | 24 | # -- Additional pod labels 25 | podLabels: {} 26 | 27 | # -- Privileges and access control settings for a Pod (all containers in a pod) 28 | podSecurityContext: {} 29 | # fsGroup: 2000 30 | 31 | # -- Privileges and access control settings for a container 32 | securityContext: 33 | privileged: true 34 | 35 | # -- Node selection constraint 36 | nodeSelector: 37 | kubernetes.io/os: linux 38 | 39 | # -- Toleration for taints 40 | tolerations: 41 | - effect: NoSchedule 42 | key: node-role.kubernetes.io/control-plane 43 | 44 | # -- Affinity rules 45 | affinity: {} 46 | 47 | # -- CPU/MEM resources 48 | resources: {} 49 | 50 | # -- Extra environment variables 51 | 
extraEnvVars: 52 | KEPLER_LOG_LEVEL: "1" 53 | # METRIC_PATH: "/metrics" 54 | # BIND_ADDRESS: "0.0.0.0:9102" 55 | ENABLE_GPU: "true" 56 | ENABLE_QAT: "false" 57 | ENABLE_EBPF_CGROUPID: "true" 58 | EXPOSE_HW_COUNTER_METRICS: "true" 59 | EXPOSE_IRQ_COUNTER_METRICS: "true" 60 | EXPOSE_CGROUP_METRICS: "false" 61 | ENABLE_PROCESS_METRICS: "false" 62 | CPU_ARCH_OVERRIDE: "" 63 | CGROUP_METRICS: '*' 64 | # REDFISH_PROBE_INTERVAL_IN_SECONDS: "60" 65 | # REDFISH_SKIP_SSL_VERIFY: "true" 66 | 67 | canMount: 68 | usrSrc: true # /usr/src may not be available, e.g. on GKE 69 | 70 | service: 71 | annotations: {} 72 | type: ClusterIP 73 | port: 9102 74 | 75 | 76 | rbac: 77 | # Specifies whether rbac should be created 78 | create: true 79 | 80 | serviceAccount: 81 | # Specifies whether a service account should be created 82 | create: true 83 | # Annotations to add to the service account 84 | annotations: {} 85 | # The name of the service account to use. 86 | # If not set and create is true, a name is generated using the fullname template 87 | name: "" 88 | 89 | serviceMonitor: 90 | enabled: false 91 | namespace: "" 92 | interval: 30s 93 | scrapeTimeout: 5s 94 | labels: {} 95 | attachMetadata: 96 | node: false 97 | relabelings: 98 | - action: replace 99 | regex: (.*) 100 | replacement: $1 101 | sourceLabels: 102 | - __meta_kubernetes_pod_node_name 103 | targetLabel: instance 104 | metricRelabelings: [] 105 | ## For example when you need the name of the pod's namespace in the 'namespace' label, e.g. 
for a multitenant setup 106 | # - action: replace 107 | # regex: (.*) 108 | # sourceLabels: 109 | # - namespace 110 | # targetLabel: app_namespace 111 | # - action: replace 112 | # regex: (.*) 113 | # sourceLabels: 114 | # - container_namespace 115 | # targetLabel: namespace 116 | 117 | redfish: 118 | enabled: false 119 | name: redfish 120 | labels: {} 121 | annotations: {} 122 | fileContent: |- 123 | your_kubelet_node1_name,redfish_username,redfish_password,https://node1_redfish_ip_or_hostname 124 | your_kubelet_node2_name,redfish_username,redfish_password,https://node2_redfish_ip_or_hostname 125 | 126 | networkPolicy: 127 | enabled: false 128 | # If you enable a networkPolicy, you must add any redfish IP/Ports you list 129 | # egress: 130 | # - to: 131 | # - ipBlock: 132 | # cidr: 10.0.0.0/24 133 | # ports: 134 | # - protocol: TCP 135 | # port: 443 136 | egress: [] 137 | ingress: 138 | - ports: 139 | - protocol: TCP 140 | port: 9102 141 | 142 | # Configure kepler [model-server](https://sustainable-computing.io/kepler_model_server/get_started/) 143 | modelServer: 144 | # whether model-server and estimator sidecar should be deployed 145 | enabled: false 146 | modelConfig: | 147 | NODE_COMPONENTS_ESTIMATOR=true 148 | nameOverride: "" 149 | fullNameOverride: "" 150 | image: 151 | repository: "quay.io/sustainable_computing_io/kepler_model_server" 152 | tag: "v0.7.12" 153 | pullPolicy: Always 154 | # replicas of the model-server Deployment 155 | replicas: 1 156 | # additional annotations for the model server Deployment 157 | annotations: {} 158 | # additional annotations for the model server Pods 159 | podAnnotations: {} 160 | # additional labels for the model server Pods 161 | podLabels: {} 162 | podSecurityContext: {} 163 | # security context for the model-server container in the model-server Deployment 164 | securityContext: {} 165 | nodeSelector: 166 | kubernetes.io/os: linux 167 | affinity: {} 168 | # resources for the model-server containers in the model-server 
Deployment 169 | resources: {} 170 | service: 171 | annotations: {} 172 | type: ClusterIP 173 | port: 8100 174 | # resources for the estimator sidecar deployed in the kepler DaemonSet 175 | sidecarResources: {} 176 | -------------------------------------------------------------------------------- /cr.yaml: -------------------------------------------------------------------------------- 1 | # cr.yaml 2 | # Set to true for GPG signing 3 | sign: true 4 | # UID of the GPG key to use 5 | key: Brad McCoy 6 | --------------------------------------------------------------------------------