├── .dockerignore ├── .github └── workflows │ ├── dev-image-build.yaml │ └── release-image-build.yml ├── .gitignore ├── .gitmodules ├── LICENSE ├── Makefile ├── Makefile.def ├── OWNERS ├── README.md ├── cmd ├── vgpu-monitor │ ├── feedback.go │ ├── main.go │ ├── metrics.go │ └── validation.go └── vgpu │ ├── main.go │ └── watchers.go ├── doc ├── config.md ├── design.md ├── example.png ├── hard_limit.jpg ├── vgpu-on-volcano.pdf └── vgpu_device_plugin_metrics.png ├── docker └── Dockerfile.ubuntu20.04 ├── examples ├── gpu-share.yml ├── vgpu-case01.yml ├── vgpu-case02.yml ├── vgpu-case03.yml └── vgpu-deployment.yaml ├── go.mod ├── go.sum ├── lib └── nvidia │ └── ld.so.preload ├── pkg ├── apis │ ├── config.go │ ├── flags.go │ └── flags_test.go ├── filewatcher │ └── filewatcher.go ├── gpu │ └── doc.go ├── lock │ └── nodelock.go ├── monitor │ └── nvidia │ │ ├── cudevshr.go │ │ ├── v0 │ │ └── spec.go │ │ └── v1 │ │ └── spec.go └── plugin │ ├── interface.go │ └── vgpu │ ├── cache.go │ ├── config │ ├── config.go │ └── version.go │ ├── helper.go │ ├── mig-strategy.go │ ├── mig.go │ ├── nvidia.go │ ├── plugin.go │ ├── register.go │ └── util │ ├── types.go │ └── util.go └── volcano-vgpu-device-plugin.yml /.dockerignore: -------------------------------------------------------------------------------- 1 | # Ignore version control directories 2 | .git/ 3 | .github/ 4 | 5 | # Ignore build and docs directories 6 | _output/ 7 | doc/ 8 | examples/ 9 | README.md 10 | OWNERS 11 | LICENSE 12 | 13 | # Ignore IDE and OS files 14 | *.DS_Store 15 | .idea/ 16 | .vscode/ 17 | *.iml 18 | 19 | # Ignore Docker-specific files 20 | docker/ 21 | -------------------------------------------------------------------------------- /.github/workflows/dev-image-build.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA CORPORATION 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in 
compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Run this workflow on pull requests or merge to main/dev branches 16 | name: Build Dev Image 17 | 18 | on: 19 | push: 20 | branches: 21 | - main 22 | - dev-vgpu-1219 23 | pull_request_target: 24 | types: 25 | - opened 26 | - synchronize 27 | - reopened 28 | 29 | jobs: 30 | build: 31 | runs-on: ubuntu-latest 32 | steps: 33 | - uses: actions/checkout@v2 34 | - uses: actions/setup-go@v2 35 | with: 36 | go-version: "^1.19.x" 37 | - name: Checkout submodule 38 | uses: Mushus/checkout-submodule@v1.0.1 39 | with: 40 | submodulePath: libvgpu 41 | - run: go version 42 | - name: Get branch name 43 | uses: nelonoel/branch-name@v1.0.1 44 | - name: Docker Login 45 | uses: docker/login-action@v2.1.0 46 | with: 47 | username: ${{ secrets.DOCKERHUB_TOKEN }} 48 | password: ${{ secrets.DOCKERHUB_PASSWD }} 49 | - name: Set up Docker Buildx 50 | id: buildx 51 | uses: docker/setup-buildx-action@v1 52 | - name: Generating image tag 53 | id: runtime-tag 54 | run: | 55 | echo tag="$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT 56 | - run: BUILD_PLATFORMS="linux/amd64,linux/arm64" VERSION="${BRANCH_NAME}-${{ steps.runtime-tag.outputs.tag }}" make push-short 57 | - run: BUILD_PLATFORMS="linux/amd64,linux/arm64" VERSION="${BRANCH_NAME}-${{ steps.runtime-tag.outputs.tag }}" make push-latest 58 | -------------------------------------------------------------------------------- /.github/workflows/release-image-build.yml: -------------------------------------------------------------------------------- 1 | # Copyright 
2024 NVIDIA CORPORATION 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Run this workflow on new tags 16 | name: Build Release Image 17 | 18 | on: 19 | push: 20 | tags: 21 | - v[0-9]+.[0-9]+.[0-9]+ 22 | 23 | jobs: 24 | build: 25 | runs-on: ubuntu-latest 26 | steps: 27 | - uses: actions/checkout@v2 28 | - uses: actions/setup-go@v2 29 | with: 30 | go-version: "^1.19.x" 31 | - name: Checkout submodule 32 | uses: Mushus/checkout-submodule@v1.0.1 33 | with: 34 | submodulePath: libvgpu 35 | - run: go version 36 | - name: Get branch name 37 | uses: nelonoel/branch-name@v1.0.1 38 | - name: Docker Login 39 | uses: docker/login-action@v2.1.0 40 | with: 41 | username: ${{ secrets.DOCKERHUB_TOKEN }} 42 | password: ${{ secrets.DOCKERHUB_PASSWD }} 43 | - name: Set up Docker Buildx 44 | id: buildx 45 | uses: docker/setup-buildx-action@v1 46 | - name: Generating image tag 47 | id: runtime-tag 48 | run: | 49 | echo tag="$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT 50 | - run: BUILD_PLATFORMS="linux/amd64,linux/arm64" VERSION="${BRANCH_NAME}" make push-short 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # OSX leaves these everywhere on SMB shares 2 | ._* 3 | 4 | # OSX trash 5 | .DS_Store 6 | 7 | # Eclipse files 8 | .classpath 9 | .project 10 | .settings/** 11 | 12 | # Files generated by JetBrains 
IDEs, e.g. IntelliJ IDEA 13 | .idea/ 14 | *.iml 15 | 16 | # Vscode files 17 | .vscode 18 | 19 | # This is where the result of the go build goes 20 | /output*/ 21 | /_output*/ 22 | /_output 23 | 24 | # Emacs save files 25 | *~ 26 | \#*\# 27 | .\#* 28 | 29 | # Vim-related files 30 | [._]*.s[a-w][a-z] 31 | [._]s[a-w][a-z] 32 | *.un~ 33 | Session.vim 34 | .netrwhist 35 | 36 | # cscope-related files 37 | cscope.* 38 | 39 | # Go test binaries 40 | *.test 41 | /hack/.test-cmd-auth 42 | 43 | # JUnit test output from ginkgo e2e tests 44 | /junit*.xml 45 | 46 | # Mercurial files 47 | **/.hg 48 | **/.hg* 49 | 50 | # Vagrant 51 | .vagrant 52 | network_closure.sh 53 | 54 | # Local cluster env variables 55 | /cluster/env.sh 56 | 57 | # Compiled binaries in third_party 58 | /third_party/pkg 59 | 60 | # Also ignore etcd installed by hack/install-etcd.sh 61 | /third_party/etcd* 62 | /default.etcd 63 | 64 | # User cluster configs 65 | .kubeconfig 66 | 67 | .tags* 68 | 69 | # Version file for dockerized build 70 | .dockerized-kube-version-defs 71 | 72 | # Web UI 73 | /www/master/node_modules/ 74 | /www/master/npm-debug.log 75 | /www/master/shared/config/development.json 76 | 77 | # Karma output 78 | /www/test_out 79 | 80 | # precommit temporary directories created by ./hack/verify-generated-docs.sh and ./hack/lib/util.sh 81 | /_tmp/ 82 | /doc_tmp/ 83 | 84 | # Test artifacts produced by Jenkins jobs 85 | /_artifacts/ 86 | 87 | # Go dependencies installed on Jenkins 88 | /_gopath/ 89 | 90 | # Config directories created by gcloud and gsutil on Jenkins 91 | /.config/gcloud*/ 92 | /.gsutil/ 93 | 94 | # CoreOS stuff 95 | /cluster/libvirt-coreos/coreos_*.img 96 | 97 | # Juju Stuff 98 | /cluster/juju/charms/* 99 | /cluster/juju/bundles/local.yaml 100 | 101 | # Downloaded Kubernetes binary release 102 | /kubernetes/ 103 | 104 | # direnv .envrc files 105 | .envrc 106 | 107 | # Downloaded kubernetes binary release tar ball 108 | kubernetes.tar.gz 109 | 110 | # generated files in any directory 
111 | # TODO(thockin): uncomment this when we stop committing the generated files. 112 | #zz_generated.* 113 | #zz_generated.openapi.go 114 | 115 | # make-related metadata 116 | /.make/ 117 | # Just in time generated data in the source, should never be commited 118 | /test/e2e/generated/bindata.go 119 | 120 | # This file used by some vendor repos (e.g. github.com/go-openapi/...) to store secret variables and should not be ignored 121 | !\.drone\.sec 122 | 123 | /bazel-* 124 | *.pyc 125 | 126 | # e2e log files 127 | *.log 128 | 129 | # test coverage file 130 | coverage.txt 131 | 132 | updateso.sh 133 | volcano-vgpu-device-plugin 134 | 135 | lib/nvidia/libvgpu/build 136 | 137 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "libvgpu"] 2 | path = libvgpu 3 | url = https://github.com/Project-HAMi/HAMi-core.git 4 | branch = main 5 | 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | .DEFAULT_GOAL := all 17 | 18 | include Makefile.def 19 | 20 | ##### Global variables ##### 21 | REGISTRY ?= projecthami 22 | VERSION ?= 1.0.0 23 | 24 | ##### Using `BUILD_PLATFORMS=linux/arm64 make all` to build arm64 arch image locally 25 | ##### Using `BUILD_PLATFORMS=linux/amd64,linux/arm64 make push-latest` to build and publish multi-arch image 26 | BUILD_PLATFORMS ?= linux/amd64 27 | 28 | ##### Public rules ##### 29 | 30 | all: ubuntu20.04 31 | 32 | push: 33 | docker buildx build --platform $(BUILD_PLATFORMS) --push \ 34 | --tag $(REGISTRY)/volcano-vgpu-device-plugin:$(VERSION)-ubuntu20.04 \ 35 | --file docker/Dockerfile.ubuntu20.04 . 36 | 37 | push-short: 38 | docker buildx build --platform $(BUILD_PLATFORMS) --push \ 39 | --tag $(REGISTRY)/volcano-vgpu-device-plugin:$(VERSION)\ 40 | --file docker/Dockerfile.ubuntu20.04 . 41 | 42 | push-latest: 43 | docker buildx build --platform $(BUILD_PLATFORMS) --push \ 44 | --tag $(REGISTRY)/volcano-vgpu-device-plugin:latest\ 45 | --file docker/Dockerfile.ubuntu20.04 . 46 | 47 | ubuntu20.04: 48 | docker buildx build --platform $(BUILD_PLATFORMS) --load \ 49 | --tag $(REGISTRY)/volcano-vgpu-device-plugin:$(VERSION)-ubuntu20.04 \ 50 | --file docker/Dockerfile.ubuntu20.04 . 51 | 52 | BIN_DIR=_output/bin 53 | RELEASE_DIR=_output/release 54 | REL_OSARCH=linux/amd64 55 | 56 | init: 57 | mkdir -p ${BIN_DIR} 58 | mkdir -p ${RELEASE_DIR} 59 | 60 | gen_bin: init 61 | go get github.com/mitchellh/gox 62 | CGO_ENABLED=1 gox -osarch=${REL_OSARCH} -ldflags ${LD_FLAGS} -output ${BIN_DIR}/${REL_OSARCH}/volcano-vgpu-device-plugin ./cmd/vgpu 63 | -------------------------------------------------------------------------------- /Makefile.def: -------------------------------------------------------------------------------- 1 | 2 | # If tag not explicitly set in users default to the git sha. 
3 | TAG ?= $(shell git rev-parse --verify HEAD) 4 | GitSHA=`git rev-parse HEAD` 5 | Date=`date "+%Y-%m-%d %H:%M:%S"` 6 | RELEASE_VER=latest 7 | LD_FLAGS=" \ 8 | -X '${REPO_PATH}/pkg/version.GitSHA=${GitSHA}' \ 9 | -X '${REPO_PATH}/pkg/version.Built=${Date}' \ 10 | -X '${REPO_PATH}/pkg/version.Version=${RELEASE_VER}'" 11 | 12 | -------------------------------------------------------------------------------- /OWNERS: -------------------------------------------------------------------------------- 1 | reviewers: 2 | - k82cn 3 | - kevin-wangzefeng 4 | - william-wang 5 | - Thor-wl 6 | - archlitchi 7 | - hzxuzhonghu 8 | - wangyang0616 9 | approvers: 10 | - k82cn 11 | - kevin-wangzefeng 12 | - william-wang 13 | - hzxuzhonghu 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Volcano vgpu device plugin for Kubernetes 2 | 3 | [![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2FProject-HAMi%2Fvolcano-vgpu-device-plugin.svg?type=shield)](https://app.fossa.com/projects/git%2Bgithub.com%2FProject-HAMi%2Fvolcano-vgpu-device-plugin?ref=badge_shield) 4 | [![docker pulls](https://img.shields.io/docker/pulls/projecthami/volcano-vgpu-device-plugin.svg)](https://hub.docker.com/r/projecthami/volcano-vgpu-device-plugin) 5 | 6 | **Note**: 7 | 8 | Volcano vgpu device-plugin can provide device-sharing mechanism for NVIDIA devices managed by volcano. 9 | 10 | This is based on [Nvidia Device Plugin](https://github.com/NVIDIA/k8s-device-plugin), it uses [HAMi-core](https://github.com/Project-HAMi/HAMi-core) to support hard isolation of GPU card. 11 | 12 | And collaborate with volcano, it is possible to enable GPU sharing. 
13 | 14 | ## Table of Contents 15 | 16 | - [About](#about) 17 | - [Prerequisites](#prerequisites) 18 | - [Quick Start](#quick-start) 19 | - [Preparing your GPU Nodes](#preparing-your-gpu-nodes) 20 | - [Enabling vGPU Support in Kubernetes](#enabling-gpu-support-in-kubernetes) 21 | - [Running vGPU Jobs](#running-vgpu-jobs) 22 | - [Issues and Contributing](#issues-and-contributing) 23 | 24 | ## About 25 | 26 | The Volcano device plugin for Kubernetes is a Daemonset that allows you to automatically: 27 | - Expose the number of GPUs on each node of your cluster 28 | - Keep track of the health of your GPUs 29 | - Run GPU enabled containers in your Kubernetes cluster. 30 | - Provide device-sharing mechanism for GPU tasks as the figure below. 31 | - Enforce hard resource limit in container. 32 | - Support dynamic-mig, for more details, see [config](doc/config.md) 33 | 34 | 35 | 36 | ## Prerequisites 37 | 38 | The list of prerequisites for running the Volcano device plugin is described below: 39 | * NVIDIA drivers > 440 40 | * nvidia-docker version > 2.0 (see how to [install](https://github.com/NVIDIA/nvidia-docker) and its [prerequisites](https://github.com/nvidia/nvidia-docker/wiki/Installation-\(version-2.0\)#prerequisites)) 41 | * docker configured with nvidia as the [default runtime](https://github.com/NVIDIA/nvidia-docker/wiki/Advanced-topics#default-runtime). 42 | * Kubernetes version >= 1.16 43 | * Volcano version >= 1.9 44 | 45 | ## Quick Start 46 | 47 | ### Preparing your GPU Nodes 48 | 49 | The following steps need to be executed on all your GPU nodes. 50 | This README assumes that the NVIDIA drivers and nvidia-docker have been installed. 51 | 52 | Note that you need to install the nvidia-docker2 package and not the nvidia-container-toolkit. 53 | This is because the new `--gpus` option hasn't reached kubernetes yet. Example: 54 | ```bash 55 | # Add the package repositories 56 | $ distribution=$(.
/etc/os-release;echo $ID$VERSION_ID) 57 | $ curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - 58 | $ curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list 59 | 60 | $ sudo apt-get update && sudo apt-get install -y nvidia-docker2 61 | $ sudo systemctl restart docker 62 | ``` 63 | 64 | You will need to enable the nvidia runtime as your default runtime on your node. 65 | We will be editing the docker daemon config file which is usually present at `/etc/docker/daemon.json`: 66 | ```json 67 | { 68 | "default-runtime": "nvidia", 69 | "runtimes": { 70 | "nvidia": { 71 | "path": "/usr/bin/nvidia-container-runtime", 72 | "runtimeArgs": [] 73 | } 74 | } 75 | } 76 | ``` 77 | > *if `runtimes` is not already present, head to the install page of [nvidia-docker](https://github.com/NVIDIA/nvidia-docker)* 78 | 79 | 80 | ### Configure scheduler 81 | 82 | update the scheduler configuration: 83 | 84 | ```shell script 85 | kubectl edit cm -n volcano-system volcano-scheduler-configmap 86 | ``` 87 | 88 | For volcano v1.9+,, use the following configMap 89 | ```yaml 90 | kind: ConfigMap 91 | apiVersion: v1 92 | metadata: 93 | name: volcano-scheduler-configmap 94 | namespace: volcano-system 95 | data: 96 | volcano-scheduler.conf: | 97 | actions: "enqueue, allocate, backfill" 98 | tiers: 99 | - plugins: 100 | - name: priority 101 | - name: gang 102 | - name: conformance 103 | - plugins: 104 | - name: drf 105 | - name: deviceshare 106 | arguments: 107 | deviceshare.VGPUEnable: true # enable vgpu 108 | - name: predicates 109 | - name: proportion 110 | - name: nodeorder 111 | - name: binpack 112 | ``` 113 | 114 | Customize your installation by adjusting the [configs](doc/config.md) 115 | 116 | 117 | ### Enabling GPU Support in Kubernetes 118 | 119 | Once you have enabled this option on *all* the GPU nodes you wish to use, 120 | you can then enable GPU support in your cluster by 
deploying the following Daemonset: 121 | 122 | ``` 123 | $ kubectl create -f volcano-vgpu-device-plugin.yml 124 | ``` 125 | 126 | ### Verify environment is ready 127 | 128 | Check the node status, it is ok if `volcano.sh/vgpu-number` is included in the allocatable resources. 129 | 130 | ```shell script 131 | $ kubectl get node {node name} -oyaml 132 | ... 133 | status: 134 | addresses: 135 | - address: 172.17.0.3 136 | type: InternalIP 137 | - address: volcano-control-plane 138 | type: Hostname 139 | allocatable: 140 | cpu: "4" 141 | ephemeral-storage: 123722704Ki 142 | hugepages-1Gi: "0" 143 | hugepages-2Mi: "0" 144 | memory: 8174332Ki 145 | pods: "110" 146 | volcano.sh/vgpu-memory: "89424" 147 | volcano.sh/vgpu-number: "10" # vGPU resource 148 | capacity: 149 | cpu: "4" 150 | ephemeral-storage: 123722704Ki 151 | hugepages-1Gi: "0" 152 | hugepages-2Mi: "0" 153 | memory: 8174332Ki 154 | pods: "110" 155 | volcano.sh/vgpu-memory: "89424" 156 | volcano.sh/vgpu-number: "10" # vGPU resource 157 | ``` 158 | 159 | ### Running VGPU Jobs 160 | 161 | VGPU can be requested by both set "volcano.sh/vgpu-number" , "volcano.sh/vgpu-cores" and "volcano.sh/vgpu-memory" in resource.limit 162 | 163 | ```shell script 164 | $ cat < **WARNING:** *if you don't request GPUs when using the device plugin with NVIDIA images all 189 | > the GPUs on the machine will be exposed inside your container. 190 | > The number of vgpu used by a container can not exceed the number of gpus on that node.* 191 | 192 | ### Monitor 193 | 194 | volcano-scheduler-metrics records every GPU usage and limitation, visit the following address to get these metrics. 
195 | 196 | ``` 197 | curl {volcano scheduler cluster ip}:8080/metrics 198 | ``` 199 | 200 | You can also collect the **GPU utilization**, **GPU memory usage**, **pods' GPU memory limitations** and **pods' GPU memory usage** metrics on nodes by visiting the following addresses: 201 | 202 | ``` 203 | curl {volcano device plugin pod ip}:9394/metrics 204 | ``` 205 | ![img](./doc/vgpu_device_plugin_metrics.png) 206 | 207 | # Issues and Contributing 208 | [Check out the Contributing document!](CONTRIBUTING.md) 209 | 210 | * You can report a bug by [filing a new issue](https://github.com/Project-HAMi/volcano-vgpu-device-plugin) 211 | * You can contribute by opening a [pull request](https://help.github.com/articles/using-pull-requests/) 212 | 213 | 214 | ## Upgrading Kubernetes with the device plugin 215 | 216 | Upgrading Kubernetes when you have a device plugin deployed doesn't require you to make any 217 | particular changes to your workflow. 218 | The API is versioned and is pretty stable (though it is not guaranteed to be non-breaking), 219 | upgrading kubernetes won't require you to deploy a different version of the device plugin and you will 220 | see GPUs re-registering themselves after your node comes back online. 221 | 222 | 223 | Upgrading the device plugin is a more complex task. It is recommended to drain GPU tasks as 224 | we cannot guarantee that GPU tasks will survive a rolling upgrade. 225 | However we make best efforts to preserve GPU tasks during an upgrade.
226 | 227 | 228 | ## License 229 | [![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2FProject-HAMi%2Fvolcano-vgpu-device-plugin.svg?type=large)](https://app.fossa.com/projects/git%2Bgithub.com%2FProject-HAMi%2Fvolcano-vgpu-device-plugin?ref=badge_large) -------------------------------------------------------------------------------- /cmd/vgpu-monitor/feedback.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The HAMi Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package main 18 | 19 | import ( 20 | "time" 21 | 22 | "volcano.sh/k8s-device-plugin/pkg/monitor/nvidia" 23 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config" 24 | 25 | "k8s.io/klog/v2" 26 | ) 27 | 28 | type UtilizationPerDevice []int 29 | 30 | func CheckBlocking(utSwitchOn map[string]UtilizationPerDevice, p int, c *nvidia.ContainerUsage) bool { 31 | for i := 0; i < c.Info.DeviceMax(); i++ { 32 | uuid := c.Info.DeviceUUID(i) 33 | _, ok := utSwitchOn[uuid] 34 | if ok { 35 | for i := 0; i < p; i++ { 36 | if utSwitchOn[uuid][i] > 0 { 37 | return true 38 | } 39 | } 40 | return false 41 | } 42 | } 43 | return false 44 | } 45 | 46 | // Check whether task with higher priority use GPU or there are other tasks with the same priority. 
47 | func CheckPriority(utSwitchOn map[string]UtilizationPerDevice, p int, c *nvidia.ContainerUsage) bool { 48 | for i := 0; i < c.Info.DeviceMax(); i++ { 49 | uuid := c.Info.DeviceUUID(i) 50 | _, ok := utSwitchOn[uuid] 51 | if ok { 52 | for i := 0; i < p; i++ { 53 | if utSwitchOn[uuid][i] > 0 { 54 | return true 55 | } 56 | } 57 | if utSwitchOn[uuid][p] > 1 { 58 | return true 59 | } 60 | } 61 | } 62 | return false 63 | } 64 | 65 | func Observe(lister *nvidia.ContainerLister) { 66 | utSwitchOn := map[string]UtilizationPerDevice{} 67 | containers := lister.ListContainers() 68 | 69 | for _, c := range containers { 70 | recentKernel := c.Info.GetRecentKernel() 71 | if recentKernel > 0 { 72 | recentKernel-- 73 | if recentKernel > 0 { 74 | for i := 0; i < c.Info.DeviceMax(); i++ { 75 | // Null device condition 76 | if !c.Info.IsValidUUID(i) { 77 | continue 78 | } 79 | uuid := c.Info.DeviceUUID(i) 80 | if len(utSwitchOn[uuid]) == 0 { 81 | utSwitchOn[uuid] = []int{0, 0} 82 | } 83 | utSwitchOn[uuid][c.Info.GetPriority()]++ 84 | } 85 | } 86 | c.Info.SetRecentKernel(recentKernel) 87 | } 88 | } 89 | for idx, c := range containers { 90 | priority := c.Info.GetPriority() 91 | recentKernel := c.Info.GetRecentKernel() 92 | utilizationSwitch := c.Info.GetUtilizationSwitch() 93 | if CheckBlocking(utSwitchOn, priority, c) { 94 | if recentKernel >= 0 { 95 | klog.Infof("utSwitchon=%v", utSwitchOn) 96 | klog.Infof("Setting Blocking to on %v", idx) 97 | c.Info.SetRecentKernel(-1) 98 | } 99 | } else { 100 | if recentKernel < 0 { 101 | klog.Infof("utSwitchon=%v", utSwitchOn) 102 | klog.Infof("Setting Blocking to off %v", idx) 103 | c.Info.SetRecentKernel(0) 104 | } 105 | } 106 | if CheckPriority(utSwitchOn, priority, c) { 107 | if utilizationSwitch != 1 { 108 | klog.Infof("utSwitchon=%v", utSwitchOn) 109 | klog.Infof("Setting UtilizationSwitch to on %v", idx) 110 | c.Info.SetUtilizationSwitch(1) 111 | } 112 | } else { 113 | if utilizationSwitch != 0 { 114 | klog.Infof("utSwitchon=%v", 
utSwitchOn) 115 | klog.Infof("Setting UtilizationSwitch to off %v", idx) 116 | c.Info.SetUtilizationSwitch(0) 117 | } 118 | } 119 | } 120 | } 121 | 122 | func watchAndFeedback(lister *nvidia.ContainerLister) { 123 | config.Nvml().Init() 124 | for { 125 | time.Sleep(time.Second * 5) 126 | err := lister.Update() 127 | if err != nil { 128 | klog.Errorf("Failed to update container list: %v", err) 129 | continue 130 | } 131 | Observe(lister) 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /cmd/vgpu-monitor/main.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The HAMi Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package main 18 | 19 | import ( 20 | "volcano.sh/k8s-device-plugin/pkg/monitor/nvidia" 21 | 22 | "k8s.io/klog/v2" 23 | ) 24 | 25 | func main() { 26 | if err := ValidateEnvVars(); err != nil { 27 | klog.Fatalf("Failed to validate environment variables: %v", err) 28 | } 29 | containerLister, err := nvidia.NewContainerLister() 30 | if err != nil { 31 | klog.Fatalf("Failed to create container lister: %v", err) 32 | } 33 | errchannel := make(chan error) 34 | go initMetrics(containerLister) 35 | go watchAndFeedback(containerLister) 36 | for { 37 | err := <-errchannel 38 | klog.Errorf("failed to serve: %v", err) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /cmd/vgpu-monitor/metrics.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The HAMi Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package main 18 | 19 | import ( 20 | "fmt" 21 | "log" 22 | "net/http" 23 | "strings" 24 | "time" 25 | 26 | "volcano.sh/k8s-device-plugin/pkg/monitor/nvidia" 27 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config" 28 | 29 | "github.com/NVIDIA/go-nvml/pkg/nvml" 30 | "github.com/prometheus/client_golang/prometheus" 31 | "github.com/prometheus/client_golang/prometheus/promhttp" 32 | 33 | "k8s.io/apimachinery/pkg/labels" 34 | "k8s.io/client-go/informers" 35 | listerscorev1 "k8s.io/client-go/listers/core/v1" 36 | "k8s.io/klog/v2" 37 | ) 38 | 39 | // ClusterManager is an example for a system that might have been built without 40 | // Prometheus in mind. It models a central manager of jobs running in a 41 | // cluster. Thus, we implement a custom Collector called 42 | // ClusterManagerCollector, which collects information from a ClusterManager 43 | // using its provided methods and turns them into Prometheus Metrics for 44 | // collection. 45 | // 46 | // An additional challenge is that multiple instances of the ClusterManager are 47 | // run within the same binary, each in charge of a different zone. We need to 48 | // make use of wrapping Registerers to be able to register each 49 | // ClusterManagerCollector instance with Prometheus. 50 | type ClusterManager struct { 51 | Zone string 52 | // Contains many more fields not listed in this example. 53 | PodLister listerscorev1.PodLister 54 | containerLister *nvidia.ContainerLister 55 | } 56 | 57 | // ReallyExpensiveAssessmentOfTheSystemState is a mock for the data gathering a 58 | // real cluster manager would have to do. Since it may actually be really 59 | // expensive, it must only be called once per collection. This implementation, 60 | // obviously, only returns some made-up data. 61 | func (c *ClusterManager) ReallyExpensiveAssessmentOfTheSystemState() ( 62 | oomCountByHost map[string]int, ramUsageByHost map[string]float64, 63 | ) { 64 | // Just example fake data. 
65 | oomCountByHost = map[string]int{ 66 | "foo.example.org": 42, 67 | "bar.example.org": 2001, 68 | } 69 | ramUsageByHost = map[string]float64{ 70 | "foo.example.org": 6.023e23, 71 | "bar.example.org": 3.14, 72 | } 73 | return 74 | } 75 | 76 | // ClusterManagerCollector implements the Collector interface. 77 | type ClusterManagerCollector struct { 78 | ClusterManager *ClusterManager 79 | } 80 | 81 | // Descriptors used by the ClusterManagerCollector below. 82 | var ( 83 | hostGPUdesc = prometheus.NewDesc( 84 | "HostGPUMemoryUsage", 85 | "GPU device memory usage", 86 | []string{"deviceidx", "deviceuuid"}, nil, 87 | ) 88 | 89 | hostGPUUtilizationdesc = prometheus.NewDesc( 90 | "HostCoreUtilization", 91 | "GPU core utilization", 92 | []string{"deviceidx", "deviceuuid"}, nil, 93 | ) 94 | 95 | ctrvGPUdesc = prometheus.NewDesc( 96 | "vGPU_device_memory_usage_in_bytes", 97 | "vGPU device usage", 98 | []string{"podnamespace", "podname", "ctrname", "vdeviceid", "deviceuuid"}, nil, 99 | ) 100 | 101 | ctrvGPUlimitdesc = prometheus.NewDesc( 102 | "vGPU_device_memory_limit_in_bytes", 103 | "vGPU device limit", 104 | []string{"podnamespace", "podname", "ctrname", "vdeviceid", "deviceuuid"}, nil, 105 | ) 106 | ctrDeviceMemorydesc = prometheus.NewDesc( 107 | "Device_memory_desc_of_container", 108 | "Container device meory description", 109 | []string{"podnamespace", "podname", "ctrname", "vdeviceid", "deviceuuid", "context", "module", "data", "offset"}, nil, 110 | ) 111 | ctrDeviceUtilizationdesc = prometheus.NewDesc( 112 | "Device_utilization_desc_of_container", 113 | "Container device utilization description", 114 | []string{"podnamespace", "podname", "ctrname", "vdeviceid", "deviceuuid"}, nil, 115 | ) 116 | ctrDeviceLastKernelDesc = prometheus.NewDesc( 117 | "Device_last_kernel_of_container", 118 | "Container device last kernel description", 119 | []string{"podnamespace", "podname", "ctrname", "vdeviceid", "deviceuuid"}, nil, 120 | ) 121 | ) 122 | 123 | // Describe is 
implemented with DescribeByCollect. That's possible because the 124 | // Collect method will always return the same two metrics with the same two 125 | // descriptors. 126 | func (cc ClusterManagerCollector) Describe(ch chan<- *prometheus.Desc) { 127 | ch <- hostGPUdesc 128 | ch <- ctrvGPUdesc 129 | ch <- ctrvGPUlimitdesc 130 | ch <- hostGPUUtilizationdesc 131 | //prometheus.DescribeByCollect(cc, ch) 132 | } 133 | 134 | // Collect first triggers the ReallyExpensiveAssessmentOfTheSystemState. Then it 135 | // creates constant metrics for each host on the fly based on the returned data. 136 | // 137 | // Note that Collect could be called concurrently, so we depend on 138 | // ReallyExpensiveAssessmentOfTheSystemState to be concurrency-safe. 139 | func (cc ClusterManagerCollector) Collect(ch chan<- prometheus.Metric) { 140 | klog.Info("Starting to collect metrics for vGPUMonitor") 141 | containerLister := cc.ClusterManager.containerLister 142 | if err := containerLister.Update(); err != nil { 143 | klog.Error("Update container error: %s", err.Error()) 144 | } 145 | 146 | nvret := config.Nvml().Init() 147 | if nvret != nvml.SUCCESS { 148 | klog.Errorf("nvml Init err= %v", nvret) 149 | } 150 | devnum, nvret := config.Nvml().DeviceGetCount() 151 | if nvret != nvml.SUCCESS { 152 | klog.Errorf("nvml GetDeviceCount err= %v", nvret) 153 | } else { 154 | for ii := 0; ii < devnum; ii++ { 155 | hdev, nvret := config.Nvml().DeviceGetHandleByIndex(ii) 156 | if nvret != nvml.SUCCESS { 157 | klog.Error(nvret) 158 | } 159 | memoryUsed := 0 160 | memory, ret := hdev.GetMemoryInfo() 161 | if ret == nvml.SUCCESS { 162 | memoryUsed = int(memory.Used) 163 | } else { 164 | klog.Error("nvml get memory error ret=", ret) 165 | } 166 | 167 | uuid, nvret := hdev.GetUUID() 168 | if nvret != nvml.SUCCESS { 169 | klog.Error(nvret) 170 | } else { 171 | ch <- prometheus.MustNewConstMetric( 172 | hostGPUdesc, 173 | prometheus.GaugeValue, 174 | float64(memoryUsed), 175 | fmt.Sprint(ii), uuid, 176 | ) 
177 | } 178 | util, nvret := hdev.GetUtilizationRates() 179 | if nvret != nvml.SUCCESS { 180 | klog.Error(nvret) 181 | } else { 182 | ch <- prometheus.MustNewConstMetric( 183 | hostGPUUtilizationdesc, 184 | prometheus.GaugeValue, 185 | float64(util.Gpu), 186 | fmt.Sprint(ii), uuid, 187 | ) 188 | } 189 | 190 | } 191 | } 192 | 193 | pods, err := cc.ClusterManager.PodLister.List(labels.Everything()) 194 | if err != nil { 195 | klog.Error("failed to list pods with err=", err.Error()) 196 | } 197 | nowSec := time.Now().Unix() 198 | 199 | containers := containerLister.ListContainers() 200 | for _, pod := range pods { 201 | for _, c := range containers { 202 | //for sridx := range srPodList { 203 | // if srPodList[sridx].sr == nil { 204 | // continue 205 | // } 206 | if c.Info == nil { 207 | continue 208 | } 209 | //podUID := strings.Split(srPodList[sridx].idstr, "_")[0] 210 | //ctrName := strings.Split(srPodList[sridx].idstr, "_")[1] 211 | podUID := c.PodUID 212 | ctrName := c.ContainerName 213 | if strings.Compare(string(pod.UID), podUID) != 0 { 214 | continue 215 | } 216 | fmt.Println("Pod matched!", pod.Name, pod.Namespace, pod.Labels) 217 | for _, ctr := range pod.Spec.Containers { 218 | if strings.Compare(ctr.Name, ctrName) != 0 { 219 | continue 220 | } 221 | fmt.Println("container matched", ctr.Name) 222 | //err := setHostPid(pod, pod.Status.ContainerStatuses[ctridx], &srPodList[sridx]) 223 | //if err != nil { 224 | // fmt.Println("setHostPid filed", err.Error()) 225 | //} 226 | //fmt.Println("sr.list=", srPodList[sridx].sr) 227 | podlabels := make(map[string]string) 228 | for idx, val := range pod.Labels { 229 | idxfix := strings.ReplaceAll(idx, "-", "_") 230 | valfix := strings.ReplaceAll(val, "-", "_") 231 | podlabels[idxfix] = valfix 232 | } 233 | for i := 0; i < c.Info.DeviceNum(); i++ { 234 | uuid := c.Info.DeviceUUID(i)[0:40] 235 | memoryTotal := c.Info.DeviceMemoryTotal(i) 236 | memoryLimit := c.Info.DeviceMemoryLimit(i) 237 | memoryContextSize := 
c.Info.DeviceMemoryContextSize(i) 238 | memoryModuleSize := c.Info.DeviceMemoryModuleSize(i) 239 | memoryBufferSize := c.Info.DeviceMemoryBufferSize(i) 240 | memoryOffset := c.Info.DeviceMemoryOffset(i) 241 | smUtil := c.Info.DeviceSmUtil(i) 242 | lastKernelTime := c.Info.LastKernelTime() 243 | 244 | //fmt.Println("uuid=", uuid, "length=", len(uuid)) 245 | ch <- prometheus.MustNewConstMetric( 246 | ctrvGPUdesc, 247 | prometheus.GaugeValue, 248 | float64(memoryTotal), 249 | pod.Namespace, pod.Name, ctrName, fmt.Sprint(i), uuid, /*,string(sr.sr.uuids[i].uuid[:])*/ 250 | ) 251 | ch <- prometheus.MustNewConstMetric( 252 | ctrvGPUlimitdesc, 253 | prometheus.GaugeValue, 254 | float64(memoryLimit), 255 | pod.Namespace, pod.Name, ctrName, fmt.Sprint(i), uuid, /*,string(sr.sr.uuids[i].uuid[:])*/ 256 | ) 257 | ch <- prometheus.MustNewConstMetric( 258 | ctrDeviceMemorydesc, 259 | prometheus.CounterValue, 260 | float64(memoryTotal), 261 | pod.Namespace, pod.Name, ctrName, fmt.Sprint(i), uuid, 262 | fmt.Sprint(memoryContextSize), fmt.Sprint(memoryModuleSize), fmt.Sprint(memoryBufferSize), fmt.Sprint(memoryOffset), 263 | ) 264 | ch <- prometheus.MustNewConstMetric( 265 | ctrDeviceUtilizationdesc, 266 | prometheus.GaugeValue, 267 | float64(smUtil), 268 | pod.Namespace, pod.Name, ctrName, fmt.Sprint(i), uuid, 269 | ) 270 | if lastKernelTime > 0 { 271 | lastSec := nowSec - lastKernelTime 272 | if lastSec < 0 { 273 | lastSec = 0 274 | } 275 | ch <- prometheus.MustNewConstMetric( 276 | ctrDeviceLastKernelDesc, 277 | prometheus.GaugeValue, 278 | float64(lastSec), 279 | pod.Namespace, pod.Name, ctrName, fmt.Sprint(i), uuid, 280 | ) 281 | } 282 | } 283 | } 284 | } 285 | } 286 | } 287 | 288 | // NewClusterManager first creates a Prometheus-ignorant ClusterManager 289 | // instance. Then, it creates a ClusterManagerCollector for the just created 290 | // ClusterManager. Finally, it registers the ClusterManagerCollector with a 291 | // wrapping Registerer that adds the zone as a label. 
In this way, the metrics 292 | // collected by different ClusterManagerCollectors do not collide. 293 | func NewClusterManager(zone string, reg prometheus.Registerer, containerLister *nvidia.ContainerLister) *ClusterManager { 294 | c := &ClusterManager{ 295 | Zone: zone, 296 | containerLister: containerLister, 297 | } 298 | 299 | informerFactory := informers.NewSharedInformerFactoryWithOptions(containerLister.Clientset(), time.Hour*1) 300 | c.PodLister = informerFactory.Core().V1().Pods().Lister() 301 | stopCh := make(chan struct{}) 302 | informerFactory.Start(stopCh) 303 | 304 | cc := ClusterManagerCollector{ClusterManager: c} 305 | prometheus.WrapRegistererWith(prometheus.Labels{"zone": zone}, reg).MustRegister(cc) 306 | return c 307 | } 308 | 309 | func initMetrics(containerLister *nvidia.ContainerLister) { 310 | // Since we are dealing with custom Collector implementations, it might 311 | // be a good idea to try it out with a pedantic registry. 312 | klog.Info("Initializing metrics for vGPUmonitor") 313 | reg := prometheus.NewRegistry() 314 | //reg := prometheus.NewPedanticRegistry() 315 | 316 | // Construct cluster managers. In real code, we would assign them to 317 | // variables to then do something with them. 318 | NewClusterManager("vGPU", reg, containerLister) 319 | 320 | http.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{})) 321 | log.Fatal(http.ListenAndServe(":9394", nil)) 322 | } 323 | -------------------------------------------------------------------------------- /cmd/vgpu-monitor/validation.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The HAMi Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package main 18 | 19 | import ( 20 | "fmt" 21 | "os" 22 | ) 23 | 24 | var requiredEnvVars = map[string]bool{ 25 | "HOOK_PATH": true, 26 | "OTHER_ENV_VAR": false, 27 | } 28 | 29 | func ValidateEnvVars() error { 30 | for envVar, required := range requiredEnvVars { 31 | _, exists := os.LookupEnv(envVar) 32 | if required && !exists { 33 | return fmt.Errorf("required environment variable %s not set", envVar) 34 | } 35 | } 36 | return nil 37 | } 38 | -------------------------------------------------------------------------------- /cmd/vgpu/main.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
package main

import (
	"fmt"
	"net/http"
	_ "net/http/pprof"
	"syscall"

	"github.com/NVIDIA/go-nvml/pkg/nvml"
	"github.com/fsnotify/fsnotify"
	"github.com/spf13/cobra"
	"github.com/spf13/viper"
	"k8s.io/klog/v2"
	pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
	nvidiadevice "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu"
	"volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config"
	"volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/util"
)

var (
	// failOnInitErrorFlag: when true, a failed NVML init aborts the process
	// instead of blocking indefinitely (see start()).
	failOnInitErrorFlag bool
	// migStrategyFlag selects how MIG-capable GPUs are exposed
	// (none | single | mixed).
	migStrategyFlag string

	// rootCmd is the cobra entry point; Run delegates everything to start().
	rootCmd = &cobra.Command{
		Use:   "device-plugin",
		Short: "kubernetes vgpu device-plugin",
		Run: func(cmd *cobra.Command, args []string) {
			if err := start(); err != nil {
				klog.Fatal(err)
			}
		},
	}
)

// devicePluginConfigs mirrors a per-node JSON configuration document.
// NOTE(review): not referenced anywhere in this file — presumably consumed
// elsewhere or dead; confirm before removing.
type devicePluginConfigs struct {
	Nodeconfig []struct {
		Name                string  `json:"name"`
		Devicememoryscaling float64 `json:"devicememoryscaling"`
		Devicesplitcount    int     `json:"devicesplitcount"`
		Migstrategy         string  `json:"migstrategy"`
	} `json:"nodeconfig"`
}

// init registers command-line flags, binds NODE_NAME from the environment,
// and attaches the version subcommand.
func init() {
	// https://github.com/spf13/viper/issues/461
	viper.BindEnv("node-name", "NODE_NAME")

	rootCmd.Flags().SortFlags = false
	rootCmd.PersistentFlags().SortFlags = false

	rootCmd.Flags().StringVar(&migStrategyFlag, "mig-strategy", "none", "the desired strategy for exposing MIG devices on GPUs that support it:\n\t\t[none | single | mixed]")
	rootCmd.Flags().BoolVar(&failOnInitErrorFlag, "fail-on-init-error", true, "fail the plugin if an error is encountered during initialization, otherwise block indefinitely")
	rootCmd.Flags().UintVar(&config.DeviceSplitCount, "device-split-count", 2, "the number for NVIDIA device split")
	rootCmd.Flags().UintVar(&config.GPUMemoryFactor, "gpu-memory-factor", 1, "the default gpu memory block size is 1MB")
	rootCmd.Flags().Float64Var(&config.DeviceCoresScaling, "device-cores-scaling", 1.0, "the ratio for NVIDIA device cores scaling")
	rootCmd.Flags().StringVar(&config.NodeName, "node-name", viper.GetString("node-name"), "node name")

	rootCmd.PersistentFlags().AddGoFlagSet(util.GlobalFlagSet())
	rootCmd.AddCommand(config.VersionCmd)
}

// start runs the device-plugin lifecycle: it initializes NVML, watches the
// kubelet socket directory and OS signals, and (re)creates the per-strategy
// plugins. The restart/events labels implement a restart loop: any plugin
// start failure, a kubelet socket re-creation, or SIGHUP jumps back to
// `restart`; any other signal stops all plugins and returns.
func start() error {
	// pprof endpoint for live debugging; failure to bind is only logged.
	go func() {
		klog.Info("Starting pprof server, listen on port 6060")
		klog.Info(http.ListenAndServe(":6060", nil))
	}()

	klog.Info("Loading NVML")
	if nvret := config.Nvml().Init(); nvret != nvml.SUCCESS {
		klog.Infof("Failed to initialize NVML: %v.", nvret)
		klog.Infof("If this is a GPU node, did you set the docker default runtime to `nvidia`?")
		klog.Infof("You can check the prerequisites at: https://github.com/NVIDIA/k8s-device-plugin#prerequisites")
		klog.Infof("You can learn how to set the runtime at: https://github.com/NVIDIA/k8s-device-plugin#quick-start")
		klog.Infof("If this is not a GPU node, you should set up a toleration or nodeSelector to only deploy this plugin on GPU nodes")
		if failOnInitErrorFlag {
			return fmt.Errorf("failed to initialize NVML: %v", nvret)
		}
		// Block forever on non-GPU nodes when fail-on-init-error is false.
		select {}
	}
	defer func() { klog.Info("Shutdown of NVML returned:", config.Nvml().Shutdown()) }()

	klog.Info("Starting FS watcher.")
	watcher, err := NewFSWatcher(pluginapi.DevicePluginPath)
	if err != nil {
		return fmt.Errorf("failed to create FS watcher: %v", err)
	}
	defer watcher.Close()

	klog.Info("Starting OS watcher.")
	sigs := NewOSWatcher(syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)

	nvidiaCfg := util.LoadNvidiaConfig()

	cache := nvidiadevice.NewDeviceCache()
	cache.Start()
	defer cache.Stop()

	register := nvidiadevice.NewDeviceRegister(cache)
	register.Start()
	defer register.Stop()

	var plugins []*nvidiadevice.NvidiaDevicePlugin
restart:
	// If we are restarting, idempotently stop any running plugins before
	// recreating them below.
	for _, p := range plugins {
		p.Stop()
	}
	klog.Info("Retreiving plugins.")
	migStrategy, err := nvidiadevice.NewMigStrategy(migStrategyFlag)
	if err != nil {
		return fmt.Errorf("error creating MIG strategy: %v", err)
	}
	plugins = migStrategy.GetPlugins(nvidiaCfg, cache)

	started := 0
	pluginStartError := make(chan struct{})
	for _, p := range plugins {
		// Just continue if there are no devices to serve for plugin p.
		if len(p.Devices()) == 0 {
			continue
		}

		// Start the gRPC server for plugin p and connect it with the kubelet.
		if err := p.Start(); err != nil {
			//klog.SetOutput(os.Stderr)
			klog.Info("Could not contact Kubelet, retrying. Did you enable the device plugin feature gate?")
			klog.Info("You can check the prerequisites at: https://github.com/NVIDIA/k8s-device-plugin#prerequisites")
			klog.Info("You can learn how to set the runtime at: https://github.com/NVIDIA/k8s-device-plugin#quick-start")
			// Closing the channel makes the events loop jump back to restart.
			close(pluginStartError)
			goto events
		}
		started++
	}

	if started == 0 {
		klog.Info("No devices found. Waiting indefinitely.")
	}

events:
	// Start an infinite loop, waiting for several indicators to either log
	// some messages, trigger a restart of the plugins, or exit the program.
	for {
		select {
		// If there was an error starting any plugins, restart them all.
		case <-pluginStartError:
			goto restart

		// Detect a kubelet restart by watching for a newly created
		// 'pluginapi.KubeletSocket' file. When this occurs, restart this loop,
		// restarting all of the plugins in the process.
		case event := <-watcher.Events:
			if event.Name == pluginapi.KubeletSocket && event.Op&fsnotify.Create == fsnotify.Create {
				klog.Infof("inotify: %s created, restarting.", pluginapi.KubeletSocket)
				goto restart
			}

		// Watch for any other fs errors and log them.
		case err := <-watcher.Errors:
			klog.Infof("inotify: %s", err)

		// Watch for any signals from the OS. On SIGHUP, restart this loop,
		// restarting all of the plugins in the process. On all other
		// signals, exit the loop and exit the program.
		case s := <-sigs:
			switch s {
			case syscall.SIGHUP:
				klog.Info("Received SIGHUP, restarting.")
				goto restart
			default:
				klog.Infof("Received signal %v, shutting down.", s)
				for _, p := range plugins {
					p.Stop()
				}
				// Labeled break exits the events for-loop, not the switch.
				break events
			}
		}
	}
	return nil
}

// main executes the cobra root command and aborts on any error.
func main() {
	if err := rootCmd.Execute(); err != nil {
		klog.Fatal(err)
	}
}
15 | */ 16 | 17 | package main 18 | 19 | import ( 20 | "os" 21 | "os/signal" 22 | 23 | "github.com/fsnotify/fsnotify" 24 | ) 25 | 26 | func NewFSWatcher(files ...string) (*fsnotify.Watcher, error) { 27 | watcher, err := fsnotify.NewWatcher() 28 | if err != nil { 29 | return nil, err 30 | } 31 | 32 | for _, f := range files { 33 | err = watcher.Add(f) 34 | if err != nil { 35 | watcher.Close() 36 | return nil, err 37 | } 38 | } 39 | 40 | return watcher, nil 41 | } 42 | 43 | func NewOSWatcher(sigs ...os.Signal) chan os.Signal { 44 | sigChan := make(chan os.Signal, 1) 45 | signal.Notify(sigChan, sigs...) 46 | 47 | return sigChan 48 | } 49 | -------------------------------------------------------------------------------- /doc/config.md: -------------------------------------------------------------------------------- 1 | # Global Config 2 | 3 | ## Device Configs: ConfigMap 4 | 5 | **Note:** 6 | All the configurations listed below are managed within the `volcano-vgpu-device-config` ConfigMap. 7 | You can update these configurations using the following methods: 8 | 9 | 1. Directly edit the ConfigMap: If `volcano-vgpu-device-plugin` has already been successfully installed, you can manually update the `volcano-vgpu-device-config` ConfigMap using the `kubectl edit` command to manually update the hami-scheduler-device ConfigMap. 10 | 11 | ```bash 12 | kubectl edit configmap volcano-vgpu-device-config -n 13 | ``` 14 | 15 | After making changes, restart the volcano-vgpu-device-plugin and volcano-scheduler to apply the updated configurations. 16 | 17 | * `nvidia.deviceMemoryScaling`: 18 | Float type, by default: 1. The ratio for NVIDIA device memory scaling, can be greater than 1 (enable virtual device memory, experimental feature). For NVIDIA GPU with *M* memory, if we set `nvidia.deviceMemoryScaling` argument to *S*, vGPUs splitted by this GPU will totally get `S * M` memory in Kubernetes with our device plugin. 
* `nvidia.migstrategy`:
  String type, "none" for ignoring MIG features or "mixed" for allocating MIG devices as separate resources. Default "none"
* `operatingmode`:
  String type, `hami-core` for using HAMi-core for container resource limitation, `mig` for using MIG for container resource limitation (only available on GPUs of the Ampere architecture or later)
14 | For example: 15 | - args: ["--gpu-strategy=number"] will let device plugin using the gpu-number strategy 16 | - args: ["--gpu-strategy=share","--gpu-memory-factor=10"] will let device plugin using the gpu-share strategy, and memory factor is 10MB 17 | 18 | ### As a configuration file 19 | ``` 20 | version: v1 21 | flags: 22 | GPUStrategy: "number" 23 | ``` 24 | 25 | ### Configuration Option Details 26 | **`GPU_STRATEGY`(string)**: 27 | the desired strategy for exposing GPU devices 28 | 29 | `[number | share ] (default 'share')` 30 | 31 | The `GPU_STRATEGY` option configures the daemonset to be able to expose 32 | on GPU devices in numbers or sharing mode. More information on what 33 | these strategies are and how to use it in Volcano can be found in Volcano scheduler. 34 | 35 | **`GPU_MEMORY_FACTOR`(uint)**: 36 | the desired memory factor for exposing GPU shared memory virtual devices 37 | 38 | `(default 1)` 39 | 40 | The `GPU_MEMORY_FACTOR` option configures the daemonset to be able to expose 41 | on GPU shared memory virtual devices size. By default each block is set to be 1MB, 42 | but users who have large gpu memory can specify a larger number such as 10MB, 100MB. 43 | 44 | **`CONFIG_FILE`**: 45 | point the plugin at a configuration file instead of relying on command line 46 | flags or environment variables 47 | 48 | `(default '')` 49 | 50 | The order of precedence for setting each option is (1) command line flag, (2) 51 | environment variable, (3) configuration file. In this way, one could use a 52 | pre-defined configuration file, but then override the values set in it at 53 | launch time. 
54 | -------------------------------------------------------------------------------- /doc/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Project-HAMi/volcano-vgpu-device-plugin/9fe49cddc9d40d387f5c4f742a4f4786bbb74642/doc/example.png -------------------------------------------------------------------------------- /doc/hard_limit.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Project-HAMi/volcano-vgpu-device-plugin/9fe49cddc9d40d387f5c4f742a4f4786bbb74642/doc/hard_limit.jpg -------------------------------------------------------------------------------- /doc/vgpu-on-volcano.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Project-HAMi/volcano-vgpu-device-plugin/9fe49cddc9d40d387f5c4f742a4f4786bbb74642/doc/vgpu-on-volcano.pdf -------------------------------------------------------------------------------- /doc/vgpu_device_plugin_metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Project-HAMi/volcano-vgpu-device-plugin/9fe49cddc9d40d387f5c4f742a4f4786bbb74642/doc/vgpu_device_plugin_metrics.png -------------------------------------------------------------------------------- /docker/Dockerfile.ubuntu20.04: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM ubuntu:20.04 AS builder 16 | ARG TARGETARCH 17 | RUN apt-get update 18 | RUN apt-get -y install ca-certificates g++ wget 19 | RUN wget -qO- https://storage.googleapis.com/golang/go1.23.7.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -zx 20 | ENV GOPATH=/go 21 | ENV PATH=$GOPATH/bin:/usr/local/go/bin:$PATH 22 | WORKDIR /go/src/volcano.sh/devices 23 | 24 | COPY . . 25 | RUN go env -w GOARCH=${TARGETARCH} 26 | RUN go env -w CGO_LDFLAGS_ALLOW='-Wl,--unresolved-symbols=ignore-in-object-files' 27 | RUN go build -ldflags="-s -w" -o volcano-vgpu-device-plugin ./cmd/vgpu 28 | RUN go build -ldflags="-s -w" -o volcano-vgpu-monitor ./cmd/vgpu-monitor 29 | RUN go install github.com/NVIDIA/mig-parted/cmd/nvidia-mig-parted@latest 30 | 31 | FROM nvidia/cuda:12.2.0-devel-ubuntu20.04 AS nvidia_builder 32 | ARG TARGETARCH 33 | RUN apt-get update 34 | RUN apt-get -y install wget openssl libssl-dev 35 | RUN case "${TARGETARCH}" in \ 36 | "amd64") wget https://cmake.org/files/v3.19/cmake-3.19.8-Linux-x86_64.tar.gz ;; \ 37 | "arm64") wget https://cmake.org/files/v3.19/cmake-3.19.8-Linux-aarch64.tar.gz ;; \ 38 | *) echo "Unsupported architecture: ${TARGETARCH}" && exit 1 ;; \ 39 | esac && \ 40 | tar -xzf cmake-3.19.8-Linux-*.tar.gz -C /opt && \ 41 | ln -s /opt/cmake-3.19.8-Linux-*/bin/cmake /usr/local/bin/cmake && \ 42 | rm cmake-3.19.8-Linux-*.tar.gz 43 | COPY ./libvgpu /libvgpu 44 | WORKDIR /libvgpu 45 | RUN rm -rf /libvgpu/build 46 | RUN bash ./build.sh 47 | 48 | FROM ubuntu:24.04 49 | 50 | ENV NVIDIA_VISIBLE_DEVICES=all 51 | ENV 
NVIDIA_DRIVER_CAPABILITIES=utility 52 | 53 | COPY --from=builder /go/src/volcano.sh/devices/volcano-vgpu-device-plugin /usr/bin/volcano-vgpu-device-plugin 54 | COPY --from=builder /go/src/volcano.sh/devices/volcano-vgpu-monitor /usr/bin/volcano-vgpu-monitor 55 | COPY --from=builder /go/bin/nvidia-mig-parted /usr/bin/nvidia-mig-parted 56 | COPY --from=builder /go/src/volcano.sh/devices/lib/nvidia/ld.so.preload /k8s-vgpu/lib/nvidia/ 57 | COPY --from=nvidia_builder /libvgpu/build/libvgpu.so /k8s-vgpu/lib/nvidia/ 58 | 59 | ENTRYPOINT ["volcano-vgpu-device-plugin"] 60 | -------------------------------------------------------------------------------- /examples/gpu-share.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: pod1 5 | spec: 6 | restartPolicy: OnFailure 7 | schedulerName: volcano 8 | containers: 9 | - image: nvidia/cuda:10.1-base-ubuntu18.04 10 | name: pod1-ctr 11 | command: ["sleep"] 12 | args: ["100000"] 13 | resources: 14 | limits: 15 | volcano.sh/gpu-memory: 1024 # 1024MB 16 | -------------------------------------------------------------------------------- /examples/vgpu-case01.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: test1 5 | spec: 6 | restartPolicy: OnFailure 7 | schedulerName: volcano 8 | containers: 9 | - image: ubuntu:20.04 10 | name: pod1-ctr 11 | command: ["sleep"] 12 | args: ["100000"] 13 | resources: 14 | limits: 15 | volcano.sh/vgpu-memory: 1024 16 | volcano.sh/vgpu-number: 1 17 | -------------------------------------------------------------------------------- /examples/vgpu-case02.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: pod1 5 | spec: 6 | restartPolicy: OnFailure 7 | schedulerName: volcano 8 | containers: 9 | - image: nvidia/cuda:11.2.2-base-ubi8 10 | 
name: pod1-ctr 11 | command: ["sleep"] 12 | args: ["100000"] 13 | resources: 14 | limits: 15 | volcano.sh/vgpu-number: 1 #request 1 GPU 16 | volcano.sh/vgpu-cores: 50 #each GPU request 50% of compute core resources 17 | volcano.sh/vgpu-memory: 10240 #each GPU request 10G device memory 18 | -------------------------------------------------------------------------------- /examples/vgpu-case03.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: gpu-pod12 5 | spec: 6 | schedulerName: volcano 7 | containers: 8 | - name: ubuntu-container 9 | image: ubuntu:18.04 10 | command: ["bash", "-c", "sleep 86400"] 11 | resources: 12 | limits: 13 | volcano.sh/vgpu-number: 2 # requesting 2 vGPUs 14 | volcano.sh/vgpu-memory: 2000 15 | #volcano.sh/vgpu-memory-percentage: 50 #Each vGPU containers 50% device memory of that GPU. Can not be used with nvidia.com/gpumem 16 | - name: ubuntu-container0 17 | image: ubuntu:18.04 18 | command: ["bash", "-c", "sleep 86400"] 19 | - name: ubuntu-container1 20 | image: ubuntu:18.04 21 | command: ["bash", "-c", "sleep 86400"] 22 | resources: 23 | limits: 24 | volcano.sh/vgpu-number: 2 # requesting 2 vGPUs 25 | volcano.sh/vgpu-memory: 3000 26 | 27 | -------------------------------------------------------------------------------- /examples/vgpu-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: resnet101-deployment 5 | spec: 6 | selector: 7 | matchLabels: 8 | app: resnet101-server 9 | replicas: 10 10 | template: 11 | metadata: 12 | labels: 13 | app: resnet101-server 14 | spec: 15 | schedulerName: volcano 16 | containers: 17 | - name: resnet101-container 18 | image: ubuntu:18.04 19 | command: ["sleep","infinity"] 20 | resources: 21 | limits: 22 | volcano.sh/vgpu-number: 1 # requesting 2 vGPUs 23 | volcano.sh/vgpu-memory: 16384 
-------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module volcano.sh/k8s-device-plugin 2 | 3 | go 1.23 4 | 5 | require ( 6 | github.com/NVIDIA/go-gpuallocator v0.5.0 7 | github.com/NVIDIA/go-nvlib v0.7.1 8 | github.com/NVIDIA/go-nvml v0.12.4-1 9 | github.com/fsnotify/fsnotify v1.4.9 10 | github.com/prometheus/client_golang v1.0.0 11 | github.com/spf13/cobra v0.0.5 12 | github.com/spf13/viper v1.3.2 13 | github.com/stretchr/testify v1.10.0 14 | github.com/urfave/cli/v2 v2.4.0 15 | golang.org/x/net v0.0.0-20200421231249-e086a090c8fd 16 | google.golang.org/grpc v1.32.0 17 | gopkg.in/yaml.v2 v2.2.8 18 | k8s.io/api v0.18.2 19 | k8s.io/apimachinery v0.18.2 20 | k8s.io/client-go v0.18.2 21 | k8s.io/klog v1.0.0 22 | k8s.io/klog/v2 v2.80.1 23 | k8s.io/kubelet v0.0.0 24 | sigs.k8s.io/yaml v1.2.0 25 | ) 26 | 27 | require ( 28 | github.com/beorn7/perks v1.0.0 // indirect 29 | github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect 30 | github.com/davecgh/go-spew v1.1.1 // indirect 31 | github.com/go-logr/logr v1.2.0 // indirect 32 | github.com/gogo/protobuf v1.3.1 // indirect 33 | github.com/golang/protobuf v1.5.0 // indirect 34 | github.com/google/go-cmp v0.5.5 // indirect 35 | github.com/google/gofuzz v1.1.0 // indirect 36 | github.com/google/uuid v1.6.0 // indirect 37 | github.com/googleapis/gnostic v0.1.0 // indirect 38 | github.com/hashicorp/golang-lru v0.5.1 // indirect 39 | github.com/hashicorp/hcl v1.0.0 // indirect 40 | github.com/imdario/mergo v0.3.5 // indirect 41 | github.com/inconshreveable/mousetrap v1.0.0 // indirect 42 | github.com/json-iterator/go v1.1.8 // indirect 43 | github.com/magiconair/properties v1.8.1 // indirect 44 | github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect 45 | github.com/mitchellh/mapstructure v1.1.2 // indirect 46 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 47 | 
github.com/modern-go/reflect2 v1.0.1 // indirect 48 | github.com/pelletier/go-toml v1.2.0 // indirect 49 | github.com/pmezard/go-difflib v1.0.0 // indirect 50 | github.com/prometheus/client_model v0.2.0 // indirect 51 | github.com/prometheus/common v0.4.1 // indirect 52 | github.com/prometheus/procfs v0.0.2 // indirect 53 | github.com/russross/blackfriday/v2 v2.1.0 // indirect 54 | github.com/spf13/afero v1.2.2 // indirect 55 | github.com/spf13/cast v1.3.0 // indirect 56 | github.com/spf13/jwalterweatherman v1.1.0 // indirect 57 | github.com/spf13/pflag v1.0.5 // indirect 58 | golang.org/x/crypto v0.0.0-20200220183623-bac4c82f6975 // indirect 59 | golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 // indirect 60 | golang.org/x/sys v0.0.0-20200413165638-669c56c373c4 // indirect 61 | golang.org/x/text v0.3.2 // indirect 62 | golang.org/x/time v0.0.0-20190308202827-9d24e82272b4 // indirect 63 | google.golang.org/appengine v1.5.0 // indirect 64 | google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55 // indirect 65 | google.golang.org/protobuf v1.34.2 // indirect 66 | gopkg.in/inf.v0 v0.9.1 // indirect 67 | gopkg.in/yaml.v3 v3.0.1 // indirect 68 | k8s.io/utils v0.0.0-20200324210504-a9aa75ae1b89 // indirect 69 | sigs.k8s.io/structured-merge-diff/v3 v3.0.0 // indirect 70 | ) 71 | 72 | replace ( 73 | k8s.io/api => k8s.io/api v0.18.2 74 | k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.18.2 75 | k8s.io/apimachinery => k8s.io/apimachinery v0.18.2 76 | k8s.io/apiserver => k8s.io/apiserver v0.18.2 77 | k8s.io/cli-runtime => k8s.io/cli-runtime v0.18.2 78 | k8s.io/client-go => k8s.io/client-go v0.18.2 79 | k8s.io/cloud-provider => k8s.io/cloud-provider v0.18.2 80 | k8s.io/cluster-bootstrap => k8s.io/cluster-bootstrap v0.18.2 81 | k8s.io/code-generator => k8s.io/code-generator v0.18.2 82 | k8s.io/component-base => k8s.io/component-base v0.18.2 83 | k8s.io/cri-api => k8s.io/cri-api v0.18.2 84 | k8s.io/csi-translation-lib => 
k8s.io/csi-translation-lib v0.18.2 85 | k8s.io/kube-aggregator => k8s.io/kube-aggregator v0.18.2 86 | k8s.io/kube-controller-manager => k8s.io/kube-controller-manager v0.18.2 87 | k8s.io/kube-proxy => k8s.io/kube-proxy v0.18.2 88 | k8s.io/kube-scheduler => k8s.io/kube-scheduler v0.18.2 89 | k8s.io/kubectl => k8s.io/kubectl v0.18.2 90 | k8s.io/kubelet => k8s.io/kubelet v0.18.2 91 | k8s.io/kubernetes => k8s.io/kubernetes v1.18.2 92 | k8s.io/legacy-cloud-providers => k8s.io/legacy-cloud-providers v0.18.2 93 | k8s.io/metrics => k8s.io/metrics v0.18.2 94 | k8s.io/sample-apiserver => k8s.io/sample-apiserver v0.18.2 95 | ) 96 | -------------------------------------------------------------------------------- /lib/nvidia/ld.so.preload: -------------------------------------------------------------------------------- 1 | /usr/local/vgpu/libvgpu.so -------------------------------------------------------------------------------- /pkg/apis/config.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package apis 18 | 19 | import ( 20 | "fmt" 21 | "io" 22 | "log" 23 | "os" 24 | 25 | cli "github.com/urfave/cli/v2" 26 | "sigs.k8s.io/yaml" 27 | ) 28 | 29 | // Version indicates the version of the 'Config' struct used to hold configuration information. 
30 | const Version = "v1beta1" 31 | 32 | // Config is a versioned struct used to hold configuration information. 33 | type Config struct { 34 | Version string `json:"version" yaml:"version"` 35 | Flags Flags `json:"flags,omitempty" yaml:"flags,omitempty"` 36 | } 37 | 38 | // NewConfig builds out a Config struct from a config file (or command line flags). 39 | // The data stored in the config will be populated in order of precedence from 40 | // (1) command line, (2) environment variable, (3) config file. 41 | func NewConfig(c *cli.Context, flags []cli.Flag) (*Config, error) { 42 | config := &Config{ 43 | Version: Version, 44 | } 45 | 46 | log.Println(c.String("gpu-strategy")) 47 | log.Println(c.Uint("gpu-memory-factor")) 48 | 49 | configFile := c.String("config-file") 50 | if configFile != "" { 51 | var err error 52 | config, err = parseConfig(configFile) 53 | if err != nil { 54 | return nil, fmt.Errorf("unable to parse config file: %v", err) 55 | } 56 | } 57 | 58 | config.Flags.CommandLineFlags = NewCommandLineFlags(c) 59 | 60 | return config, nil 61 | } 62 | 63 | // parseConfig parses a config file as either YAML of JSON and unmarshals it into a Config struct. 
64 | func parseConfig(configFile string) (*Config, error) { 65 | reader, err := os.Open(configFile) 66 | if err != nil { 67 | return nil, fmt.Errorf("error opening config file: %v", err) 68 | } 69 | defer reader.Close() 70 | 71 | config, err := parseConfigFrom(reader) 72 | if err != nil { 73 | return nil, fmt.Errorf("error parsing config file: %v", err) 74 | } 75 | 76 | return config, nil 77 | } 78 | 79 | func parseConfigFrom(reader io.Reader) (*Config, error) { 80 | var err error 81 | var configYaml []byte 82 | 83 | configYaml, err = io.ReadAll(reader) 84 | if err != nil { 85 | return nil, fmt.Errorf("read error: %v", err) 86 | } 87 | 88 | var config Config 89 | err = yaml.Unmarshal(configYaml, &config) 90 | if err != nil { 91 | return nil, fmt.Errorf("unmarshal error: %v", err) 92 | } 93 | 94 | if config.Version == "" { 95 | config.Version = Version 96 | } 97 | 98 | if config.Version != Version { 99 | return nil, fmt.Errorf("unknown version: %v", config.Version) 100 | } 101 | 102 | return &config, nil 103 | } 104 | -------------------------------------------------------------------------------- /pkg/apis/flags.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package apis 18 | 19 | import ( 20 | cli "github.com/urfave/cli/v2" 21 | ) 22 | 23 | // Flags holds the full list of flags used to configure the device plugin and GFD. 24 | type Flags struct { 25 | *CommandLineFlags 26 | } 27 | 28 | // CommandLineFlags holds the list of command line flags used to configure the device plugin and GFD. 29 | type CommandLineFlags struct { 30 | GPUStrategy string `json:"GPUStrategy" yaml:"GPUStrategy"` 31 | GPUMemoryFactor uint `json:"GPUMemoryFactor" yaml:"GPUMemoryFactor"` 32 | } 33 | 34 | func NewCommandLineFlags(c *cli.Context) *CommandLineFlags { 35 | return &CommandLineFlags{ 36 | GPUStrategy: c.String("gpu-strategy"), 37 | GPUMemoryFactor: c.Uint("gpu-memory-factor"), 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /pkg/apis/flags_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package apis 18 | 19 | import ( 20 | "encoding/json" 21 | "fmt" 22 | "testing" 23 | 24 | "github.com/stretchr/testify/require" 25 | ) 26 | 27 | func TestUnmarshalFlags(t *testing.T) { 28 | testCases := []struct { 29 | input string 30 | output Flags 31 | err bool 32 | }{ 33 | { 34 | input: ``, 35 | err: true, 36 | }, 37 | { 38 | input: `{}`, 39 | output: Flags{}, 40 | }, 41 | { 42 | input: `{ 43 | "GPUStrategy": "number" 44 | }`, 45 | output: Flags{ 46 | &CommandLineFlags{ 47 | GPUStrategy: "number", 48 | }, 49 | }, 50 | }, 51 | } 52 | 53 | for i, tc := range testCases { 54 | t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) { 55 | var output Flags 56 | err := json.Unmarshal([]byte(tc.input), &output) 57 | if tc.err { 58 | require.Error(t, err) 59 | return 60 | } 61 | require.NoError(t, err) 62 | require.Equal(t, tc.output, output) 63 | }) 64 | } 65 | } 66 | 67 | func TestMarshalFlags(t *testing.T) { 68 | testCases := []struct { 69 | input Flags 70 | output string 71 | err bool 72 | }{ 73 | { 74 | input: Flags{ 75 | &CommandLineFlags{ 76 | GPUStrategy: "number", 77 | }, 78 | }, 79 | output: `{ 80 | "GPUStrategy": "number" 81 | }`, 82 | }, 83 | } 84 | 85 | for i, tc := range testCases { 86 | t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) { 87 | output, err := json.Marshal(tc.input) 88 | if tc.err { 89 | require.Error(t, err) 90 | return 91 | } 92 | require.NoError(t, err) 93 | require.JSONEq(t, tc.output, string(output)) 94 | }) 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /pkg/filewatcher/filewatcher.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package filewatcher 18 | 19 | import ( 20 | "github.com/fsnotify/fsnotify" 21 | ) 22 | 23 | // NewFileWatcher creates a file watcher watching the given files. 24 | func NewFileWatcher(files ...string) (*fsnotify.Watcher, error) { 25 | watcher, err := fsnotify.NewWatcher() 26 | if err != nil { 27 | return nil, err 28 | } 29 | 30 | for _, f := range files { 31 | err = watcher.Add(f) 32 | if err != nil { 33 | watcher.Close() 34 | return nil, err 35 | } 36 | } 37 | 38 | return watcher, nil 39 | } 40 | -------------------------------------------------------------------------------- /pkg/gpu/doc.go: -------------------------------------------------------------------------------- 1 | package gpu 2 | -------------------------------------------------------------------------------- /pkg/lock/nodelock.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package lock 18 | 19 | import ( 20 | "context" 21 | "fmt" 22 | "os" 23 | "path/filepath" 24 | "time" 25 | 26 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 27 | "k8s.io/client-go/kubernetes" 28 | "k8s.io/client-go/rest" 29 | "k8s.io/client-go/tools/clientcmd" 30 | "k8s.io/klog/v2" 31 | ) 32 | 33 | const MaxLockRetry = 5 34 | 35 | var kubeClient kubernetes.Interface 36 | 37 | func GetClient() kubernetes.Interface { 38 | return kubeClient 39 | } 40 | 41 | // NewClient connects to an API server 42 | func NewClient() (kubernetes.Interface, error) { 43 | kubeConfig := os.Getenv("KUBECONFIG") 44 | if kubeConfig == "" { 45 | kubeConfig = filepath.Join(os.Getenv("HOME"), ".kube", "config") 46 | } 47 | config, err := rest.InClusterConfig() 48 | if err != nil { 49 | config, err = clientcmd.BuildConfigFromFlags("", kubeConfig) 50 | if err != nil { 51 | return nil, err 52 | } 53 | } 54 | client, err := kubernetes.NewForConfig(config) 55 | kubeClient = client 56 | return client, err 57 | } 58 | 59 | // UseClient uses existing client 60 | func UseClient(client kubernetes.Interface) error { 61 | kubeClient = client 62 | return nil 63 | } 64 | 65 | func setNodeLock(nodeName string, lockName string) error { 66 | ctx := context.Background() 67 | node, err := kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) 68 | if err != nil { 69 | klog.Errorln("get node failed", err.Error()) 70 | return err 71 | } 72 | if _, ok := node.ObjectMeta.Annotations[lockName]; ok { 73 | return fmt.Errorf("node %s is locked", nodeName) 74 | } 75 | newNode := node.DeepCopy() 76 | newNode.ObjectMeta.Annotations[lockName] = time.Now().Format(time.RFC3339) 77 | _, err = kubeClient.CoreV1().Nodes().Update(ctx, newNode, metav1.UpdateOptions{}) 78 | for i := 0; i < MaxLockRetry && err != nil; i++ { 79 | klog.ErrorS(err, "Failed to update node", "node", nodeName, "retry", i) 80 | time.Sleep(100 * time.Millisecond) 81 | node, err = kubeClient.CoreV1().Nodes().Get(ctx, nodeName, 
metav1.GetOptions{}) 82 | if err != nil { 83 | klog.ErrorS(err, "Failed to get node when retry to update", "node", nodeName) 84 | continue 85 | } 86 | newNode := node.DeepCopy() 87 | newNode.ObjectMeta.Annotations[lockName] = time.Now().Format(time.RFC3339) 88 | _, err = kubeClient.CoreV1().Nodes().Update(ctx, newNode, metav1.UpdateOptions{}) 89 | } 90 | if err != nil { 91 | return fmt.Errorf("setNodeLock exceeds retry count %d", MaxLockRetry) 92 | } 93 | klog.V(3).InfoS("Node lock set", "node", nodeName) 94 | return nil 95 | } 96 | 97 | // ReleaseNodeLock releases a certain lock on a certain device 98 | func ReleaseNodeLock(nodeName string, lockName string) error { 99 | ctx := context.Background() 100 | node, err := kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) 101 | if err != nil { 102 | return err 103 | } 104 | if _, ok := node.ObjectMeta.Annotations[lockName]; !ok { 105 | klog.V(3).InfoS("Node lock not set", "node", nodeName, "lock", lockName) 106 | return nil 107 | } 108 | newNode := node.DeepCopy() 109 | delete(newNode.ObjectMeta.Annotations, lockName) 110 | _, err = kubeClient.CoreV1().Nodes().Update(ctx, newNode, metav1.UpdateOptions{}) 111 | for i := 0; i < MaxLockRetry && err != nil; i++ { 112 | klog.ErrorS(err, "Failed to update node", "node", nodeName, "retry", i) 113 | time.Sleep(100 * time.Millisecond) 114 | node, err = kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) 115 | if err != nil { 116 | klog.ErrorS(err, "Failed to get node when retry to update", "node", nodeName) 117 | continue 118 | } 119 | newNode := node.DeepCopy() 120 | delete(newNode.ObjectMeta.Annotations, lockName) 121 | _, err = kubeClient.CoreV1().Nodes().Update(ctx, newNode, metav1.UpdateOptions{}) 122 | } 123 | if err != nil { 124 | return fmt.Errorf("releaseNodeLock exceeds retry count %d", MaxLockRetry) 125 | } 126 | klog.V(3).InfoS("Node lock released", "node", nodeName) 127 | return nil 128 | } 129 | 130 | // LockNode locks a device on a 
certain node 131 | func LockNode(nodeName string, lockName string) error { 132 | ctx := context.Background() 133 | node, err := kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) 134 | if err != nil { 135 | return err 136 | } 137 | if _, ok := node.ObjectMeta.Annotations[lockName]; !ok { 138 | return setNodeLock(nodeName, lockName) 139 | } 140 | lockTime, err := time.Parse(time.RFC3339, node.ObjectMeta.Annotations[lockName]) 141 | if err != nil { 142 | return err 143 | } 144 | if time.Since(lockTime) > time.Minute*5 { 145 | klog.InfoS("Node lock expired", "node", nodeName, "lockTime", lockTime) 146 | err = ReleaseNodeLock(nodeName, lockName) 147 | if err != nil { 148 | klog.ErrorS(err, "Failed to release node lock", "node", nodeName) 149 | return err 150 | } 151 | return setNodeLock(nodeName, lockName) 152 | } 153 | return fmt.Errorf("node %s has been locked within 5 minutes", nodeName) 154 | } 155 | -------------------------------------------------------------------------------- /pkg/monitor/nvidia/cudevshr.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The HAMi Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package nvidia 18 | 19 | import ( 20 | "context" 21 | "errors" 22 | "fmt" 23 | "os" 24 | "path/filepath" 25 | "strings" 26 | "sync" 27 | "syscall" 28 | "time" 29 | "unsafe" 30 | 31 | v0 "volcano.sh/k8s-device-plugin/pkg/monitor/nvidia/v0" 32 | v1 "volcano.sh/k8s-device-plugin/pkg/monitor/nvidia/v1" 33 | 34 | corev1 "k8s.io/api/core/v1" 35 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 36 | "k8s.io/client-go/kubernetes" 37 | "k8s.io/client-go/tools/clientcmd" 38 | "k8s.io/klog/v2" 39 | ) 40 | 41 | const SharedRegionMagicFlag = 19920718 42 | 43 | type headerT struct { 44 | initializedFlag int32 45 | majorVersion int32 46 | minorVersion int32 47 | } 48 | 49 | type UsageInfo interface { 50 | DeviceMax() int 51 | DeviceNum() int 52 | DeviceMemoryContextSize(idx int) uint64 53 | DeviceMemoryModuleSize(idx int) uint64 54 | DeviceMemoryBufferSize(idx int) uint64 55 | DeviceMemoryOffset(idx int) uint64 56 | DeviceMemoryTotal(idx int) uint64 57 | DeviceSmUtil(idx int) uint64 58 | IsValidUUID(idx int) bool 59 | DeviceUUID(idx int) string 60 | DeviceMemoryLimit(idx int) uint64 61 | LastKernelTime() int64 62 | //UsedMemory(idx int) (uint64, error) 63 | GetPriority() int 64 | GetRecentKernel() int32 65 | SetRecentKernel(v int32) 66 | GetUtilizationSwitch() int32 67 | SetUtilizationSwitch(v int32) 68 | } 69 | 70 | type ContainerUsage struct { 71 | PodUID string 72 | ContainerName string 73 | data []byte 74 | Info UsageInfo 75 | } 76 | 77 | type ContainerLister struct { 78 | containerPath string 79 | containers map[string]*ContainerUsage 80 | mutex sync.Mutex 81 | clientset *kubernetes.Clientset 82 | } 83 | 84 | func NewContainerLister() (*ContainerLister, error) { 85 | hookPath, ok := os.LookupEnv("HOOK_PATH") 86 | if !ok { 87 | return nil, fmt.Errorf("HOOK_PATH not set") 88 | } 89 | config, err := clientcmd.BuildConfigFromFlags("", os.Getenv("KUBECONFIG")) 90 | if err != nil { 91 | klog.Errorf("Failed to build kubeconfig: %v", err) 92 | return nil, err 93 | } 
94 | clientset, err := kubernetes.NewForConfig(config) 95 | if err != nil { 96 | klog.Errorf("Failed to build clientset: %v", err) 97 | return nil, err 98 | } 99 | return &ContainerLister{ 100 | containerPath: filepath.Join(hookPath, "containers"), 101 | containers: make(map[string]*ContainerUsage), 102 | clientset: clientset, 103 | }, nil 104 | } 105 | 106 | func (l *ContainerLister) Lock() { 107 | l.mutex.Lock() 108 | } 109 | 110 | func (l *ContainerLister) UnLock() { 111 | l.mutex.Unlock() 112 | } 113 | 114 | func (l *ContainerLister) ListContainers() map[string]*ContainerUsage { 115 | return l.containers 116 | } 117 | 118 | func (l *ContainerLister) Clientset() *kubernetes.Clientset { 119 | return l.clientset 120 | } 121 | 122 | func (l *ContainerLister) Update() error { 123 | pods, err := l.clientset.CoreV1().Pods("").List(context.Background(), metav1.ListOptions{}) 124 | if err != nil { 125 | return err 126 | } 127 | 128 | l.mutex.Lock() 129 | defer l.mutex.Unlock() 130 | entries, err := os.ReadDir(l.containerPath) 131 | if err != nil { 132 | return err 133 | } 134 | for _, entry := range entries { 135 | if !entry.IsDir() { 136 | continue 137 | } 138 | dirName := filepath.Join(l.containerPath, entry.Name()) 139 | if !isValidPod(entry.Name(), pods) { 140 | dirInfo, err := os.Stat(dirName) 141 | if err == nil && dirInfo.ModTime().Add(time.Second*300).After(time.Now()) { 142 | continue 143 | } 144 | klog.Infof("Removing dirname %s in monitorpath", dirName) 145 | if c, ok := l.containers[entry.Name()]; ok { 146 | syscall.Munmap(c.data) 147 | delete(l.containers, entry.Name()) 148 | } 149 | _ = os.RemoveAll(dirName) 150 | continue 151 | } 152 | if _, ok := l.containers[entry.Name()]; ok { 153 | continue 154 | } 155 | usage, err := loadCache(dirName) 156 | if err != nil { 157 | klog.Errorf("Failed to load cache: %s, error: %v", dirName, err) 158 | continue 159 | } 160 | if usage == nil { 161 | // no cuInit in container 162 | continue 163 | } 164 | usage.PodUID = 
strings.Split(entry.Name(), "_")[0] 165 | usage.ContainerName = strings.Split(entry.Name(), "_")[1] 166 | l.containers[entry.Name()] = usage 167 | klog.Infof("Adding ctr dirname %s in monitorpath", dirName) 168 | } 169 | return nil 170 | } 171 | 172 | func loadCache(fpath string) (*ContainerUsage, error) { 173 | klog.Infof("Checking path %s", fpath) 174 | files, err := os.ReadDir(fpath) 175 | if err != nil { 176 | return nil, err 177 | } 178 | if len(files) > 2 { 179 | return nil, errors.New("cache num not matched") 180 | } 181 | if len(files) == 0 { 182 | return nil, nil 183 | } 184 | cacheFile := "" 185 | for _, val := range files { 186 | if strings.Contains(val.Name(), "libvgpu.so") { 187 | continue 188 | } 189 | if !strings.Contains(val.Name(), ".cache") { 190 | continue 191 | } 192 | cacheFile = filepath.Join(fpath, val.Name()) 193 | break 194 | } 195 | if cacheFile == "" { 196 | klog.Infof("No cache file in %s", fpath) 197 | return nil, nil 198 | } 199 | info, err := os.Stat(cacheFile) 200 | if err != nil { 201 | klog.Errorf("Failed to stat cache file: %s, error: %v", cacheFile, err) 202 | return nil, err 203 | } 204 | if info.Size() < int64(unsafe.Sizeof(headerT{})) { 205 | return nil, fmt.Errorf("cache file size %d too small", info.Size()) 206 | } 207 | f, err := os.OpenFile(cacheFile, os.O_RDWR, 0666) 208 | if err != nil { 209 | klog.Errorf("Failed to open cache file: %s, error: %v", cacheFile, err) 210 | return nil, err 211 | } 212 | defer func(f *os.File) { 213 | _ = f.Close() 214 | }(f) 215 | usage := &ContainerUsage{} 216 | usage.data, err = syscall.Mmap(int(f.Fd()), 0, int(info.Size()), syscall.PROT_WRITE|syscall.PROT_READ, syscall.MAP_SHARED) 217 | if err != nil { 218 | klog.Errorf("Failed to mmap cache file: %s, error: %v", cacheFile, err) 219 | return nil, err 220 | } 221 | head := (*headerT)(unsafe.Pointer(&usage.data[0])) 222 | if head.initializedFlag != SharedRegionMagicFlag { 223 | _ = syscall.Munmap(usage.data) 224 | return nil, 
fmt.Errorf("cache file magic flag not matched") 225 | } 226 | if info.Size() == 1197897 { 227 | usage.Info = v0.CastSpec(usage.data) 228 | } else if head.majorVersion == 1 { 229 | usage.Info = v1.CastSpec(usage.data) 230 | } else { 231 | _ = syscall.Munmap(usage.data) 232 | return nil, fmt.Errorf("unknown cache file size %d version %d.%d", info.Size(), head.majorVersion, head.minorVersion) 233 | } 234 | return usage, nil 235 | } 236 | 237 | func isValidPod(name string, pods *corev1.PodList) bool { 238 | for _, val := range pods.Items { 239 | if strings.Contains(name, string(val.UID)) { 240 | return true 241 | } 242 | } 243 | return false 244 | } 245 | -------------------------------------------------------------------------------- /pkg/monitor/nvidia/v0/spec.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The HAMi Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package v0 18 | 19 | import "unsafe" 20 | 21 | const maxDevices = 16 22 | 23 | type deviceMemory struct { 24 | contextSize uint64 25 | moduleSize uint64 26 | bufferSize uint64 27 | offset uint64 28 | total uint64 29 | } 30 | 31 | type deviceUtilization struct { 32 | decUtil uint64 33 | encUtil uint64 34 | smUtil uint64 35 | } 36 | 37 | type shrregProcSlotT struct { 38 | pid int32 39 | hostpid int32 40 | used [16]deviceMemory 41 | monitorused [16]uint64 42 | deviceUtil [16]deviceUtilization 43 | status int32 44 | } 45 | 46 | type uuid struct { 47 | uuid [96]byte 48 | } 49 | 50 | type semT struct { 51 | sem [32]byte 52 | } 53 | 54 | type sharedRegionT struct { 55 | initializedFlag int32 56 | smInitFlag int32 57 | ownerPid uint32 58 | sem semT 59 | num uint64 60 | uuids [16]uuid 61 | 62 | limit [16]uint64 63 | smLimit [16]uint64 64 | procs [1024]shrregProcSlotT 65 | 66 | procnum int32 67 | utilizationSwitch int32 68 | recentKernel int32 69 | priority int32 70 | } 71 | 72 | type Spec struct { 73 | sr *sharedRegionT 74 | } 75 | 76 | func (s Spec) DeviceMax() int { 77 | return maxDevices 78 | } 79 | 80 | func (s Spec) DeviceNum() int { 81 | return int(s.sr.num) 82 | } 83 | 84 | func (s Spec) DeviceMemoryContextSize(idx int) uint64 { 85 | v := uint64(0) 86 | for _, p := range s.sr.procs { 87 | v += p.used[idx].contextSize 88 | } 89 | return v 90 | } 91 | 92 | func (s Spec) DeviceMemoryModuleSize(idx int) uint64 { 93 | v := uint64(0) 94 | for _, p := range s.sr.procs { 95 | v += p.used[idx].moduleSize 96 | } 97 | return v 98 | } 99 | 100 | func (s Spec) DeviceMemoryBufferSize(idx int) uint64 { 101 | v := uint64(0) 102 | for _, p := range s.sr.procs { 103 | v += p.used[idx].bufferSize 104 | } 105 | return v 106 | } 107 | 108 | func (s Spec) DeviceMemoryOffset(idx int) uint64 { 109 | v := uint64(0) 110 | for _, p := range s.sr.procs { 111 | v += p.used[idx].offset 112 | } 113 | return v 114 | } 115 | 116 | func (s Spec) DeviceMemoryTotal(idx int) uint64 { 
117 | v := uint64(0) 118 | for _, p := range s.sr.procs { 119 | v += p.used[idx].total 120 | } 121 | return v 122 | } 123 | 124 | func (s Spec) DeviceSmUtil(idx int) uint64 { 125 | v := uint64(0) 126 | for _, p := range s.sr.procs { 127 | v += p.deviceUtil[idx].smUtil 128 | } 129 | return v 130 | } 131 | 132 | func (s Spec) IsValidUUID(idx int) bool { 133 | return s.sr.uuids[idx].uuid[0] != 0 134 | } 135 | 136 | func (s Spec) DeviceUUID(idx int) string { 137 | return string(s.sr.uuids[idx].uuid[:]) 138 | } 139 | 140 | func (s Spec) DeviceMemoryLimit(idx int) uint64 { 141 | return s.sr.limit[idx] 142 | } 143 | 144 | func (s Spec) LastKernelTime() int64 { 145 | return 0 146 | } 147 | 148 | func CastSpec(data []byte) Spec { 149 | return Spec{ 150 | sr: (*sharedRegionT)(unsafe.Pointer(&data[0])), 151 | } 152 | } 153 | 154 | // func (s *SharedRegionT) UsedMemory(idx int) (uint64, error) { 155 | // return 0, nil 156 | // } 157 | 158 | func (s Spec) GetPriority() int { 159 | return int(s.sr.priority) 160 | } 161 | 162 | func (s Spec) GetRecentKernel() int32 { 163 | return s.sr.recentKernel 164 | } 165 | 166 | func (s Spec) SetRecentKernel(v int32) { 167 | s.sr.recentKernel = v 168 | } 169 | 170 | func (s Spec) GetUtilizationSwitch() int32 { 171 | return s.sr.utilizationSwitch 172 | } 173 | 174 | func (s Spec) SetUtilizationSwitch(v int32) { 175 | s.sr.utilizationSwitch = v 176 | } 177 | -------------------------------------------------------------------------------- /pkg/monitor/nvidia/v1/spec.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The HAMi Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1 18 | 19 | import "unsafe" 20 | 21 | const maxDevices = 16 22 | 23 | type deviceMemory struct { 24 | contextSize uint64 25 | moduleSize uint64 26 | bufferSize uint64 27 | offset uint64 28 | total uint64 29 | unused [3]uint64 30 | } 31 | 32 | type deviceUtilization struct { 33 | decUtil uint64 34 | encUtil uint64 35 | smUtil uint64 36 | unused [3]uint64 37 | } 38 | 39 | type shrregProcSlotT struct { 40 | pid int32 41 | hostpid int32 42 | used [16]deviceMemory 43 | monitorused [16]uint64 44 | deviceUtil [16]deviceUtilization 45 | status int32 46 | unused [3]uint64 47 | } 48 | 49 | type uuid struct { 50 | uuid [96]byte 51 | } 52 | 53 | type semT struct { 54 | sem [32]byte 55 | } 56 | 57 | type sharedRegionT struct { 58 | initializedFlag int32 59 | majorVersion int32 60 | minorVersion int32 61 | smInitFlag int32 62 | ownerPid uint32 63 | sem semT 64 | num uint64 65 | uuids [16]uuid 66 | 67 | limit [16]uint64 68 | smLimit [16]uint64 69 | procs [1024]shrregProcSlotT 70 | 71 | procnum int32 72 | utilizationSwitch int32 73 | recentKernel int32 74 | priority int32 75 | lastKernelTime int64 76 | unused [4]uint64 77 | } 78 | 79 | type Spec struct { 80 | sr *sharedRegionT 81 | } 82 | 83 | func (s Spec) DeviceMax() int { 84 | return maxDevices 85 | } 86 | 87 | func (s Spec) DeviceNum() int { 88 | return int(s.sr.num) 89 | } 90 | 91 | func (s Spec) DeviceMemoryContextSize(idx int) uint64 { 92 | v := uint64(0) 93 | for _, p := range s.sr.procs { 94 | v += p.used[idx].contextSize 95 | } 96 | return v 97 | } 98 | 99 | func (s Spec) 
DeviceMemoryModuleSize(idx int) uint64 { 100 | v := uint64(0) 101 | for _, p := range s.sr.procs { 102 | v += p.used[idx].moduleSize 103 | } 104 | return v 105 | } 106 | 107 | func (s Spec) DeviceMemoryBufferSize(idx int) uint64 { 108 | v := uint64(0) 109 | for _, p := range s.sr.procs { 110 | v += p.used[idx].bufferSize 111 | } 112 | return v 113 | } 114 | 115 | func (s Spec) DeviceMemoryOffset(idx int) uint64 { 116 | v := uint64(0) 117 | for _, p := range s.sr.procs { 118 | v += p.used[idx].offset 119 | } 120 | return v 121 | } 122 | 123 | func (s Spec) DeviceMemoryTotal(idx int) uint64 { 124 | v := uint64(0) 125 | for _, p := range s.sr.procs { 126 | v += p.used[idx].total 127 | } 128 | return v 129 | } 130 | 131 | func (s Spec) DeviceSmUtil(idx int) uint64 { 132 | v := uint64(0) 133 | for _, p := range s.sr.procs { 134 | v += p.deviceUtil[idx].smUtil 135 | } 136 | return v 137 | } 138 | 139 | func (s Spec) IsValidUUID(idx int) bool { 140 | return s.sr.uuids[idx].uuid[0] != 0 141 | } 142 | 143 | func (s Spec) DeviceUUID(idx int) string { 144 | return string(s.sr.uuids[idx].uuid[:]) 145 | } 146 | 147 | func (s Spec) DeviceMemoryLimit(idx int) uint64 { 148 | return s.sr.limit[idx] 149 | } 150 | 151 | func (s Spec) LastKernelTime() int64 { 152 | return s.sr.lastKernelTime 153 | } 154 | 155 | func CastSpec(data []byte) Spec { 156 | return Spec{ 157 | sr: (*sharedRegionT)(unsafe.Pointer(&data[0])), 158 | } 159 | } 160 | 161 | // func (s *SharedRegionT) UsedMemory(idx int) (uint64, error) { 162 | // return 0, nil 163 | // } 164 | 165 | func (s Spec) GetPriority() int { 166 | return int(s.sr.priority) 167 | } 168 | 169 | func (s Spec) GetRecentKernel() int32 { 170 | return s.sr.recentKernel 171 | } 172 | 173 | func (s Spec) SetRecentKernel(v int32) { 174 | s.sr.recentKernel = v 175 | } 176 | 177 | func (s Spec) GetUtilizationSwitch() int32 { 178 | return s.sr.utilizationSwitch 179 | } 180 | 181 | func (s Spec) SetUtilizationSwitch(v int32) { 182 | 
s.sr.utilizationSwitch = v 183 | } 184 | -------------------------------------------------------------------------------- /pkg/plugin/interface.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package plugin 18 | 19 | // DevicePlugin interface 20 | type DevicePlugin interface { 21 | // Get the device plugin name 22 | Name() string 23 | // Start the plugin 24 | Start() error 25 | // Get all the devices number which reside within the node 26 | DevicesNum() int 27 | // Stop the plugin 28 | Stop() error 29 | } 30 | -------------------------------------------------------------------------------- /pkg/plugin/vgpu/cache.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package vgpu 18 | 19 | import ( 20 | "sync" 21 | 22 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" 23 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config" 24 | ) 25 | 26 | type DeviceCache struct { 27 | *GpuDeviceManager 28 | 29 | cache []*Device 30 | stopCh chan interface{} 31 | unhealthy chan *Device 32 | notifyCh map[string]chan *Device 33 | mutex sync.Mutex 34 | } 35 | 36 | func NewDeviceCache() *DeviceCache { 37 | skipMigEnabledGPUs := true 38 | if config.Mode == "mig" { 39 | skipMigEnabledGPUs = false 40 | } 41 | return &DeviceCache{ 42 | GpuDeviceManager: NewGpuDeviceManager(skipMigEnabledGPUs), 43 | stopCh: make(chan interface{}), 44 | unhealthy: make(chan *Device), 45 | notifyCh: make(map[string]chan *Device), 46 | } 47 | } 48 | 49 | func (d *DeviceCache) AddNotifyChannel(name string, ch chan *Device) { 50 | d.mutex.Lock() 51 | defer d.mutex.Unlock() 52 | d.notifyCh[name] = ch 53 | } 54 | 55 | func (d *DeviceCache) RemoveNotifyChannel(name string) { 56 | d.mutex.Lock() 57 | defer d.mutex.Unlock() 58 | delete(d.notifyCh, name) 59 | } 60 | 61 | func (d *DeviceCache) Start() { 62 | d.cache = d.Devices() 63 | go d.CheckHealth(d.stopCh, d.cache, d.unhealthy) 64 | go d.notify() 65 | } 66 | 67 | func (d *DeviceCache) Stop() { 68 | close(d.stopCh) 69 | } 70 | 71 | func (d *DeviceCache) GetCache() []*Device { 72 | return d.cache 73 | } 74 | 75 | func (d *DeviceCache) notify() { 76 | for { 77 | select { 78 | case <-d.stopCh: 79 | return 80 | case dev := <-d.unhealthy: 81 | dev.Health = pluginapi.Unhealthy 82 | d.mutex.Lock() 83 | for _, ch := range d.notifyCh { 84 | ch <- dev 85 | } 86 | d.mutex.Unlock() 87 | } 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /pkg/plugin/vgpu/config/config.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 The Volcano Authors. 
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package config 18 | 19 | import ( 20 | "sync" 21 | 22 | "github.com/NVIDIA/go-nvlib/pkg/nvlib/device" 23 | "github.com/NVIDIA/go-nvml/pkg/nvml" 24 | ) 25 | 26 | type NvidiaConfig struct { 27 | ResourceCountName string `yaml:"resourceCountName"` 28 | ResourceMemoryName string `yaml:"resourceMemoryName"` 29 | ResourceCoreName string `yaml:"resourceCoreName"` 30 | ResourceMemoryPercentageName string `yaml:"resourceMemoryPercentageName"` 31 | ResourcePriority string `yaml:"resourcePriorityName"` 32 | OverwriteEnv bool `yaml:"overwriteEnv"` 33 | DefaultMemory int32 `yaml:"defaultMemory"` 34 | DefaultCores int32 `yaml:"defaultCores"` 35 | DefaultGPUNum int32 `yaml:"defaultGPUNum"` 36 | DeviceSplitCount uint `yaml:"deviceSplitCount"` 37 | DeviceMemoryScaling float64 `yaml:"deviceMemoryScaling"` 38 | DeviceCoreScaling float64 `yaml:"deviceCoreScaling"` 39 | DisableCoreLimit bool `yaml:"disableCoreLimit"` 40 | MigGeometriesList []AllowedMigGeometries `yaml:"knownMigGeometries"` 41 | GPUMemoryFactor uint `yaml:"gpuMemoryFactor"` 42 | } 43 | 44 | var ( 45 | nvmllib = nvml.New() 46 | 47 | lock sync.Mutex 48 | globalDevice device.Interface 49 | ) 50 | 51 | var ( 52 | // DevicePluginFilterDevice need device-plugin filter this device, don't register this device. 
53 | DevicePluginFilterDevice *FilterDevice 54 | ) 55 | 56 | func Nvml() nvml.Interface { 57 | return nvmllib 58 | } 59 | 60 | func Device() device.Interface { 61 | if globalDevice != nil { 62 | return globalDevice 63 | } 64 | 65 | lock.Lock() 66 | defer lock.Unlock() 67 | 68 | globalDevice = device.New(nvmllib) 69 | return globalDevice 70 | } 71 | 72 | var ( 73 | DeviceSplitCount uint 74 | GPUMemoryFactor uint 75 | Mode string 76 | DeviceCoresScaling float64 77 | NodeName string 78 | RuntimeSocketFlag string 79 | DisableCoreLimit bool 80 | ) 81 | 82 | type MigTemplate struct { 83 | Name string `yaml:"name"` 84 | Memory int32 `yaml:"memory"` 85 | Count int32 `yaml:"count"` 86 | } 87 | 88 | type MigTemplateUsage struct { 89 | Name string `json:"name,omitempty"` 90 | Memory int32 `json:"memory,omitempty"` 91 | InUse bool `json:"inuse,omitempty"` 92 | } 93 | 94 | type Geometry struct { 95 | Group string `yaml:"group"` 96 | Instances []MigTemplate `yaml:"geometries"` 97 | } 98 | 99 | type MIGS []MigTemplateUsage 100 | 101 | type MigInUse struct { 102 | Index int32 103 | UsageList MIGS 104 | } 105 | 106 | type AllowedMigGeometries struct { 107 | Models []string `yaml:"models"` 108 | Geometries []Geometry `yaml:"allowedGeometries"` 109 | } 110 | 111 | type Config struct { 112 | NvidiaConfig NvidiaConfig `yaml:"nvidia"` 113 | } 114 | 115 | type MigPartedSpec struct { 116 | Version string `json:"version" yaml:"version"` 117 | MigConfigs map[string]MigConfigSpecSlice `json:"mig-configs,omitempty" yaml:"mig-configs,omitempty"` 118 | } 119 | 120 | // MigConfigSpec defines the spec to declare the desired MIG configuration for a set of GPUs. 
121 | type MigConfigSpec struct { 122 | DeviceFilter interface{} `json:"device-filter,omitempty" yaml:"device-filter,flow,omitempty"` 123 | Devices []int32 `json:"devices" yaml:"devices,flow"` 124 | MigEnabled bool `json:"mig-enabled" yaml:"mig-enabled"` 125 | MigDevices map[string]int32 `json:"mig-devices" yaml:"mig-devices"` 126 | } 127 | 128 | // MigConfigSpecSlice represents a slice of 'MigConfigSpec'. 129 | type MigConfigSpecSlice []MigConfigSpec 130 | 131 | type FilterDevice struct { 132 | // UUID is the device ID. 133 | UUID []string `json:"uuid"` 134 | // Index is the device index. 135 | Index []uint `json:"index"` 136 | } 137 | 138 | type DevicePluginConfigs struct { 139 | Nodeconfig []struct { 140 | Name string `json:"name"` 141 | OperatingMode string `json:"operatingmode"` 142 | Devicememoryscaling float64 `json:"devicememoryscaling"` 143 | Devicecorescaling float64 `json:"devicecorescaling"` 144 | Devicesplitcount uint `json:"devicesplitcount"` 145 | Migstrategy string `json:"migstrategy"` 146 | FilterDevice *FilterDevice `json:"filterdevices"` 147 | } `json:"nodeconfig"` 148 | } 149 | -------------------------------------------------------------------------------- /pkg/plugin/vgpu/config/version.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package config 18 | 19 | import ( 20 | "fmt" 21 | 22 | "github.com/spf13/cobra" 23 | ) 24 | 25 | var ( 26 | version string 27 | VersionCmd = &cobra.Command{ 28 | Use: "version", 29 | Short: "print version", 30 | Run: func(cmd *cobra.Command, args []string) { 31 | fmt.Println(Version()) 32 | }, 33 | } 34 | ) 35 | 36 | func Version() string { 37 | return version 38 | } 39 | -------------------------------------------------------------------------------- /pkg/plugin/vgpu/helper.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2025 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package vgpu 18 | 19 | // int8Slice wraps an []int8 with more functions. 20 | type int8Slice []int8 21 | 22 | // String turns a nil terminated int8Slice into a string 23 | func (s int8Slice) String() string { 24 | var b []byte 25 | for _, c := range s { 26 | if c == 0 { 27 | break 28 | } 29 | b = append(b, byte(c)) 30 | } 31 | return string(b) 32 | } 33 | -------------------------------------------------------------------------------- /pkg/plugin/vgpu/mig-strategy.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package vgpu 18 | 19 | import ( 20 | "fmt" 21 | "log" 22 | 23 | "github.com/NVIDIA/go-gpuallocator/gpuallocator" 24 | "github.com/NVIDIA/go-nvml/pkg/nvml" 25 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" 26 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config" 27 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/util" 28 | ) 29 | 30 | // Constants representing the various MIG strategies 31 | const ( 32 | MigStrategyNone = "none" 33 | MigStrategySingle = "single" 34 | MigStrategyMixed = "mixed" 35 | ) 36 | 37 | // MigStrategyResourceSet holds a set of resource names for a given MIG strategy 38 | type MigStrategyResourceSet map[string]struct{} 39 | 40 | // MigStrategy provides an interface for building the set of plugins required to implement a given MIG strategy 41 | type MigStrategy interface { 42 | GetPlugins(cfg *config.NvidiaConfig, cache *DeviceCache) []*NvidiaDevicePlugin 43 | MatchesResource(mig *nvml.Device, resource string) bool 44 | } 45 | 46 | // NewMigStrategy returns a reference to a given MigStrategy based on the 'strategy' passed in 47 | func NewMigStrategy(strategy string) (MigStrategy, error) { 48 | switch strategy { 49 | case MigStrategyNone: 50 | return &migStrategyNone{}, nil 51 | case MigStrategySingle: 52 | return &migStrategySingle{}, nil 53 | case MigStrategyMixed: 54 | return &migStrategyMixed{}, nil 55 | } 56 | return nil, fmt.Errorf("unknown strategy: %v", strategy) 57 | } 58 | 59 | type migStrategyNone struct{} 60 | type migStrategySingle struct{} 61 | type migStrategyMixed struct{} 62 
| 63 | // migStrategyNone 64 | func (s *migStrategyNone) GetPlugins(cfg *config.NvidiaConfig, cache *DeviceCache) []*NvidiaDevicePlugin { 65 | return []*NvidiaDevicePlugin{ 66 | NewNvidiaDevicePlugin( 67 | //"nvidia.com/gpu", 68 | util.ResourceName, 69 | cache, 70 | gpuallocator.NewBestEffortPolicy(), 71 | pluginapi.DevicePluginPath+"nvidia-gpu.sock", 72 | cfg), 73 | NewNvidiaDevicePlugin( 74 | util.ResourceMem, 75 | cache, 76 | gpuallocator.NewBestEffortPolicy(), 77 | pluginapi.DevicePluginPath+"nvidia-gpu-memory.sock", 78 | cfg), 79 | NewNvidiaDevicePlugin( 80 | util.ResourceCores, 81 | cache, 82 | gpuallocator.NewBestEffortPolicy(), 83 | pluginapi.DevicePluginPath+"nvidia-gpu-cores.sock", 84 | cfg), 85 | } 86 | } 87 | 88 | func (s *migStrategyNone) MatchesResource(mig *nvml.Device, resource string) bool { 89 | panic("Should never be called") 90 | } 91 | 92 | // migStrategySingle 93 | func (s *migStrategySingle) GetPlugins(cfg *config.NvidiaConfig, cache *DeviceCache) []*NvidiaDevicePlugin { 94 | panic("single mode in MIG currently not supported") 95 | } 96 | 97 | func (s *migStrategySingle) MatchesResource(mig *nvml.Device, resource string) bool { 98 | return true 99 | } 100 | 101 | // migStrategyMixed 102 | func (s *migStrategyMixed) GetPlugins(cfg *config.NvidiaConfig, cache *DeviceCache) []*NvidiaDevicePlugin { 103 | devices := NewMIGCapableDevices() 104 | 105 | if err := devices.AssertAllMigEnabledDevicesAreValid(); err != nil { 106 | panic(fmt.Errorf("at least one device with migEnabled=true was not configured correctly: %v", err)) 107 | } 108 | 109 | resources := make(MigStrategyResourceSet) 110 | migs, err := devices.GetAllMigDevices() 111 | if err != nil { 112 | panic(fmt.Errorf("unable to retrieve list of MIG devices: %v", err)) 113 | } 114 | for _, mig := range migs { 115 | // Convert old NVML device to new NVML device 116 | uuid, ret := (*mig).GetUUID() 117 | check(ret) 118 | newDevice, ret := config.Nvml().DeviceGetHandleByUUID(uuid) 119 | check(ret) 
120 | 121 | r := s.getResourceName(&newDevice) 122 | if !s.validMigDevice(&newDevice) { 123 | log.Printf("Skipping unsupported MIG device: %v", r) 124 | continue 125 | } 126 | resources[r] = struct{}{} 127 | } 128 | 129 | plugins := []*NvidiaDevicePlugin{ 130 | NewNvidiaDevicePlugin( 131 | util.ResourceName, 132 | cache, 133 | gpuallocator.NewBestEffortPolicy(), 134 | pluginapi.DevicePluginPath+"nvidia-gpu.sock", 135 | cfg), 136 | } 137 | 138 | for resource := range resources { 139 | plugin := NewMIGNvidiaDevicePlugin( 140 | "nvidia.com/"+resource, 141 | NewMigDeviceManager(s, resource), 142 | "NVIDIA_VISIBLE_DEVICES", 143 | gpuallocator.Policy(nil), 144 | pluginapi.DevicePluginPath+"nvidia-"+resource+".sock") 145 | plugins = append(plugins, plugin) 146 | } 147 | 148 | return plugins 149 | } 150 | 151 | func (s *migStrategyMixed) validMigDevice(mig *nvml.Device) bool { 152 | gi, ret := config.Nvml().DeviceGetGpuInstanceId(*mig) 153 | check(ret) 154 | ci, ret := config.Nvml().DeviceGetComputeInstanceId(*mig) 155 | check(ret) 156 | return gi == ci 157 | } 158 | 159 | func (s *migStrategyMixed) getResourceName(mig *nvml.Device) string { 160 | gi, ret := config.Nvml().DeviceGetGpuInstanceId(*mig) 161 | check(ret) 162 | ci, ret := config.Nvml().DeviceGetComputeInstanceId(*mig) 163 | check(ret) 164 | 165 | memory, ret := config.Nvml().DeviceGetMemoryInfo(*mig) 166 | check(ret) 167 | gb := ((memory.Total/(1024*1024) + 1024 - 1) / 1024) 168 | 169 | var r string 170 | if gi == ci { 171 | r = fmt.Sprintf("mig-%dg.%dgb", gi, gb) 172 | } else { 173 | r = fmt.Sprintf("mig-%dc.%dg.%dgb", ci, gi, gb) 174 | } 175 | 176 | return r 177 | } 178 | 179 | func (s *migStrategyMixed) MatchesResource(mig *nvml.Device, resource string) bool { 180 | return s.getResourceName(mig) == resource 181 | } 182 | -------------------------------------------------------------------------------- /pkg/plugin/vgpu/mig.go: -------------------------------------------------------------------------------- 1 | 
/* 2 | Copyright 2023 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package vgpu 18 | 19 | import ( 20 | "bufio" 21 | "fmt" 22 | "log" 23 | "os" 24 | 25 | "github.com/NVIDIA/go-nvml/pkg/nvml" 26 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config" 27 | ) 28 | 29 | const ( 30 | nvidiaProcDriverPath = "/proc/driver/nvidia" 31 | nvidiaCapabilitiesPath = nvidiaProcDriverPath + "/capabilities" 32 | 33 | nvcapsProcDriverPath = "/proc/driver/nvidia-caps" 34 | nvcapsMigMinorsPath = nvcapsProcDriverPath + "/mig-minors" 35 | nvcapsDevicePath = "/dev/nvidia-caps" 36 | ) 37 | 38 | // MIGCapableDevices stores information about all devices on the node 39 | type MIGCapableDevices struct { 40 | // devicesMap holds a list of devices, separated by whether they have MigEnabled or not 41 | devicesMap map[bool][]*nvml.Device 42 | } 43 | 44 | // NewMIGCapableDevices creates a new MIGCapableDevices struct and returns a pointer to it. 
45 | func NewMIGCapableDevices() *MIGCapableDevices { 46 | return &MIGCapableDevices{ 47 | devicesMap: nil, // Is initialized on first use 48 | } 49 | } 50 | 51 | func (devices *MIGCapableDevices) getDevicesMap() (map[bool][]*nvml.Device, error) { 52 | if devices.devicesMap == nil { 53 | n, ret := config.Nvml().DeviceGetCount() 54 | if ret != nvml.SUCCESS { 55 | return nil, fmt.Errorf("error getting device count: %v", ret) 56 | } 57 | 58 | migEnabledDevicesMap := make(map[bool][]*nvml.Device) 59 | for i := 0; i < int(n); i++ { 60 | d, ret := config.Nvml().DeviceGetHandleByIndex(i) 61 | if ret != nvml.SUCCESS { 62 | return nil, fmt.Errorf("error getting device handle: %v", ret) 63 | } 64 | 65 | isMigEnabled, _, ret := config.Nvml().DeviceGetMigMode(d) 66 | if ret != nvml.SUCCESS { 67 | if ret == nvml.ERROR_NOT_SUPPORTED { 68 | isMigEnabled = nvml.DEVICE_MIG_DISABLE 69 | } else { 70 | return nil, fmt.Errorf("error getting MIG mode: %v", ret) 71 | } 72 | } 73 | 74 | migEnabledDevicesMap[isMigEnabled == 1] = append(migEnabledDevicesMap[isMigEnabled == 1], &d) 75 | } 76 | 77 | devices.devicesMap = migEnabledDevicesMap 78 | } 79 | return devices.devicesMap, nil 80 | } 81 | 82 | // GetDevicesWithMigEnabled returns a list of devices with migEnabled=true 83 | func (devices *MIGCapableDevices) GetDevicesWithMigEnabled() ([]*nvml.Device, error) { 84 | devicesMap, err := devices.getDevicesMap() 85 | if err != nil { 86 | return nil, err 87 | } 88 | return devicesMap[true], nil 89 | } 90 | 91 | // GetDevicesWithMigDisabled returns a list of devices with migEnabled=false 92 | func (devices *MIGCapableDevices) GetDevicesWithMigDisabled() ([]*nvml.Device, error) { 93 | devicesMap, err := devices.getDevicesMap() 94 | if err != nil { 95 | return nil, err 96 | } 97 | return devicesMap[false], nil 98 | } 99 | 100 | // AssertAllMigEnabledDevicesAreValid ensures that all devices with migEnabled=true are valid. 
This means: 101 | // * The have at least 1 mig devices associated with them 102 | // Returns nill if the device is valid, or an error if these are not valid 103 | func (devices *MIGCapableDevices) AssertAllMigEnabledDevicesAreValid() error { 104 | devicesMap, err := devices.getDevicesMap() 105 | if err != nil { 106 | return err 107 | } 108 | 109 | for _, d := range devicesMap[true] { 110 | var migs []*nvml.Device 111 | maxMigDevices, ret := config.Nvml().DeviceGetMaxMigDeviceCount(*d) 112 | if ret != nvml.SUCCESS { 113 | return fmt.Errorf("error getting max MIG device count: %v", ret) 114 | } 115 | for i := 0; i < int(maxMigDevices); i++ { 116 | mig, ret := config.Nvml().DeviceGetMigDeviceHandleByIndex(*d, i) 117 | if ret == nvml.SUCCESS { 118 | migs = append(migs, &mig) 119 | } 120 | } 121 | if len(migs) == 0 { 122 | uuid, ret := config.Nvml().DeviceGetUUID(*d) 123 | if ret != nvml.SUCCESS { 124 | return fmt.Errorf("error getting device UUID: %v", ret) 125 | } 126 | return fmt.Errorf("no MIG devices associated with device: %v", uuid) 127 | } 128 | } 129 | return nil 130 | } 131 | 132 | // GetAllMigDevices returns a list of all MIG devices. 
133 | func (devices *MIGCapableDevices) GetAllMigDevices() ([]*nvml.Device, error) { 134 | devicesMap, err := devices.getDevicesMap() 135 | if err != nil { 136 | return nil, err 137 | } 138 | 139 | var migs []*nvml.Device 140 | for _, d := range devicesMap[true] { 141 | maxMigDevices, ret := config.Nvml().DeviceGetMaxMigDeviceCount(*d) 142 | if ret != nvml.SUCCESS { 143 | return nil, fmt.Errorf("error getting max MIG device count: %v", ret) 144 | } 145 | for i := 0; i < int(maxMigDevices); i++ { 146 | mig, ret := config.Nvml().DeviceGetMigDeviceHandleByIndex(*d, i) 147 | if ret == nvml.SUCCESS { 148 | migs = append(migs, &mig) 149 | } 150 | } 151 | } 152 | return migs, nil 153 | } 154 | 155 | // GetMigCapabilityDevicePaths returns a mapping of MIG capability path to device node path 156 | func GetMigCapabilityDevicePaths() (map[string]string, error) { 157 | // Open nvcapsMigMinorsPath for walking. 158 | // If the nvcapsMigMinorsPath does not exist, then we are not on a MIG 159 | // capable machine, so there is nothing to do. 
160 | // The format of this file is discussed in: 161 | // https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#unique_1576522674 162 | minorsFile, err := os.Open(nvcapsMigMinorsPath) 163 | if os.IsNotExist(err) { 164 | return nil, nil 165 | } 166 | if err != nil { 167 | return nil, fmt.Errorf("error opening MIG minors file: %v", err) 168 | } 169 | defer minorsFile.Close() 170 | 171 | // Define a function to process each each line of nvcapsMigMinorsPath 172 | processLine := func(line string) (string, int, error) { 173 | var gpu, gi, ci, migMinor int 174 | 175 | // Look for a CI access file 176 | n, _ := fmt.Sscanf(line, "gpu%d/gi%d/ci%d/access %d", &gpu, &gi, &ci, &migMinor) 177 | if n == 4 { 178 | capPath := fmt.Sprintf(nvidiaCapabilitiesPath+"/gpu%d/mig/gi%d/ci%d/access", gpu, gi, ci) 179 | return capPath, migMinor, nil 180 | } 181 | 182 | // Look for a GI access file 183 | n, _ = fmt.Sscanf(line, "gpu%d/gi%d/access %d", &gpu, &gi, &migMinor) 184 | if n == 3 { 185 | capPath := fmt.Sprintf(nvidiaCapabilitiesPath+"/gpu%d/mig/gi%d/access", gpu, gi) 186 | return capPath, migMinor, nil 187 | } 188 | 189 | // Look for the MIG config file 190 | n, _ = fmt.Sscanf(line, "config %d", &migMinor) 191 | if n == 1 { 192 | capPath := fmt.Sprintf(nvidiaCapabilitiesPath + "/mig/config") 193 | return capPath, migMinor, nil 194 | } 195 | 196 | // Look for the MIG monitor file 197 | n, _ = fmt.Sscanf(line, "monitor %d", &migMinor) 198 | if n == 1 { 199 | capPath := fmt.Sprintf(nvidiaCapabilitiesPath + "/mig/monitor") 200 | return capPath, migMinor, nil 201 | } 202 | 203 | return "", 0, fmt.Errorf("unparsable line: %v", line) 204 | } 205 | 206 | // Walk each line of nvcapsMigMinorsPath and construct a mapping of nvidia 207 | // capabilities path to device minor for that capability 208 | capsDevicePaths := make(map[string]string) 209 | scanner := bufio.NewScanner(minorsFile) 210 | for scanner.Scan() { 211 | capPath, migMinor, err := processLine(scanner.Text()) 212 | if 
err != nil { 213 | log.Printf("Skipping line in MIG minors file: %v", err) 214 | continue 215 | } 216 | capsDevicePaths[capPath] = fmt.Sprintf(nvcapsDevicePath+"/nvidia-cap%d", migMinor) 217 | } 218 | return capsDevicePaths, nil 219 | } 220 | 221 | // GetMigDeviceNodePaths returns a list of device node paths associated with a MIG device 222 | func GetMigDeviceNodePaths(parent nvml.Device, mig *nvml.Device) ([]string, error) { 223 | capDevicePaths, err := GetMigCapabilityDevicePaths() 224 | if err != nil { 225 | return nil, fmt.Errorf("error getting MIG capability device paths: %v", err) 226 | } 227 | 228 | gpu, ret := parent.GetMinorNumber() 229 | if ret != nvml.SUCCESS { 230 | return nil, fmt.Errorf("error getting GPU device minor number: %v", ret) 231 | } 232 | 233 | gi, ret := config.Nvml().DeviceGetGpuInstanceId(*mig) 234 | if ret != nvml.SUCCESS { 235 | return nil, fmt.Errorf("error getting MIG GPU instance ID: %v", ret) 236 | } 237 | 238 | ci, ret := config.Nvml().DeviceGetComputeInstanceId(*mig) 239 | if ret != nvml.SUCCESS { 240 | return nil, fmt.Errorf("error getting MIG compute instance ID: %v", ret) 241 | } 242 | 243 | giCapPath := fmt.Sprintf(nvidiaCapabilitiesPath+"/gpu%d/mig/gi%d/access", gpu, gi) 244 | if _, exists := capDevicePaths[giCapPath]; !exists { 245 | return nil, fmt.Errorf("missing MIG GPU instance capability path: %v", giCapPath) 246 | } 247 | 248 | ciCapPath := fmt.Sprintf(nvidiaCapabilitiesPath+"/gpu%d/mig/gi%d/ci%d/access", gpu, gi, ci) 249 | if _, exists := capDevicePaths[ciCapPath]; !exists { 250 | return nil, fmt.Errorf("missing MIG GPU instance capability path: %v", giCapPath) 251 | } 252 | 253 | devicePaths := []string{ 254 | fmt.Sprintf("/dev/nvidia%d", gpu), 255 | capDevicePaths[giCapPath], 256 | capDevicePaths[ciCapPath], 257 | } 258 | 259 | return devicePaths, nil 260 | } 261 | -------------------------------------------------------------------------------- /pkg/plugin/vgpu/nvidia.go: 
-------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package vgpu 18 | 19 | import ( 20 | "bytes" 21 | "fmt" 22 | "log" 23 | "os" 24 | "strconv" 25 | "strings" 26 | 27 | "github.com/NVIDIA/go-nvlib/pkg/nvlib/device" 28 | "github.com/NVIDIA/go-nvml/pkg/nvml" 29 | "k8s.io/klog" 30 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" 31 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config" 32 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/util" 33 | ) 34 | 35 | const ( 36 | envDisableHealthChecks = "DP_DISABLE_HEALTHCHECKS" 37 | allHealthChecks = "xids" 38 | ) 39 | 40 | // Device couples an underlying pluginapi.Device type with its device node paths 41 | type Device struct { 42 | pluginapi.Device 43 | Paths []string 44 | Index string 45 | Memory uint64 46 | } 47 | 48 | // ResourceManager provides an interface for listing a set of Devices and checking health on them 49 | type ResourceManager interface { 50 | Devices() []*Device 51 | CheckHealth(stop <-chan interface{}, devices []*Device, unhealthy chan<- *Device) 52 | } 53 | 54 | // GpuDeviceManager implements the ResourceManager interface for full GPU devices 55 | type GpuDeviceManager struct { 56 | skipMigEnabledGPUs bool 57 | } 58 | 59 | // MigDeviceManager implements the ResourceManager interface for MIG devices 60 | type MigDeviceManager struct { 61 | 
strategy MigStrategy 62 | resource string 63 | } 64 | 65 | func check(ret nvml.Return) { 66 | if ret != nvml.SUCCESS { 67 | log.Panicln("Fatal:", ret) 68 | } 69 | } 70 | 71 | // NewGpuDeviceManager returns a reference to a new GpuDeviceManager 72 | func NewGpuDeviceManager(skipMigEnabledGPUs bool) *GpuDeviceManager { 73 | return &GpuDeviceManager{ 74 | skipMigEnabledGPUs: skipMigEnabledGPUs, 75 | } 76 | } 77 | 78 | // NewMigDeviceManager returns a reference to a new MigDeviceManager 79 | func NewMigDeviceManager(strategy MigStrategy, resource string) *MigDeviceManager { 80 | return &MigDeviceManager{ 81 | strategy: strategy, 82 | resource: resource, 83 | } 84 | } 85 | 86 | // Devices returns a list of devices from the GpuDeviceManager 87 | func (g *GpuDeviceManager) Devices() []*Device { 88 | n, ret := config.Nvml().DeviceGetCount() 89 | check(ret) 90 | if n > util.DeviceLimit { 91 | n = util.DeviceLimit 92 | } 93 | 94 | var devs []*Device 95 | for i := 0; i < n; i++ { 96 | d, ret := config.Nvml().DeviceGetHandleByIndex(i) 97 | check(ret) 98 | 99 | migMode, _, ret := d.GetMigMode() 100 | if ret != nvml.SUCCESS { 101 | if ret == nvml.ERROR_NOT_SUPPORTED { 102 | migMode = nvml.DEVICE_MIG_DISABLE 103 | } else { 104 | check(ret) 105 | } 106 | } 107 | 108 | if migMode == nvml.DEVICE_MIG_ENABLE && g.skipMigEnabledGPUs { 109 | continue 110 | } 111 | 112 | // Automatically enable MIG mode when the plugin is running in MIG mode 113 | if config.Mode == "mig" && migMode != nvml.DEVICE_MIG_ENABLE { 114 | if ret == nvml.ERROR_NOT_SUPPORTED { 115 | klog.V(4).Infof("Node is configed as MIG mode, but GPU %v does not support MIG mode", i) 116 | continue 117 | } 118 | ret, stat := d.SetMigMode(nvml.DEVICE_MIG_ENABLE) 119 | if ret != nvml.SUCCESS || stat != nvml.SUCCESS { 120 | klog.V(4).Infof("Node is configed as MIG mode, but failed to enable MIG mode for GPU %v : ret=%v, stat=%v", i, ret, stat) 121 | continue 122 | } 123 | } 124 | 125 | dev, err := buildDevice(fmt.Sprintf("%v", i), d) 126 | 
if err != nil { 127 | log.Panicln("Fatal:", err) 128 | } 129 | 130 | devs = append(devs, dev) 131 | } 132 | 133 | return devs 134 | } 135 | 136 | // Devices returns a list of devices from the MigDeviceManager 137 | func (m *MigDeviceManager) Devices() []*Device { 138 | n, ret := config.Nvml().DeviceGetCount() 139 | check(ret) 140 | if n > util.DeviceLimit { 141 | n = util.DeviceLimit 142 | } 143 | 144 | var devs []*Device 145 | for i := 0; i < n; i++ { 146 | d, ret := config.Nvml().DeviceGetHandleByIndex(i) 147 | check(ret) 148 | 149 | migMode, _, ret := d.GetMigMode() 150 | if ret != nvml.SUCCESS { 151 | if ret == nvml.ERROR_NOT_SUPPORTED { 152 | migMode = nvml.DEVICE_MIG_DISABLE 153 | } else { 154 | check(ret) 155 | } 156 | } 157 | 158 | if migMode != nvml.DEVICE_MIG_ENABLE { 159 | continue 160 | } 161 | 162 | err := config.Device().VisitMigDevices(func(i int, d device.Device, j int, mig device.MigDevice) error { 163 | dev, err := buildMigDevice(fmt.Sprintf("%v:%v", i, j), mig) 164 | if err != nil { 165 | log.Panicln("Fatal:", err) 166 | } 167 | devs = append(devs, dev) 168 | return nil 169 | }) 170 | if err != nil { 171 | log.Fatalf("VisitMigDevices error: %v", err) 172 | } 173 | } 174 | 175 | return devs 176 | } 177 | 178 | // CheckHealth performs health checks on a set of devices, writing to the 'unhealthy' channel with any unhealthy devices 179 | func (g *GpuDeviceManager) CheckHealth(stop <-chan interface{}, devices []*Device, unhealthy chan<- *Device) { 180 | checkHealth(stop, devices, unhealthy) 181 | } 182 | 183 | // CheckHealth performs health checks on a set of devices, writing to the 'unhealthy' channel with any unhealthy devices 184 | func (m *MigDeviceManager) CheckHealth(stop <-chan interface{}, devices []*Device, unhealthy chan<- *Device) { 185 | checkHealth(stop, devices, unhealthy) 186 | } 187 | 188 | func buildDevice(index string, d nvml.Device) (*Device, error) { 189 | uuid, ret := config.Nvml().DeviceGetUUID(d) 190 | if ret != nvml.SUCCESS { 
191 | return nil, fmt.Errorf("error getting UUID of device: %v", ret) 192 | } 193 | 194 | minor, ret := config.Nvml().DeviceGetMinorNumber(d) 195 | if ret != nvml.SUCCESS { 196 | return nil, fmt.Errorf("error getting minor number of device: %v", ret) 197 | } 198 | paths := []string{fmt.Sprintf("/dev/nvidia%d", minor)} 199 | 200 | memory, ret := config.Nvml().DeviceGetMemoryInfo(d) 201 | if ret != nvml.SUCCESS { 202 | return nil, fmt.Errorf("error getting memory info of device: %v", ret) 203 | } 204 | 205 | hasNuma, numa, err := getNumaNode(d) 206 | if err != nil { 207 | return nil, fmt.Errorf("error getting device NUMA node: %v", err) 208 | } 209 | 210 | dev := Device{} 211 | dev.ID = uuid 212 | dev.Health = pluginapi.Healthy 213 | dev.Paths = paths 214 | dev.Index = index 215 | dev.Memory = memory.Total / (1024 * 1024) 216 | if hasNuma { 217 | dev.Topology = &pluginapi.TopologyInfo{ 218 | Nodes: []*pluginapi.NUMANode{ 219 | { 220 | ID: int64(numa), 221 | }, 222 | }, 223 | } 224 | } 225 | return &dev, nil 226 | } 227 | 228 | func buildMigDevice(index string, d device.MigDevice) (*Device, error) { 229 | uuid, ret := config.Nvml().DeviceGetUUID(d) 230 | if ret != nvml.SUCCESS { 231 | return nil, fmt.Errorf("error getting UUID of device: %v", ret) 232 | } 233 | 234 | paths, err := getMigPaths(d) 235 | if err != nil { 236 | return nil, fmt.Errorf("error getting MIG paths of device: %v", err) 237 | } 238 | 239 | memory, ret := config.Nvml().DeviceGetMemoryInfo(d) 240 | if ret != nvml.SUCCESS { 241 | return nil, fmt.Errorf("error getting memory info of device: %v", ret) 242 | } 243 | 244 | parent, ret := d.GetDeviceHandleFromMigDeviceHandle() 245 | if ret != nvml.SUCCESS { 246 | return nil, fmt.Errorf("error getting parent GPU device from MIG device: %v", ret) 247 | } 248 | hasNuma, numa, err := getNumaNode(parent) 249 | if err != nil { 250 | return nil, fmt.Errorf("error getting device NUMA node: %v", err) 251 | } 252 | 253 | dev := Device{} 254 | dev.ID = uuid 255 | 
dev.Health = pluginapi.Healthy 256 | dev.Paths = paths 257 | dev.Index = index 258 | dev.Memory = memory.Total / (1024 * 1024) 259 | if hasNuma { 260 | dev.Topology = &pluginapi.TopologyInfo{ 261 | Nodes: []*pluginapi.NUMANode{ 262 | { 263 | ID: int64(numa), 264 | }, 265 | }, 266 | } 267 | } 268 | return &dev, nil 269 | } 270 | 271 | func getMigPaths(d device.MigDevice) ([]string, error) { 272 | capDevicePaths, err := GetMigCapabilityDevicePaths() 273 | if err != nil { 274 | return nil, fmt.Errorf("error getting MIG capability device paths: %v", err) 275 | } 276 | 277 | gi, ret := d.GetGpuInstanceId() 278 | if ret != nvml.SUCCESS { 279 | return nil, fmt.Errorf("error getting GPU Instance ID: %v", ret) 280 | } 281 | 282 | ci, ret := d.GetComputeInstanceId() 283 | if ret != nvml.SUCCESS { 284 | return nil, fmt.Errorf("error getting Compute Instance ID: %v", ret) 285 | } 286 | 287 | parent, ret := d.GetDeviceHandleFromMigDeviceHandle() 288 | if ret != nvml.SUCCESS { 289 | return nil, fmt.Errorf("error getting parent device: %v", ret) 290 | } 291 | minor, ret := parent.GetMinorNumber() 292 | if ret != nvml.SUCCESS { 293 | return nil, fmt.Errorf("error getting GPU device minor number: %v", ret) 294 | } 295 | parentPath := fmt.Sprintf("/dev/nvidia%d", minor) 296 | 297 | giCapPath := fmt.Sprintf(nvidiaCapabilitiesPath+"/gpu%d/mig/gi%d/access", minor, gi) 298 | if _, exists := capDevicePaths[giCapPath]; !exists { 299 | return nil, fmt.Errorf("missing MIG GPU instance capability path: %v", giCapPath) 300 | } 301 | 302 | ciCapPath := fmt.Sprintf(nvidiaCapabilitiesPath+"/gpu%d/mig/gi%d/ci%d/access", minor, gi, ci) 303 | if _, exists := capDevicePaths[ciCapPath]; !exists { 304 | return nil, fmt.Errorf("missing MIG GPU instance capability path: %v", giCapPath) 305 | } 306 | 307 | devicePaths := []string{ 308 | parentPath, 309 | capDevicePaths[giCapPath], 310 | capDevicePaths[ciCapPath], 311 | } 312 | 313 | return devicePaths, nil 314 | } 315 | 316 | func getNumaNode(d 
nvml.Device) (bool, int, error) { 317 | pciInfo, ret := d.GetPciInfo() 318 | if ret != nvml.SUCCESS { 319 | return false, 0, fmt.Errorf("error getting PCI Bus Info of device: %v", ret) 320 | } 321 | 322 | // Discard leading zeros. 323 | busID := strings.ToLower(strings.TrimPrefix(int8Slice(pciInfo.BusId[:]).String(), "0000")) 324 | 325 | b, err := os.ReadFile(fmt.Sprintf("/sys/bus/pci/devices/%s/numa_node", busID)) 326 | if err != nil { 327 | return false, 0, nil 328 | } 329 | 330 | node, err := strconv.Atoi(string(bytes.TrimSpace(b))) 331 | if err != nil { 332 | return false, 0, fmt.Errorf("error parsing value for NUMA node: %v", err) 333 | } 334 | 335 | if node < 0 { 336 | return false, 0, nil 337 | } 338 | 339 | return true, node, nil 340 | } 341 | 342 | func checkHealth(stop <-chan interface{}, devices []*Device, unhealthy chan<- *Device) { 343 | disableHealthChecks := strings.ToLower(os.Getenv(envDisableHealthChecks)) 344 | if disableHealthChecks == "all" { 345 | disableHealthChecks = allHealthChecks 346 | } 347 | if strings.Contains(disableHealthChecks, "xids") { 348 | return 349 | } 350 | 351 | // FIXME: formalize the full list and document it. 
352 | // http://docs.nvidia.com/deploy/xid-errors/index.html#topic_4 353 | // Application errors: the GPU should still be healthy 354 | applicationErrorXids := []uint64{ 355 | 13, // Graphics Engine Exception 356 | 31, // GPU memory page fault 357 | 43, // GPU stopped processing 358 | 45, // Preemptive cleanup, due to previous errors 359 | 68, // Video processor exception 360 | } 361 | 362 | skippedXids := make(map[uint64]bool) 363 | for _, id := range applicationErrorXids { 364 | skippedXids[id] = true 365 | } 366 | 367 | for _, additionalXid := range getAdditionalXids(disableHealthChecks) { 368 | skippedXids[additionalXid] = true 369 | } 370 | 371 | eventSet, ret := config.Nvml().EventSetCreate() 372 | if ret != nvml.SUCCESS { 373 | klog.Warningf("could not create event set: %v", ret) 374 | return 375 | } 376 | defer eventSet.Free() 377 | 378 | parentToDeviceMap := make(map[string]*Device) 379 | deviceIDToGiMap := make(map[string]int) 380 | deviceIDToCiMap := make(map[string]int) 381 | 382 | eventMask := uint64(nvml.EventTypeXidCriticalError | nvml.EventTypeDoubleBitEccError | nvml.EventTypeSingleBitEccError) 383 | for _, d := range devices { 384 | uuid, gi, ci, err := getDevicePlacement(d) 385 | if err != nil { 386 | klog.Warningf("Could not determine device placement for %v: %v; Marking it unhealthy.", d.ID, err) 387 | unhealthy <- d 388 | continue 389 | } 390 | deviceIDToGiMap[d.ID] = gi 391 | deviceIDToCiMap[d.ID] = ci 392 | parentToDeviceMap[uuid] = d 393 | 394 | gpu, ret := config.Nvml().DeviceGetHandleByUUID(uuid) 395 | if ret != nvml.SUCCESS { 396 | klog.Infof("unable to get device handle from UUID: %v; marking it as unhealthy", ret) 397 | unhealthy <- d 398 | continue 399 | } 400 | 401 | supportedEvents, ret := gpu.GetSupportedEventTypes() 402 | if ret != nvml.SUCCESS { 403 | klog.Infof("Unable to determine the supported events for %v: %v; marking it as unhealthy", d.ID, ret) 404 | unhealthy <- d 405 | continue 406 | } 407 | 408 | ret = 
gpu.RegisterEvents(eventMask&supportedEvents, eventSet) 409 | if ret == nvml.ERROR_NOT_SUPPORTED { 410 | klog.Warningf("Device %v is too old to support healthchecking.", d.ID) 411 | } 412 | if ret != nvml.SUCCESS { 413 | klog.Infof("Marking device %v as unhealthy: %v", d.ID, ret) 414 | unhealthy <- d 415 | } 416 | } 417 | 418 | for { 419 | select { 420 | case <-stop: 421 | return 422 | default: 423 | } 424 | 425 | e, ret := eventSet.Wait(5000) 426 | if ret == nvml.ERROR_TIMEOUT { 427 | continue 428 | } 429 | if ret != nvml.SUCCESS { 430 | klog.Infof("Error waiting for event: %v; Marking all devices as unhealthy", ret) 431 | for _, d := range devices { 432 | unhealthy <- d 433 | } 434 | continue 435 | } 436 | 437 | if e.EventType != nvml.EventTypeXidCriticalError { 438 | klog.Infof("Skipping non-nvmlEventTypeXidCriticalError event: %+v", e) 439 | continue 440 | } 441 | 442 | if skippedXids[e.EventData] { 443 | klog.Infof("Skipping event %+v", e) 444 | continue 445 | } 446 | 447 | klog.Infof("Processing event %+v", e) 448 | eventUUID, ret := e.Device.GetUUID() 449 | if ret != nvml.SUCCESS { 450 | // If we cannot reliably determine the device UUID, we mark all devices as unhealthy. 
451 | klog.Infof("Failed to determine uuid for event %v: %v; Marking all devices as unhealthy.", e, ret) 452 | for _, d := range devices { 453 | unhealthy <- d 454 | } 455 | continue 456 | } 457 | 458 | d, exists := parentToDeviceMap[eventUUID] 459 | if !exists { 460 | klog.Infof("Ignoring event for unexpected device: %v", eventUUID) 461 | continue 462 | } 463 | 464 | if d.IsMigDevice() && e.GpuInstanceId != 0xFFFFFFFF && e.ComputeInstanceId != 0xFFFFFFFF { 465 | gi := deviceIDToGiMap[d.ID] 466 | ci := deviceIDToCiMap[d.ID] 467 | if !(uint32(gi) == e.GpuInstanceId && uint32(ci) == e.ComputeInstanceId) { 468 | continue 469 | } 470 | klog.Infof("Event for mig device %v (gi=%v, ci=%v)", d.ID, gi, ci) 471 | } 472 | 473 | klog.Infof("XidCriticalError: Xid=%d on Device=%s; marking device as unhealthy.", e.EventData, d.ID) 474 | unhealthy <- d 475 | } 476 | } 477 | 478 | // getAdditionalXids returns a list of additional Xids to skip from the specified string. 479 | // The input is treaded as a comma-separated string and all valid uint64 values are considered as Xid values. Invalid values 480 | // are ignored. 481 | func getAdditionalXids(input string) []uint64 { 482 | if input == "" { 483 | return nil 484 | } 485 | 486 | var additionalXids []uint64 487 | for _, additionalXid := range strings.Split(input, ",") { 488 | trimmed := strings.TrimSpace(additionalXid) 489 | if trimmed == "" { 490 | continue 491 | } 492 | xid, err := strconv.ParseUint(trimmed, 10, 64) 493 | if err != nil { 494 | log.Printf("Ignoring malformed Xid value %v: %v", trimmed, err) 495 | continue 496 | } 497 | additionalXids = append(additionalXids, xid) 498 | } 499 | 500 | return additionalXids 501 | } 502 | 503 | // getDevicePlacement returns the placement of the specified device. 504 | // For a MIG device the placement is defined by the 3-tuple 505 | // For a full device the returned 3-tuple is the device's uuid and 0xFFFFFFFF for the other two elements. 
506 | func getDevicePlacement(d *Device) (string, int, int, error) { 507 | if !d.IsMigDevice() { 508 | return d.GetUUID(), 0xFFFFFFFF, 0xFFFFFFFF, nil 509 | } 510 | return getMigDeviceParts(d) 511 | } 512 | 513 | // getMigDeviceParts returns the parent GI and CI ids of the MIG device. 514 | func getMigDeviceParts(d *Device) (string, int, int, error) { 515 | if !d.IsMigDevice() { 516 | return "", 0, 0, fmt.Errorf("cannot get GI and CI of full device") 517 | } 518 | 519 | uuid := d.GetUUID() 520 | // For older driver versions, the call to DeviceGetHandleByUUID will fail for MIG devices. 521 | mig, ret := config.Nvml().DeviceGetHandleByUUID(uuid) 522 | if ret == nvml.SUCCESS { 523 | parentHandle, ret := mig.GetDeviceHandleFromMigDeviceHandle() 524 | if ret != nvml.SUCCESS { 525 | return "", 0, 0, fmt.Errorf("failed to get parent device handle: %v", ret) 526 | } 527 | 528 | parentUUID, ret := parentHandle.GetUUID() 529 | if ret != nvml.SUCCESS { 530 | return "", 0, 0, fmt.Errorf("failed to get parent uuid: %v", ret) 531 | } 532 | gi, ret := mig.GetGpuInstanceId() 533 | if ret != nvml.SUCCESS { 534 | return "", 0, 0, fmt.Errorf("failed to get GPU Instance ID: %v", ret) 535 | } 536 | 537 | ci, ret := mig.GetComputeInstanceId() 538 | if ret != nvml.SUCCESS { 539 | return "", 0, 0, fmt.Errorf("failed to get Compute Instance ID: %v", ret) 540 | } 541 | return parentUUID, gi, ci, nil 542 | } 543 | return parseMigDeviceUUID(uuid) 544 | } 545 | 546 | // parseMigDeviceUUID splits the MIG device UUID into the parent device UUID and ci and gi 547 | func parseMigDeviceUUID(mig string) (string, int, int, error) { 548 | tokens := strings.SplitN(mig, "-", 2) 549 | if len(tokens) != 2 || tokens[0] != "MIG" { 550 | return "", 0, 0, fmt.Errorf("unable to parse UUID as MIG device") 551 | } 552 | 553 | tokens = strings.SplitN(tokens[1], "/", 3) 554 | if len(tokens) != 3 || !strings.HasPrefix(tokens[0], "GPU-") { 555 | return "", 0, 0, fmt.Errorf("unable to parse UUID as MIG device") 556 | 
} 557 | 558 | gi, err := strconv.ParseInt(tokens[1], 10, 32) 559 | if err != nil { 560 | return "", 0, 0, fmt.Errorf("unable to parse UUID as MIG device") 561 | } 562 | 563 | ci, err := strconv.ParseInt(tokens[2], 10, 32) 564 | if err != nil { 565 | return "", 0, 0, fmt.Errorf("unable to parse UUID as MIG device") 566 | } 567 | 568 | return tokens[0], int(gi), int(ci), nil 569 | } 570 | 571 | // IsMigDevice checks whether d is a MIG device or not. 572 | func (d Device) IsMigDevice() bool { 573 | return strings.Contains(d.Index, ":") 574 | } 575 | 576 | // GetUUID returns the UUID for the device from the annotated ID. 577 | func (d Device) GetUUID() string { 578 | return AnnotatedID(d.ID).GetID() 579 | } 580 | 581 | // AnnotatedID represents an ID with a replica number embedded in it. 582 | type AnnotatedID string 583 | 584 | // Split splits an AnnotatedID into its ID and replica number parts. 585 | func (r AnnotatedID) Split() (string, int) { 586 | split := strings.SplitN(string(r), "::", 2) 587 | if len(split) != 2 { 588 | return string(r), 0 589 | } 590 | replica, _ := strconv.ParseInt(split[1], 10, 0) 591 | return split[0], int(replica) 592 | } 593 | 594 | // GetID returns just the ID part of the replicated ID 595 | func (r AnnotatedID) GetID() string { 596 | id, _ := r.Split() 597 | return id 598 | } 599 | -------------------------------------------------------------------------------- /pkg/plugin/vgpu/plugin.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package vgpu 18 | 19 | import ( 20 | "bytes" 21 | "errors" 22 | "fmt" 23 | "log" 24 | "net" 25 | "os" 26 | "os/exec" 27 | "path" 28 | "strings" 29 | "time" 30 | 31 | "gopkg.in/yaml.v2" 32 | "k8s.io/apimachinery/pkg/util/uuid" 33 | "k8s.io/klog/v2" 34 | "volcano.sh/k8s-device-plugin/pkg/lock" 35 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config" 36 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/util" 37 | 38 | "github.com/NVIDIA/go-gpuallocator/gpuallocator" 39 | "golang.org/x/net/context" 40 | "google.golang.org/grpc" 41 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" 42 | ) 43 | 44 | // Constants to represent the various device list strategies 45 | const ( 46 | DeviceListStrategyEnvvar = "envvar" 47 | DeviceListStrategyVolumeMounts = "volume-mounts" 48 | ) 49 | 50 | // Constants to represent the various device id strategies 51 | const ( 52 | DeviceIDStrategyUUID = "uuid" 53 | DeviceIDStrategyIndex = "index" 54 | ) 55 | 56 | // Constants for use by the 'volume-mounts' device list strategy 57 | const ( 58 | deviceListAsVolumeMountsHostPath = "/dev/null" 59 | deviceListAsVolumeMountsContainerPathRoot = "/var/run/nvidia-container-devices" 60 | ) 61 | 62 | // NvidiaDevicePlugin implements the Kubernetes device plugin API 63 | type NvidiaDevicePlugin struct { 64 | ResourceManager 65 | deviceCache *DeviceCache 66 | resourceName string 67 | deviceListEnvvar string 68 | allocatePolicy gpuallocator.Policy 69 | socket string 70 | schedulerConfig *config.NvidiaConfig 71 | operatingMode string 72 | 73 | virtualDevices 
[]*pluginapi.Device 74 | migCurrent config.MigPartedSpec 75 | 76 | server *grpc.Server 77 | cachedDevices []*Device 78 | health chan *Device 79 | stop chan interface{} 80 | changed chan struct{} 81 | migStrategy string 82 | } 83 | 84 | // NewNvidiaDevicePlugin returns an initialized NvidiaDevicePlugin 85 | func NewNvidiaDevicePlugin(resourceName string, deviceCache *DeviceCache, allocatePolicy gpuallocator.Policy, socket string, cfg *config.NvidiaConfig) *NvidiaDevicePlugin { 86 | dp := &NvidiaDevicePlugin{ 87 | deviceCache: deviceCache, 88 | resourceName: resourceName, 89 | allocatePolicy: allocatePolicy, 90 | socket: socket, 91 | migStrategy: "none", 92 | operatingMode: config.Mode, 93 | schedulerConfig: cfg, 94 | // These will be reinitialized every 95 | // time the plugin server is restarted. 96 | server: nil, 97 | health: nil, 98 | stop: nil, 99 | } 100 | return dp 101 | } 102 | 103 | // NewNvidiaDevicePlugin returns an initialized NvidiaDevicePlugin 104 | func NewMIGNvidiaDevicePlugin(resourceName string, resourceManager ResourceManager, deviceListEnvvar string, allocatePolicy gpuallocator.Policy, socket string) *NvidiaDevicePlugin { 105 | return &NvidiaDevicePlugin{ 106 | ResourceManager: resourceManager, 107 | resourceName: resourceName, 108 | deviceListEnvvar: deviceListEnvvar, 109 | allocatePolicy: allocatePolicy, 110 | socket: socket, 111 | 112 | // These will be reinitialized every 113 | // time the plugin server is restarted. 114 | cachedDevices: nil, 115 | server: nil, 116 | health: nil, 117 | stop: nil, 118 | migStrategy: "mixed", 119 | } 120 | } 121 | 122 | func (m *NvidiaDevicePlugin) initialize() { 123 | if strings.Compare(m.migStrategy, "mixed") == 0 { 124 | m.cachedDevices = m.ResourceManager.Devices() 125 | } 126 | m.server = grpc.NewServer([]grpc.ServerOption{}...) 
127 | m.health = make(chan *Device) 128 | m.stop = make(chan interface{}) 129 | m.virtualDevices, _ = util.GetDevices(config.GPUMemoryFactor) 130 | } 131 | 132 | func (m *NvidiaDevicePlugin) cleanup() { 133 | close(m.stop) 134 | m.server = nil 135 | m.health = nil 136 | m.stop = nil 137 | } 138 | 139 | // Start starts the gRPC server, registers the device plugin with the Kubelet, 140 | // and starts the device healthchecks. 141 | func (m *NvidiaDevicePlugin) Start() error { 142 | m.initialize() 143 | 144 | err := m.Serve() 145 | if err != nil { 146 | log.Printf("Could not start device plugin for '%s': %s", m.resourceName, err) 147 | m.cleanup() 148 | return err 149 | } 150 | log.Printf("Starting to serve '%s' on %s", m.resourceName, m.socket) 151 | 152 | err = m.Register() 153 | if err != nil { 154 | log.Printf("Could not register device plugin: %s", err) 155 | m.Stop() 156 | return err 157 | } 158 | log.Printf("Registered device plugin for '%s' with Kubelet", m.resourceName) 159 | 160 | if m.operatingMode == "mig" { 161 | cmd := exec.Command("nvidia-mig-parted", "export") 162 | var stdout, stderr bytes.Buffer 163 | cmd.Stdout = &stdout 164 | cmd.Stderr = &stderr 165 | err := cmd.Run() 166 | if err != nil { 167 | klog.Fatalf("nvidia-mig-parted failed with %s\n", err) 168 | } 169 | outStr := stdout.Bytes() 170 | yaml.Unmarshal(outStr, &m.migCurrent) 171 | os.WriteFile("/tmp/migconfig.yaml", outStr, os.ModePerm) 172 | if len(m.migCurrent.MigConfigs["current"]) == 1 && len(m.migCurrent.MigConfigs["current"][0].Devices) == 0 { 173 | idx := 0 174 | m.migCurrent.MigConfigs["current"][0].Devices = make([]int32, 0) 175 | for idx < util.GetDeviceNums() { 176 | m.migCurrent.MigConfigs["current"][0].Devices = append(m.migCurrent.MigConfigs["current"][0].Devices, int32(idx)) 177 | idx++ 178 | } 179 | } 180 | klog.Infoln("Mig export", m.migCurrent) 181 | } 182 | 183 | if strings.Compare(m.migStrategy, "none") == 0 { 184 | m.deviceCache.AddNotifyChannel("plugin", m.health) 185 | 
} else if strings.Compare(m.migStrategy, "mixed") == 0 { 186 | go m.CheckHealth(m.stop, m.cachedDevices, m.health) 187 | } else { 188 | log.Panicln("migstrategy not recognized", m.migStrategy) 189 | } 190 | return nil 191 | } 192 | 193 | // Stop stops the gRPC server. 194 | func (m *NvidiaDevicePlugin) Stop() error { 195 | if m == nil || m.server == nil { 196 | return nil 197 | } 198 | log.Printf("Stopping to serve '%s' on %s", m.resourceName, m.socket) 199 | m.deviceCache.RemoveNotifyChannel("plugin") 200 | m.server.Stop() 201 | if err := os.Remove(m.socket); err != nil && !os.IsNotExist(err) { 202 | return err 203 | } 204 | m.cleanup() 205 | return nil 206 | } 207 | 208 | // Serve starts the gRPC server of the device plugin. 209 | func (m *NvidiaDevicePlugin) Serve() error { 210 | os.Remove(m.socket) 211 | sock, err := net.Listen("unix", m.socket) 212 | if err != nil { 213 | return err 214 | } 215 | 216 | pluginapi.RegisterDevicePluginServer(m.server, m) 217 | 218 | go func() { 219 | lastCrashTime := time.Now() 220 | restartCount := 0 221 | for { 222 | log.Printf("Starting GRPC server for '%s'", m.resourceName) 223 | err := m.server.Serve(sock) 224 | if err == nil { 225 | break 226 | } 227 | 228 | log.Printf("GRPC server for '%s' crashed with error: %v", m.resourceName, err) 229 | 230 | // restart if it has not been too often 231 | // i.e. if server has crashed more than 5 times and it didn't last more than one hour each time 232 | if restartCount > 5 { 233 | // quit 234 | log.Fatalf("GRPC server for '%s' has repeatedly crashed recently. Quitting", m.resourceName) 235 | } 236 | timeSinceLastCrash := time.Since(lastCrashTime).Seconds() 237 | lastCrashTime = time.Now() 238 | if timeSinceLastCrash > 3600 { 239 | // it has been one hour since the last crash.. 
reset the count 240 | // to reflect on the frequency 241 | restartCount = 1 242 | } else { 243 | restartCount++ 244 | } 245 | } 246 | }() 247 | 248 | // Wait for server to start by launching a blocking connexion 249 | conn, err := m.dial(m.socket, 5*time.Second) 250 | if err != nil { 251 | return err 252 | } 253 | conn.Close() 254 | 255 | return nil 256 | } 257 | 258 | // Register registers the device plugin for the given resourceName with Kubelet. 259 | func (m *NvidiaDevicePlugin) Register() error { 260 | conn, err := m.dial(pluginapi.KubeletSocket, 5*time.Second) 261 | if err != nil { 262 | return err 263 | } 264 | defer conn.Close() 265 | 266 | client := pluginapi.NewRegistrationClient(conn) 267 | reqt := &pluginapi.RegisterRequest{ 268 | Version: pluginapi.Version, 269 | Endpoint: path.Base(m.socket), 270 | ResourceName: m.resourceName, 271 | Options: &pluginapi.DevicePluginOptions{}, 272 | } 273 | 274 | _, err = client.Register(context.Background(), reqt) 275 | if err != nil { 276 | return err 277 | } 278 | return nil 279 | } 280 | 281 | // GetDevicePluginOptions returns the values of the optional settings for this plugin 282 | func (m *NvidiaDevicePlugin) GetDevicePluginOptions(context.Context, *pluginapi.Empty) (*pluginapi.DevicePluginOptions, error) { 283 | options := &pluginapi.DevicePluginOptions{} 284 | return options, nil 285 | } 286 | 287 | // ListAndWatch lists devices and update that list according to the health status 288 | func (m *NvidiaDevicePlugin) ListAndWatch(e *pluginapi.Empty, s pluginapi.DevicePlugin_ListAndWatchServer) error { 289 | if m.resourceName == util.ResourceMem { 290 | err := s.Send(&pluginapi.ListAndWatchResponse{Devices: m.virtualDevices}) 291 | if err != nil { 292 | log.Fatalf("failed sending devices %d: %v", len(m.virtualDevices), err) 293 | } 294 | 295 | for { 296 | select { 297 | case <-m.stop: 298 | return nil 299 | case d := <-m.health: 300 | // FIXME: there is no way to recover from the Unhealthy state. 
301 | //isChange := false 302 | //if d.Health != pluginapi.Unhealthy { 303 | //isChange = true 304 | //} 305 | d.Health = pluginapi.Unhealthy 306 | log.Printf("'%s' device marked unhealthy: %s", m.resourceName, d.ID) 307 | s.Send(&pluginapi.ListAndWatchResponse{Devices: m.virtualDevices}) 308 | //if isChange { 309 | // m.kubeInteractor.PatchUnhealthyGPUListOnNode(m.physicalDevices) 310 | //} 311 | } 312 | } 313 | 314 | } else { 315 | _ = s.Send(&pluginapi.ListAndWatchResponse{Devices: m.apiDevices()}) 316 | for { 317 | select { 318 | case <-m.stop: 319 | return nil 320 | case d := <-m.health: 321 | // FIXME: there is no way to recover from the Unhealthy state. 322 | //d.Health = pluginapi.Unhealthy 323 | log.Printf("'%s' device marked unhealthy: %s", m.resourceName, d.ID) 324 | _ = s.Send(&pluginapi.ListAndWatchResponse{Devices: m.apiDevices()}) 325 | } 326 | } 327 | } 328 | } 329 | 330 | func (m *NvidiaDevicePlugin) MIGAllocate(ctx context.Context, reqs *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) { 331 | responses := pluginapi.AllocateResponse{} 332 | for _, req := range reqs.ContainerRequests { 333 | for _, id := range req.DevicesIDs { 334 | if !m.deviceExists(id) { 335 | return nil, fmt.Errorf("invalid allocation request for '%s': unknown device: %s", m.resourceName, id) 336 | } 337 | } 338 | 339 | response := pluginapi.ContainerAllocateResponse{} 340 | 341 | uuids := req.DevicesIDs 342 | deviceIDs := m.deviceIDsFromUUIDs(uuids) 343 | 344 | response.Envs = m.apiEnvs(m.deviceListEnvvar, deviceIDs) 345 | 346 | klog.Infof("response=", response.Envs) 347 | responses.ContainerResponses = append(responses.ContainerResponses, &response) 348 | } 349 | 350 | return &responses, nil 351 | } 352 | 353 | // Allocate which return list of devices. 
354 | func (m *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) { 355 | if len(reqs.ContainerRequests) > 1 { 356 | return &pluginapi.AllocateResponse{}, errors.New("multiple Container Requests not supported") 357 | } 358 | if strings.Compare(m.migStrategy, "mixed") == 0 { 359 | return m.MIGAllocate(ctx, reqs) 360 | } 361 | responses := pluginapi.AllocateResponse{} 362 | 363 | if strings.Compare(m.resourceName, util.ResourceMem) == 0 || strings.Compare(m.resourceName, util.ResourceCores) == 0 { 364 | for range reqs.ContainerRequests { 365 | responses.ContainerResponses = append(responses.ContainerResponses, &pluginapi.ContainerAllocateResponse{}) 366 | } 367 | return &responses, nil 368 | } 369 | nodename := os.Getenv("NODE_NAME") 370 | 371 | current, err := util.GetPendingPod(nodename) 372 | if err != nil { 373 | lock.ReleaseNodeLock(nodename, util.VGPUDeviceName) 374 | return &pluginapi.AllocateResponse{}, err 375 | } 376 | if current == nil { 377 | klog.Errorf("no pending pod found on node %s", nodename) 378 | lock.ReleaseNodeLock(nodename, util.VGPUDeviceName) 379 | return &pluginapi.AllocateResponse{}, errors.New("no pending pod found on node") 380 | } 381 | 382 | for idx := range reqs.ContainerRequests { 383 | currentCtr, devreq, err := util.GetNextDeviceRequest(util.NvidiaGPUDevice, *current) 384 | klog.Infoln("deviceAllocateFromAnnotation=", devreq) 385 | if err != nil { 386 | klog.Errorln("get device from annotation failed", err.Error()) 387 | util.PodAllocationFailed(nodename, current) 388 | return &pluginapi.AllocateResponse{}, err 389 | } 390 | if len(devreq) != len(reqs.ContainerRequests[idx].DevicesIDs) { 391 | klog.Errorln("device number not matched", devreq, reqs.ContainerRequests[idx].DevicesIDs) 392 | util.PodAllocationFailed(nodename, current) 393 | return &pluginapi.AllocateResponse{}, errors.New("device number not matched") 394 | } 395 | 396 | response := 
pluginapi.ContainerAllocateResponse{} 397 | response.Envs = make(map[string]string) 398 | response.Envs["NVIDIA_VISIBLE_DEVICES"] = strings.Join(m.GetContainerDeviceStrArray(devreq), ",") 399 | 400 | err = util.EraseNextDeviceTypeFromAnnotation(util.NvidiaGPUDevice, *current) 401 | if err != nil { 402 | klog.Errorln("Erase annotation failed", err.Error()) 403 | util.PodAllocationFailed(nodename, current) 404 | return &pluginapi.AllocateResponse{}, err 405 | } 406 | 407 | if m.operatingMode != "mig" { 408 | 409 | for i, dev := range devreq { 410 | limitKey := fmt.Sprintf("CUDA_DEVICE_MEMORY_LIMIT_%v", i) 411 | response.Envs[limitKey] = fmt.Sprintf("%vm", dev.Usedmem*int32(config.GPUMemoryFactor)) 412 | } 413 | response.Envs["CUDA_DEVICE_SM_LIMIT"] = fmt.Sprint(devreq[0].Usedcores) 414 | response.Envs["CUDA_DEVICE_MEMORY_SHARED_CACHE"] = fmt.Sprintf("/tmp/vgpu/%v.cache", uuid.NewUUID()) 415 | 416 | cacheFileHostDirectory := "/tmp/vgpu/containers/" + string(current.UID) + "_" + currentCtr.Name 417 | os.MkdirAll(cacheFileHostDirectory, 0777) 418 | os.Chmod(cacheFileHostDirectory, 0777) 419 | os.MkdirAll("/tmp/vgpulock", 0777) 420 | os.Chmod("/tmp/vgpulock", 0777) 421 | hostHookPath := os.Getenv("HOOK_PATH") 422 | 423 | response.Mounts = append(response.Mounts, 424 | &pluginapi.Mount{ContainerPath: "/usr/local/vgpu/libvgpu.so", 425 | HostPath: hostHookPath + "/libvgpu.so", 426 | ReadOnly: true}, 427 | &pluginapi.Mount{ContainerPath: "/tmp/vgpu", 428 | HostPath: cacheFileHostDirectory, 429 | ReadOnly: false}, 430 | &pluginapi.Mount{ContainerPath: "/tmp/vgpulock", 431 | HostPath: "/tmp/vgpulock", 432 | ReadOnly: false}, 433 | ) 434 | found := false 435 | for _, val := range currentCtr.Env { 436 | if strings.Compare(val.Name, "CUDA_DISABLE_CONTROL") == 0 { 437 | found = true 438 | break 439 | } 440 | } 441 | if !found { 442 | response.Mounts = append(response.Mounts, &pluginapi.Mount{ContainerPath: "/etc/ld.so.preload", 443 | HostPath: hostHookPath + "/ld.so.preload", 444 
| ReadOnly: true}, 445 | ) 446 | } 447 | } 448 | responses.ContainerResponses = append(responses.ContainerResponses, &response) 449 | } 450 | klog.Infoln("Allocate Response", responses.ContainerResponses) 451 | util.PodAllocationTrySuccess(nodename, current) 452 | return &responses, nil 453 | } 454 | 455 | // PreStartContainer is unimplemented for this plugin 456 | func (m *NvidiaDevicePlugin) PreStartContainer(context.Context, *pluginapi.PreStartContainerRequest) (*pluginapi.PreStartContainerResponse, error) { 457 | return &pluginapi.PreStartContainerResponse{}, nil 458 | } 459 | 460 | // dial establishes the gRPC communication with the registered device plugin. 461 | func (m *NvidiaDevicePlugin) dial(unixSocketPath string, timeout time.Duration) (*grpc.ClientConn, error) { 462 | c, err := grpc.Dial(unixSocketPath, grpc.WithInsecure(), grpc.WithBlock(), 463 | grpc.WithTimeout(timeout), 464 | grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) { 465 | return net.DialTimeout("unix", addr, timeout) 466 | }), 467 | ) 468 | 469 | if err != nil { 470 | return nil, err 471 | } 472 | 473 | return c, nil 474 | } 475 | 476 | func (m *NvidiaDevicePlugin) Devices() []*Device { 477 | if strings.Compare(m.migStrategy, "none") == 0 { 478 | return m.deviceCache.GetCache() 479 | } 480 | if strings.Compare(m.migStrategy, "mixed") == 0 { 481 | return m.ResourceManager.Devices() 482 | } 483 | log.Panic("migStrategy not recognized,exiting...") 484 | return []*Device{} 485 | } 486 | 487 | func (m *NvidiaDevicePlugin) deviceExists(id string) bool { 488 | for _, d := range m.cachedDevices { 489 | if d.ID == id { 490 | return true 491 | } 492 | } 493 | return false 494 | } 495 | 496 | func (m *NvidiaDevicePlugin) deviceIDsFromUUIDs(uuids []string) []string { 497 | return uuids 498 | } 499 | 500 | func (m *NvidiaDevicePlugin) apiDevices() []*pluginapi.Device { 501 | if strings.Compare(m.migStrategy, "mixed") == 0 { 502 | var pdevs []*pluginapi.Device 503 | for _, d 
:= range m.cachedDevices {
			pdevs = append(pdevs, &d.Device)
		}
		return pdevs
	}
	devices := m.Devices()
	var res []*pluginapi.Device

	if strings.Compare(m.resourceName, util.ResourceMem) == 0 {
		for _, dev := range devices {
			klog.Infoln("memory=", dev.Memory, "id=", dev.ID)
			// 32767 virtual slices per device: the kubelet counts these to
			// track vgpu-memory capacity.
			for i := 0; i < 32767; i++ {
				res = append(res, &pluginapi.Device{
					ID:       fmt.Sprintf("%v-memory-%v", dev.ID, i),
					Health:   dev.Health,
					Topology: nil,
				})
			}
		}
		klog.Infoln("res length=", len(res))
		return res
	}
	if strings.Compare(m.resourceName, util.ResourceCores) == 0 {
		for _, dev := range devices {
			// 100 slices per device: percentage granularity for vgpu-cores.
			for i := 0; i < 100; i++ {
				res = append(res, &pluginapi.Device{
					ID:       fmt.Sprintf("%v-core-%v", dev.ID, i),
					Health:   dev.Health,
					Topology: nil,
				})
			}
		}
		return res
	}

	// Default (vgpu-number): DeviceSplitCount shares per physical device.
	for _, dev := range devices {
		for i := uint(0); i < config.DeviceSplitCount; i++ {
			id := fmt.Sprintf("%v-%v", dev.ID, i)
			res = append(res, &pluginapi.Device{
				ID:       id,
				Health:   dev.Health,
				Topology: nil,
			})
		}
	}
	return res
}

// apiEnvs builds the env map handed back in an AllocateResponse.
func (m *NvidiaDevicePlugin) apiEnvs(envvar string, deviceIDs []string) map[string]string {
	return map[string]string{
		envvar: strings.Join(deviceIDs, ","),
	}
}

// ApplyMigTemplate writes the desired MIG layout to disk and asks
// nvidia-mig-parted to realize it.
func (m *NvidiaDevicePlugin) ApplyMigTemplate() {
	data, err := yaml.Marshal(m.migCurrent)
	if err != nil {
		// BUG FIX: previously we logged the marshal failure and then fed the
		// empty payload to nvidia-mig-parted anyway; bail out instead.
		klog.Error("marshal failed", err.Error())
		return
	}
	klog.Infoln("Applying data=", string(data))
	if err := os.WriteFile("/tmp/migconfig.yaml", data, os.ModePerm); err != nil {
		// BUG FIX: a failed write previously went unnoticed and stale config
		// would be applied.
		klog.Errorf("failed to write /tmp/migconfig.yaml: %v", err)
		return
	}
	cmd := exec.Command("nvidia-mig-parted", "apply", "-f", "/tmp/migconfig.yaml")
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	err = cmd.Run()
	if err != nil {
		klog.Fatalf("nvidia-mig-parted failed with %s\n", err)
	}
	outStr := stdout.String()
	klog.Infoln("Mig apply", outStr)
}

// GetContainerDeviceStrArray resolves the UUID list for a container,
// reshaping the MIG geometry on demand when a templated UUID is seen.
func (m *NvidiaDevicePlugin) GetContainerDeviceStrArray(c util.ContainerDevices) []string {
	tmp := []string{}
	needsreset := false
	position := 0
	for _, val := range c {
		if !strings.Contains(val.UUID, "[") {
			// Plain UUID: pass straight through.
			tmp = append(tmp, val.UUID)
		} else {
			// Templated UUID: may require re-applying the MIG geometry
			// before the concrete instance UUID exists.
			devtype, devindex := util.GetIndexAndTypeFromUUID(val.UUID)
			position, needsreset = m.GenerateMigTemplate(devtype, devindex, val)
			if needsreset {
				m.ApplyMigTemplate()
			}
			tmp = append(tmp, util.GetMigUUIDFromIndex(val.UUID, position))
		}
	}
	klog.V(3).Infoln("mig current=", m.migCurrent, ":", needsreset, "position=", position, "uuid lists", tmp)
	return tmp
}

// GenerateMigTemplate computes the MIG geometry required by val on device
// devindex of model devtype. It returns the instance position extracted from
// the UUID and whether the in-memory geometry was changed (and therefore must
// be re-applied via ApplyMigTemplate).
func (m *NvidiaDevicePlugin) GenerateMigTemplate(devtype string, devindex int, val util.ContainerDevice) (int, bool) {
	needsreset := false
	position := -1 // Initialize to an invalid position

	for _, migTemplate := range m.schedulerConfig.MigGeometriesList {
		if containsModel(devtype, migTemplate.Models) {
			klog.InfoS("type found", "Type", devtype, "Models", strings.Join(migTemplate.Models, ", "))

			templateGroupName, pos, err := util.ExtractMigTemplatesFromUUID(val.UUID)
			if err != nil {
				klog.ErrorS(err, "failed to extract template index from UUID", "UUID", val.UUID)
				return -1, false
			}

			// Locate the geometry group named in the UUID.
			templateIdx := -1
			for i, migTemplateEntry := range migTemplate.Geometries {
				if migTemplateEntry.Group == templateGroupName {
					templateIdx = i
					break
				}
			}

			if templateIdx < 0 || templateIdx >= len(migTemplate.Geometries) {
				klog.ErrorS(nil, "invalid template index extracted from UUID", "UUID", val.UUID, "Index", templateIdx)
				return -1, false
			}

			position = pos

			v := migTemplate.Geometries[templateIdx].Instances

			for migidx, migpartedDev := range m.migCurrent.MigConfigs["current"] {
				if containsDevice(devindex, migpartedDev.Devices) {
					// A reset is needed whenever the current counts differ
					// from the desired geometry.
					for _, migTemplateEntry := range v {
						currentCount, ok := migpartedDev.MigDevices[migTemplateEntry.Name]
						expectedCount := migTemplateEntry.Count

						if !ok || currentCount != expectedCount {
							needsreset = true
							klog.InfoS("updated mig device count", "Template", v)
						} else {
							klog.InfoS("incremented mig device count", "TemplateName", migTemplateEntry.Name, "Count", currentCount+1)
						}
					}

					if needsreset {
						// Rebuild this entry's device map from the template.
						for k := range m.migCurrent.MigConfigs["current"][migidx].MigDevices {
							delete(m.migCurrent.MigConfigs["current"][migidx].MigDevices, k)
						}

						for _, migTemplateEntry := range v {
							m.migCurrent.MigConfigs["current"][migidx].MigDevices[migTemplateEntry.Name] = migTemplateEntry.Count
							m.migCurrent.MigConfigs["current"][migidx].MigEnabled = true
						}
					}
					break
				}
			}
			break
		}
	}

	return position, needsreset
}

// Helper function to check if a model is in the list of models.
func containsModel(target string, models []string) bool {
	for _, model := range models {
		if strings.Contains(target, model) {
			return true
		}
	}
	return false
}

// Helper function to check if a device index is in the list of devices.
func containsDevice(target int, devices []int32) bool {
	for _, device := range devices {
		if int(device) == target {
			return true
		}
	}
	return false
}
-------------------------------------------------------------------------------- /pkg/plugin/vgpu/register.go: --------------------------------------------------------------------------------

/*
Copyright 2023 The Volcano Authors.
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package vgpu 18 | 19 | import ( 20 | "fmt" 21 | "strings" 22 | "time" 23 | 24 | "github.com/NVIDIA/go-nvml/pkg/nvml" 25 | "k8s.io/klog/v2" 26 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config" 27 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/util" 28 | ) 29 | 30 | type DevListFunc func() []*Device 31 | 32 | type DeviceRegister struct { 33 | deviceCache *DeviceCache 34 | unhealthy chan *Device 35 | stopCh chan struct{} 36 | } 37 | 38 | func NewDeviceRegister(deviceCache *DeviceCache) *DeviceRegister { 39 | return &DeviceRegister{ 40 | deviceCache: deviceCache, 41 | unhealthy: make(chan *Device), 42 | stopCh: make(chan struct{}), 43 | } 44 | } 45 | 46 | func (r *DeviceRegister) Start() { 47 | r.deviceCache.AddNotifyChannel("register", r.unhealthy) 48 | go r.WatchAndRegister() 49 | } 50 | 51 | func (r *DeviceRegister) Stop() { 52 | close(r.stopCh) 53 | } 54 | 55 | func (r *DeviceRegister) apiDevices() *[]*util.DeviceInfo { 56 | devs := r.deviceCache.GetCache() 57 | res := make([]*util.DeviceInfo, 0, len(devs)) 58 | for _, dev := range devs { 59 | ndev, ret := config.Nvml().DeviceGetHandleByUUID(dev.ID) 60 | if ret != nvml.SUCCESS { 61 | fmt.Println("nvml new device by uuid error id=", dev.ID) 62 | panic(ret) 63 | } 64 | 65 | memory, ret := config.Nvml().DeviceGetMemoryInfo(ndev) 66 | if ret != nvml.SUCCESS { 67 | fmt.Println("failed to get memory info for device id=", dev.ID) 68 | 
panic(ret) 69 | } 70 | 71 | model, ret := config.Nvml().DeviceGetName(ndev) 72 | if ret != nvml.SUCCESS { 73 | fmt.Println("failed to get model name for device id=", dev.ID) 74 | panic(ret) 75 | } 76 | 77 | klog.V(3).Infoln("nvml registered device id=", dev.ID, "memory=", memory.Total, "type=", model) 78 | 79 | registeredmem := int32(memory.Total/(1024*1024)) / int32(config.GPUMemoryFactor) 80 | klog.V(3).Infoln("GPUMemoryFactor=", config.GPUMemoryFactor, "registeredmem=", registeredmem) 81 | res = append(res, &util.DeviceInfo{ 82 | Id: dev.ID, 83 | Count: int32(config.DeviceSplitCount), 84 | Devmem: registeredmem, 85 | Mode: config.Mode, 86 | Type: fmt.Sprintf("%v-%v", "NVIDIA", model), 87 | Health: strings.EqualFold(dev.Health, "healthy"), 88 | }) 89 | } 90 | return &res 91 | } 92 | 93 | func (r *DeviceRegister) RegisterInAnnotation() error { 94 | devices := r.apiDevices() 95 | annos := make(map[string]string) 96 | node, err := util.GetNode(config.NodeName) 97 | if err != nil { 98 | klog.Errorln("get node error", err.Error()) 99 | return err 100 | } 101 | encodeddevices := util.EncodeNodeDevices(*devices) 102 | annos[util.NodeHandshake] = "Reported " + time.Now().String() 103 | annos[util.NodeNvidiaDeviceRegistered] = encodeddevices 104 | klog.Infoln("Reporting devices", encodeddevices, "in", time.Now().String()) 105 | err = util.PatchNodeAnnotations(node, annos) 106 | 107 | if err != nil { 108 | klog.Errorln("patch node error", err.Error()) 109 | } 110 | return err 111 | } 112 | 113 | func (r *DeviceRegister) WatchAndRegister() { 114 | klog.Infof("into WatchAndRegister") 115 | for { 116 | if len(config.Mode) == 0 { 117 | klog.V(5).Info("register skipped, waiting for device config to be loaded") 118 | time.Sleep(time.Second * 2) 119 | continue 120 | } 121 | err := r.RegisterInAnnotation() 122 | if err != nil { 123 | klog.Errorf("register error, %v", err) 124 | time.Sleep(time.Second * 5) 125 | } else { 126 | time.Sleep(time.Second * 30) 127 | } 128 | } 129 | } 
130 | -------------------------------------------------------------------------------- /pkg/plugin/vgpu/util/types.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package util 18 | 19 | import "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config" 20 | 21 | const ( 22 | AssignedTimeAnnotations = "volcano.sh/vgpu-time" 23 | AssignedIDsAnnotations = "volcano.sh/vgpu-ids-new" 24 | AssignedIDsToAllocateAnnotations = "volcano.sh/devices-to-allocate" 25 | AssignedNodeAnnotations = "volcano.sh/vgpu-node" 26 | BindTimeAnnotations = "volcano.sh/bind-time" 27 | DeviceBindPhase = "volcano.sh/bind-phase" 28 | 29 | // PodAnnotationMaxLength pod annotation max data length 1MB 30 | PodAnnotationMaxLength = 1024 * 1024 31 | 32 | GPUInUse = "nvidia.com/use-gputype" 33 | GPUNoUse = "nvidia.com/nouse-gputype" 34 | 35 | DeviceBindAllocating = "allocating" 36 | DeviceBindFailed = "failed" 37 | DeviceBindSuccess = "success" 38 | 39 | DeviceLimit = 100 40 | 41 | BestEffort string = "best-effort" 42 | Restricted string = "restricted" 43 | Guaranteed string = "guaranteed" 44 | 45 | NvidiaGPUDevice = "NVIDIA" 46 | NvidiaGPUCommonWord = "GPU" 47 | 48 | NodeLockTime = "volcano.sh/mutex.lock" 49 | MaxLockRetry = 5 50 | 51 | NodeHandshake = "volcano.sh/node-vgpu-handshake" 52 | NodeNvidiaDeviceRegistered = 
"volcano.sh/node-vgpu-register" 53 | 54 | // DeviceName used to indicate this device 55 | VGPUDeviceName = "hamivgpu" 56 | 57 | // DeviceConfigurationConfigMapKey specifies in what ConfigMap key the device configuration should be stored 58 | DeviceConfigurationConfigMapKey = "device-config.yaml" 59 | ) 60 | 61 | var ( 62 | ResourceName string 63 | ResourceMem string 64 | ResourceCores string 65 | ResourceMemPercentage string 66 | ResourcePriority string 67 | DebugMode bool 68 | 69 | MLUResourceCount string 70 | MLUResourceMemory string 71 | 72 | KnownDevice = map[string]string{ 73 | NodeHandshake: NodeNvidiaDeviceRegistered, 74 | } 75 | ) 76 | 77 | type ContainerDevice struct { 78 | UUID string 79 | Type string 80 | Usedmem int32 81 | Usedcores int32 82 | } 83 | 84 | type ContainerDeviceRequest struct { 85 | Nums int32 86 | Type string 87 | Memreq int32 88 | MemPercentagereq int32 89 | Coresreq int32 90 | } 91 | 92 | type ContainerDevices []ContainerDevice 93 | 94 | type PodDevices []ContainerDevices 95 | 96 | type DeviceInfo struct { 97 | Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` 98 | Count int32 `protobuf:"varint,2,opt,name=count,proto3" json:"count,omitempty"` 99 | Devmem int32 `protobuf:"varint,3,opt,name=devmem,proto3" json:"devmem,omitempty"` 100 | Type string `protobuf:"bytes,4,opt,name=type,proto3" json:"type,omitempty"` 101 | Health bool `protobuf:"varint,5,opt,name=health,proto3" json:"health,omitempty"` 102 | Mode string `json:"mode,omitempty"` 103 | MIGTemplate []config.Geometry `json:"migtemplate,omitempty"` 104 | XXX_NoUnkeyedLiteral struct{} `json:"-"` 105 | XXX_unrecognized []byte `json:"-"` 106 | XXX_sizecache int32 `json:"-"` 107 | } 108 | -------------------------------------------------------------------------------- /pkg/plugin/vgpu/util/util.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 The Volcano Authors. 
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package util 18 | 19 | import ( 20 | "bytes" 21 | "context" 22 | "encoding/json" 23 | "errors" 24 | "flag" 25 | "fmt" 26 | "math" 27 | "os" 28 | "os/exec" 29 | "strconv" 30 | "strings" 31 | 32 | "github.com/NVIDIA/go-nvml/pkg/nvml" 33 | "gopkg.in/yaml.v2" 34 | v1 "k8s.io/api/core/v1" 35 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 36 | k8stypes "k8s.io/apimachinery/pkg/types" 37 | "k8s.io/klog/v2" 38 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" 39 | "volcano.sh/k8s-device-plugin/pkg/lock" 40 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config" 41 | ) 42 | 43 | var DevicesToHandle []string 44 | 45 | func init() { 46 | client, _ := lock.NewClient() 47 | lock.UseClient(client) 48 | DevicesToHandle = []string{} 49 | DevicesToHandle = append(DevicesToHandle, NvidiaGPUCommonWord) 50 | } 51 | 52 | func GlobalFlagSet() *flag.FlagSet { 53 | fs := flag.NewFlagSet(os.Args[0], flag.ExitOnError) 54 | fs.StringVar(&ResourceName, "resource-name", "volcano.sh/vgpu-number", "resource name") 55 | fs.StringVar(&ResourceMem, "resource-memory-name", "volcano.sh/vgpu-memory", "resource name for resource memory resources") 56 | fs.StringVar(&ResourceCores, "resource-core-name", "volcano.sh/vgpu-cores", "resource name for resource core resources") 57 | fs.BoolVar(&DebugMode, "debug", false, "debug mode") 58 | klog.InitFlags(fs) 59 | return fs 60 | } 61 | 62 | func GetNode(nodename string) 
(*v1.Node, error) {
	n, err := lock.GetClient().CoreV1().Nodes().Get(context.Background(), nodename, metav1.GetOptions{})
	return n, err
}

// GetPendingPod returns the pod with the oldest predicate time that the
// scheduler assigned to this node, or an error if none is found.
func GetPendingPod(node string) (*v1.Pod, error) {
	podList, err := lock.GetClient().CoreV1().Pods("").List(context.Background(), metav1.ListOptions{})
	if err != nil {
		return nil, err
	}

	oldestPod := getOldestPod(podList.Items, node)
	if oldestPod == nil {
		return nil, fmt.Errorf("cannot get valid pod")
	}

	return oldestPod, nil
}

// getOldestPod selects, among the pods assigned to nodename, the one with the
// smallest predicate timestamp, then stamps it with MaxUint64 so it is not
// picked again. Returns nil when no pod is assigned to this node.
func getOldestPod(pods []v1.Pod, nodename string) *v1.Pod {
	if len(pods) == 0 {
		return nil
	}
	// BUG FIX: oldest was previously initialized to pods[0] unconditionally,
	// so when no pod matched nodename an arbitrary pod was returned instead
	// of nil.
	var oldest *v1.Pod
	for i := range pods {
		pod := &pods[i]
		if pod.Annotations[AssignedNodeAnnotations] != nodename {
			continue
		}
		klog.V(4).Infof("pod %s, predicate time: %s", pod.Name, pod.Annotations[AssignedTimeAnnotations])
		if oldest == nil || getPredicateTimeFromPodAnnotation(oldest) > getPredicateTimeFromPodAnnotation(pod) {
			oldest = pod
		}
	}
	if oldest == nil {
		return nil
	}
	klog.V(4).Infof("oldest pod %#v, predicate time: %#v", oldest.Name,
		oldest.Annotations[AssignedTimeAnnotations])
	annotation := map[string]string{AssignedTimeAnnotations: strconv.FormatUint(math.MaxUint64, 10)}
	if err := PatchPodAnnotations(oldest, annotation); err != nil {
		klog.Errorf("update pod %s failed, err: %v", oldest.Name, err)
		return nil
	}
	return oldest
}

// getPredicateTimeFromPodAnnotation parses the scheduler's predicate
// timestamp; missing or invalid values sort last (MaxUint64).
func getPredicateTimeFromPodAnnotation(pod *v1.Pod) uint64 {
	assumeTimeStr, ok := pod.Annotations[AssignedTimeAnnotations]
	if !ok {
		klog.Warningf("volcano not write timestamp, pod Name: %s", pod.Name)
		return math.MaxUint64
	}
	if len(assumeTimeStr) > PodAnnotationMaxLength {
		klog.Warningf("timestamp fmt invalid, pod Name: %s", pod.Name)
		return math.MaxUint64
	}
	predicateTime, err := strconv.ParseUint(assumeTimeStr, 10, 64)
	if err != nil {
		klog.Errorf("parse timestamp failed, %v", err)
		return math.MaxUint64
	}
	return predicateTime
}

// DecodeNodeDevices parses the ":"-separated records written by
// EncodeNodeDevices back into DeviceInfo values.
func DecodeNodeDevices(str string) []*DeviceInfo {
	if !strings.Contains(str, ":") {
		return []*DeviceInfo{}
	}
	var retval []*DeviceInfo
	for _, val := range strings.Split(str, ":") {
		if !strings.Contains(val, ",") {
			continue
		}
		items := strings.Split(val, ",")
		// BUG FIX: items[4] was indexed without a bounds check; skip
		// malformed records instead of panicking.
		if len(items) < 5 {
			klog.Warningf("skipping malformed device entry %q", val)
			continue
		}
		count, _ := strconv.Atoi(items[1])
		devmem, _ := strconv.Atoi(items[2])
		health, _ := strconv.ParseBool(items[4])
		i := DeviceInfo{
			Id:     items[0],
			Count:  int32(count),
			Devmem: int32(devmem),
			Type:   items[3],
			Health: health,
		}
		// BUG FIX: EncodeNodeDevices writes a sixth "Mode" field that was
		// silently dropped on decode.
		if len(items) >= 6 {
			i.Mode = items[5]
		}
		retval = append(retval, &i)
	}
	return retval
}

// EncodeNodeDevices serializes the inventory as
// "id,count,devmem,type,health,mode:" records.
func EncodeNodeDevices(dlist []*DeviceInfo) string {
	tmp := ""
	for _, val := range dlist {
		tmp += val.Id + "," + strconv.FormatInt(int64(val.Count), 10) + "," + strconv.Itoa(int(val.Devmem)) + "," + val.Type + "," + strconv.FormatBool(val.Health) + "," + val.Mode + ":"
	}
	klog.V(3).Infoln("Encoded node Devices", tmp)
	return tmp
}

// EncodeContainerDevices serializes one container's devices as
// "uuid,type,usedmem,usedcores:" records.
func EncodeContainerDevices(cd ContainerDevices) string {
	tmp := ""
	for _, val := range cd {
		tmp += val.UUID + "," + val.Type + "," + strconv.Itoa(int(val.Usedmem)) + "," + strconv.Itoa(int(val.Usedcores)) + ":"
	}
	// CONSISTENCY FIX: use klog like the rest of this file instead of
	// fmt.Println.
	klog.V(3).Infoln("Encoded container Devices=", tmp)
	return tmp
	//return strings.Join(cd, ",")
}

// EncodePodDevices joins per-container encodings with ";".
func EncodePodDevices(pd PodDevices) string {
	var ss []string
	for _, cd := range pd {
		ss = append(ss, EncodeContainerDevices(cd))
	}
	return strings.Join(ss, ";")
}

// DecodeContainerDevices parses one container's
// "uuid,type,usedmem,usedcores:" record list.
func DecodeContainerDevices(str string) ContainerDevices {
	if len(str) == 0 {
		return ContainerDevices{}
	}
	cd := strings.Split(str, ":")
	contdev := ContainerDevices{}
	tmpdev := ContainerDevice{}
	// NOTE(review): this second emptiness check is redundant with the one
	// above; kept for structural compatibility.
	if len(str) == 0 {
		return
contdev
	}
	for _, val := range cd {
		if strings.Contains(val, ",") {
			tmpstr := strings.Split(val, ",")
			tmpdev.UUID = tmpstr[0]
			tmpdev.Type = tmpstr[1]
			devmem, _ := strconv.ParseInt(tmpstr[2], 10, 32)
			tmpdev.Usedmem = int32(devmem)
			devcores, _ := strconv.ParseInt(tmpstr[3], 10, 32)
			tmpdev.Usedcores = int32(devcores)
			contdev = append(contdev, tmpdev)
		}
	}
	return contdev
}

// DecodePodDevices splits a pod annotation into per-container device lists.
func DecodePodDevices(str string) PodDevices {
	if len(str) == 0 {
		return PodDevices{}
	}
	var pd PodDevices
	for _, s := range strings.Split(str, ";") {
		cd := DecodeContainerDevices(s)
		pd = append(pd, cd)
	}
	return pd
}

// GetNextDeviceRequest returns the first container (and its devices of type
// dtype) still pending allocation according to the pod's
// devices-to-allocate annotation.
func GetNextDeviceRequest(dtype string, p v1.Pod) (v1.Container, ContainerDevices, error) {
	pdevices := DecodePodDevices(p.Annotations[AssignedIDsToAllocateAnnotations])
	klog.Infoln("pdevices=", pdevices)
	res := ContainerDevices{}
	for idx, val := range pdevices {
		found := false
		for _, dev := range val {
			if strings.Compare(dtype, dev.Type) == 0 {
				res = append(res, dev)
				found = true
			}
		}
		if found {
			// BUG FIX: a malformed annotation with more entries than
			// containers previously caused an index-out-of-range panic.
			if idx >= len(p.Spec.Containers) {
				return v1.Container{}, res, errors.New("device annotation has more entries than pod containers")
			}
			return p.Spec.Containers[idx], res, nil
		}
	}
	return v1.Container{}, res, errors.New("device request not found")
}

// EraseNextDeviceTypeFromAnnotation removes the first container entry of type
// dtype from the devices-to-allocate annotation and patches the pod.
func EraseNextDeviceTypeFromAnnotation(dtype string, p v1.Pod) error {
	pdevices := DecodePodDevices(p.Annotations[AssignedIDsToAllocateAnnotations])
	res := PodDevices{}
	found := false
	for _, val := range pdevices {
		if found {
			// Already erased: keep the remaining entries untouched.
			res = append(res, val)
			continue
		} else {
			tmp := ContainerDevices{}
			for _, dev := range val {
				if strings.Compare(dtype, dev.Type) == 0 {
					found = true
				} else {
					tmp = append(tmp, dev)
				}
			}
			if !found {
				res = append(res, val)
			} else {
				res = append(res, tmp)
			}
		}
	}
	klog.Infoln("After erase res=", res)
	newannos := make(map[string]string)
	newannos[AssignedIDsToAllocateAnnotations] = EncodePodDevices(res)
	return PatchPodAnnotations(&p, newannos)
}

// PodAllocationTrySuccess marks the allocation successful (and releases the
// node lock) once no handled device type remains in the pod's
// devices-to-allocate annotation.
func PodAllocationTrySuccess(nodeName string, pod *v1.Pod) {
	refreshed, err := lock.GetClient().CoreV1().Pods(pod.Namespace).Get(context.Background(), pod.Name, metav1.GetOptions{})
	if err != nil || refreshed == nil {
		// BUG FIX: the Get error was discarded and a nil pod dereferenced
		// below. The node lock is left for its normal expiry path here.
		klog.Errorf("refresh pod %s failed: %v", pod.Name, err)
		return
	}
	annos := refreshed.Annotations[AssignedIDsToAllocateAnnotations]
	klog.Infoln("TrySuccess:", annos)
	for _, val := range DevicesToHandle {
		if strings.Contains(annos, val) {
			return
		}
	}
	klog.Infoln("AllDevicesAllocateSuccess releasing lock")
	PodAllocationSuccess(nodeName, pod)
}

// PodAllocationSuccess records the success bind phase and releases the node lock.
func PodAllocationSuccess(nodeName string, pod *v1.Pod) {
	newannos := make(map[string]string)
	newannos[DeviceBindPhase] = DeviceBindSuccess
	err := PatchPodAnnotations(pod, newannos)
	if err != nil {
		klog.Errorf("patchPodAnnotations failed:%v", err.Error())
	}
	err = lock.ReleaseNodeLock(nodeName, VGPUDeviceName)
	if err != nil {
		klog.Errorf("release lock failed:%v", err.Error())
	}
}

// PodAllocationFailed records the failed bind phase and releases the node lock.
func PodAllocationFailed(nodeName string, pod *v1.Pod) {
	newannos := make(map[string]string)
	newannos[DeviceBindPhase] = DeviceBindFailed
	err := PatchPodAnnotations(pod, newannos)
	if err != nil {
		klog.Errorf("patchPodAnnotations failed:%v", err.Error())
	}
	err = lock.ReleaseNodeLock(nodeName, VGPUDeviceName)
	if err != nil {
		klog.Errorf("release lock failed:%v", err.Error())
	}
}

// PatchNodeAnnotations strategic-merge-patches the given annotations onto a node.
func PatchNodeAnnotations(node *v1.Node, annotations map[string]string) error {
	type patchMetadata struct {
		Annotations map[string]string `json:"annotations,omitempty"`
	}
	type patchNode struct {
		Metadata patchMetadata `json:"metadata"`
	}

	p := patchNode{}
	p.Metadata.Annotations = annotations

	bytes, err := json.Marshal(p)
	if err != nil {
		return err
	}
	_, err = lock.GetClient().CoreV1().Nodes().
		Patch(context.Background(), node.Name, k8stypes.StrategicMergePatchType, bytes, metav1.PatchOptions{})
	if err != nil {
		// BUG FIX: the message previously said "patch pod" for a node patch.
		klog.Infof("patch node %v failed, %v", node.Name, err)
	}
	return err
}

// PatchPodAnnotations strategic-merge-patches the given annotations onto a pod.
func PatchPodAnnotations(pod *v1.Pod, annotations map[string]string) error {
	type patchMetadata struct {
		Annotations map[string]string `json:"annotations,omitempty"`
	}
	type patchPod struct {
		Metadata patchMetadata `json:"metadata"`
		//Spec patchSpec `json:"spec,omitempty"`
	}

	p := patchPod{}
	p.Metadata.Annotations = annotations

	bytes, err := json.Marshal(p)
	if err != nil {
		return err
	}
	_, err = lock.GetClient().CoreV1().Pods(pod.Namespace).
		Patch(context.Background(), pod.Name, k8stypes.StrategicMergePatchType, bytes, metav1.PatchOptions{})
	if err != nil {
		klog.Infof("patch pod %v failed, %v", pod.Name, err)
	}
	return err
}

// LoadConfigFromCM loads the device configuration from cmName, looking first
// in kube-system and then in volcano-system.
func LoadConfigFromCM(cmName string) (*config.Config, error) {
	lock.NewClient()
	cm, err := lock.GetClient().CoreV1().ConfigMaps("kube-system").Get(context.Background(), cmName, metav1.GetOptions{})
	if err != nil {
		cm, err = lock.GetClient().CoreV1().ConfigMaps("volcano-system").Get(context.Background(), cmName, metav1.GetOptions{})
		if err != nil {
			return nil, err
		}
	}
	data, ok := cm.Data[DeviceConfigurationConfigMapKey]
	if !ok {
		return nil, fmt.Errorf("%v not found in ConfigMap %v", DeviceConfigurationConfigMapKey, cmName)
	}
	var yamlData config.Config
	err = yaml.Unmarshal([]byte(data), &yamlData)
	if err != nil {
		return nil, err
	}
	return &yamlData, nil
} 367 | 368 | func LoadConfig(path string) (*config.Config, error) { 369 | data, err := os.ReadFile(path) 370 | if err != nil { 371 | return nil, err 372 | } 373 | var yamlData config.Config 374 | err = yaml.Unmarshal(data, &yamlData) 375 | if err != nil { 376 | return nil, err 377 | } 378 | return &yamlData, nil 379 | } 380 | 381 | func GenerateVirtualDeviceID(id uint, fakeCounter uint) string { 382 | return fmt.Sprintf("%d-%d", id, fakeCounter) 383 | } 384 | 385 | // GetDevices returns virtual devices and all physical devices by index. 386 | func GetDevices(gpuMemoryFactor uint) ([]*pluginapi.Device, map[uint]string) { 387 | n, ret := config.Nvml().DeviceGetCount() 388 | if ret != nvml.SUCCESS { 389 | klog.Fatalf("call nvml.DeviceGetCount with error: %v", ret) 390 | } 391 | 392 | var virtualDevs []*pluginapi.Device 393 | deviceByIndex := map[uint]string{} 394 | for i := uint(0); i < uint(n); i++ { 395 | d, ret := config.Nvml().DeviceGetHandleByIndex(int(i)) 396 | if ret != nvml.SUCCESS { 397 | klog.Fatalf("call nvml.DeviceGetHandleByIndex with error: %v", ret) 398 | } 399 | uuid, ret := d.GetUUID() 400 | if ret != nvml.SUCCESS { 401 | klog.Fatalf("call GetUUID with error: %v", ret) 402 | } 403 | id := i 404 | deviceByIndex[id] = uuid 405 | memory, ret := d.GetMemoryInfo() 406 | if ret != nvml.SUCCESS { 407 | klog.Fatalf("call GetMemoryInfo with error: %v", ret) 408 | } 409 | deviceGPUMemory := uint(memory.Total / (1024 * 1024)) 410 | for j := uint(0); j < deviceGPUMemory/gpuMemoryFactor; j++ { 411 | klog.V(4).Infof("adding virtual device: %d", j) 412 | fakeID := GenerateVirtualDeviceID(id, j) 413 | virtualDevs = append(virtualDevs, &pluginapi.Device{ 414 | ID: fakeID, 415 | Health: pluginapi.Healthy, 416 | }) 417 | } 418 | } 419 | 420 | return virtualDevs, deviceByIndex 421 | } 422 | 423 | func GetDeviceNums() int { 424 | count, ret := config.Nvml().DeviceGetCount() 425 | if ret != nvml.SUCCESS { 426 | klog.Error(`nvml get count error ret=`, ret) 427 | } 428 | 
return count 429 | } 430 | 431 | func GetIndexAndTypeFromUUID(uuid string) (string, int) { 432 | originuuid := strings.Split(uuid, "[")[0] 433 | ndev, ret := config.Nvml().DeviceGetHandleByUUID(originuuid) 434 | if ret != nvml.SUCCESS { 435 | klog.Error("nvml get handlebyuuid error ret=", ret) 436 | panic(0) 437 | } 438 | model, ret := ndev.GetName() 439 | if ret != nvml.SUCCESS { 440 | klog.Error("nvml get name error ret=", ret) 441 | panic(0) 442 | } 443 | index, ret := ndev.GetIndex() 444 | if ret != nvml.SUCCESS { 445 | klog.Error("nvml get index error ret=", ret) 446 | panic(0) 447 | } 448 | return model, index 449 | } 450 | 451 | func GetMigUUIDFromIndex(uuid string, idx int) string { 452 | originuuid := strings.Split(uuid, "[")[0] 453 | ndev, ret := config.Nvml().DeviceGetHandleByUUID(originuuid) 454 | if ret != nvml.SUCCESS { 455 | klog.Error(`nvml get device uuid error ret=`, ret) 456 | panic(0) 457 | } 458 | migdev, ret := config.Nvml().DeviceGetMigDeviceHandleByIndex(ndev, idx) 459 | if ret != nvml.SUCCESS { 460 | klog.Error("nvml get mig dev error ret=", ret, ",idx=", idx, "using nvidia-smi -L for query") 461 | cmd := exec.Command("nvidia-smi", "-L") 462 | var stdout, stderr bytes.Buffer 463 | cmd.Stdout = &stdout 464 | cmd.Stderr = &stderr 465 | err := cmd.Run() 466 | if err != nil { 467 | klog.Fatalf("nvidia-smi -L failed with %s\n", err) 468 | } 469 | outStr := stdout.String() 470 | uuid := GetMigUUIDFromSmiOutput(outStr, originuuid, idx) 471 | return uuid 472 | } 473 | res, ret := migdev.GetUUID() 474 | if ret != nvml.SUCCESS { 475 | klog.Error(`nvml get mig uuid error ret=`, ret) 476 | panic(0) 477 | } 478 | return res 479 | } 480 | 481 | func GetMigUUIDFromSmiOutput(output string, uuid string, idx int) string { 482 | migmode := false 483 | for _, val := range strings.Split(output, "\n") { 484 | if !strings.Contains(val, "MIG") && strings.Contains(val, uuid) { 485 | migmode = true 486 | continue 487 | } 488 | if !strings.Contains(val, "MIG") && 
!strings.Contains(val, uuid) { 489 | migmode = false 490 | continue 491 | } 492 | if !migmode { 493 | continue 494 | } 495 | klog.Infoln("inspecting", val) 496 | num := strings.Split(val, "Device")[1] 497 | num = strings.Split(num, ":")[0] 498 | num = strings.TrimSpace(num) 499 | index, err := strconv.Atoi(num) 500 | if err != nil { 501 | klog.Fatal("atoi failed num=", num) 502 | } 503 | if index == idx { 504 | outputStr := strings.Split(val, ":")[2] 505 | outputStr = strings.TrimSpace(outputStr) 506 | outputStr = strings.TrimRight(outputStr, ")") 507 | return outputStr 508 | } 509 | } 510 | return "" 511 | } 512 | 513 | // Enhanced ExtractMigTemplatesFromUUID with error handling. 514 | func ExtractMigTemplatesFromUUID(uuid string) (string, int, error) { 515 | parts := strings.Split(uuid, "[") 516 | if len(parts) < 2 { 517 | return "", -1, fmt.Errorf("invalid UUID format: missing '[' delimiter") 518 | } 519 | 520 | tmp := parts[1] 521 | parts = strings.Split(tmp, "]") 522 | if len(parts) < 2 { 523 | return "", -1, fmt.Errorf("invalid UUID format: missing ']' delimiter") 524 | } 525 | 526 | tmp = parts[0] 527 | parts = strings.Split(tmp, "-") 528 | if len(parts) < 2 { 529 | return "", -1, fmt.Errorf("invalid UUID format: missing '-' delimiter") 530 | } 531 | 532 | templateGroupName := strings.TrimSpace(parts[0]) 533 | if len(templateGroupName) == 0 { 534 | return "", -1, fmt.Errorf("invalid UUID format: missing template group name") 535 | } 536 | 537 | pos, err := strconv.Atoi(parts[1]) 538 | if err != nil { 539 | return "", -1, fmt.Errorf("invalid position: %v", err) 540 | } 541 | 542 | return templateGroupName, pos, nil 543 | } 544 | 545 | func LoadNvidiaConfig() *config.NvidiaConfig { 546 | configs, err := LoadConfigFromCM("volcano-vgpu-device-config") 547 | if err != nil { 548 | klog.InfoS("configMap not found", err.Error()) 549 | } 550 | nvidiaConfig := config.NvidiaConfig{} 551 | if configs != nil { 552 | nvidiaConfig = configs.NvidiaConfig 553 | } 554 | 
nvidiaConfig.DeviceSplitCount = config.DeviceSplitCount 555 | nvidiaConfig.DeviceCoreScaling = config.DeviceCoresScaling 556 | nvidiaConfig.GPUMemoryFactor = config.GPUMemoryFactor 557 | if err := readFromConfigFile(&nvidiaConfig); err != nil { 558 | klog.InfoS("readFrom device cm error", err.Error()) 559 | } 560 | klog.Infoln("Loaded config=", nvidiaConfig) 561 | return &nvidiaConfig 562 | } 563 | 564 | func readFromConfigFile(sConfig *config.NvidiaConfig) error { 565 | config.Mode = "hami-core" 566 | jsonbyte, err := os.ReadFile("/config/config.json") 567 | if err != nil { 568 | return err 569 | } 570 | var deviceConfigs config.DevicePluginConfigs 571 | err = json.Unmarshal(jsonbyte, &deviceConfigs) 572 | if err != nil { 573 | return err 574 | } 575 | klog.Infof("Device Plugin Configs: %v", fmt.Sprintf("%v", deviceConfigs)) 576 | for _, val := range deviceConfigs.Nodeconfig { 577 | if os.Getenv("NODE_NAME") == val.Name { 578 | klog.Infof("Reading config from file %s", val.Name) 579 | if val.Devicememoryscaling > 0 { 580 | sConfig.DeviceMemoryScaling = val.Devicememoryscaling 581 | } 582 | if val.Devicecorescaling > 0 { 583 | sConfig.DeviceCoreScaling = val.Devicecorescaling 584 | } 585 | if val.Devicesplitcount > 0 { 586 | sConfig.DeviceSplitCount = val.Devicesplitcount 587 | } 588 | if val.FilterDevice != nil && (len(val.FilterDevice.UUID) > 0 || len(val.FilterDevice.Index) > 0) { 589 | config.DevicePluginFilterDevice = val.FilterDevice 590 | } 591 | if len(val.OperatingMode) > 0 { 592 | config.Mode = val.OperatingMode 593 | } 594 | klog.Infof("FilterDevice: %v", val.FilterDevice) 595 | } 596 | } 597 | return nil 598 | } 599 | -------------------------------------------------------------------------------- /volcano-vgpu-device-plugin.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: volcano-vgpu-device-config
  namespace: kube-system
  labels:
    app.kubernetes.io/component: volcano-vgpu-device-plugin
data:
  device-config.yaml: |-
    nvidia:
      resourceCountName: volcano.sh/vgpu-number
      resourceMemoryName: volcano.sh/vgpu-memory
      resourceMemoryPercentageName: volcano.sh/vgpu-memory-percentage
      resourceCoreName: volcano.sh/vgpu-cores
      overwriteEnv: false
      defaultMemory: 0
      defaultCores: 0
      defaultGPUNum: 1
      deviceSplitCount: 10
      deviceMemoryScaling: 1
      deviceCoreScaling: 1
      gpuMemoryFactor: 1
      knownMigGeometries:
        - models: [ "A30" ]
          allowedGeometries:
            - group: group1
              geometries:
                - name: 1g.6gb
                  memory: 6144
                  count: 4
            - group: group2
              geometries:
                - name: 2g.12gb
                  memory: 12288
                  count: 2
            - group: group3
              geometries:
                - name: 4g.24gb
                  memory: 24576
                  count: 1
        - models: [ "A100-SXM4-40GB", "A100-40GB-PCIe", "A100-PCIE-40GB", "A100-SXM4-40GB" ]
          allowedGeometries:
            - group: "group1"
              geometries:
                - name: 1g.5gb
                  memory: 5120
                  count: 7
            - group: "group2"
              geometries:
                - name: 2g.10gb
                  memory: 10240
                  count: 3
                - name: 1g.5gb
                  memory: 5120
                  count: 1
            - group: "group3"
              geometries:
                - name: 3g.20gb
                  memory: 20480
                  count: 2
            - group: "group4"
              geometries:
                - name: 7g.40gb
                  memory: 40960
                  count: 1
        - models: [ "A100-SXM4-80GB", "A100-80GB-PCIe", "A100-PCIE-80GB" ]
          allowedGeometries:
            - group: "group1"
              geometries:
                - name: 1g.10gb
                  memory: 10240
                  count: 7
            - group: "group2"
              geometries:
                - name: 2g.20gb
                  memory: 20480
                  count: 3
                - name: 1g.10gb
                  memory: 10240
                  count: 1
            - group: "group3"
              geometries:
                - name: 3g.40gb
                  memory: 40960
                  count: 2
            - group: "group4"
              geometries:
                - name: 7g.79gb
                  memory: 80896
                  count: 1
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: volcano-vgpu-node-config
  namespace: kube-system
  labels:
    app.kubernetes.io/component: volcano-vgpu-node-plugin
data:
  config.json: |
    {
      "nodeconfig": [
        {
          "name": "aio-node67",
          "operatingmode": "hami-core",
          "devicememoryscaling": 1.8,
          "devicesplitcount": 10,
          "migstrategy": "none",
          "filterdevices": {
            "uuid": [],
            "index": []
          }
        }
      ]
    }
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: volcano-device-plugin
  namespace: kube-system
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: volcano-device-plugin
rules:
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["get", "list", "watch", "update", "patch"]
  - apiGroups: [""]
    resources: ["nodes/status"]
    verbs: ["patch"]
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get", "list", "update", "patch", "watch"]
  - apiGroups: [""]
    resources: ["configmaps"]
    verbs: ["get", "list", "watch", "create", "update"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: volcano-device-plugin
subjects:
  - kind: ServiceAccount
    name: volcano-device-plugin
    namespace: kube-system
roleRef:
  kind: ClusterRole
  name: volcano-device-plugin
  apiGroup: rbac.authorization.k8s.io
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: volcano-device-plugin
  namespace: kube-system
spec:
  selector:
    matchLabels:
      name: volcano-device-plugin
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      # This annotation is deprecated. Kept here for backward compatibility
      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ""
      labels:
        name: volcano-device-plugin
    spec:
      tolerations:
        # This toleration is deprecated. Kept here for backward compatibility
        # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
        - key: CriticalAddonsOnly
          operator: Exists
        - key: volcano.sh/gpu-memory
          operator: Exists
          effect: NoSchedule
      # Mark this pod as a critical add-on; when enabled, the critical add-on
      # scheduler reserves resources for critical add-on pods so that they can
      # be rescheduled after a failure.
      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
      priorityClassName: "system-node-critical"
      # serviceAccountName replaces the deprecated serviceAccount field.
      serviceAccountName: volcano-device-plugin
      containers:
        - image: docker.io/projecthami/volcano-vgpu-device-plugin:v1.9.4
          args: ["--device-split-count=10"]
          lifecycle:
            postStart:
              exec:
                command: ["/bin/sh", "-c", "cp -f /k8s-vgpu/lib/nvidia/* /usr/local/vgpu/"]
          name: volcano-device-plugin
          env:
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: HOOK_PATH
              value: "/usr/local/vgpu"
            - name: NVIDIA_VISIBLE_DEVICES
              value: "all"
            - name: NVIDIA_MIG_MONITOR_DEVICES
              value: "all"
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: "utility"
          securityContext:
            allowPrivilegeEscalation: true
            # Fix: this field was misspelled "previleged"; the unknown field
            # was dropped by the API server, so the container never actually
            # ran privileged as intended.
            privileged: true
            capabilities:
              drop: ["ALL"]
              add: ["SYS_ADMIN"]
          volumeMounts:
            - name: deviceconfig
              mountPath: /config
            - name: device-plugin
              mountPath: /var/lib/kubelet/device-plugins
            - name: lib
              mountPath: /usr/local/vgpu
            - name: hosttmp
              mountPath: /tmp
        - image: docker.io/projecthami/volcano-vgpu-device-plugin:v1.9.4
          name: monitor
          command:
            - /bin/bash
            - -c
            - volcano-vgpu-monitor
          env:
            - name: NVIDIA_VISIBLE_DEVICES
              value: "all"
            - name: NVIDIA_MIG_MONITOR_DEVICES
              value: "all"
            - name: HOOK_PATH
              value: "/tmp/vgpu"
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          securityContext:
            privileged: true
            allowPrivilegeEscalation: true
            capabilities:
              drop: ["ALL"]
              add: ["SYS_ADMIN"]
          volumeMounts:
            - name: dockers
              mountPath: /run/docker
            - name: containerds
              mountPath: /run/containerd
            - name: sysinfo
              mountPath: /sysinfo
            - name: hostvar
              mountPath: /hostvar
            - name: hosttmp
              mountPath: /tmp
      volumes:
        - name: deviceconfig
          configMap:
            name: volcano-vgpu-node-config
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins
            type: Directory
        - name: lib
          hostPath:
            path: /usr/local/vgpu
            type: DirectoryOrCreate
        - name: hosttmp
          hostPath:
            path: /tmp
            type: DirectoryOrCreate
        - name: dockers
          hostPath:
            path: /run/docker
            type: DirectoryOrCreate
        - name: containerds
          hostPath:
            path: /run/containerd
            type: DirectoryOrCreate
        # NOTE(review): "usrbin" is declared but not mounted by either
        # container above — confirm whether it is still needed.
        - name: usrbin
          hostPath:
            path: /usr/bin
            type: Directory
        - name: sysinfo
          hostPath:
            path: /sys
            type: Directory
        - name: hostvar
          hostPath:
            path: /var
            type: Directory