├── .dockerignore ├── .github └── workflows │ ├── dev-image-build.yaml │ └── release-image-build.yml ├── .gitignore ├── .gitmodules ├── LICENSE ├── Makefile ├── Makefile.def ├── OWNERS ├── README.md ├── cmd ├── vgpu-monitor │ ├── feedback.go │ ├── main.go │ ├── metrics.go │ └── validation.go └── vgpu │ ├── main.go │ └── watchers.go ├── doc ├── config.md ├── design.md ├── example.png ├── hard_limit.jpg ├── vgpu-on-volcano.pdf └── vgpu_device_plugin_metrics.png ├── docker └── Dockerfile.ubuntu20.04 ├── examples ├── gpu-share.yml ├── vgpu-case01.yml ├── vgpu-case02.yml ├── vgpu-case03.yml └── vgpu-deployment.yaml ├── go.mod ├── go.sum ├── lib └── nvidia │ └── ld.so.preload ├── pkg ├── apis │ ├── config.go │ ├── flags.go │ └── flags_test.go ├── filewatcher │ └── filewatcher.go ├── gpu │ └── doc.go ├── lock │ └── nodelock.go ├── monitor │ └── nvidia │ │ ├── cudevshr.go │ │ ├── v0 │ │ └── spec.go │ │ └── v1 │ │ └── spec.go └── plugin │ ├── interface.go │ └── vgpu │ ├── cache.go │ ├── config │ ├── config.go │ └── version.go │ ├── helper.go │ ├── mig-strategy.go │ ├── mig.go │ ├── nvidia.go │ ├── plugin.go │ ├── register.go │ └── util │ ├── types.go │ └── util.go └── volcano-vgpu-device-plugin.yml /.dockerignore: -------------------------------------------------------------------------------- 1 | # Ignore version control directories 2 | .git/ 3 | .github/ 4 | 5 | # Ignore build and docs directories 6 | _output/ 7 | doc/ 8 | examples/ 9 | README.md 10 | OWNERS 11 | LICENSE 12 | 13 | # Ignore IDE and OS files 14 | *.DS_Store 15 | .idea/ 16 | .vscode/ 17 | *.iml 18 | 19 | # Ignore Docker-specific files 20 | docker/ 21 | -------------------------------------------------------------------------------- /.github/workflows/dev-image-build.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA CORPORATION 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in 
compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Run this workflow on pull requests or merge to main/dev branches 16 | name: Build Dev Image 17 | 18 | on: 19 | push: 20 | branches: 21 | - main 22 | - dev-vgpu-1219 23 | pull_request_target: 24 | types: 25 | - opened 26 | - synchronize 27 | - reopened 28 | 29 | jobs: 30 | build: 31 | runs-on: ubuntu-latest 32 | steps: 33 | - uses: actions/checkout@v2 34 | - uses: actions/setup-go@v2 35 | with: 36 | go-version: "^1.19.x" 37 | - name: Checkout submodule 38 | uses: Mushus/checkout-submodule@v1.0.1 39 | with: 40 | submodulePath: libvgpu 41 | - run: go version 42 | - name: Get branch name 43 | uses: nelonoel/branch-name@v1.0.1 44 | - name: Docker Login 45 | uses: docker/login-action@v2.1.0 46 | with: 47 | username: ${{ secrets.DOCKERHUB_TOKEN }} 48 | password: ${{ secrets.DOCKERHUB_PASSWD }} 49 | - name: Set up Docker Buildx 50 | id: buildx 51 | uses: docker/setup-buildx-action@v1 52 | - name: Generating image tag 53 | id: runtime-tag 54 | run: | 55 | echo tag="$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT 56 | - run: BUILD_PLATFORMS="linux/amd64,linux/arm64" VERSION="${BRANCH_NAME}-${{ steps.runtime-tag.outputs.tag }}" make push-short 57 | - run: BUILD_PLATFORMS="linux/amd64,linux/arm64" VERSION="${BRANCH_NAME}-${{ steps.runtime-tag.outputs.tag }}" make push-latest 58 | -------------------------------------------------------------------------------- /.github/workflows/release-image-build.yml: -------------------------------------------------------------------------------- 1 | # Copyright 
2024 NVIDIA CORPORATION 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Run this workflow on new tags 16 | name: Build Release Image 17 | 18 | on: 19 | push: 20 | tags: 21 | - v[0-9]+.[0-9]+.[0-9]+ 22 | 23 | jobs: 24 | build: 25 | runs-on: ubuntu-latest 26 | steps: 27 | - uses: actions/checkout@v2 28 | - uses: actions/setup-go@v2 29 | with: 30 | go-version: "^1.19.x" 31 | - name: Checkout submodule 32 | uses: Mushus/checkout-submodule@v1.0.1 33 | with: 34 | submodulePath: libvgpu 35 | - run: go version 36 | - name: Get branch name 37 | uses: nelonoel/branch-name@v1.0.1 38 | - name: Docker Login 39 | uses: docker/login-action@v2.1.0 40 | with: 41 | username: ${{ secrets.DOCKERHUB_TOKEN }} 42 | password: ${{ secrets.DOCKERHUB_PASSWD }} 43 | - name: Set up Docker Buildx 44 | id: buildx 45 | uses: docker/setup-buildx-action@v1 46 | - name: Generating image tag 47 | id: runtime-tag 48 | run: | 49 | echo tag="$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT 50 | - run: BUILD_PLATFORMS="linux/amd64,linux/arm64" VERSION="${BRANCH_NAME}" make push-short 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # OSX leaves these everywhere on SMB shares 2 | ._* 3 | 4 | # OSX trash 5 | .DS_Store 6 | 7 | # Eclipse files 8 | .classpath 9 | .project 10 | .settings/** 11 | 12 | # Files generated by JetBrains 
IDEs, e.g. IntelliJ IDEA 13 | .idea/ 14 | *.iml 15 | 16 | # Vscode files 17 | .vscode 18 | 19 | # This is where the result of the go build goes 20 | /output*/ 21 | /_output*/ 22 | /_output 23 | 24 | # Emacs save files 25 | *~ 26 | \#*\# 27 | .\#* 28 | 29 | # Vim-related files 30 | [._]*.s[a-w][a-z] 31 | [._]s[a-w][a-z] 32 | *.un~ 33 | Session.vim 34 | .netrwhist 35 | 36 | # cscope-related files 37 | cscope.* 38 | 39 | # Go test binaries 40 | *.test 41 | /hack/.test-cmd-auth 42 | 43 | # JUnit test output from ginkgo e2e tests 44 | /junit*.xml 45 | 46 | # Mercurial files 47 | **/.hg 48 | **/.hg* 49 | 50 | # Vagrant 51 | .vagrant 52 | network_closure.sh 53 | 54 | # Local cluster env variables 55 | /cluster/env.sh 56 | 57 | # Compiled binaries in third_party 58 | /third_party/pkg 59 | 60 | # Also ignore etcd installed by hack/install-etcd.sh 61 | /third_party/etcd* 62 | /default.etcd 63 | 64 | # User cluster configs 65 | .kubeconfig 66 | 67 | .tags* 68 | 69 | # Version file for dockerized build 70 | .dockerized-kube-version-defs 71 | 72 | # Web UI 73 | /www/master/node_modules/ 74 | /www/master/npm-debug.log 75 | /www/master/shared/config/development.json 76 | 77 | # Karma output 78 | /www/test_out 79 | 80 | # precommit temporary directories created by ./hack/verify-generated-docs.sh and ./hack/lib/util.sh 81 | /_tmp/ 82 | /doc_tmp/ 83 | 84 | # Test artifacts produced by Jenkins jobs 85 | /_artifacts/ 86 | 87 | # Go dependencies installed on Jenkins 88 | /_gopath/ 89 | 90 | # Config directories created by gcloud and gsutil on Jenkins 91 | /.config/gcloud*/ 92 | /.gsutil/ 93 | 94 | # CoreOS stuff 95 | /cluster/libvirt-coreos/coreos_*.img 96 | 97 | # Juju Stuff 98 | /cluster/juju/charms/* 99 | /cluster/juju/bundles/local.yaml 100 | 101 | # Downloaded Kubernetes binary release 102 | /kubernetes/ 103 | 104 | # direnv .envrc files 105 | .envrc 106 | 107 | # Downloaded kubernetes binary release tar ball 108 | kubernetes.tar.gz 109 | 110 | # generated files in any directory 
111 | # TODO(thockin): uncomment this when we stop committing the generated files. 112 | #zz_generated.* 113 | #zz_generated.openapi.go 114 | 115 | # make-related metadata 116 | /.make/ 117 | # Just in time generated data in the source, should never be commited 118 | /test/e2e/generated/bindata.go 119 | 120 | # This file used by some vendor repos (e.g. github.com/go-openapi/...) to store secret variables and should not be ignored 121 | !\.drone\.sec 122 | 123 | /bazel-* 124 | *.pyc 125 | 126 | # e2e log files 127 | *.log 128 | 129 | # test coverage file 130 | coverage.txt 131 | 132 | updateso.sh 133 | volcano-vgpu-device-plugin 134 | 135 | lib/nvidia/libvgpu/build 136 | 137 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "libvgpu"] 2 | path = libvgpu 3 | url = https://github.com/Project-HAMi/HAMi-core.git 4 | branch = main 5 | 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | .DEFAULT_GOAL := all 17 | 18 | include Makefile.def 19 | 20 | ##### Global variables ##### 21 | REGISTRY ?= projecthami 22 | VERSION ?= 1.0.0 23 | 24 | ##### Using `BUILD_PLATFORMS=linux/arm64 make all` to build arm64 arch image locally 25 | ##### Using `BUILD_PLATFORMS=linux/amd64,linux/arm64 make push-latest` to build and publish multi-arch image 26 | BUILD_PLATFORMS ?= linux/amd64 27 | 28 | ##### Public rules ##### 29 | 30 | all: ubuntu20.04 31 | 32 | push: 33 | docker buildx build --platform $(BUILD_PLATFORMS) --push \ 34 | --tag $(REGISTRY)/volcano-vgpu-device-plugin:$(VERSION)-ubuntu20.04 \ 35 | --file docker/Dockerfile.ubuntu20.04 . 36 | 37 | push-short: 38 | docker buildx build --platform $(BUILD_PLATFORMS) --push \ 39 | --tag $(REGISTRY)/volcano-vgpu-device-plugin:$(VERSION)\ 40 | --file docker/Dockerfile.ubuntu20.04 . 41 | 42 | push-latest: 43 | docker buildx build --platform $(BUILD_PLATFORMS) --push \ 44 | --tag $(REGISTRY)/volcano-vgpu-device-plugin:latest\ 45 | --file docker/Dockerfile.ubuntu20.04 . 46 | 47 | ubuntu20.04: 48 | docker buildx build --platform $(BUILD_PLATFORMS) --load \ 49 | --tag $(REGISTRY)/volcano-vgpu-device-plugin:$(VERSION)-ubuntu20.04 \ 50 | --file docker/Dockerfile.ubuntu20.04 . 51 | 52 | BIN_DIR=_output/bin 53 | RELEASE_DIR=_output/release 54 | REL_OSARCH=linux/amd64 55 | 56 | init: 57 | mkdir -p ${BIN_DIR} 58 | mkdir -p ${RELEASE_DIR} 59 | 60 | gen_bin: init 61 | go get github.com/mitchellh/gox 62 | CGO_ENABLED=1 gox -osarch=${REL_OSARCH} -ldflags ${LD_FLAGS} -output ${BIN_DIR}/${REL_OSARCH}/volcano-vgpu-device-plugin ./cmd/vgpu 63 | -------------------------------------------------------------------------------- /Makefile.def: -------------------------------------------------------------------------------- 1 | 2 | # If tag not explicitly set in users default to the git sha. 
3 | TAG ?= $(shell git rev-parse --verify HEAD) 4 | GitSHA=`git rev-parse HEAD` 5 | Date=`date "+%Y-%m-%d %H:%M:%S"` 6 | RELEASE_VER=latest 7 | LD_FLAGS=" \ 8 | -X '${REPO_PATH}/pkg/version.GitSHA=${GitSHA}' \ 9 | -X '${REPO_PATH}/pkg/version.Built=${Date}' \ 10 | -X '${REPO_PATH}/pkg/version.Version=${RELEASE_VER}'" 11 | 12 | -------------------------------------------------------------------------------- /OWNERS: -------------------------------------------------------------------------------- 1 | reviewers: 2 | - k82cn 3 | - kevin-wangzefeng 4 | - william-wang 5 | - Thor-wl 6 | - archlitchi 7 | - hzxuzhonghu 8 | - wangyang0616 9 | approvers: 10 | - k82cn 11 | - kevin-wangzefeng 12 | - william-wang 13 | - hzxuzhonghu 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Volcano vgpu device plugin for Kubernetes 2 | 3 | [![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2FProject-HAMi%2Fvolcano-vgpu-device-plugin.svg?type=shield)](https://app.fossa.com/projects/git%2Bgithub.com%2FProject-HAMi%2Fvolcano-vgpu-device-plugin?ref=badge_shield) 4 | [![docker pulls](https://img.shields.io/docker/pulls/projecthami/volcano-vgpu-device-plugin.svg)](https://hub.docker.com/r/projecthami/volcano-vgpu-device-plugin) 5 | 6 | **Note**: 7 | 8 | Volcano vgpu device-plugin can provide device-sharing mechanism for NVIDIA devices managed by volcano. 9 | 10 | This is based on [Nvidia Device Plugin](https://github.com/NVIDIA/k8s-device-plugin), it uses [HAMi-core](https://github.com/Project-HAMi/HAMi-core) to support hard isolation of GPU card. 11 | 12 | And collaborate with volcano, it is possible to enable GPU sharing. 
13 | 14 | ## Table of Contents 15 | 16 | - [About](#about) 17 | - [Prerequisites](#prerequisites) 18 | - [Quick Start](#quick-start) 19 | - [Preparing your GPU Nodes](#preparing-your-gpu-nodes) 20 | - [Enabling vGPU Support in Kubernetes](#enabling-gpu-support-in-kubernetes) 21 | - [Running vGPU Jobs](#running-vgpu-jobs) 22 | - [Issues and Contributing](#issues-and-contributing) 23 | 24 | ## About 25 | 26 | The Volcano device plugin for Kubernetes is a Daemonset that allows you to automatically: 27 | - Expose the number of GPUs on each node of your cluster 28 | - Keep track of the health of your GPUs 29 | - Run GPU enabled containers in your Kubernetes cluster. 30 | - Provide device-sharing mechanism for GPU tasks as the figure below. 31 | - Enforce hard resource limit in container. 32 | - Support dynamic-mig, for more details, see [config](doc/config.md) 33 | 34 | 35 | 36 | ## Prerequisites 37 | 38 | The list of prerequisites for running the Volcano device plugin is described below: 39 | * NVIDIA drivers > 440 40 | * nvidia-docker version > 2.0 (see how to [install](https://github.com/NVIDIA/nvidia-docker) and its [prerequisites](https://github.com/nvidia/nvidia-docker/wiki/Installation-\(version-2.0\)#prerequisites)) 41 | * docker configured with nvidia as the [default runtime](https://github.com/NVIDIA/nvidia-docker/wiki/Advanced-topics#default-runtime). 42 | * Kubernetes version >= 1.16 43 | * Volcano version >= 1.9 44 | 45 | ## Quick Start 46 | 47 | ### Preparing your GPU Nodes 48 | 49 | The following steps need to be executed on all your GPU nodes. 50 | This README assumes that the NVIDIA drivers and nvidia-docker have been installed. 51 | 52 | Note that you need to install the nvidia-docker2 package and not the nvidia-container-toolkit. 53 | This is because the new `--gpus` option hasn't reached kubernetes yet. Example: 54 | ```bash 55 | # Add the package repositories 56 | $ distribution=$(.
/etc/os-release;echo $ID$VERSION_ID) 57 | $ curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - 58 | $ curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list 59 | 60 | $ sudo apt-get update && sudo apt-get install -y nvidia-docker2 61 | $ sudo systemctl restart docker 62 | ``` 63 | 64 | You will need to enable the nvidia runtime as your default runtime on your node. 65 | We will be editing the docker daemon config file which is usually present at `/etc/docker/daemon.json`: 66 | ```json 67 | { 68 | "default-runtime": "nvidia", 69 | "runtimes": { 70 | "nvidia": { 71 | "path": "/usr/bin/nvidia-container-runtime", 72 | "runtimeArgs": [] 73 | } 74 | } 75 | } 76 | ``` 77 | > *if `runtimes` is not already present, head to the install page of [nvidia-docker](https://github.com/NVIDIA/nvidia-docker)* 78 | 79 | 80 | ### Configure scheduler 81 | 82 | update the scheduler configuration: 83 | 84 | ```shell script 85 | kubectl edit cm -n volcano-system volcano-scheduler-configmap 86 | ``` 87 | 88 | For volcano v1.9+,, use the following configMap 89 | ```yaml 90 | kind: ConfigMap 91 | apiVersion: v1 92 | metadata: 93 | name: volcano-scheduler-configmap 94 | namespace: volcano-system 95 | data: 96 | volcano-scheduler.conf: | 97 | actions: "enqueue, allocate, backfill" 98 | tiers: 99 | - plugins: 100 | - name: priority 101 | - name: gang 102 | - name: conformance 103 | - plugins: 104 | - name: drf 105 | - name: deviceshare 106 | arguments: 107 | deviceshare.VGPUEnable: true # enable vgpu 108 | - name: predicates 109 | - name: proportion 110 | - name: nodeorder 111 | - name: binpack 112 | ``` 113 | 114 | Customize your installation by adjusting the [configs](doc/config.md) 115 | 116 | 117 | ### Enabling GPU Support in Kubernetes 118 | 119 | Once you have enabled this option on *all* the GPU nodes you wish to use, 120 | you can then enable GPU support in your cluster by 
deploying the following Daemonset: 121 | 122 | ``` 123 | $ kubectl create -f volcano-vgpu-device-plugin.yml 124 | ``` 125 | 126 | ### Verify environment is ready 127 | 128 | Check the node status, it is ok if `volcano.sh/vgpu-number` is included in the allocatable resources. 129 | 130 | ```shell script 131 | $ kubectl get node {node name} -oyaml 132 | ... 133 | status: 134 | addresses: 135 | - address: 172.17.0.3 136 | type: InternalIP 137 | - address: volcano-control-plane 138 | type: Hostname 139 | allocatable: 140 | cpu: "4" 141 | ephemeral-storage: 123722704Ki 142 | hugepages-1Gi: "0" 143 | hugepages-2Mi: "0" 144 | memory: 8174332Ki 145 | pods: "110" 146 | volcano.sh/vgpu-memory: "89424" 147 | volcano.sh/vgpu-number: "10" # vGPU resource 148 | capacity: 149 | cpu: "4" 150 | ephemeral-storage: 123722704Ki 151 | hugepages-1Gi: "0" 152 | hugepages-2Mi: "0" 153 | memory: 8174332Ki 154 | pods: "110" 155 | volcano.sh/vgpu-memory: "89424" 156 | volcano.sh/vgpu-number: "10" # vGPU resource 157 | ``` 158 | 159 | ### Running VGPU Jobs 160 | 161 | VGPU can be requested by both set "volcano.sh/vgpu-number" , "volcano.sh/vgpu-cores" and "volcano.sh/vgpu-memory" in resource.limit 162 | 163 | ```shell script 164 | $ cat < **WARNING:** *if you don't request GPUs when using the device plugin with NVIDIA images all 189 | > the GPUs on the machine will be exposed inside your container. 190 | > The number of vgpu used by a container can not exceed the number of gpus on that node.* 191 | 192 | ### Monitor 193 | 194 | volcano-scheduler-metrics records every GPU usage and limitation, visit the following address to get these metrics. 
195 | 196 | ``` 197 | curl {volcano scheduler cluster ip}:8080/metrics 198 | ``` 199 | 200 | You can also collect the **GPU utilization**, **GPU memory usage**, **pods' GPU memory limitations** and **pods' GPU memory usage** metrics on nodes by visiting the following addresses: 201 | 202 | ``` 203 | curl {volcano device plugin pod ip}:9394/metrics 204 | ``` 205 | ![img](./doc/vgpu_device_plugin_metrics.png) 206 | 207 | # Issues and Contributing 208 | [Check out the Contributing document!](CONTRIBUTING.md) 209 | 210 | * You can report a bug by [filing a new issue](https://github.com/Project-HAMi/volcano-vgpu-device-plugin) 211 | * You can contribute by opening a [pull request](https://help.github.com/articles/using-pull-requests/) 212 | 213 | 214 | ## Upgrading Kubernetes with the device plugin 215 | 216 | Upgrading Kubernetes when you have a device plugin deployed doesn't require you to make any 217 | particular changes to your workflow. 218 | The API is versioned and is pretty stable (though it is not guaranteed to be non-breaking), 219 | upgrading kubernetes won't require you to deploy a different version of the device plugin and you will 220 | see GPUs re-registering themselves after your node comes back online. 221 | 222 | 223 | Upgrading the device plugin is a more complex task. It is recommended to drain GPU tasks as 224 | we cannot guarantee that GPU tasks will survive a rolling upgrade. 225 | However we make best efforts to preserve GPU tasks during an upgrade.
226 | 227 | 228 | ## License 229 | [![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2FProject-HAMi%2Fvolcano-vgpu-device-plugin.svg?type=large)](https://app.fossa.com/projects/git%2Bgithub.com%2FProject-HAMi%2Fvolcano-vgpu-device-plugin?ref=badge_large) -------------------------------------------------------------------------------- /cmd/vgpu-monitor/feedback.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The HAMi Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package main 18 | 19 | import ( 20 | "time" 21 | 22 | "volcano.sh/k8s-device-plugin/pkg/monitor/nvidia" 23 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config" 24 | 25 | "k8s.io/klog/v2" 26 | ) 27 | 28 | type UtilizationPerDevice []int 29 | 30 | func CheckBlocking(utSwitchOn map[string]UtilizationPerDevice, p int, c *nvidia.ContainerUsage) bool { 31 | for i := 0; i < c.Info.DeviceMax(); i++ { 32 | uuid := c.Info.DeviceUUID(i) 33 | _, ok := utSwitchOn[uuid] 34 | if ok { 35 | for i := 0; i < p; i++ { 36 | if utSwitchOn[uuid][i] > 0 { 37 | return true 38 | } 39 | } 40 | return false 41 | } 42 | } 43 | return false 44 | } 45 | 46 | // Check whether task with higher priority use GPU or there are other tasks with the same priority. 
47 | func CheckPriority(utSwitchOn map[string]UtilizationPerDevice, p int, c *nvidia.ContainerUsage) bool { 48 | for i := 0; i < c.Info.DeviceMax(); i++ { 49 | uuid := c.Info.DeviceUUID(i) 50 | _, ok := utSwitchOn[uuid] 51 | if ok { 52 | for i := 0; i < p; i++ { 53 | if utSwitchOn[uuid][i] > 0 { 54 | return true 55 | } 56 | } 57 | if utSwitchOn[uuid][p] > 1 { 58 | return true 59 | } 60 | } 61 | } 62 | return false 63 | } 64 | 65 | func Observe(lister *nvidia.ContainerLister) { 66 | utSwitchOn := map[string]UtilizationPerDevice{} 67 | containers := lister.ListContainers() 68 | 69 | for _, c := range containers { 70 | recentKernel := c.Info.GetRecentKernel() 71 | if recentKernel > 0 { 72 | recentKernel-- 73 | if recentKernel > 0 { 74 | for i := 0; i < c.Info.DeviceMax(); i++ { 75 | // Null device condition 76 | if !c.Info.IsValidUUID(i) { 77 | continue 78 | } 79 | uuid := c.Info.DeviceUUID(i) 80 | if len(utSwitchOn[uuid]) == 0 { 81 | utSwitchOn[uuid] = []int{0, 0} 82 | } 83 | utSwitchOn[uuid][c.Info.GetPriority()]++ 84 | } 85 | } 86 | c.Info.SetRecentKernel(recentKernel) 87 | } 88 | } 89 | for idx, c := range containers { 90 | priority := c.Info.GetPriority() 91 | recentKernel := c.Info.GetRecentKernel() 92 | utilizationSwitch := c.Info.GetUtilizationSwitch() 93 | if CheckBlocking(utSwitchOn, priority, c) { 94 | if recentKernel >= 0 { 95 | klog.Infof("utSwitchon=%v", utSwitchOn) 96 | klog.Infof("Setting Blocking to on %v", idx) 97 | c.Info.SetRecentKernel(-1) 98 | } 99 | } else { 100 | if recentKernel < 0 { 101 | klog.Infof("utSwitchon=%v", utSwitchOn) 102 | klog.Infof("Setting Blocking to off %v", idx) 103 | c.Info.SetRecentKernel(0) 104 | } 105 | } 106 | if CheckPriority(utSwitchOn, priority, c) { 107 | if utilizationSwitch != 1 { 108 | klog.Infof("utSwitchon=%v", utSwitchOn) 109 | klog.Infof("Setting UtilizationSwitch to on %v", idx) 110 | c.Info.SetUtilizationSwitch(1) 111 | } 112 | } else { 113 | if utilizationSwitch != 0 { 114 | klog.Infof("utSwitchon=%v", 
utSwitchOn) 115 | klog.Infof("Setting UtilizationSwitch to off %v", idx) 116 | c.Info.SetUtilizationSwitch(0) 117 | } 118 | } 119 | } 120 | } 121 | 122 | func watchAndFeedback(lister *nvidia.ContainerLister) { 123 | config.Nvml().Init() 124 | for { 125 | time.Sleep(time.Second * 5) 126 | err := lister.Update() 127 | if err != nil { 128 | klog.Errorf("Failed to update container list: %v", err) 129 | continue 130 | } 131 | Observe(lister) 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /cmd/vgpu-monitor/main.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The HAMi Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package main 18 | 19 | import ( 20 | "volcano.sh/k8s-device-plugin/pkg/monitor/nvidia" 21 | 22 | "k8s.io/klog/v2" 23 | ) 24 | 25 | func main() { 26 | if err := ValidateEnvVars(); err != nil { 27 | klog.Fatalf("Failed to validate environment variables: %v", err) 28 | } 29 | containerLister, err := nvidia.NewContainerLister() 30 | if err != nil { 31 | klog.Fatalf("Failed to create container lister: %v", err) 32 | } 33 | errchannel := make(chan error) 34 | go initMetrics(containerLister) 35 | go watchAndFeedback(containerLister) 36 | for { 37 | err := <-errchannel 38 | klog.Errorf("failed to serve: %v", err) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /cmd/vgpu-monitor/metrics.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The HAMi Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package main 18 | 19 | import ( 20 | "fmt" 21 | "log" 22 | "net/http" 23 | "strings" 24 | "time" 25 | 26 | "volcano.sh/k8s-device-plugin/pkg/monitor/nvidia" 27 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config" 28 | 29 | "github.com/NVIDIA/go-nvml/pkg/nvml" 30 | "github.com/prometheus/client_golang/prometheus" 31 | "github.com/prometheus/client_golang/prometheus/promhttp" 32 | 33 | "k8s.io/apimachinery/pkg/labels" 34 | "k8s.io/client-go/informers" 35 | listerscorev1 "k8s.io/client-go/listers/core/v1" 36 | "k8s.io/klog/v2" 37 | ) 38 | 39 | // ClusterManager is an example for a system that might have been built without 40 | // Prometheus in mind. It models a central manager of jobs running in a 41 | // cluster. Thus, we implement a custom Collector called 42 | // ClusterManagerCollector, which collects information from a ClusterManager 43 | // using its provided methods and turns them into Prometheus Metrics for 44 | // collection. 45 | // 46 | // An additional challenge is that multiple instances of the ClusterManager are 47 | // run within the same binary, each in charge of a different zone. We need to 48 | // make use of wrapping Registerers to be able to register each 49 | // ClusterManagerCollector instance with Prometheus. 50 | type ClusterManager struct { 51 | Zone string 52 | // Contains many more fields not listed in this example. 53 | PodLister listerscorev1.PodLister 54 | containerLister *nvidia.ContainerLister 55 | } 56 | 57 | // ReallyExpensiveAssessmentOfTheSystemState is a mock for the data gathering a 58 | // real cluster manager would have to do. Since it may actually be really 59 | // expensive, it must only be called once per collection. This implementation, 60 | // obviously, only returns some made-up data. 61 | func (c *ClusterManager) ReallyExpensiveAssessmentOfTheSystemState() ( 62 | oomCountByHost map[string]int, ramUsageByHost map[string]float64, 63 | ) { 64 | // Just example fake data. 
65 | oomCountByHost = map[string]int{ 66 | "foo.example.org": 42, 67 | "bar.example.org": 2001, 68 | } 69 | ramUsageByHost = map[string]float64{ 70 | "foo.example.org": 6.023e23, 71 | "bar.example.org": 3.14, 72 | } 73 | return 74 | } 75 | 76 | // ClusterManagerCollector implements the Collector interface. 77 | type ClusterManagerCollector struct { 78 | ClusterManager *ClusterManager 79 | } 80 | 81 | // Descriptors used by the ClusterManagerCollector below. 82 | var ( 83 | hostGPUdesc = prometheus.NewDesc( 84 | "HostGPUMemoryUsage", 85 | "GPU device memory usage", 86 | []string{"deviceidx", "deviceuuid"}, nil, 87 | ) 88 | 89 | hostGPUUtilizationdesc = prometheus.NewDesc( 90 | "HostCoreUtilization", 91 | "GPU core utilization", 92 | []string{"deviceidx", "deviceuuid"}, nil, 93 | ) 94 | 95 | ctrvGPUdesc = prometheus.NewDesc( 96 | "vGPU_device_memory_usage_in_bytes", 97 | "vGPU device usage", 98 | []string{"podnamespace", "podname", "ctrname", "vdeviceid", "deviceuuid"}, nil, 99 | ) 100 | 101 | ctrvGPUlimitdesc = prometheus.NewDesc( 102 | "vGPU_device_memory_limit_in_bytes", 103 | "vGPU device limit", 104 | []string{"podnamespace", "podname", "ctrname", "vdeviceid", "deviceuuid"}, nil, 105 | ) 106 | ctrDeviceMemorydesc = prometheus.NewDesc( 107 | "Device_memory_desc_of_container", 108 | "Container device meory description", 109 | []string{"podnamespace", "podname", "ctrname", "vdeviceid", "deviceuuid", "context", "module", "data", "offset"}, nil, 110 | ) 111 | ctrDeviceUtilizationdesc = prometheus.NewDesc( 112 | "Device_utilization_desc_of_container", 113 | "Container device utilization description", 114 | []string{"podnamespace", "podname", "ctrname", "vdeviceid", "deviceuuid"}, nil, 115 | ) 116 | ctrDeviceLastKernelDesc = prometheus.NewDesc( 117 | "Device_last_kernel_of_container", 118 | "Container device last kernel description", 119 | []string{"podnamespace", "podname", "ctrname", "vdeviceid", "deviceuuid"}, nil, 120 | ) 121 | ) 122 | 123 | // Describe is 
implemented with DescribeByCollect. That's possible because the 124 | // Collect method will always return the same two metrics with the same two 125 | // descriptors. 126 | func (cc ClusterManagerCollector) Describe(ch chan<- *prometheus.Desc) { 127 | ch <- hostGPUdesc 128 | ch <- ctrvGPUdesc 129 | ch <- ctrvGPUlimitdesc 130 | ch <- hostGPUUtilizationdesc 131 | //prometheus.DescribeByCollect(cc, ch) 132 | } 133 | 134 | // Collect first triggers the ReallyExpensiveAssessmentOfTheSystemState. Then it 135 | // creates constant metrics for each host on the fly based on the returned data. 136 | // 137 | // Note that Collect could be called concurrently, so we depend on 138 | // ReallyExpensiveAssessmentOfTheSystemState to be concurrency-safe. 139 | func (cc ClusterManagerCollector) Collect(ch chan<- prometheus.Metric) { 140 | klog.Info("Starting to collect metrics for vGPUMonitor") 141 | containerLister := cc.ClusterManager.containerLister 142 | if err := containerLister.Update(); err != nil { 143 | klog.Error("Update container error: %s", err.Error()) 144 | } 145 | 146 | nvret := config.Nvml().Init() 147 | if nvret != nvml.SUCCESS { 148 | klog.Errorf("nvml Init err= %v", nvret) 149 | } 150 | devnum, nvret := config.Nvml().DeviceGetCount() 151 | if nvret != nvml.SUCCESS { 152 | klog.Errorf("nvml GetDeviceCount err= %v", nvret) 153 | } else { 154 | for ii := 0; ii < devnum; ii++ { 155 | hdev, nvret := config.Nvml().DeviceGetHandleByIndex(ii) 156 | if nvret != nvml.SUCCESS { 157 | klog.Error(nvret) 158 | } 159 | memoryUsed := 0 160 | memory, ret := hdev.GetMemoryInfo() 161 | if ret == nvml.SUCCESS { 162 | memoryUsed = int(memory.Used) 163 | } else { 164 | klog.Error("nvml get memory error ret=", ret) 165 | } 166 | 167 | uuid, nvret := hdev.GetUUID() 168 | if nvret != nvml.SUCCESS { 169 | klog.Error(nvret) 170 | } else { 171 | ch <- prometheus.MustNewConstMetric( 172 | hostGPUdesc, 173 | prometheus.GaugeValue, 174 | float64(memoryUsed), 175 | fmt.Sprint(ii), uuid, 176 | ) 
177 | } 178 | util, nvret := hdev.GetUtilizationRates() 179 | if nvret != nvml.SUCCESS { 180 | klog.Error(nvret) 181 | } else { 182 | ch <- prometheus.MustNewConstMetric( 183 | hostGPUUtilizationdesc, 184 | prometheus.GaugeValue, 185 | float64(util.Gpu), 186 | fmt.Sprint(ii), uuid, 187 | ) 188 | } 189 | 190 | } 191 | } 192 | 193 | pods, err := cc.ClusterManager.PodLister.List(labels.Everything()) 194 | if err != nil { 195 | klog.Error("failed to list pods with err=", err.Error()) 196 | } 197 | nowSec := time.Now().Unix() 198 | 199 | containers := containerLister.ListContainers() 200 | for _, pod := range pods { 201 | for _, c := range containers { 202 | //for sridx := range srPodList { 203 | // if srPodList[sridx].sr == nil { 204 | // continue 205 | // } 206 | if c.Info == nil { 207 | continue 208 | } 209 | //podUID := strings.Split(srPodList[sridx].idstr, "_")[0] 210 | //ctrName := strings.Split(srPodList[sridx].idstr, "_")[1] 211 | podUID := c.PodUID 212 | ctrName := c.ContainerName 213 | if strings.Compare(string(pod.UID), podUID) != 0 { 214 | continue 215 | } 216 | fmt.Println("Pod matched!", pod.Name, pod.Namespace, pod.Labels) 217 | for _, ctr := range pod.Spec.Containers { 218 | if strings.Compare(ctr.Name, ctrName) != 0 { 219 | continue 220 | } 221 | fmt.Println("container matched", ctr.Name) 222 | //err := setHostPid(pod, pod.Status.ContainerStatuses[ctridx], &srPodList[sridx]) 223 | //if err != nil { 224 | // fmt.Println("setHostPid filed", err.Error()) 225 | //} 226 | //fmt.Println("sr.list=", srPodList[sridx].sr) 227 | podlabels := make(map[string]string) 228 | for idx, val := range pod.Labels { 229 | idxfix := strings.ReplaceAll(idx, "-", "_") 230 | valfix := strings.ReplaceAll(val, "-", "_") 231 | podlabels[idxfix] = valfix 232 | } 233 | for i := 0; i < c.Info.DeviceNum(); i++ { 234 | uuid := c.Info.DeviceUUID(i)[0:40] 235 | memoryTotal := c.Info.DeviceMemoryTotal(i) 236 | memoryLimit := c.Info.DeviceMemoryLimit(i) 237 | memoryContextSize := 
c.Info.DeviceMemoryContextSize(i) 238 | memoryModuleSize := c.Info.DeviceMemoryModuleSize(i) 239 | memoryBufferSize := c.Info.DeviceMemoryBufferSize(i) 240 | memoryOffset := c.Info.DeviceMemoryOffset(i) 241 | smUtil := c.Info.DeviceSmUtil(i) 242 | lastKernelTime := c.Info.LastKernelTime() 243 | 244 | //fmt.Println("uuid=", uuid, "length=", len(uuid)) 245 | ch <- prometheus.MustNewConstMetric( 246 | ctrvGPUdesc, 247 | prometheus.GaugeValue, 248 | float64(memoryTotal), 249 | pod.Namespace, pod.Name, ctrName, fmt.Sprint(i), uuid, /*,string(sr.sr.uuids[i].uuid[:])*/ 250 | ) 251 | ch <- prometheus.MustNewConstMetric( 252 | ctrvGPUlimitdesc, 253 | prometheus.GaugeValue, 254 | float64(memoryLimit), 255 | pod.Namespace, pod.Name, ctrName, fmt.Sprint(i), uuid, /*,string(sr.sr.uuids[i].uuid[:])*/ 256 | ) 257 | ch <- prometheus.MustNewConstMetric( 258 | ctrDeviceMemorydesc, 259 | prometheus.CounterValue, 260 | float64(memoryTotal), 261 | pod.Namespace, pod.Name, ctrName, fmt.Sprint(i), uuid, 262 | fmt.Sprint(memoryContextSize), fmt.Sprint(memoryModuleSize), fmt.Sprint(memoryBufferSize), fmt.Sprint(memoryOffset), 263 | ) 264 | ch <- prometheus.MustNewConstMetric( 265 | ctrDeviceUtilizationdesc, 266 | prometheus.GaugeValue, 267 | float64(smUtil), 268 | pod.Namespace, pod.Name, ctrName, fmt.Sprint(i), uuid, 269 | ) 270 | if lastKernelTime > 0 { 271 | lastSec := nowSec - lastKernelTime 272 | if lastSec < 0 { 273 | lastSec = 0 274 | } 275 | ch <- prometheus.MustNewConstMetric( 276 | ctrDeviceLastKernelDesc, 277 | prometheus.GaugeValue, 278 | float64(lastSec), 279 | pod.Namespace, pod.Name, ctrName, fmt.Sprint(i), uuid, 280 | ) 281 | } 282 | } 283 | } 284 | } 285 | } 286 | } 287 | 288 | // NewClusterManager first creates a Prometheus-ignorant ClusterManager 289 | // instance. Then, it creates a ClusterManagerCollector for the just created 290 | // ClusterManager. Finally, it registers the ClusterManagerCollector with a 291 | // wrapping Registerer that adds the zone as a label. 
In this way, the metrics 292 | // collected by different ClusterManagerCollectors do not collide. 293 | func NewClusterManager(zone string, reg prometheus.Registerer, containerLister *nvidia.ContainerLister) *ClusterManager { 294 | c := &ClusterManager{ 295 | Zone: zone, 296 | containerLister: containerLister, 297 | } 298 | 299 | informerFactory := informers.NewSharedInformerFactoryWithOptions(containerLister.Clientset(), time.Hour*1) 300 | c.PodLister = informerFactory.Core().V1().Pods().Lister() 301 | stopCh := make(chan struct{}) 302 | informerFactory.Start(stopCh) 303 | 304 | cc := ClusterManagerCollector{ClusterManager: c} 305 | prometheus.WrapRegistererWith(prometheus.Labels{"zone": zone}, reg).MustRegister(cc) 306 | return c 307 | } 308 | 309 | func initMetrics(containerLister *nvidia.ContainerLister) { 310 | // Since we are dealing with custom Collector implementations, it might 311 | // be a good idea to try it out with a pedantic registry. 312 | klog.Info("Initializing metrics for vGPUmonitor") 313 | reg := prometheus.NewRegistry() 314 | //reg := prometheus.NewPedanticRegistry() 315 | 316 | // Construct cluster managers. In real code, we would assign them to 317 | // variables to then do something with them. 318 | NewClusterManager("vGPU", reg, containerLister) 319 | 320 | http.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{})) 321 | log.Fatal(http.ListenAndServe(":9394", nil)) 322 | } 323 | -------------------------------------------------------------------------------- /cmd/vgpu-monitor/validation.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The HAMi Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package main 18 | 19 | import ( 20 | "fmt" 21 | "os" 22 | ) 23 | 24 | var requiredEnvVars = map[string]bool{ 25 | "HOOK_PATH": true, 26 | "OTHER_ENV_VAR": false, 27 | } 28 | 29 | func ValidateEnvVars() error { 30 | for envVar, required := range requiredEnvVars { 31 | _, exists := os.LookupEnv(envVar) 32 | if required && !exists { 33 | return fmt.Errorf("required environment variable %s not set", envVar) 34 | } 35 | } 36 | return nil 37 | } 38 | -------------------------------------------------------------------------------- /cmd/vgpu/main.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
package main

import (
	"fmt"
	"net/http"
	_ "net/http/pprof"
	"syscall"

	"github.com/NVIDIA/go-nvml/pkg/nvml"
	"github.com/fsnotify/fsnotify"
	"github.com/spf13/cobra"
	"github.com/spf13/viper"
	"k8s.io/klog/v2"
	pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
	nvidiadevice "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu"
	"volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config"
	"volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/util"
)

var (
	// failOnInitErrorFlag: when true, a failed NVML init aborts the process
	// instead of blocking indefinitely (see start()).
	failOnInitErrorFlag bool
	// migStrategyFlag selects how MIG-capable GPUs are exposed
	// (none | single | mixed).
	migStrategyFlag string

	// rootCmd is the cobra entry point; Run delegates everything to start().
	rootCmd = &cobra.Command{
		Use:   "device-plugin",
		Short: "kubernetes vgpu device-plugin",
		Run: func(cmd *cobra.Command, args []string) {
			if err := start(); err != nil {
				klog.Fatal(err)
			}
		},
	}
)

// devicePluginConfigs mirrors a per-node JSON configuration document.
// NOTE(review): not referenced anywhere in this file — presumably consumed
// elsewhere or dead; confirm before removing.
type devicePluginConfigs struct {
	Nodeconfig []struct {
		Name                string  `json:"name"`
		Devicememoryscaling float64 `json:"devicememoryscaling"`
		Devicesplitcount    int     `json:"devicesplitcount"`
		Migstrategy         string  `json:"migstrategy"`
	} `json:"nodeconfig"`
}

// init registers command-line flags, binds NODE_NAME from the environment,
// and attaches the version subcommand.
func init() {
	// https://github.com/spf13/viper/issues/461
	viper.BindEnv("node-name", "NODE_NAME")

	rootCmd.Flags().SortFlags = false
	rootCmd.PersistentFlags().SortFlags = false

	rootCmd.Flags().StringVar(&migStrategyFlag, "mig-strategy", "none", "the desired strategy for exposing MIG devices on GPUs that support it:\n\t\t[none | single | mixed]")
	rootCmd.Flags().BoolVar(&failOnInitErrorFlag, "fail-on-init-error", true, "fail the plugin if an error is encountered during initialization, otherwise block indefinitely")
	rootCmd.Flags().UintVar(&config.DeviceSplitCount, "device-split-count", 2, "the number for NVIDIA device split")
	rootCmd.Flags().UintVar(&config.GPUMemoryFactor, "gpu-memory-factor", 1, "the default gpu memory block size is 1MB")
	rootCmd.Flags().Float64Var(&config.DeviceCoresScaling, "device-cores-scaling", 1.0, "the ratio for NVIDIA device cores scaling")
	rootCmd.Flags().StringVar(&config.NodeName, "node-name", viper.GetString("node-name"), "node name")

	rootCmd.PersistentFlags().AddGoFlagSet(util.GlobalFlagSet())
	rootCmd.AddCommand(config.VersionCmd)
}

// start runs the device-plugin lifecycle: it initializes NVML, watches the
// kubelet socket directory and OS signals, and (re)creates the per-strategy
// plugins. The restart/events labels implement a restart loop: any plugin
// start failure, a kubelet socket re-creation, or SIGHUP jumps back to
// `restart`; any other signal stops all plugins and returns.
func start() error {
	// pprof endpoint for live debugging; failure to bind is only logged.
	go func() {
		klog.Info("Starting pprof server, listen on port 6060")
		klog.Info(http.ListenAndServe(":6060", nil))
	}()

	klog.Info("Loading NVML")
	if nvret := config.Nvml().Init(); nvret != nvml.SUCCESS {
		klog.Infof("Failed to initialize NVML: %v.", nvret)
		klog.Infof("If this is a GPU node, did you set the docker default runtime to `nvidia`?")
		klog.Infof("You can check the prerequisites at: https://github.com/NVIDIA/k8s-device-plugin#prerequisites")
		klog.Infof("You can learn how to set the runtime at: https://github.com/NVIDIA/k8s-device-plugin#quick-start")
		klog.Infof("If this is not a GPU node, you should set up a toleration or nodeSelector to only deploy this plugin on GPU nodes")
		if failOnInitErrorFlag {
			return fmt.Errorf("failed to initialize NVML: %v", nvret)
		}
		// Block forever on non-GPU nodes when fail-on-init-error is false.
		select {}
	}
	defer func() { klog.Info("Shutdown of NVML returned:", config.Nvml().Shutdown()) }()

	klog.Info("Starting FS watcher.")
	watcher, err := NewFSWatcher(pluginapi.DevicePluginPath)
	if err != nil {
		return fmt.Errorf("failed to create FS watcher: %v", err)
	}
	defer watcher.Close()

	klog.Info("Starting OS watcher.")
	sigs := NewOSWatcher(syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)

	nvidiaCfg := util.LoadNvidiaConfig()

	cache := nvidiadevice.NewDeviceCache()
	cache.Start()
	defer cache.Stop()

	register := nvidiadevice.NewDeviceRegister(cache)
	register.Start()
	defer register.Stop()

	var plugins []*nvidiadevice.NvidiaDevicePlugin
restart:
	// If we are restarting, idempotently stop any running plugins before
	// recreating them below.
	for _, p := range plugins {
		p.Stop()
	}
	klog.Info("Retreiving plugins.")
	migStrategy, err := nvidiadevice.NewMigStrategy(migStrategyFlag)
	if err != nil {
		return fmt.Errorf("error creating MIG strategy: %v", err)
	}
	plugins = migStrategy.GetPlugins(nvidiaCfg, cache)

	started := 0
	pluginStartError := make(chan struct{})
	for _, p := range plugins {
		// Just continue if there are no devices to serve for plugin p.
		if len(p.Devices()) == 0 {
			continue
		}

		// Start the gRPC server for plugin p and connect it with the kubelet.
		if err := p.Start(); err != nil {
			//klog.SetOutput(os.Stderr)
			klog.Info("Could not contact Kubelet, retrying. Did you enable the device plugin feature gate?")
			klog.Info("You can check the prerequisites at: https://github.com/NVIDIA/k8s-device-plugin#prerequisites")
			klog.Info("You can learn how to set the runtime at: https://github.com/NVIDIA/k8s-device-plugin#quick-start")
			// Closing the channel makes the events loop jump back to restart.
			close(pluginStartError)
			goto events
		}
		started++
	}

	if started == 0 {
		klog.Info("No devices found. Waiting indefinitely.")
	}

events:
	// Start an infinite loop, waiting for several indicators to either log
	// some messages, trigger a restart of the plugins, or exit the program.
	for {
		select {
		// If there was an error starting any plugins, restart them all.
		case <-pluginStartError:
			goto restart

		// Detect a kubelet restart by watching for a newly created
		// 'pluginapi.KubeletSocket' file. When this occurs, restart this loop,
		// restarting all of the plugins in the process.
		case event := <-watcher.Events:
			if event.Name == pluginapi.KubeletSocket && event.Op&fsnotify.Create == fsnotify.Create {
				klog.Infof("inotify: %s created, restarting.", pluginapi.KubeletSocket)
				goto restart
			}

		// Watch for any other fs errors and log them.
		case err := <-watcher.Errors:
			klog.Infof("inotify: %s", err)

		// Watch for any signals from the OS. On SIGHUP, restart this loop,
		// restarting all of the plugins in the process. On all other
		// signals, exit the loop and exit the program.
		case s := <-sigs:
			switch s {
			case syscall.SIGHUP:
				klog.Info("Received SIGHUP, restarting.")
				goto restart
			default:
				klog.Infof("Received signal %v, shutting down.", s)
				for _, p := range plugins {
					p.Stop()
				}
				// Labeled break exits the events for-loop, not the switch.
				break events
			}
		}
	}
	return nil
}

// main executes the cobra root command and aborts on any error.
func main() {
	if err := rootCmd.Execute(); err != nil {
		klog.Fatal(err)
	}
}
15 | */ 16 | 17 | package main 18 | 19 | import ( 20 | "os" 21 | "os/signal" 22 | 23 | "github.com/fsnotify/fsnotify" 24 | ) 25 | 26 | func NewFSWatcher(files ...string) (*fsnotify.Watcher, error) { 27 | watcher, err := fsnotify.NewWatcher() 28 | if err != nil { 29 | return nil, err 30 | } 31 | 32 | for _, f := range files { 33 | err = watcher.Add(f) 34 | if err != nil { 35 | watcher.Close() 36 | return nil, err 37 | } 38 | } 39 | 40 | return watcher, nil 41 | } 42 | 43 | func NewOSWatcher(sigs ...os.Signal) chan os.Signal { 44 | sigChan := make(chan os.Signal, 1) 45 | signal.Notify(sigChan, sigs...) 46 | 47 | return sigChan 48 | } 49 | -------------------------------------------------------------------------------- /doc/config.md: -------------------------------------------------------------------------------- 1 | # Global Config 2 | 3 | ## Device Configs: ConfigMap 4 | 5 | **Note:** 6 | All the configurations listed below are managed within the `volcano-vgpu-device-config` ConfigMap. 7 | You can update these configurations using the following methods: 8 | 9 | 1. Directly edit the ConfigMap: If `volcano-vgpu-device-plugin` has already been successfully installed, you can manually update the `volcano-vgpu-device-config` ConfigMap using the `kubectl edit` command to manually update the hami-scheduler-device ConfigMap. 10 | 11 | ```bash 12 | kubectl edit configmap volcano-vgpu-device-config -n 13 | ``` 14 | 15 | After making changes, restart the volcano-vgpu-device-plugin and volcano-scheduler to apply the updated configurations. 16 | 17 | * `nvidia.deviceMemoryScaling`: 18 | Float type, by default: 1. The ratio for NVIDIA device memory scaling, can be greater than 1 (enable virtual device memory, experimental feature). For NVIDIA GPU with *M* memory, if we set `nvidia.deviceMemoryScaling` argument to *S*, vGPUs splitted by this GPU will totally get `S * M` memory in Kubernetes with our device plugin. 
* `nvidia.migstrategy`:
  String type, "none" for ignoring MIG features or "mixed" for allocating MIG devices as separate resources. Default "none"
* `operatingmode`:
  String type, `hami-core` for using HAMi-core for container resource limitation, `mig` for using MIG for container resource limitation (only available on GPUs of the Ampere architecture or later)
14 | For example: 15 | - args: ["--gpu-strategy=number"] will let device plugin using the gpu-number strategy 16 | - args: ["--gpu-strategy=share","--gpu-memory-factor=10"] will let device plugin using the gpu-share strategy, and memory factor is 10MB 17 | 18 | ### As a configuration file 19 | ``` 20 | version: v1 21 | flags: 22 | GPUStrategy: "number" 23 | ``` 24 | 25 | ### Configuration Option Details 26 | **`GPU_STRATEGY`(string)**: 27 | the desired strategy for exposing GPU devices 28 | 29 | `[number | share ] (default 'share')` 30 | 31 | The `GPU_STRATEGY` option configures the daemonset to be able to expose 32 | on GPU devices in numbers or sharing mode. More information on what 33 | these strategies are and how to use it in Volcano can be found in Volcano scheduler. 34 | 35 | **`GPU_MEMORY_FACTOR`(uint)**: 36 | the desired memory factor for exposing GPU shared memory virtual devices 37 | 38 | `(default 1)` 39 | 40 | The `GPU_MEMORY_FACTOR` option configures the daemonset to be able to expose 41 | on GPU shared memory virtual devices size. By default each block is set to be 1MB, 42 | but users who have large gpu memory can specify a larger number such as 10MB, 100MB. 43 | 44 | **`CONFIG_FILE`**: 45 | point the plugin at a configuration file instead of relying on command line 46 | flags or environment variables 47 | 48 | `(default '')` 49 | 50 | The order of precedence for setting each option is (1) command line flag, (2) 51 | environment variable, (3) configuration file. In this way, one could use a 52 | pre-defined configuration file, but then override the values set in it at 53 | launch time. 
54 | -------------------------------------------------------------------------------- /doc/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Project-HAMi/volcano-vgpu-device-plugin/9fe49cddc9d40d387f5c4f742a4f4786bbb74642/doc/example.png -------------------------------------------------------------------------------- /doc/hard_limit.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Project-HAMi/volcano-vgpu-device-plugin/9fe49cddc9d40d387f5c4f742a4f4786bbb74642/doc/hard_limit.jpg -------------------------------------------------------------------------------- /doc/vgpu-on-volcano.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Project-HAMi/volcano-vgpu-device-plugin/9fe49cddc9d40d387f5c4f742a4f4786bbb74642/doc/vgpu-on-volcano.pdf -------------------------------------------------------------------------------- /doc/vgpu_device_plugin_metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Project-HAMi/volcano-vgpu-device-plugin/9fe49cddc9d40d387f5c4f742a4f4786bbb74642/doc/vgpu_device_plugin_metrics.png -------------------------------------------------------------------------------- /docker/Dockerfile.ubuntu20.04: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM ubuntu:20.04 AS builder 16 | ARG TARGETARCH 17 | RUN apt-get update 18 | RUN apt-get -y install ca-certificates g++ wget 19 | RUN wget -qO- https://storage.googleapis.com/golang/go1.23.7.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -zx 20 | ENV GOPATH=/go 21 | ENV PATH=$GOPATH/bin:/usr/local/go/bin:$PATH 22 | WORKDIR /go/src/volcano.sh/devices 23 | 24 | COPY . . 25 | RUN go env -w GOARCH=${TARGETARCH} 26 | RUN go env -w CGO_LDFLAGS_ALLOW='-Wl,--unresolved-symbols=ignore-in-object-files' 27 | RUN go build -ldflags="-s -w" -o volcano-vgpu-device-plugin ./cmd/vgpu 28 | RUN go build -ldflags="-s -w" -o volcano-vgpu-monitor ./cmd/vgpu-monitor 29 | RUN go install github.com/NVIDIA/mig-parted/cmd/nvidia-mig-parted@latest 30 | 31 | FROM nvidia/cuda:12.2.0-devel-ubuntu20.04 AS nvidia_builder 32 | ARG TARGETARCH 33 | RUN apt-get update 34 | RUN apt-get -y install wget openssl libssl-dev 35 | RUN case "${TARGETARCH}" in \ 36 | "amd64") wget https://cmake.org/files/v3.19/cmake-3.19.8-Linux-x86_64.tar.gz ;; \ 37 | "arm64") wget https://cmake.org/files/v3.19/cmake-3.19.8-Linux-aarch64.tar.gz ;; \ 38 | *) echo "Unsupported architecture: ${TARGETARCH}" && exit 1 ;; \ 39 | esac && \ 40 | tar -xzf cmake-3.19.8-Linux-*.tar.gz -C /opt && \ 41 | ln -s /opt/cmake-3.19.8-Linux-*/bin/cmake /usr/local/bin/cmake && \ 42 | rm cmake-3.19.8-Linux-*.tar.gz 43 | COPY ./libvgpu /libvgpu 44 | WORKDIR /libvgpu 45 | RUN rm -rf /libvgpu/build 46 | RUN bash ./build.sh 47 | 48 | FROM ubuntu:24.04 49 | 50 | ENV NVIDIA_VISIBLE_DEVICES=all 51 | ENV 
NVIDIA_DRIVER_CAPABILITIES=utility 52 | 53 | COPY --from=builder /go/src/volcano.sh/devices/volcano-vgpu-device-plugin /usr/bin/volcano-vgpu-device-plugin 54 | COPY --from=builder /go/src/volcano.sh/devices/volcano-vgpu-monitor /usr/bin/volcano-vgpu-monitor 55 | COPY --from=builder /go/bin/nvidia-mig-parted /usr/bin/nvidia-mig-parted 56 | COPY --from=builder /go/src/volcano.sh/devices/lib/nvidia/ld.so.preload /k8s-vgpu/lib/nvidia/ 57 | COPY --from=nvidia_builder /libvgpu/build/libvgpu.so /k8s-vgpu/lib/nvidia/ 58 | 59 | ENTRYPOINT ["volcano-vgpu-device-plugin"] 60 | -------------------------------------------------------------------------------- /examples/gpu-share.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: pod1 5 | spec: 6 | restartPolicy: OnFailure 7 | schedulerName: volcano 8 | containers: 9 | - image: nvidia/cuda:10.1-base-ubuntu18.04 10 | name: pod1-ctr 11 | command: ["sleep"] 12 | args: ["100000"] 13 | resources: 14 | limits: 15 | volcano.sh/gpu-memory: 1024 # 1024MB 16 | -------------------------------------------------------------------------------- /examples/vgpu-case01.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: test1 5 | spec: 6 | restartPolicy: OnFailure 7 | schedulerName: volcano 8 | containers: 9 | - image: ubuntu:20.04 10 | name: pod1-ctr 11 | command: ["sleep"] 12 | args: ["100000"] 13 | resources: 14 | limits: 15 | volcano.sh/vgpu-memory: 1024 16 | volcano.sh/vgpu-number: 1 17 | -------------------------------------------------------------------------------- /examples/vgpu-case02.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: pod1 5 | spec: 6 | restartPolicy: OnFailure 7 | schedulerName: volcano 8 | containers: 9 | - image: nvidia/cuda:11.2.2-base-ubi8 10 | 
name: pod1-ctr 11 | command: ["sleep"] 12 | args: ["100000"] 13 | resources: 14 | limits: 15 | volcano.sh/vgpu-number: 1 #request 1 GPU 16 | volcano.sh/vgpu-cores: 50 #each GPU request 50% of compute core resources 17 | volcano.sh/vgpu-memory: 10240 #each GPU request 10G device memory 18 | -------------------------------------------------------------------------------- /examples/vgpu-case03.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: gpu-pod12 5 | spec: 6 | schedulerName: volcano 7 | containers: 8 | - name: ubuntu-container 9 | image: ubuntu:18.04 10 | command: ["bash", "-c", "sleep 86400"] 11 | resources: 12 | limits: 13 | volcano.sh/vgpu-number: 2 # requesting 2 vGPUs 14 | volcano.sh/vgpu-memory: 2000 15 | #volcano.sh/vgpu-memory-percentage: 50 #Each vGPU containers 50% device memory of that GPU. Can not be used with nvidia.com/gpumem 16 | - name: ubuntu-container0 17 | image: ubuntu:18.04 18 | command: ["bash", "-c", "sleep 86400"] 19 | - name: ubuntu-container1 20 | image: ubuntu:18.04 21 | command: ["bash", "-c", "sleep 86400"] 22 | resources: 23 | limits: 24 | volcano.sh/vgpu-number: 2 # requesting 2 vGPUs 25 | volcano.sh/vgpu-memory: 3000 26 | 27 | -------------------------------------------------------------------------------- /examples/vgpu-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: resnet101-deployment 5 | spec: 6 | selector: 7 | matchLabels: 8 | app: resnet101-server 9 | replicas: 10 10 | template: 11 | metadata: 12 | labels: 13 | app: resnet101-server 14 | spec: 15 | schedulerName: volcano 16 | containers: 17 | - name: resnet101-container 18 | image: ubuntu:18.04 19 | command: ["sleep","infinity"] 20 | resources: 21 | limits: 22 | volcano.sh/vgpu-number: 1 # requesting 2 vGPUs 23 | volcano.sh/vgpu-memory: 16384 
-------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module volcano.sh/k8s-device-plugin 2 | 3 | go 1.23 4 | 5 | require ( 6 | github.com/NVIDIA/go-gpuallocator v0.5.0 7 | github.com/NVIDIA/go-nvlib v0.7.1 8 | github.com/NVIDIA/go-nvml v0.12.4-1 9 | github.com/fsnotify/fsnotify v1.4.9 10 | github.com/prometheus/client_golang v1.0.0 11 | github.com/spf13/cobra v0.0.5 12 | github.com/spf13/viper v1.3.2 13 | github.com/stretchr/testify v1.10.0 14 | github.com/urfave/cli/v2 v2.4.0 15 | golang.org/x/net v0.0.0-20200421231249-e086a090c8fd 16 | google.golang.org/grpc v1.32.0 17 | gopkg.in/yaml.v2 v2.2.8 18 | k8s.io/api v0.18.2 19 | k8s.io/apimachinery v0.18.2 20 | k8s.io/client-go v0.18.2 21 | k8s.io/klog v1.0.0 22 | k8s.io/klog/v2 v2.80.1 23 | k8s.io/kubelet v0.0.0 24 | sigs.k8s.io/yaml v1.2.0 25 | ) 26 | 27 | require ( 28 | github.com/beorn7/perks v1.0.0 // indirect 29 | github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect 30 | github.com/davecgh/go-spew v1.1.1 // indirect 31 | github.com/go-logr/logr v1.2.0 // indirect 32 | github.com/gogo/protobuf v1.3.1 // indirect 33 | github.com/golang/protobuf v1.5.0 // indirect 34 | github.com/google/go-cmp v0.5.5 // indirect 35 | github.com/google/gofuzz v1.1.0 // indirect 36 | github.com/google/uuid v1.6.0 // indirect 37 | github.com/googleapis/gnostic v0.1.0 // indirect 38 | github.com/hashicorp/golang-lru v0.5.1 // indirect 39 | github.com/hashicorp/hcl v1.0.0 // indirect 40 | github.com/imdario/mergo v0.3.5 // indirect 41 | github.com/inconshreveable/mousetrap v1.0.0 // indirect 42 | github.com/json-iterator/go v1.1.8 // indirect 43 | github.com/magiconair/properties v1.8.1 // indirect 44 | github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect 45 | github.com/mitchellh/mapstructure v1.1.2 // indirect 46 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 47 | 
github.com/modern-go/reflect2 v1.0.1 // indirect 48 | github.com/pelletier/go-toml v1.2.0 // indirect 49 | github.com/pmezard/go-difflib v1.0.0 // indirect 50 | github.com/prometheus/client_model v0.2.0 // indirect 51 | github.com/prometheus/common v0.4.1 // indirect 52 | github.com/prometheus/procfs v0.0.2 // indirect 53 | github.com/russross/blackfriday/v2 v2.1.0 // indirect 54 | github.com/spf13/afero v1.2.2 // indirect 55 | github.com/spf13/cast v1.3.0 // indirect 56 | github.com/spf13/jwalterweatherman v1.1.0 // indirect 57 | github.com/spf13/pflag v1.0.5 // indirect 58 | golang.org/x/crypto v0.0.0-20200220183623-bac4c82f6975 // indirect 59 | golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 // indirect 60 | golang.org/x/sys v0.0.0-20200413165638-669c56c373c4 // indirect 61 | golang.org/x/text v0.3.2 // indirect 62 | golang.org/x/time v0.0.0-20190308202827-9d24e82272b4 // indirect 63 | google.golang.org/appengine v1.5.0 // indirect 64 | google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55 // indirect 65 | google.golang.org/protobuf v1.34.2 // indirect 66 | gopkg.in/inf.v0 v0.9.1 // indirect 67 | gopkg.in/yaml.v3 v3.0.1 // indirect 68 | k8s.io/utils v0.0.0-20200324210504-a9aa75ae1b89 // indirect 69 | sigs.k8s.io/structured-merge-diff/v3 v3.0.0 // indirect 70 | ) 71 | 72 | replace ( 73 | k8s.io/api => k8s.io/api v0.18.2 74 | k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.18.2 75 | k8s.io/apimachinery => k8s.io/apimachinery v0.18.2 76 | k8s.io/apiserver => k8s.io/apiserver v0.18.2 77 | k8s.io/cli-runtime => k8s.io/cli-runtime v0.18.2 78 | k8s.io/client-go => k8s.io/client-go v0.18.2 79 | k8s.io/cloud-provider => k8s.io/cloud-provider v0.18.2 80 | k8s.io/cluster-bootstrap => k8s.io/cluster-bootstrap v0.18.2 81 | k8s.io/code-generator => k8s.io/code-generator v0.18.2 82 | k8s.io/component-base => k8s.io/component-base v0.18.2 83 | k8s.io/cri-api => k8s.io/cri-api v0.18.2 84 | k8s.io/csi-translation-lib => 
k8s.io/csi-translation-lib v0.18.2 85 | k8s.io/kube-aggregator => k8s.io/kube-aggregator v0.18.2 86 | k8s.io/kube-controller-manager => k8s.io/kube-controller-manager v0.18.2 87 | k8s.io/kube-proxy => k8s.io/kube-proxy v0.18.2 88 | k8s.io/kube-scheduler => k8s.io/kube-scheduler v0.18.2 89 | k8s.io/kubectl => k8s.io/kubectl v0.18.2 90 | k8s.io/kubelet => k8s.io/kubelet v0.18.2 91 | k8s.io/kubernetes => k8s.io/kubernetes v1.18.2 92 | k8s.io/legacy-cloud-providers => k8s.io/legacy-cloud-providers v0.18.2 93 | k8s.io/metrics => k8s.io/metrics v0.18.2 94 | k8s.io/sample-apiserver => k8s.io/sample-apiserver v0.18.2 95 | ) 96 | -------------------------------------------------------------------------------- /lib/nvidia/ld.so.preload: -------------------------------------------------------------------------------- 1 | /usr/local/vgpu/libvgpu.so -------------------------------------------------------------------------------- /pkg/apis/config.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package apis 18 | 19 | import ( 20 | "fmt" 21 | "io" 22 | "log" 23 | "os" 24 | 25 | cli "github.com/urfave/cli/v2" 26 | "sigs.k8s.io/yaml" 27 | ) 28 | 29 | // Version indicates the version of the 'Config' struct used to hold configuration information. 
30 | const Version = "v1beta1" 31 | 32 | // Config is a versioned struct used to hold configuration information. 33 | type Config struct { 34 | Version string `json:"version" yaml:"version"` 35 | Flags Flags `json:"flags,omitempty" yaml:"flags,omitempty"` 36 | } 37 | 38 | // NewConfig builds out a Config struct from a config file (or command line flags). 39 | // The data stored in the config will be populated in order of precedence from 40 | // (1) command line, (2) environment variable, (3) config file. 41 | func NewConfig(c *cli.Context, flags []cli.Flag) (*Config, error) { 42 | config := &Config{ 43 | Version: Version, 44 | } 45 | 46 | log.Println(c.String("gpu-strategy")) 47 | log.Println(c.Uint("gpu-memory-factor")) 48 | 49 | configFile := c.String("config-file") 50 | if configFile != "" { 51 | var err error 52 | config, err = parseConfig(configFile) 53 | if err != nil { 54 | return nil, fmt.Errorf("unable to parse config file: %v", err) 55 | } 56 | } 57 | 58 | config.Flags.CommandLineFlags = NewCommandLineFlags(c) 59 | 60 | return config, nil 61 | } 62 | 63 | // parseConfig parses a config file as either YAML of JSON and unmarshals it into a Config struct. 
64 | func parseConfig(configFile string) (*Config, error) { 65 | reader, err := os.Open(configFile) 66 | if err != nil { 67 | return nil, fmt.Errorf("error opening config file: %v", err) 68 | } 69 | defer reader.Close() 70 | 71 | config, err := parseConfigFrom(reader) 72 | if err != nil { 73 | return nil, fmt.Errorf("error parsing config file: %v", err) 74 | } 75 | 76 | return config, nil 77 | } 78 | 79 | func parseConfigFrom(reader io.Reader) (*Config, error) { 80 | var err error 81 | var configYaml []byte 82 | 83 | configYaml, err = io.ReadAll(reader) 84 | if err != nil { 85 | return nil, fmt.Errorf("read error: %v", err) 86 | } 87 | 88 | var config Config 89 | err = yaml.Unmarshal(configYaml, &config) 90 | if err != nil { 91 | return nil, fmt.Errorf("unmarshal error: %v", err) 92 | } 93 | 94 | if config.Version == "" { 95 | config.Version = Version 96 | } 97 | 98 | if config.Version != Version { 99 | return nil, fmt.Errorf("unknown version: %v", config.Version) 100 | } 101 | 102 | return &config, nil 103 | } 104 | -------------------------------------------------------------------------------- /pkg/apis/flags.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package apis 18 | 19 | import ( 20 | cli "github.com/urfave/cli/v2" 21 | ) 22 | 23 | // Flags holds the full list of flags used to configure the device plugin and GFD. 24 | type Flags struct { 25 | *CommandLineFlags 26 | } 27 | 28 | // CommandLineFlags holds the list of command line flags used to configure the device plugin and GFD. 29 | type CommandLineFlags struct { 30 | GPUStrategy string `json:"GPUStrategy" yaml:"GPUStrategy"` 31 | GPUMemoryFactor uint `json:"GPUMemoryFactor" yaml:"GPUMemoryFactor"` 32 | } 33 | 34 | func NewCommandLineFlags(c *cli.Context) *CommandLineFlags { 35 | return &CommandLineFlags{ 36 | GPUStrategy: c.String("gpu-strategy"), 37 | GPUMemoryFactor: c.Uint("gpu-memory-factor"), 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /pkg/apis/flags_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2022 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package apis 18 | 19 | import ( 20 | "encoding/json" 21 | "fmt" 22 | "testing" 23 | 24 | "github.com/stretchr/testify/require" 25 | ) 26 | 27 | func TestUnmarshalFlags(t *testing.T) { 28 | testCases := []struct { 29 | input string 30 | output Flags 31 | err bool 32 | }{ 33 | { 34 | input: ``, 35 | err: true, 36 | }, 37 | { 38 | input: `{}`, 39 | output: Flags{}, 40 | }, 41 | { 42 | input: `{ 43 | "GPUStrategy": "number" 44 | }`, 45 | output: Flags{ 46 | &CommandLineFlags{ 47 | GPUStrategy: "number", 48 | }, 49 | }, 50 | }, 51 | } 52 | 53 | for i, tc := range testCases { 54 | t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) { 55 | var output Flags 56 | err := json.Unmarshal([]byte(tc.input), &output) 57 | if tc.err { 58 | require.Error(t, err) 59 | return 60 | } 61 | require.NoError(t, err) 62 | require.Equal(t, tc.output, output) 63 | }) 64 | } 65 | } 66 | 67 | func TestMarshalFlags(t *testing.T) { 68 | testCases := []struct { 69 | input Flags 70 | output string 71 | err bool 72 | }{ 73 | { 74 | input: Flags{ 75 | &CommandLineFlags{ 76 | GPUStrategy: "number", 77 | }, 78 | }, 79 | output: `{ 80 | "GPUStrategy": "number" 81 | }`, 82 | }, 83 | } 84 | 85 | for i, tc := range testCases { 86 | t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) { 87 | output, err := json.Marshal(tc.input) 88 | if tc.err { 89 | require.Error(t, err) 90 | return 91 | } 92 | require.NoError(t, err) 93 | require.JSONEq(t, tc.output, string(output)) 94 | }) 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /pkg/filewatcher/filewatcher.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package filewatcher 18 | 19 | import ( 20 | "github.com/fsnotify/fsnotify" 21 | ) 22 | 23 | // NewFileWatcher creates a file watcher watching the given files. 24 | func NewFileWatcher(files ...string) (*fsnotify.Watcher, error) { 25 | watcher, err := fsnotify.NewWatcher() 26 | if err != nil { 27 | return nil, err 28 | } 29 | 30 | for _, f := range files { 31 | err = watcher.Add(f) 32 | if err != nil { 33 | watcher.Close() 34 | return nil, err 35 | } 36 | } 37 | 38 | return watcher, nil 39 | } 40 | -------------------------------------------------------------------------------- /pkg/gpu/doc.go: -------------------------------------------------------------------------------- 1 | package gpu 2 | -------------------------------------------------------------------------------- /pkg/lock/nodelock.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package lock 18 | 19 | import ( 20 | "context" 21 | "fmt" 22 | "os" 23 | "path/filepath" 24 | "time" 25 | 26 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 27 | "k8s.io/client-go/kubernetes" 28 | "k8s.io/client-go/rest" 29 | "k8s.io/client-go/tools/clientcmd" 30 | "k8s.io/klog/v2" 31 | ) 32 | 33 | const MaxLockRetry = 5 34 | 35 | var kubeClient kubernetes.Interface 36 | 37 | func GetClient() kubernetes.Interface { 38 | return kubeClient 39 | } 40 | 41 | // NewClient connects to an API server 42 | func NewClient() (kubernetes.Interface, error) { 43 | kubeConfig := os.Getenv("KUBECONFIG") 44 | if kubeConfig == "" { 45 | kubeConfig = filepath.Join(os.Getenv("HOME"), ".kube", "config") 46 | } 47 | config, err := rest.InClusterConfig() 48 | if err != nil { 49 | config, err = clientcmd.BuildConfigFromFlags("", kubeConfig) 50 | if err != nil { 51 | return nil, err 52 | } 53 | } 54 | client, err := kubernetes.NewForConfig(config) 55 | kubeClient = client 56 | return client, err 57 | } 58 | 59 | // UseClient uses existing client 60 | func UseClient(client kubernetes.Interface) error { 61 | kubeClient = client 62 | return nil 63 | } 64 | 65 | func setNodeLock(nodeName string, lockName string) error { 66 | ctx := context.Background() 67 | node, err := kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) 68 | if err != nil { 69 | klog.Errorln("get node failed", err.Error()) 70 | return err 71 | } 72 | if _, ok := node.ObjectMeta.Annotations[lockName]; ok { 73 | return fmt.Errorf("node %s is locked", nodeName) 74 | } 75 | newNode := node.DeepCopy() 76 | newNode.ObjectMeta.Annotations[lockName] = time.Now().Format(time.RFC3339) 77 | _, err = kubeClient.CoreV1().Nodes().Update(ctx, newNode, metav1.UpdateOptions{}) 78 | for i := 0; i < MaxLockRetry && err != nil; i++ { 79 | klog.ErrorS(err, "Failed to update node", "node", nodeName, "retry", i) 80 | time.Sleep(100 * time.Millisecond) 81 | node, err = kubeClient.CoreV1().Nodes().Get(ctx, nodeName, 
metav1.GetOptions{}) 82 | if err != nil { 83 | klog.ErrorS(err, "Failed to get node when retry to update", "node", nodeName) 84 | continue 85 | } 86 | newNode := node.DeepCopy() 87 | newNode.ObjectMeta.Annotations[lockName] = time.Now().Format(time.RFC3339) 88 | _, err = kubeClient.CoreV1().Nodes().Update(ctx, newNode, metav1.UpdateOptions{}) 89 | } 90 | if err != nil { 91 | return fmt.Errorf("setNodeLock exceeds retry count %d", MaxLockRetry) 92 | } 93 | klog.V(3).InfoS("Node lock set", "node", nodeName) 94 | return nil 95 | } 96 | 97 | // ReleaseNodeLock releases a certain lock on a certain device 98 | func ReleaseNodeLock(nodeName string, lockName string) error { 99 | ctx := context.Background() 100 | node, err := kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) 101 | if err != nil { 102 | return err 103 | } 104 | if _, ok := node.ObjectMeta.Annotations[lockName]; !ok { 105 | klog.V(3).InfoS("Node lock not set", "node", nodeName, "lock", lockName) 106 | return nil 107 | } 108 | newNode := node.DeepCopy() 109 | delete(newNode.ObjectMeta.Annotations, lockName) 110 | _, err = kubeClient.CoreV1().Nodes().Update(ctx, newNode, metav1.UpdateOptions{}) 111 | for i := 0; i < MaxLockRetry && err != nil; i++ { 112 | klog.ErrorS(err, "Failed to update node", "node", nodeName, "retry", i) 113 | time.Sleep(100 * time.Millisecond) 114 | node, err = kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) 115 | if err != nil { 116 | klog.ErrorS(err, "Failed to get node when retry to update", "node", nodeName) 117 | continue 118 | } 119 | newNode := node.DeepCopy() 120 | delete(newNode.ObjectMeta.Annotations, lockName) 121 | _, err = kubeClient.CoreV1().Nodes().Update(ctx, newNode, metav1.UpdateOptions{}) 122 | } 123 | if err != nil { 124 | return fmt.Errorf("releaseNodeLock exceeds retry count %d", MaxLockRetry) 125 | } 126 | klog.V(3).InfoS("Node lock released", "node", nodeName) 127 | return nil 128 | } 129 | 130 | // LockNode locks a device on a 
certain node 131 | func LockNode(nodeName string, lockName string) error { 132 | ctx := context.Background() 133 | node, err := kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) 134 | if err != nil { 135 | return err 136 | } 137 | if _, ok := node.ObjectMeta.Annotations[lockName]; !ok { 138 | return setNodeLock(nodeName, lockName) 139 | } 140 | lockTime, err := time.Parse(time.RFC3339, node.ObjectMeta.Annotations[lockName]) 141 | if err != nil { 142 | return err 143 | } 144 | if time.Since(lockTime) > time.Minute*5 { 145 | klog.InfoS("Node lock expired", "node", nodeName, "lockTime", lockTime) 146 | err = ReleaseNodeLock(nodeName, lockName) 147 | if err != nil { 148 | klog.ErrorS(err, "Failed to release node lock", "node", nodeName) 149 | return err 150 | } 151 | return setNodeLock(nodeName, lockName) 152 | } 153 | return fmt.Errorf("node %s has been locked within 5 minutes", nodeName) 154 | } 155 | -------------------------------------------------------------------------------- /pkg/monitor/nvidia/cudevshr.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The HAMi Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package nvidia 18 | 19 | import ( 20 | "context" 21 | "errors" 22 | "fmt" 23 | "os" 24 | "path/filepath" 25 | "strings" 26 | "sync" 27 | "syscall" 28 | "time" 29 | "unsafe" 30 | 31 | v0 "volcano.sh/k8s-device-plugin/pkg/monitor/nvidia/v0" 32 | v1 "volcano.sh/k8s-device-plugin/pkg/monitor/nvidia/v1" 33 | 34 | corev1 "k8s.io/api/core/v1" 35 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 36 | "k8s.io/client-go/kubernetes" 37 | "k8s.io/client-go/tools/clientcmd" 38 | "k8s.io/klog/v2" 39 | ) 40 | 41 | const SharedRegionMagicFlag = 19920718 42 | 43 | type headerT struct { 44 | initializedFlag int32 45 | majorVersion int32 46 | minorVersion int32 47 | } 48 | 49 | type UsageInfo interface { 50 | DeviceMax() int 51 | DeviceNum() int 52 | DeviceMemoryContextSize(idx int) uint64 53 | DeviceMemoryModuleSize(idx int) uint64 54 | DeviceMemoryBufferSize(idx int) uint64 55 | DeviceMemoryOffset(idx int) uint64 56 | DeviceMemoryTotal(idx int) uint64 57 | DeviceSmUtil(idx int) uint64 58 | IsValidUUID(idx int) bool 59 | DeviceUUID(idx int) string 60 | DeviceMemoryLimit(idx int) uint64 61 | LastKernelTime() int64 62 | //UsedMemory(idx int) (uint64, error) 63 | GetPriority() int 64 | GetRecentKernel() int32 65 | SetRecentKernel(v int32) 66 | GetUtilizationSwitch() int32 67 | SetUtilizationSwitch(v int32) 68 | } 69 | 70 | type ContainerUsage struct { 71 | PodUID string 72 | ContainerName string 73 | data []byte 74 | Info UsageInfo 75 | } 76 | 77 | type ContainerLister struct { 78 | containerPath string 79 | containers map[string]*ContainerUsage 80 | mutex sync.Mutex 81 | clientset *kubernetes.Clientset 82 | } 83 | 84 | func NewContainerLister() (*ContainerLister, error) { 85 | hookPath, ok := os.LookupEnv("HOOK_PATH") 86 | if !ok { 87 | return nil, fmt.Errorf("HOOK_PATH not set") 88 | } 89 | config, err := clientcmd.BuildConfigFromFlags("", os.Getenv("KUBECONFIG")) 90 | if err != nil { 91 | klog.Errorf("Failed to build kubeconfig: %v", err) 92 | return nil, err 93 | } 
94 | clientset, err := kubernetes.NewForConfig(config) 95 | if err != nil { 96 | klog.Errorf("Failed to build clientset: %v", err) 97 | return nil, err 98 | } 99 | return &ContainerLister{ 100 | containerPath: filepath.Join(hookPath, "containers"), 101 | containers: make(map[string]*ContainerUsage), 102 | clientset: clientset, 103 | }, nil 104 | } 105 | 106 | func (l *ContainerLister) Lock() { 107 | l.mutex.Lock() 108 | } 109 | 110 | func (l *ContainerLister) UnLock() { 111 | l.mutex.Unlock() 112 | } 113 | 114 | func (l *ContainerLister) ListContainers() map[string]*ContainerUsage { 115 | return l.containers 116 | } 117 | 118 | func (l *ContainerLister) Clientset() *kubernetes.Clientset { 119 | return l.clientset 120 | } 121 | 122 | func (l *ContainerLister) Update() error { 123 | pods, err := l.clientset.CoreV1().Pods("").List(context.Background(), metav1.ListOptions{}) 124 | if err != nil { 125 | return err 126 | } 127 | 128 | l.mutex.Lock() 129 | defer l.mutex.Unlock() 130 | entries, err := os.ReadDir(l.containerPath) 131 | if err != nil { 132 | return err 133 | } 134 | for _, entry := range entries { 135 | if !entry.IsDir() { 136 | continue 137 | } 138 | dirName := filepath.Join(l.containerPath, entry.Name()) 139 | if !isValidPod(entry.Name(), pods) { 140 | dirInfo, err := os.Stat(dirName) 141 | if err == nil && dirInfo.ModTime().Add(time.Second*300).After(time.Now()) { 142 | continue 143 | } 144 | klog.Infof("Removing dirname %s in monitorpath", dirName) 145 | if c, ok := l.containers[entry.Name()]; ok { 146 | syscall.Munmap(c.data) 147 | delete(l.containers, entry.Name()) 148 | } 149 | _ = os.RemoveAll(dirName) 150 | continue 151 | } 152 | if _, ok := l.containers[entry.Name()]; ok { 153 | continue 154 | } 155 | usage, err := loadCache(dirName) 156 | if err != nil { 157 | klog.Errorf("Failed to load cache: %s, error: %v", dirName, err) 158 | continue 159 | } 160 | if usage == nil { 161 | // no cuInit in container 162 | continue 163 | } 164 | usage.PodUID = 
strings.Split(entry.Name(), "_")[0] 165 | usage.ContainerName = strings.Split(entry.Name(), "_")[1] 166 | l.containers[entry.Name()] = usage 167 | klog.Infof("Adding ctr dirname %s in monitorpath", dirName) 168 | } 169 | return nil 170 | } 171 | 172 | func loadCache(fpath string) (*ContainerUsage, error) { 173 | klog.Infof("Checking path %s", fpath) 174 | files, err := os.ReadDir(fpath) 175 | if err != nil { 176 | return nil, err 177 | } 178 | if len(files) > 2 { 179 | return nil, errors.New("cache num not matched") 180 | } 181 | if len(files) == 0 { 182 | return nil, nil 183 | } 184 | cacheFile := "" 185 | for _, val := range files { 186 | if strings.Contains(val.Name(), "libvgpu.so") { 187 | continue 188 | } 189 | if !strings.Contains(val.Name(), ".cache") { 190 | continue 191 | } 192 | cacheFile = filepath.Join(fpath, val.Name()) 193 | break 194 | } 195 | if cacheFile == "" { 196 | klog.Infof("No cache file in %s", fpath) 197 | return nil, nil 198 | } 199 | info, err := os.Stat(cacheFile) 200 | if err != nil { 201 | klog.Errorf("Failed to stat cache file: %s, error: %v", cacheFile, err) 202 | return nil, err 203 | } 204 | if info.Size() < int64(unsafe.Sizeof(headerT{})) { 205 | return nil, fmt.Errorf("cache file size %d too small", info.Size()) 206 | } 207 | f, err := os.OpenFile(cacheFile, os.O_RDWR, 0666) 208 | if err != nil { 209 | klog.Errorf("Failed to open cache file: %s, error: %v", cacheFile, err) 210 | return nil, err 211 | } 212 | defer func(f *os.File) { 213 | _ = f.Close() 214 | }(f) 215 | usage := &ContainerUsage{} 216 | usage.data, err = syscall.Mmap(int(f.Fd()), 0, int(info.Size()), syscall.PROT_WRITE|syscall.PROT_READ, syscall.MAP_SHARED) 217 | if err != nil { 218 | klog.Errorf("Failed to mmap cache file: %s, error: %v", cacheFile, err) 219 | return nil, err 220 | } 221 | head := (*headerT)(unsafe.Pointer(&usage.data[0])) 222 | if head.initializedFlag != SharedRegionMagicFlag { 223 | _ = syscall.Munmap(usage.data) 224 | return nil, 
fmt.Errorf("cache file magic flag not matched") 225 | } 226 | if info.Size() == 1197897 { 227 | usage.Info = v0.CastSpec(usage.data) 228 | } else if head.majorVersion == 1 { 229 | usage.Info = v1.CastSpec(usage.data) 230 | } else { 231 | _ = syscall.Munmap(usage.data) 232 | return nil, fmt.Errorf("unknown cache file size %d version %d.%d", info.Size(), head.majorVersion, head.minorVersion) 233 | } 234 | return usage, nil 235 | } 236 | 237 | func isValidPod(name string, pods *corev1.PodList) bool { 238 | for _, val := range pods.Items { 239 | if strings.Contains(name, string(val.UID)) { 240 | return true 241 | } 242 | } 243 | return false 244 | } 245 | -------------------------------------------------------------------------------- /pkg/monitor/nvidia/v0/spec.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The HAMi Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package v0 18 | 19 | import "unsafe" 20 | 21 | const maxDevices = 16 22 | 23 | type deviceMemory struct { 24 | contextSize uint64 25 | moduleSize uint64 26 | bufferSize uint64 27 | offset uint64 28 | total uint64 29 | } 30 | 31 | type deviceUtilization struct { 32 | decUtil uint64 33 | encUtil uint64 34 | smUtil uint64 35 | } 36 | 37 | type shrregProcSlotT struct { 38 | pid int32 39 | hostpid int32 40 | used [16]deviceMemory 41 | monitorused [16]uint64 42 | deviceUtil [16]deviceUtilization 43 | status int32 44 | } 45 | 46 | type uuid struct { 47 | uuid [96]byte 48 | } 49 | 50 | type semT struct { 51 | sem [32]byte 52 | } 53 | 54 | type sharedRegionT struct { 55 | initializedFlag int32 56 | smInitFlag int32 57 | ownerPid uint32 58 | sem semT 59 | num uint64 60 | uuids [16]uuid 61 | 62 | limit [16]uint64 63 | smLimit [16]uint64 64 | procs [1024]shrregProcSlotT 65 | 66 | procnum int32 67 | utilizationSwitch int32 68 | recentKernel int32 69 | priority int32 70 | } 71 | 72 | type Spec struct { 73 | sr *sharedRegionT 74 | } 75 | 76 | func (s Spec) DeviceMax() int { 77 | return maxDevices 78 | } 79 | 80 | func (s Spec) DeviceNum() int { 81 | return int(s.sr.num) 82 | } 83 | 84 | func (s Spec) DeviceMemoryContextSize(idx int) uint64 { 85 | v := uint64(0) 86 | for _, p := range s.sr.procs { 87 | v += p.used[idx].contextSize 88 | } 89 | return v 90 | } 91 | 92 | func (s Spec) DeviceMemoryModuleSize(idx int) uint64 { 93 | v := uint64(0) 94 | for _, p := range s.sr.procs { 95 | v += p.used[idx].moduleSize 96 | } 97 | return v 98 | } 99 | 100 | func (s Spec) DeviceMemoryBufferSize(idx int) uint64 { 101 | v := uint64(0) 102 | for _, p := range s.sr.procs { 103 | v += p.used[idx].bufferSize 104 | } 105 | return v 106 | } 107 | 108 | func (s Spec) DeviceMemoryOffset(idx int) uint64 { 109 | v := uint64(0) 110 | for _, p := range s.sr.procs { 111 | v += p.used[idx].offset 112 | } 113 | return v 114 | } 115 | 116 | func (s Spec) DeviceMemoryTotal(idx int) uint64 { 
117 | v := uint64(0) 118 | for _, p := range s.sr.procs { 119 | v += p.used[idx].total 120 | } 121 | return v 122 | } 123 | 124 | func (s Spec) DeviceSmUtil(idx int) uint64 { 125 | v := uint64(0) 126 | for _, p := range s.sr.procs { 127 | v += p.deviceUtil[idx].smUtil 128 | } 129 | return v 130 | } 131 | 132 | func (s Spec) IsValidUUID(idx int) bool { 133 | return s.sr.uuids[idx].uuid[0] != 0 134 | } 135 | 136 | func (s Spec) DeviceUUID(idx int) string { 137 | return string(s.sr.uuids[idx].uuid[:]) 138 | } 139 | 140 | func (s Spec) DeviceMemoryLimit(idx int) uint64 { 141 | return s.sr.limit[idx] 142 | } 143 | 144 | func (s Spec) LastKernelTime() int64 { 145 | return 0 146 | } 147 | 148 | func CastSpec(data []byte) Spec { 149 | return Spec{ 150 | sr: (*sharedRegionT)(unsafe.Pointer(&data[0])), 151 | } 152 | } 153 | 154 | // func (s *SharedRegionT) UsedMemory(idx int) (uint64, error) { 155 | // return 0, nil 156 | // } 157 | 158 | func (s Spec) GetPriority() int { 159 | return int(s.sr.priority) 160 | } 161 | 162 | func (s Spec) GetRecentKernel() int32 { 163 | return s.sr.recentKernel 164 | } 165 | 166 | func (s Spec) SetRecentKernel(v int32) { 167 | s.sr.recentKernel = v 168 | } 169 | 170 | func (s Spec) GetUtilizationSwitch() int32 { 171 | return s.sr.utilizationSwitch 172 | } 173 | 174 | func (s Spec) SetUtilizationSwitch(v int32) { 175 | s.sr.utilizationSwitch = v 176 | } 177 | -------------------------------------------------------------------------------- /pkg/monitor/nvidia/v1/spec.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The HAMi Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1 18 | 19 | import "unsafe" 20 | 21 | const maxDevices = 16 22 | 23 | type deviceMemory struct { 24 | contextSize uint64 25 | moduleSize uint64 26 | bufferSize uint64 27 | offset uint64 28 | total uint64 29 | unused [3]uint64 30 | } 31 | 32 | type deviceUtilization struct { 33 | decUtil uint64 34 | encUtil uint64 35 | smUtil uint64 36 | unused [3]uint64 37 | } 38 | 39 | type shrregProcSlotT struct { 40 | pid int32 41 | hostpid int32 42 | used [16]deviceMemory 43 | monitorused [16]uint64 44 | deviceUtil [16]deviceUtilization 45 | status int32 46 | unused [3]uint64 47 | } 48 | 49 | type uuid struct { 50 | uuid [96]byte 51 | } 52 | 53 | type semT struct { 54 | sem [32]byte 55 | } 56 | 57 | type sharedRegionT struct { 58 | initializedFlag int32 59 | majorVersion int32 60 | minorVersion int32 61 | smInitFlag int32 62 | ownerPid uint32 63 | sem semT 64 | num uint64 65 | uuids [16]uuid 66 | 67 | limit [16]uint64 68 | smLimit [16]uint64 69 | procs [1024]shrregProcSlotT 70 | 71 | procnum int32 72 | utilizationSwitch int32 73 | recentKernel int32 74 | priority int32 75 | lastKernelTime int64 76 | unused [4]uint64 77 | } 78 | 79 | type Spec struct { 80 | sr *sharedRegionT 81 | } 82 | 83 | func (s Spec) DeviceMax() int { 84 | return maxDevices 85 | } 86 | 87 | func (s Spec) DeviceNum() int { 88 | return int(s.sr.num) 89 | } 90 | 91 | func (s Spec) DeviceMemoryContextSize(idx int) uint64 { 92 | v := uint64(0) 93 | for _, p := range s.sr.procs { 94 | v += p.used[idx].contextSize 95 | } 96 | return v 97 | } 98 | 99 | func (s Spec) 
DeviceMemoryModuleSize(idx int) uint64 { 100 | v := uint64(0) 101 | for _, p := range s.sr.procs { 102 | v += p.used[idx].moduleSize 103 | } 104 | return v 105 | } 106 | 107 | func (s Spec) DeviceMemoryBufferSize(idx int) uint64 { 108 | v := uint64(0) 109 | for _, p := range s.sr.procs { 110 | v += p.used[idx].bufferSize 111 | } 112 | return v 113 | } 114 | 115 | func (s Spec) DeviceMemoryOffset(idx int) uint64 { 116 | v := uint64(0) 117 | for _, p := range s.sr.procs { 118 | v += p.used[idx].offset 119 | } 120 | return v 121 | } 122 | 123 | func (s Spec) DeviceMemoryTotal(idx int) uint64 { 124 | v := uint64(0) 125 | for _, p := range s.sr.procs { 126 | v += p.used[idx].total 127 | } 128 | return v 129 | } 130 | 131 | func (s Spec) DeviceSmUtil(idx int) uint64 { 132 | v := uint64(0) 133 | for _, p := range s.sr.procs { 134 | v += p.deviceUtil[idx].smUtil 135 | } 136 | return v 137 | } 138 | 139 | func (s Spec) IsValidUUID(idx int) bool { 140 | return s.sr.uuids[idx].uuid[0] != 0 141 | } 142 | 143 | func (s Spec) DeviceUUID(idx int) string { 144 | return string(s.sr.uuids[idx].uuid[:]) 145 | } 146 | 147 | func (s Spec) DeviceMemoryLimit(idx int) uint64 { 148 | return s.sr.limit[idx] 149 | } 150 | 151 | func (s Spec) LastKernelTime() int64 { 152 | return s.sr.lastKernelTime 153 | } 154 | 155 | func CastSpec(data []byte) Spec { 156 | return Spec{ 157 | sr: (*sharedRegionT)(unsafe.Pointer(&data[0])), 158 | } 159 | } 160 | 161 | // func (s *SharedRegionT) UsedMemory(idx int) (uint64, error) { 162 | // return 0, nil 163 | // } 164 | 165 | func (s Spec) GetPriority() int { 166 | return int(s.sr.priority) 167 | } 168 | 169 | func (s Spec) GetRecentKernel() int32 { 170 | return s.sr.recentKernel 171 | } 172 | 173 | func (s Spec) SetRecentKernel(v int32) { 174 | s.sr.recentKernel = v 175 | } 176 | 177 | func (s Spec) GetUtilizationSwitch() int32 { 178 | return s.sr.utilizationSwitch 179 | } 180 | 181 | func (s Spec) SetUtilizationSwitch(v int32) { 182 | 
s.sr.utilizationSwitch = v 183 | } 184 | -------------------------------------------------------------------------------- /pkg/plugin/interface.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package plugin 18 | 19 | // DevicePlugin interface 20 | type DevicePlugin interface { 21 | // Get the device plugin name 22 | Name() string 23 | // Start the plugin 24 | Start() error 25 | // Get all the devices number which reside within the node 26 | DevicesNum() int 27 | // Stop the plugin 28 | Stop() error 29 | } 30 | -------------------------------------------------------------------------------- /pkg/plugin/vgpu/cache.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package vgpu 18 | 19 | import ( 20 | "sync" 21 | 22 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" 23 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config" 24 | ) 25 | 26 | type DeviceCache struct { 27 | *GpuDeviceManager 28 | 29 | cache []*Device 30 | stopCh chan interface{} 31 | unhealthy chan *Device 32 | notifyCh map[string]chan *Device 33 | mutex sync.Mutex 34 | } 35 | 36 | func NewDeviceCache() *DeviceCache { 37 | skipMigEnabledGPUs := true 38 | if config.Mode == "mig" { 39 | skipMigEnabledGPUs = false 40 | } 41 | return &DeviceCache{ 42 | GpuDeviceManager: NewGpuDeviceManager(skipMigEnabledGPUs), 43 | stopCh: make(chan interface{}), 44 | unhealthy: make(chan *Device), 45 | notifyCh: make(map[string]chan *Device), 46 | } 47 | } 48 | 49 | func (d *DeviceCache) AddNotifyChannel(name string, ch chan *Device) { 50 | d.mutex.Lock() 51 | defer d.mutex.Unlock() 52 | d.notifyCh[name] = ch 53 | } 54 | 55 | func (d *DeviceCache) RemoveNotifyChannel(name string) { 56 | d.mutex.Lock() 57 | defer d.mutex.Unlock() 58 | delete(d.notifyCh, name) 59 | } 60 | 61 | func (d *DeviceCache) Start() { 62 | d.cache = d.Devices() 63 | go d.CheckHealth(d.stopCh, d.cache, d.unhealthy) 64 | go d.notify() 65 | } 66 | 67 | func (d *DeviceCache) Stop() { 68 | close(d.stopCh) 69 | } 70 | 71 | func (d *DeviceCache) GetCache() []*Device { 72 | return d.cache 73 | } 74 | 75 | func (d *DeviceCache) notify() { 76 | for { 77 | select { 78 | case <-d.stopCh: 79 | return 80 | case dev := <-d.unhealthy: 81 | dev.Health = pluginapi.Unhealthy 82 | d.mutex.Lock() 83 | for _, ch := range d.notifyCh { 84 | ch <- dev 85 | } 86 | d.mutex.Unlock() 87 | } 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /pkg/plugin/vgpu/config/config.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 The Volcano Authors. 
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package config 18 | 19 | import ( 20 | "sync" 21 | 22 | "github.com/NVIDIA/go-nvlib/pkg/nvlib/device" 23 | "github.com/NVIDIA/go-nvml/pkg/nvml" 24 | ) 25 | 26 | type NvidiaConfig struct { 27 | ResourceCountName string `yaml:"resourceCountName"` 28 | ResourceMemoryName string `yaml:"resourceMemoryName"` 29 | ResourceCoreName string `yaml:"resourceCoreName"` 30 | ResourceMemoryPercentageName string `yaml:"resourceMemoryPercentageName"` 31 | ResourcePriority string `yaml:"resourcePriorityName"` 32 | OverwriteEnv bool `yaml:"overwriteEnv"` 33 | DefaultMemory int32 `yaml:"defaultMemory"` 34 | DefaultCores int32 `yaml:"defaultCores"` 35 | DefaultGPUNum int32 `yaml:"defaultGPUNum"` 36 | DeviceSplitCount uint `yaml:"deviceSplitCount"` 37 | DeviceMemoryScaling float64 `yaml:"deviceMemoryScaling"` 38 | DeviceCoreScaling float64 `yaml:"deviceCoreScaling"` 39 | DisableCoreLimit bool `yaml:"disableCoreLimit"` 40 | MigGeometriesList []AllowedMigGeometries `yaml:"knownMigGeometries"` 41 | GPUMemoryFactor uint `yaml:"gpuMemoryFactor"` 42 | } 43 | 44 | var ( 45 | nvmllib = nvml.New() 46 | 47 | lock sync.Mutex 48 | globalDevice device.Interface 49 | ) 50 | 51 | var ( 52 | // DevicePluginFilterDevice need device-plugin filter this device, don't register this device. 
53 | DevicePluginFilterDevice *FilterDevice 54 | ) 55 | 56 | func Nvml() nvml.Interface { 57 | return nvmllib 58 | } 59 | 60 | func Device() device.Interface { 61 | if globalDevice != nil { 62 | return globalDevice 63 | } 64 | 65 | lock.Lock() 66 | defer lock.Unlock() 67 | 68 | globalDevice = device.New(nvmllib) 69 | return globalDevice 70 | } 71 | 72 | var ( 73 | DeviceSplitCount uint 74 | GPUMemoryFactor uint 75 | Mode string 76 | DeviceCoresScaling float64 77 | NodeName string 78 | RuntimeSocketFlag string 79 | DisableCoreLimit bool 80 | ) 81 | 82 | type MigTemplate struct { 83 | Name string `yaml:"name"` 84 | Memory int32 `yaml:"memory"` 85 | Count int32 `yaml:"count"` 86 | } 87 | 88 | type MigTemplateUsage struct { 89 | Name string `json:"name,omitempty"` 90 | Memory int32 `json:"memory,omitempty"` 91 | InUse bool `json:"inuse,omitempty"` 92 | } 93 | 94 | type Geometry struct { 95 | Group string `yaml:"group"` 96 | Instances []MigTemplate `yaml:"geometries"` 97 | } 98 | 99 | type MIGS []MigTemplateUsage 100 | 101 | type MigInUse struct { 102 | Index int32 103 | UsageList MIGS 104 | } 105 | 106 | type AllowedMigGeometries struct { 107 | Models []string `yaml:"models"` 108 | Geometries []Geometry `yaml:"allowedGeometries"` 109 | } 110 | 111 | type Config struct { 112 | NvidiaConfig NvidiaConfig `yaml:"nvidia"` 113 | } 114 | 115 | type MigPartedSpec struct { 116 | Version string `json:"version" yaml:"version"` 117 | MigConfigs map[string]MigConfigSpecSlice `json:"mig-configs,omitempty" yaml:"mig-configs,omitempty"` 118 | } 119 | 120 | // MigConfigSpec defines the spec to declare the desired MIG configuration for a set of GPUs. 
121 | type MigConfigSpec struct { 122 | DeviceFilter interface{} `json:"device-filter,omitempty" yaml:"device-filter,flow,omitempty"` 123 | Devices []int32 `json:"devices" yaml:"devices,flow"` 124 | MigEnabled bool `json:"mig-enabled" yaml:"mig-enabled"` 125 | MigDevices map[string]int32 `json:"mig-devices" yaml:"mig-devices"` 126 | } 127 | 128 | // MigConfigSpecSlice represents a slice of 'MigConfigSpec'. 129 | type MigConfigSpecSlice []MigConfigSpec 130 | 131 | type FilterDevice struct { 132 | // UUID is the device ID. 133 | UUID []string `json:"uuid"` 134 | // Index is the device index. 135 | Index []uint `json:"index"` 136 | } 137 | 138 | type DevicePluginConfigs struct { 139 | Nodeconfig []struct { 140 | Name string `json:"name"` 141 | OperatingMode string `json:"operatingmode"` 142 | Devicememoryscaling float64 `json:"devicememoryscaling"` 143 | Devicecorescaling float64 `json:"devicecorescaling"` 144 | Devicesplitcount uint `json:"devicesplitcount"` 145 | Migstrategy string `json:"migstrategy"` 146 | FilterDevice *FilterDevice `json:"filterdevices"` 147 | } `json:"nodeconfig"` 148 | } 149 | -------------------------------------------------------------------------------- /pkg/plugin/vgpu/config/version.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package config 18 | 19 | import ( 20 | "fmt" 21 | 22 | "github.com/spf13/cobra" 23 | ) 24 | 25 | var ( 26 | version string 27 | VersionCmd = &cobra.Command{ 28 | Use: "version", 29 | Short: "print version", 30 | Run: func(cmd *cobra.Command, args []string) { 31 | fmt.Println(Version()) 32 | }, 33 | } 34 | ) 35 | 36 | func Version() string { 37 | return version 38 | } 39 | -------------------------------------------------------------------------------- /pkg/plugin/vgpu/helper.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2025 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package vgpu 18 | 19 | // int8Slice wraps an []int8 with more functions. 20 | type int8Slice []int8 21 | 22 | // String turns a nil terminated int8Slice into a string 23 | func (s int8Slice) String() string { 24 | var b []byte 25 | for _, c := range s { 26 | if c == 0 { 27 | break 28 | } 29 | b = append(b, byte(c)) 30 | } 31 | return string(b) 32 | } 33 | -------------------------------------------------------------------------------- /pkg/plugin/vgpu/mig-strategy.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package vgpu 18 | 19 | import ( 20 | "fmt" 21 | "log" 22 | 23 | "github.com/NVIDIA/go-gpuallocator/gpuallocator" 24 | "github.com/NVIDIA/go-nvml/pkg/nvml" 25 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" 26 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config" 27 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/util" 28 | ) 29 | 30 | // Constants representing the various MIG strategies 31 | const ( 32 | MigStrategyNone = "none" 33 | MigStrategySingle = "single" 34 | MigStrategyMixed = "mixed" 35 | ) 36 | 37 | // MigStrategyResourceSet holds a set of resource names for a given MIG strategy 38 | type MigStrategyResourceSet map[string]struct{} 39 | 40 | // MigStrategy provides an interface for building the set of plugins required to implement a given MIG strategy 41 | type MigStrategy interface { 42 | GetPlugins(cfg *config.NvidiaConfig, cache *DeviceCache) []*NvidiaDevicePlugin 43 | MatchesResource(mig *nvml.Device, resource string) bool 44 | } 45 | 46 | // NewMigStrategy returns a reference to a given MigStrategy based on the 'strategy' passed in 47 | func NewMigStrategy(strategy string) (MigStrategy, error) { 48 | switch strategy { 49 | case MigStrategyNone: 50 | return &migStrategyNone{}, nil 51 | case MigStrategySingle: 52 | return &migStrategySingle{}, nil 53 | case MigStrategyMixed: 54 | return &migStrategyMixed{}, nil 55 | } 56 | return nil, fmt.Errorf("unknown strategy: %v", strategy) 57 | } 58 | 59 | type migStrategyNone struct{} 60 | type migStrategySingle struct{} 61 | type migStrategyMixed struct{} 62 
| 63 | // migStrategyNone 64 | func (s *migStrategyNone) GetPlugins(cfg *config.NvidiaConfig, cache *DeviceCache) []*NvidiaDevicePlugin { 65 | return []*NvidiaDevicePlugin{ 66 | NewNvidiaDevicePlugin( 67 | //"nvidia.com/gpu", 68 | util.ResourceName, 69 | cache, 70 | gpuallocator.NewBestEffortPolicy(), 71 | pluginapi.DevicePluginPath+"nvidia-gpu.sock", 72 | cfg), 73 | NewNvidiaDevicePlugin( 74 | util.ResourceMem, 75 | cache, 76 | gpuallocator.NewBestEffortPolicy(), 77 | pluginapi.DevicePluginPath+"nvidia-gpu-memory.sock", 78 | cfg), 79 | NewNvidiaDevicePlugin( 80 | util.ResourceCores, 81 | cache, 82 | gpuallocator.NewBestEffortPolicy(), 83 | pluginapi.DevicePluginPath+"nvidia-gpu-cores.sock", 84 | cfg), 85 | } 86 | } 87 | 88 | func (s *migStrategyNone) MatchesResource(mig *nvml.Device, resource string) bool { 89 | panic("Should never be called") 90 | } 91 | 92 | // migStrategySingle 93 | func (s *migStrategySingle) GetPlugins(cfg *config.NvidiaConfig, cache *DeviceCache) []*NvidiaDevicePlugin { 94 | panic("single mode in MIG currently not supported") 95 | } 96 | 97 | func (s *migStrategySingle) MatchesResource(mig *nvml.Device, resource string) bool { 98 | return true 99 | } 100 | 101 | // migStrategyMixed 102 | func (s *migStrategyMixed) GetPlugins(cfg *config.NvidiaConfig, cache *DeviceCache) []*NvidiaDevicePlugin { 103 | devices := NewMIGCapableDevices() 104 | 105 | if err := devices.AssertAllMigEnabledDevicesAreValid(); err != nil { 106 | panic(fmt.Errorf("at least one device with migEnabled=true was not configured correctly: %v", err)) 107 | } 108 | 109 | resources := make(MigStrategyResourceSet) 110 | migs, err := devices.GetAllMigDevices() 111 | if err != nil { 112 | panic(fmt.Errorf("unable to retrieve list of MIG devices: %v", err)) 113 | } 114 | for _, mig := range migs { 115 | // Convert old NVML device to new NVML device 116 | uuid, ret := (*mig).GetUUID() 117 | check(ret) 118 | newDevice, ret := config.Nvml().DeviceGetHandleByUUID(uuid) 119 | check(ret) 
120 | 121 | r := s.getResourceName(&newDevice) 122 | if !s.validMigDevice(&newDevice) { 123 | log.Printf("Skipping unsupported MIG device: %v", r) 124 | continue 125 | } 126 | resources[r] = struct{}{} 127 | } 128 | 129 | plugins := []*NvidiaDevicePlugin{ 130 | NewNvidiaDevicePlugin( 131 | util.ResourceName, 132 | cache, 133 | gpuallocator.NewBestEffortPolicy(), 134 | pluginapi.DevicePluginPath+"nvidia-gpu.sock", 135 | cfg), 136 | } 137 | 138 | for resource := range resources { 139 | plugin := NewMIGNvidiaDevicePlugin( 140 | "nvidia.com/"+resource, 141 | NewMigDeviceManager(s, resource), 142 | "NVIDIA_VISIBLE_DEVICES", 143 | gpuallocator.Policy(nil), 144 | pluginapi.DevicePluginPath+"nvidia-"+resource+".sock") 145 | plugins = append(plugins, plugin) 146 | } 147 | 148 | return plugins 149 | } 150 | 151 | func (s *migStrategyMixed) validMigDevice(mig *nvml.Device) bool { 152 | gi, ret := config.Nvml().DeviceGetGpuInstanceId(*mig) 153 | check(ret) 154 | ci, ret := config.Nvml().DeviceGetComputeInstanceId(*mig) 155 | check(ret) 156 | return gi == ci 157 | } 158 | 159 | func (s *migStrategyMixed) getResourceName(mig *nvml.Device) string { 160 | gi, ret := config.Nvml().DeviceGetGpuInstanceId(*mig) 161 | check(ret) 162 | ci, ret := config.Nvml().DeviceGetComputeInstanceId(*mig) 163 | check(ret) 164 | 165 | memory, ret := config.Nvml().DeviceGetMemoryInfo(*mig) 166 | check(ret) 167 | gb := ((memory.Total/(1024*1024) + 1024 - 1) / 1024) 168 | 169 | var r string 170 | if gi == ci { 171 | r = fmt.Sprintf("mig-%dg.%dgb", gi, gb) 172 | } else { 173 | r = fmt.Sprintf("mig-%dc.%dg.%dgb", ci, gi, gb) 174 | } 175 | 176 | return r 177 | } 178 | 179 | func (s *migStrategyMixed) MatchesResource(mig *nvml.Device, resource string) bool { 180 | return s.getResourceName(mig) == resource 181 | } 182 | -------------------------------------------------------------------------------- /pkg/plugin/vgpu/mig.go: -------------------------------------------------------------------------------- 1 | 
/* 2 | Copyright 2023 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package vgpu 18 | 19 | import ( 20 | "bufio" 21 | "fmt" 22 | "log" 23 | "os" 24 | 25 | "github.com/NVIDIA/go-nvml/pkg/nvml" 26 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config" 27 | ) 28 | 29 | const ( 30 | nvidiaProcDriverPath = "/proc/driver/nvidia" 31 | nvidiaCapabilitiesPath = nvidiaProcDriverPath + "/capabilities" 32 | 33 | nvcapsProcDriverPath = "/proc/driver/nvidia-caps" 34 | nvcapsMigMinorsPath = nvcapsProcDriverPath + "/mig-minors" 35 | nvcapsDevicePath = "/dev/nvidia-caps" 36 | ) 37 | 38 | // MIGCapableDevices stores information about all devices on the node 39 | type MIGCapableDevices struct { 40 | // devicesMap holds a list of devices, separated by whether they have MigEnabled or not 41 | devicesMap map[bool][]*nvml.Device 42 | } 43 | 44 | // NewMIGCapableDevices creates a new MIGCapableDevices struct and returns a pointer to it. 
45 | func NewMIGCapableDevices() *MIGCapableDevices { 46 | return &MIGCapableDevices{ 47 | devicesMap: nil, // Is initialized on first use 48 | } 49 | } 50 | 51 | func (devices *MIGCapableDevices) getDevicesMap() (map[bool][]*nvml.Device, error) { 52 | if devices.devicesMap == nil { 53 | n, ret := config.Nvml().DeviceGetCount() 54 | if ret != nvml.SUCCESS { 55 | return nil, fmt.Errorf("error getting device count: %v", ret) 56 | } 57 | 58 | migEnabledDevicesMap := make(map[bool][]*nvml.Device) 59 | for i := 0; i < int(n); i++ { 60 | d, ret := config.Nvml().DeviceGetHandleByIndex(i) 61 | if ret != nvml.SUCCESS { 62 | return nil, fmt.Errorf("error getting device handle: %v", ret) 63 | } 64 | 65 | isMigEnabled, _, ret := config.Nvml().DeviceGetMigMode(d) 66 | if ret != nvml.SUCCESS { 67 | if ret == nvml.ERROR_NOT_SUPPORTED { 68 | isMigEnabled = nvml.DEVICE_MIG_DISABLE 69 | } else { 70 | return nil, fmt.Errorf("error getting MIG mode: %v", ret) 71 | } 72 | } 73 | 74 | migEnabledDevicesMap[isMigEnabled == 1] = append(migEnabledDevicesMap[isMigEnabled == 1], &d) 75 | } 76 | 77 | devices.devicesMap = migEnabledDevicesMap 78 | } 79 | return devices.devicesMap, nil 80 | } 81 | 82 | // GetDevicesWithMigEnabled returns a list of devices with migEnabled=true 83 | func (devices *MIGCapableDevices) GetDevicesWithMigEnabled() ([]*nvml.Device, error) { 84 | devicesMap, err := devices.getDevicesMap() 85 | if err != nil { 86 | return nil, err 87 | } 88 | return devicesMap[true], nil 89 | } 90 | 91 | // GetDevicesWithMigDisabled returns a list of devices with migEnabled=false 92 | func (devices *MIGCapableDevices) GetDevicesWithMigDisabled() ([]*nvml.Device, error) { 93 | devicesMap, err := devices.getDevicesMap() 94 | if err != nil { 95 | return nil, err 96 | } 97 | return devicesMap[false], nil 98 | } 99 | 100 | // AssertAllMigEnabledDevicesAreValid ensures that all devices with migEnabled=true are valid. 
This means: 101 | // * The have at least 1 mig devices associated with them 102 | // Returns nill if the device is valid, or an error if these are not valid 103 | func (devices *MIGCapableDevices) AssertAllMigEnabledDevicesAreValid() error { 104 | devicesMap, err := devices.getDevicesMap() 105 | if err != nil { 106 | return err 107 | } 108 | 109 | for _, d := range devicesMap[true] { 110 | var migs []*nvml.Device 111 | maxMigDevices, ret := config.Nvml().DeviceGetMaxMigDeviceCount(*d) 112 | if ret != nvml.SUCCESS { 113 | return fmt.Errorf("error getting max MIG device count: %v", ret) 114 | } 115 | for i := 0; i < int(maxMigDevices); i++ { 116 | mig, ret := config.Nvml().DeviceGetMigDeviceHandleByIndex(*d, i) 117 | if ret == nvml.SUCCESS { 118 | migs = append(migs, &mig) 119 | } 120 | } 121 | if len(migs) == 0 { 122 | uuid, ret := config.Nvml().DeviceGetUUID(*d) 123 | if ret != nvml.SUCCESS { 124 | return fmt.Errorf("error getting device UUID: %v", ret) 125 | } 126 | return fmt.Errorf("no MIG devices associated with device: %v", uuid) 127 | } 128 | } 129 | return nil 130 | } 131 | 132 | // GetAllMigDevices returns a list of all MIG devices. 
133 | func (devices *MIGCapableDevices) GetAllMigDevices() ([]*nvml.Device, error) { 134 | devicesMap, err := devices.getDevicesMap() 135 | if err != nil { 136 | return nil, err 137 | } 138 | 139 | var migs []*nvml.Device 140 | for _, d := range devicesMap[true] { 141 | maxMigDevices, ret := config.Nvml().DeviceGetMaxMigDeviceCount(*d) 142 | if ret != nvml.SUCCESS { 143 | return nil, fmt.Errorf("error getting max MIG device count: %v", ret) 144 | } 145 | for i := 0; i < int(maxMigDevices); i++ { 146 | mig, ret := config.Nvml().DeviceGetMigDeviceHandleByIndex(*d, i) 147 | if ret == nvml.SUCCESS { 148 | migs = append(migs, &mig) 149 | } 150 | } 151 | } 152 | return migs, nil 153 | } 154 | 155 | // GetMigCapabilityDevicePaths returns a mapping of MIG capability path to device node path 156 | func GetMigCapabilityDevicePaths() (map[string]string, error) { 157 | // Open nvcapsMigMinorsPath for walking. 158 | // If the nvcapsMigMinorsPath does not exist, then we are not on a MIG 159 | // capable machine, so there is nothing to do. 
160 | // The format of this file is discussed in: 161 | // https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#unique_1576522674 162 | minorsFile, err := os.Open(nvcapsMigMinorsPath) 163 | if os.IsNotExist(err) { 164 | return nil, nil 165 | } 166 | if err != nil { 167 | return nil, fmt.Errorf("error opening MIG minors file: %v", err) 168 | } 169 | defer minorsFile.Close() 170 | 171 | // Define a function to process each each line of nvcapsMigMinorsPath 172 | processLine := func(line string) (string, int, error) { 173 | var gpu, gi, ci, migMinor int 174 | 175 | // Look for a CI access file 176 | n, _ := fmt.Sscanf(line, "gpu%d/gi%d/ci%d/access %d", &gpu, &gi, &ci, &migMinor) 177 | if n == 4 { 178 | capPath := fmt.Sprintf(nvidiaCapabilitiesPath+"/gpu%d/mig/gi%d/ci%d/access", gpu, gi, ci) 179 | return capPath, migMinor, nil 180 | } 181 | 182 | // Look for a GI access file 183 | n, _ = fmt.Sscanf(line, "gpu%d/gi%d/access %d", &gpu, &gi, &migMinor) 184 | if n == 3 { 185 | capPath := fmt.Sprintf(nvidiaCapabilitiesPath+"/gpu%d/mig/gi%d/access", gpu, gi) 186 | return capPath, migMinor, nil 187 | } 188 | 189 | // Look for the MIG config file 190 | n, _ = fmt.Sscanf(line, "config %d", &migMinor) 191 | if n == 1 { 192 | capPath := fmt.Sprintf(nvidiaCapabilitiesPath + "/mig/config") 193 | return capPath, migMinor, nil 194 | } 195 | 196 | // Look for the MIG monitor file 197 | n, _ = fmt.Sscanf(line, "monitor %d", &migMinor) 198 | if n == 1 { 199 | capPath := fmt.Sprintf(nvidiaCapabilitiesPath + "/mig/monitor") 200 | return capPath, migMinor, nil 201 | } 202 | 203 | return "", 0, fmt.Errorf("unparsable line: %v", line) 204 | } 205 | 206 | // Walk each line of nvcapsMigMinorsPath and construct a mapping of nvidia 207 | // capabilities path to device minor for that capability 208 | capsDevicePaths := make(map[string]string) 209 | scanner := bufio.NewScanner(minorsFile) 210 | for scanner.Scan() { 211 | capPath, migMinor, err := processLine(scanner.Text()) 212 | if 
err != nil { 213 | log.Printf("Skipping line in MIG minors file: %v", err) 214 | continue 215 | } 216 | capsDevicePaths[capPath] = fmt.Sprintf(nvcapsDevicePath+"/nvidia-cap%d", migMinor) 217 | } 218 | return capsDevicePaths, nil 219 | } 220 | 221 | // GetMigDeviceNodePaths returns a list of device node paths associated with a MIG device 222 | func GetMigDeviceNodePaths(parent nvml.Device, mig *nvml.Device) ([]string, error) { 223 | capDevicePaths, err := GetMigCapabilityDevicePaths() 224 | if err != nil { 225 | return nil, fmt.Errorf("error getting MIG capability device paths: %v", err) 226 | } 227 | 228 | gpu, ret := parent.GetMinorNumber() 229 | if ret != nvml.SUCCESS { 230 | return nil, fmt.Errorf("error getting GPU device minor number: %v", ret) 231 | } 232 | 233 | gi, ret := config.Nvml().DeviceGetGpuInstanceId(*mig) 234 | if ret != nvml.SUCCESS { 235 | return nil, fmt.Errorf("error getting MIG GPU instance ID: %v", ret) 236 | } 237 | 238 | ci, ret := config.Nvml().DeviceGetComputeInstanceId(*mig) 239 | if ret != nvml.SUCCESS { 240 | return nil, fmt.Errorf("error getting MIG compute instance ID: %v", ret) 241 | } 242 | 243 | giCapPath := fmt.Sprintf(nvidiaCapabilitiesPath+"/gpu%d/mig/gi%d/access", gpu, gi) 244 | if _, exists := capDevicePaths[giCapPath]; !exists { 245 | return nil, fmt.Errorf("missing MIG GPU instance capability path: %v", giCapPath) 246 | } 247 | 248 | ciCapPath := fmt.Sprintf(nvidiaCapabilitiesPath+"/gpu%d/mig/gi%d/ci%d/access", gpu, gi, ci) 249 | if _, exists := capDevicePaths[ciCapPath]; !exists { 250 | return nil, fmt.Errorf("missing MIG GPU instance capability path: %v", giCapPath) 251 | } 252 | 253 | devicePaths := []string{ 254 | fmt.Sprintf("/dev/nvidia%d", gpu), 255 | capDevicePaths[giCapPath], 256 | capDevicePaths[ciCapPath], 257 | } 258 | 259 | return devicePaths, nil 260 | } 261 | -------------------------------------------------------------------------------- /pkg/plugin/vgpu/nvidia.go: 
-------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package vgpu 18 | 19 | import ( 20 | "bytes" 21 | "fmt" 22 | "log" 23 | "os" 24 | "strconv" 25 | "strings" 26 | 27 | "github.com/NVIDIA/go-nvlib/pkg/nvlib/device" 28 | "github.com/NVIDIA/go-nvml/pkg/nvml" 29 | "k8s.io/klog" 30 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" 31 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config" 32 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/util" 33 | ) 34 | 35 | const ( 36 | envDisableHealthChecks = "DP_DISABLE_HEALTHCHECKS" 37 | allHealthChecks = "xids" 38 | ) 39 | 40 | // Device couples an underlying pluginapi.Device type with its device node paths 41 | type Device struct { 42 | pluginapi.Device 43 | Paths []string 44 | Index string 45 | Memory uint64 46 | } 47 | 48 | // ResourceManager provides an interface for listing a set of Devices and checking health on them 49 | type ResourceManager interface { 50 | Devices() []*Device 51 | CheckHealth(stop <-chan interface{}, devices []*Device, unhealthy chan<- *Device) 52 | } 53 | 54 | // GpuDeviceManager implements the ResourceManager interface for full GPU devices 55 | type GpuDeviceManager struct { 56 | skipMigEnabledGPUs bool 57 | } 58 | 59 | // MigDeviceManager implements the ResourceManager interface for MIG devices 60 | type MigDeviceManager struct { 61 | 
strategy MigStrategy 62 | resource string 63 | } 64 | 65 | func check(ret nvml.Return) { 66 | if ret != nvml.SUCCESS { 67 | log.Panicln("Fatal:", ret) 68 | } 69 | } 70 | 71 | // NewGpuDeviceManager returns a reference to a new GpuDeviceManager 72 | func NewGpuDeviceManager(skipMigEnabledGPUs bool) *GpuDeviceManager { 73 | return &GpuDeviceManager{ 74 | skipMigEnabledGPUs: skipMigEnabledGPUs, 75 | } 76 | } 77 | 78 | // NewMigDeviceManager returns a reference to a new MigDeviceManager 79 | func NewMigDeviceManager(strategy MigStrategy, resource string) *MigDeviceManager { 80 | return &MigDeviceManager{ 81 | strategy: strategy, 82 | resource: resource, 83 | } 84 | } 85 | 86 | // Devices returns a list of devices from the GpuDeviceManager 87 | func (g *GpuDeviceManager) Devices() []*Device { 88 | n, ret := config.Nvml().DeviceGetCount() 89 | check(ret) 90 | if n > util.DeviceLimit { 91 | n = util.DeviceLimit 92 | } 93 | 94 | var devs []*Device 95 | for i := 0; i < n; i++ { 96 | d, ret := config.Nvml().DeviceGetHandleByIndex(i) 97 | check(ret) 98 | 99 | migMode, _, ret := d.GetMigMode() 100 | if ret != nvml.SUCCESS { 101 | if ret == nvml.ERROR_NOT_SUPPORTED { 102 | migMode = nvml.DEVICE_MIG_DISABLE 103 | } else { 104 | check(ret) 105 | } 106 | } 107 | 108 | if migMode == nvml.DEVICE_MIG_ENABLE && g.skipMigEnabledGPUs { 109 | continue 110 | } 111 | 112 | // Automatically enable MIG mode when the plugin is running in MIG mode 113 | if config.Mode == "mig" && migMode != nvml.DEVICE_MIG_ENABLE { 114 | if ret == nvml.ERROR_NOT_SUPPORTED { 115 | klog.V(4).Infof("Node is configed as MIG mode, but GPU %v does not support MIG mode", i) 116 | continue 117 | } 118 | ret, stat := d.SetMigMode(nvml.DEVICE_MIG_ENABLE) 119 | if ret != nvml.SUCCESS || stat != nvml.SUCCESS { 120 | klog.V(4).Infof("Node is configed as MIG mode, but failed to enable MIG mode for GPU %v : ret=%v, stat=%v", i, ret, stat) 121 | continue 122 | } 123 | } 124 | 125 | dev, err := buildDevice(fmt.Sprintf("%v", i), d) 126 | 
if err != nil { 127 | log.Panicln("Fatal:", err) 128 | } 129 | 130 | devs = append(devs, dev) 131 | } 132 | 133 | return devs 134 | } 135 | 136 | // Devices returns a list of devices from the MigDeviceManager 137 | func (m *MigDeviceManager) Devices() []*Device { 138 | n, ret := config.Nvml().DeviceGetCount() 139 | check(ret) 140 | if n > util.DeviceLimit { 141 | n = util.DeviceLimit 142 | } 143 | 144 | var devs []*Device 145 | for i := 0; i < n; i++ { 146 | d, ret := config.Nvml().DeviceGetHandleByIndex(i) 147 | check(ret) 148 | 149 | migMode, _, ret := d.GetMigMode() 150 | if ret != nvml.SUCCESS { 151 | if ret == nvml.ERROR_NOT_SUPPORTED { 152 | migMode = nvml.DEVICE_MIG_DISABLE 153 | } else { 154 | check(ret) 155 | } 156 | } 157 | 158 | if migMode != nvml.DEVICE_MIG_ENABLE { 159 | continue 160 | } 161 | 162 | err := config.Device().VisitMigDevices(func(i int, d device.Device, j int, mig device.MigDevice) error { 163 | dev, err := buildMigDevice(fmt.Sprintf("%v:%v", i, j), mig) 164 | if err != nil { 165 | log.Panicln("Fatal:", err) 166 | } 167 | devs = append(devs, dev) 168 | return nil 169 | }) 170 | if err != nil { 171 | log.Fatalf("VisitMigDevices error: %v", err) 172 | } 173 | } 174 | 175 | return devs 176 | } 177 | 178 | // CheckHealth performs health checks on a set of devices, writing to the 'unhealthy' channel with any unhealthy devices 179 | func (g *GpuDeviceManager) CheckHealth(stop <-chan interface{}, devices []*Device, unhealthy chan<- *Device) { 180 | checkHealth(stop, devices, unhealthy) 181 | } 182 | 183 | // CheckHealth performs health checks on a set of devices, writing to the 'unhealthy' channel with any unhealthy devices 184 | func (m *MigDeviceManager) CheckHealth(stop <-chan interface{}, devices []*Device, unhealthy chan<- *Device) { 185 | checkHealth(stop, devices, unhealthy) 186 | } 187 | 188 | func buildDevice(index string, d nvml.Device) (*Device, error) { 189 | uuid, ret := config.Nvml().DeviceGetUUID(d) 190 | if ret != nvml.SUCCESS { 
191 | return nil, fmt.Errorf("error getting UUID of device: %v", ret) 192 | } 193 | 194 | minor, ret := config.Nvml().DeviceGetMinorNumber(d) 195 | if ret != nvml.SUCCESS { 196 | return nil, fmt.Errorf("error getting minor number of device: %v", ret) 197 | } 198 | paths := []string{fmt.Sprintf("/dev/nvidia%d", minor)} 199 | 200 | memory, ret := config.Nvml().DeviceGetMemoryInfo(d) 201 | if ret != nvml.SUCCESS { 202 | return nil, fmt.Errorf("error getting memory info of device: %v", ret) 203 | } 204 | 205 | hasNuma, numa, err := getNumaNode(d) 206 | if err != nil { 207 | return nil, fmt.Errorf("error getting device NUMA node: %v", err) 208 | } 209 | 210 | dev := Device{} 211 | dev.ID = uuid 212 | dev.Health = pluginapi.Healthy 213 | dev.Paths = paths 214 | dev.Index = index 215 | dev.Memory = memory.Total / (1024 * 1024) 216 | if hasNuma { 217 | dev.Topology = &pluginapi.TopologyInfo{ 218 | Nodes: []*pluginapi.NUMANode{ 219 | { 220 | ID: int64(numa), 221 | }, 222 | }, 223 | } 224 | } 225 | return &dev, nil 226 | } 227 | 228 | func buildMigDevice(index string, d device.MigDevice) (*Device, error) { 229 | uuid, ret := config.Nvml().DeviceGetUUID(d) 230 | if ret != nvml.SUCCESS { 231 | return nil, fmt.Errorf("error getting UUID of device: %v", ret) 232 | } 233 | 234 | paths, err := getMigPaths(d) 235 | if err != nil { 236 | return nil, fmt.Errorf("error getting MIG paths of device: %v", err) 237 | } 238 | 239 | memory, ret := config.Nvml().DeviceGetMemoryInfo(d) 240 | if ret != nvml.SUCCESS { 241 | return nil, fmt.Errorf("error getting memory info of device: %v", ret) 242 | } 243 | 244 | parent, ret := d.GetDeviceHandleFromMigDeviceHandle() 245 | if ret != nvml.SUCCESS { 246 | return nil, fmt.Errorf("error getting parent GPU device from MIG device: %v", ret) 247 | } 248 | hasNuma, numa, err := getNumaNode(parent) 249 | if err != nil { 250 | return nil, fmt.Errorf("error getting device NUMA node: %v", err) 251 | } 252 | 253 | dev := Device{} 254 | dev.ID = uuid 255 | 
dev.Health = pluginapi.Healthy 256 | dev.Paths = paths 257 | dev.Index = index 258 | dev.Memory = memory.Total / (1024 * 1024) 259 | if hasNuma { 260 | dev.Topology = &pluginapi.TopologyInfo{ 261 | Nodes: []*pluginapi.NUMANode{ 262 | { 263 | ID: int64(numa), 264 | }, 265 | }, 266 | } 267 | } 268 | return &dev, nil 269 | } 270 | 271 | func getMigPaths(d device.MigDevice) ([]string, error) { 272 | capDevicePaths, err := GetMigCapabilityDevicePaths() 273 | if err != nil { 274 | return nil, fmt.Errorf("error getting MIG capability device paths: %v", err) 275 | } 276 | 277 | gi, ret := d.GetGpuInstanceId() 278 | if ret != nvml.SUCCESS { 279 | return nil, fmt.Errorf("error getting GPU Instance ID: %v", ret) 280 | } 281 | 282 | ci, ret := d.GetComputeInstanceId() 283 | if ret != nvml.SUCCESS { 284 | return nil, fmt.Errorf("error getting Compute Instance ID: %v", ret) 285 | } 286 | 287 | parent, ret := d.GetDeviceHandleFromMigDeviceHandle() 288 | if ret != nvml.SUCCESS { 289 | return nil, fmt.Errorf("error getting parent device: %v", ret) 290 | } 291 | minor, ret := parent.GetMinorNumber() 292 | if ret != nvml.SUCCESS { 293 | return nil, fmt.Errorf("error getting GPU device minor number: %v", ret) 294 | } 295 | parentPath := fmt.Sprintf("/dev/nvidia%d", minor) 296 | 297 | giCapPath := fmt.Sprintf(nvidiaCapabilitiesPath+"/gpu%d/mig/gi%d/access", minor, gi) 298 | if _, exists := capDevicePaths[giCapPath]; !exists { 299 | return nil, fmt.Errorf("missing MIG GPU instance capability path: %v", giCapPath) 300 | } 301 | 302 | ciCapPath := fmt.Sprintf(nvidiaCapabilitiesPath+"/gpu%d/mig/gi%d/ci%d/access", minor, gi, ci) 303 | if _, exists := capDevicePaths[ciCapPath]; !exists { 304 | return nil, fmt.Errorf("missing MIG GPU instance capability path: %v", giCapPath) 305 | } 306 | 307 | devicePaths := []string{ 308 | parentPath, 309 | capDevicePaths[giCapPath], 310 | capDevicePaths[ciCapPath], 311 | } 312 | 313 | return devicePaths, nil 314 | } 315 | 316 | func getNumaNode(d 
nvml.Device) (bool, int, error) { 317 | pciInfo, ret := d.GetPciInfo() 318 | if ret != nvml.SUCCESS { 319 | return false, 0, fmt.Errorf("error getting PCI Bus Info of device: %v", ret) 320 | } 321 | 322 | // Discard leading zeros. 323 | busID := strings.ToLower(strings.TrimPrefix(int8Slice(pciInfo.BusId[:]).String(), "0000")) 324 | 325 | b, err := os.ReadFile(fmt.Sprintf("/sys/bus/pci/devices/%s/numa_node", busID)) 326 | if err != nil { 327 | return false, 0, nil 328 | } 329 | 330 | node, err := strconv.Atoi(string(bytes.TrimSpace(b))) 331 | if err != nil { 332 | return false, 0, fmt.Errorf("error parsing value for NUMA node: %v", err) 333 | } 334 | 335 | if node < 0 { 336 | return false, 0, nil 337 | } 338 | 339 | return true, node, nil 340 | } 341 | 342 | func checkHealth(stop <-chan interface{}, devices []*Device, unhealthy chan<- *Device) { 343 | disableHealthChecks := strings.ToLower(os.Getenv(envDisableHealthChecks)) 344 | if disableHealthChecks == "all" { 345 | disableHealthChecks = allHealthChecks 346 | } 347 | if strings.Contains(disableHealthChecks, "xids") { 348 | return 349 | } 350 | 351 | // FIXME: formalize the full list and document it. 
352 | // http://docs.nvidia.com/deploy/xid-errors/index.html#topic_4 353 | // Application errors: the GPU should still be healthy 354 | applicationErrorXids := []uint64{ 355 | 13, // Graphics Engine Exception 356 | 31, // GPU memory page fault 357 | 43, // GPU stopped processing 358 | 45, // Preemptive cleanup, due to previous errors 359 | 68, // Video processor exception 360 | } 361 | 362 | skippedXids := make(map[uint64]bool) 363 | for _, id := range applicationErrorXids { 364 | skippedXids[id] = true 365 | } 366 | 367 | for _, additionalXid := range getAdditionalXids(disableHealthChecks) { 368 | skippedXids[additionalXid] = true 369 | } 370 | 371 | eventSet, ret := config.Nvml().EventSetCreate() 372 | if ret != nvml.SUCCESS { 373 | klog.Warningf("could not create event set: %v", ret) 374 | return 375 | } 376 | defer eventSet.Free() 377 | 378 | parentToDeviceMap := make(map[string]*Device) 379 | deviceIDToGiMap := make(map[string]int) 380 | deviceIDToCiMap := make(map[string]int) 381 | 382 | eventMask := uint64(nvml.EventTypeXidCriticalError | nvml.EventTypeDoubleBitEccError | nvml.EventTypeSingleBitEccError) 383 | for _, d := range devices { 384 | uuid, gi, ci, err := getDevicePlacement(d) 385 | if err != nil { 386 | klog.Warningf("Could not determine device placement for %v: %v; Marking it unhealthy.", d.ID, err) 387 | unhealthy <- d 388 | continue 389 | } 390 | deviceIDToGiMap[d.ID] = gi 391 | deviceIDToCiMap[d.ID] = ci 392 | parentToDeviceMap[uuid] = d 393 | 394 | gpu, ret := config.Nvml().DeviceGetHandleByUUID(uuid) 395 | if ret != nvml.SUCCESS { 396 | klog.Infof("unable to get device handle from UUID: %v; marking it as unhealthy", ret) 397 | unhealthy <- d 398 | continue 399 | } 400 | 401 | supportedEvents, ret := gpu.GetSupportedEventTypes() 402 | if ret != nvml.SUCCESS { 403 | klog.Infof("Unable to determine the supported events for %v: %v; marking it as unhealthy", d.ID, ret) 404 | unhealthy <- d 405 | continue 406 | } 407 | 408 | ret = 
gpu.RegisterEvents(eventMask&supportedEvents, eventSet) 409 | if ret == nvml.ERROR_NOT_SUPPORTED { 410 | klog.Warningf("Device %v is too old to support healthchecking.", d.ID) 411 | } 412 | if ret != nvml.SUCCESS { 413 | klog.Infof("Marking device %v as unhealthy: %v", d.ID, ret) 414 | unhealthy <- d 415 | } 416 | } 417 | 418 | for { 419 | select { 420 | case <-stop: 421 | return 422 | default: 423 | } 424 | 425 | e, ret := eventSet.Wait(5000) 426 | if ret == nvml.ERROR_TIMEOUT { 427 | continue 428 | } 429 | if ret != nvml.SUCCESS { 430 | klog.Infof("Error waiting for event: %v; Marking all devices as unhealthy", ret) 431 | for _, d := range devices { 432 | unhealthy <- d 433 | } 434 | continue 435 | } 436 | 437 | if e.EventType != nvml.EventTypeXidCriticalError { 438 | klog.Infof("Skipping non-nvmlEventTypeXidCriticalError event: %+v", e) 439 | continue 440 | } 441 | 442 | if skippedXids[e.EventData] { 443 | klog.Infof("Skipping event %+v", e) 444 | continue 445 | } 446 | 447 | klog.Infof("Processing event %+v", e) 448 | eventUUID, ret := e.Device.GetUUID() 449 | if ret != nvml.SUCCESS { 450 | // If we cannot reliably determine the device UUID, we mark all devices as unhealthy. 
451 | klog.Infof("Failed to determine uuid for event %v: %v; Marking all devices as unhealthy.", e, ret) 452 | for _, d := range devices { 453 | unhealthy <- d 454 | } 455 | continue 456 | } 457 | 458 | d, exists := parentToDeviceMap[eventUUID] 459 | if !exists { 460 | klog.Infof("Ignoring event for unexpected device: %v", eventUUID) 461 | continue 462 | } 463 | 464 | if d.IsMigDevice() && e.GpuInstanceId != 0xFFFFFFFF && e.ComputeInstanceId != 0xFFFFFFFF { 465 | gi := deviceIDToGiMap[d.ID] 466 | ci := deviceIDToCiMap[d.ID] 467 | if !(uint32(gi) == e.GpuInstanceId && uint32(ci) == e.ComputeInstanceId) { 468 | continue 469 | } 470 | klog.Infof("Event for mig device %v (gi=%v, ci=%v)", d.ID, gi, ci) 471 | } 472 | 473 | klog.Infof("XidCriticalError: Xid=%d on Device=%s; marking device as unhealthy.", e.EventData, d.ID) 474 | unhealthy <- d 475 | } 476 | } 477 | 478 | // getAdditionalXids returns a list of additional Xids to skip from the specified string. 479 | // The input is treaded as a comma-separated string and all valid uint64 values are considered as Xid values. Invalid values 480 | // are ignored. 481 | func getAdditionalXids(input string) []uint64 { 482 | if input == "" { 483 | return nil 484 | } 485 | 486 | var additionalXids []uint64 487 | for _, additionalXid := range strings.Split(input, ",") { 488 | trimmed := strings.TrimSpace(additionalXid) 489 | if trimmed == "" { 490 | continue 491 | } 492 | xid, err := strconv.ParseUint(trimmed, 10, 64) 493 | if err != nil { 494 | log.Printf("Ignoring malformed Xid value %v: %v", trimmed, err) 495 | continue 496 | } 497 | additionalXids = append(additionalXids, xid) 498 | } 499 | 500 | return additionalXids 501 | } 502 | 503 | // getDevicePlacement returns the placement of the specified device. 504 | // For a MIG device the placement is defined by the 3-tuple 505 | // For a full device the returned 3-tuple is the device's uuid and 0xFFFFFFFF for the other two elements. 
506 | func getDevicePlacement(d *Device) (string, int, int, error) { 507 | if !d.IsMigDevice() { 508 | return d.GetUUID(), 0xFFFFFFFF, 0xFFFFFFFF, nil 509 | } 510 | return getMigDeviceParts(d) 511 | } 512 | 513 | // getMigDeviceParts returns the parent GI and CI ids of the MIG device. 514 | func getMigDeviceParts(d *Device) (string, int, int, error) { 515 | if !d.IsMigDevice() { 516 | return "", 0, 0, fmt.Errorf("cannot get GI and CI of full device") 517 | } 518 | 519 | uuid := d.GetUUID() 520 | // For older driver versions, the call to DeviceGetHandleByUUID will fail for MIG devices. 521 | mig, ret := config.Nvml().DeviceGetHandleByUUID(uuid) 522 | if ret == nvml.SUCCESS { 523 | parentHandle, ret := mig.GetDeviceHandleFromMigDeviceHandle() 524 | if ret != nvml.SUCCESS { 525 | return "", 0, 0, fmt.Errorf("failed to get parent device handle: %v", ret) 526 | } 527 | 528 | parentUUID, ret := parentHandle.GetUUID() 529 | if ret != nvml.SUCCESS { 530 | return "", 0, 0, fmt.Errorf("failed to get parent uuid: %v", ret) 531 | } 532 | gi, ret := mig.GetGpuInstanceId() 533 | if ret != nvml.SUCCESS { 534 | return "", 0, 0, fmt.Errorf("failed to get GPU Instance ID: %v", ret) 535 | } 536 | 537 | ci, ret := mig.GetComputeInstanceId() 538 | if ret != nvml.SUCCESS { 539 | return "", 0, 0, fmt.Errorf("failed to get Compute Instance ID: %v", ret) 540 | } 541 | return parentUUID, gi, ci, nil 542 | } 543 | return parseMigDeviceUUID(uuid) 544 | } 545 | 546 | // parseMigDeviceUUID splits the MIG device UUID into the parent device UUID and ci and gi 547 | func parseMigDeviceUUID(mig string) (string, int, int, error) { 548 | tokens := strings.SplitN(mig, "-", 2) 549 | if len(tokens) != 2 || tokens[0] != "MIG" { 550 | return "", 0, 0, fmt.Errorf("unable to parse UUID as MIG device") 551 | } 552 | 553 | tokens = strings.SplitN(tokens[1], "/", 3) 554 | if len(tokens) != 3 || !strings.HasPrefix(tokens[0], "GPU-") { 555 | return "", 0, 0, fmt.Errorf("unable to parse UUID as MIG device") 556 | 
} 557 | 558 | gi, err := strconv.ParseInt(tokens[1], 10, 32) 559 | if err != nil { 560 | return "", 0, 0, fmt.Errorf("unable to parse UUID as MIG device") 561 | } 562 | 563 | ci, err := strconv.ParseInt(tokens[2], 10, 32) 564 | if err != nil { 565 | return "", 0, 0, fmt.Errorf("unable to parse UUID as MIG device") 566 | } 567 | 568 | return tokens[0], int(gi), int(ci), nil 569 | } 570 | 571 | // IsMigDevice checks whether d is a MIG device or not. 572 | func (d Device) IsMigDevice() bool { 573 | return strings.Contains(d.Index, ":") 574 | } 575 | 576 | // GetUUID returns the UUID for the device from the annotated ID. 577 | func (d Device) GetUUID() string { 578 | return AnnotatedID(d.ID).GetID() 579 | } 580 | 581 | // AnnotatedID represents an ID with a replica number embedded in it. 582 | type AnnotatedID string 583 | 584 | // Split splits an AnnotatedID into its ID and replica number parts. 585 | func (r AnnotatedID) Split() (string, int) { 586 | split := strings.SplitN(string(r), "::", 2) 587 | if len(split) != 2 { 588 | return string(r), 0 589 | } 590 | replica, _ := strconv.ParseInt(split[1], 10, 0) 591 | return split[0], int(replica) 592 | } 593 | 594 | // GetID returns just the ID part of the replicated ID 595 | func (r AnnotatedID) GetID() string { 596 | id, _ := r.Split() 597 | return id 598 | } 599 | -------------------------------------------------------------------------------- /pkg/plugin/vgpu/plugin.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package vgpu 18 | 19 | import ( 20 | "bytes" 21 | "errors" 22 | "fmt" 23 | "log" 24 | "net" 25 | "os" 26 | "os/exec" 27 | "path" 28 | "strings" 29 | "time" 30 | 31 | "gopkg.in/yaml.v2" 32 | "k8s.io/apimachinery/pkg/util/uuid" 33 | "k8s.io/klog/v2" 34 | "volcano.sh/k8s-device-plugin/pkg/lock" 35 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config" 36 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/util" 37 | 38 | "github.com/NVIDIA/go-gpuallocator/gpuallocator" 39 | "golang.org/x/net/context" 40 | "google.golang.org/grpc" 41 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" 42 | ) 43 | 44 | // Constants to represent the various device list strategies 45 | const ( 46 | DeviceListStrategyEnvvar = "envvar" 47 | DeviceListStrategyVolumeMounts = "volume-mounts" 48 | ) 49 | 50 | // Constants to represent the various device id strategies 51 | const ( 52 | DeviceIDStrategyUUID = "uuid" 53 | DeviceIDStrategyIndex = "index" 54 | ) 55 | 56 | // Constants for use by the 'volume-mounts' device list strategy 57 | const ( 58 | deviceListAsVolumeMountsHostPath = "/dev/null" 59 | deviceListAsVolumeMountsContainerPathRoot = "/var/run/nvidia-container-devices" 60 | ) 61 | 62 | // NvidiaDevicePlugin implements the Kubernetes device plugin API 63 | type NvidiaDevicePlugin struct { 64 | ResourceManager 65 | deviceCache *DeviceCache 66 | resourceName string 67 | deviceListEnvvar string 68 | allocatePolicy gpuallocator.Policy 69 | socket string 70 | schedulerConfig *config.NvidiaConfig 71 | operatingMode string 72 | 73 | virtualDevices 
[]*pluginapi.Device 74 | migCurrent config.MigPartedSpec 75 | 76 | server *grpc.Server 77 | cachedDevices []*Device 78 | health chan *Device 79 | stop chan interface{} 80 | changed chan struct{} 81 | migStrategy string 82 | } 83 | 84 | // NewNvidiaDevicePlugin returns an initialized NvidiaDevicePlugin 85 | func NewNvidiaDevicePlugin(resourceName string, deviceCache *DeviceCache, allocatePolicy gpuallocator.Policy, socket string, cfg *config.NvidiaConfig) *NvidiaDevicePlugin { 86 | dp := &NvidiaDevicePlugin{ 87 | deviceCache: deviceCache, 88 | resourceName: resourceName, 89 | allocatePolicy: allocatePolicy, 90 | socket: socket, 91 | migStrategy: "none", 92 | operatingMode: config.Mode, 93 | schedulerConfig: cfg, 94 | // These will be reinitialized every 95 | // time the plugin server is restarted. 96 | server: nil, 97 | health: nil, 98 | stop: nil, 99 | } 100 | return dp 101 | } 102 | 103 | // NewNvidiaDevicePlugin returns an initialized NvidiaDevicePlugin 104 | func NewMIGNvidiaDevicePlugin(resourceName string, resourceManager ResourceManager, deviceListEnvvar string, allocatePolicy gpuallocator.Policy, socket string) *NvidiaDevicePlugin { 105 | return &NvidiaDevicePlugin{ 106 | ResourceManager: resourceManager, 107 | resourceName: resourceName, 108 | deviceListEnvvar: deviceListEnvvar, 109 | allocatePolicy: allocatePolicy, 110 | socket: socket, 111 | 112 | // These will be reinitialized every 113 | // time the plugin server is restarted. 114 | cachedDevices: nil, 115 | server: nil, 116 | health: nil, 117 | stop: nil, 118 | migStrategy: "mixed", 119 | } 120 | } 121 | 122 | func (m *NvidiaDevicePlugin) initialize() { 123 | if strings.Compare(m.migStrategy, "mixed") == 0 { 124 | m.cachedDevices = m.ResourceManager.Devices() 125 | } 126 | m.server = grpc.NewServer([]grpc.ServerOption{}...) 
127 | m.health = make(chan *Device) 128 | m.stop = make(chan interface{}) 129 | m.virtualDevices, _ = util.GetDevices(config.GPUMemoryFactor) 130 | } 131 | 132 | func (m *NvidiaDevicePlugin) cleanup() { 133 | close(m.stop) 134 | m.server = nil 135 | m.health = nil 136 | m.stop = nil 137 | } 138 | 139 | // Start starts the gRPC server, registers the device plugin with the Kubelet, 140 | // and starts the device healthchecks. 141 | func (m *NvidiaDevicePlugin) Start() error { 142 | m.initialize() 143 | 144 | err := m.Serve() 145 | if err != nil { 146 | log.Printf("Could not start device plugin for '%s': %s", m.resourceName, err) 147 | m.cleanup() 148 | return err 149 | } 150 | log.Printf("Starting to serve '%s' on %s", m.resourceName, m.socket) 151 | 152 | err = m.Register() 153 | if err != nil { 154 | log.Printf("Could not register device plugin: %s", err) 155 | m.Stop() 156 | return err 157 | } 158 | log.Printf("Registered device plugin for '%s' with Kubelet", m.resourceName) 159 | 160 | if m.operatingMode == "mig" { 161 | cmd := exec.Command("nvidia-mig-parted", "export") 162 | var stdout, stderr bytes.Buffer 163 | cmd.Stdout = &stdout 164 | cmd.Stderr = &stderr 165 | err := cmd.Run() 166 | if err != nil { 167 | klog.Fatalf("nvidia-mig-parted failed with %s\n", err) 168 | } 169 | outStr := stdout.Bytes() 170 | yaml.Unmarshal(outStr, &m.migCurrent) 171 | os.WriteFile("/tmp/migconfig.yaml", outStr, os.ModePerm) 172 | if len(m.migCurrent.MigConfigs["current"]) == 1 && len(m.migCurrent.MigConfigs["current"][0].Devices) == 0 { 173 | idx := 0 174 | m.migCurrent.MigConfigs["current"][0].Devices = make([]int32, 0) 175 | for idx < util.GetDeviceNums() { 176 | m.migCurrent.MigConfigs["current"][0].Devices = append(m.migCurrent.MigConfigs["current"][0].Devices, int32(idx)) 177 | idx++ 178 | } 179 | } 180 | klog.Infoln("Mig export", m.migCurrent) 181 | } 182 | 183 | if strings.Compare(m.migStrategy, "none") == 0 { 184 | m.deviceCache.AddNotifyChannel("plugin", m.health) 185 | 
} else if strings.Compare(m.migStrategy, "mixed") == 0 { 186 | go m.CheckHealth(m.stop, m.cachedDevices, m.health) 187 | } else { 188 | log.Panicln("migstrategy not recognized", m.migStrategy) 189 | } 190 | return nil 191 | } 192 | 193 | // Stop stops the gRPC server. 194 | func (m *NvidiaDevicePlugin) Stop() error { 195 | if m == nil || m.server == nil { 196 | return nil 197 | } 198 | log.Printf("Stopping to serve '%s' on %s", m.resourceName, m.socket) 199 | m.deviceCache.RemoveNotifyChannel("plugin") 200 | m.server.Stop() 201 | if err := os.Remove(m.socket); err != nil && !os.IsNotExist(err) { 202 | return err 203 | } 204 | m.cleanup() 205 | return nil 206 | } 207 | 208 | // Serve starts the gRPC server of the device plugin. 209 | func (m *NvidiaDevicePlugin) Serve() error { 210 | os.Remove(m.socket) 211 | sock, err := net.Listen("unix", m.socket) 212 | if err != nil { 213 | return err 214 | } 215 | 216 | pluginapi.RegisterDevicePluginServer(m.server, m) 217 | 218 | go func() { 219 | lastCrashTime := time.Now() 220 | restartCount := 0 221 | for { 222 | log.Printf("Starting GRPC server for '%s'", m.resourceName) 223 | err := m.server.Serve(sock) 224 | if err == nil { 225 | break 226 | } 227 | 228 | log.Printf("GRPC server for '%s' crashed with error: %v", m.resourceName, err) 229 | 230 | // restart if it has not been too often 231 | // i.e. if server has crashed more than 5 times and it didn't last more than one hour each time 232 | if restartCount > 5 { 233 | // quit 234 | log.Fatalf("GRPC server for '%s' has repeatedly crashed recently. Quitting", m.resourceName) 235 | } 236 | timeSinceLastCrash := time.Since(lastCrashTime).Seconds() 237 | lastCrashTime = time.Now() 238 | if timeSinceLastCrash > 3600 { 239 | // it has been one hour since the last crash.. 
reset the count 240 | // to reflect on the frequency 241 | restartCount = 1 242 | } else { 243 | restartCount++ 244 | } 245 | } 246 | }() 247 | 248 | // Wait for server to start by launching a blocking connexion 249 | conn, err := m.dial(m.socket, 5*time.Second) 250 | if err != nil { 251 | return err 252 | } 253 | conn.Close() 254 | 255 | return nil 256 | } 257 | 258 | // Register registers the device plugin for the given resourceName with Kubelet. 259 | func (m *NvidiaDevicePlugin) Register() error { 260 | conn, err := m.dial(pluginapi.KubeletSocket, 5*time.Second) 261 | if err != nil { 262 | return err 263 | } 264 | defer conn.Close() 265 | 266 | client := pluginapi.NewRegistrationClient(conn) 267 | reqt := &pluginapi.RegisterRequest{ 268 | Version: pluginapi.Version, 269 | Endpoint: path.Base(m.socket), 270 | ResourceName: m.resourceName, 271 | Options: &pluginapi.DevicePluginOptions{}, 272 | } 273 | 274 | _, err = client.Register(context.Background(), reqt) 275 | if err != nil { 276 | return err 277 | } 278 | return nil 279 | } 280 | 281 | // GetDevicePluginOptions returns the values of the optional settings for this plugin 282 | func (m *NvidiaDevicePlugin) GetDevicePluginOptions(context.Context, *pluginapi.Empty) (*pluginapi.DevicePluginOptions, error) { 283 | options := &pluginapi.DevicePluginOptions{} 284 | return options, nil 285 | } 286 | 287 | // ListAndWatch lists devices and update that list according to the health status 288 | func (m *NvidiaDevicePlugin) ListAndWatch(e *pluginapi.Empty, s pluginapi.DevicePlugin_ListAndWatchServer) error { 289 | if m.resourceName == util.ResourceMem { 290 | err := s.Send(&pluginapi.ListAndWatchResponse{Devices: m.virtualDevices}) 291 | if err != nil { 292 | log.Fatalf("failed sending devices %d: %v", len(m.virtualDevices), err) 293 | } 294 | 295 | for { 296 | select { 297 | case <-m.stop: 298 | return nil 299 | case d := <-m.health: 300 | // FIXME: there is no way to recover from the Unhealthy state. 
301 | //isChange := false 302 | //if d.Health != pluginapi.Unhealthy { 303 | //isChange = true 304 | //} 305 | d.Health = pluginapi.Unhealthy 306 | log.Printf("'%s' device marked unhealthy: %s", m.resourceName, d.ID) 307 | s.Send(&pluginapi.ListAndWatchResponse{Devices: m.virtualDevices}) 308 | //if isChange { 309 | // m.kubeInteractor.PatchUnhealthyGPUListOnNode(m.physicalDevices) 310 | //} 311 | } 312 | } 313 | 314 | } else { 315 | _ = s.Send(&pluginapi.ListAndWatchResponse{Devices: m.apiDevices()}) 316 | for { 317 | select { 318 | case <-m.stop: 319 | return nil 320 | case d := <-m.health: 321 | // FIXME: there is no way to recover from the Unhealthy state. 322 | //d.Health = pluginapi.Unhealthy 323 | log.Printf("'%s' device marked unhealthy: %s", m.resourceName, d.ID) 324 | _ = s.Send(&pluginapi.ListAndWatchResponse{Devices: m.apiDevices()}) 325 | } 326 | } 327 | } 328 | } 329 | 330 | func (m *NvidiaDevicePlugin) MIGAllocate(ctx context.Context, reqs *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) { 331 | responses := pluginapi.AllocateResponse{} 332 | for _, req := range reqs.ContainerRequests { 333 | for _, id := range req.DevicesIDs { 334 | if !m.deviceExists(id) { 335 | return nil, fmt.Errorf("invalid allocation request for '%s': unknown device: %s", m.resourceName, id) 336 | } 337 | } 338 | 339 | response := pluginapi.ContainerAllocateResponse{} 340 | 341 | uuids := req.DevicesIDs 342 | deviceIDs := m.deviceIDsFromUUIDs(uuids) 343 | 344 | response.Envs = m.apiEnvs(m.deviceListEnvvar, deviceIDs) 345 | 346 | klog.Infof("response=", response.Envs) 347 | responses.ContainerResponses = append(responses.ContainerResponses, &response) 348 | } 349 | 350 | return &responses, nil 351 | } 352 | 353 | // Allocate which return list of devices. 
354 | func (m *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) { 355 | if len(reqs.ContainerRequests) > 1 { 356 | return &pluginapi.AllocateResponse{}, errors.New("multiple Container Requests not supported") 357 | } 358 | if strings.Compare(m.migStrategy, "mixed") == 0 { 359 | return m.MIGAllocate(ctx, reqs) 360 | } 361 | responses := pluginapi.AllocateResponse{} 362 | 363 | if strings.Compare(m.resourceName, util.ResourceMem) == 0 || strings.Compare(m.resourceName, util.ResourceCores) == 0 { 364 | for range reqs.ContainerRequests { 365 | responses.ContainerResponses = append(responses.ContainerResponses, &pluginapi.ContainerAllocateResponse{}) 366 | } 367 | return &responses, nil 368 | } 369 | nodename := os.Getenv("NODE_NAME") 370 | 371 | current, err := util.GetPendingPod(nodename) 372 | if err != nil { 373 | lock.ReleaseNodeLock(nodename, util.VGPUDeviceName) 374 | return &pluginapi.AllocateResponse{}, err 375 | } 376 | if current == nil { 377 | klog.Errorf("no pending pod found on node %s", nodename) 378 | lock.ReleaseNodeLock(nodename, util.VGPUDeviceName) 379 | return &pluginapi.AllocateResponse{}, errors.New("no pending pod found on node") 380 | } 381 | 382 | for idx := range reqs.ContainerRequests { 383 | currentCtr, devreq, err := util.GetNextDeviceRequest(util.NvidiaGPUDevice, *current) 384 | klog.Infoln("deviceAllocateFromAnnotation=", devreq) 385 | if err != nil { 386 | klog.Errorln("get device from annotation failed", err.Error()) 387 | util.PodAllocationFailed(nodename, current) 388 | return &pluginapi.AllocateResponse{}, err 389 | } 390 | if len(devreq) != len(reqs.ContainerRequests[idx].DevicesIDs) { 391 | klog.Errorln("device number not matched", devreq, reqs.ContainerRequests[idx].DevicesIDs) 392 | util.PodAllocationFailed(nodename, current) 393 | return &pluginapi.AllocateResponse{}, errors.New("device number not matched") 394 | } 395 | 396 | response := 
pluginapi.ContainerAllocateResponse{} 397 | response.Envs = make(map[string]string) 398 | response.Envs["NVIDIA_VISIBLE_DEVICES"] = strings.Join(m.GetContainerDeviceStrArray(devreq), ",") 399 | 400 | err = util.EraseNextDeviceTypeFromAnnotation(util.NvidiaGPUDevice, *current) 401 | if err != nil { 402 | klog.Errorln("Erase annotation failed", err.Error()) 403 | util.PodAllocationFailed(nodename, current) 404 | return &pluginapi.AllocateResponse{}, err 405 | } 406 | 407 | if m.operatingMode != "mig" { 408 | 409 | for i, dev := range devreq { 410 | limitKey := fmt.Sprintf("CUDA_DEVICE_MEMORY_LIMIT_%v", i) 411 | response.Envs[limitKey] = fmt.Sprintf("%vm", dev.Usedmem*int32(config.GPUMemoryFactor)) 412 | } 413 | response.Envs["CUDA_DEVICE_SM_LIMIT"] = fmt.Sprint(devreq[0].Usedcores) 414 | response.Envs["CUDA_DEVICE_MEMORY_SHARED_CACHE"] = fmt.Sprintf("/tmp/vgpu/%v.cache", uuid.NewUUID()) 415 | 416 | cacheFileHostDirectory := "/tmp/vgpu/containers/" + string(current.UID) + "_" + currentCtr.Name 417 | os.MkdirAll(cacheFileHostDirectory, 0777) 418 | os.Chmod(cacheFileHostDirectory, 0777) 419 | os.MkdirAll("/tmp/vgpulock", 0777) 420 | os.Chmod("/tmp/vgpulock", 0777) 421 | hostHookPath := os.Getenv("HOOK_PATH") 422 | 423 | response.Mounts = append(response.Mounts, 424 | &pluginapi.Mount{ContainerPath: "/usr/local/vgpu/libvgpu.so", 425 | HostPath: hostHookPath + "/libvgpu.so", 426 | ReadOnly: true}, 427 | &pluginapi.Mount{ContainerPath: "/tmp/vgpu", 428 | HostPath: cacheFileHostDirectory, 429 | ReadOnly: false}, 430 | &pluginapi.Mount{ContainerPath: "/tmp/vgpulock", 431 | HostPath: "/tmp/vgpulock", 432 | ReadOnly: false}, 433 | ) 434 | found := false 435 | for _, val := range currentCtr.Env { 436 | if strings.Compare(val.Name, "CUDA_DISABLE_CONTROL") == 0 { 437 | found = true 438 | break 439 | } 440 | } 441 | if !found { 442 | response.Mounts = append(response.Mounts, &pluginapi.Mount{ContainerPath: "/etc/ld.so.preload", 443 | HostPath: hostHookPath + "/ld.so.preload", 444 
| ReadOnly: true}, 445 | ) 446 | } 447 | } 448 | responses.ContainerResponses = append(responses.ContainerResponses, &response) 449 | } 450 | klog.Infoln("Allocate Response", responses.ContainerResponses) 451 | util.PodAllocationTrySuccess(nodename, current) 452 | return &responses, nil 453 | } 454 | 455 | // PreStartContainer is unimplemented for this plugin 456 | func (m *NvidiaDevicePlugin) PreStartContainer(context.Context, *pluginapi.PreStartContainerRequest) (*pluginapi.PreStartContainerResponse, error) { 457 | return &pluginapi.PreStartContainerResponse{}, nil 458 | } 459 | 460 | // dial establishes the gRPC communication with the registered device plugin. 461 | func (m *NvidiaDevicePlugin) dial(unixSocketPath string, timeout time.Duration) (*grpc.ClientConn, error) { 462 | c, err := grpc.Dial(unixSocketPath, grpc.WithInsecure(), grpc.WithBlock(), 463 | grpc.WithTimeout(timeout), 464 | grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) { 465 | return net.DialTimeout("unix", addr, timeout) 466 | }), 467 | ) 468 | 469 | if err != nil { 470 | return nil, err 471 | } 472 | 473 | return c, nil 474 | } 475 | 476 | func (m *NvidiaDevicePlugin) Devices() []*Device { 477 | if strings.Compare(m.migStrategy, "none") == 0 { 478 | return m.deviceCache.GetCache() 479 | } 480 | if strings.Compare(m.migStrategy, "mixed") == 0 { 481 | return m.ResourceManager.Devices() 482 | } 483 | log.Panic("migStrategy not recognized,exiting...") 484 | return []*Device{} 485 | } 486 | 487 | func (m *NvidiaDevicePlugin) deviceExists(id string) bool { 488 | for _, d := range m.cachedDevices { 489 | if d.ID == id { 490 | return true 491 | } 492 | } 493 | return false 494 | } 495 | 496 | func (m *NvidiaDevicePlugin) deviceIDsFromUUIDs(uuids []string) []string { 497 | return uuids 498 | } 499 | 500 | func (m *NvidiaDevicePlugin) apiDevices() []*pluginapi.Device { 501 | if strings.Compare(m.migStrategy, "mixed") == 0 { 502 | var pdevs []*pluginapi.Device 503 | for _, d 
:= range m.cachedDevices {
			pdevs = append(pdevs, &d.Device)
		}
		return pdevs
	}
	devices := m.Devices()
	var res []*pluginapi.Device

	if strings.Compare(m.resourceName, util.ResourceMem) == 0 {
		for _, dev := range devices {
			klog.Infoln("memory=", dev.Memory, "id=", dev.ID)
			// 32767 virtual slices per device: the kubelet counts these to
			// track vgpu-memory capacity.
			for i := 0; i < 32767; i++ {
				res = append(res, &pluginapi.Device{
					ID:       fmt.Sprintf("%v-memory-%v", dev.ID, i),
					Health:   dev.Health,
					Topology: nil,
				})
			}
		}
		klog.Infoln("res length=", len(res))
		return res
	}
	if strings.Compare(m.resourceName, util.ResourceCores) == 0 {
		for _, dev := range devices {
			// 100 slices per device: percentage granularity for vgpu-cores.
			for i := 0; i < 100; i++ {
				res = append(res, &pluginapi.Device{
					ID:       fmt.Sprintf("%v-core-%v", dev.ID, i),
					Health:   dev.Health,
					Topology: nil,
				})
			}
		}
		return res
	}

	// Default (vgpu-number): DeviceSplitCount shares per physical device.
	for _, dev := range devices {
		for i := uint(0); i < config.DeviceSplitCount; i++ {
			id := fmt.Sprintf("%v-%v", dev.ID, i)
			res = append(res, &pluginapi.Device{
				ID:       id,
				Health:   dev.Health,
				Topology: nil,
			})
		}
	}
	return res
}

// apiEnvs builds the env map handed back in an AllocateResponse.
func (m *NvidiaDevicePlugin) apiEnvs(envvar string, deviceIDs []string) map[string]string {
	return map[string]string{
		envvar: strings.Join(deviceIDs, ","),
	}
}

// ApplyMigTemplate writes the desired MIG layout to disk and asks
// nvidia-mig-parted to realize it.
func (m *NvidiaDevicePlugin) ApplyMigTemplate() {
	data, err := yaml.Marshal(m.migCurrent)
	if err != nil {
		// BUG FIX: previously we logged the marshal failure and then fed the
		// empty payload to nvidia-mig-parted anyway; bail out instead.
		klog.Error("marshal failed", err.Error())
		return
	}
	klog.Infoln("Applying data=", string(data))
	if err := os.WriteFile("/tmp/migconfig.yaml", data, os.ModePerm); err != nil {
		// BUG FIX: a failed write previously went unnoticed and stale config
		// would be applied.
		klog.Errorf("failed to write /tmp/migconfig.yaml: %v", err)
		return
	}
	cmd := exec.Command("nvidia-mig-parted", "apply", "-f", "/tmp/migconfig.yaml")
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	err = cmd.Run()
	if err != nil {
		klog.Fatalf("nvidia-mig-parted failed with %s\n", err)
	}
	outStr := stdout.String()
	klog.Infoln("Mig apply", outStr)
}

// GetContainerDeviceStrArray resolves the UUID list for a container,
// reshaping the MIG geometry on demand when a templated UUID is seen.
func (m *NvidiaDevicePlugin) GetContainerDeviceStrArray(c util.ContainerDevices) []string {
	tmp := []string{}
	needsreset := false
	position := 0
	for _, val := range c {
		if !strings.Contains(val.UUID, "[") {
			// Plain UUID: pass straight through.
			tmp = append(tmp, val.UUID)
		} else {
			// Templated UUID: may require re-applying the MIG geometry
			// before the concrete instance UUID exists.
			devtype, devindex := util.GetIndexAndTypeFromUUID(val.UUID)
			position, needsreset = m.GenerateMigTemplate(devtype, devindex, val)
			if needsreset {
				m.ApplyMigTemplate()
			}
			tmp = append(tmp, util.GetMigUUIDFromIndex(val.UUID, position))
		}
	}
	klog.V(3).Infoln("mig current=", m.migCurrent, ":", needsreset, "position=", position, "uuid lists", tmp)
	return tmp
}

// GenerateMigTemplate computes the MIG geometry required by val on device
// devindex of model devtype. It returns the instance position extracted from
// the UUID and whether the in-memory geometry was changed (and therefore must
// be re-applied via ApplyMigTemplate).
func (m *NvidiaDevicePlugin) GenerateMigTemplate(devtype string, devindex int, val util.ContainerDevice) (int, bool) {
	needsreset := false
	position := -1 // Initialize to an invalid position

	for _, migTemplate := range m.schedulerConfig.MigGeometriesList {
		if containsModel(devtype, migTemplate.Models) {
			klog.InfoS("type found", "Type", devtype, "Models", strings.Join(migTemplate.Models, ", "))

			templateGroupName, pos, err := util.ExtractMigTemplatesFromUUID(val.UUID)
			if err != nil {
				klog.ErrorS(err, "failed to extract template index from UUID", "UUID", val.UUID)
				return -1, false
			}

			// Locate the geometry group named in the UUID.
			templateIdx := -1
			for i, migTemplateEntry := range migTemplate.Geometries {
				if migTemplateEntry.Group == templateGroupName {
					templateIdx = i
					break
				}
			}

			if templateIdx < 0 || templateIdx >= len(migTemplate.Geometries) {
				klog.ErrorS(nil, "invalid template index extracted from UUID", "UUID", val.UUID, "Index", templateIdx)
				return -1, false
			}

			position = pos

			v := migTemplate.Geometries[templateIdx].Instances

			for migidx, migpartedDev := range m.migCurrent.MigConfigs["current"] {
				if containsDevice(devindex, migpartedDev.Devices) {
					// A reset is needed whenever the current counts differ
					// from the desired geometry.
					for _, migTemplateEntry := range v {
						currentCount, ok := migpartedDev.MigDevices[migTemplateEntry.Name]
						expectedCount := migTemplateEntry.Count

						if !ok || currentCount != expectedCount {
							needsreset = true
							klog.InfoS("updated mig device count", "Template", v)
						} else {
							klog.InfoS("incremented mig device count", "TemplateName", migTemplateEntry.Name, "Count", currentCount+1)
						}
					}

					if needsreset {
						// Rebuild this entry's device map from the template.
						for k := range m.migCurrent.MigConfigs["current"][migidx].MigDevices {
							delete(m.migCurrent.MigConfigs["current"][migidx].MigDevices, k)
						}

						for _, migTemplateEntry := range v {
							m.migCurrent.MigConfigs["current"][migidx].MigDevices[migTemplateEntry.Name] = migTemplateEntry.Count
							m.migCurrent.MigConfigs["current"][migidx].MigEnabled = true
						}
					}
					break
				}
			}
			break
		}
	}

	return position, needsreset
}

// Helper function to check if a model is in the list of models.
func containsModel(target string, models []string) bool {
	for _, model := range models {
		if strings.Contains(target, model) {
			return true
		}
	}
	return false
}

// Helper function to check if a device index is in the list of devices.
func containsDevice(target int, devices []int32) bool {
	for _, device := range devices {
		if int(device) == target {
			return true
		}
	}
	return false
}
-------------------------------------------------------------------------------- /pkg/plugin/vgpu/register.go: --------------------------------------------------------------------------------

/*
Copyright 2023 The Volcano Authors.
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package vgpu 18 | 19 | import ( 20 | "fmt" 21 | "strings" 22 | "time" 23 | 24 | "github.com/NVIDIA/go-nvml/pkg/nvml" 25 | "k8s.io/klog/v2" 26 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config" 27 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/util" 28 | ) 29 | 30 | type DevListFunc func() []*Device 31 | 32 | type DeviceRegister struct { 33 | deviceCache *DeviceCache 34 | unhealthy chan *Device 35 | stopCh chan struct{} 36 | } 37 | 38 | func NewDeviceRegister(deviceCache *DeviceCache) *DeviceRegister { 39 | return &DeviceRegister{ 40 | deviceCache: deviceCache, 41 | unhealthy: make(chan *Device), 42 | stopCh: make(chan struct{}), 43 | } 44 | } 45 | 46 | func (r *DeviceRegister) Start() { 47 | r.deviceCache.AddNotifyChannel("register", r.unhealthy) 48 | go r.WatchAndRegister() 49 | } 50 | 51 | func (r *DeviceRegister) Stop() { 52 | close(r.stopCh) 53 | } 54 | 55 | func (r *DeviceRegister) apiDevices() *[]*util.DeviceInfo { 56 | devs := r.deviceCache.GetCache() 57 | res := make([]*util.DeviceInfo, 0, len(devs)) 58 | for _, dev := range devs { 59 | ndev, ret := config.Nvml().DeviceGetHandleByUUID(dev.ID) 60 | if ret != nvml.SUCCESS { 61 | fmt.Println("nvml new device by uuid error id=", dev.ID) 62 | panic(ret) 63 | } 64 | 65 | memory, ret := config.Nvml().DeviceGetMemoryInfo(ndev) 66 | if ret != nvml.SUCCESS { 67 | fmt.Println("failed to get memory info for device id=", dev.ID) 68 | 
panic(ret) 69 | } 70 | 71 | model, ret := config.Nvml().DeviceGetName(ndev) 72 | if ret != nvml.SUCCESS { 73 | fmt.Println("failed to get model name for device id=", dev.ID) 74 | panic(ret) 75 | } 76 | 77 | klog.V(3).Infoln("nvml registered device id=", dev.ID, "memory=", memory.Total, "type=", model) 78 | 79 | registeredmem := int32(memory.Total/(1024*1024)) / int32(config.GPUMemoryFactor) 80 | klog.V(3).Infoln("GPUMemoryFactor=", config.GPUMemoryFactor, "registeredmem=", registeredmem) 81 | res = append(res, &util.DeviceInfo{ 82 | Id: dev.ID, 83 | Count: int32(config.DeviceSplitCount), 84 | Devmem: registeredmem, 85 | Mode: config.Mode, 86 | Type: fmt.Sprintf("%v-%v", "NVIDIA", model), 87 | Health: strings.EqualFold(dev.Health, "healthy"), 88 | }) 89 | } 90 | return &res 91 | } 92 | 93 | func (r *DeviceRegister) RegisterInAnnotation() error { 94 | devices := r.apiDevices() 95 | annos := make(map[string]string) 96 | node, err := util.GetNode(config.NodeName) 97 | if err != nil { 98 | klog.Errorln("get node error", err.Error()) 99 | return err 100 | } 101 | encodeddevices := util.EncodeNodeDevices(*devices) 102 | annos[util.NodeHandshake] = "Reported " + time.Now().String() 103 | annos[util.NodeNvidiaDeviceRegistered] = encodeddevices 104 | klog.Infoln("Reporting devices", encodeddevices, "in", time.Now().String()) 105 | err = util.PatchNodeAnnotations(node, annos) 106 | 107 | if err != nil { 108 | klog.Errorln("patch node error", err.Error()) 109 | } 110 | return err 111 | } 112 | 113 | func (r *DeviceRegister) WatchAndRegister() { 114 | klog.Infof("into WatchAndRegister") 115 | for { 116 | if len(config.Mode) == 0 { 117 | klog.V(5).Info("register skipped, waiting for device config to be loaded") 118 | time.Sleep(time.Second * 2) 119 | continue 120 | } 121 | err := r.RegisterInAnnotation() 122 | if err != nil { 123 | klog.Errorf("register error, %v", err) 124 | time.Sleep(time.Second * 5) 125 | } else { 126 | time.Sleep(time.Second * 30) 127 | } 128 | } 129 | } 
130 | -------------------------------------------------------------------------------- /pkg/plugin/vgpu/util/types.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 The Volcano Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package util 18 | 19 | import "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config" 20 | 21 | const ( 22 | AssignedTimeAnnotations = "volcano.sh/vgpu-time" 23 | AssignedIDsAnnotations = "volcano.sh/vgpu-ids-new" 24 | AssignedIDsToAllocateAnnotations = "volcano.sh/devices-to-allocate" 25 | AssignedNodeAnnotations = "volcano.sh/vgpu-node" 26 | BindTimeAnnotations = "volcano.sh/bind-time" 27 | DeviceBindPhase = "volcano.sh/bind-phase" 28 | 29 | // PodAnnotationMaxLength pod annotation max data length 1MB 30 | PodAnnotationMaxLength = 1024 * 1024 31 | 32 | GPUInUse = "nvidia.com/use-gputype" 33 | GPUNoUse = "nvidia.com/nouse-gputype" 34 | 35 | DeviceBindAllocating = "allocating" 36 | DeviceBindFailed = "failed" 37 | DeviceBindSuccess = "success" 38 | 39 | DeviceLimit = 100 40 | 41 | BestEffort string = "best-effort" 42 | Restricted string = "restricted" 43 | Guaranteed string = "guaranteed" 44 | 45 | NvidiaGPUDevice = "NVIDIA" 46 | NvidiaGPUCommonWord = "GPU" 47 | 48 | NodeLockTime = "volcano.sh/mutex.lock" 49 | MaxLockRetry = 5 50 | 51 | NodeHandshake = "volcano.sh/node-vgpu-handshake" 52 | NodeNvidiaDeviceRegistered = 
"volcano.sh/node-vgpu-register" 53 | 54 | // DeviceName used to indicate this device 55 | VGPUDeviceName = "hamivgpu" 56 | 57 | // DeviceConfigurationConfigMapKey specifies in what ConfigMap key the device configuration should be stored 58 | DeviceConfigurationConfigMapKey = "device-config.yaml" 59 | ) 60 | 61 | var ( 62 | ResourceName string 63 | ResourceMem string 64 | ResourceCores string 65 | ResourceMemPercentage string 66 | ResourcePriority string 67 | DebugMode bool 68 | 69 | MLUResourceCount string 70 | MLUResourceMemory string 71 | 72 | KnownDevice = map[string]string{ 73 | NodeHandshake: NodeNvidiaDeviceRegistered, 74 | } 75 | ) 76 | 77 | type ContainerDevice struct { 78 | UUID string 79 | Type string 80 | Usedmem int32 81 | Usedcores int32 82 | } 83 | 84 | type ContainerDeviceRequest struct { 85 | Nums int32 86 | Type string 87 | Memreq int32 88 | MemPercentagereq int32 89 | Coresreq int32 90 | } 91 | 92 | type ContainerDevices []ContainerDevice 93 | 94 | type PodDevices []ContainerDevices 95 | 96 | type DeviceInfo struct { 97 | Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` 98 | Count int32 `protobuf:"varint,2,opt,name=count,proto3" json:"count,omitempty"` 99 | Devmem int32 `protobuf:"varint,3,opt,name=devmem,proto3" json:"devmem,omitempty"` 100 | Type string `protobuf:"bytes,4,opt,name=type,proto3" json:"type,omitempty"` 101 | Health bool `protobuf:"varint,5,opt,name=health,proto3" json:"health,omitempty"` 102 | Mode string `json:"mode,omitempty"` 103 | MIGTemplate []config.Geometry `json:"migtemplate,omitempty"` 104 | XXX_NoUnkeyedLiteral struct{} `json:"-"` 105 | XXX_unrecognized []byte `json:"-"` 106 | XXX_sizecache int32 `json:"-"` 107 | } 108 | -------------------------------------------------------------------------------- /pkg/plugin/vgpu/util/util.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2023 The Volcano Authors. 
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package util 18 | 19 | import ( 20 | "bytes" 21 | "context" 22 | "encoding/json" 23 | "errors" 24 | "flag" 25 | "fmt" 26 | "math" 27 | "os" 28 | "os/exec" 29 | "strconv" 30 | "strings" 31 | 32 | "github.com/NVIDIA/go-nvml/pkg/nvml" 33 | "gopkg.in/yaml.v2" 34 | v1 "k8s.io/api/core/v1" 35 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 36 | k8stypes "k8s.io/apimachinery/pkg/types" 37 | "k8s.io/klog/v2" 38 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" 39 | "volcano.sh/k8s-device-plugin/pkg/lock" 40 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config" 41 | ) 42 | 43 | var DevicesToHandle []string 44 | 45 | func init() { 46 | client, _ := lock.NewClient() 47 | lock.UseClient(client) 48 | DevicesToHandle = []string{} 49 | DevicesToHandle = append(DevicesToHandle, NvidiaGPUCommonWord) 50 | } 51 | 52 | func GlobalFlagSet() *flag.FlagSet { 53 | fs := flag.NewFlagSet(os.Args[0], flag.ExitOnError) 54 | fs.StringVar(&ResourceName, "resource-name", "volcano.sh/vgpu-number", "resource name") 55 | fs.StringVar(&ResourceMem, "resource-memory-name", "volcano.sh/vgpu-memory", "resource name for resource memory resources") 56 | fs.StringVar(&ResourceCores, "resource-core-name", "volcano.sh/vgpu-cores", "resource name for resource core resources") 57 | fs.BoolVar(&DebugMode, "debug", false, "debug mode") 58 | klog.InitFlags(fs) 59 | return fs 60 | } 61 | 62 | func GetNode(nodename string) 
(*v1.Node, error) {
	n, err := lock.GetClient().CoreV1().Nodes().Get(context.Background(), nodename, metav1.GetOptions{})
	return n, err
}

// GetPendingPod returns the pod with the oldest predicate time that the
// scheduler assigned to this node, or an error if none is found.
func GetPendingPod(node string) (*v1.Pod, error) {
	podList, err := lock.GetClient().CoreV1().Pods("").List(context.Background(), metav1.ListOptions{})
	if err != nil {
		return nil, err
	}

	oldestPod := getOldestPod(podList.Items, node)
	if oldestPod == nil {
		return nil, fmt.Errorf("cannot get valid pod")
	}

	return oldestPod, nil
}

// getOldestPod selects, among the pods assigned to nodename, the one with the
// smallest predicate timestamp, then stamps it with MaxUint64 so it is not
// picked again. Returns nil when no pod is assigned to this node.
func getOldestPod(pods []v1.Pod, nodename string) *v1.Pod {
	if len(pods) == 0 {
		return nil
	}
	// BUG FIX: oldest was previously initialized to pods[0] unconditionally,
	// so when no pod matched nodename an arbitrary pod was returned instead
	// of nil.
	var oldest *v1.Pod
	for i := range pods {
		pod := &pods[i]
		if pod.Annotations[AssignedNodeAnnotations] != nodename {
			continue
		}
		klog.V(4).Infof("pod %s, predicate time: %s", pod.Name, pod.Annotations[AssignedTimeAnnotations])
		if oldest == nil || getPredicateTimeFromPodAnnotation(oldest) > getPredicateTimeFromPodAnnotation(pod) {
			oldest = pod
		}
	}
	if oldest == nil {
		return nil
	}
	klog.V(4).Infof("oldest pod %#v, predicate time: %#v", oldest.Name,
		oldest.Annotations[AssignedTimeAnnotations])
	annotation := map[string]string{AssignedTimeAnnotations: strconv.FormatUint(math.MaxUint64, 10)}
	if err := PatchPodAnnotations(oldest, annotation); err != nil {
		klog.Errorf("update pod %s failed, err: %v", oldest.Name, err)
		return nil
	}
	return oldest
}

// getPredicateTimeFromPodAnnotation parses the scheduler's predicate
// timestamp; missing or invalid values sort last (MaxUint64).
func getPredicateTimeFromPodAnnotation(pod *v1.Pod) uint64 {
	assumeTimeStr, ok := pod.Annotations[AssignedTimeAnnotations]
	if !ok {
		klog.Warningf("volcano not write timestamp, pod Name: %s", pod.Name)
		return math.MaxUint64
	}
	if len(assumeTimeStr) > PodAnnotationMaxLength {
		klog.Warningf("timestamp fmt invalid, pod Name: %s", pod.Name)
		return math.MaxUint64
	}
	predicateTime, err := strconv.ParseUint(assumeTimeStr, 10, 64)
	if err != nil {
		klog.Errorf("parse timestamp failed, %v", err)
		return math.MaxUint64
	}
	return predicateTime
}

// DecodeNodeDevices parses the ":"-separated records written by
// EncodeNodeDevices back into DeviceInfo values.
func DecodeNodeDevices(str string) []*DeviceInfo {
	if !strings.Contains(str, ":") {
		return []*DeviceInfo{}
	}
	var retval []*DeviceInfo
	for _, val := range strings.Split(str, ":") {
		if !strings.Contains(val, ",") {
			continue
		}
		items := strings.Split(val, ",")
		// BUG FIX: items[4] was indexed without a bounds check; skip
		// malformed records instead of panicking.
		if len(items) < 5 {
			klog.Warningf("skipping malformed device entry %q", val)
			continue
		}
		count, _ := strconv.Atoi(items[1])
		devmem, _ := strconv.Atoi(items[2])
		health, _ := strconv.ParseBool(items[4])
		i := DeviceInfo{
			Id:     items[0],
			Count:  int32(count),
			Devmem: int32(devmem),
			Type:   items[3],
			Health: health,
		}
		// BUG FIX: EncodeNodeDevices writes a sixth "Mode" field that was
		// silently dropped on decode.
		if len(items) >= 6 {
			i.Mode = items[5]
		}
		retval = append(retval, &i)
	}
	return retval
}

// EncodeNodeDevices serializes the inventory as
// "id,count,devmem,type,health,mode:" records.
func EncodeNodeDevices(dlist []*DeviceInfo) string {
	tmp := ""
	for _, val := range dlist {
		tmp += val.Id + "," + strconv.FormatInt(int64(val.Count), 10) + "," + strconv.Itoa(int(val.Devmem)) + "," + val.Type + "," + strconv.FormatBool(val.Health) + "," + val.Mode + ":"
	}
	klog.V(3).Infoln("Encoded node Devices", tmp)
	return tmp
}

// EncodeContainerDevices serializes one container's devices as
// "uuid,type,usedmem,usedcores:" records.
func EncodeContainerDevices(cd ContainerDevices) string {
	tmp := ""
	for _, val := range cd {
		tmp += val.UUID + "," + val.Type + "," + strconv.Itoa(int(val.Usedmem)) + "," + strconv.Itoa(int(val.Usedcores)) + ":"
	}
	// CONSISTENCY FIX: use klog like the rest of this file instead of
	// fmt.Println.
	klog.V(3).Infoln("Encoded container Devices=", tmp)
	return tmp
	//return strings.Join(cd, ",")
}

// EncodePodDevices joins per-container encodings with ";".
func EncodePodDevices(pd PodDevices) string {
	var ss []string
	for _, cd := range pd {
		ss = append(ss, EncodeContainerDevices(cd))
	}
	return strings.Join(ss, ";")
}

// DecodeContainerDevices parses one container's
// "uuid,type,usedmem,usedcores:" record list.
func DecodeContainerDevices(str string) ContainerDevices {
	if len(str) == 0 {
		return ContainerDevices{}
	}
	cd := strings.Split(str, ":")
	contdev := ContainerDevices{}
	tmpdev := ContainerDevice{}
	// NOTE(review): this second emptiness check is redundant with the one
	// above; kept for structural compatibility.
	if len(str) == 0 {
		return
contdev
	}
	for _, val := range cd {
		if strings.Contains(val, ",") {
			tmpstr := strings.Split(val, ",")
			tmpdev.UUID = tmpstr[0]
			tmpdev.Type = tmpstr[1]
			devmem, _ := strconv.ParseInt(tmpstr[2], 10, 32)
			tmpdev.Usedmem = int32(devmem)
			devcores, _ := strconv.ParseInt(tmpstr[3], 10, 32)
			tmpdev.Usedcores = int32(devcores)
			contdev = append(contdev, tmpdev)
		}
	}
	return contdev
}

// DecodePodDevices splits a pod annotation into per-container device lists.
func DecodePodDevices(str string) PodDevices {
	if len(str) == 0 {
		return PodDevices{}
	}
	var pd PodDevices
	for _, s := range strings.Split(str, ";") {
		cd := DecodeContainerDevices(s)
		pd = append(pd, cd)
	}
	return pd
}

// GetNextDeviceRequest returns the first container (and its devices of type
// dtype) still pending allocation according to the pod's
// devices-to-allocate annotation.
func GetNextDeviceRequest(dtype string, p v1.Pod) (v1.Container, ContainerDevices, error) {
	pdevices := DecodePodDevices(p.Annotations[AssignedIDsToAllocateAnnotations])
	klog.Infoln("pdevices=", pdevices)
	res := ContainerDevices{}
	for idx, val := range pdevices {
		found := false
		for _, dev := range val {
			if strings.Compare(dtype, dev.Type) == 0 {
				res = append(res, dev)
				found = true
			}
		}
		if found {
			// BUG FIX: a malformed annotation with more entries than
			// containers previously caused an index-out-of-range panic.
			if idx >= len(p.Spec.Containers) {
				return v1.Container{}, res, errors.New("device annotation has more entries than pod containers")
			}
			return p.Spec.Containers[idx], res, nil
		}
	}
	return v1.Container{}, res, errors.New("device request not found")
}

// EraseNextDeviceTypeFromAnnotation removes the first container entry of type
// dtype from the devices-to-allocate annotation and patches the pod.
func EraseNextDeviceTypeFromAnnotation(dtype string, p v1.Pod) error {
	pdevices := DecodePodDevices(p.Annotations[AssignedIDsToAllocateAnnotations])
	res := PodDevices{}
	found := false
	for _, val := range pdevices {
		if found {
			// Already erased: keep the remaining entries untouched.
			res = append(res, val)
			continue
		} else {
			tmp := ContainerDevices{}
			for _, dev := range val {
				if strings.Compare(dtype, dev.Type) == 0 {
					found = true
				} else {
					tmp = append(tmp, dev)
				}
			}
			if !found {
				res = append(res, val)
			} else {
				res = append(res, tmp)
			}
		}
	}
	klog.Infoln("After erase res=", res)
	newannos := make(map[string]string)
	newannos[AssignedIDsToAllocateAnnotations] = EncodePodDevices(res)
	return PatchPodAnnotations(&p, newannos)
}

// PodAllocationTrySuccess marks the allocation successful (and releases the
// node lock) once no handled device type remains in the pod's
// devices-to-allocate annotation.
func PodAllocationTrySuccess(nodeName string, pod *v1.Pod) {
	refreshed, err := lock.GetClient().CoreV1().Pods(pod.Namespace).Get(context.Background(), pod.Name, metav1.GetOptions{})
	if err != nil || refreshed == nil {
		// BUG FIX: the Get error was discarded and a nil pod dereferenced
		// below. The node lock is left for its normal expiry path here.
		klog.Errorf("refresh pod %s failed: %v", pod.Name, err)
		return
	}
	annos := refreshed.Annotations[AssignedIDsToAllocateAnnotations]
	klog.Infoln("TrySuccess:", annos)
	for _, val := range DevicesToHandle {
		if strings.Contains(annos, val) {
			return
		}
	}
	klog.Infoln("AllDevicesAllocateSuccess releasing lock")
	PodAllocationSuccess(nodeName, pod)
}

// PodAllocationSuccess records the success bind phase and releases the node lock.
func PodAllocationSuccess(nodeName string, pod *v1.Pod) {
	newannos := make(map[string]string)
	newannos[DeviceBindPhase] = DeviceBindSuccess
	err := PatchPodAnnotations(pod, newannos)
	if err != nil {
		klog.Errorf("patchPodAnnotations failed:%v", err.Error())
	}
	err = lock.ReleaseNodeLock(nodeName, VGPUDeviceName)
	if err != nil {
		klog.Errorf("release lock failed:%v", err.Error())
	}
}

// PodAllocationFailed records the failed bind phase and releases the node lock.
func PodAllocationFailed(nodeName string, pod *v1.Pod) {
	newannos := make(map[string]string)
	newannos[DeviceBindPhase] = DeviceBindFailed
	err := PatchPodAnnotations(pod, newannos)
	if err != nil {
		klog.Errorf("patchPodAnnotations failed:%v", err.Error())
	}
	err = lock.ReleaseNodeLock(nodeName, VGPUDeviceName)
	if err != nil {
		klog.Errorf("release lock failed:%v", err.Error())
	}
}

// PatchNodeAnnotations strategic-merge-patches the given annotations onto a node.
func PatchNodeAnnotations(node *v1.Node, annotations map[string]string) error {
	type patchMetadata struct {
		Annotations map[string]string `json:"annotations,omitempty"`
	}
	type patchNode struct {
		Metadata patchMetadata `json:"metadata"`
	}

	p := patchNode{}
	p.Metadata.Annotations = annotations

	bytes, err := json.Marshal(p)
	if err != nil {
		return err
	}
	_, err = lock.GetClient().CoreV1().Nodes().
		Patch(context.Background(), node.Name, k8stypes.StrategicMergePatchType, bytes, metav1.PatchOptions{})
	if err != nil {
		// BUG FIX: the message previously said "patch pod" for a node patch.
		klog.Infof("patch node %v failed, %v", node.Name, err)
	}
	return err
}

// PatchPodAnnotations strategic-merge-patches the given annotations onto a pod.
func PatchPodAnnotations(pod *v1.Pod, annotations map[string]string) error {
	type patchMetadata struct {
		Annotations map[string]string `json:"annotations,omitempty"`
	}
	type patchPod struct {
		Metadata patchMetadata `json:"metadata"`
		//Spec patchSpec `json:"spec,omitempty"`
	}

	p := patchPod{}
	p.Metadata.Annotations = annotations

	bytes, err := json.Marshal(p)
	if err != nil {
		return err
	}
	_, err = lock.GetClient().CoreV1().Pods(pod.Namespace).
		Patch(context.Background(), pod.Name, k8stypes.StrategicMergePatchType, bytes, metav1.PatchOptions{})
	if err != nil {
		klog.Infof("patch pod %v failed, %v", pod.Name, err)
	}
	return err
}

// LoadConfigFromCM loads the device configuration from cmName, looking first
// in kube-system and then in volcano-system.
func LoadConfigFromCM(cmName string) (*config.Config, error) {
	lock.NewClient()
	cm, err := lock.GetClient().CoreV1().ConfigMaps("kube-system").Get(context.Background(), cmName, metav1.GetOptions{})
	if err != nil {
		cm, err = lock.GetClient().CoreV1().ConfigMaps("volcano-system").Get(context.Background(), cmName, metav1.GetOptions{})
		if err != nil {
			return nil, err
		}
	}
	data, ok := cm.Data[DeviceConfigurationConfigMapKey]
	if !ok {
		return nil, fmt.Errorf("%v not found in ConfigMap %v", DeviceConfigurationConfigMapKey, cmName)
	}
	var yamlData config.Config
	err = yaml.Unmarshal([]byte(data), &yamlData)
	if err != nil {
		return nil, err
	}
	return &yamlData, nil
} 367 | 368 | func LoadConfig(path string) (*config.Config, error) { 369 | data, err := os.ReadFile(path) 370 | if err != nil { 371 | return nil, err 372 | } 373 | var yamlData config.Config 374 | err = yaml.Unmarshal(data, &yamlData) 375 | if err != nil { 376 | return nil, err 377 | } 378 | return &yamlData, nil 379 | } 380 | 381 | func GenerateVirtualDeviceID(id uint, fakeCounter uint) string { 382 | return fmt.Sprintf("%d-%d", id, fakeCounter) 383 | } 384 | 385 | // GetDevices returns virtual devices and all physical devices by index. 386 | func GetDevices(gpuMemoryFactor uint) ([]*pluginapi.Device, map[uint]string) { 387 | n, ret := config.Nvml().DeviceGetCount() 388 | if ret != nvml.SUCCESS { 389 | klog.Fatalf("call nvml.DeviceGetCount with error: %v", ret) 390 | } 391 | 392 | var virtualDevs []*pluginapi.Device 393 | deviceByIndex := map[uint]string{} 394 | for i := uint(0); i < uint(n); i++ { 395 | d, ret := config.Nvml().DeviceGetHandleByIndex(int(i)) 396 | if ret != nvml.SUCCESS { 397 | klog.Fatalf("call nvml.DeviceGetHandleByIndex with error: %v", ret) 398 | } 399 | uuid, ret := d.GetUUID() 400 | if ret != nvml.SUCCESS { 401 | klog.Fatalf("call GetUUID with error: %v", ret) 402 | } 403 | id := i 404 | deviceByIndex[id] = uuid 405 | memory, ret := d.GetMemoryInfo() 406 | if ret != nvml.SUCCESS { 407 | klog.Fatalf("call GetMemoryInfo with error: %v", ret) 408 | } 409 | deviceGPUMemory := uint(memory.Total / (1024 * 1024)) 410 | for j := uint(0); j < deviceGPUMemory/gpuMemoryFactor; j++ { 411 | klog.V(4).Infof("adding virtual device: %d", j) 412 | fakeID := GenerateVirtualDeviceID(id, j) 413 | virtualDevs = append(virtualDevs, &pluginapi.Device{ 414 | ID: fakeID, 415 | Health: pluginapi.Healthy, 416 | }) 417 | } 418 | } 419 | 420 | return virtualDevs, deviceByIndex 421 | } 422 | 423 | func GetDeviceNums() int { 424 | count, ret := config.Nvml().DeviceGetCount() 425 | if ret != nvml.SUCCESS { 426 | klog.Error(`nvml get count error ret=`, ret) 427 | } 428 | 
return count 429 | } 430 | 431 | func GetIndexAndTypeFromUUID(uuid string) (string, int) { 432 | originuuid := strings.Split(uuid, "[")[0] 433 | ndev, ret := config.Nvml().DeviceGetHandleByUUID(originuuid) 434 | if ret != nvml.SUCCESS { 435 | klog.Error("nvml get handlebyuuid error ret=", ret) 436 | panic(0) 437 | } 438 | model, ret := ndev.GetName() 439 | if ret != nvml.SUCCESS { 440 | klog.Error("nvml get name error ret=", ret) 441 | panic(0) 442 | } 443 | index, ret := ndev.GetIndex() 444 | if ret != nvml.SUCCESS { 445 | klog.Error("nvml get index error ret=", ret) 446 | panic(0) 447 | } 448 | return model, index 449 | } 450 | 451 | func GetMigUUIDFromIndex(uuid string, idx int) string { 452 | originuuid := strings.Split(uuid, "[")[0] 453 | ndev, ret := config.Nvml().DeviceGetHandleByUUID(originuuid) 454 | if ret != nvml.SUCCESS { 455 | klog.Error(`nvml get device uuid error ret=`, ret) 456 | panic(0) 457 | } 458 | migdev, ret := config.Nvml().DeviceGetMigDeviceHandleByIndex(ndev, idx) 459 | if ret != nvml.SUCCESS { 460 | klog.Error("nvml get mig dev error ret=", ret, ",idx=", idx, "using nvidia-smi -L for query") 461 | cmd := exec.Command("nvidia-smi", "-L") 462 | var stdout, stderr bytes.Buffer 463 | cmd.Stdout = &stdout 464 | cmd.Stderr = &stderr 465 | err := cmd.Run() 466 | if err != nil { 467 | klog.Fatalf("nvidia-smi -L failed with %s\n", err) 468 | } 469 | outStr := stdout.String() 470 | uuid := GetMigUUIDFromSmiOutput(outStr, originuuid, idx) 471 | return uuid 472 | } 473 | res, ret := migdev.GetUUID() 474 | if ret != nvml.SUCCESS { 475 | klog.Error(`nvml get mig uuid error ret=`, ret) 476 | panic(0) 477 | } 478 | return res 479 | } 480 | 481 | func GetMigUUIDFromSmiOutput(output string, uuid string, idx int) string { 482 | migmode := false 483 | for _, val := range strings.Split(output, "\n") { 484 | if !strings.Contains(val, "MIG") && strings.Contains(val, uuid) { 485 | migmode = true 486 | continue 487 | } 488 | if !strings.Contains(val, "MIG") && 
!strings.Contains(val, uuid) { 489 | migmode = false 490 | continue 491 | } 492 | if !migmode { 493 | continue 494 | } 495 | klog.Infoln("inspecting", val) 496 | num := strings.Split(val, "Device")[1] 497 | num = strings.Split(num, ":")[0] 498 | num = strings.TrimSpace(num) 499 | index, err := strconv.Atoi(num) 500 | if err != nil { 501 | klog.Fatal("atoi failed num=", num) 502 | } 503 | if index == idx { 504 | outputStr := strings.Split(val, ":")[2] 505 | outputStr = strings.TrimSpace(outputStr) 506 | outputStr = strings.TrimRight(outputStr, ")") 507 | return outputStr 508 | } 509 | } 510 | return "" 511 | } 512 | 513 | // Enhanced ExtractMigTemplatesFromUUID with error handling. 514 | func ExtractMigTemplatesFromUUID(uuid string) (string, int, error) { 515 | parts := strings.Split(uuid, "[") 516 | if len(parts) < 2 { 517 | return "", -1, fmt.Errorf("invalid UUID format: missing '[' delimiter") 518 | } 519 | 520 | tmp := parts[1] 521 | parts = strings.Split(tmp, "]") 522 | if len(parts) < 2 { 523 | return "", -1, fmt.Errorf("invalid UUID format: missing ']' delimiter") 524 | } 525 | 526 | tmp = parts[0] 527 | parts = strings.Split(tmp, "-") 528 | if len(parts) < 2 { 529 | return "", -1, fmt.Errorf("invalid UUID format: missing '-' delimiter") 530 | } 531 | 532 | templateGroupName := strings.TrimSpace(parts[0]) 533 | if len(templateGroupName) == 0 { 534 | return "", -1, fmt.Errorf("invalid UUID format: missing template group name") 535 | } 536 | 537 | pos, err := strconv.Atoi(parts[1]) 538 | if err != nil { 539 | return "", -1, fmt.Errorf("invalid position: %v", err) 540 | } 541 | 542 | return templateGroupName, pos, nil 543 | } 544 | 545 | func LoadNvidiaConfig() *config.NvidiaConfig { 546 | configs, err := LoadConfigFromCM("volcano-vgpu-device-config") 547 | if err != nil { 548 | klog.InfoS("configMap not found", err.Error()) 549 | } 550 | nvidiaConfig := config.NvidiaConfig{} 551 | if configs != nil { 552 | nvidiaConfig = configs.NvidiaConfig 553 | } 554 | 
nvidiaConfig.DeviceSplitCount = config.DeviceSplitCount 555 | nvidiaConfig.DeviceCoreScaling = config.DeviceCoresScaling 556 | nvidiaConfig.GPUMemoryFactor = config.GPUMemoryFactor 557 | if err := readFromConfigFile(&nvidiaConfig); err != nil { 558 | klog.InfoS("readFrom device cm error", err.Error()) 559 | } 560 | klog.Infoln("Loaded config=", nvidiaConfig) 561 | return &nvidiaConfig 562 | } 563 | 564 | func readFromConfigFile(sConfig *config.NvidiaConfig) error { 565 | config.Mode = "hami-core" 566 | jsonbyte, err := os.ReadFile("/config/config.json") 567 | if err != nil { 568 | return err 569 | } 570 | var deviceConfigs config.DevicePluginConfigs 571 | err = json.Unmarshal(jsonbyte, &deviceConfigs) 572 | if err != nil { 573 | return err 574 | } 575 | klog.Infof("Device Plugin Configs: %v", fmt.Sprintf("%v", deviceConfigs)) 576 | for _, val := range deviceConfigs.Nodeconfig { 577 | if os.Getenv("NODE_NAME") == val.Name { 578 | klog.Infof("Reading config from file %s", val.Name) 579 | if val.Devicememoryscaling > 0 { 580 | sConfig.DeviceMemoryScaling = val.Devicememoryscaling 581 | } 582 | if val.Devicecorescaling > 0 { 583 | sConfig.DeviceCoreScaling = val.Devicecorescaling 584 | } 585 | if val.Devicesplitcount > 0 { 586 | sConfig.DeviceSplitCount = val.Devicesplitcount 587 | } 588 | if val.FilterDevice != nil && (len(val.FilterDevice.UUID) > 0 || len(val.FilterDevice.Index) > 0) { 589 | config.DevicePluginFilterDevice = val.FilterDevice 590 | } 591 | if len(val.OperatingMode) > 0 { 592 | config.Mode = val.OperatingMode 593 | } 594 | klog.Infof("FilterDevice: %v", val.FilterDevice) 595 | } 596 | } 597 | return nil 598 | } 599 | -------------------------------------------------------------------------------- /volcano-vgpu-device-plugin.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: volcano-vgpu-device-config
  namespace: kube-system
  labels:
    app.kubernetes.io/component: volcano-vgpu-device-plugin
data:
  device-config.yaml: |-
    nvidia:
      resourceCountName: volcano.sh/vgpu-number
      resourceMemoryName: volcano.sh/vgpu-memory
      resourceMemoryPercentageName: volcano.sh/vgpu-memory-percentage
      resourceCoreName: volcano.sh/vgpu-cores
      overwriteEnv: false
      defaultMemory: 0
      defaultCores: 0
      defaultGPUNum: 1
      deviceSplitCount: 10
      deviceMemoryScaling: 1
      deviceCoreScaling: 1
      gpuMemoryFactor: 1
      knownMigGeometries:
        - models: [ "A30" ]
          allowedGeometries:
            - group: group1
              geometries:
                - name: 1g.6gb
                  memory: 6144
                  count: 4
            - group: group2
              geometries:
                - name: 2g.12gb
                  memory: 12288
                  count: 2
            - group: group3
              geometries:
                - name: 4g.24gb
                  memory: 24576
                  count: 1
        - models: [ "A100-SXM4-40GB", "A100-40GB-PCIe", "A100-PCIE-40GB", "A100-SXM4-40GB" ]
          allowedGeometries:
            - group: "group1"
              geometries:
                - name: 1g.5gb
                  memory: 5120
                  count: 7
            - group: "group2"
              geometries:
                - name: 2g.10gb
                  memory: 10240
                  count: 3
                - name: 1g.5gb
                  memory: 5120
                  count: 1
            - group: "group3"
              geometries:
                - name: 3g.20gb
                  memory: 20480
                  count: 2
            - group: "group4"
              geometries:
                - name: 7g.40gb
                  memory: 40960
                  count: 1
        - models: [ "A100-SXM4-80GB", "A100-80GB-PCIe", "A100-PCIE-80GB" ]
          allowedGeometries:
            - group: "group1"
              geometries:
                - name: 1g.10gb
                  memory: 10240
                  count: 7
            - group: "group2"
              geometries:
                - name: 2g.20gb
                  memory: 20480
                  count: 3
                - name: 1g.10gb
                  memory: 10240
                  count: 1
            - group: "group3"
              geometries:
                - name: 3g.40gb
                  memory: 40960
                  count: 2
            - group: "group4"
              geometries:
                - name: 7g.79gb
                  memory: 80896
                  count: 1
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: volcano-vgpu-node-config
  namespace: kube-system
  labels:
    app.kubernetes.io/component: volcano-vgpu-node-plugin
data:
  config.json: |
    {
      "nodeconfig": [
        {
          "name": "aio-node67",
          "operatingmode": "hami-core",
          "devicememoryscaling": 1.8,
          "devicesplitcount": 10,
          "migstrategy": "none",
          "filterdevices": {
            "uuid": [],
            "index": []
          }
        }
      ]
    }
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: volcano-device-plugin
  namespace: kube-system
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: volcano-device-plugin
rules:
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["get", "list", "watch", "update", "patch"]
  - apiGroups: [""]
    resources: ["nodes/status"]
    verbs: ["patch"]
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get", "list", "update", "patch", "watch"]
  - apiGroups: [""]
    resources: ["configmaps"]
    verbs: ["get", "list", "watch", "create", "update"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: volcano-device-plugin
subjects:
  - kind: ServiceAccount
    name: volcano-device-plugin
    namespace: kube-system
roleRef:
  kind: ClusterRole
  name: volcano-device-plugin
  apiGroup: rbac.authorization.k8s.io
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: volcano-device-plugin
  namespace: kube-system
spec:
  selector:
    matchLabels:
      name: volcano-device-plugin
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      # This annotation is deprecated. Kept here for backward compatibility
      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ""
      labels:
        name: volcano-device-plugin
    spec:
      tolerations:
        # This toleration is deprecated. Kept here for backward compatibility
        # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
        - key: CriticalAddonsOnly
          operator: Exists
        - key: volcano.sh/gpu-memory
          operator: Exists
          effect: NoSchedule
      # Mark this pod as a critical add-on; when enabled, the critical add-on
      # scheduler reserves resources for critical add-on pods so that they can
      # be rescheduled after a failure.
      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
      priorityClassName: "system-node-critical"
      # serviceAccountName replaces the deprecated serviceAccount field.
      serviceAccountName: volcano-device-plugin
      containers:
        - image: docker.io/projecthami/volcano-vgpu-device-plugin:v1.9.4
          args: ["--device-split-count=10"]
          lifecycle:
            postStart:
              exec:
                command: ["/bin/sh", "-c", "cp -f /k8s-vgpu/lib/nvidia/* /usr/local/vgpu/"]
          name: volcano-device-plugin
          env:
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: HOOK_PATH
              value: "/usr/local/vgpu"
            - name: NVIDIA_VISIBLE_DEVICES
              value: "all"
            - name: NVIDIA_MIG_MONITOR_DEVICES
              value: "all"
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: "utility"
          securityContext:
            allowPrivilegeEscalation: true
            # Fix: this field was misspelled "previleged"; the unknown field
            # was dropped by the API server, so the container never actually
            # ran privileged as intended.
            privileged: true
            capabilities:
              drop: ["ALL"]
              add: ["SYS_ADMIN"]
          volumeMounts:
            - name: deviceconfig
              mountPath: /config
            - name: device-plugin
              mountPath: /var/lib/kubelet/device-plugins
            - name: lib
              mountPath: /usr/local/vgpu
            - name: hosttmp
              mountPath: /tmp
        - image: docker.io/projecthami/volcano-vgpu-device-plugin:v1.9.4
          name: monitor
          command:
            - /bin/bash
            - -c
            - volcano-vgpu-monitor
          env:
            - name: NVIDIA_VISIBLE_DEVICES
              value: "all"
            - name: NVIDIA_MIG_MONITOR_DEVICES
              value: "all"
            - name: HOOK_PATH
              value: "/tmp/vgpu"
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          securityContext:
            privileged: true
            allowPrivilegeEscalation: true
            capabilities:
              drop: ["ALL"]
              add: ["SYS_ADMIN"]
          volumeMounts:
            - name: dockers
              mountPath: /run/docker
            - name: containerds
              mountPath: /run/containerd
            - name: sysinfo
              mountPath: /sysinfo
            - name: hostvar
              mountPath: /hostvar
            - name: hosttmp
              mountPath: /tmp
      volumes:
        - name: deviceconfig
          configMap:
            name: volcano-vgpu-node-config
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins
            type: Directory
        - name: lib
          hostPath:
            path: /usr/local/vgpu
            type: DirectoryOrCreate
        - name: hosttmp
          hostPath:
            path: /tmp
            type: DirectoryOrCreate
        - name: dockers
          hostPath:
            path: /run/docker
            type: DirectoryOrCreate
        - name: containerds
          hostPath:
            path: /run/containerd
            type: DirectoryOrCreate
        # NOTE(review): "usrbin" is declared but not mounted by either
        # container above — confirm whether it is still needed.
        - name: usrbin
          hostPath:
            path: /usr/bin
            type: Directory
        - name: sysinfo
          hostPath:
            path: /sys
            type: Directory
        - name: hostvar
          hostPath:
            path: /var
            type: Directory