├── .dockerignore
├── .github
└── workflows
│ ├── dev-image-build.yaml
│ └── release-image-build.yml
├── .gitignore
├── .gitmodules
├── LICENSE
├── Makefile
├── Makefile.def
├── OWNERS
├── README.md
├── cmd
├── vgpu-monitor
│ ├── feedback.go
│ ├── main.go
│ ├── metrics.go
│ └── validation.go
└── vgpu
│ ├── main.go
│ └── watchers.go
├── doc
├── config.md
├── design.md
├── example.png
├── hard_limit.jpg
├── vgpu-on-volcano.pdf
└── vgpu_device_plugin_metrics.png
├── docker
└── Dockerfile.ubuntu20.04
├── examples
├── gpu-share.yml
├── vgpu-case01.yml
├── vgpu-case02.yml
├── vgpu-case03.yml
└── vgpu-deployment.yaml
├── go.mod
├── go.sum
├── lib
└── nvidia
│ └── ld.so.preload
├── pkg
├── apis
│ ├── config.go
│ ├── flags.go
│ └── flags_test.go
├── filewatcher
│ └── filewatcher.go
├── gpu
│ └── doc.go
├── lock
│ └── nodelock.go
├── monitor
│ └── nvidia
│ │ ├── cudevshr.go
│ │ ├── v0
│ │ └── spec.go
│ │ └── v1
│ │ └── spec.go
└── plugin
│ ├── interface.go
│ └── vgpu
│ ├── cache.go
│ ├── config
│ ├── config.go
│ └── version.go
│ ├── helper.go
│ ├── mig-strategy.go
│ ├── mig.go
│ ├── nvidia.go
│ ├── plugin.go
│ ├── register.go
│ └── util
│ ├── types.go
│ └── util.go
└── volcano-vgpu-device-plugin.yml
/.dockerignore:
--------------------------------------------------------------------------------
1 | # Ignore version control directories
2 | .git/
3 | .github/
4 |
5 | # Ignore build and docs directories
6 | _output/
7 | doc/
8 | examples/
9 | README.md
10 | OWNERS
11 | LICENSE
12 |
13 | # Ignore IDE and OS files
14 | *.DS_Store
15 | .idea/
16 | .vscode/
17 | *.iml
18 |
19 | # Ignore Docker-specific files
20 | docker/
21 |
--------------------------------------------------------------------------------
/.github/workflows/dev-image-build.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2024 NVIDIA CORPORATION
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # Run this workflow on pull requests or merge to main/dev branches
16 | name: Build Dev Image
17 |
18 | on:
19 | push:
20 | branches:
21 | - main
22 | - dev-vgpu-1219
23 | pull_request_target:
24 | types:
25 | - opened
26 | - synchronize
27 | - reopened
28 |
29 | jobs:
30 | build:
31 | runs-on: ubuntu-latest
32 | steps:
33 | - uses: actions/checkout@v2
34 | - uses: actions/setup-go@v2
35 | with:
36 | go-version: "^1.19.x"
37 | - name: Checkout submodule
38 | uses: Mushus/checkout-submodule@v1.0.1
39 | with:
40 | submodulePath: libvgpu
41 | - run: go version
42 | - name: Get branch name
43 | uses: nelonoel/branch-name@v1.0.1
44 | - name: Docker Login
45 | uses: docker/login-action@v2.1.0
46 | with:
47 | username: ${{ secrets.DOCKERHUB_TOKEN }}
48 | password: ${{ secrets.DOCKERHUB_PASSWD }}
49 | - name: Set up Docker Buildx
50 | id: buildx
51 | uses: docker/setup-buildx-action@v1
52 | - name: Generating image tag
53 | id: runtime-tag
54 | run: |
55 | echo tag="$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
56 | - run: BUILD_PLATFORMS="linux/amd64,linux/arm64" VERSION="${BRANCH_NAME}-${{ steps.runtime-tag.outputs.tag }}" make push-short
57 | - run: BUILD_PLATFORMS="linux/amd64,linux/arm64" VERSION="${BRANCH_NAME}-${{ steps.runtime-tag.outputs.tag }}" make push-latest
58 |
--------------------------------------------------------------------------------
/.github/workflows/release-image-build.yml:
--------------------------------------------------------------------------------
1 | # Copyright 2024 NVIDIA CORPORATION
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # Run this workflow on new tags
16 | name: Build Release Image
17 |
18 | on:
19 | push:
20 | tags:
21 | - v[0-9]+.[0-9]+.[0-9]+
22 |
23 | jobs:
24 | build:
25 | runs-on: ubuntu-latest
26 | steps:
27 | - uses: actions/checkout@v2
28 | - uses: actions/setup-go@v2
29 | with:
30 | go-version: "^1.19.x"
31 | - name: Checkout submodule
32 | uses: Mushus/checkout-submodule@v1.0.1
33 | with:
34 | submodulePath: libvgpu
35 | - run: go version
36 | - name: Get branch name
37 | uses: nelonoel/branch-name@v1.0.1
38 | - name: Docker Login
39 | uses: docker/login-action@v2.1.0
40 | with:
41 | username: ${{ secrets.DOCKERHUB_TOKEN }}
42 | password: ${{ secrets.DOCKERHUB_PASSWD }}
43 | - name: Set up Docker Buildx
44 | id: buildx
45 | uses: docker/setup-buildx-action@v1
46 | - name: Generating image tag
47 | id: runtime-tag
48 | run: |
49 | echo tag="$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
50 | - run: BUILD_PLATFORMS="linux/amd64,linux/arm64" VERSION="${BRANCH_NAME}" make push-short
51 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # OSX leaves these everywhere on SMB shares
2 | ._*
3 |
4 | # OSX trash
5 | .DS_Store
6 |
7 | # Eclipse files
8 | .classpath
9 | .project
10 | .settings/**
11 |
12 | # Files generated by JetBrains IDEs, e.g. IntelliJ IDEA
13 | .idea/
14 | *.iml
15 |
16 | # Vscode files
17 | .vscode
18 |
19 | # This is where the result of the go build goes
20 | /output*/
21 | /_output*/
22 | /_output
23 |
24 | # Emacs save files
25 | *~
26 | \#*\#
27 | .\#*
28 |
29 | # Vim-related files
30 | [._]*.s[a-w][a-z]
31 | [._]s[a-w][a-z]
32 | *.un~
33 | Session.vim
34 | .netrwhist
35 |
36 | # cscope-related files
37 | cscope.*
38 |
39 | # Go test binaries
40 | *.test
41 | /hack/.test-cmd-auth
42 |
43 | # JUnit test output from ginkgo e2e tests
44 | /junit*.xml
45 |
46 | # Mercurial files
47 | **/.hg
48 | **/.hg*
49 |
50 | # Vagrant
51 | .vagrant
52 | network_closure.sh
53 |
54 | # Local cluster env variables
55 | /cluster/env.sh
56 |
57 | # Compiled binaries in third_party
58 | /third_party/pkg
59 |
60 | # Also ignore etcd installed by hack/install-etcd.sh
61 | /third_party/etcd*
62 | /default.etcd
63 |
64 | # User cluster configs
65 | .kubeconfig
66 |
67 | .tags*
68 |
69 | # Version file for dockerized build
70 | .dockerized-kube-version-defs
71 |
72 | # Web UI
73 | /www/master/node_modules/
74 | /www/master/npm-debug.log
75 | /www/master/shared/config/development.json
76 |
77 | # Karma output
78 | /www/test_out
79 |
80 | # precommit temporary directories created by ./hack/verify-generated-docs.sh and ./hack/lib/util.sh
81 | /_tmp/
82 | /doc_tmp/
83 |
84 | # Test artifacts produced by Jenkins jobs
85 | /_artifacts/
86 |
87 | # Go dependencies installed on Jenkins
88 | /_gopath/
89 |
90 | # Config directories created by gcloud and gsutil on Jenkins
91 | /.config/gcloud*/
92 | /.gsutil/
93 |
94 | # CoreOS stuff
95 | /cluster/libvirt-coreos/coreos_*.img
96 |
97 | # Juju Stuff
98 | /cluster/juju/charms/*
99 | /cluster/juju/bundles/local.yaml
100 |
101 | # Downloaded Kubernetes binary release
102 | /kubernetes/
103 |
104 | # direnv .envrc files
105 | .envrc
106 |
107 | # Downloaded kubernetes binary release tar ball
108 | kubernetes.tar.gz
109 |
110 | # generated files in any directory
111 | # TODO(thockin): uncomment this when we stop committing the generated files.
112 | #zz_generated.*
113 | #zz_generated.openapi.go
114 |
115 | # make-related metadata
116 | /.make/
117 | # Just in time generated data in the source, should never be committed
118 | /test/e2e/generated/bindata.go
119 |
120 | # This file used by some vendor repos (e.g. github.com/go-openapi/...) to store secret variables and should not be ignored
121 | !\.drone\.sec
122 |
123 | /bazel-*
124 | *.pyc
125 |
126 | # e2e log files
127 | *.log
128 |
129 | # test coverage file
130 | coverage.txt
131 |
132 | updateso.sh
133 | volcano-vgpu-device-plugin
134 |
135 | lib/nvidia/libvgpu/build
136 |
137 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "libvgpu"]
2 | path = libvgpu
3 | url = https://github.com/Project-HAMi/HAMi-core.git
4 | branch = main
5 |
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | .DEFAULT_GOAL := all
17 |
18 | include Makefile.def
19 |
20 | ##### Global variables #####
21 | REGISTRY ?= projecthami
22 | VERSION ?= 1.0.0
23 |
24 | ##### Using `BUILD_PLATFORMS=linux/arm64 make all` to build arm64 arch image locally
25 | ##### Using `BUILD_PLATFORMS=linux/amd64,linux/arm64 make push-latest` to build and publish multi-arch image
26 | BUILD_PLATFORMS ?= linux/amd64
27 |
28 | ##### Public rules #####
29 |
30 | all: ubuntu20.04
31 |
32 | push:
33 | docker buildx build --platform $(BUILD_PLATFORMS) --push \
34 | --tag $(REGISTRY)/volcano-vgpu-device-plugin:$(VERSION)-ubuntu20.04 \
35 | --file docker/Dockerfile.ubuntu20.04 .
36 |
37 | push-short:
38 | docker buildx build --platform $(BUILD_PLATFORMS) --push \
39 | 	--tag $(REGISTRY)/volcano-vgpu-device-plugin:$(VERSION) \
40 | --file docker/Dockerfile.ubuntu20.04 .
41 |
42 | push-latest:
43 | docker buildx build --platform $(BUILD_PLATFORMS) --push \
44 | 	--tag $(REGISTRY)/volcano-vgpu-device-plugin:latest \
45 | --file docker/Dockerfile.ubuntu20.04 .
46 |
47 | ubuntu20.04:
48 | docker buildx build --platform $(BUILD_PLATFORMS) --load \
49 | --tag $(REGISTRY)/volcano-vgpu-device-plugin:$(VERSION)-ubuntu20.04 \
50 | --file docker/Dockerfile.ubuntu20.04 .
51 |
52 | BIN_DIR=_output/bin
53 | RELEASE_DIR=_output/release
54 | REL_OSARCH=linux/amd64
55 |
56 | init:
57 | mkdir -p ${BIN_DIR}
58 | mkdir -p ${RELEASE_DIR}
59 |
60 | gen_bin: init
61 | go get github.com/mitchellh/gox
62 | CGO_ENABLED=1 gox -osarch=${REL_OSARCH} -ldflags ${LD_FLAGS} -output ${BIN_DIR}/${REL_OSARCH}/volcano-vgpu-device-plugin ./cmd/vgpu
63 |
--------------------------------------------------------------------------------
/Makefile.def:
--------------------------------------------------------------------------------
1 |
2 | # If tag not explicitly set in users default to the git sha.
3 | TAG ?= $(shell git rev-parse --verify HEAD)
4 | GitSHA=`git rev-parse HEAD`
5 | Date=`date "+%Y-%m-%d %H:%M:%S"`
6 | RELEASE_VER=latest
7 | LD_FLAGS=" \
8 | -X '${REPO_PATH}/pkg/version.GitSHA=${GitSHA}' \
9 | -X '${REPO_PATH}/pkg/version.Built=${Date}' \
10 | -X '${REPO_PATH}/pkg/version.Version=${RELEASE_VER}'"
11 |
12 |
--------------------------------------------------------------------------------
/OWNERS:
--------------------------------------------------------------------------------
1 | reviewers:
2 | - k82cn
3 | - kevin-wangzefeng
4 | - william-wang
5 | - Thor-wl
6 | - archlitchi
7 | - hzxuzhonghu
8 | - wangyang0616
9 | approvers:
10 | - k82cn
11 | - kevin-wangzefeng
12 | - william-wang
13 | - hzxuzhonghu
14 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Volcano vgpu device plugin for Kubernetes
2 |
3 | [](https://app.fossa.com/projects/git%2Bgithub.com%2FProject-HAMi%2Fvolcano-vgpu-device-plugin?ref=badge_shield)
4 | [](https://hub.docker.com/r/projecthami/volcano-vgpu-device-plugin)
5 |
6 | **Note**:
7 |
8 | Volcano vgpu device-plugin can provide device-sharing mechanism for NVIDIA devices managed by volcano.
9 |
10 | This is based on [Nvidia Device Plugin](https://github.com/NVIDIA/k8s-device-plugin), it uses [HAMi-core](https://github.com/Project-HAMi/HAMi-core) to support hard isolation of GPU card.
11 |
12 | And collaborate with volcano, it is possible to enable GPU sharing.
13 |
14 | ## Table of Contents
15 |
16 | - [About](#about)
17 | - [Prerequisites](#prerequisites)
18 | - [Quick Start](#quick-start)
19 | - [Preparing your GPU Nodes](#preparing-your-gpu-nodes)
20 | - [Enabling vGPU Support in Kubernetes](#enabling-gpu-support-in-kubernetes)
21 | - [Running vGPU Jobs](#running-vgpu-jobs)
22 | - [Issues and Contributing](#issues-and-contributing)
23 |
24 | ## About
25 |
26 | The Volcano device plugin for Kubernetes is a Daemonset that allows you to automatically:
27 | - Expose the number of GPUs on each node of your cluster
28 | - Keep track of the health of your GPUs
29 | - Run GPU enabled containers in your Kubernetes cluster.
30 | - Provide device-sharing mechanism for GPU tasks as the figure below.
31 | - Enforce hard resource limit in container.
32 | - Support dynamic-mig, for more details, see [config](doc/config.md)
33 |
34 |
35 |
36 | ## Prerequisites
37 |
38 | The list of prerequisites for running the Volcano device plugin is described below:
39 | * NVIDIA drivers > 440
40 | * nvidia-docker version > 2.0 (see how to [install](https://github.com/NVIDIA/nvidia-docker) and it's [prerequisites](https://github.com/nvidia/nvidia-docker/wiki/Installation-\(version-2.0\)#prerequisites))
41 | * docker configured with nvidia as the [default runtime](https://github.com/NVIDIA/nvidia-docker/wiki/Advanced-topics#default-runtime).
42 | * Kubernetes version >= 1.16
43 | * Volcano version >= 1.9
44 |
45 | ## Quick Start
46 |
47 | ### Preparing your GPU Nodes
48 |
49 | The following steps need to be executed on all your GPU nodes.
50 | This README assumes that the NVIDIA drivers and nvidia-docker have been installed.
51 |
52 | Note that you need to install the nvidia-docker2 package and not the nvidia-container-toolkit.
53 | This is because the new `--gpus` options hasn't reached kubernetes yet. Example:
54 | ```bash
55 | # Add the package repositories
56 | $ distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
57 | $ curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
58 | $ curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
59 |
60 | $ sudo apt-get update && sudo apt-get install -y nvidia-docker2
61 | $ sudo systemctl restart docker
62 | ```
63 |
64 | You will need to enable the nvidia runtime as your default runtime on your node.
65 | We will be editing the docker daemon config file which is usually present at `/etc/docker/daemon.json`:
66 | ```json
67 | {
68 | "default-runtime": "nvidia",
69 | "runtimes": {
70 | "nvidia": {
71 | "path": "/usr/bin/nvidia-container-runtime",
72 | "runtimeArgs": []
73 | }
74 | }
75 | }
76 | ```
77 | > *if `runtimes` is not already present, head to the install page of [nvidia-docker](https://github.com/NVIDIA/nvidia-docker)*
78 |
79 |
80 | ### Configure scheduler
81 |
82 | update the scheduler configuration:
83 |
84 | ```shell script
85 | kubectl edit cm -n volcano-system volcano-scheduler-configmap
86 | ```
87 |
88 | For volcano v1.9+, use the following configMap
89 | ```yaml
90 | kind: ConfigMap
91 | apiVersion: v1
92 | metadata:
93 | name: volcano-scheduler-configmap
94 | namespace: volcano-system
95 | data:
96 | volcano-scheduler.conf: |
97 | actions: "enqueue, allocate, backfill"
98 | tiers:
99 | - plugins:
100 | - name: priority
101 | - name: gang
102 | - name: conformance
103 | - plugins:
104 | - name: drf
105 | - name: deviceshare
106 | arguments:
107 | deviceshare.VGPUEnable: true # enable vgpu
108 | - name: predicates
109 | - name: proportion
110 | - name: nodeorder
111 | - name: binpack
112 | ```
113 |
114 | Customize your installation by adjusting the [configs](doc/config.md)
115 |
116 |
117 | ### Enabling GPU Support in Kubernetes
118 |
119 | Once you have enabled this option on *all* the GPU nodes you wish to use,
120 | you can then enable GPU support in your cluster by deploying the following Daemonset:
121 |
122 | ```
123 | $ kubectl create -f volcano-vgpu-device-plugin.yml
124 | ```
125 |
126 | ### Verify environment is ready
127 |
128 | Check the node status, it is ok if `volcano.sh/vgpu-number` is included in the allocatable resources.
129 |
130 | ```shell script
131 | $ kubectl get node {node name} -oyaml
132 | ...
133 | status:
134 | addresses:
135 | - address: 172.17.0.3
136 | type: InternalIP
137 | - address: volcano-control-plane
138 | type: Hostname
139 | allocatable:
140 | cpu: "4"
141 | ephemeral-storage: 123722704Ki
142 | hugepages-1Gi: "0"
143 | hugepages-2Mi: "0"
144 | memory: 8174332Ki
145 | pods: "110"
146 | volcano.sh/vgpu-memory: "89424"
147 | volcano.sh/vgpu-number: "10" # vGPU resource
148 | capacity:
149 | cpu: "4"
150 | ephemeral-storage: 123722704Ki
151 | hugepages-1Gi: "0"
152 | hugepages-2Mi: "0"
153 | memory: 8174332Ki
154 | pods: "110"
155 | volcano.sh/vgpu-memory: "89424"
156 | volcano.sh/vgpu-number: "10" # vGPU resource
157 | ```
158 |
159 | ### Running VGPU Jobs
160 |
161 | vGPU resources can be requested by setting "volcano.sh/vgpu-number", "volcano.sh/vgpu-cores" and "volcano.sh/vgpu-memory" in resource limits
162 |
163 | ```shell script
164 | $ cat < **WARNING:** *if you don't request GPUs when using the device plugin with NVIDIA images all
189 | > the GPUs on the machine will be exposed inside your container.
190 | > The number of vgpu used by a container can not exceed the number of gpus on that node.*
191 |
192 | ### Monitor
193 |
194 | volcano-scheduler-metrics records every GPU usage and limitation, visit the following address to get these metrics.
195 |
196 | ```
197 | curl {volcano scheduler cluster ip}:8080/metrics
198 | ```
199 |
200 | You can also collect the **GPU utilization**, **GPU memory usage**, **pods' GPU memory limitations** and **pods' GPU memory usage** metrics on nodes by visiting the following addresses:
201 |
202 | ```
203 | curl {volcano device plugin pod ip}:9394/metrics
204 | ```
205 | 
206 |
207 | # Issues and Contributing
208 | [Checkout the Contributing document!](CONTRIBUTING.md)
209 |
210 | * You can report a bug by [filing a new issue](https://github.com/Project-HAMi/volcano-vgpu-device-plugin)
211 | * You can contribute by opening a [pull request](https://help.github.com/articles/using-pull-requests/)
212 |
213 |
214 | ## Upgrading Kubernetes with the device plugin
215 |
216 | Upgrading Kubernetes when you have a device plugin deployed doesn't require you to make any
217 | particular changes to your workflow.
218 | The API is versioned and is pretty stable (though it is not guaranteed to be non breaking),
219 | upgrading kubernetes won't require you to deploy a different version of the device plugin and you will
220 | see GPUs re-registering themselves after your node comes back online.
221 |
222 |
223 | Upgrading the device plugin is a more complex task. It is recommended to drain GPU tasks as
224 | we cannot guarantee that GPU tasks will survive a rolling upgrade.
225 | However we make best efforts to preserve GPU tasks during an upgrade.
226 |
227 |
228 | ## License
229 | [](https://app.fossa.com/projects/git%2Bgithub.com%2FProject-HAMi%2Fvolcano-vgpu-device-plugin?ref=badge_large)
--------------------------------------------------------------------------------
/cmd/vgpu-monitor/feedback.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2024 The HAMi Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package main
18 |
19 | import (
20 | "time"
21 |
22 | "volcano.sh/k8s-device-plugin/pkg/monitor/nvidia"
23 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config"
24 |
25 | "k8s.io/klog/v2"
26 | )
27 |
28 | type UtilizationPerDevice []int
29 |
30 | func CheckBlocking(utSwitchOn map[string]UtilizationPerDevice, p int, c *nvidia.ContainerUsage) bool { // reports whether c should be blocked because a task with priority higher than p is active on one of its devices
31 | 	for i := 0; i < c.Info.DeviceMax(); i++ {
32 | 		uuid := c.Info.DeviceUUID(i)
33 | 		_, ok := utSwitchOn[uuid]
34 | 		if ok {
35 | 			for i := 0; i < p; i++ { // shadows the outer i; walks priority buckets 0..p-1 (lower index = higher priority)
36 | 				if utSwitchOn[uuid][i] > 0 { // a higher-priority task is active on this device
37 | 					return true
38 | 				}
39 | 			}
40 | 			return false // NOTE(review): the decision is made from the first device found in utSwitchOn; later devices are never inspected — confirm this is intended
41 | 		}
42 | 	}
43 | 	return false // none of the container's devices are tracked
44 | }
45 |
46 | // CheckPriority reports whether a task with higher priority uses the GPU, or another task shares the same priority bucket on one of c's devices.
47 | func CheckPriority(utSwitchOn map[string]UtilizationPerDevice, p int, c *nvidia.ContainerUsage) bool {
48 | 	for i := 0; i < c.Info.DeviceMax(); i++ {
49 | 		uuid := c.Info.DeviceUUID(i)
50 | 		_, ok := utSwitchOn[uuid]
51 | 		if ok {
52 | 			for i := 0; i < p; i++ { // shadows the outer i; any active task with priority < p wins
53 | 				if utSwitchOn[uuid][i] > 0 {
54 | 					return true
55 | 				}
56 | 			}
57 | 			if utSwitchOn[uuid][p] > 1 { // more than one task at the same priority shares this device
58 | 				return true
59 | 			}
60 | 		}
61 | 	}
62 | 	return false
63 | }
64 |
65 | func Observe(lister *nvidia.ContainerLister) { // one feedback pass: tally per-device activity by priority, then toggle each container's blocking and utilization switches
66 | 	utSwitchOn := map[string]UtilizationPerDevice{}
67 | 	containers := lister.ListContainers()
68 |
69 | 	for _, c := range containers {
70 | 		recentKernel := c.Info.GetRecentKernel()
71 | 		if recentKernel > 0 {
72 | 			recentKernel-- // age the activity counter; the container counts as active while it stays > 0
73 | 			if recentKernel > 0 {
74 | 				for i := 0; i < c.Info.DeviceMax(); i++ {
75 | 					// Null device condition
76 | 					if !c.Info.IsValidUUID(i) {
77 | 						continue
78 | 					}
79 | 					uuid := c.Info.DeviceUUID(i)
80 | 					if len(utSwitchOn[uuid]) == 0 {
81 | 						utSwitchOn[uuid] = []int{0, 0} // one counter per priority bucket (0 and 1)
82 | 					}
83 | 					utSwitchOn[uuid][c.Info.GetPriority()]++
84 | 				}
85 | 			}
86 | 			c.Info.SetRecentKernel(recentKernel)
87 | 		}
88 | 	}
89 | 	for idx, c := range containers {
90 | 		priority := c.Info.GetPriority()
91 | 		recentKernel := c.Info.GetRecentKernel()
92 | 		utilizationSwitch := c.Info.GetUtilizationSwitch()
93 | 		if CheckBlocking(utSwitchOn, priority, c) {
94 | 			if recentKernel >= 0 { // only flip when not already blocked; -1 marks the blocked state
95 | 				klog.Infof("utSwitchon=%v", utSwitchOn)
96 | 				klog.Infof("Setting Blocking to on %v", idx)
97 | 				c.Info.SetRecentKernel(-1)
98 | 			}
99 | 		} else {
100 | 			if recentKernel < 0 { // unblock: restore the counter to its idle value
101 | 				klog.Infof("utSwitchon=%v", utSwitchOn)
102 | 				klog.Infof("Setting Blocking to off %v", idx)
103 | 				c.Info.SetRecentKernel(0)
104 | 			}
105 | 		}
106 | 		if CheckPriority(utSwitchOn, priority, c) {
107 | 			if utilizationSwitch != 1 { // only log and apply on a state change
108 | 				klog.Infof("utSwitchon=%v", utSwitchOn)
109 | 				klog.Infof("Setting UtilizationSwitch to on %v", idx)
110 | 				c.Info.SetUtilizationSwitch(1)
111 | 			}
112 | 		} else {
113 | 			if utilizationSwitch != 0 {
114 | 				klog.Infof("utSwitchon=%v", utSwitchOn)
115 | 				klog.Infof("Setting UtilizationSwitch to off %v", idx)
116 | 				c.Info.SetUtilizationSwitch(0)
117 | 			}
118 | 		}
119 | 	}
120 | }
121 |
122 | func watchAndFeedback(lister *nvidia.ContainerLister) { // polling loop: refresh the container list every 5s and apply scheduling feedback; never returns
123 | 	config.Nvml().Init() // NOTE(review): the NVML init return code is ignored here — confirm failure is acceptable at this point
124 | 	for {
125 | 		time.Sleep(time.Second * 5)
126 | 		err := lister.Update()
127 | 		if err != nil {
128 | 			klog.Errorf("Failed to update container list: %v", err)
129 | 			continue
130 | 		}
131 | 		Observe(lister)
132 | 	}
133 | }
134 |
--------------------------------------------------------------------------------
/cmd/vgpu-monitor/main.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2024 The HAMi Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package main
18 |
19 | import (
20 | "volcano.sh/k8s-device-plugin/pkg/monitor/nvidia"
21 |
22 | "k8s.io/klog/v2"
23 | )
24 |
25 | func main() { // vgpu-monitor entry point: validate env, then run metrics server and feedback loop concurrently
26 | 	if err := ValidateEnvVars(); err != nil {
27 | 		klog.Fatalf("Failed to validate environment variables: %v", err)
28 | 	}
29 | 	containerLister, err := nvidia.NewContainerLister()
30 | 	if err != nil {
31 | 		klog.Fatalf("Failed to create container lister: %v", err)
32 | 	}
33 | 	errchannel := make(chan error)
34 | 	go initMetrics(containerLister)      // serves Prometheus metrics on :9394
35 | 	go watchAndFeedback(containerLister) // 5-second feedback loop
36 | 	for {
37 | 		err := <-errchannel // NOTE(review): nothing ever sends on errchannel, so this receive blocks forever and simply keeps the process alive — confirm intentional
38 | 		klog.Errorf("failed to serve: %v", err)
39 | 	}
40 | }
41 |
--------------------------------------------------------------------------------
/cmd/vgpu-monitor/metrics.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2024 The HAMi Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package main
18 |
19 | import (
20 | "fmt"
21 | "log"
22 | "net/http"
23 | "strings"
24 | "time"
25 |
26 | "volcano.sh/k8s-device-plugin/pkg/monitor/nvidia"
27 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config"
28 |
29 | "github.com/NVIDIA/go-nvml/pkg/nvml"
30 | "github.com/prometheus/client_golang/prometheus"
31 | "github.com/prometheus/client_golang/prometheus/promhttp"
32 |
33 | "k8s.io/apimachinery/pkg/labels"
34 | "k8s.io/client-go/informers"
35 | listerscorev1 "k8s.io/client-go/listers/core/v1"
36 | "k8s.io/klog/v2"
37 | )
38 |
39 | // ClusterManager is an example for a system that might have been built without
40 | // Prometheus in mind. It models a central manager of jobs running in a
41 | // cluster. Thus, we implement a custom Collector called
42 | // ClusterManagerCollector, which collects information from a ClusterManager
43 | // using its provided methods and turns them into Prometheus Metrics for
44 | // collection.
45 | //
46 | // An additional challenge is that multiple instances of the ClusterManager are
47 | // run within the same binary, each in charge of a different zone. We need to
48 | // make use of wrapping Registerers to be able to register each
49 | // ClusterManagerCollector instance with Prometheus.
50 | type ClusterManager struct {
51 | 	Zone string // used as a wrapping label so collectors from different managers do not collide
52 | 	// Contains many more fields not listed in this example.
53 | 	PodLister listerscorev1.PodLister // resolves pod UID/name so container usage can be labelled per pod/container
54 | 	containerLister *nvidia.ContainerLister // source of per-container vGPU usage data
55 | }
56 |
57 | // ReallyExpensiveAssessmentOfTheSystemState is a mock for the data gathering a
58 | // real cluster manager would have to do. Since it may actually be really
59 | // expensive, it must only be called once per collection. This implementation,
60 | // obviously, only returns some made-up data. NOTE(review): leftover from the upstream Prometheus example; it appears unused within this file — confirm before removing.
61 | func (c *ClusterManager) ReallyExpensiveAssessmentOfTheSystemState() (
62 | 	oomCountByHost map[string]int, ramUsageByHost map[string]float64,
63 | ) {
64 | 	// Just example fake data.
65 | 	oomCountByHost = map[string]int{
66 | 		"foo.example.org": 42,
67 | 		"bar.example.org": 2001,
68 | 	}
69 | 	ramUsageByHost = map[string]float64{
70 | 		"foo.example.org": 6.023e23,
71 | 		"bar.example.org": 3.14,
72 | 	}
73 | 	return
74 | }
75 |
76 | // ClusterManagerCollector implements the Collector interface.
77 | type ClusterManagerCollector struct {
78 | 	ClusterManager *ClusterManager // the manager whose state this collector exports
79 | }
80 |
81 | // Descriptors used by the ClusterManagerCollector below.
82 | var (
83 | 	// hostGPUdesc reports used device memory per physical GPU.
84 | 	hostGPUdesc = prometheus.NewDesc(
85 | 		"HostGPUMemoryUsage",
86 | 		"GPU device memory usage",
87 | 		[]string{"deviceidx", "deviceuuid"}, nil,
88 | 	)
89 |
90 | 	// hostGPUUtilizationdesc reports core utilization per physical GPU.
91 | 	hostGPUUtilizationdesc = prometheus.NewDesc(
92 | 		"HostCoreUtilization",
93 | 		"GPU core utilization",
94 | 		[]string{"deviceidx", "deviceuuid"}, nil,
95 | 	)
96 |
97 | 	// ctrvGPUdesc reports per-container vGPU device memory usage.
98 | 	ctrvGPUdesc = prometheus.NewDesc(
99 | 		"vGPU_device_memory_usage_in_bytes",
100 | 		"vGPU device usage",
101 | 		[]string{"podnamespace", "podname", "ctrname", "vdeviceid", "deviceuuid"}, nil,
102 | 	)
103 |
104 | 	// ctrvGPUlimitdesc reports the per-container vGPU device memory limit.
105 | 	ctrvGPUlimitdesc = prometheus.NewDesc(
106 | 		"vGPU_device_memory_limit_in_bytes",
107 | 		"vGPU device limit",
108 | 		[]string{"podnamespace", "podname", "ctrname", "vdeviceid", "deviceuuid"}, nil,
109 | 	)
110 | 	ctrDeviceMemorydesc = prometheus.NewDesc(
111 | 		"Device_memory_desc_of_container",
112 | 		"Container device memory description", // fix: help text typo "meory" -> "memory"
113 | 		[]string{"podnamespace", "podname", "ctrname", "vdeviceid", "deviceuuid", "context", "module", "data", "offset"}, nil,
114 | 	)
115 | 	ctrDeviceUtilizationdesc = prometheus.NewDesc(
116 | 		"Device_utilization_desc_of_container",
117 | 		"Container device utilization description",
118 | 		[]string{"podnamespace", "podname", "ctrname", "vdeviceid", "deviceuuid"}, nil,
119 | 	)
120 | 	ctrDeviceLastKernelDesc = prometheus.NewDesc(
121 | 		"Device_last_kernel_of_container",
122 | 		"Container device last kernel description",
123 | 		[]string{"podnamespace", "podname", "ctrname", "vdeviceid", "deviceuuid"}, nil,
124 | 	)
125 | )
122 |
123 | // Describe sends the static descriptors this collector can emit. The upstream
124 | // example used DescribeByCollect; here four of the descriptors are listed
125 | // explicitly instead (the ctrDevice* descriptors are not sent here).
126 | func (cc ClusterManagerCollector) Describe(ch chan<- *prometheus.Desc) {
127 | 	ch <- hostGPUdesc
128 | 	ch <- ctrvGPUdesc
129 | 	ch <- ctrvGPUlimitdesc
130 | 	ch <- hostGPUUtilizationdesc
131 | 	//prometheus.DescribeByCollect(cc, ch)
132 | }
133 |
134 | // Collect gathers host-level GPU metrics via NVML and per-container vGPU
135 | // metrics from the container lister, emitting them as constant metrics.
136 | //
137 | // Note that Collect could be called concurrently, so we depend on the
138 | // underlying data sources to be concurrency-safe.
139 | func (cc ClusterManagerCollector) Collect(ch chan<- prometheus.Metric) {
140 | 	klog.Info("Starting to collect metrics for vGPUMonitor")
141 | 	containerLister := cc.ClusterManager.containerLister
142 | 	if err := containerLister.Update(); err != nil {
143 | 		klog.Errorf("Update container error: %s", err.Error()) // fix: was klog.Error, which does not interpret the %s format verb
144 | 	}
145 |
146 | 	// Host-level metrics straight from NVML.
147 | 	nvret := config.Nvml().Init()
148 | 	if nvret != nvml.SUCCESS {
149 | 		klog.Errorf("nvml Init err= %v", nvret)
150 | 	}
151 | 	devnum, nvret := config.Nvml().DeviceGetCount()
152 | 	if nvret != nvml.SUCCESS {
153 | 		klog.Errorf("nvml GetDeviceCount err= %v", nvret)
154 | 	} else {
155 | 		for ii := 0; ii < devnum; ii++ {
156 | 			hdev, nvret := config.Nvml().DeviceGetHandleByIndex(ii)
157 | 			if nvret != nvml.SUCCESS {
158 | 				klog.Error(nvret)
159 | 			}
160 | 			memoryUsed := 0
161 | 			memory, ret := hdev.GetMemoryInfo()
162 | 			if ret == nvml.SUCCESS {
163 | 				memoryUsed = int(memory.Used)
164 | 			} else {
165 | 				klog.Error("nvml get memory error ret=", ret)
166 | 			}
167 |
168 | 			uuid, nvret := hdev.GetUUID()
169 | 			if nvret != nvml.SUCCESS {
170 | 				klog.Error(nvret)
171 | 			} else {
172 | 				ch <- prometheus.MustNewConstMetric(
173 | 					hostGPUdesc,
174 | 					prometheus.GaugeValue,
175 | 					float64(memoryUsed),
176 | 					fmt.Sprint(ii), uuid,
177 | 				)
178 | 			}
179 | 			util, nvret := hdev.GetUtilizationRates()
180 | 			if nvret != nvml.SUCCESS {
181 | 				klog.Error(nvret)
182 | 			} else {
183 | 				ch <- prometheus.MustNewConstMetric(
184 | 					hostGPUUtilizationdesc,
185 | 					prometheus.GaugeValue,
186 | 					float64(util.Gpu),
187 | 					fmt.Sprint(ii), uuid, // NOTE(review): uuid may be the zero value if GetUUID failed above — confirm acceptable
188 | 				)
189 | 			}
190 | 		}
191 | 	}
192 |
193 | 	// Per-container metrics: match each tracked container to its pod.
194 | 	pods, err := cc.ClusterManager.PodLister.List(labels.Everything())
195 | 	if err != nil {
196 | 		klog.Error("failed to list pods with err=", err.Error())
197 | 	}
198 | 	nowSec := time.Now().Unix()
199 |
200 | 	containers := containerLister.ListContainers()
201 | 	for _, pod := range pods {
202 | 		for _, c := range containers {
203 | 			if c.Info == nil { // skip entries with no usage info attached
204 | 				continue
205 | 			}
206 | 			podUID := c.PodUID
207 | 			ctrName := c.ContainerName
208 | 			if string(pod.UID) != podUID {
209 | 				continue
210 | 			}
211 | 			fmt.Println("Pod matched!", pod.Name, pod.Namespace, pod.Labels)
212 | 			for _, ctr := range pod.Spec.Containers {
213 | 				if ctr.Name != ctrName {
214 | 					continue
215 | 				}
216 | 				fmt.Println("container matched", ctr.Name)
217 | 				podlabels := make(map[string]string) // NOTE(review): built but never used below — dead code or reserved for future label export?
218 | 				for idx, val := range pod.Labels {
219 | 					idxfix := strings.ReplaceAll(idx, "-", "_")
220 | 					valfix := strings.ReplaceAll(val, "-", "_")
221 | 					podlabels[idxfix] = valfix
222 | 				}
223 | 				for i := 0; i < c.Info.DeviceNum(); i++ {
224 | 					uuid := c.Info.DeviceUUID(i)[0:40] // NOTE(review): panics if a device UUID is shorter than 40 chars — confirm the UUID format is fixed-width
225 | 					memoryTotal := c.Info.DeviceMemoryTotal(i)
226 | 					memoryLimit := c.Info.DeviceMemoryLimit(i)
227 | 					memoryContextSize := c.Info.DeviceMemoryContextSize(i)
228 | 					memoryModuleSize := c.Info.DeviceMemoryModuleSize(i)
229 | 					memoryBufferSize := c.Info.DeviceMemoryBufferSize(i)
230 | 					memoryOffset := c.Info.DeviceMemoryOffset(i)
231 | 					smUtil := c.Info.DeviceSmUtil(i)
232 | 					lastKernelTime := c.Info.LastKernelTime()
233 |
234 | 					ch <- prometheus.MustNewConstMetric(
235 | 						ctrvGPUdesc,
236 | 						prometheus.GaugeValue,
237 | 						float64(memoryTotal),
238 | 						pod.Namespace, pod.Name, ctrName, fmt.Sprint(i), uuid,
239 | 					)
240 | 					ch <- prometheus.MustNewConstMetric(
241 | 						ctrvGPUlimitdesc,
242 | 						prometheus.GaugeValue,
243 | 						float64(memoryLimit),
244 | 						pod.Namespace, pod.Name, ctrName, fmt.Sprint(i), uuid,
245 | 					)
246 | 					ch <- prometheus.MustNewConstMetric(
247 | 						ctrDeviceMemorydesc,
248 | 						prometheus.CounterValue,
249 | 						float64(memoryTotal),
250 | 						pod.Namespace, pod.Name, ctrName, fmt.Sprint(i), uuid,
251 | 						fmt.Sprint(memoryContextSize), fmt.Sprint(memoryModuleSize), fmt.Sprint(memoryBufferSize), fmt.Sprint(memoryOffset),
252 | 					)
253 | 					ch <- prometheus.MustNewConstMetric(
254 | 						ctrDeviceUtilizationdesc,
255 | 						prometheus.GaugeValue,
256 | 						float64(smUtil),
257 | 						pod.Namespace, pod.Name, ctrName, fmt.Sprint(i), uuid,
258 | 					)
259 | 					if lastKernelTime > 0 {
260 | 						lastSec := nowSec - lastKernelTime
261 | 						if lastSec < 0 { // clamp clock skew to zero
262 | 							lastSec = 0
263 | 						}
264 | 						ch <- prometheus.MustNewConstMetric(
265 | 							ctrDeviceLastKernelDesc,
266 | 							prometheus.GaugeValue,
267 | 							float64(lastSec),
268 | 							pod.Namespace, pod.Name, ctrName, fmt.Sprint(i), uuid,
269 | 						)
270 | 					}
271 | 				}
272 | 			}
273 | 		}
274 | 	}
275 | }
287 |
288 | // NewClusterManager first creates a Prometheus-ignorant ClusterManager
289 | // instance. Then, it creates a ClusterManagerCollector for the just created
290 | // ClusterManager. Finally, it registers the ClusterManagerCollector with a
291 | // wrapping Registerer that adds the zone as a label. In this way, the metrics
292 | // collected by different ClusterManagerCollectors do not collide.
293 | func NewClusterManager(zone string, reg prometheus.Registerer, containerLister *nvidia.ContainerLister) *ClusterManager {
294 | 	c := &ClusterManager{
295 | 		Zone: zone,
296 | 		containerLister: containerLister,
297 | 	}
298 |
299 | 	informerFactory := informers.NewSharedInformerFactoryWithOptions(containerLister.Clientset(), time.Hour*1)
300 | 	c.PodLister = informerFactory.Core().V1().Pods().Lister()
301 | 	stopCh := make(chan struct{})
302 | 	informerFactory.Start(stopCh) // NOTE(review): stopCh is never closed and WaitForCacheSync is not called — early scrapes may see an empty pod list; confirm acceptable
303 |
304 | 	cc := ClusterManagerCollector{ClusterManager: c}
305 | 	prometheus.WrapRegistererWith(prometheus.Labels{"zone": zone}, reg).MustRegister(cc)
306 | 	return c
307 | }
308 |
309 | func initMetrics(containerLister *nvidia.ContainerLister) { // registers the vGPU collector and serves /metrics on :9394; blocks until the server fails
310 | 	// Since we are dealing with custom Collector implementations, it might
311 | 	// be a good idea to try it out with a pedantic registry.
312 | 	klog.Info("Initializing metrics for vGPUmonitor")
313 | 	reg := prometheus.NewRegistry()
314 | 	//reg := prometheus.NewPedanticRegistry()
315 |
316 | 	// Construct cluster managers. In real code, we would assign them to
317 | 	// variables to then do something with them.
318 | 	NewClusterManager("vGPU", reg, containerLister)
319 |
320 | 	http.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{}))
321 | 	log.Fatal(http.ListenAndServe(":9394", nil)) // on server error the whole process exits via log.Fatal
322 | }
323 |
--------------------------------------------------------------------------------
/cmd/vgpu-monitor/validation.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2024 The HAMi Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package main
18 |
19 | import (
20 | "fmt"
21 | "os"
22 | )
23 |
24 | var requiredEnvVars = map[string]bool{ // env var name -> whether it must be set for the monitor to start
25 | 	"HOOK_PATH": true,
26 | 	"OTHER_ENV_VAR": false,
27 | }
28 |
29 | func ValidateEnvVars() error { // returns an error naming a required environment variable that is not set, or nil if all are present
30 | 	for envVar, required := range requiredEnvVars { // map iteration order is random, so the variable reported first is nondeterministic
31 | 		_, exists := os.LookupEnv(envVar)
32 | 		if required && !exists {
33 | 			return fmt.Errorf("required environment variable %s not set", envVar)
34 | 		}
35 | 	}
36 | 	return nil
37 | }
38 |
--------------------------------------------------------------------------------
/cmd/vgpu/main.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2023 The Volcano Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 | package main
17 |
18 | import (
19 | "fmt"
20 | "net/http"
21 | _ "net/http/pprof"
22 | "syscall"
23 |
24 | "github.com/NVIDIA/go-nvml/pkg/nvml"
25 | "github.com/fsnotify/fsnotify"
26 | "github.com/spf13/cobra"
27 | "github.com/spf13/viper"
28 | "k8s.io/klog/v2"
29 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
30 | nvidiadevice "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu"
31 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config"
32 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/util"
33 | )
34 |
35 | var (
36 | 	failOnInitErrorFlag bool // --fail-on-init-error: abort (vs. block forever) when NVML init fails
37 | 	migStrategyFlag string // --mig-strategy: none | single | mixed
38 |
39 | 	rootCmd = &cobra.Command{
40 | 		Use: "device-plugin",
41 | 		Short: "kubernetes vgpu device-plugin",
42 | 		Run: func(cmd *cobra.Command, args []string) {
43 | 			if err := start(); err != nil {
44 | 				klog.Fatal(err)
45 | 			}
46 | 		},
47 | 	}
48 | )
49 |
50 | type devicePluginConfigs struct { // per-node configuration schema; NOTE(review): appears unused within this file — confirm it is consumed elsewhere before removing
51 | 	Nodeconfig []struct {
52 | 		Name string `json:"name"` // node name the overrides apply to
53 | 		Devicememoryscaling float64 `json:"devicememoryscaling"` // device memory oversubscription ratio
54 | 		Devicesplitcount int `json:"devicesplitcount"` // max tasks sharing one GPU
55 | 		Migstrategy string `json:"migstrategy"` // none | single | mixed
56 | 	} `json:"nodeconfig"`
57 | }
58 |
59 | func init() { // wires command-line flags, env bindings, and subcommands onto rootCmd
60 | 	// https://github.com/spf13/viper/issues/461
61 | 	viper.BindEnv("node-name", "NODE_NAME") // allow the NODE_NAME env var to back the --node-name flag
62 |
63 | 	rootCmd.Flags().SortFlags = false // keep flags in declaration order in --help output
64 | 	rootCmd.PersistentFlags().SortFlags = false
65 |
66 | 	rootCmd.Flags().StringVar(&migStrategyFlag, "mig-strategy", "none", "the desired strategy for exposing MIG devices on GPUs that support it:\n\t\t[none | single | mixed]")
67 | 	rootCmd.Flags().BoolVar(&failOnInitErrorFlag, "fail-on-init-error", true, "fail the plugin if an error is encountered during initialization, otherwise block indefinitely")
68 | 	rootCmd.Flags().UintVar(&config.DeviceSplitCount, "device-split-count", 2, "the number for NVIDIA device split")
69 | 	rootCmd.Flags().UintVar(&config.GPUMemoryFactor, "gpu-memory-factor", 1, "the default gpu memory block size is 1MB")
70 | 	rootCmd.Flags().Float64Var(&config.DeviceCoresScaling, "device-cores-scaling", 1.0, "the ratio for NVIDIA device cores scaling")
71 | 	rootCmd.Flags().StringVar(&config.NodeName, "node-name", viper.GetString("node-name"), "node name")
72 |
73 | 	rootCmd.PersistentFlags().AddGoFlagSet(util.GlobalFlagSet()) // merge the shared/global (klog) flag set
74 | 	rootCmd.AddCommand(config.VersionCmd)
75 | }
76 |
77 | func start() error { // main run loop: init NVML, start watchers and caches, serve device plugins, and handle restart/shutdown events
78 | 	go func() {
79 | 		klog.Info("Starting pprof server, listen on port 6060")
80 | 		klog.Info(http.ListenAndServe(":6060", nil))
81 | 	}()
82 |
83 | 	klog.Info("Loading NVML")
84 | 	if nvret := config.Nvml().Init(); nvret != nvml.SUCCESS {
85 | 		klog.Infof("Failed to initialize NVML: %v.", nvret)
86 | 		klog.Infof("If this is a GPU node, did you set the docker default runtime to `nvidia`?")
87 | 		klog.Infof("You can check the prerequisites at: https://github.com/NVIDIA/k8s-device-plugin#prerequisites")
88 | 		klog.Infof("You can learn how to set the runtime at: https://github.com/NVIDIA/k8s-device-plugin#quick-start")
89 | 		klog.Infof("If this is not a GPU node, you should set up a toleration or nodeSelector to only deploy this plugin on GPU nodes")
90 | 		if failOnInitErrorFlag {
91 | 			return fmt.Errorf("failed to initialize NVML: %v", nvret)
92 | 		}
93 | 		select {} // block forever: keeps the pod alive on non-GPU nodes when --fail-on-init-error=false
94 | 	}
95 | 	defer func() { klog.Info("Shutdown of NVML returned:", config.Nvml().Shutdown()) }()
96 |
97 | 	klog.Info("Starting FS watcher.")
98 | 	watcher, err := NewFSWatcher(pluginapi.DevicePluginPath) // watches the kubelet device-plugin dir to detect kubelet restarts
99 | 	if err != nil {
100 | 		return fmt.Errorf("failed to create FS watcher: %v", err)
101 | 	}
102 | 	defer watcher.Close()
103 |
104 | 	klog.Info("Starting OS watcher.")
105 | 	sigs := NewOSWatcher(syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)
106 |
107 | 	nvidiaCfg := util.LoadNvidiaConfig()
108 |
109 | 	cache := nvidiadevice.NewDeviceCache()
110 | 	cache.Start()
111 | 	defer cache.Stop()
112 |
113 | 	register := nvidiadevice.NewDeviceRegister(cache)
114 | 	register.Start()
115 | 	defer register.Stop()
116 |
117 | 	var plugins []*nvidiadevice.NvidiaDevicePlugin
118 | restart:
119 | 	// If we are restarting, idempotently stop any running plugins before
120 | 	// recreating them below.
121 | 	for _, p := range plugins {
122 | 		p.Stop()
123 | 	}
124 | 	klog.Info("Retreiving plugins.")
125 | 	migStrategy, err := nvidiadevice.NewMigStrategy(migStrategyFlag)
126 | 	if err != nil {
127 | 		return fmt.Errorf("error creating MIG strategy: %v", err)
128 | 	}
129 | 	plugins = migStrategy.GetPlugins(nvidiaCfg, cache)
130 |
131 | 	started := 0
132 | 	pluginStartError := make(chan struct{}) // closed (never sent on) to broadcast a start failure to the event loop below
133 | 	for _, p := range plugins {
134 | 		// Just continue if there are no devices to serve for plugin p.
135 | 		if len(p.Devices()) == 0 {
136 | 			continue
137 | 		}
138 |
139 | 		// Start the gRPC server for plugin p and connect it with the kubelet.
140 | 		if err := p.Start(); err != nil {
141 | 			//klog.SetOutput(os.Stderr)
142 | 			klog.Info("Could not contact Kubelet, retrying. Did you enable the device plugin feature gate?")
143 | 			klog.Info("You can check the prerequisites at: https://github.com/NVIDIA/k8s-device-plugin#prerequisites")
144 | 			klog.Info("You can learn how to set the runtime at: https://github.com/NVIDIA/k8s-device-plugin#quick-start")
145 | 			close(pluginStartError)
146 | 			goto events
147 | 		}
148 | 		started++
149 | 	}
150 |
151 | 	if started == 0 {
152 | 		klog.Info("No devices found. Waiting indefinitely.")
153 | 	}
154 |
155 | events:
156 | 	// Start an infinite loop, waiting for several indicators to either log
157 | 	// some messages, trigger a restart of the plugins, or exit the program.
158 | 	for {
159 | 		select {
160 | 		// If there was an error starting any plugins, restart them all.
161 | 		case <-pluginStartError:
162 | 			goto restart
163 |
164 | 		// Detect a kubelet restart by watching for a newly created
165 | 		// 'pluginapi.KubeletSocket' file. When this occurs, restart this loop,
166 | 		// restarting all of the plugins in the process.
167 | 		case event := <-watcher.Events:
168 | 			if event.Name == pluginapi.KubeletSocket && event.Op&fsnotify.Create == fsnotify.Create {
169 | 				klog.Infof("inotify: %s created, restarting.", pluginapi.KubeletSocket)
170 | 				goto restart
171 | 			}
172 |
173 | 		// Watch for any other fs errors and log them.
174 | 		case err := <-watcher.Errors:
175 | 			klog.Infof("inotify: %s", err)
176 |
177 | 		// Watch for any signals from the OS. On SIGHUP, restart this loop,
178 | 		// restarting all of the plugins in the process. On all other
179 | 		// signals, exit the loop and exit the program.
180 | 		case s := <-sigs:
181 | 			switch s {
182 | 			case syscall.SIGHUP:
183 | 				klog.Info("Received SIGHUP, restarting.")
184 | 				goto restart
185 | 			default:
186 | 				klog.Infof("Received signal %v, shutting down.", s)
187 | 				for _, p := range plugins {
188 | 					p.Stop()
189 | 				}
190 | 				break events
191 | 			}
192 | 		}
193 | 	}
194 | 	return nil
195 | }
196 |
197 | func main() { // entry point: delegate to cobra; start() does all the work
198 | 	if err := rootCmd.Execute(); err != nil {
199 | 		klog.Fatal(err)
200 | 	}
201 | }
202 |
--------------------------------------------------------------------------------
/cmd/vgpu/watchers.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2023 The Volcano Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package main
18 |
19 | import (
20 | "os"
21 | "os/signal"
22 |
23 | "github.com/fsnotify/fsnotify"
24 | )
25 |
26 | func NewFSWatcher(files ...string) (*fsnotify.Watcher, error) { // creates an fsnotify watcher registered on each given path; caller must Close it
27 | 	watcher, err := fsnotify.NewWatcher()
28 | 	if err != nil {
29 | 		return nil, err
30 | 	}
31 |
32 | 	for _, f := range files {
33 | 		err = watcher.Add(f)
34 | 		if err != nil {
35 | 			watcher.Close() // avoid leaking the watcher when registration fails
36 | 			return nil, err
37 | 		}
38 | 	}
39 |
40 | 	return watcher, nil
41 | }
42 |
43 | func NewOSWatcher(sigs ...os.Signal) chan os.Signal { // returns a buffered channel that will receive the given OS signals
44 | 	sigChan := make(chan os.Signal, 1)
45 | 	signal.Notify(sigChan, sigs...)
46 |
47 | 	return sigChan
48 | }
49 |
--------------------------------------------------------------------------------
/doc/config.md:
--------------------------------------------------------------------------------
1 | # Global Config
2 |
3 | ## Device Configs: ConfigMap
4 |
5 | **Note:**
6 | All the configurations listed below are managed within the `volcano-vgpu-device-config` ConfigMap.
7 | You can update these configurations using the following methods:
8 |
9 | 1. Directly edit the ConfigMap: if `volcano-vgpu-device-plugin` has already been successfully installed, you can manually update the `volcano-vgpu-device-config` ConfigMap using the `kubectl edit` command.
10 |
11 | ```bash
12 | kubectl edit configmap volcano-vgpu-device-config -n <namespace>
13 | ```
14 |
15 | After making changes, restart the volcano-vgpu-device-plugin and volcano-scheduler to apply the updated configurations.
16 |
17 | * `nvidia.deviceMemoryScaling`:
18 | Float type, by default: 1. The ratio for NVIDIA device memory scaling, can be greater than 1 (enable virtual device memory, experimental feature). For NVIDIA GPU with *M* memory, if we set `nvidia.deviceMemoryScaling` argument to *S*, vGPUs split from this GPU will get a total of `S * M` memory in Kubernetes with our device plugin.
19 | * `nvidia.deviceSplitCount`:
20 | Integer type, by default: equals 10. Maximum tasks assigned to a single GPU device.
21 | * `nvidia.migstrategy`:
22 | String type, "none" for ignoring MIG features or "mixed" for allocating MIG device by separate resources. Default "none"
23 | * `nvidia.disablecorelimit`:
24 | String type, "true" for disable core limit, "false" for enable core limit, default: false
25 | * `nvidia.defaultMem`:
26 | Integer type, by default: 0. The default device memory of the current task, in MB.'0' means use 100% device memory
27 | * `nvidia.defaultCores`:
28 | Integer type, by default: equals 0. Percentage of GPU cores reserved for the current task. If assigned to 0, it may fit in any GPU with enough device memory. If assigned to 100, it will use an entire GPU card exclusively.
29 | * `nvidia.defaultGPUNum`:
30 | Integer type, by default: equals 1, if configuration value is 0, then the configuration value will not take effect and will be filtered. When a user does not set nvidia.com/gpu this key in pod resource, webhook should check nvidia.com/gpumem、resource-mem-percentage、nvidia.com/gpucores these three keys, anyone a key having value, webhook should add nvidia.com/gpu key and this default value to resources limits map.
31 | * `nvidia.resourceCountName`:
32 | String type, vgpu number resource name, default: "volcano.sh/vgpu-number"
33 | * `nvidia.resourceMemoryName`:
34 | String type, vgpu memory size resource name, default: "volcano.sh/vgpu-memory"
35 | * `nvidia.resourceMemoryPercentageName`:
36 | String type, vgpu memory fraction resource name, default: "volcano.sh/vgpu-memory-percentage"
37 | * `nvidia.resourceCoreName`:
38 | String type, vgpu cores resource name, default: "volcano.sh/vgpu-cores"
39 |
40 | ## Node Configs
41 |
42 | **Note:**
43 | All the configurations listed below are managed within the `volcano-vgpu-node-config` ConfigMap.
44 | You can update these configurations using the following methods:
45 |
46 | ```bash
47 | kubectl edit configmap volcano-vgpu-node-config -n <namespace>
48 | ```
49 |
50 | After making changes, restart the volcano-vgpu-device-plugin and volcano-scheduler to apply the updated configurations.
51 |
52 | * `name`: the name of the node, the following parameters will only take effect on this node.
53 | * `operatingmode`:
54 | String type, `hami-core` for using hami-core for container resource limitation, `mig` for using MIG for container resource limitation (only available on Ampere or later GPU architectures)
55 | * `devicememoryscaling`:
56 | Float type, device memory oversubscription ratio on that node
57 | * `devicecorescaling`:
58 | Integer type, device core oversubscription on that node
59 |
--------------------------------------------------------------------------------
/doc/design.md:
--------------------------------------------------------------------------------
1 | ## Config the volcano device plugin binary
2 |
3 | The volcano device plugin has a number of options that can be configured. These options can be configured as command line flags, environment variables, or via a config file when launching the device plugin. The following section explains these configurations.
4 |
5 | ### As command line flags or envvars
6 |
7 | | Flag | Envvar | Default Value |
8 | |--------------------------|-------------------------|-----------------|
9 | | `--gpu-strategy` | `$GPU_STRATEGY` | `"share"` |
10 | | `--gpu-memory-factor` | `$GPU_MEMORY_FACTOR` | `1` |
11 | | `--config-file` | `$CONFIG_FILE` | `""` |
12 |
13 | when starting volcano-device-plugin.yml, users can specify these parameters by adding args to the container 'volcano-device-plugin'.
14 | For example:
15 | - args: ["--gpu-strategy=number"] will let device plugin using the gpu-number strategy
16 | - args: ["--gpu-strategy=share","--gpu-memory-factor=10"] will let device plugin using the gpu-share strategy, and memory factor is 10MB
17 |
18 | ### As a configuration file
19 | ```
20 | version: v1
21 | flags:
22 | GPUStrategy: "number"
23 | ```
24 |
25 | ### Configuration Option Details
26 | **`GPU_STRATEGY`(string)**:
27 | the desired strategy for exposing GPU devices
28 |
29 | `[number | share ] (default 'share')`
30 |
31 | The `GPU_STRATEGY` option configures the daemonset to be able to expose
32 | on GPU devices in numbers or sharing mode. More information on what
33 | these strategies are and how to use it in Volcano can be found in Volcano scheduler.
34 |
35 | **`GPU_MEMORY_FACTOR`(uint)**:
36 | the desired memory factor for exposing GPU shared memory virtual devices
37 |
38 | `(default 1)`
39 |
40 | The `GPU_MEMORY_FACTOR` option configures the daemonset to be able to expose
41 | on GPU shared memory virtual devices size. By default each block is set to be 1MB,
42 | but users who have large gpu memory can specify a larger number such as 10MB, 100MB.
43 |
44 | **`CONFIG_FILE`**:
45 | point the plugin at a configuration file instead of relying on command line
46 | flags or environment variables
47 |
48 | `(default '')`
49 |
50 | The order of precedence for setting each option is (1) command line flag, (2)
51 | environment variable, (3) configuration file. In this way, one could use a
52 | pre-defined configuration file, but then override the values set in it at
53 | launch time.
54 |
--------------------------------------------------------------------------------
/doc/example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Project-HAMi/volcano-vgpu-device-plugin/9fe49cddc9d40d387f5c4f742a4f4786bbb74642/doc/example.png
--------------------------------------------------------------------------------
/doc/hard_limit.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Project-HAMi/volcano-vgpu-device-plugin/9fe49cddc9d40d387f5c4f742a4f4786bbb74642/doc/hard_limit.jpg
--------------------------------------------------------------------------------
/doc/vgpu-on-volcano.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Project-HAMi/volcano-vgpu-device-plugin/9fe49cddc9d40d387f5c4f742a4f4786bbb74642/doc/vgpu-on-volcano.pdf
--------------------------------------------------------------------------------
/doc/vgpu_device_plugin_metrics.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Project-HAMi/volcano-vgpu-device-plugin/9fe49cddc9d40d387f5c4f742a4f4786bbb74642/doc/vgpu_device_plugin_metrics.png
--------------------------------------------------------------------------------
/docker/Dockerfile.ubuntu20.04:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | FROM ubuntu:20.04 AS builder
16 | ARG TARGETARCH
17 | RUN apt-get update
18 | RUN apt-get -y install ca-certificates g++ wget
19 | RUN wget -qO- https://storage.googleapis.com/golang/go1.23.7.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -zx
20 | ENV GOPATH=/go
21 | ENV PATH=$GOPATH/bin:/usr/local/go/bin:$PATH
22 | WORKDIR /go/src/volcano.sh/devices
23 |
24 | COPY . .
25 | RUN go env -w GOARCH=${TARGETARCH}
26 | RUN go env -w CGO_LDFLAGS_ALLOW='-Wl,--unresolved-symbols=ignore-in-object-files'
27 | RUN go build -ldflags="-s -w" -o volcano-vgpu-device-plugin ./cmd/vgpu
28 | RUN go build -ldflags="-s -w" -o volcano-vgpu-monitor ./cmd/vgpu-monitor
29 | RUN go install github.com/NVIDIA/mig-parted/cmd/nvidia-mig-parted@latest
30 |
31 | FROM nvidia/cuda:12.2.0-devel-ubuntu20.04 AS nvidia_builder
32 | ARG TARGETARCH
33 | RUN apt-get update
34 | RUN apt-get -y install wget openssl libssl-dev
35 | RUN case "${TARGETARCH}" in \
36 | "amd64") wget https://cmake.org/files/v3.19/cmake-3.19.8-Linux-x86_64.tar.gz ;; \
37 | "arm64") wget https://cmake.org/files/v3.19/cmake-3.19.8-Linux-aarch64.tar.gz ;; \
38 | *) echo "Unsupported architecture: ${TARGETARCH}" && exit 1 ;; \
39 | esac && \
40 | tar -xzf cmake-3.19.8-Linux-*.tar.gz -C /opt && \
41 | ln -s /opt/cmake-3.19.8-Linux-*/bin/cmake /usr/local/bin/cmake && \
42 | rm cmake-3.19.8-Linux-*.tar.gz
43 | COPY ./libvgpu /libvgpu
44 | WORKDIR /libvgpu
45 | RUN rm -rf /libvgpu/build
46 | RUN bash ./build.sh
47 |
48 | FROM ubuntu:24.04
49 |
50 | ENV NVIDIA_VISIBLE_DEVICES=all
51 | ENV NVIDIA_DRIVER_CAPABILITIES=utility
52 |
53 | COPY --from=builder /go/src/volcano.sh/devices/volcano-vgpu-device-plugin /usr/bin/volcano-vgpu-device-plugin
54 | COPY --from=builder /go/src/volcano.sh/devices/volcano-vgpu-monitor /usr/bin/volcano-vgpu-monitor
55 | COPY --from=builder /go/bin/nvidia-mig-parted /usr/bin/nvidia-mig-parted
56 | COPY --from=builder /go/src/volcano.sh/devices/lib/nvidia/ld.so.preload /k8s-vgpu/lib/nvidia/
57 | COPY --from=nvidia_builder /libvgpu/build/libvgpu.so /k8s-vgpu/lib/nvidia/
58 |
59 | ENTRYPOINT ["volcano-vgpu-device-plugin"]
60 |
--------------------------------------------------------------------------------
/examples/gpu-share.yml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Pod
3 | metadata:
4 | name: pod1
5 | spec:
6 | restartPolicy: OnFailure
7 | schedulerName: volcano
8 | containers:
9 | - image: nvidia/cuda:10.1-base-ubuntu18.04
10 | name: pod1-ctr
11 | command: ["sleep"]
12 | args: ["100000"]
13 | resources:
14 | limits:
15 | volcano.sh/gpu-memory: 1024 # 1024MB
16 |
--------------------------------------------------------------------------------
/examples/vgpu-case01.yml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Pod
3 | metadata:
4 | name: test1
5 | spec:
6 | restartPolicy: OnFailure
7 | schedulerName: volcano
8 | containers:
9 | - image: ubuntu:20.04
10 | name: pod1-ctr
11 | command: ["sleep"]
12 | args: ["100000"]
13 | resources:
14 | limits:
15 | volcano.sh/vgpu-memory: 1024
16 | volcano.sh/vgpu-number: 1
17 |
--------------------------------------------------------------------------------
/examples/vgpu-case02.yml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Pod
3 | metadata:
4 | name: pod1
5 | spec:
6 | restartPolicy: OnFailure
7 | schedulerName: volcano
8 | containers:
9 | - image: nvidia/cuda:11.2.2-base-ubi8
10 | name: pod1-ctr
11 | command: ["sleep"]
12 | args: ["100000"]
13 | resources:
14 | limits:
15 | volcano.sh/vgpu-number: 1 #request 1 GPU
16 | volcano.sh/vgpu-cores: 50 #each GPU request 50% of compute core resources
17 | volcano.sh/vgpu-memory: 10240 #each GPU request 10G device memory
18 |
--------------------------------------------------------------------------------
/examples/vgpu-case03.yml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Pod
3 | metadata:
4 | name: gpu-pod12
5 | spec:
6 | schedulerName: volcano
7 | containers:
8 | - name: ubuntu-container
9 | image: ubuntu:18.04
10 | command: ["bash", "-c", "sleep 86400"]
11 | resources:
12 | limits:
13 | volcano.sh/vgpu-number: 2 # requesting 2 vGPUs
14 | volcano.sh/vgpu-memory: 2000
15 | #volcano.sh/vgpu-memory-percentage: 50 #Each vGPU contains 50% device memory of that GPU. Cannot be used together with volcano.sh/vgpu-memory
16 | - name: ubuntu-container0
17 | image: ubuntu:18.04
18 | command: ["bash", "-c", "sleep 86400"]
19 | - name: ubuntu-container1
20 | image: ubuntu:18.04
21 | command: ["bash", "-c", "sleep 86400"]
22 | resources:
23 | limits:
24 | volcano.sh/vgpu-number: 2 # requesting 2 vGPUs
25 | volcano.sh/vgpu-memory: 3000
26 |
27 |
--------------------------------------------------------------------------------
/examples/vgpu-deployment.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: resnet101-deployment
5 | spec:
6 | selector:
7 | matchLabels:
8 | app: resnet101-server
9 | replicas: 10
10 | template:
11 | metadata:
12 | labels:
13 | app: resnet101-server
14 | spec:
15 | schedulerName: volcano
16 | containers:
17 | - name: resnet101-container
18 | image: ubuntu:18.04
19 | command: ["sleep","infinity"]
20 | resources:
21 | limits:
22 | volcano.sh/vgpu-number: 1 # requesting 1 vGPU
23 | volcano.sh/vgpu-memory: 16384
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module volcano.sh/k8s-device-plugin
2 |
3 | go 1.23
4 |
5 | require (
6 | github.com/NVIDIA/go-gpuallocator v0.5.0
7 | github.com/NVIDIA/go-nvlib v0.7.1
8 | github.com/NVIDIA/go-nvml v0.12.4-1
9 | github.com/fsnotify/fsnotify v1.4.9
10 | github.com/prometheus/client_golang v1.0.0
11 | github.com/spf13/cobra v0.0.5
12 | github.com/spf13/viper v1.3.2
13 | github.com/stretchr/testify v1.10.0
14 | github.com/urfave/cli/v2 v2.4.0
15 | golang.org/x/net v0.0.0-20200421231249-e086a090c8fd
16 | google.golang.org/grpc v1.32.0
17 | gopkg.in/yaml.v2 v2.2.8
18 | k8s.io/api v0.18.2
19 | k8s.io/apimachinery v0.18.2
20 | k8s.io/client-go v0.18.2
21 | k8s.io/klog v1.0.0
22 | k8s.io/klog/v2 v2.80.1
23 | k8s.io/kubelet v0.0.0
24 | sigs.k8s.io/yaml v1.2.0
25 | )
26 |
27 | require (
28 | github.com/beorn7/perks v1.0.0 // indirect
29 | github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect
30 | github.com/davecgh/go-spew v1.1.1 // indirect
31 | github.com/go-logr/logr v1.2.0 // indirect
32 | github.com/gogo/protobuf v1.3.1 // indirect
33 | github.com/golang/protobuf v1.5.0 // indirect
34 | github.com/google/go-cmp v0.5.5 // indirect
35 | github.com/google/gofuzz v1.1.0 // indirect
36 | github.com/google/uuid v1.6.0 // indirect
37 | github.com/googleapis/gnostic v0.1.0 // indirect
38 | github.com/hashicorp/golang-lru v0.5.1 // indirect
39 | github.com/hashicorp/hcl v1.0.0 // indirect
40 | github.com/imdario/mergo v0.3.5 // indirect
41 | github.com/inconshreveable/mousetrap v1.0.0 // indirect
42 | github.com/json-iterator/go v1.1.8 // indirect
43 | github.com/magiconair/properties v1.8.1 // indirect
44 | github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect
45 | github.com/mitchellh/mapstructure v1.1.2 // indirect
46 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
47 | github.com/modern-go/reflect2 v1.0.1 // indirect
48 | github.com/pelletier/go-toml v1.2.0 // indirect
49 | github.com/pmezard/go-difflib v1.0.0 // indirect
50 | github.com/prometheus/client_model v0.2.0 // indirect
51 | github.com/prometheus/common v0.4.1 // indirect
52 | github.com/prometheus/procfs v0.0.2 // indirect
53 | github.com/russross/blackfriday/v2 v2.1.0 // indirect
54 | github.com/spf13/afero v1.2.2 // indirect
55 | github.com/spf13/cast v1.3.0 // indirect
56 | github.com/spf13/jwalterweatherman v1.1.0 // indirect
57 | github.com/spf13/pflag v1.0.5 // indirect
58 | golang.org/x/crypto v0.0.0-20200220183623-bac4c82f6975 // indirect
59 | golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 // indirect
60 | golang.org/x/sys v0.0.0-20200413165638-669c56c373c4 // indirect
61 | golang.org/x/text v0.3.2 // indirect
62 | golang.org/x/time v0.0.0-20190308202827-9d24e82272b4 // indirect
63 | google.golang.org/appengine v1.5.0 // indirect
64 | google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55 // indirect
65 | google.golang.org/protobuf v1.34.2 // indirect
66 | gopkg.in/inf.v0 v0.9.1 // indirect
67 | gopkg.in/yaml.v3 v3.0.1 // indirect
68 | k8s.io/utils v0.0.0-20200324210504-a9aa75ae1b89 // indirect
69 | sigs.k8s.io/structured-merge-diff/v3 v3.0.0 // indirect
70 | )
71 |
72 | replace (
73 | k8s.io/api => k8s.io/api v0.18.2
74 | k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.18.2
75 | k8s.io/apimachinery => k8s.io/apimachinery v0.18.2
76 | k8s.io/apiserver => k8s.io/apiserver v0.18.2
77 | k8s.io/cli-runtime => k8s.io/cli-runtime v0.18.2
78 | k8s.io/client-go => k8s.io/client-go v0.18.2
79 | k8s.io/cloud-provider => k8s.io/cloud-provider v0.18.2
80 | k8s.io/cluster-bootstrap => k8s.io/cluster-bootstrap v0.18.2
81 | k8s.io/code-generator => k8s.io/code-generator v0.18.2
82 | k8s.io/component-base => k8s.io/component-base v0.18.2
83 | k8s.io/cri-api => k8s.io/cri-api v0.18.2
84 | k8s.io/csi-translation-lib => k8s.io/csi-translation-lib v0.18.2
85 | k8s.io/kube-aggregator => k8s.io/kube-aggregator v0.18.2
86 | k8s.io/kube-controller-manager => k8s.io/kube-controller-manager v0.18.2
87 | k8s.io/kube-proxy => k8s.io/kube-proxy v0.18.2
88 | k8s.io/kube-scheduler => k8s.io/kube-scheduler v0.18.2
89 | k8s.io/kubectl => k8s.io/kubectl v0.18.2
90 | k8s.io/kubelet => k8s.io/kubelet v0.18.2
91 | k8s.io/kubernetes => k8s.io/kubernetes v1.18.2
92 | k8s.io/legacy-cloud-providers => k8s.io/legacy-cloud-providers v0.18.2
93 | k8s.io/metrics => k8s.io/metrics v0.18.2
94 | k8s.io/sample-apiserver => k8s.io/sample-apiserver v0.18.2
95 | )
96 |
--------------------------------------------------------------------------------
/lib/nvidia/ld.so.preload:
--------------------------------------------------------------------------------
1 | /usr/local/vgpu/libvgpu.so
--------------------------------------------------------------------------------
/pkg/apis/config.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2022 The Volcano Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package apis
18 |
19 | import (
20 | "fmt"
21 | "io"
22 | "log"
23 | "os"
24 |
25 | cli "github.com/urfave/cli/v2"
26 | "sigs.k8s.io/yaml"
27 | )
28 |
// Version indicates the version of the 'Config' struct used to hold configuration information.
const Version = "v1beta1"

// Config is a versioned struct used to hold configuration information.
type Config struct {
	// Version of the config schema; parseConfigFrom requires it to be empty
	// or equal to the package-level Version ("v1beta1").
	Version string `json:"version" yaml:"version"`
	// Flags holds the flag values loaded from the config file and/or command line.
	Flags Flags `json:"flags,omitempty" yaml:"flags,omitempty"`
}
37 |
38 | // NewConfig builds out a Config struct from a config file (or command line flags).
39 | // The data stored in the config will be populated in order of precedence from
40 | // (1) command line, (2) environment variable, (3) config file.
41 | func NewConfig(c *cli.Context, flags []cli.Flag) (*Config, error) {
42 | config := &Config{
43 | Version: Version,
44 | }
45 |
46 | log.Println(c.String("gpu-strategy"))
47 | log.Println(c.Uint("gpu-memory-factor"))
48 |
49 | configFile := c.String("config-file")
50 | if configFile != "" {
51 | var err error
52 | config, err = parseConfig(configFile)
53 | if err != nil {
54 | return nil, fmt.Errorf("unable to parse config file: %v", err)
55 | }
56 | }
57 |
58 | config.Flags.CommandLineFlags = NewCommandLineFlags(c)
59 |
60 | return config, nil
61 | }
62 |
63 | // parseConfig parses a config file as either YAML of JSON and unmarshals it into a Config struct.
64 | func parseConfig(configFile string) (*Config, error) {
65 | reader, err := os.Open(configFile)
66 | if err != nil {
67 | return nil, fmt.Errorf("error opening config file: %v", err)
68 | }
69 | defer reader.Close()
70 |
71 | config, err := parseConfigFrom(reader)
72 | if err != nil {
73 | return nil, fmt.Errorf("error parsing config file: %v", err)
74 | }
75 |
76 | return config, nil
77 | }
78 |
79 | func parseConfigFrom(reader io.Reader) (*Config, error) {
80 | var err error
81 | var configYaml []byte
82 |
83 | configYaml, err = io.ReadAll(reader)
84 | if err != nil {
85 | return nil, fmt.Errorf("read error: %v", err)
86 | }
87 |
88 | var config Config
89 | err = yaml.Unmarshal(configYaml, &config)
90 | if err != nil {
91 | return nil, fmt.Errorf("unmarshal error: %v", err)
92 | }
93 |
94 | if config.Version == "" {
95 | config.Version = Version
96 | }
97 |
98 | if config.Version != Version {
99 | return nil, fmt.Errorf("unknown version: %v", config.Version)
100 | }
101 |
102 | return &config, nil
103 | }
104 |
--------------------------------------------------------------------------------
/pkg/apis/flags.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2022 The Volcano Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package apis
18 |
19 | import (
20 | cli "github.com/urfave/cli/v2"
21 | )
22 |
// Flags holds the full list of flags used to configure the device plugin and GFD.
type Flags struct {
	// Embedded pointer so the command-line flags (un)marshal at the top level
	// of the "flags" object (see flags_test.go).
	*CommandLineFlags
}

// CommandLineFlags holds the list of command line flags used to configure the device plugin and GFD.
type CommandLineFlags struct {
	// GPUStrategy selects how GPU devices are exposed ("number" or "share").
	GPUStrategy string `json:"GPUStrategy" yaml:"GPUStrategy"`
	// GPUMemoryFactor is the block size (in MB) of one shared-memory virtual device unit.
	GPUMemoryFactor uint `json:"GPUMemoryFactor" yaml:"GPUMemoryFactor"`
}
33 |
// NewCommandLineFlags populates a CommandLineFlags struct from the parsed
// cli context (values already resolved by the cli library from flag,
// environment variable, or default).
func NewCommandLineFlags(c *cli.Context) *CommandLineFlags {
	return &CommandLineFlags{
		GPUStrategy:     c.String("gpu-strategy"),
		GPUMemoryFactor: c.Uint("gpu-memory-factor"),
	}
}
40 |
--------------------------------------------------------------------------------
/pkg/apis/flags_test.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2022 The Volcano Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package apis
18 |
19 | import (
20 | "encoding/json"
21 | "fmt"
22 | "testing"
23 |
24 | "github.com/stretchr/testify/require"
25 | )
26 |
// TestUnmarshalFlags checks JSON -> Flags decoding: empty input errors,
// an empty object leaves the embedded pointer nil, and a populated object
// allocates and fills CommandLineFlags.
func TestUnmarshalFlags(t *testing.T) {
	testCases := []struct {
		input  string
		output Flags
		err    bool
	}{
		{
			input: ``,
			err:   true,
		},
		{
			input:  `{}`,
			output: Flags{},
		},
		{
			input: `{
				"GPUStrategy": "number"
			}`,
			output: Flags{
				&CommandLineFlags{
					GPUStrategy: "number",
				},
			},
		},
	}

	for i, tc := range testCases {
		t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
			var output Flags
			err := json.Unmarshal([]byte(tc.input), &output)
			if tc.err {
				require.Error(t, err)
				return
			}
			require.NoError(t, err)
			require.Equal(t, tc.output, output)
		})
	}
}
66 |
// TestMarshalFlags checks Flags -> JSON encoding; JSONEq ignores whitespace
// so only the structure/values are compared.
func TestMarshalFlags(t *testing.T) {
	testCases := []struct {
		input  Flags
		output string
		err    bool
	}{
		{
			input: Flags{
				&CommandLineFlags{
					GPUStrategy: "number",
				},
			},
			output: `{
				"GPUStrategy": "number"
			}`,
		},
	}

	for i, tc := range testCases {
		t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
			output, err := json.Marshal(tc.input)
			if tc.err {
				require.Error(t, err)
				return
			}
			require.NoError(t, err)
			require.JSONEq(t, tc.output, string(output))
		})
	}
}
97 |
--------------------------------------------------------------------------------
/pkg/filewatcher/filewatcher.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2023 The Volcano Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package filewatcher
18 |
19 | import (
20 | "github.com/fsnotify/fsnotify"
21 | )
22 |
23 | // NewFileWatcher creates a file watcher watching the given files.
24 | func NewFileWatcher(files ...string) (*fsnotify.Watcher, error) {
25 | watcher, err := fsnotify.NewWatcher()
26 | if err != nil {
27 | return nil, err
28 | }
29 |
30 | for _, f := range files {
31 | err = watcher.Add(f)
32 | if err != nil {
33 | watcher.Close()
34 | return nil, err
35 | }
36 | }
37 |
38 | return watcher, nil
39 | }
40 |
--------------------------------------------------------------------------------
/pkg/gpu/doc.go:
--------------------------------------------------------------------------------
1 | package gpu
2 |
--------------------------------------------------------------------------------
/pkg/lock/nodelock.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2023 The Volcano Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package lock
18 |
19 | import (
20 | "context"
21 | "fmt"
22 | "os"
23 | "path/filepath"
24 | "time"
25 |
26 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
27 | "k8s.io/client-go/kubernetes"
28 | "k8s.io/client-go/rest"
29 | "k8s.io/client-go/tools/clientcmd"
30 | "k8s.io/klog/v2"
31 | )
32 |
// MaxLockRetry is the number of update retries before a lock/release
// operation gives up.
const MaxLockRetry = 5

// kubeClient is the package-wide Kubernetes client set by NewClient/UseClient.
var kubeClient kubernetes.Interface
36 |
// GetClient returns the package-level Kubernetes client (nil until NewClient
// or UseClient has been called).
func GetClient() kubernetes.Interface {
	return kubeClient
}
40 |
41 | // NewClient connects to an API server
42 | func NewClient() (kubernetes.Interface, error) {
43 | kubeConfig := os.Getenv("KUBECONFIG")
44 | if kubeConfig == "" {
45 | kubeConfig = filepath.Join(os.Getenv("HOME"), ".kube", "config")
46 | }
47 | config, err := rest.InClusterConfig()
48 | if err != nil {
49 | config, err = clientcmd.BuildConfigFromFlags("", kubeConfig)
50 | if err != nil {
51 | return nil, err
52 | }
53 | }
54 | client, err := kubernetes.NewForConfig(config)
55 | kubeClient = client
56 | return client, err
57 | }
58 |
// UseClient uses existing client: it injects a pre-built client into the
// package-level kubeClient (e.g. to share one client across packages or in
// tests). Always returns nil.
func UseClient(client kubernetes.Interface) error {
	kubeClient = client
	return nil
}
64 |
65 | func setNodeLock(nodeName string, lockName string) error {
66 | ctx := context.Background()
67 | node, err := kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
68 | if err != nil {
69 | klog.Errorln("get node failed", err.Error())
70 | return err
71 | }
72 | if _, ok := node.ObjectMeta.Annotations[lockName]; ok {
73 | return fmt.Errorf("node %s is locked", nodeName)
74 | }
75 | newNode := node.DeepCopy()
76 | newNode.ObjectMeta.Annotations[lockName] = time.Now().Format(time.RFC3339)
77 | _, err = kubeClient.CoreV1().Nodes().Update(ctx, newNode, metav1.UpdateOptions{})
78 | for i := 0; i < MaxLockRetry && err != nil; i++ {
79 | klog.ErrorS(err, "Failed to update node", "node", nodeName, "retry", i)
80 | time.Sleep(100 * time.Millisecond)
81 | node, err = kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
82 | if err != nil {
83 | klog.ErrorS(err, "Failed to get node when retry to update", "node", nodeName)
84 | continue
85 | }
86 | newNode := node.DeepCopy()
87 | newNode.ObjectMeta.Annotations[lockName] = time.Now().Format(time.RFC3339)
88 | _, err = kubeClient.CoreV1().Nodes().Update(ctx, newNode, metav1.UpdateOptions{})
89 | }
90 | if err != nil {
91 | return fmt.Errorf("setNodeLock exceeds retry count %d", MaxLockRetry)
92 | }
93 | klog.V(3).InfoS("Node lock set", "node", nodeName)
94 | return nil
95 | }
96 |
// ReleaseNodeLock releases a certain lock on a certain device by deleting the
// lockName annotation from the node. It is a no-op when the annotation is
// absent; failed updates are retried up to MaxLockRetry times.
func ReleaseNodeLock(nodeName string, lockName string) error {
	ctx := context.Background()
	node, err := kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
	if err != nil {
		return err
	}
	// Nothing to release.
	if _, ok := node.ObjectMeta.Annotations[lockName]; !ok {
		klog.V(3).InfoS("Node lock not set", "node", nodeName, "lock", lockName)
		return nil
	}
	newNode := node.DeepCopy()
	delete(newNode.ObjectMeta.Annotations, lockName)
	_, err = kubeClient.CoreV1().Nodes().Update(ctx, newNode, metav1.UpdateOptions{})
	// On update failure (e.g. conflict), re-read the node and retry the delete.
	for i := 0; i < MaxLockRetry && err != nil; i++ {
		klog.ErrorS(err, "Failed to update node", "node", nodeName, "retry", i)
		time.Sleep(100 * time.Millisecond)
		node, err = kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
		if err != nil {
			klog.ErrorS(err, "Failed to get node when retry to update", "node", nodeName)
			continue
		}
		newNode := node.DeepCopy()
		delete(newNode.ObjectMeta.Annotations, lockName)
		_, err = kubeClient.CoreV1().Nodes().Update(ctx, newNode, metav1.UpdateOptions{})
	}
	if err != nil {
		return fmt.Errorf("releaseNodeLock exceeds retry count %d", MaxLockRetry)
	}
	klog.V(3).InfoS("Node lock released", "node", nodeName)
	return nil
}
129 |
// LockNode locks a device on a certain node via the lockName annotation.
// If no lock exists it is acquired; a lock older than 5 minutes is treated
// as stale (released and re-acquired); a fresher lock makes this call fail.
func LockNode(nodeName string, lockName string) error {
	ctx := context.Background()
	node, err := kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
	if err != nil {
		return err
	}
	// No lock present: take it.
	if _, ok := node.ObjectMeta.Annotations[lockName]; !ok {
		return setNodeLock(nodeName, lockName)
	}
	// The annotation value is the RFC3339 acquisition time written by setNodeLock.
	lockTime, err := time.Parse(time.RFC3339, node.ObjectMeta.Annotations[lockName])
	if err != nil {
		return err
	}
	// Stale lock: break it, then re-acquire.
	if time.Since(lockTime) > time.Minute*5 {
		klog.InfoS("Node lock expired", "node", nodeName, "lockTime", lockTime)
		err = ReleaseNodeLock(nodeName, lockName)
		if err != nil {
			klog.ErrorS(err, "Failed to release node lock", "node", nodeName)
			return err
		}
		return setNodeLock(nodeName, lockName)
	}
	return fmt.Errorf("node %s has been locked within 5 minutes", nodeName)
}
155 |
--------------------------------------------------------------------------------
/pkg/monitor/nvidia/cudevshr.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2024 The HAMi Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package nvidia
18 |
19 | import (
20 | "context"
21 | "errors"
22 | "fmt"
23 | "os"
24 | "path/filepath"
25 | "strings"
26 | "sync"
27 | "syscall"
28 | "time"
29 | "unsafe"
30 |
31 | v0 "volcano.sh/k8s-device-plugin/pkg/monitor/nvidia/v0"
32 | v1 "volcano.sh/k8s-device-plugin/pkg/monitor/nvidia/v1"
33 |
34 | corev1 "k8s.io/api/core/v1"
35 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
36 | "k8s.io/client-go/kubernetes"
37 | "k8s.io/client-go/tools/clientcmd"
38 | "k8s.io/klog/v2"
39 | )
40 |
41 | const SharedRegionMagicFlag = 19920718
42 |
43 | type headerT struct {
44 | initializedFlag int32
45 | majorVersion int32
46 | minorVersion int32
47 | }
48 |
49 | type UsageInfo interface {
50 | DeviceMax() int
51 | DeviceNum() int
52 | DeviceMemoryContextSize(idx int) uint64
53 | DeviceMemoryModuleSize(idx int) uint64
54 | DeviceMemoryBufferSize(idx int) uint64
55 | DeviceMemoryOffset(idx int) uint64
56 | DeviceMemoryTotal(idx int) uint64
57 | DeviceSmUtil(idx int) uint64
58 | IsValidUUID(idx int) bool
59 | DeviceUUID(idx int) string
60 | DeviceMemoryLimit(idx int) uint64
61 | LastKernelTime() int64
62 | //UsedMemory(idx int) (uint64, error)
63 | GetPriority() int
64 | GetRecentKernel() int32
65 | SetRecentKernel(v int32)
66 | GetUtilizationSwitch() int32
67 | SetUtilizationSwitch(v int32)
68 | }
69 |
70 | type ContainerUsage struct {
71 | PodUID string
72 | ContainerName string
73 | data []byte
74 | Info UsageInfo
75 | }
76 |
77 | type ContainerLister struct {
78 | containerPath string
79 | containers map[string]*ContainerUsage
80 | mutex sync.Mutex
81 | clientset *kubernetes.Clientset
82 | }
83 |
// NewContainerLister builds a ContainerLister rooted at $HOOK_PATH/containers
// with a Kubernetes clientset built from $KUBECONFIG.
// NOTE(review): per client-go docs BuildConfigFromFlags("", "") falls back to
// in-cluster config when KUBECONFIG is empty — confirm that matches the
// monitor's deployment (pkg/lock.NewClient probes in-cluster explicitly).
func NewContainerLister() (*ContainerLister, error) {
	hookPath, ok := os.LookupEnv("HOOK_PATH")
	if !ok {
		return nil, fmt.Errorf("HOOK_PATH not set")
	}
	config, err := clientcmd.BuildConfigFromFlags("", os.Getenv("KUBECONFIG"))
	if err != nil {
		klog.Errorf("Failed to build kubeconfig: %v", err)
		return nil, err
	}
	clientset, err := kubernetes.NewForConfig(config)
	if err != nil {
		klog.Errorf("Failed to build clientset: %v", err)
		return nil, err
	}
	return &ContainerLister{
		containerPath: filepath.Join(hookPath, "containers"),
		containers:    make(map[string]*ContainerUsage),
		clientset:     clientset,
	}, nil
}
105 |
// Lock acquires the lister's mutex. Callers must hold it while iterating the
// map returned by ListContainers, since Update mutates it concurrently.
func (l *ContainerLister) Lock() {
	l.mutex.Lock()
}

// UnLock releases the mutex acquired by Lock.
func (l *ContainerLister) UnLock() {
	l.mutex.Unlock()
}

// ListContainers returns the internal container map without copying; guard
// all access with Lock/UnLock.
func (l *ContainerLister) ListContainers() map[string]*ContainerUsage {
	return l.containers
}

// Clientset exposes the Kubernetes client used for pod listing.
func (l *ContainerLister) Clientset() *kubernetes.Clientset {
	return l.clientset
}
121 |
122 | func (l *ContainerLister) Update() error {
123 | pods, err := l.clientset.CoreV1().Pods("").List(context.Background(), metav1.ListOptions{})
124 | if err != nil {
125 | return err
126 | }
127 |
128 | l.mutex.Lock()
129 | defer l.mutex.Unlock()
130 | entries, err := os.ReadDir(l.containerPath)
131 | if err != nil {
132 | return err
133 | }
134 | for _, entry := range entries {
135 | if !entry.IsDir() {
136 | continue
137 | }
138 | dirName := filepath.Join(l.containerPath, entry.Name())
139 | if !isValidPod(entry.Name(), pods) {
140 | dirInfo, err := os.Stat(dirName)
141 | if err == nil && dirInfo.ModTime().Add(time.Second*300).After(time.Now()) {
142 | continue
143 | }
144 | klog.Infof("Removing dirname %s in monitorpath", dirName)
145 | if c, ok := l.containers[entry.Name()]; ok {
146 | syscall.Munmap(c.data)
147 | delete(l.containers, entry.Name())
148 | }
149 | _ = os.RemoveAll(dirName)
150 | continue
151 | }
152 | if _, ok := l.containers[entry.Name()]; ok {
153 | continue
154 | }
155 | usage, err := loadCache(dirName)
156 | if err != nil {
157 | klog.Errorf("Failed to load cache: %s, error: %v", dirName, err)
158 | continue
159 | }
160 | if usage == nil {
161 | // no cuInit in container
162 | continue
163 | }
164 | usage.PodUID = strings.Split(entry.Name(), "_")[0]
165 | usage.ContainerName = strings.Split(entry.Name(), "_")[1]
166 | l.containers[entry.Name()] = usage
167 | klog.Infof("Adding ctr dirname %s in monitorpath", dirName)
168 | }
169 | return nil
170 | }
171 |
// loadCache locates the hook's ".cache" file inside a container directory and
// maps it read-write into this process.
//
// Returns (nil, nil) when the directory is empty or holds no cache file yet
// (the container has not called cuInit). Returns an error when the directory
// layout or file contents are not recognized. On success the mapping stays
// alive until the caller munmaps the returned usage.data.
func loadCache(fpath string) (*ContainerUsage, error) {
	klog.Infof("Checking path %s", fpath)
	files, err := os.ReadDir(fpath)
	if err != nil {
		return nil, err
	}
	// Expect at most two entries: the injected libvgpu.so plus one cache file.
	if len(files) > 2 {
		return nil, errors.New("cache num not matched")
	}
	if len(files) == 0 {
		return nil, nil
	}
	cacheFile := ""
	for _, val := range files {
		if strings.Contains(val.Name(), "libvgpu.so") {
			continue
		}
		if !strings.Contains(val.Name(), ".cache") {
			continue
		}
		cacheFile = filepath.Join(fpath, val.Name())
		break
	}
	if cacheFile == "" {
		klog.Infof("No cache file in %s", fpath)
		return nil, nil
	}
	info, err := os.Stat(cacheFile)
	if err != nil {
		klog.Errorf("Failed to stat cache file: %s, error: %v", cacheFile, err)
		return nil, err
	}
	// The header must fit before its magic/version fields can be inspected.
	if info.Size() < int64(unsafe.Sizeof(headerT{})) {
		return nil, fmt.Errorf("cache file size %d too small", info.Size())
	}
	f, err := os.OpenFile(cacheFile, os.O_RDWR, 0666)
	if err != nil {
		klog.Errorf("Failed to open cache file: %s, error: %v", cacheFile, err)
		return nil, err
	}
	// The fd can be closed as soon as the mapping exists; the mapping keeps
	// the contents accessible.
	defer func(f *os.File) {
		_ = f.Close()
	}(f)
	usage := &ContainerUsage{}
	usage.data, err = syscall.Mmap(int(f.Fd()), 0, int(info.Size()), syscall.PROT_WRITE|syscall.PROT_READ, syscall.MAP_SHARED)
	if err != nil {
		klog.Errorf("Failed to mmap cache file: %s, error: %v", cacheFile, err)
		return nil, err
	}
	head := (*headerT)(unsafe.Pointer(&usage.data[0]))
	if head.initializedFlag != SharedRegionMagicFlag {
		_ = syscall.Munmap(usage.data)
		return nil, fmt.Errorf("cache file magic flag not matched")
	}
	// v0 regions predate the version fields and are recognized by their exact
	// size (1197897 bytes — presumably the v0 sharedRegionT size; confirm
	// against the hook library). Anything else must declare major version 1.
	if info.Size() == 1197897 {
		usage.Info = v0.CastSpec(usage.data)
	} else if head.majorVersion == 1 {
		usage.Info = v1.CastSpec(usage.data)
	} else {
		_ = syscall.Munmap(usage.data)
		return nil, fmt.Errorf("unknown cache file size %d version %d.%d", info.Size(), head.majorVersion, head.minorVersion)
	}
	return usage, nil
}
236 |
237 | func isValidPod(name string, pods *corev1.PodList) bool {
238 | for _, val := range pods.Items {
239 | if strings.Contains(name, string(val.UID)) {
240 | return true
241 | }
242 | }
243 | return false
244 | }
245 |
--------------------------------------------------------------------------------
/pkg/monitor/nvidia/v0/spec.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2024 The HAMi Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package v0
18 |
19 | import "unsafe"
20 |
// maxDevices is the fixed capacity of every per-device array in the shared
// region; DeviceMax exposes it through the UsageInfo interface.
const maxDevices = 16

// The types below mirror, field for field, the v0 shared-region layout that
// the in-container CUDA hook writes. They are accessed through an unsafe cast
// of an mmap'd file (see CastSpec), so field order, widths and array lengths
// are ABI and must never change.

// deviceMemory is the per-device memory accounting of one process slot.
type deviceMemory struct {
	contextSize uint64
	moduleSize  uint64
	bufferSize  uint64
	offset      uint64
	total       uint64
}

// deviceUtilization holds the per-device utilization counters of one slot.
type deviceUtilization struct {
	decUtil uint64
	encUtil uint64
	smUtil  uint64
}

// shrregProcSlotT is one process slot: per-device memory and utilization for
// a single pid using the GPU inside the container.
type shrregProcSlotT struct {
	pid         int32
	hostpid     int32
	used        [16]deviceMemory
	monitorused [16]uint64
	deviceUtil  [16]deviceUtilization
	status      int32
}

// uuid is a fixed-size, NUL-padded GPU UUID buffer.
type uuid struct {
	uuid [96]byte
}

// semT reserves the space of the hook's semaphore within the region.
type semT struct {
	sem [32]byte
}

// sharedRegionT is the complete v0 region. Unlike v1 it carries no version
// fields; loadCache recognizes v0 purely by the region's exact file size.
type sharedRegionT struct {
	initializedFlag int32
	smInitFlag      int32
	ownerPid        uint32
	sem             semT
	num             uint64
	uuids           [16]uuid

	limit   [16]uint64
	smLimit [16]uint64
	procs   [1024]shrregProcSlotT

	procnum           int32
	utilizationSwitch int32
	recentKernel      int32
	priority          int32
}

// Spec adapts a mapped v0 region to the UsageInfo interface.
type Spec struct {
	sr *sharedRegionT
}

// DeviceMax returns the fixed capacity of the per-device arrays.
func (s Spec) DeviceMax() int {
	return maxDevices
}

// DeviceNum returns how many devices are actually recorded in the region.
func (s Spec) DeviceNum() int {
	return int(s.sr.num)
}

// DeviceMemoryContextSize sums CUDA-context memory of device idx across all
// process slots. idx must be in [0, DeviceMax).
func (s Spec) DeviceMemoryContextSize(idx int) uint64 {
	v := uint64(0)
	for _, p := range s.sr.procs {
		v += p.used[idx].contextSize
	}
	return v
}

// DeviceMemoryModuleSize sums module memory of device idx across all slots.
func (s Spec) DeviceMemoryModuleSize(idx int) uint64 {
	v := uint64(0)
	for _, p := range s.sr.procs {
		v += p.used[idx].moduleSize
	}
	return v
}

// DeviceMemoryBufferSize sums buffer memory of device idx across all slots.
func (s Spec) DeviceMemoryBufferSize(idx int) uint64 {
	v := uint64(0)
	for _, p := range s.sr.procs {
		v += p.used[idx].bufferSize
	}
	return v
}

// DeviceMemoryOffset sums the memory offsets of device idx across all slots.
func (s Spec) DeviceMemoryOffset(idx int) uint64 {
	v := uint64(0)
	for _, p := range s.sr.procs {
		v += p.used[idx].offset
	}
	return v
}

// DeviceMemoryTotal sums total used memory of device idx across all slots.
func (s Spec) DeviceMemoryTotal(idx int) uint64 {
	v := uint64(0)
	for _, p := range s.sr.procs {
		v += p.deviceUtil[idx].smUtil
	}
	return v
}

// DeviceSmUtil sums SM utilization of device idx across all process slots.
func (s Spec) DeviceSmUtil(idx int) uint64 {
	v := uint64(0)
	for _, p := range s.sr.procs {
		v += p.deviceUtil[idx].smUtil
	}
	return v
}

// IsValidUUID reports whether slot idx holds a UUID (first byte non-NUL).
func (s Spec) IsValidUUID(idx int) bool {
	return s.sr.uuids[idx].uuid[0] != 0
}

// DeviceUUID returns the raw 96-byte UUID buffer of device idx as a string,
// including any trailing NUL padding (callers presumably trim — confirm).
func (s Spec) DeviceUUID(idx int) string {
	return string(s.sr.uuids[idx].uuid[:])
}

// DeviceMemoryLimit returns the configured memory limit for device idx.
func (s Spec) DeviceMemoryLimit(idx int) uint64 {
	return s.sr.limit[idx]
}

// LastKernelTime always returns 0: v0 regions do not record it.
func (s Spec) LastKernelTime() int64 {
	return 0
}

// CastSpec reinterprets the mmap'd bytes as a v0 shared region. data must be
// at least sharedRegionT-sized; loadCache guarantees this by matching the
// exact v0 region size before dispatching here.
func CastSpec(data []byte) Spec {
	return Spec{
		sr: (*sharedRegionT)(unsafe.Pointer(&data[0])),
	}
}

// func (s *SharedRegionT) UsedMemory(idx int) (uint64, error) {
// 	return 0, nil
// }

// GetPriority returns the region-wide scheduling priority.
func (s Spec) GetPriority() int {
	return int(s.sr.priority)
}

// GetRecentKernel reads the recent-kernel flag shared with the hook.
func (s Spec) GetRecentKernel() int32 {
	return s.sr.recentKernel
}

// SetRecentKernel writes v into the shared mapping, visible to the hook.
func (s Spec) SetRecentKernel(v int32) {
	s.sr.recentKernel = v
}

// GetUtilizationSwitch reads the utilization-limiting switch.
func (s Spec) GetUtilizationSwitch() int32 {
	return s.sr.utilizationSwitch
}

// SetUtilizationSwitch writes v into the shared mapping, visible to the hook.
func (s Spec) SetUtilizationSwitch(v int32) {
	s.sr.utilizationSwitch = v
}
177 |
--------------------------------------------------------------------------------
/pkg/monitor/nvidia/v1/spec.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2024 The HAMi Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package v1
18 |
19 | import "unsafe"
20 |
// maxDevices is the fixed capacity of every per-device array in the shared
// region; DeviceMax exposes it through the UsageInfo interface.
const maxDevices = 16

// The types below mirror, field for field, the v1 shared-region layout that
// the in-container CUDA hook writes. They are accessed through an unsafe cast
// of an mmap'd file (see CastSpec), so field order, widths and array lengths
// are ABI and must never change. The "unused" arrays are reserved padding
// that keeps room for future fields without changing struct sizes.

// deviceMemory is the per-device memory accounting of one process slot.
type deviceMemory struct {
	contextSize uint64
	moduleSize  uint64
	bufferSize  uint64
	offset      uint64
	total       uint64
	unused      [3]uint64
}

// deviceUtilization holds the per-device utilization counters of one slot.
type deviceUtilization struct {
	decUtil uint64
	encUtil uint64
	smUtil  uint64
	unused  [3]uint64
}

// shrregProcSlotT is one process slot: per-device memory and utilization for
// a single pid using the GPU inside the container.
type shrregProcSlotT struct {
	pid         int32
	hostpid     int32
	used        [16]deviceMemory
	monitorused [16]uint64
	deviceUtil  [16]deviceUtilization
	status      int32
	unused      [3]uint64
}

// uuid is a fixed-size, NUL-padded GPU UUID buffer.
type uuid struct {
	uuid [96]byte
}

// semT reserves the space of the hook's semaphore within the region.
type semT struct {
	sem [32]byte
}

// sharedRegionT is the complete v1 region. Unlike v0 it carries explicit
// major/minor version fields (checked by loadCache) and a lastKernelTime.
type sharedRegionT struct {
	initializedFlag int32
	majorVersion    int32
	minorVersion    int32
	smInitFlag      int32
	ownerPid        uint32
	sem             semT
	num             uint64
	uuids           [16]uuid

	limit   [16]uint64
	smLimit [16]uint64
	procs   [1024]shrregProcSlotT

	procnum           int32
	utilizationSwitch int32
	recentKernel      int32
	priority          int32
	lastKernelTime    int64
	unused            [4]uint64
}

// Spec adapts a mapped v1 region to the UsageInfo interface.
type Spec struct {
	sr *sharedRegionT
}

// DeviceMax returns the fixed capacity of the per-device arrays.
func (s Spec) DeviceMax() int {
	return maxDevices
}

// DeviceNum returns how many devices are actually recorded in the region.
func (s Spec) DeviceNum() int {
	return int(s.sr.num)
}

// DeviceMemoryContextSize sums CUDA-context memory of device idx across all
// process slots. idx must be in [0, DeviceMax).
func (s Spec) DeviceMemoryContextSize(idx int) uint64 {
	v := uint64(0)
	for _, p := range s.sr.procs {
		v += p.used[idx].contextSize
	}
	return v
}

// DeviceMemoryModuleSize sums module memory of device idx across all slots.
func (s Spec) DeviceMemoryModuleSize(idx int) uint64 {
	v := uint64(0)
	for _, p := range s.sr.procs {
		v += p.used[idx].moduleSize
	}
	return v
}

// DeviceMemoryBufferSize sums buffer memory of device idx across all slots.
func (s Spec) DeviceMemoryBufferSize(idx int) uint64 {
	v := uint64(0)
	for _, p := range s.sr.procs {
		v += p.used[idx].bufferSize
	}
	return v
}

// DeviceMemoryOffset sums the memory offsets of device idx across all slots.
func (s Spec) DeviceMemoryOffset(idx int) uint64 {
	v := uint64(0)
	for _, p := range s.sr.procs {
		v += p.used[idx].offset
	}
	return v
}

// DeviceMemoryTotal sums total used memory of device idx across all slots.
func (s Spec) DeviceMemoryTotal(idx int) uint64 {
	v := uint64(0)
	for _, p := range s.sr.procs {
		v += p.used[idx].total
	}
	return v
}

// DeviceSmUtil sums SM utilization of device idx across all process slots.
func (s Spec) DeviceSmUtil(idx int) uint64 {
	v := uint64(0)
	for _, p := range s.sr.procs {
		v += p.deviceUtil[idx].smUtil
	}
	return v
}

// IsValidUUID reports whether slot idx holds a UUID (first byte non-NUL).
func (s Spec) IsValidUUID(idx int) bool {
	return s.sr.uuids[idx].uuid[0] != 0
}

// DeviceUUID returns the raw 96-byte UUID buffer of device idx as a string,
// including any trailing NUL padding (callers presumably trim — confirm).
func (s Spec) DeviceUUID(idx int) string {
	return string(s.sr.uuids[idx].uuid[:])
}

// DeviceMemoryLimit returns the configured memory limit for device idx.
func (s Spec) DeviceMemoryLimit(idx int) uint64 {
	return s.sr.limit[idx]
}

// LastKernelTime returns the timestamp of the last kernel recorded by the
// hook (units as written by the hook library — confirm).
func (s Spec) LastKernelTime() int64 {
	return s.sr.lastKernelTime
}

// CastSpec reinterprets the mmap'd bytes as a v1 shared region. data must be
// large enough to hold sharedRegionT; loadCache dispatches here only after
// validating the header's magic and major version.
func CastSpec(data []byte) Spec {
	return Spec{
		sr: (*sharedRegionT)(unsafe.Pointer(&data[0])),
	}
}

// func (s *SharedRegionT) UsedMemory(idx int) (uint64, error) {
// 	return 0, nil
// }

// GetPriority returns the region-wide scheduling priority.
func (s Spec) GetPriority() int {
	return int(s.sr.priority)
}

// GetRecentKernel reads the recent-kernel flag shared with the hook.
func (s Spec) GetRecentKernel() int32 {
	return s.sr.recentKernel
}

// SetRecentKernel writes v into the shared mapping, visible to the hook.
func (s Spec) SetRecentKernel(v int32) {
	s.sr.recentKernel = v
}

// GetUtilizationSwitch reads the utilization-limiting switch.
func (s Spec) GetUtilizationSwitch() int32 {
	return s.sr.utilizationSwitch
}

// SetUtilizationSwitch writes v into the shared mapping, visible to the hook.
func (s Spec) SetUtilizationSwitch(v int32) {
	s.sr.utilizationSwitch = v
}
184 |
--------------------------------------------------------------------------------
/pkg/plugin/interface.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2023 The Volcano Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package plugin
18 |
// DevicePlugin is the common lifecycle interface implemented by every device
// plugin hosted by this binary.
type DevicePlugin interface {
	// Name returns the device plugin's name.
	Name() string
	// Start starts the plugin.
	Start() error
	// DevicesNum returns the number of devices residing on this node.
	DevicesNum() int
	// Stop stops the plugin.
	Stop() error
}
30 |
--------------------------------------------------------------------------------
/pkg/plugin/vgpu/cache.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2023 The Volcano Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package vgpu
18 |
19 | import (
20 | "sync"
21 |
22 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
23 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config"
24 | )
25 |
// DeviceCache tracks the node's GPU devices and fans health-change
// notifications out to registered subscriber channels. It embeds
// GpuDeviceManager for device discovery and health checking.
type DeviceCache struct {
	*GpuDeviceManager

	cache     []*Device               // snapshot of devices taken at Start
	stopCh    chan interface{}        // closed by Stop to end background goroutines
	unhealthy chan *Device            // receives devices reported unhealthy by CheckHealth
	notifyCh  map[string]chan *Device // per-subscriber notification channels
	mutex     sync.Mutex              // guards notifyCh
}
35 |
36 | func NewDeviceCache() *DeviceCache {
37 | skipMigEnabledGPUs := true
38 | if config.Mode == "mig" {
39 | skipMigEnabledGPUs = false
40 | }
41 | return &DeviceCache{
42 | GpuDeviceManager: NewGpuDeviceManager(skipMigEnabledGPUs),
43 | stopCh: make(chan interface{}),
44 | unhealthy: make(chan *Device),
45 | notifyCh: make(map[string]chan *Device),
46 | }
47 | }
48 |
// AddNotifyChannel registers ch to receive unhealthy-device notifications
// under the given subscriber name, replacing any previous registration.
func (d *DeviceCache) AddNotifyChannel(name string, ch chan *Device) {
	d.mutex.Lock()
	defer d.mutex.Unlock()
	d.notifyCh[name] = ch
}

// RemoveNotifyChannel unregisters the named subscriber; no-op when absent.
func (d *DeviceCache) RemoveNotifyChannel(name string) {
	d.mutex.Lock()
	defer d.mutex.Unlock()
	delete(d.notifyCh, name)
}

// Start snapshots the current device list and launches the health-check and
// notification fan-out goroutines; both run until Stop closes stopCh.
func (d *DeviceCache) Start() {
	d.cache = d.Devices()
	go d.CheckHealth(d.stopCh, d.cache, d.unhealthy)
	go d.notify()
}

// Stop terminates the goroutines started by Start.
func (d *DeviceCache) Stop() {
	close(d.stopCh)
}

// GetCache returns the device snapshot taken at Start (not refreshed after).
func (d *DeviceCache) GetCache() []*Device {
	return d.cache
}
74 |
// notify forwards every device received on the unhealthy channel to all
// registered subscriber channels, marking the device Unhealthy first.
// NOTE(review): the sends occur while mutex is held and the subscriber
// channels come from callers; a subscriber that is slow to read would block
// Add/RemoveNotifyChannel — confirm subscribers drain promptly.
func (d *DeviceCache) notify() {
	for {
		select {
		case <-d.stopCh:
			return
		case dev := <-d.unhealthy:
			dev.Health = pluginapi.Unhealthy
			d.mutex.Lock()
			for _, ch := range d.notifyCh {
				ch <- dev
			}
			d.mutex.Unlock()
		}
	}
}
90 |
--------------------------------------------------------------------------------
/pkg/plugin/vgpu/config/config.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2023 The Volcano Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package config
18 |
19 | import (
20 | "sync"
21 |
22 | "github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
23 | "github.com/NVIDIA/go-nvml/pkg/nvml"
24 | )
25 |
// NvidiaConfig is the "nvidia" section of the device-plugin configuration
// file: the advertised resource names, per-container defaults, and the
// sharing/scaling knobs for vGPU slicing.
type NvidiaConfig struct {
	ResourceCountName            string                 `yaml:"resourceCountName"`
	ResourceMemoryName           string                 `yaml:"resourceMemoryName"`
	ResourceCoreName             string                 `yaml:"resourceCoreName"`
	ResourceMemoryPercentageName string                 `yaml:"resourceMemoryPercentageName"`
	ResourcePriority             string                 `yaml:"resourcePriorityName"`
	OverwriteEnv                 bool                   `yaml:"overwriteEnv"`
	DefaultMemory                int32                  `yaml:"defaultMemory"`
	DefaultCores                 int32                  `yaml:"defaultCores"`
	DefaultGPUNum                int32                  `yaml:"defaultGPUNum"`
	DeviceSplitCount             uint                   `yaml:"deviceSplitCount"`
	DeviceMemoryScaling          float64                `yaml:"deviceMemoryScaling"`
	DeviceCoreScaling            float64                `yaml:"deviceCoreScaling"`
	DisableCoreLimit             bool                   `yaml:"disableCoreLimit"`
	MigGeometriesList            []AllowedMigGeometries `yaml:"knownMigGeometries"`
	GPUMemoryFactor              uint                   `yaml:"gpuMemoryFactor"`
}
43 |
var (
	// nvmllib is the process-wide NVML handle shared by all helpers.
	nvmllib = nvml.New()

	// lock guards the lazy initialization of globalDevice in Device().
	lock         sync.Mutex
	globalDevice device.Interface
)

var (
	// DevicePluginFilterDevice lists devices the device plugin must filter
	// out, i.e. not register.
	DevicePluginFilterDevice *FilterDevice
)

// Nvml returns the shared NVML interface.
func Nvml() nvml.Interface {
	return nvmllib
}
59 |
60 | func Device() device.Interface {
61 | if globalDevice != nil {
62 | return globalDevice
63 | }
64 |
65 | lock.Lock()
66 | defer lock.Unlock()
67 |
68 | globalDevice = device.New(nvmllib)
69 | return globalDevice
70 | }
71 |
// Runtime settings populated from command-line flags / node configuration.
var (
	DeviceSplitCount   uint    // number of vGPU slices each physical GPU is split into
	GPUMemoryFactor    uint    // scaling factor applied to GPU memory units
	Mode               string  // operating mode; "mig" enables MIG handling (see DeviceCache) — confirm full value set against flag definitions
	DeviceCoresScaling float64 // oversubscription ratio for GPU cores
	NodeName           string  // name of the node this plugin runs on
	RuntimeSocketFlag  string  // path of the container-runtime socket
	DisableCoreLimit   bool    // when true, per-container core limiting is disabled
)
81 |
// MigTemplate is one MIG profile entry within a geometry: profile name,
// memory size and instance count.
type MigTemplate struct {
	Name   string `yaml:"name"`
	Memory int32  `yaml:"memory"`
	Count  int32  `yaml:"count"`
}

// MigTemplateUsage is a MigTemplate instance annotated with whether it is
// currently in use.
type MigTemplateUsage struct {
	Name   string `json:"name,omitempty"`
	Memory int32  `json:"memory,omitempty"`
	InUse  bool   `json:"inuse,omitempty"`
}

// Geometry is a named group of MIG profile templates.
type Geometry struct {
	Group     string        `yaml:"group"`
	Instances []MigTemplate `yaml:"geometries"`
}

// MIGS is a list of MIG template usages.
type MIGS []MigTemplateUsage

// MigInUse records which geometry (by index) is active and its usage list.
type MigInUse struct {
	Index     int32
	UsageList MIGS
}

// AllowedMigGeometries maps GPU models to the MIG geometries they support.
type AllowedMigGeometries struct {
	Models     []string   `yaml:"models"`
	Geometries []Geometry `yaml:"allowedGeometries"`
}

// Config is the root of the device-plugin configuration file.
type Config struct {
	NvidiaConfig NvidiaConfig `yaml:"nvidia"`
}

// MigPartedSpec mirrors the nvidia mig-parted config: named sets of MIG
// configurations keyed by config name.
type MigPartedSpec struct {
	Version    string                        `json:"version"    yaml:"version"`
	MigConfigs map[string]MigConfigSpecSlice `json:"mig-configs,omitempty" yaml:"mig-configs,omitempty"`
}

// MigConfigSpec defines the spec to declare the desired MIG configuration for a set of GPUs.
type MigConfigSpec struct {
	DeviceFilter interface{}      `json:"device-filter,omitempty" yaml:"device-filter,flow,omitempty"`
	Devices      []int32          `json:"devices"     yaml:"devices,flow"`
	MigEnabled   bool             `json:"mig-enabled" yaml:"mig-enabled"`
	MigDevices   map[string]int32 `json:"mig-devices" yaml:"mig-devices"`
}

// MigConfigSpecSlice represents a slice of 'MigConfigSpec'.
type MigConfigSpecSlice []MigConfigSpec

// FilterDevice selects devices to exclude, by UUID and/or index.
type FilterDevice struct {
	// UUID is the device ID.
	UUID []string `json:"uuid"`
	// Index is the device index.
	Index []uint `json:"index"`
}

// DevicePluginConfigs is the per-node device-plugin configuration document:
// one entry per node with that node's operating mode and scaling overrides.
type DevicePluginConfigs struct {
	Nodeconfig []struct {
		Name                string        `json:"name"`
		OperatingMode       string        `json:"operatingmode"`
		Devicememoryscaling float64       `json:"devicememoryscaling"`
		Devicecorescaling   float64       `json:"devicecorescaling"`
		Devicesplitcount    uint          `json:"devicesplitcount"`
		Migstrategy         string        `json:"migstrategy"`
		FilterDevice        *FilterDevice `json:"filterdevices"`
	} `json:"nodeconfig"`
}
149 |
--------------------------------------------------------------------------------
/pkg/plugin/vgpu/config/version.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2023 The Volcano Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package config
18 |
19 | import (
20 | "fmt"
21 |
22 | "github.com/spf13/cobra"
23 | )
24 |
var (
	// version is the version string reported by Version(); empty unless set
	// elsewhere — presumably injected at build time via -ldflags -X, as
	// nothing in this file assigns it. TODO confirm the build flags.
	version string
	// VersionCmd is a cobra subcommand that prints the version and returns.
	VersionCmd = &cobra.Command{
		Use:   "version",
		Short: "print version",
		Run: func(cmd *cobra.Command, args []string) {
			fmt.Println(Version())
		},
	}
)

// Version returns the version string.
func Version() string {
	return version
}
39 |
--------------------------------------------------------------------------------
/pkg/plugin/vgpu/helper.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2025 The Volcano Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package vgpu
18 |
// int8Slice wraps an []int8 with more functions.
type int8Slice []int8

// String converts the slice to a Go string, treating the first zero byte
// (NUL) as the terminator; a slice with no NUL is converted in full.
func (s int8Slice) String() string {
	n := 0
	for n < len(s) && s[n] != 0 {
		n++
	}
	buf := make([]byte, n)
	for i := 0; i < n; i++ {
		buf[i] = byte(s[i])
	}
	return string(buf)
}
33 |
--------------------------------------------------------------------------------
/pkg/plugin/vgpu/mig-strategy.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2023 The Volcano Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package vgpu
18 |
19 | import (
20 | "fmt"
21 | "log"
22 |
23 | "github.com/NVIDIA/go-gpuallocator/gpuallocator"
24 | "github.com/NVIDIA/go-nvml/pkg/nvml"
25 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
26 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config"
27 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/util"
28 | )
29 |
// Constants representing the various MIG strategies
const (
	MigStrategyNone   = "none"
	MigStrategySingle = "single"
	MigStrategyMixed  = "mixed"
)

// MigStrategyResourceSet holds a set of resource names for a given MIG strategy
type MigStrategyResourceSet map[string]struct{}

// MigStrategy provides an interface for building the set of plugins required to implement a given MIG strategy
type MigStrategy interface {
	// GetPlugins builds the device plugins this strategy requires.
	GetPlugins(cfg *config.NvidiaConfig, cache *DeviceCache) []*NvidiaDevicePlugin
	// MatchesResource reports whether the MIG device maps to resource.
	MatchesResource(mig *nvml.Device, resource string) bool
}
45 |
46 | // NewMigStrategy returns a reference to a given MigStrategy based on the 'strategy' passed in
47 | func NewMigStrategy(strategy string) (MigStrategy, error) {
48 | switch strategy {
49 | case MigStrategyNone:
50 | return &migStrategyNone{}, nil
51 | case MigStrategySingle:
52 | return &migStrategySingle{}, nil
53 | case MigStrategyMixed:
54 | return &migStrategyMixed{}, nil
55 | }
56 | return nil, fmt.Errorf("unknown strategy: %v", strategy)
57 | }
58 |
// One stateless implementation per supported MIG strategy.
type migStrategyNone struct{}
type migStrategySingle struct{}
type migStrategyMixed struct{}

// migStrategyNone: MIG is ignored; expose the vGPU count, memory and cores
// resources, all backed by the shared device cache.
func (s *migStrategyNone) GetPlugins(cfg *config.NvidiaConfig, cache *DeviceCache) []*NvidiaDevicePlugin {
	return []*NvidiaDevicePlugin{
		NewNvidiaDevicePlugin(
			//"nvidia.com/gpu",
			util.ResourceName,
			cache,
			gpuallocator.NewBestEffortPolicy(),
			pluginapi.DevicePluginPath+"nvidia-gpu.sock",
			cfg),
		NewNvidiaDevicePlugin(
			util.ResourceMem,
			cache,
			gpuallocator.NewBestEffortPolicy(),
			pluginapi.DevicePluginPath+"nvidia-gpu-memory.sock",
			cfg),
		NewNvidiaDevicePlugin(
			util.ResourceCores,
			cache,
			gpuallocator.NewBestEffortPolicy(),
			pluginapi.DevicePluginPath+"nvidia-gpu-cores.sock",
			cfg),
	}
}

// MatchesResource is never consulted for "none": this strategy registers no
// MIG resources.
func (s *migStrategyNone) MatchesResource(mig *nvml.Device, resource string) bool {
	panic("Should never be called")
}

// migStrategySingle is not implemented; requesting its plugins panics.
func (s *migStrategySingle) GetPlugins(cfg *config.NvidiaConfig, cache *DeviceCache) []*NvidiaDevicePlugin {
	panic("single mode in MIG currently not supported")
}

// MatchesResource: in single mode every MIG device maps to the one resource.
func (s *migStrategySingle) MatchesResource(mig *nvml.Device, resource string) bool {
	return true
}
100 |
// migStrategyMixed advertises the vGPU resource plus one additional resource
// per distinct MIG profile present on the node. Panics when MIG-enabled
// devices are misconfigured or cannot be enumerated (startup-time failure).
func (s *migStrategyMixed) GetPlugins(cfg *config.NvidiaConfig, cache *DeviceCache) []*NvidiaDevicePlugin {
	devices := NewMIGCapableDevices()

	if err := devices.AssertAllMigEnabledDevicesAreValid(); err != nil {
		panic(fmt.Errorf("at least one device with migEnabled=true was not configured correctly: %v", err))
	}

	// Collect the set of distinct MIG resource names on this node.
	resources := make(MigStrategyResourceSet)
	migs, err := devices.GetAllMigDevices()
	if err != nil {
		panic(fmt.Errorf("unable to retrieve list of MIG devices: %v", err))
	}
	for _, mig := range migs {
		// Convert old NVML device to new NVML device
		uuid, ret := (*mig).GetUUID()
		check(ret)
		newDevice, ret := config.Nvml().DeviceGetHandleByUUID(uuid)
		check(ret)

		r := s.getResourceName(&newDevice)
		if !s.validMigDevice(&newDevice) {
			log.Printf("Skipping unsupported MIG device: %v", r)
			continue
		}
		resources[r] = struct{}{}
	}

	plugins := []*NvidiaDevicePlugin{
		NewNvidiaDevicePlugin(
			util.ResourceName,
			cache,
			gpuallocator.NewBestEffortPolicy(),
			pluginapi.DevicePluginPath+"nvidia-gpu.sock",
			cfg),
	}

	// One extra plugin per discovered profile, e.g. "nvidia.com/mig-3g.20gb".
	for resource := range resources {
		plugin := NewMIGNvidiaDevicePlugin(
			"nvidia.com/"+resource,
			NewMigDeviceManager(s, resource),
			"NVIDIA_VISIBLE_DEVICES",
			gpuallocator.Policy(nil),
			pluginapi.DevicePluginPath+"nvidia-"+resource+".sock")
		plugins = append(plugins, plugin)
	}

	return plugins
}
150 |
// validMigDevice reports whether the MIG device uses a supported profile:
// only devices whose GPU-instance id equals their compute-instance id (the
// "Ng.Mgb" form) are accepted; "Nc.Mg.Kgb" slices are skipped by the caller.
func (s *migStrategyMixed) validMigDevice(mig *nvml.Device) bool {
	gi, ret := config.Nvml().DeviceGetGpuInstanceId(*mig)
	check(ret)
	ci, ret := config.Nvml().DeviceGetComputeInstanceId(*mig)
	check(ret)
	return gi == ci
}

// getResourceName derives the resource suffix advertised for a MIG device,
// e.g. "mig-3g.20gb" when gi == ci, "mig-1c.3g.20gb" otherwise.
func (s *migStrategyMixed) getResourceName(mig *nvml.Device) string {
	gi, ret := config.Nvml().DeviceGetGpuInstanceId(*mig)
	check(ret)
	ci, ret := config.Nvml().DeviceGetComputeInstanceId(*mig)
	check(ret)

	memory, ret := config.Nvml().DeviceGetMemoryInfo(*mig)
	check(ret)
	// Round total bytes up to whole gigabytes (bytes -> MiB, then ceiling
	// division by 1024).
	gb := ((memory.Total/(1024*1024) + 1024 - 1) / 1024)

	var r string
	if gi == ci {
		r = fmt.Sprintf("mig-%dg.%dgb", gi, gb)
	} else {
		r = fmt.Sprintf("mig-%dc.%dg.%dgb", ci, gi, gb)
	}

	return r
}

// MatchesResource reports whether mig's derived resource name equals resource.
func (s *migStrategyMixed) MatchesResource(mig *nvml.Device, resource string) bool {
	return s.getResourceName(mig) == resource
}
182 |
--------------------------------------------------------------------------------
/pkg/plugin/vgpu/mig.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2023 The Volcano Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package vgpu
18 |
19 | import (
20 | "bufio"
21 | "fmt"
22 | "log"
23 | "os"
24 |
25 | "github.com/NVIDIA/go-nvml/pkg/nvml"
26 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config"
27 | )
28 |
// Paths exposed by the NVIDIA kernel driver under /proc and /dev for device
// and MIG capability discovery.
const (
	nvidiaProcDriverPath   = "/proc/driver/nvidia"
	nvidiaCapabilitiesPath = nvidiaProcDriverPath + "/capabilities"

	nvcapsProcDriverPath = "/proc/driver/nvidia-caps"
	nvcapsMigMinorsPath  = nvcapsProcDriverPath + "/mig-minors"
	nvcapsDevicePath     = "/dev/nvidia-caps"
)
37 |
// MIGCapableDevices stores information about all devices on the node
type MIGCapableDevices struct {
	// devicesMap holds a list of devices, separated by whether they have MigEnabled or not
	devicesMap map[bool][]*nvml.Device
}

// NewMIGCapableDevices creates a new MIGCapableDevices struct and returns a pointer to it.
// Device enumeration is deferred to the first accessor call.
func NewMIGCapableDevices() *MIGCapableDevices {
	return &MIGCapableDevices{
		devicesMap: nil, // Is initialized on first use
	}
}
50 |
51 | func (devices *MIGCapableDevices) getDevicesMap() (map[bool][]*nvml.Device, error) {
52 | if devices.devicesMap == nil {
53 | n, ret := config.Nvml().DeviceGetCount()
54 | if ret != nvml.SUCCESS {
55 | return nil, fmt.Errorf("error getting device count: %v", ret)
56 | }
57 |
58 | migEnabledDevicesMap := make(map[bool][]*nvml.Device)
59 | for i := 0; i < int(n); i++ {
60 | d, ret := config.Nvml().DeviceGetHandleByIndex(i)
61 | if ret != nvml.SUCCESS {
62 | return nil, fmt.Errorf("error getting device handle: %v", ret)
63 | }
64 |
65 | isMigEnabled, _, ret := config.Nvml().DeviceGetMigMode(d)
66 | if ret != nvml.SUCCESS {
67 | if ret == nvml.ERROR_NOT_SUPPORTED {
68 | isMigEnabled = nvml.DEVICE_MIG_DISABLE
69 | } else {
70 | return nil, fmt.Errorf("error getting MIG mode: %v", ret)
71 | }
72 | }
73 |
74 | migEnabledDevicesMap[isMigEnabled == 1] = append(migEnabledDevicesMap[isMigEnabled == 1], &d)
75 | }
76 |
77 | devices.devicesMap = migEnabledDevicesMap
78 | }
79 | return devices.devicesMap, nil
80 | }
81 |
82 | // GetDevicesWithMigEnabled returns a list of devices with migEnabled=true
83 | func (devices *MIGCapableDevices) GetDevicesWithMigEnabled() ([]*nvml.Device, error) {
84 | devicesMap, err := devices.getDevicesMap()
85 | if err != nil {
86 | return nil, err
87 | }
88 | return devicesMap[true], nil
89 | }
90 |
91 | // GetDevicesWithMigDisabled returns a list of devices with migEnabled=false
92 | func (devices *MIGCapableDevices) GetDevicesWithMigDisabled() ([]*nvml.Device, error) {
93 | devicesMap, err := devices.getDevicesMap()
94 | if err != nil {
95 | return nil, err
96 | }
97 | return devicesMap[false], nil
98 | }
99 |
100 | // AssertAllMigEnabledDevicesAreValid ensures that all devices with migEnabled=true are valid. This means:
101 | // * The have at least 1 mig devices associated with them
102 | // Returns nill if the device is valid, or an error if these are not valid
103 | func (devices *MIGCapableDevices) AssertAllMigEnabledDevicesAreValid() error {
104 | devicesMap, err := devices.getDevicesMap()
105 | if err != nil {
106 | return err
107 | }
108 |
109 | for _, d := range devicesMap[true] {
110 | var migs []*nvml.Device
111 | maxMigDevices, ret := config.Nvml().DeviceGetMaxMigDeviceCount(*d)
112 | if ret != nvml.SUCCESS {
113 | return fmt.Errorf("error getting max MIG device count: %v", ret)
114 | }
115 | for i := 0; i < int(maxMigDevices); i++ {
116 | mig, ret := config.Nvml().DeviceGetMigDeviceHandleByIndex(*d, i)
117 | if ret == nvml.SUCCESS {
118 | migs = append(migs, &mig)
119 | }
120 | }
121 | if len(migs) == 0 {
122 | uuid, ret := config.Nvml().DeviceGetUUID(*d)
123 | if ret != nvml.SUCCESS {
124 | return fmt.Errorf("error getting device UUID: %v", ret)
125 | }
126 | return fmt.Errorf("no MIG devices associated with device: %v", uuid)
127 | }
128 | }
129 | return nil
130 | }
131 |
132 | // GetAllMigDevices returns a list of all MIG devices.
133 | func (devices *MIGCapableDevices) GetAllMigDevices() ([]*nvml.Device, error) {
134 | devicesMap, err := devices.getDevicesMap()
135 | if err != nil {
136 | return nil, err
137 | }
138 |
139 | var migs []*nvml.Device
140 | for _, d := range devicesMap[true] {
141 | maxMigDevices, ret := config.Nvml().DeviceGetMaxMigDeviceCount(*d)
142 | if ret != nvml.SUCCESS {
143 | return nil, fmt.Errorf("error getting max MIG device count: %v", ret)
144 | }
145 | for i := 0; i < int(maxMigDevices); i++ {
146 | mig, ret := config.Nvml().DeviceGetMigDeviceHandleByIndex(*d, i)
147 | if ret == nvml.SUCCESS {
148 | migs = append(migs, &mig)
149 | }
150 | }
151 | }
152 | return migs, nil
153 | }
154 |
155 | // GetMigCapabilityDevicePaths returns a mapping of MIG capability path to device node path
156 | func GetMigCapabilityDevicePaths() (map[string]string, error) {
157 | // Open nvcapsMigMinorsPath for walking.
158 | // If the nvcapsMigMinorsPath does not exist, then we are not on a MIG
159 | // capable machine, so there is nothing to do.
160 | // The format of this file is discussed in:
161 | // https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#unique_1576522674
162 | minorsFile, err := os.Open(nvcapsMigMinorsPath)
163 | if os.IsNotExist(err) {
164 | return nil, nil
165 | }
166 | if err != nil {
167 | return nil, fmt.Errorf("error opening MIG minors file: %v", err)
168 | }
169 | defer minorsFile.Close()
170 |
171 | // Define a function to process each each line of nvcapsMigMinorsPath
172 | processLine := func(line string) (string, int, error) {
173 | var gpu, gi, ci, migMinor int
174 |
175 | // Look for a CI access file
176 | n, _ := fmt.Sscanf(line, "gpu%d/gi%d/ci%d/access %d", &gpu, &gi, &ci, &migMinor)
177 | if n == 4 {
178 | capPath := fmt.Sprintf(nvidiaCapabilitiesPath+"/gpu%d/mig/gi%d/ci%d/access", gpu, gi, ci)
179 | return capPath, migMinor, nil
180 | }
181 |
182 | // Look for a GI access file
183 | n, _ = fmt.Sscanf(line, "gpu%d/gi%d/access %d", &gpu, &gi, &migMinor)
184 | if n == 3 {
185 | capPath := fmt.Sprintf(nvidiaCapabilitiesPath+"/gpu%d/mig/gi%d/access", gpu, gi)
186 | return capPath, migMinor, nil
187 | }
188 |
189 | // Look for the MIG config file
190 | n, _ = fmt.Sscanf(line, "config %d", &migMinor)
191 | if n == 1 {
192 | capPath := fmt.Sprintf(nvidiaCapabilitiesPath + "/mig/config")
193 | return capPath, migMinor, nil
194 | }
195 |
196 | // Look for the MIG monitor file
197 | n, _ = fmt.Sscanf(line, "monitor %d", &migMinor)
198 | if n == 1 {
199 | capPath := fmt.Sprintf(nvidiaCapabilitiesPath + "/mig/monitor")
200 | return capPath, migMinor, nil
201 | }
202 |
203 | return "", 0, fmt.Errorf("unparsable line: %v", line)
204 | }
205 |
206 | // Walk each line of nvcapsMigMinorsPath and construct a mapping of nvidia
207 | // capabilities path to device minor for that capability
208 | capsDevicePaths := make(map[string]string)
209 | scanner := bufio.NewScanner(minorsFile)
210 | for scanner.Scan() {
211 | capPath, migMinor, err := processLine(scanner.Text())
212 | if err != nil {
213 | log.Printf("Skipping line in MIG minors file: %v", err)
214 | continue
215 | }
216 | capsDevicePaths[capPath] = fmt.Sprintf(nvcapsDevicePath+"/nvidia-cap%d", migMinor)
217 | }
218 | return capsDevicePaths, nil
219 | }
220 |
221 | // GetMigDeviceNodePaths returns a list of device node paths associated with a MIG device
222 | func GetMigDeviceNodePaths(parent nvml.Device, mig *nvml.Device) ([]string, error) {
223 | capDevicePaths, err := GetMigCapabilityDevicePaths()
224 | if err != nil {
225 | return nil, fmt.Errorf("error getting MIG capability device paths: %v", err)
226 | }
227 |
228 | gpu, ret := parent.GetMinorNumber()
229 | if ret != nvml.SUCCESS {
230 | return nil, fmt.Errorf("error getting GPU device minor number: %v", ret)
231 | }
232 |
233 | gi, ret := config.Nvml().DeviceGetGpuInstanceId(*mig)
234 | if ret != nvml.SUCCESS {
235 | return nil, fmt.Errorf("error getting MIG GPU instance ID: %v", ret)
236 | }
237 |
238 | ci, ret := config.Nvml().DeviceGetComputeInstanceId(*mig)
239 | if ret != nvml.SUCCESS {
240 | return nil, fmt.Errorf("error getting MIG compute instance ID: %v", ret)
241 | }
242 |
243 | giCapPath := fmt.Sprintf(nvidiaCapabilitiesPath+"/gpu%d/mig/gi%d/access", gpu, gi)
244 | if _, exists := capDevicePaths[giCapPath]; !exists {
245 | return nil, fmt.Errorf("missing MIG GPU instance capability path: %v", giCapPath)
246 | }
247 |
248 | ciCapPath := fmt.Sprintf(nvidiaCapabilitiesPath+"/gpu%d/mig/gi%d/ci%d/access", gpu, gi, ci)
249 | if _, exists := capDevicePaths[ciCapPath]; !exists {
250 | return nil, fmt.Errorf("missing MIG GPU instance capability path: %v", giCapPath)
251 | }
252 |
253 | devicePaths := []string{
254 | fmt.Sprintf("/dev/nvidia%d", gpu),
255 | capDevicePaths[giCapPath],
256 | capDevicePaths[ciCapPath],
257 | }
258 |
259 | return devicePaths, nil
260 | }
261 |
--------------------------------------------------------------------------------
/pkg/plugin/vgpu/nvidia.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2023 The Volcano Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package vgpu
18 |
19 | import (
20 | "bytes"
21 | "fmt"
22 | "log"
23 | "os"
24 | "strconv"
25 | "strings"
26 |
27 | "github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
28 | "github.com/NVIDIA/go-nvml/pkg/nvml"
29 | "k8s.io/klog"
30 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
31 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config"
32 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/util"
33 | )
34 |
const (
	// envDisableHealthChecks names the env var that disables health checks:
	// a comma-separated list of checks (or extra Xids to skip), or "all".
	envDisableHealthChecks = "DP_DISABLE_HEALTHCHECKS"
	// allHealthChecks is the full set of named health checks ("all" expands to this).
	allHealthChecks        = "xids"
)
39 |
// Device couples an underlying pluginapi.Device type with its device node paths
type Device struct {
	pluginapi.Device
	// Paths are the /dev node paths that back this device.
	Paths []string
	// Index is the device index: "i" for a full GPU, "i:j" for a MIG device
	// (see buildDevice / buildMigDevice and IsMigDevice).
	Index string
	// Memory is the total device memory in MiB (memory.Total / 1024^2).
	Memory uint64
}
47 |
// ResourceManager provides an interface for listing a set of Devices and checking health on them
type ResourceManager interface {
	// Devices enumerates the devices managed by this manager.
	Devices() []*Device
	// CheckHealth watches devices until stop is closed, sending any device
	// deemed unhealthy on the unhealthy channel.
	CheckHealth(stop <-chan interface{}, devices []*Device, unhealthy chan<- *Device)
}
53 |
// GpuDeviceManager implements the ResourceManager interface for full GPU devices
type GpuDeviceManager struct {
	// skipMigEnabledGPUs excludes MIG-enabled GPUs from Devices() when true.
	skipMigEnabledGPUs bool
}
58 |
// MigDeviceManager implements the ResourceManager interface for MIG devices
type MigDeviceManager struct {
	// strategy maps MIG devices onto resource names.
	strategy MigStrategy
	// resource is the resource name this manager serves.
	resource string
}
64 |
65 | func check(ret nvml.Return) {
66 | if ret != nvml.SUCCESS {
67 | log.Panicln("Fatal:", ret)
68 | }
69 | }
70 |
71 | // NewGpuDeviceManager returns a reference to a new GpuDeviceManager
72 | func NewGpuDeviceManager(skipMigEnabledGPUs bool) *GpuDeviceManager {
73 | return &GpuDeviceManager{
74 | skipMigEnabledGPUs: skipMigEnabledGPUs,
75 | }
76 | }
77 |
78 | // NewMigDeviceManager returns a reference to a new MigDeviceManager
79 | func NewMigDeviceManager(strategy MigStrategy, resource string) *MigDeviceManager {
80 | return &MigDeviceManager{
81 | strategy: strategy,
82 | resource: resource,
83 | }
84 | }
85 |
86 | // Devices returns a list of devices from the GpuDeviceManager
87 | func (g *GpuDeviceManager) Devices() []*Device {
88 | n, ret := config.Nvml().DeviceGetCount()
89 | check(ret)
90 | if n > util.DeviceLimit {
91 | n = util.DeviceLimit
92 | }
93 |
94 | var devs []*Device
95 | for i := 0; i < n; i++ {
96 | d, ret := config.Nvml().DeviceGetHandleByIndex(i)
97 | check(ret)
98 |
99 | migMode, _, ret := d.GetMigMode()
100 | if ret != nvml.SUCCESS {
101 | if ret == nvml.ERROR_NOT_SUPPORTED {
102 | migMode = nvml.DEVICE_MIG_DISABLE
103 | } else {
104 | check(ret)
105 | }
106 | }
107 |
108 | if migMode == nvml.DEVICE_MIG_ENABLE && g.skipMigEnabledGPUs {
109 | continue
110 | }
111 |
112 | // Auto ebale MIG mode when the plugin is running in MIG mode
113 | if config.Mode == "mig" && migMode != nvml.DEVICE_MIG_ENABLE {
114 | if ret == nvml.ERROR_NOT_SUPPORTED {
115 | klog.V(4).Infof("Node is configed as MIG mode, but GPU %v does not support MIG mode", i)
116 | continue
117 | }
118 | ret, stat := d.SetMigMode(nvml.DEVICE_MIG_ENABLE)
119 | if ret != nvml.SUCCESS || stat != nvml.SUCCESS {
120 | klog.V(4).Infof("Node is configed as MIG mode, but failed to enable MIG mode for GPU %v : ret=%v, stat=%v", i, ret, stat)
121 | continue
122 | }
123 | }
124 |
125 | dev, err := buildDevice(fmt.Sprintf("%v", i), d)
126 | if err != nil {
127 | log.Panicln("Fatal:", err)
128 | }
129 |
130 | devs = append(devs, dev)
131 | }
132 |
133 | return devs
134 | }
135 |
// Devices returns a list of devices from the MigDeviceManager
func (m *MigDeviceManager) Devices() []*Device {
	n, ret := config.Nvml().DeviceGetCount()
	check(ret)
	// Cap the number of GPUs considered at the plugin-wide device limit.
	if n > util.DeviceLimit {
		n = util.DeviceLimit
	}

	var devs []*Device
	for i := 0; i < n; i++ {
		d, ret := config.Nvml().DeviceGetHandleByIndex(i)
		check(ret)

		migMode, _, ret := d.GetMigMode()
		if ret != nvml.SUCCESS {
			// GPUs without MIG support return ERROR_NOT_SUPPORTED; treat them
			// as MIG-disabled. Any other error is fatal (check panics).
			if ret == nvml.ERROR_NOT_SUPPORTED {
				migMode = nvml.DEVICE_MIG_DISABLE
			} else {
				check(ret)
			}
		}

		// Only MIG-enabled GPUs contribute MIG devices.
		if migMode != nvml.DEVICE_MIG_ENABLE {
			continue
		}

		// NOTE(review): the callback parameters i and d shadow the outer loop
		// variables, and VisitMigDevices appears to walk MIG devices for the
		// whole node even though it is invoked once per MIG-enabled GPU in
		// this loop — confirm it does not append duplicate entries when more
		// than one GPU has MIG enabled.
		err := config.Device().VisitMigDevices(func(i int, d device.Device, j int, mig device.MigDevice) error {
			dev, err := buildMigDevice(fmt.Sprintf("%v:%v", i, j), mig)
			if err != nil {
				log.Panicln("Fatal:", err)
			}
			devs = append(devs, dev)
			return nil
		})
		if err != nil {
			log.Fatalf("VisitMigDevices error: %v", err)
		}
	}

	return devs
}
177 |
// CheckHealth performs health checks on a set of devices, writing to the 'unhealthy' channel with any unhealthy devices
func (g *GpuDeviceManager) CheckHealth(stop <-chan interface{}, devices []*Device, unhealthy chan<- *Device) {
	// Delegates to the shared Xid-event health loop; blocks until stop is closed.
	checkHealth(stop, devices, unhealthy)
}
182 |
// CheckHealth performs health checks on a set of devices, writing to the 'unhealthy' channel with any unhealthy devices
func (m *MigDeviceManager) CheckHealth(stop <-chan interface{}, devices []*Device, unhealthy chan<- *Device) {
	// Delegates to the shared Xid-event health loop; blocks until stop is closed.
	checkHealth(stop, devices, unhealthy)
}
187 |
188 | func buildDevice(index string, d nvml.Device) (*Device, error) {
189 | uuid, ret := config.Nvml().DeviceGetUUID(d)
190 | if ret != nvml.SUCCESS {
191 | return nil, fmt.Errorf("error getting UUID of device: %v", ret)
192 | }
193 |
194 | minor, ret := config.Nvml().DeviceGetMinorNumber(d)
195 | if ret != nvml.SUCCESS {
196 | return nil, fmt.Errorf("error getting minor number of device: %v", ret)
197 | }
198 | paths := []string{fmt.Sprintf("/dev/nvidia%d", minor)}
199 |
200 | memory, ret := config.Nvml().DeviceGetMemoryInfo(d)
201 | if ret != nvml.SUCCESS {
202 | return nil, fmt.Errorf("error getting memory info of device: %v", ret)
203 | }
204 |
205 | hasNuma, numa, err := getNumaNode(d)
206 | if err != nil {
207 | return nil, fmt.Errorf("error getting device NUMA node: %v", err)
208 | }
209 |
210 | dev := Device{}
211 | dev.ID = uuid
212 | dev.Health = pluginapi.Healthy
213 | dev.Paths = paths
214 | dev.Index = index
215 | dev.Memory = memory.Total / (1024 * 1024)
216 | if hasNuma {
217 | dev.Topology = &pluginapi.TopologyInfo{
218 | Nodes: []*pluginapi.NUMANode{
219 | {
220 | ID: int64(numa),
221 | },
222 | },
223 | }
224 | }
225 | return &dev, nil
226 | }
227 |
228 | func buildMigDevice(index string, d device.MigDevice) (*Device, error) {
229 | uuid, ret := config.Nvml().DeviceGetUUID(d)
230 | if ret != nvml.SUCCESS {
231 | return nil, fmt.Errorf("error getting UUID of device: %v", ret)
232 | }
233 |
234 | paths, err := getMigPaths(d)
235 | if err != nil {
236 | return nil, fmt.Errorf("error getting MIG paths of device: %v", err)
237 | }
238 |
239 | memory, ret := config.Nvml().DeviceGetMemoryInfo(d)
240 | if ret != nvml.SUCCESS {
241 | return nil, fmt.Errorf("error getting memory info of device: %v", ret)
242 | }
243 |
244 | parent, ret := d.GetDeviceHandleFromMigDeviceHandle()
245 | if ret != nvml.SUCCESS {
246 | return nil, fmt.Errorf("error getting parent GPU device from MIG device: %v", ret)
247 | }
248 | hasNuma, numa, err := getNumaNode(parent)
249 | if err != nil {
250 | return nil, fmt.Errorf("error getting device NUMA node: %v", err)
251 | }
252 |
253 | dev := Device{}
254 | dev.ID = uuid
255 | dev.Health = pluginapi.Healthy
256 | dev.Paths = paths
257 | dev.Index = index
258 | dev.Memory = memory.Total / (1024 * 1024)
259 | if hasNuma {
260 | dev.Topology = &pluginapi.TopologyInfo{
261 | Nodes: []*pluginapi.NUMANode{
262 | {
263 | ID: int64(numa),
264 | },
265 | },
266 | }
267 | }
268 | return &dev, nil
269 | }
270 |
271 | func getMigPaths(d device.MigDevice) ([]string, error) {
272 | capDevicePaths, err := GetMigCapabilityDevicePaths()
273 | if err != nil {
274 | return nil, fmt.Errorf("error getting MIG capability device paths: %v", err)
275 | }
276 |
277 | gi, ret := d.GetGpuInstanceId()
278 | if ret != nvml.SUCCESS {
279 | return nil, fmt.Errorf("error getting GPU Instance ID: %v", ret)
280 | }
281 |
282 | ci, ret := d.GetComputeInstanceId()
283 | if ret != nvml.SUCCESS {
284 | return nil, fmt.Errorf("error getting Compute Instance ID: %v", ret)
285 | }
286 |
287 | parent, ret := d.GetDeviceHandleFromMigDeviceHandle()
288 | if ret != nvml.SUCCESS {
289 | return nil, fmt.Errorf("error getting parent device: %v", ret)
290 | }
291 | minor, ret := parent.GetMinorNumber()
292 | if ret != nvml.SUCCESS {
293 | return nil, fmt.Errorf("error getting GPU device minor number: %v", ret)
294 | }
295 | parentPath := fmt.Sprintf("/dev/nvidia%d", minor)
296 |
297 | giCapPath := fmt.Sprintf(nvidiaCapabilitiesPath+"/gpu%d/mig/gi%d/access", minor, gi)
298 | if _, exists := capDevicePaths[giCapPath]; !exists {
299 | return nil, fmt.Errorf("missing MIG GPU instance capability path: %v", giCapPath)
300 | }
301 |
302 | ciCapPath := fmt.Sprintf(nvidiaCapabilitiesPath+"/gpu%d/mig/gi%d/ci%d/access", minor, gi, ci)
303 | if _, exists := capDevicePaths[ciCapPath]; !exists {
304 | return nil, fmt.Errorf("missing MIG GPU instance capability path: %v", giCapPath)
305 | }
306 |
307 | devicePaths := []string{
308 | parentPath,
309 | capDevicePaths[giCapPath],
310 | capDevicePaths[ciCapPath],
311 | }
312 |
313 | return devicePaths, nil
314 | }
315 |
316 | func getNumaNode(d nvml.Device) (bool, int, error) {
317 | pciInfo, ret := d.GetPciInfo()
318 | if ret != nvml.SUCCESS {
319 | return false, 0, fmt.Errorf("error getting PCI Bus Info of device: %v", ret)
320 | }
321 |
322 | // Discard leading zeros.
323 | busID := strings.ToLower(strings.TrimPrefix(int8Slice(pciInfo.BusId[:]).String(), "0000"))
324 |
325 | b, err := os.ReadFile(fmt.Sprintf("/sys/bus/pci/devices/%s/numa_node", busID))
326 | if err != nil {
327 | return false, 0, nil
328 | }
329 |
330 | node, err := strconv.Atoi(string(bytes.TrimSpace(b)))
331 | if err != nil {
332 | return false, 0, fmt.Errorf("eror parsing value for NUMA node: %v", err)
333 | }
334 |
335 | if node < 0 {
336 | return false, 0, nil
337 | }
338 |
339 | return true, node, nil
340 | }
341 |
// checkHealth runs the shared Xid-based health loop: it registers for NVML
// events on every device, then marks devices unhealthy on critical Xid errors
// until stop is closed. Health checking can be disabled entirely via the
// DP_DISABLE_HEALTHCHECKS env var ("xids" or "all"); additional Xid values in
// that variable are skipped as well.
func checkHealth(stop <-chan interface{}, devices []*Device, unhealthy chan<- *Device) {
	disableHealthChecks := strings.ToLower(os.Getenv(envDisableHealthChecks))
	if disableHealthChecks == "all" {
		disableHealthChecks = allHealthChecks
	}
	if strings.Contains(disableHealthChecks, "xids") {
		return
	}

	// FIXME: formalize the full list and document it.
	// http://docs.nvidia.com/deploy/xid-errors/index.html#topic_4
	// Application errors: the GPU should still be healthy
	applicationErrorXids := []uint64{
		13, // Graphics Engine Exception
		31, // GPU memory page fault
		43, // GPU stopped processing
		45, // Preemptive cleanup, due to previous errors
		68, // Video processor exception
	}

	skippedXids := make(map[uint64]bool)
	for _, id := range applicationErrorXids {
		skippedXids[id] = true
	}

	// Any numeric values in the env var are treated as extra Xids to ignore.
	for _, additionalXid := range getAdditionalXids(disableHealthChecks) {
		skippedXids[additionalXid] = true
	}

	eventSet, ret := config.Nvml().EventSetCreate()
	if ret != nvml.SUCCESS {
		klog.Warningf("could not create event set: %v", ret)
		return
	}
	defer eventSet.Free()

	// Events report the parent GPU; these maps let us route an event back to
	// the plugin Device (and, for MIG, its GI/CI placement).
	parentToDeviceMap := make(map[string]*Device)
	deviceIDToGiMap := make(map[string]int)
	deviceIDToCiMap := make(map[string]int)

	eventMask := uint64(nvml.EventTypeXidCriticalError | nvml.EventTypeDoubleBitEccError | nvml.EventTypeSingleBitEccError)
	for _, d := range devices {
		uuid, gi, ci, err := getDevicePlacement(d)
		if err != nil {
			klog.Warningf("Could not determine device placement for %v: %v; Marking it unhealthy.", d.ID, err)
			unhealthy <- d
			continue
		}
		deviceIDToGiMap[d.ID] = gi
		deviceIDToCiMap[d.ID] = ci
		parentToDeviceMap[uuid] = d

		gpu, ret := config.Nvml().DeviceGetHandleByUUID(uuid)
		if ret != nvml.SUCCESS {
			klog.Infof("unable to get device handle from UUID: %v; marking it as unhealthy", ret)
			unhealthy <- d
			continue
		}

		supportedEvents, ret := gpu.GetSupportedEventTypes()
		if ret != nvml.SUCCESS {
			klog.Infof("Unable to determine the supported events for %v: %v; marking it as unhealthy", d.ID, ret)
			unhealthy <- d
			continue
		}

		ret = gpu.RegisterEvents(eventMask&supportedEvents, eventSet)
		if ret == nvml.ERROR_NOT_SUPPORTED {
			klog.Warningf("Device %v is too old to support healthchecking.", d.ID)
		}
		if ret != nvml.SUCCESS {
			klog.Infof("Marking device %v as unhealthy: %v", d.ID, ret)
			unhealthy <- d
		}
	}

	// Event loop: poll with a 5s timeout so the stop channel is re-checked
	// regularly.
	for {
		select {
		case <-stop:
			return
		default:
		}

		e, ret := eventSet.Wait(5000)
		if ret == nvml.ERROR_TIMEOUT {
			continue
		}
		if ret != nvml.SUCCESS {
			klog.Infof("Error waiting for event: %v; Marking all devices as unhealthy", ret)
			for _, d := range devices {
				unhealthy <- d
			}
			continue
		}

		if e.EventType != nvml.EventTypeXidCriticalError {
			klog.Infof("Skipping non-nvmlEventTypeXidCriticalError event: %+v", e)
			continue
		}

		if skippedXids[e.EventData] {
			klog.Infof("Skipping event %+v", e)
			continue
		}

		klog.Infof("Processing event %+v", e)
		eventUUID, ret := e.Device.GetUUID()
		if ret != nvml.SUCCESS {
			// If we cannot reliably determine the device UUID, we mark all devices as unhealthy.
			klog.Infof("Failed to determine uuid for event %v: %v; Marking all devices as unhealthy.", e, ret)
			for _, d := range devices {
				unhealthy <- d
			}
			continue
		}

		d, exists := parentToDeviceMap[eventUUID]
		if !exists {
			klog.Infof("Ignoring event for unexpected device: %v", eventUUID)
			continue
		}

		// For MIG devices, only react when the event's GI/CI matches this
		// device's placement (0xFFFFFFFF means the event is not instance-scoped).
		if d.IsMigDevice() && e.GpuInstanceId != 0xFFFFFFFF && e.ComputeInstanceId != 0xFFFFFFFF {
			gi := deviceIDToGiMap[d.ID]
			ci := deviceIDToCiMap[d.ID]
			if !(uint32(gi) == e.GpuInstanceId && uint32(ci) == e.ComputeInstanceId) {
				continue
			}
			klog.Infof("Event for mig device %v (gi=%v, ci=%v)", d.ID, gi, ci)
		}

		klog.Infof("XidCriticalError: Xid=%d on Device=%s; marking device as unhealthy.", e.EventData, d.ID)
		unhealthy <- d
	}
}
477 |
// getAdditionalXids parses input as a comma-separated list and returns every
// entry that is a valid uint64 Xid value. Malformed entries are logged and
// ignored; an empty input yields nil.
func getAdditionalXids(input string) []uint64 {
	if input == "" {
		return nil
	}

	var xids []uint64
	for _, field := range strings.Split(input, ",") {
		candidate := strings.TrimSpace(field)
		if candidate == "" {
			continue
		}
		value, err := strconv.ParseUint(candidate, 10, 64)
		if err != nil {
			log.Printf("Ignoring malformed Xid value %v: %v", candidate, err)
			continue
		}
		xids = append(xids, value)
	}

	return xids
}
502 |
503 | // getDevicePlacement returns the placement of the specified device.
504 | // For a MIG device the placement is defined by the 3-tuple
505 | // For a full device the returned 3-tuple is the device's uuid and 0xFFFFFFFF for the other two elements.
506 | func getDevicePlacement(d *Device) (string, int, int, error) {
507 | if !d.IsMigDevice() {
508 | return d.GetUUID(), 0xFFFFFFFF, 0xFFFFFFFF, nil
509 | }
510 | return getMigDeviceParts(d)
511 | }
512 |
513 | // getMigDeviceParts returns the parent GI and CI ids of the MIG device.
514 | func getMigDeviceParts(d *Device) (string, int, int, error) {
515 | if !d.IsMigDevice() {
516 | return "", 0, 0, fmt.Errorf("cannot get GI and CI of full device")
517 | }
518 |
519 | uuid := d.GetUUID()
520 | // For older driver versions, the call to DeviceGetHandleByUUID will fail for MIG devices.
521 | mig, ret := config.Nvml().DeviceGetHandleByUUID(uuid)
522 | if ret == nvml.SUCCESS {
523 | parentHandle, ret := mig.GetDeviceHandleFromMigDeviceHandle()
524 | if ret != nvml.SUCCESS {
525 | return "", 0, 0, fmt.Errorf("failed to get parent device handle: %v", ret)
526 | }
527 |
528 | parentUUID, ret := parentHandle.GetUUID()
529 | if ret != nvml.SUCCESS {
530 | return "", 0, 0, fmt.Errorf("failed to get parent uuid: %v", ret)
531 | }
532 | gi, ret := mig.GetGpuInstanceId()
533 | if ret != nvml.SUCCESS {
534 | return "", 0, 0, fmt.Errorf("failed to get GPU Instance ID: %v", ret)
535 | }
536 |
537 | ci, ret := mig.GetComputeInstanceId()
538 | if ret != nvml.SUCCESS {
539 | return "", 0, 0, fmt.Errorf("failed to get Compute Instance ID: %v", ret)
540 | }
541 | return parentUUID, gi, ci, nil
542 | }
543 | return parseMigDeviceUUID(uuid)
544 | }
545 |
// parseMigDeviceUUID decodes a MIG device UUID of the form
// "MIG-GPU-<uuid>/<gi>/<ci>" into the parent GPU UUID, GI id and CI id.
func parseMigDeviceUUID(mig string) (string, int, int, error) {
	parseErr := fmt.Errorf("unable to parse UUID as MIG device")

	head := strings.SplitN(mig, "-", 2)
	if len(head) != 2 || head[0] != "MIG" {
		return "", 0, 0, parseErr
	}

	parts := strings.SplitN(head[1], "/", 3)
	if len(parts) != 3 || !strings.HasPrefix(parts[0], "GPU-") {
		return "", 0, 0, parseErr
	}

	gi, err := strconv.ParseInt(parts[1], 10, 32)
	if err != nil {
		return "", 0, 0, parseErr
	}

	ci, err := strconv.ParseInt(parts[2], 10, 32)
	if err != nil {
		return "", 0, 0, parseErr
	}

	return parts[0], int(gi), int(ci), nil
}
570 |
571 | // IsMigDevice returns checks whether d is a MIG device or not.
572 | func (d Device) IsMigDevice() bool {
573 | return strings.Contains(d.Index, ":")
574 | }
575 |
// GetUUID returns the UUID for the device from the annotated ID,
// stripping any "::<replica>" suffix (see AnnotatedID).
func (d Device) GetUUID() string {
	return AnnotatedID(d.ID).GetID()
}
580 |
// AnnotatedID represents an ID with a replica number embedded in it,
// formatted as "<id>::<replica>".
type AnnotatedID string

// Split splits an AnnotatedID into its ID and replica number parts.
// IDs without a "::" suffix yield replica 0.
func (r AnnotatedID) Split() (string, int) {
	parts := strings.SplitN(string(r), "::", 2)
	if len(parts) == 2 {
		// Unparsable replica numbers silently map to 0, as before.
		replica, _ := strconv.ParseInt(parts[1], 10, 0)
		return parts[0], int(replica)
	}
	return string(r), 0
}

// GetID returns just the ID part of the replicated ID.
func (r AnnotatedID) GetID() string {
	id, _ := r.Split()
	return id
}
599 |
--------------------------------------------------------------------------------
/pkg/plugin/vgpu/plugin.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2023 The Volcano Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package vgpu
18 |
19 | import (
20 | "bytes"
21 | "errors"
22 | "fmt"
23 | "log"
24 | "net"
25 | "os"
26 | "os/exec"
27 | "path"
28 | "strings"
29 | "time"
30 |
31 | "gopkg.in/yaml.v2"
32 | "k8s.io/apimachinery/pkg/util/uuid"
33 | "k8s.io/klog/v2"
34 | "volcano.sh/k8s-device-plugin/pkg/lock"
35 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config"
36 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/util"
37 |
38 | "github.com/NVIDIA/go-gpuallocator/gpuallocator"
39 | "golang.org/x/net/context"
40 | "google.golang.org/grpc"
41 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
42 | )
43 |
// Constants to represent the various device list strategies
const (
	// DeviceListStrategyEnvvar publishes allocated devices via an environment variable.
	DeviceListStrategyEnvvar       = "envvar"
	// DeviceListStrategyVolumeMounts publishes allocated devices via volume mounts.
	DeviceListStrategyVolumeMounts = "volume-mounts"
)

// Constants to represent the various device id strategies
const (
	// DeviceIDStrategyUUID identifies devices by their UUID.
	DeviceIDStrategyUUID  = "uuid"
	// DeviceIDStrategyIndex identifies devices by their index.
	DeviceIDStrategyIndex = "index"
)

// Constants for use by the 'volume-mounts' device list strategy
const (
	// Host path mounted for each device entry (a harmless placeholder).
	deviceListAsVolumeMountsHostPath          = "/dev/null"
	// Container directory under which one mount per device is created.
	deviceListAsVolumeMountsContainerPathRoot = "/var/run/nvidia-container-devices"
)
61 |
// NvidiaDevicePlugin implements the Kubernetes device plugin API
type NvidiaDevicePlugin struct {
	ResourceManager
	// deviceCache caches discovered devices (vGPU / "none" strategy path).
	deviceCache *DeviceCache
	// resourceName is the extended resource name advertised to the kubelet.
	resourceName string
	// deviceListEnvvar is the env var name used to publish allocated devices
	// (per DeviceListStrategyEnvvar) — set only by the MIG constructor here.
	deviceListEnvvar string
	// allocatePolicy selects which GPUs to use when allocating.
	allocatePolicy gpuallocator.Policy
	// socket is the path of the plugin's gRPC unix socket.
	socket string
	schedulerConfig *config.NvidiaConfig
	// operatingMode mirrors config.Mode (e.g. "mig").
	operatingMode string

	// virtualDevices are the vGPU devices built in initialize().
	virtualDevices []*pluginapi.Device
	// migCurrent holds the MIG partition spec exported by nvidia-mig-parted.
	migCurrent config.MigPartedSpec

	// The fields below are reinitialized every time the plugin server restarts.
	server *grpc.Server
	// cachedDevices is populated from ResourceManager.Devices for the "mixed" strategy.
	cachedDevices []*Device
	health chan *Device
	stop chan interface{}
	changed chan struct{}
	migStrategy string
}
83 |
84 | // NewNvidiaDevicePlugin returns an initialized NvidiaDevicePlugin
85 | func NewNvidiaDevicePlugin(resourceName string, deviceCache *DeviceCache, allocatePolicy gpuallocator.Policy, socket string, cfg *config.NvidiaConfig) *NvidiaDevicePlugin {
86 | dp := &NvidiaDevicePlugin{
87 | deviceCache: deviceCache,
88 | resourceName: resourceName,
89 | allocatePolicy: allocatePolicy,
90 | socket: socket,
91 | migStrategy: "none",
92 | operatingMode: config.Mode,
93 | schedulerConfig: cfg,
94 | // These will be reinitialized every
95 | // time the plugin server is restarted.
96 | server: nil,
97 | health: nil,
98 | stop: nil,
99 | }
100 | return dp
101 | }
102 |
103 | // NewNvidiaDevicePlugin returns an initialized NvidiaDevicePlugin
104 | func NewMIGNvidiaDevicePlugin(resourceName string, resourceManager ResourceManager, deviceListEnvvar string, allocatePolicy gpuallocator.Policy, socket string) *NvidiaDevicePlugin {
105 | return &NvidiaDevicePlugin{
106 | ResourceManager: resourceManager,
107 | resourceName: resourceName,
108 | deviceListEnvvar: deviceListEnvvar,
109 | allocatePolicy: allocatePolicy,
110 | socket: socket,
111 |
112 | // These will be reinitialized every
113 | // time the plugin server is restarted.
114 | cachedDevices: nil,
115 | server: nil,
116 | health: nil,
117 | stop: nil,
118 | migStrategy: "mixed",
119 | }
120 | }
121 |
122 | func (m *NvidiaDevicePlugin) initialize() {
123 | if strings.Compare(m.migStrategy, "mixed") == 0 {
124 | m.cachedDevices = m.ResourceManager.Devices()
125 | }
126 | m.server = grpc.NewServer([]grpc.ServerOption{}...)
127 | m.health = make(chan *Device)
128 | m.stop = make(chan interface{})
129 | m.virtualDevices, _ = util.GetDevices(config.GPUMemoryFactor)
130 | }
131 |
132 | func (m *NvidiaDevicePlugin) cleanup() {
133 | close(m.stop)
134 | m.server = nil
135 | m.health = nil
136 | m.stop = nil
137 | }
138 |
139 | // Start starts the gRPC server, registers the device plugin with the Kubelet,
140 | // and starts the device healthchecks.
141 | func (m *NvidiaDevicePlugin) Start() error {
142 | m.initialize()
143 |
144 | err := m.Serve()
145 | if err != nil {
146 | log.Printf("Could not start device plugin for '%s': %s", m.resourceName, err)
147 | m.cleanup()
148 | return err
149 | }
150 | log.Printf("Starting to serve '%s' on %s", m.resourceName, m.socket)
151 |
152 | err = m.Register()
153 | if err != nil {
154 | log.Printf("Could not register device plugin: %s", err)
155 | m.Stop()
156 | return err
157 | }
158 | log.Printf("Registered device plugin for '%s' with Kubelet", m.resourceName)
159 |
160 | if m.operatingMode == "mig" {
161 | cmd := exec.Command("nvidia-mig-parted", "export")
162 | var stdout, stderr bytes.Buffer
163 | cmd.Stdout = &stdout
164 | cmd.Stderr = &stderr
165 | err := cmd.Run()
166 | if err != nil {
167 | klog.Fatalf("nvidia-mig-parted failed with %s\n", err)
168 | }
169 | outStr := stdout.Bytes()
170 | yaml.Unmarshal(outStr, &m.migCurrent)
171 | os.WriteFile("/tmp/migconfig.yaml", outStr, os.ModePerm)
172 | if len(m.migCurrent.MigConfigs["current"]) == 1 && len(m.migCurrent.MigConfigs["current"][0].Devices) == 0 {
173 | idx := 0
174 | m.migCurrent.MigConfigs["current"][0].Devices = make([]int32, 0)
175 | for idx < util.GetDeviceNums() {
176 | m.migCurrent.MigConfigs["current"][0].Devices = append(m.migCurrent.MigConfigs["current"][0].Devices, int32(idx))
177 | idx++
178 | }
179 | }
180 | klog.Infoln("Mig export", m.migCurrent)
181 | }
182 |
183 | if strings.Compare(m.migStrategy, "none") == 0 {
184 | m.deviceCache.AddNotifyChannel("plugin", m.health)
185 | } else if strings.Compare(m.migStrategy, "mixed") == 0 {
186 | go m.CheckHealth(m.stop, m.cachedDevices, m.health)
187 | } else {
188 | log.Panicln("migstrategy not recognized", m.migStrategy)
189 | }
190 | return nil
191 | }
192 |
193 | // Stop stops the gRPC server.
194 | func (m *NvidiaDevicePlugin) Stop() error {
195 | if m == nil || m.server == nil {
196 | return nil
197 | }
198 | log.Printf("Stopping to serve '%s' on %s", m.resourceName, m.socket)
199 | m.deviceCache.RemoveNotifyChannel("plugin")
200 | m.server.Stop()
201 | if err := os.Remove(m.socket); err != nil && !os.IsNotExist(err) {
202 | return err
203 | }
204 | m.cleanup()
205 | return nil
206 | }
207 |
// Serve starts the gRPC server of the device plugin.
//
// The server listens on the plugin's unix socket and runs in a
// background goroutine that restarts it after a crash, giving up
// (log.Fatalf) after more than 5 crashes within the same hour.
// Serve blocks only until a probe connection confirms the server is
// accepting requests.
func (m *NvidiaDevicePlugin) Serve() error {
	// Remove any stale socket from a previous run; the error is
	// deliberately ignored (the socket may simply not exist yet).
	os.Remove(m.socket)
	sock, err := net.Listen("unix", m.socket)
	if err != nil {
		return err
	}

	pluginapi.RegisterDevicePluginServer(m.server, m)

	go func() {
		lastCrashTime := time.Now()
		restartCount := 0
		for {
			log.Printf("Starting GRPC server for '%s'", m.resourceName)
			// Serve blocks until the server stops; a nil error means a
			// clean shutdown (e.g. via Stop), so the goroutine exits.
			err := m.server.Serve(sock)
			if err == nil {
				break
			}

			log.Printf("GRPC server for '%s' crashed with error: %v", m.resourceName, err)

			// restart if it has not been too often
			// i.e. if server has crashed more than 5 times and it didn't last more than one hour each time
			if restartCount > 5 {
				// quit
				log.Fatalf("GRPC server for '%s' has repeatedly crashed recently. Quitting", m.resourceName)
			}
			timeSinceLastCrash := time.Since(lastCrashTime).Seconds()
			lastCrashTime = time.Now()
			if timeSinceLastCrash > 3600 {
				// it has been one hour since the last crash.. reset the count
				// to reflect on the frequency
				restartCount = 1
			} else {
				restartCount++
			}
		}
	}()

	// Wait for server to start by launching a blocking connexion
	conn, err := m.dial(m.socket, 5*time.Second)
	if err != nil {
		return err
	}
	conn.Close()

	return nil
}
257 |
258 | // Register registers the device plugin for the given resourceName with Kubelet.
259 | func (m *NvidiaDevicePlugin) Register() error {
260 | conn, err := m.dial(pluginapi.KubeletSocket, 5*time.Second)
261 | if err != nil {
262 | return err
263 | }
264 | defer conn.Close()
265 |
266 | client := pluginapi.NewRegistrationClient(conn)
267 | reqt := &pluginapi.RegisterRequest{
268 | Version: pluginapi.Version,
269 | Endpoint: path.Base(m.socket),
270 | ResourceName: m.resourceName,
271 | Options: &pluginapi.DevicePluginOptions{},
272 | }
273 |
274 | _, err = client.Register(context.Background(), reqt)
275 | if err != nil {
276 | return err
277 | }
278 | return nil
279 | }
280 |
281 | // GetDevicePluginOptions returns the values of the optional settings for this plugin
282 | func (m *NvidiaDevicePlugin) GetDevicePluginOptions(context.Context, *pluginapi.Empty) (*pluginapi.DevicePluginOptions, error) {
283 | options := &pluginapi.DevicePluginOptions{}
284 | return options, nil
285 | }
286 |
// ListAndWatch lists devices and update that list according to the health status
//
// For the vgpu-memory resource the static virtualDevices list is sent
// once and re-sent whenever a device is reported unhealthy; for other
// resources the list is recomputed via apiDevices on each health event.
// Blocks until the stop channel is closed.
func (m *NvidiaDevicePlugin) ListAndWatch(e *pluginapi.Empty, s pluginapi.DevicePlugin_ListAndWatchServer) error {
	if m.resourceName == util.ResourceMem {
		err := s.Send(&pluginapi.ListAndWatchResponse{Devices: m.virtualDevices})
		if err != nil {
			log.Fatalf("failed sending devices %d: %v", len(m.virtualDevices), err)
		}

		for {
			select {
			case <-m.stop:
				return nil
			case d := <-m.health:
				// FIXME: there is no way to recover from the Unhealthy state.
				//isChange := false
				//if d.Health != pluginapi.Unhealthy {
				//isChange = true
				//}
				d.Health = pluginapi.Unhealthy
				log.Printf("'%s' device marked unhealthy: %s", m.resourceName, d.ID)
				// NOTE(review): this Send error is ignored (unlike the
				// initial Send above) — confirm whether that is intended.
				s.Send(&pluginapi.ListAndWatchResponse{Devices: m.virtualDevices})
				//if isChange {
				//	m.kubeInteractor.PatchUnhealthyGPUListOnNode(m.physicalDevices)
				//}
			}
		}

	} else {
		_ = s.Send(&pluginapi.ListAndWatchResponse{Devices: m.apiDevices()})
		for {
			select {
			case <-m.stop:
				return nil
			case d := <-m.health:
				// FIXME: there is no way to recover from the Unhealthy state.
				//d.Health = pluginapi.Unhealthy
				log.Printf("'%s' device marked unhealthy: %s", m.resourceName, d.ID)
				_ = s.Send(&pluginapi.ListAndWatchResponse{Devices: m.apiDevices()})
			}
		}
	}
}
329 |
330 | func (m *NvidiaDevicePlugin) MIGAllocate(ctx context.Context, reqs *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
331 | responses := pluginapi.AllocateResponse{}
332 | for _, req := range reqs.ContainerRequests {
333 | for _, id := range req.DevicesIDs {
334 | if !m.deviceExists(id) {
335 | return nil, fmt.Errorf("invalid allocation request for '%s': unknown device: %s", m.resourceName, id)
336 | }
337 | }
338 |
339 | response := pluginapi.ContainerAllocateResponse{}
340 |
341 | uuids := req.DevicesIDs
342 | deviceIDs := m.deviceIDsFromUUIDs(uuids)
343 |
344 | response.Envs = m.apiEnvs(m.deviceListEnvvar, deviceIDs)
345 |
346 | klog.Infof("response=", response.Envs)
347 | responses.ContainerResponses = append(responses.ContainerResponses, &response)
348 | }
349 |
350 | return &responses, nil
351 | }
352 |
// Allocate which return list of devices.
//
// Flow for the vgpu-number resource: only a single container request is
// supported; the memory/cores resource names receive empty responses
// (they are bookkeeping resources); otherwise the oldest pending pod
// assigned to this node is looked up and the devices pre-assigned in
// its annotations are exposed via NVIDIA_VISIBLE_DEVICES plus the
// libvgpu.so limiter mounts. Allocation failures are reported back via
// util.PodAllocationFailed so the scheduler can retry.
func (m *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
	if len(reqs.ContainerRequests) > 1 {
		return &pluginapi.AllocateResponse{}, errors.New("multiple Container Requests not supported")
	}
	// "mixed" MIG strategy allocations take a separate path.
	if strings.Compare(m.migStrategy, "mixed") == 0 {
		return m.MIGAllocate(ctx, reqs)
	}
	responses := pluginapi.AllocateResponse{}

	// Memory and core resources are accounting-only; acknowledge with
	// empty responses.
	if strings.Compare(m.resourceName, util.ResourceMem) == 0 || strings.Compare(m.resourceName, util.ResourceCores) == 0 {
		for range reqs.ContainerRequests {
			responses.ContainerResponses = append(responses.ContainerResponses, &pluginapi.ContainerAllocateResponse{})
		}
		return &responses, nil
	}
	nodename := os.Getenv("NODE_NAME")

	// The scheduler records its decision in pod annotations; find the
	// pending pod this Allocate call corresponds to.
	current, err := util.GetPendingPod(nodename)
	if err != nil {
		lock.ReleaseNodeLock(nodename, util.VGPUDeviceName)
		return &pluginapi.AllocateResponse{}, err
	}
	if current == nil {
		klog.Errorf("no pending pod found on node %s", nodename)
		lock.ReleaseNodeLock(nodename, util.VGPUDeviceName)
		return &pluginapi.AllocateResponse{}, errors.New("no pending pod found on node")
	}

	for idx := range reqs.ContainerRequests {
		// Pop the next container's device assignment from the pod
		// annotation (one entry per container, consumed in order).
		currentCtr, devreq, err := util.GetNextDeviceRequest(util.NvidiaGPUDevice, *current)
		klog.Infoln("deviceAllocateFromAnnotation=", devreq)
		if err != nil {
			klog.Errorln("get device from annotation failed", err.Error())
			util.PodAllocationFailed(nodename, current)
			return &pluginapi.AllocateResponse{}, err
		}
		if len(devreq) != len(reqs.ContainerRequests[idx].DevicesIDs) {
			klog.Errorln("device number not matched", devreq, reqs.ContainerRequests[idx].DevicesIDs)
			util.PodAllocationFailed(nodename, current)
			return &pluginapi.AllocateResponse{}, errors.New("device number not matched")
		}

		response := pluginapi.ContainerAllocateResponse{}
		response.Envs = make(map[string]string)
		response.Envs["NVIDIA_VISIBLE_DEVICES"] = strings.Join(m.GetContainerDeviceStrArray(devreq), ",")

		// Consume the annotation entry so the next container request
		// sees the next assignment.
		err = util.EraseNextDeviceTypeFromAnnotation(util.NvidiaGPUDevice, *current)
		if err != nil {
			klog.Errorln("Erase annotation failed", err.Error())
			util.PodAllocationFailed(nodename, current)
			return &pluginapi.AllocateResponse{}, err
		}

		if m.operatingMode != "mig" {

			// Per-device memory caps consumed by libvgpu.so inside the
			// container; Usedmem is in GPUMemoryFactor units.
			for i, dev := range devreq {
				limitKey := fmt.Sprintf("CUDA_DEVICE_MEMORY_LIMIT_%v", i)
				response.Envs[limitKey] = fmt.Sprintf("%vm", dev.Usedmem*int32(config.GPUMemoryFactor))
			}
			response.Envs["CUDA_DEVICE_SM_LIMIT"] = fmt.Sprint(devreq[0].Usedcores)
			response.Envs["CUDA_DEVICE_MEMORY_SHARED_CACHE"] = fmt.Sprintf("/tmp/vgpu/%v.cache", uuid.NewUUID())

			// NOTE(review): world-writable 0777 dirs under /tmp — this
			// appears required so the unprivileged container-side hook
			// can write its cache/lock files; confirm.
			cacheFileHostDirectory := "/tmp/vgpu/containers/" + string(current.UID) + "_" + currentCtr.Name
			os.MkdirAll(cacheFileHostDirectory, 0777)
			os.Chmod(cacheFileHostDirectory, 0777)
			os.MkdirAll("/tmp/vgpulock", 0777)
			os.Chmod("/tmp/vgpulock", 0777)
			hostHookPath := os.Getenv("HOOK_PATH")

			response.Mounts = append(response.Mounts,
				&pluginapi.Mount{ContainerPath: "/usr/local/vgpu/libvgpu.so",
					HostPath: hostHookPath + "/libvgpu.so",
					ReadOnly: true},
				&pluginapi.Mount{ContainerPath: "/tmp/vgpu",
					HostPath: cacheFileHostDirectory,
					ReadOnly: false},
				&pluginapi.Mount{ContainerPath: "/tmp/vgpulock",
					HostPath: "/tmp/vgpulock",
					ReadOnly: false},
			)
			// CUDA_DISABLE_CONTROL in the container spec opts out of
			// the ld.so.preload-based interception.
			found := false
			for _, val := range currentCtr.Env {
				if strings.Compare(val.Name, "CUDA_DISABLE_CONTROL") == 0 {
					found = true
					break
				}
			}
			if !found {
				response.Mounts = append(response.Mounts, &pluginapi.Mount{ContainerPath: "/etc/ld.so.preload",
					HostPath: hostHookPath + "/ld.so.preload",
					ReadOnly: true},
				)
			}
		}
		responses.ContainerResponses = append(responses.ContainerResponses, &response)
	}
	klog.Infoln("Allocate Response", responses.ContainerResponses)
	util.PodAllocationTrySuccess(nodename, current)
	return &responses, nil
}
454 |
// PreStartContainer is unimplemented for this plugin; it always
// succeeds with an empty response.
func (m *NvidiaDevicePlugin) PreStartContainer(context.Context, *pluginapi.PreStartContainerRequest) (*pluginapi.PreStartContainerResponse, error) {
	return &pluginapi.PreStartContainerResponse{}, nil
}
459 |
460 | // dial establishes the gRPC communication with the registered device plugin.
461 | func (m *NvidiaDevicePlugin) dial(unixSocketPath string, timeout time.Duration) (*grpc.ClientConn, error) {
462 | c, err := grpc.Dial(unixSocketPath, grpc.WithInsecure(), grpc.WithBlock(),
463 | grpc.WithTimeout(timeout),
464 | grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) {
465 | return net.DialTimeout("unix", addr, timeout)
466 | }),
467 | )
468 |
469 | if err != nil {
470 | return nil, err
471 | }
472 |
473 | return c, nil
474 | }
475 |
476 | func (m *NvidiaDevicePlugin) Devices() []*Device {
477 | if strings.Compare(m.migStrategy, "none") == 0 {
478 | return m.deviceCache.GetCache()
479 | }
480 | if strings.Compare(m.migStrategy, "mixed") == 0 {
481 | return m.ResourceManager.Devices()
482 | }
483 | log.Panic("migStrategy not recognized,exiting...")
484 | return []*Device{}
485 | }
486 |
487 | func (m *NvidiaDevicePlugin) deviceExists(id string) bool {
488 | for _, d := range m.cachedDevices {
489 | if d.ID == id {
490 | return true
491 | }
492 | }
493 | return false
494 | }
495 |
// deviceIDsFromUUIDs maps device UUIDs to the device IDs exposed to the
// kubelet. For this plugin the two are identical, so it is a pass-through.
func (m *NvidiaDevicePlugin) deviceIDsFromUUIDs(uuids []string) []string {
	return uuids
}
499 |
500 | func (m *NvidiaDevicePlugin) apiDevices() []*pluginapi.Device {
501 | if strings.Compare(m.migStrategy, "mixed") == 0 {
502 | var pdevs []*pluginapi.Device
503 | for _, d := range m.cachedDevices {
504 | pdevs = append(pdevs, &d.Device)
505 | }
506 | return pdevs
507 | }
508 | devices := m.Devices()
509 | var res []*pluginapi.Device
510 |
511 | if strings.Compare(m.resourceName, util.ResourceMem) == 0 {
512 | for _, dev := range devices {
513 | i := 0
514 | klog.Infoln("memory=", dev.Memory, "id=", dev.ID)
515 | for i < int(32767) {
516 | res = append(res, &pluginapi.Device{
517 | ID: fmt.Sprintf("%v-memory-%v", dev.ID, i),
518 | Health: dev.Health,
519 | Topology: nil,
520 | })
521 | i++
522 | }
523 | }
524 | klog.Infoln("res length=", len(res))
525 | return res
526 | }
527 | if strings.Compare(m.resourceName, util.ResourceCores) == 0 {
528 | for _, dev := range devices {
529 | i := 0
530 | for i < 100 {
531 | res = append(res, &pluginapi.Device{
532 | ID: fmt.Sprintf("%v-core-%v", dev.ID, i),
533 | Health: dev.Health,
534 | Topology: nil,
535 | })
536 | i++
537 | }
538 | }
539 | return res
540 | }
541 |
542 | for _, dev := range devices {
543 | for i := uint(0); i < config.DeviceSplitCount; i++ {
544 | id := fmt.Sprintf("%v-%v", dev.ID, i)
545 | res = append(res, &pluginapi.Device{
546 | ID: id,
547 | Health: dev.Health,
548 | Topology: nil,
549 | })
550 | }
551 | }
552 | return res
553 | }
554 |
555 | func (m *NvidiaDevicePlugin) apiEnvs(envvar string, deviceIDs []string) map[string]string {
556 | return map[string]string{
557 | envvar: strings.Join(deviceIDs, ","),
558 | }
559 | }
560 |
561 | func (m *NvidiaDevicePlugin) ApplyMigTemplate() {
562 | data, err := yaml.Marshal(m.migCurrent)
563 | if err != nil {
564 | klog.Error("marshal failed", err.Error())
565 | }
566 | klog.Infoln("Applying data=", string(data))
567 | os.WriteFile("/tmp/migconfig.yaml", data, os.ModePerm)
568 | cmd := exec.Command("nvidia-mig-parted", "apply", "-f", "/tmp/migconfig.yaml")
569 | var stdout, stderr bytes.Buffer
570 | cmd.Stdout = &stdout
571 | cmd.Stderr = &stderr
572 | err = cmd.Run()
573 | if err != nil {
574 | klog.Fatalf("nvidia-mig-parted failed with %s\n", err)
575 | }
576 | outStr := stdout.String()
577 | klog.Infoln("Mig apply", outStr)
578 | }
579 |
580 | func (m *NvidiaDevicePlugin) GetContainerDeviceStrArray(c util.ContainerDevices) []string {
581 | tmp := []string{}
582 | needsreset := false
583 | position := 0
584 | for _, val := range c {
585 | if !strings.Contains(val.UUID, "[") {
586 | tmp = append(tmp, val.UUID)
587 | } else {
588 | devtype, devindex := util.GetIndexAndTypeFromUUID(val.UUID)
589 | position, needsreset = m.GenerateMigTemplate(devtype, devindex, val)
590 | if needsreset {
591 | m.ApplyMigTemplate()
592 | }
593 | tmp = append(tmp, util.GetMigUUIDFromIndex(val.UUID, position))
594 | }
595 | }
596 | klog.V(3).Infoln("mig current=", m.migCurrent, ":", needsreset, "position=", position, "uuid lists", tmp)
597 | return tmp
598 | }
599 |
// GenerateMigTemplate reconciles the desired MIG geometry for the GPU
// at devindex against the current mig-parted state (m.migCurrent).
// It returns the instance position extracted from the device UUID and
// whether m.migCurrent was rewritten (the caller must then invoke
// ApplyMigTemplate). Returns (-1, false) when the UUID or template
// cannot be resolved.
func (m *NvidiaDevicePlugin) GenerateMigTemplate(devtype string, devindex int, val util.ContainerDevice) (int, bool) {
	needsreset := false
	position := -1 // Initialize to an invalid position

	for _, migTemplate := range m.schedulerConfig.MigGeometriesList {
		// Match the GPU model against this geometry family.
		if containsModel(devtype, migTemplate.Models) {
			klog.InfoS("type found", "Type", devtype, "Models", strings.Join(migTemplate.Models, ", "))

			templateGroupName, pos, err := util.ExtractMigTemplatesFromUUID(val.UUID)
			if err != nil {
				klog.ErrorS(err, "failed to extract template index from UUID", "UUID", val.UUID)
				return -1, false
			}

			// Resolve the geometry group named in the UUID.
			templateIdx := -1
			for i, migTemplateEntry := range migTemplate.Geometries {
				if migTemplateEntry.Group == templateGroupName {
					templateIdx = i
					break
				}
			}

			if templateIdx < 0 || templateIdx >= len(migTemplate.Geometries) {
				klog.ErrorS(nil, "invalid template index extracted from UUID", "UUID", val.UUID, "Index", templateIdx)
				return -1, false
			}

			position = pos

			v := migTemplate.Geometries[templateIdx].Instances

			// Compare the desired instance counts against the entry in
			// migCurrent covering this physical device.
			for migidx, migpartedDev := range m.migCurrent.MigConfigs["current"] {
				if containsDevice(devindex, migpartedDev.Devices) {
					for _, migTemplateEntry := range v {
						currentCount, ok := migpartedDev.MigDevices[migTemplateEntry.Name]
						expectedCount := migTemplateEntry.Count

						if !ok || currentCount != expectedCount {
							needsreset = true
							klog.InfoS("updated mig device count", "Template", v)
						} else {
							klog.InfoS("incremented mig device count", "TemplateName", migTemplateEntry.Name, "Count", currentCount+1)
						}
					}

					// On mismatch, replace the whole device entry with
					// the desired geometry and enable MIG.
					if needsreset {
						for k := range m.migCurrent.MigConfigs["current"][migidx].MigDevices {
							delete(m.migCurrent.MigConfigs["current"][migidx].MigDevices, k)
						}

						for _, migTemplateEntry := range v {
							m.migCurrent.MigConfigs["current"][migidx].MigDevices[migTemplateEntry.Name] = migTemplateEntry.Count
							m.migCurrent.MigConfigs["current"][migidx].MigEnabled = true
						}
					}
					break
				}
			}
			break
		}
	}

	return position, needsreset
}
664 |
// containsModel reports whether target contains any of the given model
// substrings (substring match, not equality).
func containsModel(target string, models []string) bool {
	for _, candidate := range models {
		if strings.Contains(target, candidate) {
			return true
		}
	}
	return false
}
674 |
// containsDevice reports whether the device index target appears in devices.
func containsDevice(target int, devices []int32) bool {
	for _, idx := range devices {
		if int(idx) == target {
			return true
		}
	}
	return false
}
684 |
--------------------------------------------------------------------------------
/pkg/plugin/vgpu/register.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2023 The Volcano Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package vgpu
18 |
19 | import (
20 | "fmt"
21 | "strings"
22 | "time"
23 |
24 | "github.com/NVIDIA/go-nvml/pkg/nvml"
25 | "k8s.io/klog/v2"
26 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config"
27 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/util"
28 | )
29 |
// DevListFunc returns the current list of plugin devices.
type DevListFunc func() []*Device

// DeviceRegister periodically publishes the node's GPU inventory into
// node annotations (see WatchAndRegister) so the scheduler can see it.
type DeviceRegister struct {
	// deviceCache is the source of device identity and health state.
	deviceCache *DeviceCache
	// unhealthy receives devices the cache reports as unhealthy.
	unhealthy chan *Device
	// stopCh is closed by Stop to signal shutdown.
	stopCh chan struct{}
}
37 |
38 | func NewDeviceRegister(deviceCache *DeviceCache) *DeviceRegister {
39 | return &DeviceRegister{
40 | deviceCache: deviceCache,
41 | unhealthy: make(chan *Device),
42 | stopCh: make(chan struct{}),
43 | }
44 | }
45 |
// Start subscribes to device-cache health notifications and launches
// the background registration loop.
func (r *DeviceRegister) Start() {
	r.deviceCache.AddNotifyChannel("register", r.unhealthy)
	go r.WatchAndRegister()
}
50 |
// Stop closes stopCh to signal shutdown. Must be called at most once
// (a second close would panic).
func (r *DeviceRegister) Stop() {
	close(r.stopCh)
}
54 |
55 | func (r *DeviceRegister) apiDevices() *[]*util.DeviceInfo {
56 | devs := r.deviceCache.GetCache()
57 | res := make([]*util.DeviceInfo, 0, len(devs))
58 | for _, dev := range devs {
59 | ndev, ret := config.Nvml().DeviceGetHandleByUUID(dev.ID)
60 | if ret != nvml.SUCCESS {
61 | fmt.Println("nvml new device by uuid error id=", dev.ID)
62 | panic(ret)
63 | }
64 |
65 | memory, ret := config.Nvml().DeviceGetMemoryInfo(ndev)
66 | if ret != nvml.SUCCESS {
67 | fmt.Println("failed to get memory info for device id=", dev.ID)
68 | panic(ret)
69 | }
70 |
71 | model, ret := config.Nvml().DeviceGetName(ndev)
72 | if ret != nvml.SUCCESS {
73 | fmt.Println("failed to get model name for device id=", dev.ID)
74 | panic(ret)
75 | }
76 |
77 | klog.V(3).Infoln("nvml registered device id=", dev.ID, "memory=", memory.Total, "type=", model)
78 |
79 | registeredmem := int32(memory.Total/(1024*1024)) / int32(config.GPUMemoryFactor)
80 | klog.V(3).Infoln("GPUMemoryFactor=", config.GPUMemoryFactor, "registeredmem=", registeredmem)
81 | res = append(res, &util.DeviceInfo{
82 | Id: dev.ID,
83 | Count: int32(config.DeviceSplitCount),
84 | Devmem: registeredmem,
85 | Mode: config.Mode,
86 | Type: fmt.Sprintf("%v-%v", "NVIDIA", model),
87 | Health: strings.EqualFold(dev.Health, "healthy"),
88 | })
89 | }
90 | return &res
91 | }
92 |
93 | func (r *DeviceRegister) RegisterInAnnotation() error {
94 | devices := r.apiDevices()
95 | annos := make(map[string]string)
96 | node, err := util.GetNode(config.NodeName)
97 | if err != nil {
98 | klog.Errorln("get node error", err.Error())
99 | return err
100 | }
101 | encodeddevices := util.EncodeNodeDevices(*devices)
102 | annos[util.NodeHandshake] = "Reported " + time.Now().String()
103 | annos[util.NodeNvidiaDeviceRegistered] = encodeddevices
104 | klog.Infoln("Reporting devices", encodeddevices, "in", time.Now().String())
105 | err = util.PatchNodeAnnotations(node, annos)
106 |
107 | if err != nil {
108 | klog.Errorln("patch node error", err.Error())
109 | }
110 | return err
111 | }
112 |
113 | func (r *DeviceRegister) WatchAndRegister() {
114 | klog.Infof("into WatchAndRegister")
115 | for {
116 | if len(config.Mode) == 0 {
117 | klog.V(5).Info("register skipped, waiting for device config to be loaded")
118 | time.Sleep(time.Second * 2)
119 | continue
120 | }
121 | err := r.RegisterInAnnotation()
122 | if err != nil {
123 | klog.Errorf("register error, %v", err)
124 | time.Sleep(time.Second * 5)
125 | } else {
126 | time.Sleep(time.Second * 30)
127 | }
128 | }
129 | }
130 |
--------------------------------------------------------------------------------
/pkg/plugin/vgpu/util/types.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2023 The Volcano Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package util
18 |
19 | import "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config"
20 |
const (
	// Pod annotations exchanged between the volcano scheduler and this plugin.
	AssignedTimeAnnotations          = "volcano.sh/vgpu-time"
	AssignedIDsAnnotations           = "volcano.sh/vgpu-ids-new"
	AssignedIDsToAllocateAnnotations = "volcano.sh/devices-to-allocate"
	AssignedNodeAnnotations          = "volcano.sh/vgpu-node"
	BindTimeAnnotations              = "volcano.sh/bind-time"
	DeviceBindPhase                  = "volcano.sh/bind-phase"

	// PodAnnotationMaxLength pod annotation max data length 1MB
	PodAnnotationMaxLength = 1024 * 1024

	// Pod annotations selecting / excluding GPU models.
	GPUInUse = "nvidia.com/use-gputype"
	GPUNoUse = "nvidia.com/nouse-gputype"

	// Values of the DeviceBindPhase annotation.
	DeviceBindAllocating = "allocating"
	DeviceBindFailed     = "failed"
	DeviceBindSuccess    = "success"

	// DeviceLimit — maximum device count; usage not visible in this
	// file, confirm at call sites.
	DeviceLimit = 100

	// Scheduling policy names.
	BestEffort string = "best-effort"
	Restricted string = "restricted"
	Guaranteed string = "guaranteed"

	// NvidiaGPUDevice is the device-type tag used in annotations;
	// NvidiaGPUCommonWord seeds DevicesToHandle (see util.go init).
	NvidiaGPUDevice     = "NVIDIA"
	NvidiaGPUCommonWord = "GPU"

	// Node-mutex annotation key and its retry limit (see pkg/lock).
	NodeLockTime = "volcano.sh/mutex.lock"
	MaxLockRetry = 5

	// Node annotations used for the scheduler handshake and the
	// encoded device registry (see DeviceRegister).
	NodeHandshake              = "volcano.sh/node-vgpu-handshake"
	NodeNvidiaDeviceRegistered = "volcano.sh/node-vgpu-register"

	// DeviceName used to indicate this device
	VGPUDeviceName = "hamivgpu"

	// DeviceConfigurationConfigMapKey specifies in what ConfigMap key the device configuration should be stored
	DeviceConfigurationConfigMapKey = "device-config.yaml"
)
60 |
var (
	// Resource names advertised to the kubelet; ResourceName/ResourceMem/
	// ResourceCores and DebugMode are set by flags in GlobalFlagSet.
	// NOTE(review): ResourceMemPercentage and ResourcePriority have no
	// flag registration in this file — confirm where they are set.
	ResourceName          string
	ResourceMem           string
	ResourceCores         string
	ResourceMemPercentage string
	ResourcePriority      string
	DebugMode             bool

	// MLU resource names; not registered by GlobalFlagSet here.
	MLUResourceCount  string
	MLUResourceMemory string

	// KnownDevice maps a node handshake annotation to its device
	// registry annotation.
	KnownDevice = map[string]string{
		NodeHandshake: NodeNvidiaDeviceRegistered,
	}
)
76 |
// ContainerDevice is one device slice assigned to a container.
type ContainerDevice struct {
	// UUID of the physical device, or a MIG-encoded UUID (contains "[").
	UUID string
	// Type of the device, e.g. NvidiaGPUDevice.
	Type string
	// Usedmem is device memory in scheduler units (multiplied by
	// config.GPUMemoryFactor when exported as CUDA_DEVICE_MEMORY_LIMIT).
	Usedmem int32
	// Usedcores is exported as CUDA_DEVICE_SM_LIMIT.
	Usedcores int32
}

// ContainerDeviceRequest is a container's aggregated vGPU request.
type ContainerDeviceRequest struct {
	Nums             int32
	Type             string
	Memreq           int32
	MemPercentagereq int32
	Coresreq         int32
}

// ContainerDevices is the device assignment for a single container.
type ContainerDevices []ContainerDevice

// PodDevices is the per-container device assignment for a whole pod.
type PodDevices []ContainerDevices

// DeviceInfo is the per-GPU record encoded into the node registration
// annotation (see EncodeNodeDevices / DecodeNodeDevices).
type DeviceInfo struct {
	Id                   string            `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"`
	Count                int32             `protobuf:"varint,2,opt,name=count,proto3" json:"count,omitempty"`
	Devmem               int32             `protobuf:"varint,3,opt,name=devmem,proto3" json:"devmem,omitempty"`
	Type                 string            `protobuf:"bytes,4,opt,name=type,proto3" json:"type,omitempty"`
	Health               bool              `protobuf:"varint,5,opt,name=health,proto3" json:"health,omitempty"`
	Mode                 string            `json:"mode,omitempty"`
	MIGTemplate          []config.Geometry `json:"migtemplate,omitempty"`
	XXX_NoUnkeyedLiteral struct{}          `json:"-"`
	XXX_unrecognized     []byte            `json:"-"`
	XXX_sizecache        int32             `json:"-"`
}
108 |
--------------------------------------------------------------------------------
/pkg/plugin/vgpu/util/util.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2023 The Volcano Authors.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package util
18 |
19 | import (
20 | "bytes"
21 | "context"
22 | "encoding/json"
23 | "errors"
24 | "flag"
25 | "fmt"
26 | "math"
27 | "os"
28 | "os/exec"
29 | "strconv"
30 | "strings"
31 |
32 | "github.com/NVIDIA/go-nvml/pkg/nvml"
33 | "gopkg.in/yaml.v2"
34 | v1 "k8s.io/api/core/v1"
35 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
36 | k8stypes "k8s.io/apimachinery/pkg/types"
37 | "k8s.io/klog/v2"
38 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
39 | "volcano.sh/k8s-device-plugin/pkg/lock"
40 | "volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config"
41 | )
42 |
// DevicesToHandle lists the device common words this plugin handles.
var DevicesToHandle []string

// init wires up the shared kubernetes client used by the lock package
// and seeds the handled-device list with the NVIDIA GPU common word.
// NOTE(review): the NewClient error is ignored, so the client may be
// nil out-of-cluster — confirm callers tolerate this.
func init() {
	client, _ := lock.NewClient()
	lock.UseClient(client)
	DevicesToHandle = []string{}
	DevicesToHandle = append(DevicesToHandle, NvidiaGPUCommonWord)
}
51 |
// GlobalFlagSet builds and returns the process flag set, registering
// the vgpu resource-name flags, the debug flag and klog's logging flags.
func GlobalFlagSet() *flag.FlagSet {
	fs := flag.NewFlagSet(os.Args[0], flag.ExitOnError)
	fs.StringVar(&ResourceName, "resource-name", "volcano.sh/vgpu-number", "resource name")
	fs.StringVar(&ResourceMem, "resource-memory-name", "volcano.sh/vgpu-memory", "resource name for resource memory resources")
	fs.StringVar(&ResourceCores, "resource-core-name", "volcano.sh/vgpu-cores", "resource name for resource core resources")
	fs.BoolVar(&DebugMode, "debug", false, "debug mode")
	klog.InitFlags(fs)
	return fs
}
61 |
62 | func GetNode(nodename string) (*v1.Node, error) {
63 | n, err := lock.GetClient().CoreV1().Nodes().Get(context.Background(), nodename, metav1.GetOptions{})
64 | return n, err
65 | }
66 |
67 | func GetPendingPod(node string) (*v1.Pod, error) {
68 | podList, err := lock.GetClient().CoreV1().Pods("").List(context.Background(), metav1.ListOptions{})
69 | if err != nil {
70 | return nil, err
71 | }
72 |
73 | oldestPod := getOldestPod(podList.Items, node)
74 | if oldestPod == nil {
75 | return nil, fmt.Errorf("cannot get valid pod")
76 | }
77 |
78 | return oldestPod, nil
79 | }
80 |
81 | func getOldestPod(pods []v1.Pod, nodename string) *v1.Pod {
82 | if len(pods) == 0 {
83 | return nil
84 | }
85 | oldest := pods[0]
86 | for _, pod := range pods {
87 | if pod.Annotations[AssignedNodeAnnotations] == nodename {
88 | klog.V(4).Infof("pod %s, predicate time: %s", pod.Name, pod.Annotations[AssignedTimeAnnotations])
89 | if getPredicateTimeFromPodAnnotation(&oldest) > getPredicateTimeFromPodAnnotation(&pod) {
90 | oldest = pod
91 | }
92 | }
93 | }
94 | klog.V(4).Infof("oldest pod %#v, predicate time: %#v", oldest.Name,
95 | oldest.Annotations[AssignedTimeAnnotations])
96 | annotation := map[string]string{AssignedTimeAnnotations: strconv.FormatUint(math.MaxUint64, 10)}
97 | if err := PatchPodAnnotations(&oldest, annotation); err != nil {
98 | klog.Errorf("update pod %s failed, err: %v", oldest.Name, err)
99 | return nil
100 | }
101 | return &oldest
102 | }
103 |
104 | func getPredicateTimeFromPodAnnotation(pod *v1.Pod) uint64 {
105 | assumeTimeStr, ok := pod.Annotations[AssignedTimeAnnotations]
106 | if !ok {
107 | klog.Warningf("volcano not write timestamp, pod Name: %s", pod.Name)
108 | return math.MaxUint64
109 | }
110 | if len(assumeTimeStr) > PodAnnotationMaxLength {
111 | klog.Warningf("timestamp fmt invalid, pod Name: %s", pod.Name)
112 | return math.MaxUint64
113 | }
114 | predicateTime, err := strconv.ParseUint(assumeTimeStr, 10, 64)
115 | if err != nil {
116 | klog.Errorf("parse timestamp failed, %v", err)
117 | return math.MaxUint64
118 | }
119 | return predicateTime
120 | }
121 |
122 | func DecodeNodeDevices(str string) []*DeviceInfo {
123 | if !strings.Contains(str, ":") {
124 | return []*DeviceInfo{}
125 | }
126 | tmp := strings.Split(str, ":")
127 | var retval []*DeviceInfo
128 | for _, val := range tmp {
129 | if strings.Contains(val, ",") {
130 | items := strings.Split(val, ",")
131 | count, _ := strconv.Atoi(items[1])
132 | devmem, _ := strconv.Atoi(items[2])
133 | health, _ := strconv.ParseBool(items[4])
134 | i := DeviceInfo{
135 | Id: items[0],
136 | Count: int32(count),
137 | Devmem: int32(devmem),
138 | Type: items[3],
139 | Health: health,
140 | }
141 | retval = append(retval, &i)
142 | }
143 | }
144 | return retval
145 | }
146 |
147 | func EncodeNodeDevices(dlist []*DeviceInfo) string {
148 | tmp := ""
149 | for _, val := range dlist {
150 | tmp += val.Id + "," + strconv.FormatInt(int64(val.Count), 10) + "," + strconv.Itoa(int(val.Devmem)) + "," + val.Type + "," + strconv.FormatBool(val.Health) + "," + val.Mode + ":"
151 | }
152 | klog.V(3).Infoln("Encoded node Devices", tmp)
153 | return tmp
154 | }
155 |
156 | func EncodeContainerDevices(cd ContainerDevices) string {
157 | tmp := ""
158 | for _, val := range cd {
159 | tmp += val.UUID + "," + val.Type + "," + strconv.Itoa(int(val.Usedmem)) + "," + strconv.Itoa(int(val.Usedcores)) + ":"
160 | }
161 | fmt.Println("Encoded container Devices=", tmp)
162 | return tmp
163 | //return strings.Join(cd, ",")
164 | }
165 |
166 | func EncodePodDevices(pd PodDevices) string {
167 | var ss []string
168 | for _, cd := range pd {
169 | ss = append(ss, EncodeContainerDevices(cd))
170 | }
171 | return strings.Join(ss, ";")
172 | }
173 |
174 | func DecodeContainerDevices(str string) ContainerDevices {
175 | if len(str) == 0 {
176 | return ContainerDevices{}
177 | }
178 | cd := strings.Split(str, ":")
179 | contdev := ContainerDevices{}
180 | tmpdev := ContainerDevice{}
181 | if len(str) == 0 {
182 | return contdev
183 | }
184 | for _, val := range cd {
185 | if strings.Contains(val, ",") {
186 | tmpstr := strings.Split(val, ",")
187 | tmpdev.UUID = tmpstr[0]
188 | tmpdev.Type = tmpstr[1]
189 | devmem, _ := strconv.ParseInt(tmpstr[2], 10, 32)
190 | tmpdev.Usedmem = int32(devmem)
191 | devcores, _ := strconv.ParseInt(tmpstr[3], 10, 32)
192 | tmpdev.Usedcores = int32(devcores)
193 | contdev = append(contdev, tmpdev)
194 | }
195 | }
196 | return contdev
197 | }
198 |
199 | func DecodePodDevices(str string) PodDevices {
200 | if len(str) == 0 {
201 | return PodDevices{}
202 | }
203 | var pd PodDevices
204 | for _, s := range strings.Split(str, ";") {
205 | cd := DecodeContainerDevices(s)
206 | pd = append(pd, cd)
207 | }
208 | return pd
209 | }
210 |
211 | func GetNextDeviceRequest(dtype string, p v1.Pod) (v1.Container, ContainerDevices, error) {
212 | pdevices := DecodePodDevices(p.Annotations[AssignedIDsToAllocateAnnotations])
213 | klog.Infoln("pdevices=", pdevices)
214 | res := ContainerDevices{}
215 | for idx, val := range pdevices {
216 | found := false
217 | for _, dev := range val {
218 | if strings.Compare(dtype, dev.Type) == 0 {
219 | res = append(res, dev)
220 | found = true
221 | }
222 | }
223 | if found {
224 | return p.Spec.Containers[idx], res, nil
225 | }
226 | }
227 | return v1.Container{}, res, errors.New("device request not found")
228 | }
229 |
// EraseNextDeviceTypeFromAnnotation removes the first (and only the first)
// container section's devices of type dtype from the pod's to-allocate
// annotation, then patches the updated annotation back onto the pod.
// Sections after the first match are copied through unchanged.
func EraseNextDeviceTypeFromAnnotation(dtype string, p v1.Pod) error {
	pdevices := DecodePodDevices(p.Annotations[AssignedIDsToAllocateAnnotations])
	res := PodDevices{}
	// found flips to true once a device of dtype has been erased; from then
	// on every remaining section is passed through untouched.
	found := false
	for _, val := range pdevices {
		if found {
			res = append(res, val)
			continue
		} else {
			// tmp collects this section's devices that do NOT match dtype.
			tmp := ContainerDevices{}
			for _, dev := range val {
				if strings.Compare(dtype, dev.Type) == 0 {
					found = true
				} else {
					tmp = append(tmp, dev)
				}
			}
			// No match in this section: keep it as-is; otherwise keep only
			// the non-matching devices (the matching ones are erased).
			if !found {
				res = append(res, val)
			} else {
				res = append(res, tmp)
			}
		}
	}
	klog.Infoln("After erase res=", res)
	newannos := make(map[string]string)
	newannos[AssignedIDsToAllocateAnnotations] = EncodePodDevices(res)
	return PatchPodAnnotations(&p, newannos)
}
259 |
260 | func PodAllocationTrySuccess(nodeName string, pod *v1.Pod) {
261 | refreshed, _ := lock.GetClient().CoreV1().Pods(pod.Namespace).Get(context.Background(), pod.Name, metav1.GetOptions{})
262 | annos := refreshed.Annotations[AssignedIDsToAllocateAnnotations]
263 | klog.Infoln("TrySuccess:", annos)
264 | for _, val := range DevicesToHandle {
265 | if strings.Contains(annos, val) {
266 | return
267 | }
268 | }
269 | klog.Infoln("AllDevicesAllocateSuccess releasing lock")
270 | PodAllocationSuccess(nodeName, pod)
271 | }
272 |
273 | func PodAllocationSuccess(nodeName string, pod *v1.Pod) {
274 | newannos := make(map[string]string)
275 | newannos[DeviceBindPhase] = DeviceBindSuccess
276 | err := PatchPodAnnotations(pod, newannos)
277 | if err != nil {
278 | klog.Errorf("patchPodAnnotations failed:%v", err.Error())
279 | }
280 | err = lock.ReleaseNodeLock(nodeName, VGPUDeviceName)
281 | if err != nil {
282 | klog.Errorf("release lock failed:%v", err.Error())
283 | }
284 | }
285 |
286 | func PodAllocationFailed(nodeName string, pod *v1.Pod) {
287 | newannos := make(map[string]string)
288 | newannos[DeviceBindPhase] = DeviceBindFailed
289 | err := PatchPodAnnotations(pod, newannos)
290 | if err != nil {
291 | klog.Errorf("patchPodAnnotations failed:%v", err.Error())
292 | }
293 | err = lock.ReleaseNodeLock(nodeName, VGPUDeviceName)
294 | if err != nil {
295 | klog.Errorf("release lock failed:%v", err.Error())
296 | }
297 | }
298 |
299 | func PatchNodeAnnotations(node *v1.Node, annotations map[string]string) error {
300 | type patchMetadata struct {
301 | Annotations map[string]string `json:"annotations,omitempty"`
302 | }
303 | type patchPod struct {
304 | Metadata patchMetadata `json:"metadata"`
305 | //Spec patchSpec `json:"spec,omitempty"`
306 | }
307 |
308 | p := patchPod{}
309 | p.Metadata.Annotations = annotations
310 |
311 | bytes, err := json.Marshal(p)
312 | if err != nil {
313 | return err
314 | }
315 | _, err = lock.GetClient().CoreV1().Nodes().
316 | Patch(context.Background(), node.Name, k8stypes.StrategicMergePatchType, bytes, metav1.PatchOptions{})
317 | if err != nil {
318 | klog.Infof("patch pod %v failed, %v", node.Name, err)
319 | }
320 | return err
321 | }
322 |
323 | func PatchPodAnnotations(pod *v1.Pod, annotations map[string]string) error {
324 | type patchMetadata struct {
325 | Annotations map[string]string `json:"annotations,omitempty"`
326 | }
327 | type patchPod struct {
328 | Metadata patchMetadata `json:"metadata"`
329 | //Spec patchSpec `json:"spec,omitempty"`
330 | }
331 |
332 | p := patchPod{}
333 | p.Metadata.Annotations = annotations
334 |
335 | bytes, err := json.Marshal(p)
336 | if err != nil {
337 | return err
338 | }
339 | _, err = lock.GetClient().CoreV1().Pods(pod.Namespace).
340 | Patch(context.Background(), pod.Name, k8stypes.StrategicMergePatchType, bytes, metav1.PatchOptions{})
341 | if err != nil {
342 | klog.Infof("patch pod %v failed, %v", pod.Name, err)
343 | }
344 | return err
345 | }
346 |
347 | func LoadConfigFromCM(cmName string) (*config.Config, error) {
348 | lock.NewClient()
349 | cm, err := lock.GetClient().CoreV1().ConfigMaps("kube-system").Get(context.Background(), cmName, metav1.GetOptions{})
350 | if err != nil {
351 | cm, err = lock.GetClient().CoreV1().ConfigMaps("volcano-system").Get(context.Background(), cmName, metav1.GetOptions{})
352 | if err != nil {
353 | return nil, err
354 | }
355 | }
356 | data, ok := cm.Data[DeviceConfigurationConfigMapKey]
357 | if !ok {
358 | return nil, fmt.Errorf("%v not found in ConfigMap %v", DeviceConfigurationConfigMapKey, cmName)
359 | }
360 | var yamlData config.Config
361 | err = yaml.Unmarshal([]byte(data), &yamlData)
362 | if err != nil {
363 | return nil, err
364 | }
365 | return &yamlData, nil
366 | }
367 |
368 | func LoadConfig(path string) (*config.Config, error) {
369 | data, err := os.ReadFile(path)
370 | if err != nil {
371 | return nil, err
372 | }
373 | var yamlData config.Config
374 | err = yaml.Unmarshal(data, &yamlData)
375 | if err != nil {
376 | return nil, err
377 | }
378 | return &yamlData, nil
379 | }
380 |
// GenerateVirtualDeviceID builds the "<physical index>-<replica counter>"
// identifier used for a virtual (shared) slice of a physical GPU.
func GenerateVirtualDeviceID(id uint, fakeCounter uint) string {
	return strconv.FormatUint(uint64(id), 10) + "-" + strconv.FormatUint(uint64(fakeCounter), 10)
}
384 |
385 | // GetDevices returns virtual devices and all physical devices by index.
386 | func GetDevices(gpuMemoryFactor uint) ([]*pluginapi.Device, map[uint]string) {
387 | n, ret := config.Nvml().DeviceGetCount()
388 | if ret != nvml.SUCCESS {
389 | klog.Fatalf("call nvml.DeviceGetCount with error: %v", ret)
390 | }
391 |
392 | var virtualDevs []*pluginapi.Device
393 | deviceByIndex := map[uint]string{}
394 | for i := uint(0); i < uint(n); i++ {
395 | d, ret := config.Nvml().DeviceGetHandleByIndex(int(i))
396 | if ret != nvml.SUCCESS {
397 | klog.Fatalf("call nvml.DeviceGetHandleByIndex with error: %v", ret)
398 | }
399 | uuid, ret := d.GetUUID()
400 | if ret != nvml.SUCCESS {
401 | klog.Fatalf("call GetUUID with error: %v", ret)
402 | }
403 | id := i
404 | deviceByIndex[id] = uuid
405 | memory, ret := d.GetMemoryInfo()
406 | if ret != nvml.SUCCESS {
407 | klog.Fatalf("call GetMemoryInfo with error: %v", ret)
408 | }
409 | deviceGPUMemory := uint(memory.Total / (1024 * 1024))
410 | for j := uint(0); j < deviceGPUMemory/gpuMemoryFactor; j++ {
411 | klog.V(4).Infof("adding virtual device: %d", j)
412 | fakeID := GenerateVirtualDeviceID(id, j)
413 | virtualDevs = append(virtualDevs, &pluginapi.Device{
414 | ID: fakeID,
415 | Health: pluginapi.Healthy,
416 | })
417 | }
418 | }
419 |
420 | return virtualDevs, deviceByIndex
421 | }
422 |
423 | func GetDeviceNums() int {
424 | count, ret := config.Nvml().DeviceGetCount()
425 | if ret != nvml.SUCCESS {
426 | klog.Error(`nvml get count error ret=`, ret)
427 | }
428 | return count
429 | }
430 |
431 | func GetIndexAndTypeFromUUID(uuid string) (string, int) {
432 | originuuid := strings.Split(uuid, "[")[0]
433 | ndev, ret := config.Nvml().DeviceGetHandleByUUID(originuuid)
434 | if ret != nvml.SUCCESS {
435 | klog.Error("nvml get handlebyuuid error ret=", ret)
436 | panic(0)
437 | }
438 | model, ret := ndev.GetName()
439 | if ret != nvml.SUCCESS {
440 | klog.Error("nvml get name error ret=", ret)
441 | panic(0)
442 | }
443 | index, ret := ndev.GetIndex()
444 | if ret != nvml.SUCCESS {
445 | klog.Error("nvml get index error ret=", ret)
446 | panic(0)
447 | }
448 | return model, index
449 | }
450 |
// GetMigUUIDFromIndex returns the UUID of the idx-th MIG device on the
// physical GPU identified by uuid (any "[...]" suffix is stripped first).
// When NVML cannot resolve the MIG handle, it falls back to parsing the
// output of `nvidia-smi -L`. NVML failures log and panic; a failed
// nvidia-smi invocation is fatal.
func GetMigUUIDFromIndex(uuid string, idx int) string {
	originuuid := strings.Split(uuid, "[")[0]
	ndev, ret := config.Nvml().DeviceGetHandleByUUID(originuuid)
	if ret != nvml.SUCCESS {
		klog.Error(`nvml get device uuid error ret=`, ret)
		panic(0)
	}
	migdev, ret := config.Nvml().DeviceGetMigDeviceHandleByIndex(ndev, idx)
	if ret != nvml.SUCCESS {
		// NVML could not hand out the MIG device handle; shell out to
		// nvidia-smi and parse its listing instead.
		klog.Error("nvml get mig dev error ret=", ret, ",idx=", idx, "using nvidia-smi -L for query")
		cmd := exec.Command("nvidia-smi", "-L")
		var stdout, stderr bytes.Buffer
		cmd.Stdout = &stdout
		cmd.Stderr = &stderr
		err := cmd.Run()
		if err != nil {
			klog.Fatalf("nvidia-smi -L failed with %s\n", err)
		}
		outStr := stdout.String()
		uuid := GetMigUUIDFromSmiOutput(outStr, originuuid, idx)
		return uuid
	}
	res, ret := migdev.GetUUID()
	if ret != nvml.SUCCESS {
		klog.Error(`nvml get mig uuid error ret=`, ret)
		panic(0)
	}
	return res
}
480 |
// GetMigUUIDFromSmiOutput scans `nvidia-smi -L` output for the MIG device
// with index idx that belongs to the physical GPU with the given uuid, and
// returns that MIG device's UUID ("" when not found).
func GetMigUUIDFromSmiOutput(output string, uuid string, idx int) string {
	// migmode tracks whether the current line belongs to the MIG listing of
	// the target GPU: it turns on at the target GPU's own (non-MIG) header
	// line and off again at any other GPU's header line.
	migmode := false
	for _, val := range strings.Split(output, "\n") {
		if !strings.Contains(val, "MIG") && strings.Contains(val, uuid) {
			migmode = true
			continue
		}
		if !strings.Contains(val, "MIG") && !strings.Contains(val, uuid) {
			migmode = false
			continue
		}
		if !migmode {
			continue
		}
		klog.Infoln("inspecting", val)
		// MIG lines are expected to look like
		// "  MIG 1g.5gb Device 0: (UUID: MIG-...)"; the device index sits
		// between "Device" and the first ":".
		num := strings.Split(val, "Device")[1]
		num = strings.Split(num, ":")[0]
		num = strings.TrimSpace(num)
		index, err := strconv.Atoi(num)
		if err != nil {
			klog.Fatal("atoi failed num=", num)
		}
		if index == idx {
			// The UUID is the third ":"-separated field, trimmed of
			// whitespace and the closing ")".
			outputStr := strings.Split(val, ":")[2]
			outputStr = strings.TrimSpace(outputStr)
			outputStr = strings.TrimRight(outputStr, ")")
			return outputStr
		}
	}
	return ""
}
512 |
// ExtractMigTemplatesFromUUID parses a UUID of the form
// "<gpu-uuid>[<group>-<pos>]..." and returns the MIG template group name
// and the numeric position. A descriptive error is returned when any
// delimiter or component is missing or malformed.
func ExtractMigTemplatesFromUUID(uuid string) (string, int, error) {
	bracketed := strings.Split(uuid, "[")
	if len(bracketed) < 2 {
		return "", -1, fmt.Errorf("invalid UUID format: missing '[' delimiter")
	}

	closed := strings.Split(bracketed[1], "]")
	if len(closed) < 2 {
		return "", -1, fmt.Errorf("invalid UUID format: missing ']' delimiter")
	}

	fields := strings.Split(closed[0], "-")
	if len(fields) < 2 {
		return "", -1, fmt.Errorf("invalid UUID format: missing '-' delimiter")
	}

	group := strings.TrimSpace(fields[0])
	if len(group) == 0 {
		return "", -1, fmt.Errorf("invalid UUID format: missing template group name")
	}

	pos, err := strconv.Atoi(fields[1])
	if err != nil {
		return "", -1, fmt.Errorf("invalid position: %v", err)
	}

	return group, pos, nil
}
544 |
545 | func LoadNvidiaConfig() *config.NvidiaConfig {
546 | configs, err := LoadConfigFromCM("volcano-vgpu-device-config")
547 | if err != nil {
548 | klog.InfoS("configMap not found", err.Error())
549 | }
550 | nvidiaConfig := config.NvidiaConfig{}
551 | if configs != nil {
552 | nvidiaConfig = configs.NvidiaConfig
553 | }
554 | nvidiaConfig.DeviceSplitCount = config.DeviceSplitCount
555 | nvidiaConfig.DeviceCoreScaling = config.DeviceCoresScaling
556 | nvidiaConfig.GPUMemoryFactor = config.GPUMemoryFactor
557 | if err := readFromConfigFile(&nvidiaConfig); err != nil {
558 | klog.InfoS("readFrom device cm error", err.Error())
559 | }
560 | klog.Infoln("Loaded config=", nvidiaConfig)
561 | return &nvidiaConfig
562 | }
563 |
564 | func readFromConfigFile(sConfig *config.NvidiaConfig) error {
565 | config.Mode = "hami-core"
566 | jsonbyte, err := os.ReadFile("/config/config.json")
567 | if err != nil {
568 | return err
569 | }
570 | var deviceConfigs config.DevicePluginConfigs
571 | err = json.Unmarshal(jsonbyte, &deviceConfigs)
572 | if err != nil {
573 | return err
574 | }
575 | klog.Infof("Device Plugin Configs: %v", fmt.Sprintf("%v", deviceConfigs))
576 | for _, val := range deviceConfigs.Nodeconfig {
577 | if os.Getenv("NODE_NAME") == val.Name {
578 | klog.Infof("Reading config from file %s", val.Name)
579 | if val.Devicememoryscaling > 0 {
580 | sConfig.DeviceMemoryScaling = val.Devicememoryscaling
581 | }
582 | if val.Devicecorescaling > 0 {
583 | sConfig.DeviceCoreScaling = val.Devicecorescaling
584 | }
585 | if val.Devicesplitcount > 0 {
586 | sConfig.DeviceSplitCount = val.Devicesplitcount
587 | }
588 | if val.FilterDevice != nil && (len(val.FilterDevice.UUID) > 0 || len(val.FilterDevice.Index) > 0) {
589 | config.DevicePluginFilterDevice = val.FilterDevice
590 | }
591 | if len(val.OperatingMode) > 0 {
592 | config.Mode = val.OperatingMode
593 | }
594 | klog.Infof("FilterDevice: %v", val.FilterDevice)
595 | }
596 | }
597 | return nil
598 | }
599 |
--------------------------------------------------------------------------------
/volcano-vgpu-device-plugin.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | ---
15 | apiVersion: v1
16 | kind: ConfigMap
17 | metadata:
18 | name: volcano-vgpu-device-config
19 | namespace: kube-system
20 | labels:
21 | app.kubernetes.io/component: volcano-vgpu-device-plugin
22 | data:
23 | device-config.yaml: |-
24 | nvidia:
25 | resourceCountName: volcano.sh/vgpu-number
26 | resourceMemoryName: volcano.sh/vgpu-memory
27 | resourceMemoryPercentageName: volcano.sh/vgpu-memory-percentage
28 | resourceCoreName: volcano.sh/vgpu-cores
29 | overwriteEnv: false
30 | defaultMemory: 0
31 | defaultCores: 0
32 | defaultGPUNum: 1
33 | deviceSplitCount: 10
34 | deviceMemoryScaling: 1
35 | deviceCoreScaling: 1
36 | gpuMemoryFactor: 1
37 | knownMigGeometries:
38 | - models: [ "A30" ]
39 | allowedGeometries:
40 | - group: group1
41 | geometries:
42 | - name: 1g.6gb
43 | memory: 6144
44 | count: 4
45 | - group: group2
46 | geometries:
47 | - name: 2g.12gb
48 | memory: 12288
49 | count: 2
50 | - group: group3
51 | geometries:
52 | - name: 4g.24gb
53 | memory: 24576
54 | count: 1
55 | - models: [ "A100-SXM4-40GB", "A100-40GB-PCIe", "A100-PCIE-40GB", "A100-SXM4-40GB" ]
56 | allowedGeometries:
57 | - group: "group1"
58 | geometries:
59 | - name: 1g.5gb
60 | memory: 5120
61 | count: 7
62 | - group: "group2"
63 | geometries:
64 | - name: 2g.10gb
65 | memory: 10240
66 | count: 3
67 | - name: 1g.5gb
68 | memory: 5120
69 | count: 1
70 | - group: "group3"
71 | geometries:
72 | - name: 3g.20gb
73 | memory: 20480
74 | count: 2
75 | - group: "group4"
76 | geometries:
77 | - name: 7g.40gb
78 | memory: 40960
79 | count: 1
80 | - models: [ "A100-SXM4-80GB", "A100-80GB-PCIe", "A100-PCIE-80GB"]
81 | allowedGeometries:
82 | - group: "group1"
83 | geometries:
84 | - name: 1g.10gb
85 | memory: 10240
86 | count: 7
87 | - group: "group2"
88 | geometries:
89 | - name: 2g.20gb
90 | memory: 20480
91 | count: 3
92 | - name: 1g.10gb
93 | memory: 10240
94 | count: 1
95 | - group: "group3"
96 | geometries:
97 | - name: 3g.40gb
98 | memory: 40960
99 | count: 2
100 | - group: "group4"
101 | geometries:
102 | - name: 7g.79gb
103 | memory: 80896
104 | count: 1
105 | ---
106 | apiVersion: v1
107 | kind: ConfigMap
108 | metadata:
109 | name: volcano-vgpu-node-config
110 | namespace: kube-system
111 | labels:
112 | app.kubernetes.io/component: volcano-vgpu-node-plugin
113 | data:
114 | config.json: |
115 | {
116 | "nodeconfig": [
117 | {
118 | "name": "aio-node67",
119 | "operatingmode": "hami-core",
120 | "devicememoryscaling": 1.8,
121 | "devicesplitcount": 10,
122 | "migstrategy":"none",
123 | "filterdevices": {
124 | "uuid": [],
125 | "index": []
126 | }
127 | }
128 | ]
129 | }
130 | ---
131 | apiVersion: v1
132 | kind: ServiceAccount
133 | metadata:
134 | name: volcano-device-plugin
135 | namespace: kube-system
136 | ---
137 | kind: ClusterRole
138 | apiVersion: rbac.authorization.k8s.io/v1
139 | metadata:
140 | name: volcano-device-plugin
141 | rules:
142 | - apiGroups: [""]
143 | resources: ["nodes"]
144 | verbs: ["get", "list", "watch", "update", "patch"]
145 | - apiGroups: [""]
146 | resources: ["nodes/status"]
147 | verbs: ["patch"]
148 | - apiGroups: [""]
149 | resources: ["pods"]
150 | verbs: ["get", "list", "update", "patch", "watch"]
151 | - apiGroups: [""]
152 | resources: ["configmaps"]
153 | verbs: ["get", "list", "watch", "create", "update"]
154 | ---
155 | kind: ClusterRoleBinding
156 | apiVersion: rbac.authorization.k8s.io/v1
157 | metadata:
158 | name: volcano-device-plugin
159 | subjects:
160 | - kind: ServiceAccount
161 | name: volcano-device-plugin
162 | namespace: kube-system
163 | roleRef:
164 | kind: ClusterRole
165 | name: volcano-device-plugin
166 | apiGroup: rbac.authorization.k8s.io
167 | ---
168 | apiVersion: apps/v1
169 | kind: DaemonSet
170 | metadata:
171 | name: volcano-device-plugin
172 | namespace: kube-system
173 | spec:
174 | selector:
175 | matchLabels:
176 | name: volcano-device-plugin
177 | updateStrategy:
178 | type: RollingUpdate
179 | template:
180 | metadata:
181 | # This annotation is deprecated. Kept here for backward compatibility
182 | # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
183 | annotations:
184 | scheduler.alpha.kubernetes.io/critical-pod: ""
185 | labels:
186 | name: volcano-device-plugin
187 | spec:
188 | tolerations:
189 | # This toleration is deprecated. Kept here for backward compatibility
190 | # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
191 | - key: CriticalAddonsOnly
192 | operator: Exists
193 | - key: volcano.sh/gpu-memory
194 | operator: Exists
195 | effect: NoSchedule
196 | # Mark this pod as a critical add-on; when enabled, the critical add-on
197 | # scheduler reserves resources for critical add-on pods so that they can
198 | # be rescheduled after a failure.
199 | # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
200 | priorityClassName: "system-node-critical"
      serviceAccountName: volcano-device-plugin
202 | containers:
203 | - image: docker.io/projecthami/volcano-vgpu-device-plugin:v1.9.4
204 | args: ["--device-split-count=10"]
205 | lifecycle:
206 | postStart:
207 | exec:
208 | command: ["/bin/sh", "-c", "cp -f /k8s-vgpu/lib/nvidia/* /usr/local/vgpu/"]
209 | name: volcano-device-plugin
210 | env:
211 | - name: NODE_NAME
212 | valueFrom:
213 | fieldRef:
214 | fieldPath: spec.nodeName
215 | - name: HOOK_PATH
216 | value: "/usr/local/vgpu"
217 | - name: NVIDIA_VISIBLE_DEVICES
218 | value: "all"
219 | - name: NVIDIA_MIG_MONITOR_DEVICES
220 | value: "all"
221 | - name: NVIDIA_DRIVER_CAPABILITIES
222 | value: "utility"
223 | securityContext:
224 | allowPrivilegeEscalation: true
            privileged: true
226 | capabilities:
227 | drop: ["ALL"]
228 | add: ["SYS_ADMIN"]
229 | volumeMounts:
230 | - name: deviceconfig
231 | mountPath: /config
232 | - name: device-plugin
233 | mountPath: /var/lib/kubelet/device-plugins
234 | - name: lib
235 | mountPath: /usr/local/vgpu
236 | - name: hosttmp
237 | mountPath: /tmp
238 | - image: docker.io/projecthami/volcano-vgpu-device-plugin:v1.9.4
239 | name: monitor
240 | command:
241 | - /bin/bash
242 | - -c
243 | - volcano-vgpu-monitor
244 | env:
245 | - name: NVIDIA_VISIBLE_DEVICES
246 | value: "all"
247 | - name: NVIDIA_MIG_MONITOR_DEVICES
248 | value: "all"
249 | - name: HOOK_PATH
250 | value: "/tmp/vgpu"
251 | - name: NODE_NAME
252 | valueFrom:
253 | fieldRef:
254 | fieldPath: spec.nodeName
255 | securityContext:
256 | privileged: true
257 | allowPrivilegeEscalation: true
258 | capabilities:
259 | drop: ["ALL"]
260 | add: ["SYS_ADMIN"]
261 | volumeMounts:
262 | - name: dockers
263 | mountPath: /run/docker
264 | - name: containerds
265 | mountPath: /run/containerd
266 | - name: sysinfo
267 | mountPath: /sysinfo
268 | - name: hostvar
269 | mountPath: /hostvar
270 | - name: hosttmp
271 | mountPath: /tmp
272 | volumes:
273 | - name: deviceconfig
274 | configMap:
275 | name: volcano-vgpu-node-config
276 | - hostPath:
277 | path: /var/lib/kubelet/device-plugins
278 | type: Directory
279 | name: device-plugin
280 | - hostPath:
281 | path: /usr/local/vgpu
282 | type: DirectoryOrCreate
283 | name: lib
284 | - name: hosttmp
285 | hostPath:
286 | path: /tmp
287 | type: DirectoryOrCreate
288 | - name: dockers
289 | hostPath:
290 | path: /run/docker
291 | type: DirectoryOrCreate
292 | - name: containerds
293 | hostPath:
294 | path: /run/containerd
295 | type: DirectoryOrCreate
296 | - name: usrbin
297 | hostPath:
298 | path: /usr/bin
299 | type: Directory
300 | - name: sysinfo
301 | hostPath:
302 | path: /sys
303 | type: Directory
304 | - name: hostvar
305 | hostPath:
306 | path: /var
307 | type: Directory
308 |
--------------------------------------------------------------------------------