├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── MAINTAINERS ├── Makefile ├── README.md ├── VERSION ├── build ├── Dockerfile ├── copy-bin-lib.sh ├── extra-config.json ├── gpu-manager.conf ├── gpu-manager.service ├── gpu-manager.spec ├── start.sh └── volume.conf ├── cmd ├── client │ └── client.go └── manager │ ├── app │ └── app.go │ ├── nvidia-manager.go │ └── options │ └── options.go ├── docs └── faq.md ├── go.mod ├── go.sum ├── gpu-manager-svc.yaml ├── gpu-manager.yaml ├── hack ├── build.sh └── common.sh ├── pkg ├── algorithm │ └── nvidia │ │ ├── fragment.go │ │ ├── fragment_test.go │ │ ├── link.go │ │ ├── link_test.go │ │ ├── share.go │ │ ├── share_test.go │ │ └── util_test.go ├── api │ └── runtime │ │ ├── display │ │ ├── api.pb.go │ │ ├── api.pb.gw.go │ │ └── api.proto │ │ └── vcuda │ │ ├── api.pb.go │ │ └── api.proto ├── config │ └── config.go ├── device │ ├── dummy │ │ └── tree.go │ ├── nvidia │ │ ├── node.go │ │ ├── sort.go │ │ ├── sort_test.go │ │ ├── tree.go │ │ ├── tree_test.go │ │ └── tree_util.go │ ├── register │ │ └── register.go │ └── types.go ├── flags │ └── flags.go ├── logs │ └── logs.go ├── runtime │ ├── runtime.go │ └── runtime_stub.go ├── server │ ├── server.go │ ├── server_test.go │ ├── types.go │ ├── vcore.go │ └── vmemory.go ├── services │ ├── allocator │ │ ├── cache │ │ │ └── cache.go │ │ ├── checkpoint │ │ │ └── manager.go │ │ ├── dummy │ │ │ └── allocator.go │ │ ├── nvidia │ │ │ ├── allocator.go │ │ │ ├── allocator_test.go │ │ │ └── evaluator.go │ │ ├── register │ │ │ └── register.go │ │ └── types.go │ ├── display │ │ ├── display.go │ │ └── helper.go │ ├── response │ │ ├── fake.go │ │ └── manager.go │ ├── virtual-manager │ │ └── manager.go │ ├── volume │ │ ├── ldcache │ │ │ └── ldcache.go │ │ ├── util.go │ │ └── volume.go │ └── watchdog │ │ ├── label.go │ │ ├── label_test.go │ │ ├── watchdog.go │ │ └── watchdog_test.go ├── types │ └── types.go ├── utils │ ├── cgroup │ │ └── cgroup.go │ └── util.go └── 
version │ ├── .gitattributes │ ├── base.go │ ├── verflags.go │ └── version.go ├── revive.toml └── staging └── src └── google └── protobuf ├── descriptor.proto └── empty.proto /.gitignore: -------------------------------------------------------------------------------- 1 | go/ 2 | .idea 3 | .chglog/ 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | services: 4 | - docker 5 | 6 | script: 7 | - make img 8 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 2 | ## [Unreleased] 3 | 4 | ### Feat 5 | - add timeout option waiting for all resource server ready 6 | - upgrade vcuda 7 | 8 | ### Fix 9 | - virutal manager can't probe correct vm controller path 10 | - wrong size of device memory when app has more than 1 card 11 | - vcuda image repository url 12 | - the mismatch between gpu-manager pick up and gpu-admission predicate ([#74](https://github.com/tkestack/gpu-manager/issues/74)) 13 | - read QoS class from pod status first 14 | - kubelet 1.20 device checkpoint support ([#62](https://github.com/tkestack/gpu-manager/issues/62)) 15 | - report device memory when allocate more than one cards 16 | - DeviceGetTopologyCommonAncestor get a zero value on multi-gpu board 17 | - gpu-manager lost checkpoint data file 18 | - preserve attribute 19 | - read cgroup.procs files recursively 20 | - wait server until it's ready 21 | 22 | 23 | 24 | ## [v1.1.5] - 2021-05-10 25 | ### Docs 26 | - Add FAQ link 27 | - Update gpu manager yaml 28 | 29 | ### Feat 30 | - upgrade vcuda to 1.0.3 31 | - Upgrade vcuda-controller to v1.0.1 32 | - Use host network to build image 33 | - Update go version to 1.14.3 34 | - Support CRI interface 35 | 36 | ### Fix 37 | - kubelet 1.20 device checkpoint support 
([#62](https://github.com/tkestack/gpu-manager/issues/62)) 38 | - the mismatch between gpu-manager pick up and gpu-admission predicate ([#74](https://github.com/tkestack/gpu-manager/issues/74)) 39 | - read QoS class from pod status first 40 | - report device memory when allocate more than one cards 41 | - gpu-manager lost checkpoint data file 42 | - DeviceGetTopologyCommonAncestor get a zero value on multi-gpu board 43 | - preserve attribute 44 | - read cgroup.procs files recursively 45 | - wait server until it's ready 46 | - Revert using vendor directory 47 | - Allow non-root user to communicate with gpu manager 48 | - Change ius rpm broken link 49 | - skip symlink when copy bin to |${NV_DIR}|. ([#15](https://github.com/tkestack/gpu-manager/issues/15)) 50 | 51 | ### Refact 52 | - Use vendor directory 53 | - Refact gpu-manager code 54 | 55 | 56 | 57 | ## [v1.0.9] - 2021-02-23 58 | ### Feat 59 | - use apiserver cache to list pod 60 | 61 | ### Fix 62 | - ignore not running container while recovering 63 | 64 | 65 | 66 | ## [v1.0.8] - 2021-02-22 67 | ### Feat 68 | - Upgrade vcuda-controller to v1.0.2 69 | - Use host network to build image 70 | - Upgrade vcuda-controller to v1.0.1 71 | 72 | ### Fix 73 | - missing recover tree data if information is retrieved from checkpoint file 74 | - gpu-manager lost checkpoint data file 75 | - DeviceGetTopologyCommonAncestor get a zero value on multi-gpu board 76 | - preserve attribute 77 | - upgrade go to 1.15 78 | - wait server until it's ready 79 | - Change ius rpm broken link 80 | - Allow non-root user to communicate with gpu manager 81 | 82 | ### Refact 83 | - only watch pod belong this node 84 | 85 | 86 | 87 | ## [v1.1.4] - 2021-02-05 88 | ### Fix 89 | - read QoS class from pod status first 90 | 91 | 92 | 93 | ## [v1.1.3] - 2021-02-02 94 | ### Feat 95 | - upgrade vcuda to 1.0.3 96 | 97 | ### Fix 98 | - report device memory when allocate more than one cards 99 | 100 | 101 | 102 | ## [v1.1.2] - 2020-12-09 103 | ### Docs 104 | - 
Add FAQ link 105 | - Update gpu manager yaml 106 | 107 | ### Feat 108 | - Upgrade vcuda-controller to v1.0.1 109 | - Use host network to build image 110 | - Update go version to 1.14.3 111 | - Support CRI interface 112 | 113 | ### Fix 114 | - gpu-manager lost checkpoint data file 115 | - DeviceGetTopologyCommonAncestor get a zero value on multi-gpu board 116 | - preserve attribute 117 | - read cgroup.procs files recursively 118 | - wait server until it's ready 119 | - Revert using vendor directory 120 | - Allow non-root user to communicate with gpu manager 121 | - Change ius rpm broken link 122 | - skip symlink when copy bin to |${NV_DIR}|. ([#15](https://github.com/tkestack/gpu-manager/issues/15)) 123 | 124 | ### Refact 125 | - Use vendor directory 126 | - Refact gpu-manager code 127 | 128 | 129 | 130 | ## [v1.0.7] - 2020-12-09 131 | ### Feat 132 | - Upgrade vcuda-controller to v1.0.2 133 | - Use host network to build image 134 | - Upgrade vcuda-controller to v1.0.1 135 | 136 | ### Fix 137 | - gpu-manager lost checkpoint data file 138 | - DeviceGetTopologyCommonAncestor get a zero value on multi-gpu board 139 | - preserve attribute 140 | - upgrade go to 1.15 141 | - wait server until it's ready 142 | - Change ius rpm broken link 143 | - Allow non-root user to communicate with gpu manager 144 | 145 | ### Refact 146 | - only watch pod belong this node 147 | 148 | 149 | 150 | ## [v1.1.1] - 2020-12-02 151 | ### Docs 152 | - Add FAQ link 153 | - Update gpu manager yaml 154 | 155 | ### Feat 156 | - Upgrade vcuda-controller to v1.0.1 157 | - Use host network to build image 158 | - Update go version to 1.14.3 159 | - Support CRI interface 160 | 161 | ### Fix 162 | - DeviceGetTopologyCommonAncestor get a zero value on multi-gpu board 163 | - preserve attribute 164 | - read cgroup.procs files recursively 165 | - wait server until it's ready 166 | - Revert using vendor directory 167 | - Allow non-root user to communicate with gpu manager 168 | - Change ius rpm broken link 
169 | - skip symlink when copy bin to |${NV_DIR}|. ([#15](https://github.com/tkestack/gpu-manager/issues/15)) 170 | 171 | ### Refact 172 | - Use vendor directory 173 | - Refact gpu-manager code 174 | 175 | 176 | 177 | ## [v1.0.6] - 2020-12-02 178 | ### Fix 179 | - DeviceGetTopologyCommonAncestor get a zero value on multi-gpu board 180 | - preserve attribute 181 | 182 | 183 | 184 | ## [v1.0.5] - 2020-08-28 185 | ### Feat 186 | - Upgrade vcuda-controller to v1.0.2 187 | 188 | ### Fix 189 | - upgrade go to 1.15 190 | - wait server until it's ready 191 | 192 | ### Refact 193 | - only watch pod belong this node 194 | 195 | 196 | 197 | ## [v1.0.4] - 2020-05-21 198 | ### Feat 199 | - Use host network to build image 200 | - Upgrade vcuda-controller to v1.0.1 201 | 202 | ### Fix 203 | - Change ius rpm broken link 204 | - Allow non-root user to communicate with gpu manager 205 | 206 | 207 | 208 | ## [v1.1.0] - 2020-05-21 209 | ### Docs 210 | - Add FAQ link 211 | - Update gpu manager yaml 212 | 213 | ### Feat 214 | - Upgrade vcuda-controller to v1.0.1 215 | - Use host network to build image 216 | - Update go version to 1.14.3 217 | - Support CRI interface 218 | 219 | ### Fix 220 | - Revert using vendor directory 221 | - Allow non-root user to communicate with gpu manager 222 | - Change ius rpm broken link 223 | - skip symlink when copy bin to |${NV_DIR}|. 
([#15](https://github.com/tkestack/gpu-manager/issues/15)) 224 | 225 | ### Refact 226 | - Use vendor directory 227 | - Refact gpu-manager code 228 | 229 | 230 | 231 | ## v1.0.3 - 2019-12-17 232 | 233 | [Unreleased]: https://github.com/tkestack/gpu-manager/compare/v1.1.5...HEAD 234 | [v1.1.5]: https://github.com/tkestack/gpu-manager/compare/v1.0.9...v1.1.5 235 | [v1.0.9]: https://github.com/tkestack/gpu-manager/compare/v1.0.8...v1.0.9 236 | [v1.0.8]: https://github.com/tkestack/gpu-manager/compare/v1.1.4...v1.0.8 237 | [v1.1.4]: https://github.com/tkestack/gpu-manager/compare/v1.1.3...v1.1.4 238 | [v1.1.3]: https://github.com/tkestack/gpu-manager/compare/v1.1.2...v1.1.3 239 | [v1.1.2]: https://github.com/tkestack/gpu-manager/compare/v1.0.7...v1.1.2 240 | [v1.0.7]: https://github.com/tkestack/gpu-manager/compare/v1.1.1...v1.0.7 241 | [v1.1.1]: https://github.com/tkestack/gpu-manager/compare/v1.0.6...v1.1.1 242 | [v1.0.6]: https://github.com/tkestack/gpu-manager/compare/v1.0.5...v1.0.6 243 | [v1.0.5]: https://github.com/tkestack/gpu-manager/compare/v1.0.4...v1.0.5 244 | [v1.0.4]: https://github.com/tkestack/gpu-manager/compare/v1.1.0...v1.0.4 245 | [v1.1.0]: https://github.com/tkestack/gpu-manager/compare/v1.0.3...v1.1.0 246 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # CONTRIBUTING 2 | 3 | Welcome to [report Issues](https://github.com/tkestack/gpu-manager/issues) or [pull requests](https://github.com/tkestack/gpu-manager/pulls). It's recommended to read the following Contributing Guide first before contributing. 4 | 5 | This document provides a set of best practices for open source contributions - bug reports, code submissions / pull requests, etc. 6 | 7 | ## Issues 8 | 9 | We use Github Issues to track public bugs and feature requests. 
10 | 11 | ### Due diligence 12 | 13 | Before submitting an issue, please do the following: 14 | 15 | * Perform **basic troubleshooting** steps: 16 | * Make sure you’re on the latest version. If you’re not on the most recent version, your problem may have been solved already! Upgrading is always the best first step. 17 | * Try older versions. If you’re already on the latest release, try rolling back a few minor versions (e.g. if on 1.7, try 1.5 or 1.6) and see if the problem goes away. This will help the devs narrow down when the problem first arose in the commit log. 18 | * Try switching up dependency versions. If the software in question has dependencies (other libraries, etc) try upgrading/downgrading those as well. 19 | * Search the project’s bug/issue tracker to make sure it’s not a known issue. 20 | * If you don’t find a pre-existing issue, consider checking with the mailing list and/or IRC channel in case the problem is non-bug-related. 21 | 22 | ### What to put in your bug report 23 | 24 | Make sure your report gets the attention it deserves: bug reports with missing information may be ignored or punted back to you, delaying a fix. The below constitutes a bare minimum; more info is almost always better: 25 | 26 | * What version of the core programming language interpreter/compiler are you using? For example, if it’s a Golang project, are you using Golang 1.13? Golang 1.12? 27 | * What operating system are you on? Windows? (32-bit? 64-bit?) Mac OS X? (10.14? 10.10?) Linux? (Which distro? Which version of that distro? 32 or 64 bits?) Again, more detail is better. 28 | * Which version or versions of the software are you using? Ideally, you followed the advice above and have ruled out (or verified that the problem exists in) a few different versions. 29 | * How can the developers recreate the bug on their end? If possible, include a copy of your code, the command you used to invoke it, and the full output of your run (if applicable.) 
A common tactic is to pare down your code until a simple (but still bug-causing) “base case” remains. Not only can this help you identify problems which aren’t real bugs, but it means the developer can get to fixing the bug faster. 30 | 31 | ## Pull Requests 32 | 33 | We strongly welcome your pull request to make the TKEStack project better. 34 | 35 | ### Licensing of contributed material 36 | 37 | Keep in mind as you contribute, that code, docs and other material submitted to open source projects are usually considered licensed under the same terms as the rest of the work. 38 | 39 | Anything submitted to a project falls under the licensing terms in the repository’s top level LICENSE file. Per-file copyright/license headers are typically extraneous and undesirable. Please don’t add your own copyright headers to new files unless the project’s license actually requires them! 40 | 41 | ### Branch Management 42 | 43 | There are three main branches here: 44 | 45 | 1. `master` branch. 46 | 1. It is the latest (pre-)release branch. We use `master` for tags, with version number `1.1.0`, `1.2.0`, `1.3.0`... 47 | 2. **Don't submit any PR on `master` branch.** 48 | 2. `dev` branch. 49 | 1. It is our stable developing branch. After full testing, `dev` will be merged to `master` branch for the next release. 50 | 2. **You are recommended to submit bugfix or feature PR on `dev` branch.** 51 | 3. `hotfix` branch. 52 | 1. It is the latest tag version for hot fix. If we accept your pull request, we may just tag with version number `1.1.1`, `1.2.3`. 53 | 2. **Only submit urgent PR on `hotfix` branch for next specific release.** 54 | 55 | Normal bugfix or feature request should be submitted to `dev` branch. After full testing, we will merge them to `master` branch for the next release. 56 | 57 | If you have some urgent bugfixes on a published version, but the `master` branch has already moved far away from the latest tag version, you can submit a PR on `hotfix`. 
And it will be cherry picked to `dev` branch if it is possible. 58 | 59 | ``` 60 | master 61 | ↑ 62 | dev <--- hotfix PR 63 | ↑ 64 | feature/bugfix PR 65 | ``` 66 | 67 | ### Make Pull Requests 68 | 69 | The code team will monitor all pull requests, and we run some code checks and tests on them. After all tests have passed, we will accept the PR. But it won't be merged into the `master` branch at once; there will be some delay. 70 | 71 | Before submitting a pull request, please make sure the following are done: 72 | 73 | 1. Fork the repo and create your branch from `master` or `hotfix`. 74 | 2. Update code or documentation if you have changed APIs. 75 | 3. Add the copyright notice to the top of any new files you've added. 76 | 4. Check your code lints and checkstyles. 77 | 5. Test and test again your code. 78 | 6. Now, you can submit your pull request on `dev` or `hotfix` branch. 79 | 80 | ## Code Conventions 81 | 82 | Use [Kubernetes Code Conventions](https://github.com/kubernetes/community/blob/master/contributors/guide/coding-conventions.md) for all projects in the TKEStack organization. 83 | 84 | ## Documentation isn’t optional 85 | 86 | It’s not! Patches without documentation will be returned to sender. By “documentation” we mean: 87 | 88 | * Docstrings must be created or updated for public API functions/methods/etc. (This step is optional for some bugfixes.) 89 | * New features should ideally include updates to prose documentation, including useful example code snippets. 90 | * All submissions should have a changelog entry crediting the contributor and/or any individuals instrumental in identifying the problem. 91 | 92 | ## Tests aren’t optional 93 | 94 | Any bugfix that doesn’t include a test proving the existence of the bug being fixed, may be suspect. Ditto for new features that can’t prove they actually work. 95 | 96 | We’ve found that test-first development really helps make features better architected and identifies potential edge cases earlier instead of later. 
Writing tests before the implementation is strongly encouraged. 97 | -------------------------------------------------------------------------------- /MAINTAINERS: -------------------------------------------------------------------------------- 1 | Thomas Song @mYmNeo 2 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all 2 | all: 3 | hack/build.sh manager client 4 | 5 | .PHONY: clean 6 | clean: 7 | rm -rf ./go 8 | 9 | .PHONY: test 10 | test: 11 | hack/build.sh "test" 12 | 13 | .PHONY: proto 14 | proto: 15 | hack/build.sh "proto" 16 | 17 | .PHONY: img 18 | img: 19 | hack/build.sh "img" 20 | 21 | .PHONY: fmt 22 | fmt: 23 | hack/build.sh "fmt" 24 | 25 | .PHONY: lint 26 | lint: 27 | @revive -config revive.toml -exclude vendor/... -exclude pkg/api/runtime/... ./... 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GPU Manager 2 | 3 | [![Build Status](https://travis-ci.org/tkestack/gpu-manager.svg?branch=master)](https://travis-ci.org/tkestack/gpu-manager) 4 | 5 | GPU Manager is used for managing the nvidia GPU devices in Kubernetes cluster. It implements the `DevicePlugin` interface 6 | of Kubernetes. So it's compatible with 1.9+ of Kubernetes release version. 7 | 8 | To compare with the combination solution of `nvidia-docker` 9 | and `nvidia-k8s-plugin`, GPU manager will use native `runc` without modification but nvidia solution does. 10 | Besides we also support metrics report without deploying new components. 11 | 12 | To schedule a GPU payload correctly, GPU manager should work with [gpu-admission](https://github.com/tkestack/gpu-admission) which is a 13 | kubernetes scheduler plugin. 
14 | 15 | GPU manager also supports the payload with fraction resource of GPU device such as 0.1 card or 100MiB gpu device memory. 16 | If you want this kind of feature, please refer to [vcuda-controller](https://github.com/tkestack/vcuda-controller) project. 17 | 18 | ## Build 19 | 20 | **1.** Build binary 21 | 22 | - Prerequisite 23 | - CUDA toolkit 24 | 25 | ``` 26 | make 27 | ``` 28 | 29 | **2.** Build image 30 | 31 | - Prerequisite 32 | - Docker 33 | 34 | ``` 35 | make img 36 | ``` 37 | 38 | ## Prebuilt image 39 | 40 | Prebuilt image can be found at `thomassong/gpu-manager` 41 | 42 | ## Deploy 43 | 44 | GPU Manager is running as daemonset, and because of the RBAC restriction and hybrid cluster, 45 | you need to do the following steps to make this daemonset run correctly. 46 | 47 | - service account and clusterrole 48 | 49 | ``` 50 | kubectl create sa gpu-manager -n kube-system 51 | kubectl create clusterrolebinding gpu-manager-role --clusterrole=cluster-admin --serviceaccount=kube-system:gpu-manager 52 | ``` 53 | 54 | - label node with `nvidia-device-enable=enable` 55 | 56 | ``` 57 | kubectl label node <node> nvidia-device-enable=enable 58 | ``` 59 | 60 | - submit daemonset yaml 61 | 62 | ``` 63 | kubectl create -f gpu-manager.yaml 64 | ``` 65 | 66 | ## Pod template example 67 | 68 | There is nothing special to submit a Pod except the description of GPU resource is no longer 1 69 | . The GPU 70 | resources are described as that 100 `tencent.com/vcuda-core` for 1 GPU and N `tencent.com/vcuda-memory` for GPU memory (1 tencent.com/vcuda-memory means 256Mi 71 | GPU memory). And because of the limitation of extend resource validation of Kubernetes, to support 72 | GPU utilization limitation, you should add `tencent.com/vcuda-core-limit: XX` in the annotation 73 | field of a Pod. 
74 | 75 | **Notice: the value of `tencent.com/vcuda-core` is either a multiple of 100 or any value 76 | smaller than 100. For example, 100, 200 or 20 are valid values but 150 or 250 are invalid** 77 | 78 | - Submit a Pod with 0.3 GPU utilization and 7680MiB GPU memory with 0.5 GPU utilization limit 79 | 80 | ``` 81 | apiVersion: v1 82 | kind: Pod 83 | metadata: 84 | name: vcuda 85 | annotations: 86 | tencent.com/vcuda-core-limit: 50 87 | spec: 88 | restartPolicy: Never 89 | containers: 90 | - image: 91 | name: nvidia 92 | command: 93 | - /usr/local/nvidia/bin/nvidia-smi 94 | - pmon 95 | - -d 96 | - 10 97 | resources: 98 | requests: 99 | tencent.com/vcuda-core: 50 100 | tencent.com/vcuda-memory: 30 101 | limits: 102 | tencent.com/vcuda-core: 50 103 | tencent.com/vcuda-memory: 30 104 | ``` 105 | 106 | - Submit a Pod with 2 GPU cards 107 | 108 | ``` 109 | apiVersion: v1 110 | kind: Pod 111 | metadata: 112 | name: vcuda 113 | spec: 114 | restartPolicy: Never 115 | containers: 116 | - image: 117 | name: nvidia 118 | command: 119 | - /usr/local/nvidia/bin/nvidia-smi 120 | - pmon 121 | - -d 122 | - 10 123 | resources: 124 | requests: 125 | tencent.com/vcuda-core: 200 126 | tencent.com/vcuda-memory: 60 127 | limits: 128 | tencent.com/vcuda-core: 200 129 | tencent.com/vcuda-memory: 60 130 | ``` 131 | 132 | ## FAQ 133 | 134 | If you have some questions about this project, you can first refer to [FAQ](./docs/faq.md) to find a solution. 
135 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 1.1.4 2 | -------------------------------------------------------------------------------- /build/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG base_img 2 | FROM nvidia/cuda:10.1-devel-centos7 as build 3 | 4 | ARG version 5 | ARG commit 6 | 7 | RUN yum install -y rpm-build make 8 | 9 | # default git has problems while cloning some repository 10 | RUN yum install -y https://repo.ius.io/ius-release-el7.rpm \ 11 | && yum install -y git222 12 | 13 | ENV GOLANG_VERSION 1.14.3 14 | RUN curl -sSL https://dl.google.com/go/go${GOLANG_VERSION}.linux-amd64.tar.gz \ 15 | | tar -C /usr/local -xz 16 | ENV GOPATH /go 17 | ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH 18 | 19 | RUN mkdir -p /root/rpmbuild/{SPECS,SOURCES} 20 | 21 | COPY gpu-manager.spec /root/rpmbuild/SPECS 22 | COPY gpu-manager-source.tar.gz /root/rpmbuild/SOURCES 23 | 24 | RUN echo '%_topdir /root/rpmbuild' > /root/.rpmmacros \ 25 | && echo '%__os_install_post %{nil}' >> /root/.rpmmacros \ 26 | && echo '%debug_package %{nil}' >> /root/.rpmmacros 27 | WORKDIR /root/rpmbuild/SPECS 28 | RUN rpmbuild -bb --quiet \ 29 | --define 'version '${version}'' \ 30 | --define 'commit '${commit}'' \ 31 | gpu-manager.spec 32 | 33 | FROM $base_img 34 | 35 | ARG version 36 | ARG commit 37 | 38 | COPY --from=build /root/rpmbuild/RPMS/x86_64/gpu-manager-${version}-${commit}.el7.x86_64.rpm /tmp 39 | 40 | RUN yum install epel-release -y && \ 41 | yum install -y which jq 42 | 43 | # Install packages 44 | RUN rpm -ivh /tmp/gpu-manager-${version}-${commit}.el7.x86_64.rpm \ 45 | && rm -rf /tmp/gpu-manager-${version}-${commit}.el7.x86_64.rpm 46 | 47 | # kubelet 48 | VOLUME ["/var/lib/kubelet/device-plugins"] 49 | 50 | # gpu manager storage 51 | VOLUME ["/etc/gpu-manager/vm"] 52 | VOLUME 
["/etc/gpu-manager/vdriver"] 53 | VOLUME ["/var/log/gpu-manager"] 54 | 55 | # nvidia library search location 56 | VOLUME ["/usr/local/host"] 57 | 58 | RUN echo "/usr/local/nvidia/lib" > /etc/ld.so.conf.d/nvidia.conf && \ 59 | echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf 60 | 61 | ENV PATH=$PATH:/usr/local/nvidia/bin 62 | 63 | # cgroup 64 | VOLUME ["/sys/fs/cgroup"] 65 | 66 | # display 67 | EXPOSE 5678 68 | 69 | COPY start.sh / 70 | COPY copy-bin-lib.sh / 71 | 72 | CMD ["/start.sh"] 73 | -------------------------------------------------------------------------------- /build/copy-bin-lib.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o pipefail 4 | set -o errexit 5 | set -o nounset 6 | 7 | FILE=${FILE:-"/etc/gpu-manager/volume.conf"} 8 | LIB_FILES=$(jq -r .volume[1].components.libraries[] ${FILE}) 9 | BIN_FILES=$(jq -r .volume[1].components.binaries[] ${FILE}) 10 | readonly NV_DIR="/usr/local/nvidia" 11 | readonly FIND_BASE=${FIND_BASE:-"/usr/local/host"} 12 | 13 | function check_arch() { 14 | local readonly lib=$1 15 | if [[ $(objdump -f ${lib} | grep -o "elf64-x86-64") == "elf64-x86-64" ]]; then 16 | echo "64" 17 | else 18 | echo "" 19 | fi 20 | } 21 | 22 | function copy_lib() { 23 | for target in $(find ${FIND_BASE} -name "${1}*" | grep -v "stubs"); do 24 | if [[ $(objdump -p ${target} 2>/dev/null | grep -o "SONAME") == "SONAME" ]]; then 25 | copy_directory ${target} "${NV_DIR}/lib$(check_arch ${target})" 26 | fi 27 | done 28 | } 29 | 30 | function copy_bin() { 31 | for target in $(find ${FIND_BASE} -name "${1}"); do 32 | if [[ -L ${target} ]]; then 33 | echo "${target} is symlink" 34 | continue 35 | fi 36 | copy_directory ${target} "${NV_DIR}/bin/" 37 | done 38 | } 39 | 40 | function copy_directory() { 41 | local readonly lib=$1 42 | local readonly path=$2 43 | 44 | echo "copy ${lib} to ${path}" 45 | cp --preserve=mode,ownership -Pf "${lib}" "${path}" 46 | } 47 | 48 | rm -rf 
${NV_DIR} 49 | mkdir -p ${NV_DIR}/{bin,lib,lib64} 50 | 51 | for file in ${LIB_FILES[@]}; do 52 | copy_lib ${file} 53 | done 54 | 55 | for file in ${BIN_FILES[@]}; do 56 | copy_bin ${file} 57 | done 58 | 59 | # fix libvdpau_nvidia.so 60 | ( 61 | cd ${NV_DIR}/lib 62 | rm -rf libvdpau_nvidia.so 63 | rel_path=$(readlink -f libvdpau_nvidia.so.1) 64 | ln -s $(basename ${rel_path}) libvdpau_nvidia.so 65 | ) 66 | 67 | ( 68 | cd ${NV_DIR}/lib64 69 | rm -rf libvdpau_nvidia.so 70 | rel_path=$(readlink -f libvdpau_nvidia.so.1) 71 | ln -s $(basename ${rel_path}) libvdpau_nvidia.so 72 | ) 73 | 74 | # fix libnvidia-ml.so 75 | ( 76 | cd ${NV_DIR}/lib 77 | rm -rf libnvidia-ml.so 78 | rel_path=$(readlink -f libnvidia-ml.so.1) 79 | ln -s $(basename ${rel_path}) libnvidia-ml.so 80 | ) 81 | 82 | ( 83 | cd ${NV_DIR}/lib64 84 | rm -rf libnvidia-ml.so 85 | rel_path=$(readlink -f libnvidia-ml.so.1) 86 | ln -s $(basename ${rel_path}) libnvidia-ml.so 87 | ) 88 | -------------------------------------------------------------------------------- /build/extra-config.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /build/gpu-manager.conf: -------------------------------------------------------------------------------- 1 | GPU_MANAGER_ARGS="--extra-config=/etc/gpu-manager/extra-config.json --addr=/var/run/gpu-manager.sock --v=2 --logtostderr" 2 | -------------------------------------------------------------------------------- /build/gpu-manager.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=GPU Manager Runtime 3 | After=network-online.target docker.socket kubelet.service 4 | Wants=network-online.target kubelet.service 5 | 6 | [Service] 7 | Type=notify 8 | # the default is not to use systemd for cgroups because the delegate issues still 9 | # exists and systemd currently does not support the cgroup 
feature set required 10 | # for containers run by docker 11 | EnvironmentFile=-/etc/gpu-manager/gpu-manager.conf 12 | ExecStart=/usr/bin/gpu-manager $GPU_MANAGER_ARGS 13 | ExecReload=/bin/kill -s HUP $MAINPID 14 | LimitNOFILE=1048576 15 | # Having non-zero Limit*s causes performance problems due to accounting overhead 16 | # in the kernel. We recommend using cgroups to do container-local accounting. 17 | LimitNPROC=infinity 18 | LimitCORE=infinity 19 | # Uncomment TasksMax if your systemd version supports it. 20 | # Only systemd 226 and above support this version. 21 | #TasksMax=infinity 22 | TimeoutStartSec=0 23 | # set delegate yes so that systemd does not reset the cgroups of docker containers 24 | Delegate=yes 25 | # kill only the docker process, not all processes in the cgroup 26 | KillMode=process 27 | # restart the docker process if it exits prematurely 28 | Restart=on-failure 29 | StartLimitBurst=3 30 | StartLimitInterval=60s 31 | UMask=0000 32 | 33 | [Install] 34 | WantedBy=multi-user.target 35 | -------------------------------------------------------------------------------- /build/gpu-manager.spec: -------------------------------------------------------------------------------- 1 | Name: gpu-manager 2 | Version: %{version} 3 | Release: %{commit}%{?dist} 4 | Summary: GPU Manager Plugin for Kubernetes 5 | 6 | License: MIT 7 | Source: gpu-manager-source.tar.gz 8 | 9 | Requires: systemd-units 10 | 11 | %define pkgname %{name}-%{version}-%{release} 12 | 13 | %description 14 | GPU Manager Plugin for Kubernetes 15 | 16 | %prep 17 | %setup -n gpu-manager-%{version} 18 | 19 | 20 | %build 21 | make all 22 | 23 | %install 24 | install -d $RPM_BUILD_ROOT/%{_bindir} 25 | install -d $RPM_BUILD_ROOT/%{_unitdir} 26 | install -d $RPM_BUILD_ROOT/etc/gpu-manager 27 | 28 | install -p -m 755 ./go/bin/gpu-manager $RPM_BUILD_ROOT/%{_bindir}/ 29 | install -p -m 755 ./go/bin/gpu-client $RPM_BUILD_ROOT/%{_bindir}/ 30 | 31 | install -p -m 644 ./build/extra-config.json 
$RPM_BUILD_ROOT/etc/gpu-manager/ 32 | install -p -m 644 ./build/gpu-manager.conf $RPM_BUILD_ROOT/etc/gpu-manager/ 33 | install -p -m 644 ./build/volume.conf $RPM_BUILD_ROOT/etc/gpu-manager/ 34 | 35 | install -p -m 644 ./build/gpu-manager.service $RPM_BUILD_ROOT/%{_unitdir}/ 36 | 37 | %clean 38 | rm -rf $RPM_BUILD_ROOT 39 | 40 | %files 41 | %config(noreplace,missingok) /etc/gpu-manager/extra-config.json 42 | %config(noreplace,missingok) /etc/gpu-manager/gpu-manager.conf 43 | %config(noreplace,missingok) /etc/gpu-manager/volume.conf 44 | 45 | /%{_bindir}/gpu-manager 46 | /%{_bindir}/gpu-client 47 | 48 | /%{_unitdir}/gpu-manager.service 49 | -------------------------------------------------------------------------------- /build/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o errexit 4 | set -o pipefail 5 | set -o nounset 6 | 7 | source copy-bin-lib.sh 8 | 9 | echo "rebuild ldcache" 10 | /usr/sbin/ldconfig 11 | 12 | echo "launch gpu manager" 13 | /usr/bin/gpu-manager --extra-config=/etc/gpu-manager/extra-config.json --v=${LOG_LEVEL} --hostname-override=${NODE_NAME} --share-mode=true --volume-config=/etc/gpu-manager/volume.conf --log-dir=/var/log/gpu-manager --query-addr=0.0.0.0 ${EXTRA_FLAGS:-""} -------------------------------------------------------------------------------- /build/volume.conf: -------------------------------------------------------------------------------- 1 | { 2 | "volume": [ 3 | { 4 | "name": "nvidia", 5 | "base": "/etc/gpu-manager/vdriver", 6 | "mode": "ro", 7 | "components": { 8 | "binaries": [ 9 | "nvidia-cuda-mps-control", 10 | "nvidia-cuda-mps-server", 11 | "nvidia-debugdump", 12 | "nvidia-persistenced", 13 | "nvidia-smi", 14 | "gpu-client" 15 | ], 16 | "libraries": [ 17 | "libnvidia-ml.so", 18 | "libcuda.so", 19 | "libcuda-control.so", 20 | "libnvidia-ptxjitcompiler.so", 21 | "libnvidia-fatbinaryloader.so", 22 | "libnvidia-opencl.so", 23 | "libnvidia-compiler.so", 24 | 
"libvdpau_nvidia.so", 25 | "libnvidia-encode.so", 26 | "libnvcuvid.so", 27 | "libnvidia-fbc.so", 28 | "libnvidia-ifr.so", 29 | "libGL.so", 30 | "libGLX.so", 31 | "libOpenGL.so", 32 | "libGLESv1_CM.so", 33 | "libGLESv2.so", 34 | "libEGL.so", 35 | "libGLdispatch.so", 36 | "libGLX_nvidia.so", 37 | "libEGL_nvidia.so", 38 | "libGLESv2_nvidia.so", 39 | "libGLESv1_CM_nvidia.so", 40 | "libnvidia-eglcore.so", 41 | "libnvidia-egl-wayland.so", 42 | "libnvidia-glcore.so", 43 | "libnvidia-tls.so", 44 | "libnvidia-glsi.so", 45 | "libnvidia-opticalflow.so", 46 | "libnvidia-gpucomp.so" 47 | ] 48 | } 49 | }, 50 | { 51 | "name": "origin", 52 | "base": "/etc/gpu-manager/vdriver", 53 | "mode": "ro", 54 | "components": { 55 | "binaries": [ 56 | "nvidia-cuda-mps-control", 57 | "nvidia-cuda-mps-server", 58 | "nvidia-debugdump", 59 | "nvidia-persistenced", 60 | "nvidia-smi" 61 | ], 62 | "libraries": [ 63 | "libnvidia-ml.so", 64 | "libcuda.so", 65 | "libnvidia-ptxjitcompiler.so", 66 | "libnvidia-fatbinaryloader.so", 67 | "libnvidia-opencl.so", 68 | "libnvidia-compiler.so", 69 | "libvdpau_nvidia.so", 70 | "libnvidia-encode.so", 71 | "libnvcuvid.so", 72 | "libnvidia-fbc.so", 73 | "libnvidia-ifr.so", 74 | "libGL.so", 75 | "libGLX.so", 76 | "libOpenGL.so", 77 | "libGLESv1_CM.so", 78 | "libGLESv2.so", 79 | "libEGL.so", 80 | "libGLdispatch.so", 81 | "libGLX_nvidia.so", 82 | "libEGL_nvidia.so", 83 | "libGLESv2_nvidia.so", 84 | "libGLESv1_CM_nvidia.so", 85 | "libnvidia-eglcore.so", 86 | "libnvidia-egl-wayland.so", 87 | "libnvidia-glcore.so", 88 | "libnvidia-tls.so", 89 | "libnvidia-glsi.so", 90 | "libnvidia-opticalflow.so" 91 | ] 92 | } 93 | } 94 | ] 95 | } 96 | -------------------------------------------------------------------------------- /cmd/client/client.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. 
All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package main 19 | 20 | import ( 21 | "context" 22 | goflag "flag" 23 | 24 | vcudaapi "tkestack.io/gpu-manager/pkg/api/runtime/vcuda" 25 | "tkestack.io/gpu-manager/pkg/flags" 26 | "tkestack.io/gpu-manager/pkg/logs" 27 | "tkestack.io/gpu-manager/pkg/utils" 28 | 29 | "github.com/spf13/pflag" 30 | "google.golang.org/grpc" 31 | "k8s.io/klog" 32 | ) 33 | 34 | var ( 35 | addr, busID, podUID, contName, contID string 36 | ) 37 | 38 | func main() { 39 | cmdFlags := pflag.CommandLine 40 | 41 | cmdFlags.StringVar(&addr, "addr", "", "RPC address location for dial") 42 | cmdFlags.StringVar(&busID, "bus-id", "", "GPU card bus id of caller") 43 | cmdFlags.StringVar(&podUID, "pod-uid", "", "Pod UID of caller") 44 | cmdFlags.StringVar(&contName, "cont-name", "", "Container name of caller") 45 | cmdFlags.StringVar(&contID, "cont-id", "", "Container id of calller") 46 | 47 | flags.InitFlags() 48 | goflag.CommandLine.Parse([]string{}) 49 | logs.InitLogs() 50 | defer logs.FlushLogs() 51 | 52 | if len(addr) == 0 || len(podUID) == 0 || (len(contName) == 0 && len(contID) == 0) { 53 | klog.Fatalf("argument is empty, current: %s", cmdFlags.Args()) 54 | } 55 | 56 | conn, err := grpc.Dial(addr, utils.DefaultDialOptions...) 
57 | if err != nil { 58 | klog.Fatalf("can't dial %s, error %v", addr, err) 59 | } 60 | defer conn.Close() 61 | 62 | client := vcudaapi.NewVCUDAServiceClient(conn) 63 | ctx := context.TODO() 64 | 65 | req := &vcudaapi.VDeviceRequest{ 66 | BusId: busID, 67 | PodUid: podUID, 68 | ContainerName: contName, 69 | } 70 | 71 | if len(contID) > 0 { 72 | req.ContainerName = "" 73 | req.ContainerId = contID 74 | } 75 | 76 | _, err = client.RegisterVDevice(ctx, req) 77 | if err != nil { 78 | klog.Fatalf("fail to get response from manager, error %v", err) 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /cmd/manager/app/app.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package app 19 | 20 | import ( 21 | "log" 22 | "os" 23 | "path/filepath" 24 | "strings" 25 | "time" 26 | 27 | "tkestack.io/gpu-manager/cmd/manager/options" 28 | "tkestack.io/gpu-manager/pkg/config" 29 | "tkestack.io/gpu-manager/pkg/server" 30 | "tkestack.io/gpu-manager/pkg/types" 31 | "tkestack.io/gpu-manager/pkg/utils" 32 | 33 | "github.com/fsnotify/fsnotify" 34 | "k8s.io/klog" 35 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" 36 | ) 37 | 38 | // #lizard forgives 39 | func Run(opt *options.Options) error { 40 | cfg := &config.Config{ 41 | Driver: opt.Driver, 42 | QueryPort: opt.QueryPort, 43 | QueryAddr: opt.QueryAddr, 44 | KubeConfig: opt.KubeConfigFile, 45 | SamplePeriod: time.Duration(opt.SamplePeriod) * time.Second, 46 | VCudaRequestsQueue: make(chan *types.VCudaRequest, 10), 47 | DevicePluginPath: pluginapi.DevicePluginPath, 48 | VirtualManagerPath: opt.VirtualManagerPath, 49 | VolumeConfigPath: opt.VolumeConfigPath, 50 | EnableShare: opt.EnableShare, 51 | AllocationCheckPeriod: time.Duration(opt.AllocationCheckPeriod) * time.Second, 52 | CheckpointPath: opt.CheckpointPath, 53 | ContainerRuntimeEndpoint: opt.ContainerRuntimeEndpoint, 54 | CgroupDriver: opt.CgroupDriver, 55 | RequestTimeout: opt.RequestTimeout, 56 | } 57 | 58 | if len(opt.HostnameOverride) > 0 { 59 | cfg.Hostname = opt.HostnameOverride 60 | } 61 | 62 | if len(opt.ExtraPath) > 0 { 63 | cfg.ExtraConfigPath = opt.ExtraPath 64 | } 65 | 66 | if len(opt.DevicePluginPath) > 0 { 67 | cfg.DevicePluginPath = opt.DevicePluginPath 68 | } 69 | 70 | cfg.NodeLabels = make(map[string]string) 71 | for _, item := range strings.Split(opt.NodeLabels, ",") { 72 | if len(item) > 0 { 73 | kvs := strings.SplitN(item, "=", 2) 74 | if len(kvs) == 2 { 75 | cfg.NodeLabels[kvs[0]] = kvs[1] 76 | } else { 77 | klog.Warningf("malformed node labels: %v", kvs) 78 | } 79 | } 80 | } 81 | 82 | srv := server.NewManager(cfg) 83 | go srv.Run() 84 | 85 | waitTimer := time.NewTimer(opt.WaitTimeout) 86 | 
for !srv.Ready() { 87 | select { 88 | case <-waitTimer.C: 89 | klog.Warningf("Wait too long for server ready, restarting") 90 | os.Exit(1) 91 | default: 92 | klog.Infof("Wait for internal server ready") 93 | } 94 | time.Sleep(time.Second) 95 | } 96 | waitTimer.Stop() 97 | 98 | if err := srv.RegisterToKubelet(); err != nil { 99 | return err 100 | } 101 | 102 | devicePluginSocket := filepath.Join(cfg.DevicePluginPath, types.KubeletSocket) 103 | watcher, err := utils.NewFSWatcher(cfg.DevicePluginPath) 104 | if err != nil { 105 | log.Println("Failed to created FS watcher.") 106 | os.Exit(1) 107 | } 108 | defer watcher.Close() 109 | 110 | for { 111 | select { 112 | case event := <-watcher.Events: 113 | if event.Name == devicePluginSocket && event.Op&fsnotify.Create == fsnotify.Create { 114 | time.Sleep(time.Second) 115 | klog.Fatalf("inotify: %s created, restarting.", devicePluginSocket) 116 | } 117 | case err := <-watcher.Errors: 118 | klog.Fatalf("inotify: %s", err) 119 | } 120 | } 121 | return nil 122 | } 123 | -------------------------------------------------------------------------------- /cmd/manager/nvidia-manager.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package main 19 | 20 | import ( 21 | goflag "flag" 22 | "fmt" 23 | "os" 24 | 25 | "k8s.io/klog" 26 | 27 | "tkestack.io/gpu-manager/cmd/manager/app" 28 | "tkestack.io/gpu-manager/cmd/manager/options" 29 | "tkestack.io/gpu-manager/pkg/flags" 30 | "tkestack.io/gpu-manager/pkg/logs" 31 | "tkestack.io/gpu-manager/pkg/version" 32 | 33 | "github.com/spf13/pflag" 34 | ) 35 | 36 | func main() { 37 | klog.InitFlags(nil) 38 | opt := options.NewOptions() 39 | opt.AddFlags(pflag.CommandLine) 40 | 41 | flags.InitFlags() 42 | goflag.CommandLine.Parse([]string{}) 43 | logs.InitLogs() 44 | defer logs.FlushLogs() 45 | 46 | version.PrintAndExitIfRequested() 47 | 48 | if err := app.Run(opt); err != nil { 49 | fmt.Fprintf(os.Stderr, "%v\n", err) 50 | os.Exit(1) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /cmd/manager/options/options.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package options 19 | 20 | import ( 21 | "time" 22 | 23 | "github.com/spf13/pflag" 24 | ) 25 | 26 | const ( 27 | DefaultDriver = "nvidia" 28 | DefaultQueryPort = 5678 29 | DefaultSamplePeriod = 1 30 | DefaultVirtualManagerPath = "/etc/gpu-manager/vm" 31 | DefaultAllocationCheckPeriod = 30 32 | DefaultCheckpointPath = "/etc/gpu-manager/checkpoint" 33 | DefaultContainerRuntimeEndpoint = "/var/run/dockershim.sock" 34 | DefaultCgroupDriver = "cgroupfs" 35 | ) 36 | 37 | // Options contains plugin information 38 | type Options struct { 39 | Driver string 40 | ExtraPath string 41 | VolumeConfigPath string 42 | QueryPort int 43 | QueryAddr string 44 | KubeConfigFile string 45 | SamplePeriod int 46 | NodeLabels string 47 | HostnameOverride string 48 | VirtualManagerPath string 49 | DevicePluginPath string 50 | EnableShare bool 51 | AllocationCheckPeriod int 52 | CheckpointPath string 53 | ContainerRuntimeEndpoint string 54 | CgroupDriver string 55 | RequestTimeout time.Duration 56 | WaitTimeout time.Duration 57 | } 58 | 59 | // NewOptions gives a default options template. 60 | func NewOptions() *Options { 61 | return &Options{ 62 | Driver: DefaultDriver, 63 | QueryPort: DefaultQueryPort, 64 | QueryAddr: "localhost", 65 | SamplePeriod: DefaultSamplePeriod, 66 | VirtualManagerPath: DefaultVirtualManagerPath, 67 | AllocationCheckPeriod: DefaultAllocationCheckPeriod, 68 | CheckpointPath: DefaultCheckpointPath, 69 | ContainerRuntimeEndpoint: DefaultContainerRuntimeEndpoint, 70 | CgroupDriver: DefaultCgroupDriver, 71 | RequestTimeout: time.Second * 5, 72 | WaitTimeout: time.Minute, 73 | } 74 | } 75 | 76 | // AddFlags add some commandline flags. 
77 | func (opt *Options) AddFlags(fs *pflag.FlagSet) { 78 | fs.StringVar(&opt.Driver, "driver", opt.Driver, "The driver name for manager") 79 | fs.StringVar(&opt.ExtraPath, "extra-config", opt.ExtraPath, "The extra config file location") 80 | fs.StringVar(&opt.VolumeConfigPath, "volume-config", opt.VolumeConfigPath, "The volume config file location") 81 | fs.IntVar(&opt.QueryPort, "query-port", opt.QueryPort, "port for query statistics information") 82 | fs.StringVar(&opt.QueryAddr, "query-addr", opt.QueryAddr, "address for query statistics information") 83 | fs.StringVar(&opt.KubeConfigFile, "kubeconfig", opt.KubeConfigFile, "Path to kubeconfig file with authorization information (the master location is set by the master flag).") 84 | fs.IntVar(&opt.SamplePeriod, "sample-period", opt.SamplePeriod, "Sample period for each card, unit second") 85 | fs.StringVar(&opt.NodeLabels, "node-labels", opt.NodeLabels, "automated label for this node, if empty, node will be only labeled by gpu model") 86 | fs.StringVar(&opt.HostnameOverride, "hostname-override", opt.HostnameOverride, "If non-empty, will use this string as identification instead of the actual hostname.") 87 | fs.StringVar(&opt.VirtualManagerPath, "virtual-manager-path", opt.VirtualManagerPath, "configuration path for virtual manager store files") 88 | fs.StringVar(&opt.DevicePluginPath, "device-plugin-path", opt.DevicePluginPath, "the path for kubelet receive device plugin registration") 89 | fs.StringVar(&opt.CheckpointPath, "checkpoint-path", opt.CheckpointPath, "configuration path for checkpoint store file") 90 | fs.BoolVar(&opt.EnableShare, "share-mode", opt.EnableShare, "enable share mode allocation") 91 | fs.IntVar(&opt.AllocationCheckPeriod, "allocation-check-period", opt.AllocationCheckPeriod, "allocation check period, unit second") 92 | fs.StringVar(&opt.ContainerRuntimeEndpoint, "container-runtime-endpoint", opt.ContainerRuntimeEndpoint, "container runtime endpoint") 93 | fs.StringVar(&opt.CgroupDriver, 
"cgroup-driver", opt.CgroupDriver, "Driver that the kubelet uses to manipulate cgroups on the host. "+ 94 | "Possible values: 'cgroupfs', 'systemd'") 95 | fs.DurationVar(&opt.RequestTimeout, "runtime-request-timeout", opt.RequestTimeout, 96 | "request timeout for communicating with container runtime endpoint") 97 | fs.DurationVar(&opt.WaitTimeout, "wait-timeout", opt.WaitTimeout, "wait timeout for resource server ready") 98 | } 99 | -------------------------------------------------------------------------------- /docs/faq.md: -------------------------------------------------------------------------------- 1 | # FAQ 2 | 3 | *1.* Q: If I use another container runtime, what should I do? 4 | 5 | A: You need to change the `EXTRA_FLAGS` of `gpu-manager.yaml`, add `--container-runtime-endpoint` options, the value is the 6 | path of your container runtime unix socket, like `/var/run/crio.sock` or something like that. 7 | 8 | *2.* Q: When I use a fraction gpu resource, my program hung 9 | 10 | A: Add environment variable `LOGGER_LEVEL` and set value to `5` to `gpu-manager.yaml`, and paste your log in your issue. 11 | 12 | *3.* Q: When I use a fraction gpu resource, program reported an error like `rpc failed` 13 | 14 | A: After v1.0.3, we use CRI interface to find cgroup path, so if your cgroup driver is not `cgroupfs`, you 15 | need to change the `EXTRA_FLAGS` of `gpu-manager.yaml`, add `--cgroup-driver` options, the possible options are `cgroupfs` or `systemd`. 
16 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module tkestack.io/gpu-manager 2 | 3 | go 1.14 4 | 5 | replace tkestack.io/nvml => github.com/tkestack/go-nvml v0.0.0-20191217064248-7363e630a33e 6 | 7 | require ( 8 | github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e 9 | github.com/docker/go-units v0.4.0 // indirect 10 | github.com/fsnotify/fsnotify v1.4.7 11 | github.com/godbus/dbus v0.0.0-20181101234600-2ff6f7ffd60f // indirect 12 | github.com/golang/protobuf v1.3.2 13 | github.com/grpc-ecosystem/grpc-gateway v1.12.1 14 | github.com/opencontainers/runc v1.0.0-rc9 15 | github.com/opencontainers/runtime-spec v1.0.2 // indirect 16 | github.com/pkg/errors v0.8.1 17 | github.com/prometheus/client_golang v1.2.1 18 | github.com/spf13/pflag v1.0.5 19 | golang.org/x/net v0.0.0-20191109021931-daa7c04131f5 20 | google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a 21 | google.golang.org/grpc v1.24.0 22 | k8s.io/api v0.17.4 23 | k8s.io/apimachinery v0.17.4 24 | k8s.io/client-go v0.17.4 25 | k8s.io/cri-api v0.17.4 26 | k8s.io/klog v1.0.0 27 | k8s.io/kubectl v0.17.4 28 | k8s.io/kubelet v0.17.4 29 | tkestack.io/nvml v0.0.0-00010101000000-000000000000 30 | ) 31 | -------------------------------------------------------------------------------- /gpu-manager-svc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: gpu-manager-metric 5 | namespace: kube-system 6 | annotations: 7 | prometheus.io/scrape: "true" 8 | labels: 9 | kubernetes.io/cluster-service: "true" 10 | spec: 11 | clusterIP: None 12 | ports: 13 | - name: metrics 14 | port: 5678 15 | protocol: TCP 16 | targetPort: 5678 17 | selector: 18 | name: gpu-manager-ds 19 | -------------------------------------------------------------------------------- /gpu-manager.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: DaemonSet 3 | metadata: 4 | name: gpu-manager-daemonset 5 | namespace: kube-system 6 | spec: 7 | updateStrategy: 8 | type: RollingUpdate 9 | selector: 10 | matchLabels: 11 | name: gpu-manager-ds 12 | template: 13 | metadata: 14 | # This annotation is deprecated. Kept here for backward compatibility 15 | # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/ 16 | annotations: 17 | scheduler.alpha.kubernetes.io/critical-pod: "" 18 | labels: 19 | name: gpu-manager-ds 20 | spec: 21 | serviceAccount: gpu-manager 22 | tolerations: 23 | # This toleration is deprecated. Kept here for backward compatibility 24 | # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/ 25 | - key: CriticalAddonsOnly 26 | operator: Exists 27 | - key: tencent.com/vcuda-core 28 | operator: Exists 29 | effect: NoSchedule 30 | # Mark this pod as a critical add-on; when enabled, the critical add-on 31 | # scheduler reserves resources for critical add-on pods so that they can 32 | # be rescheduled after a failure. 
33 | # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/ 34 | priorityClassName: "system-node-critical" 35 | # only run on nodes that have a gpu device 36 | nodeSelector: 37 | nvidia-device-enable: enable 38 | hostPID: true 39 | containers: 40 | - image: tkestack/gpu-manager:1.0.3 41 | imagePullPolicy: Always 42 | name: gpu-manager 43 | securityContext: 44 | privileged: true 45 | ports: 46 | - containerPort: 5678 47 | volumeMounts: 48 | - name: device-plugin 49 | mountPath: /var/lib/kubelet/device-plugins 50 | - name: vdriver 51 | mountPath: /etc/gpu-manager/vdriver 52 | - name: vmdata 53 | mountPath: /etc/gpu-manager/vm 54 | - name: log 55 | mountPath: /var/log/gpu-manager 56 | - name: checkpoint 57 | mountPath: /etc/gpu-manager/checkpoint 58 | - name: run-dir 59 | mountPath: /var/run 60 | - name: cgroup 61 | mountPath: /sys/fs/cgroup 62 | readOnly: true 63 | - name: usr-directory 64 | mountPath: /usr/local/host 65 | readOnly: true 66 | env: 67 | - name: LOG_LEVEL 68 | value: "4" 69 | - name: EXTRA_FLAGS 70 | value: "--logtostderr=false" 71 | - name: NODE_NAME 72 | valueFrom: 73 | fieldRef: 74 | fieldPath: spec.nodeName 75 | volumes: 76 | - name: device-plugin 77 | hostPath: 78 | type: Directory 79 | path: /var/lib/kubelet/device-plugins 80 | - name: vmdata 81 | hostPath: 82 | type: DirectoryOrCreate 83 | path: /etc/gpu-manager/vm 84 | - name: vdriver 85 | hostPath: 86 | type: DirectoryOrCreate 87 | path: /etc/gpu-manager/vdriver 88 | - name: log 89 | hostPath: 90 | type: DirectoryOrCreate 91 | path: /etc/gpu-manager/log 92 | - name: checkpoint 93 | hostPath: 94 | type: DirectoryOrCreate 95 | path: /etc/gpu-manager/checkpoint 96 | # We have to mount the whole /var/run directory into the container, because the inode of a bind-mounted docker.sock 97 | # changes after the host docker is restarted 98 | - name: run-dir 99 | hostPath: 100 | type: Directory 101 | path: /var/run 102 | - name: cgroup 103 | hostPath: 104 | type: Directory 105 | path: 
/sys/fs/cgroup 106 | # We have to mount /usr directory instead of specified library path, because of non-existing 107 | # problem for different distro 108 | - name: usr-directory 109 | hostPath: 110 | type: Directory 111 | path: /usr 112 | -------------------------------------------------------------------------------- /hack/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o pipefail 5 | set -o nounset 6 | 7 | ROOT=$(cd $(dirname ${BASH_SOURCE[0]})/.. && pwd -P) 8 | 9 | source "${ROOT}/hack/common.sh" 10 | 11 | function plugin::build() { 12 | ( 13 | for arg; do 14 | case $arg in 15 | test) 16 | plugin::run_test 17 | ;; 18 | proto) 19 | plugin::generate_proto 20 | ;; 21 | img) 22 | plugin::generate_img 23 | ;; 24 | fmt) 25 | plugin::fmt 26 | ;; 27 | *) 28 | plugin::build_binary 29 | esac 30 | done 31 | ) 32 | } 33 | 34 | function plugin::run_test() { 35 | go test -timeout=1m -bench=. -cover -v ./... 36 | } 37 | 38 | function plugin::build_binary() { 39 | go build -o "${ROOT}/go/bin/gpu-$arg" -ldflags "$(plugin::version::ldflags) -s -w" ${PACKAGE}/cmd/$arg 40 | } 41 | 42 | function plugin::generate_img() { 43 | readonly local commit=$(git log --no-merges --oneline | wc -l | sed -e 's,^[ \t]*,,') 44 | readonly local version=$(<"${ROOT}/VERSION") 45 | readonly local base_img=${BASE_IMG:-"thomassong/vcuda:1.0.4"} 46 | 47 | mkdir -p "${ROOT}/go/build" 48 | tar czf "${ROOT}/go/build/gpu-manager-source.tar.gz" --transform 's,^,/gpu-manager-'${version}'/,' $(plugin::source_targets) 49 | 50 | cp -R "${ROOT}/build/"* "${ROOT}/go/build/" 51 | 52 | ( 53 | cd ${ROOT}/go/build 54 | docker build \ 55 | --network=host \ 56 | --build-arg version=${version} \ 57 | --build-arg commit=${commit} \ 58 | --build-arg base_img=${base_img} \ 59 | -t "${IMAGE_FILE}:${version}" . 
60 | ) 61 | } 62 | 63 | function plugin::fmt() { 64 | local unfmt_files=() 65 | for file in $(plugin::fmt_targets); do 66 | if [[ -n $(gofmt -d -s $file 2>&1) ]]; then 67 | unfmt_files+=($file) 68 | fi 69 | done 70 | if [[ ${#unfmt_files[@]} -gt 0 ]]; then 71 | echo "need fmt ${unfmt_files[@]}" 72 | exit 1 73 | fi 74 | } 75 | 76 | plugin::build "$@" 77 | -------------------------------------------------------------------------------- /hack/common.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | readonly PACKAGE="tkestack.io/gpu-manager" 4 | readonly BUILD_IMAGE_REPO=plugin-build 5 | readonly LOCAL_OUTPUT_IMAGE_STAGING="${ROOT}/go/images" 6 | readonly IMAGE_FILE=${IMAGE_FILE:-"thomassong/gpu-manager"} 7 | readonly PROTO_IMAGE="proto-generater" 8 | 9 | function plugin::cleanup() { 10 | rm -rf ${ROOT}/go 11 | } 12 | 13 | function plugin::cleanup_image() { 14 | docker rm -vf ${PROTO_IMAGE} 15 | } 16 | 17 | function plugin::generate_proto() { 18 | ( 19 | docker run --rm \ 20 | -v ${ROOT}/pkg/api:/tmp/pkg/api \ 21 | -v ${ROOT}/staging/src:/tmp/staging/src \ 22 | -u $(id -u) \ 23 | devsu/grpc-gateway \ 24 | bash -c "cd /tmp && protoc \\ 25 | --proto_path=staging/src:. \\ 26 | --proto_path=/go/src/github.com/grpc-ecosystem/grpc-gateway/third_party/googleapis:. \\ 27 | --go_out=plugins=grpc:. \\ 28 | --grpc-gateway_out=logtostderr=true:. \\ 29 | pkg/api/runtime/display/api.proto" 30 | 31 | docker run --rm \ 32 | -v ${ROOT}/pkg/api:/tmp/pkg/api \ 33 | -u $(id -u) \ 34 | devsu/grpc-gateway \ 35 | bash -c "cd /tmp && protoc \\ 36 | --go_out=plugins=grpc:. 
\\ 37 | pkg/api/runtime/vcuda/api.proto" 38 | ) 39 | } 40 | 41 | function plugin::version::ldflag() { 42 | local key=${1} 43 | local val=${2} 44 | echo "-X ${PACKAGE}/pkg/version.${key}=${val}" 45 | } 46 | 47 | function plugin::version::ldflags() { 48 | GIT_COMMIT=$(git log -1 --oneline 2>/dev/null | awk '{print $1}') 49 | local -a ldflags=() 50 | if [[ -n ${GIT_COMMIT} ]]; then 51 | ldflags+=($(plugin::version::ldflag "gitCommit" "${GIT_COMMIT}")) 52 | fi 53 | 54 | echo "${ldflags[*]-}" 55 | } 56 | 57 | function plugin::source_targets() { 58 | local targets=( 59 | $(find . -mindepth 1 -maxdepth 1 -not \( \ 60 | \( -path ./go \) -prune \ 61 | \)) 62 | ) 63 | echo "${targets[@]}" 64 | } 65 | 66 | function plugin::fmt_targets() { 67 | local targets=( 68 | $(find . -not \( \ 69 | \( -path ./go \ 70 | -o -path ./vendor \ 71 | \) -prune \ 72 | \) \ 73 | -name "*.go" \ 74 | -print \ 75 | ) 76 | ) 77 | echo "${targets[@]}" 78 | } 79 | -------------------------------------------------------------------------------- /pkg/algorithm/nvidia/fragment.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package nvidia 19 | 20 | import ( 21 | "sort" 22 | 23 | "k8s.io/klog" 24 | 25 | "tkestack.io/gpu-manager/pkg/device/nvidia" 26 | ) 27 | 28 | type fragmentMode struct { 29 | tree *nvidia.NvidiaTree 30 | } 31 | 32 | //NewFragmentMode returns a new fragmentMode struct. 33 | // 34 | //Evaluate() of fragmentMode returns nodes with minimum available cores 35 | //which fullfil the request. 36 | // 37 | //Fragment mode means to allocate cores on fragmented nodes first, which 38 | //helps link mode work better. 39 | func NewFragmentMode(t *nvidia.NvidiaTree) *fragmentMode { 40 | return &fragmentMode{t} 41 | } 42 | 43 | func (al *fragmentMode) Evaluate(cores int64, _ int64) []*nvidia.NvidiaNode { 44 | var ( 45 | candidate = al.tree.Root() 46 | next *nvidia.NvidiaNode 47 | sorter = fragmentSort(nvidia.ByAvailable, nvidia.ByAllocatableMemory, nvidia.ByPids, nvidia.ByMinorID) 48 | nodes = make([]*nvidia.NvidiaNode, 0) 49 | num = int(cores / nvidia.HundredCore) 50 | ) 51 | 52 | for next != candidate { 53 | next = candidate 54 | 55 | sorter.Sort(candidate.Children) 56 | 57 | for _, node := range candidate.Children { 58 | if len(node.Children) == 0 || node.Available() < num { 59 | continue 60 | } 61 | 62 | candidate = node 63 | klog.V(2).Infof("Choose id %d, mask %b", candidate.Meta.ID, candidate.Mask) 64 | break 65 | } 66 | } 67 | 68 | for _, n := range candidate.GetAvailableLeaves() { 69 | if num == 0 { 70 | break 71 | } 72 | 73 | klog.V(2).Infof("Pick up %d mask %b", n.Meta.ID, n.Mask) 74 | nodes = append(nodes, n) 75 | num-- 76 | } 77 | 78 | if num > 0 { 79 | return nil 80 | } 81 | 82 | return nodes 83 | } 84 | 85 | type fragmentPriority struct { 86 | data []*nvidia.NvidiaNode 87 | less []nvidia.LessFunc 88 | } 89 | 90 | func fragmentSort(less ...nvidia.LessFunc) *fragmentPriority { 91 | return &fragmentPriority{ 92 | less: less, 93 | } 94 | } 95 | 96 | func (fp *fragmentPriority) Sort(data []*nvidia.NvidiaNode) { 97 | fp.data = data 98 | sort.Sort(fp) 99 | } 
100 | 101 | func (fp *fragmentPriority) Len() int { 102 | return len(fp.data) 103 | } 104 | 105 | func (fp *fragmentPriority) Swap(i, j int) { 106 | fp.data[i], fp.data[j] = fp.data[j], fp.data[i] 107 | } 108 | 109 | func (fp *fragmentPriority) Less(i, j int) bool { 110 | var k int 111 | 112 | for k = 0; k < len(fp.less)-1; k++ { 113 | less := fp.less[k] 114 | switch { 115 | case less(fp.data[i], fp.data[j]): 116 | return true 117 | case less(fp.data[j], fp.data[i]): 118 | return false 119 | } 120 | } 121 | 122 | return fp.less[k](fp.data[i], fp.data[j]) 123 | } 124 | -------------------------------------------------------------------------------- /pkg/algorithm/nvidia/fragment_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package nvidia 19 | 20 | import ( 21 | "flag" 22 | "testing" 23 | 24 | "tkestack.io/gpu-manager/pkg/device/nvidia" 25 | ) 26 | 27 | func init() { 28 | flag.Set("v", "4") 29 | flag.Set("logtostderr", "true") 30 | } 31 | 32 | func TestFragment(t *testing.T) { 33 | flag.Parse() 34 | obj := nvidia.NewNvidiaTree(nil) 35 | tree, _ := obj.(*nvidia.NvidiaTree) 36 | 37 | testCase1 := 38 | ` GPU0 GPU1 GPU2 GPU3 GPU4 GPU5 39 | GPU0 X PIX PHB PHB SOC SOC 40 | GPU1 PIX X PHB PHB SOC SOC 41 | GPU2 PHB PHB X PIX SOC SOC 42 | GPU3 PHB PHB PIX X SOC SOC 43 | GPU4 SOC SOC SOC SOC X PIX 44 | GPU5 SOC SOC SOC SOC PIX X 45 | ` 46 | tree.Init(testCase1) 47 | algo := NewFragmentMode(tree) 48 | 49 | expectCase1 := []string{ 50 | "/dev/nvidia4", "/dev/nvidia5", 51 | } 52 | 53 | cores := int64(2 * nvidia.HundredCore) 54 | pass, should, but := examining(expectCase1, algo.Evaluate(cores, 0)) 55 | if !pass { 56 | t.Fatalf("Evaluate function got wrong, should be %s, but %s", should, but) 57 | } 58 | 59 | tree.MarkOccupied(&nvidia.NvidiaNode{ 60 | Meta: nvidia.DeviceMeta{ 61 | MinorID: 4, 62 | }, 63 | }, cores, 0) 64 | 65 | expectCase2 := []string{ 66 | "/dev/nvidia5", 67 | } 68 | 69 | cores = int64(nvidia.HundredCore) 70 | pass, should, but = examining(expectCase2, algo.Evaluate(cores, 0)) 71 | if !pass { 72 | t.Fatalf("Evaluate function got wrong, should be %s, but %s", should, but) 73 | } 74 | } 75 | 76 | func TestFragmentOnlyOne(t *testing.T) { 77 | flag.Parse() 78 | obj := nvidia.NewNvidiaTree(nil) 79 | tree, _ := obj.(*nvidia.NvidiaTree) 80 | 81 | testCase1 := 82 | ` GPU0 83 | GPU0 x` 84 | 85 | tree.Init(testCase1) 86 | algo := NewFragmentMode(tree) 87 | 88 | expectCase1 := []string{ 89 | "/dev/nvidia0", 90 | } 91 | 92 | cores := int64(nvidia.HundredCore) 93 | pass, should, but := examining(expectCase1, algo.Evaluate(cores, 0)) 94 | if !pass { 95 | t.Fatalf("Evaluate function got wrong, should be %s, but %s", should, but) 96 | } 97 | } 98 | 
-------------------------------------------------------------------------------- /pkg/algorithm/nvidia/link.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package nvidia 19 | 20 | import ( 21 | "sort" 22 | 23 | "k8s.io/klog" 24 | 25 | "tkestack.io/gpu-manager/pkg/device/nvidia" 26 | ) 27 | 28 | type linkMode struct { 29 | tree *nvidia.NvidiaTree 30 | } 31 | 32 | //NewLinkMode returns a new linkMode struct. 33 | // 34 | //Evaluate() of linkMode returns nodes with minimum connection overhead 35 | //of each other. 
36 | func NewLinkMode(t *nvidia.NvidiaTree) *linkMode { 37 | return &linkMode{t} 38 | } 39 | 40 | func (al *linkMode) Evaluate(cores int64, memory int64) []*nvidia.NvidiaNode { 41 | var ( 42 | sorter = linkSort(nvidia.ByType, nvidia.ByAvailable, nvidia.ByAllocatableMemory, nvidia.ByPids, nvidia.ByMinorID) 43 | tmpStore = make(map[int]*nvidia.NvidiaNode) 44 | root = al.tree.Root() 45 | nodes = make([]*nvidia.NvidiaNode, 0) 46 | num = int(cores / nvidia.HundredCore) 47 | ) 48 | 49 | for _, node := range al.tree.Leaves() { 50 | for node != root { 51 | klog.V(2).Infof("Test %d mask %b", node.Meta.ID, node.Mask) 52 | if node.Available() < num { 53 | node = node.Parent 54 | continue 55 | } 56 | 57 | tmpStore[node.Meta.ID] = node 58 | klog.V(2).Infof("Choose %d mask %b", node.Meta.ID, node.Mask) 59 | break 60 | } 61 | } 62 | 63 | if len(tmpStore) == 0 { 64 | tmpStore[-1] = root 65 | } 66 | 67 | candidates := make([]*nvidia.NvidiaNode, 0) 68 | for _, n := range tmpStore { 69 | candidates = append(candidates, n) 70 | } 71 | 72 | sorter.Sort(candidates) 73 | 74 | for _, n := range candidates[0].GetAvailableLeaves() { 75 | if num == 0 { 76 | break 77 | } 78 | 79 | klog.V(2).Infof("Pick up %d mask %b", n.Meta.ID, n.Mask) 80 | nodes = append(nodes, n) 81 | num-- 82 | } 83 | 84 | if num > 0 { 85 | return nil 86 | } 87 | 88 | return nodes 89 | } 90 | 91 | type linkPriority struct { 92 | data []*nvidia.NvidiaNode 93 | less []nvidia.LessFunc 94 | } 95 | 96 | func linkSort(less ...nvidia.LessFunc) *linkPriority { 97 | return &linkPriority{ 98 | less: less, 99 | } 100 | } 101 | 102 | func (lp *linkPriority) Sort(data []*nvidia.NvidiaNode) { 103 | lp.data = data 104 | sort.Sort(lp) 105 | } 106 | 107 | func (lp *linkPriority) Len() int { 108 | return len(lp.data) 109 | } 110 | 111 | func (lp *linkPriority) Swap(i, j int) { 112 | lp.data[i], lp.data[j] = lp.data[j], lp.data[i] 113 | } 114 | 115 | func (lp *linkPriority) Less(i, j int) bool { 116 | var k int 117 | 118 | for k = 0; k < 
len(lp.less)-1; k++ { 119 | less := lp.less[k] 120 | switch { 121 | case less(lp.data[i], lp.data[j]): 122 | return true 123 | case less(lp.data[j], lp.data[i]): 124 | return false 125 | } 126 | } 127 | 128 | return lp.less[k](lp.data[i], lp.data[j]) 129 | } 130 | -------------------------------------------------------------------------------- /pkg/algorithm/nvidia/link_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package nvidia 19 | 20 | import ( 21 | "flag" 22 | "testing" 23 | 24 | "tkestack.io/gpu-manager/pkg/device/nvidia" 25 | ) 26 | 27 | func init() { 28 | flag.Set("v", "4") 29 | flag.Set("logtostderr", "true") 30 | } 31 | 32 | func TestLink(t *testing.T) { 33 | flag.Parse() 34 | obj := nvidia.NewNvidiaTree(nil) 35 | tree, _ := obj.(*nvidia.NvidiaTree) 36 | 37 | testCase1 := 38 | ` GPU0 GPU1 GPU2 GPU3 GPU4 GPU5 39 | GPU0 X PIX PHB PHB SOC SOC 40 | GPU1 PIX X PHB PHB SOC SOC 41 | GPU2 PHB PHB X PIX SOC SOC 42 | GPU3 PHB PHB PIX X SOC SOC 43 | GPU4 SOC SOC SOC SOC X PIX 44 | GPU5 SOC SOC SOC SOC PIX X 45 | ` 46 | tree.Init(testCase1) 47 | algo := NewLinkMode(tree) 48 | 49 | expectCase1 := []string{ 50 | "/dev/nvidia0", 51 | "/dev/nvidia1", 52 | "/dev/nvidia2", 53 | } 54 | 55 | cores := int64(3 * nvidia.HundredCore) 56 | pass, should, but := examining(expectCase1, algo.Evaluate(cores, 0)) 57 | if !pass { 58 | t.Fatalf("Evaluate function got wrong, should be %s, but %s", should, but) 59 | } 60 | 61 | tree.MarkOccupied(&nvidia.NvidiaNode{ 62 | Meta: nvidia.DeviceMeta{ 63 | MinorID: 2, 64 | }, 65 | }, cores, 0) 66 | 67 | expectCase2 := []string{ 68 | "/dev/nvidia0", 69 | "/dev/nvidia1", 70 | } 71 | 72 | cores = int64(2 * nvidia.HundredCore) 73 | pass, should, but = examining(expectCase2, algo.Evaluate(cores, 0)) 74 | if !pass { 75 | t.Fatalf("Evaluate function got wrong, should be %s, but %s", should, but) 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /pkg/algorithm/nvidia/share.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. 
You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package nvidia 19 | 20 | import ( 21 | "sort" 22 | 23 | "k8s.io/klog" 24 | 25 | "tkestack.io/gpu-manager/pkg/device/nvidia" 26 | ) 27 | 28 | type shareMode struct { 29 | tree *nvidia.NvidiaTree 30 | } 31 | 32 | //NewShareMode returns a new shareMode struct. 33 | // 34 | //Evaluate() of shareMode returns one node with minimum available cores 35 | //which fullfil the request. 36 | // 37 | //Share mode means multiple application may share one GPU node which uses 38 | //GPU more efficiently. 39 | func NewShareMode(t *nvidia.NvidiaTree) *shareMode { 40 | return &shareMode{t} 41 | } 42 | 43 | func (al *shareMode) Evaluate(cores int64, memory int64) []*nvidia.NvidiaNode { 44 | var ( 45 | nodes []*nvidia.NvidiaNode 46 | tmpStore = make([]*nvidia.NvidiaNode, al.tree.Total()) 47 | sorter = shareModeSort(nvidia.ByAllocatableCores, nvidia.ByAllocatableMemory, nvidia.ByPids, nvidia.ByMinorID) 48 | ) 49 | 50 | for i := 0; i < al.tree.Total(); i++ { 51 | tmpStore[i] = al.tree.Leaves()[i] 52 | } 53 | 54 | sorter.Sort(tmpStore) 55 | 56 | for _, node := range tmpStore { 57 | if node.AllocatableMeta.Cores >= cores && node.AllocatableMeta.Memory >= memory { 58 | klog.V(2).Infof("Pick up %d mask %b, cores: %d, memory: %d", node.Meta.ID, node.Mask, node.AllocatableMeta.Cores, node.AllocatableMeta.Memory) 59 | nodes = append(nodes, node) 60 | break 61 | } 62 | } 63 | 64 | return nodes 65 | } 66 | 67 | type shareModePriority struct { 68 | data []*nvidia.NvidiaNode 69 | less []nvidia.LessFunc 70 | } 71 | 72 | func shareModeSort(less 
...nvidia.LessFunc) *shareModePriority { 73 | return &shareModePriority{ 74 | less: less, 75 | } 76 | } 77 | 78 | func (smp *shareModePriority) Sort(data []*nvidia.NvidiaNode) { 79 | smp.data = data 80 | sort.Sort(smp) 81 | } 82 | 83 | func (smp *shareModePriority) Len() int { 84 | return len(smp.data) 85 | } 86 | 87 | func (smp *shareModePriority) Swap(i, j int) { 88 | smp.data[i], smp.data[j] = smp.data[j], smp.data[i] 89 | } 90 | 91 | func (smp *shareModePriority) Less(i, j int) bool { 92 | var k int 93 | 94 | for k = 0; k < len(smp.less)-1; k++ { 95 | less := smp.less[k] 96 | switch { 97 | case less(smp.data[i], smp.data[j]): 98 | return true 99 | case less(smp.data[j], smp.data[i]): 100 | return false 101 | } 102 | } 103 | 104 | return smp.less[k](smp.data[i], smp.data[j]) 105 | } 106 | -------------------------------------------------------------------------------- /pkg/algorithm/nvidia/share_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package nvidia 19 | 20 | import ( 21 | "flag" 22 | "testing" 23 | 24 | "tkestack.io/gpu-manager/pkg/device/nvidia" 25 | ) 26 | 27 | func init() { 28 | flag.Set("v", "4") 29 | flag.Set("logtostderr", "true") 30 | } 31 | 32 | func TestShare(t *testing.T) { 33 | flag.Parse() 34 | obj := nvidia.NewNvidiaTree(nil) 35 | tree, _ := obj.(*nvidia.NvidiaTree) 36 | 37 | testCase1 := 38 | ` GPU0 GPU1 GPU2 GPU3 GPU4 GPU5 39 | GPU0 X PIX PHB PHB SOC SOC 40 | GPU1 PIX X PHB PHB SOC SOC 41 | GPU2 PHB PHB X PIX SOC SOC 42 | GPU3 PHB PHB PIX X SOC SOC 43 | GPU4 SOC SOC SOC SOC X PIX 44 | GPU5 SOC SOC SOC SOC PIX X 45 | ` 46 | tree.Init(testCase1) 47 | for _, n := range tree.Leaves() { 48 | n.AllocatableMeta.Cores = nvidia.HundredCore 49 | n.AllocatableMeta.Memory = 1024 50 | } 51 | algo := NewShareMode(tree) 52 | 53 | expectCase1 := []string{ 54 | "/dev/nvidia0", 55 | } 56 | 57 | cores := int64(0.5 * nvidia.HundredCore) 58 | pass, should, but := examining(expectCase1, algo.Evaluate(cores, 0)) 59 | if !pass { 60 | t.Fatalf("Evaluate function got wrong, should be %s, but %s", should, but) 61 | } 62 | 63 | tree.MarkOccupied(&nvidia.NvidiaNode{ 64 | Meta: nvidia.DeviceMeta{ 65 | MinorID: 0, 66 | }, 67 | }, cores, 0) 68 | 69 | expectCase2 := []string{ 70 | "/dev/nvidia1", 71 | } 72 | 73 | cores = int64(0.6 * nvidia.HundredCore) 74 | pass, should, but = examining(expectCase2, algo.Evaluate(cores, 0)) 75 | if !pass { 76 | t.Fatalf("Evaluate function got wrong, should be %s, but %s", should, but) 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /pkg/algorithm/nvidia/util_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 
5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package nvidia 19 | 20 | import ( 21 | "tkestack.io/gpu-manager/pkg/device/nvidia" 22 | ) 23 | 24 | func examining(expect []string, nodes []*nvidia.NvidiaNode) (pass bool, want string, actual string) { 25 | if len(expect) != len(nodes) { 26 | return false, "", "" 27 | } 28 | 29 | for i, n := range nodes { 30 | if expect[i] != n.MinorName() { 31 | return false, expect[i], n.MinorName() 32 | } 33 | } 34 | 35 | return true, "", "" 36 | } 37 | -------------------------------------------------------------------------------- /pkg/api/runtime/display/api.pb.gw.go: -------------------------------------------------------------------------------- 1 | // Code generated by protoc-gen-grpc-gateway 2 | // source: pkg/api/runtime/display/api.proto 3 | // DO NOT EDIT! 4 | 5 | /* 6 | Package display is a reverse proxy. 7 | 8 | It translates gRPC into RESTful JSON APIs. 
9 | */ 10 | package display 11 | 12 | import ( 13 | "io" 14 | "net/http" 15 | 16 | "github.com/golang/protobuf/proto" 17 | "github.com/golang/protobuf/ptypes/empty" 18 | "github.com/grpc-ecosystem/grpc-gateway/runtime" 19 | "github.com/grpc-ecosystem/grpc-gateway/utilities" 20 | "golang.org/x/net/context" 21 | "google.golang.org/grpc" 22 | "google.golang.org/grpc/codes" 23 | "google.golang.org/grpc/grpclog" 24 | "google.golang.org/grpc/status" 25 | ) 26 | 27 | var _ codes.Code 28 | var _ io.Reader 29 | var _ status.Status 30 | var _ = runtime.String 31 | var _ = utilities.NewDoubleArray 32 | 33 | func request_GPUDisplay_PrintGraph_0(ctx context.Context, marshaler runtime.Marshaler, client GPUDisplayClient, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) { 34 | var protoReq empty.Empty 35 | var metadata runtime.ServerMetadata 36 | 37 | msg, err := client.PrintGraph(ctx, &protoReq, grpc.Header(&metadata.HeaderMD), grpc.Trailer(&metadata.TrailerMD)) 38 | return msg, metadata, err 39 | 40 | } 41 | 42 | func request_GPUDisplay_PrintUsages_0(ctx context.Context, marshaler runtime.Marshaler, client GPUDisplayClient, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) { 43 | var protoReq empty.Empty 44 | var metadata runtime.ServerMetadata 45 | 46 | msg, err := client.PrintUsages(ctx, &protoReq, grpc.Header(&metadata.HeaderMD), grpc.Trailer(&metadata.TrailerMD)) 47 | return msg, metadata, err 48 | 49 | } 50 | 51 | func request_GPUDisplay_Version_0(ctx context.Context, marshaler runtime.Marshaler, client GPUDisplayClient, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) { 52 | var protoReq empty.Empty 53 | var metadata runtime.ServerMetadata 54 | 55 | msg, err := client.Version(ctx, &protoReq, grpc.Header(&metadata.HeaderMD), grpc.Trailer(&metadata.TrailerMD)) 56 | return msg, metadata, err 57 | 58 | } 59 | 60 | // 
RegisterGPUDisplayHandlerFromEndpoint is same as RegisterGPUDisplayHandler but 61 | // automatically dials to "endpoint" and closes the connection when "ctx" gets done. 62 | func RegisterGPUDisplayHandlerFromEndpoint(ctx context.Context, mux *runtime.ServeMux, endpoint string, opts []grpc.DialOption) (err error) { 63 | conn, err := grpc.Dial(endpoint, opts...) 64 | if err != nil { 65 | return err 66 | } 67 | defer func() { 68 | if err != nil { 69 | if cerr := conn.Close(); cerr != nil { 70 | grpclog.Printf("Failed to close conn to %s: %v", endpoint, cerr) 71 | } 72 | return 73 | } 74 | go func() { 75 | <-ctx.Done() 76 | if cerr := conn.Close(); cerr != nil { 77 | grpclog.Printf("Failed to close conn to %s: %v", endpoint, cerr) 78 | } 79 | }() 80 | }() 81 | 82 | return RegisterGPUDisplayHandler(ctx, mux, conn) 83 | } 84 | 85 | // RegisterGPUDisplayHandler registers the http handlers for service GPUDisplay to "mux". 86 | // The handlers forward requests to the grpc endpoint over "conn". 87 | func RegisterGPUDisplayHandler(ctx context.Context, mux *runtime.ServeMux, conn *grpc.ClientConn) error { 88 | client := NewGPUDisplayClient(conn) 89 | 90 | mux.Handle("GET", pattern_GPUDisplay_PrintGraph_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) { 91 | ctx, cancel := context.WithCancel(ctx) 92 | defer cancel() 93 | if cn, ok := w.(http.CloseNotifier); ok { 94 | go func(done <-chan struct{}, closed <-chan bool) { 95 | select { 96 | case <-done: 97 | case <-closed: 98 | cancel() 99 | } 100 | }(ctx.Done(), cn.CloseNotify()) 101 | } 102 | inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req) 103 | rctx, err := runtime.AnnotateContext(ctx, mux, req) 104 | if err != nil { 105 | runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err) 106 | return 107 | } 108 | resp, md, err := request_GPUDisplay_PrintGraph_0(rctx, inboundMarshaler, client, req, pathParams) 109 | ctx = runtime.NewServerMetadataContext(ctx, md) 110 | if err 
!= nil { 111 | runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err) 112 | return 113 | } 114 | 115 | forward_GPUDisplay_PrintGraph_0(ctx, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...) 116 | 117 | }) 118 | 119 | mux.Handle("GET", pattern_GPUDisplay_PrintUsages_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) { 120 | ctx, cancel := context.WithCancel(ctx) 121 | defer cancel() 122 | if cn, ok := w.(http.CloseNotifier); ok { 123 | go func(done <-chan struct{}, closed <-chan bool) { 124 | select { 125 | case <-done: 126 | case <-closed: 127 | cancel() 128 | } 129 | }(ctx.Done(), cn.CloseNotify()) 130 | } 131 | inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req) 132 | rctx, err := runtime.AnnotateContext(ctx, mux, req) 133 | if err != nil { 134 | runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err) 135 | return 136 | } 137 | resp, md, err := request_GPUDisplay_PrintUsages_0(rctx, inboundMarshaler, client, req, pathParams) 138 | ctx = runtime.NewServerMetadataContext(ctx, md) 139 | if err != nil { 140 | runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err) 141 | return 142 | } 143 | 144 | forward_GPUDisplay_PrintUsages_0(ctx, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...) 
145 | 146 | }) 147 | 148 | mux.Handle("GET", pattern_GPUDisplay_Version_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) { 149 | ctx, cancel := context.WithCancel(ctx) 150 | defer cancel() 151 | if cn, ok := w.(http.CloseNotifier); ok { 152 | go func(done <-chan struct{}, closed <-chan bool) { 153 | select { 154 | case <-done: 155 | case <-closed: 156 | cancel() 157 | } 158 | }(ctx.Done(), cn.CloseNotify()) 159 | } 160 | inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req) 161 | rctx, err := runtime.AnnotateContext(ctx, mux, req) 162 | if err != nil { 163 | runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err) 164 | return 165 | } 166 | resp, md, err := request_GPUDisplay_Version_0(rctx, inboundMarshaler, client, req, pathParams) 167 | ctx = runtime.NewServerMetadataContext(ctx, md) 168 | if err != nil { 169 | runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err) 170 | return 171 | } 172 | 173 | forward_GPUDisplay_Version_0(ctx, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...) 
174 | 175 | }) 176 | 177 | return nil 178 | } 179 | 180 | var ( 181 | pattern_GPUDisplay_PrintGraph_0 = runtime.MustPattern(runtime.NewPattern(1, []int{2, 0}, []string{"graph"}, "")) 182 | 183 | pattern_GPUDisplay_PrintUsages_0 = runtime.MustPattern(runtime.NewPattern(1, []int{2, 0}, []string{"usage"}, "")) 184 | 185 | pattern_GPUDisplay_Version_0 = runtime.MustPattern(runtime.NewPattern(1, []int{2, 0}, []string{"version"}, "")) 186 | ) 187 | 188 | var ( 189 | forward_GPUDisplay_PrintGraph_0 = runtime.ForwardResponseMessage 190 | 191 | forward_GPUDisplay_PrintUsages_0 = runtime.ForwardResponseMessage 192 | 193 | forward_GPUDisplay_Version_0 = runtime.ForwardResponseMessage 194 | ) 195 | -------------------------------------------------------------------------------- /pkg/api/runtime/display/api.proto: -------------------------------------------------------------------------------- 1 | syntax = 'proto3'; 2 | 3 | package display; 4 | 5 | import "google/api/annotations.proto"; 6 | import "google/protobuf/empty.proto"; 7 | 8 | // GPUDisplay service defines the aux APIs for remote request 9 | service GPUDisplay { 10 | // PrintGraph returns the text graph of allocator state 11 | rpc PrintGraph(google.protobuf.Empty) returns (GraphResponse) { 12 | option (google.api.http) = { 13 | get: "/graph" 14 | }; 15 | } 16 | 17 | // GPU usages 18 | rpc PrintUsages(google.protobuf.Empty) returns (UsageResponse) { 19 | option (google.api.http) = { 20 | get: "/usage" 21 | }; 22 | } 23 | 24 | // Version 25 | rpc Version(google.protobuf.Empty) returns (VersionResponse) { 26 | option (google.api.http) = { 27 | get: "/version" 28 | }; 29 | } 30 | } 31 | 32 | message GraphResponse { 33 | string graph = 1; 34 | } 35 | 36 | message UsageResponse { 37 | map usage = 1; 38 | } 39 | 40 | message ContainerStat { 41 | map stat = 1; 42 | string project = 2; 43 | string user = 3; 44 | string cluster = 4; 45 | map spec = 5; 46 | } 47 | 48 | message Devices { 49 | repeated DeviceInfo dev = 1; 50 | } 51 
| 52 | message DeviceInfo { 53 | string id = 1; 54 | string card_idx = 2; 55 | float gpu = 10; 56 | float mem = 11; 57 | repeated int32 pids = 12; 58 | float device_mem = 13; 59 | } 60 | 61 | message VersionResponse { 62 | string version = 1; 63 | } 64 | 65 | message Spec { 66 | float gpu = 1; 67 | float mem = 2; 68 | } 69 | -------------------------------------------------------------------------------- /pkg/api/runtime/vcuda/api.pb.go: -------------------------------------------------------------------------------- 1 | // Code generated by protoc-gen-go. DO NOT EDIT. 2 | // source: pkg/api/runtime/vcuda/api.proto 3 | 4 | /* 5 | Package vcuda is a generated protocol buffer package. 6 | 7 | It is generated from these files: 8 | pkg/api/runtime/vcuda/api.proto 9 | 10 | It has these top-level messages: 11 | VDeviceRequest 12 | VDeviceResponse 13 | */ 14 | package vcuda 15 | 16 | import proto "github.com/golang/protobuf/proto" 17 | import fmt "fmt" 18 | import math "math" 19 | 20 | import ( 21 | context "golang.org/x/net/context" 22 | grpc "google.golang.org/grpc" 23 | ) 24 | 25 | // Reference imports to suppress errors if they are not otherwise used. 26 | var _ = proto.Marshal 27 | var _ = fmt.Errorf 28 | var _ = math.Inf 29 | 30 | // This is a compile-time assertion to ensure that this generated file 31 | // is compatible with the proto package it is being compiled against. 32 | // A compilation error at this line likely means your copy of the 33 | // proto package needs to be updated. 
34 | const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package 35 | 36 | type VDeviceRequest struct { 37 | BusId string `protobuf:"bytes,1,opt,name=bus_id,json=busId" json:"bus_id,omitempty"` 38 | PodUid string `protobuf:"bytes,2,opt,name=pod_uid,json=podUid" json:"pod_uid,omitempty"` 39 | ContainerName string `protobuf:"bytes,3,opt,name=container_name,json=containerName" json:"container_name,omitempty"` 40 | ContainerId string `protobuf:"bytes,4,opt,name=container_id,json=containerId" json:"container_id,omitempty"` 41 | } 42 | 43 | func (m *VDeviceRequest) Reset() { *m = VDeviceRequest{} } 44 | func (m *VDeviceRequest) String() string { return proto.CompactTextString(m) } 45 | func (*VDeviceRequest) ProtoMessage() {} 46 | func (*VDeviceRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{0} } 47 | 48 | func (m *VDeviceRequest) GetBusId() string { 49 | if m != nil { 50 | return m.BusId 51 | } 52 | return "" 53 | } 54 | 55 | func (m *VDeviceRequest) GetPodUid() string { 56 | if m != nil { 57 | return m.PodUid 58 | } 59 | return "" 60 | } 61 | 62 | func (m *VDeviceRequest) GetContainerName() string { 63 | if m != nil { 64 | return m.ContainerName 65 | } 66 | return "" 67 | } 68 | 69 | func (m *VDeviceRequest) GetContainerId() string { 70 | if m != nil { 71 | return m.ContainerId 72 | } 73 | return "" 74 | } 75 | 76 | type VDeviceResponse struct { 77 | } 78 | 79 | func (m *VDeviceResponse) Reset() { *m = VDeviceResponse{} } 80 | func (m *VDeviceResponse) String() string { return proto.CompactTextString(m) } 81 | func (*VDeviceResponse) ProtoMessage() {} 82 | func (*VDeviceResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{1} } 83 | 84 | func init() { 85 | proto.RegisterType((*VDeviceRequest)(nil), "vcuda.VDeviceRequest") 86 | proto.RegisterType((*VDeviceResponse)(nil), "vcuda.VDeviceResponse") 87 | } 88 | 89 | // Reference imports to suppress errors if they are not otherwise used. 
90 | var _ context.Context 91 | var _ grpc.ClientConn 92 | 93 | // This is a compile-time assertion to ensure that this generated file 94 | // is compatible with the grpc package it is being compiled against. 95 | const _ = grpc.SupportPackageIsVersion4 96 | 97 | // Client API for VCUDAService service 98 | 99 | type VCUDAServiceClient interface { 100 | RegisterVDevice(ctx context.Context, in *VDeviceRequest, opts ...grpc.CallOption) (*VDeviceResponse, error) 101 | } 102 | 103 | type vCUDAServiceClient struct { 104 | cc *grpc.ClientConn 105 | } 106 | 107 | func NewVCUDAServiceClient(cc *grpc.ClientConn) VCUDAServiceClient { 108 | return &vCUDAServiceClient{cc} 109 | } 110 | 111 | func (c *vCUDAServiceClient) RegisterVDevice(ctx context.Context, in *VDeviceRequest, opts ...grpc.CallOption) (*VDeviceResponse, error) { 112 | out := new(VDeviceResponse) 113 | err := grpc.Invoke(ctx, "/vcuda.VCUDAService/RegisterVDevice", in, out, c.cc, opts...) 114 | if err != nil { 115 | return nil, err 116 | } 117 | return out, nil 118 | } 119 | 120 | // Server API for VCUDAService service 121 | 122 | type VCUDAServiceServer interface { 123 | RegisterVDevice(context.Context, *VDeviceRequest) (*VDeviceResponse, error) 124 | } 125 | 126 | func RegisterVCUDAServiceServer(s *grpc.Server, srv VCUDAServiceServer) { 127 | s.RegisterService(&_VCUDAService_serviceDesc, srv) 128 | } 129 | 130 | func _VCUDAService_RegisterVDevice_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { 131 | in := new(VDeviceRequest) 132 | if err := dec(in); err != nil { 133 | return nil, err 134 | } 135 | if interceptor == nil { 136 | return srv.(VCUDAServiceServer).RegisterVDevice(ctx, in) 137 | } 138 | info := &grpc.UnaryServerInfo{ 139 | Server: srv, 140 | FullMethod: "/vcuda.VCUDAService/RegisterVDevice", 141 | } 142 | handler := func(ctx context.Context, req interface{}) (interface{}, error) { 143 | return 
srv.(VCUDAServiceServer).RegisterVDevice(ctx, req.(*VDeviceRequest)) 144 | } 145 | return interceptor(ctx, in, info, handler) 146 | } 147 | 148 | var _VCUDAService_serviceDesc = grpc.ServiceDesc{ 149 | ServiceName: "vcuda.VCUDAService", 150 | HandlerType: (*VCUDAServiceServer)(nil), 151 | Methods: []grpc.MethodDesc{ 152 | { 153 | MethodName: "RegisterVDevice", 154 | Handler: _VCUDAService_RegisterVDevice_Handler, 155 | }, 156 | }, 157 | Streams: []grpc.StreamDesc{}, 158 | Metadata: "pkg/api/runtime/vcuda/api.proto", 159 | } 160 | 161 | func init() { proto.RegisterFile("pkg/api/runtime/vcuda/api.proto", fileDescriptor0) } 162 | 163 | var fileDescriptor0 = []byte{ 164 | // 226 bytes of a gzipped FileDescriptorProto 165 | 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0x5c, 0xd0, 0xcf, 0x4a, 0xc3, 0x40, 166 | 0x10, 0x06, 0x70, 0xa3, 0x36, 0xe2, 0x58, 0x5b, 0x5c, 0xa8, 0x06, 0x2f, 0x6a, 0x40, 0xf0, 0x94, 167 | 0x80, 0x3e, 0x81, 0xda, 0x4b, 0x2f, 0x1e, 0x56, 0xda, 0x6b, 0xd8, 0x64, 0x86, 0x32, 0x48, 0x76, 168 | 0xd7, 0xfd, 0xd3, 0x87, 0xf0, 0xa9, 0x25, 0xab, 0x44, 0xec, 0xf5, 0x37, 0x1f, 0xc3, 0x7c, 0x03, 169 | 0x37, 0xf6, 0x63, 0x5b, 0x2b, 0xcb, 0xb5, 0x8b, 0x3a, 0x70, 0x4f, 0xf5, 0xae, 0x8b, 0xa8, 0x06, 170 | 0xa9, 0xac, 0x33, 0xc1, 0x88, 0x49, 0x82, 0xf2, 0x2b, 0x83, 0xd9, 0x66, 0x49, 0x3b, 0xee, 0x48, 171 | 0xd2, 0x67, 0x24, 0x1f, 0xc4, 0x02, 0xf2, 0x36, 0xfa, 0x86, 0xb1, 0xc8, 0x6e, 0xb3, 0x87, 0x53, 172 | 0x39, 0x69, 0xa3, 0x5f, 0xa1, 0xb8, 0x82, 0x13, 0x6b, 0xb0, 0x89, 0x8c, 0xc5, 0x61, 0xf2, 0xdc, 173 | 0x1a, 0x5c, 0x33, 0x8a, 0x7b, 0x98, 0x75, 0x46, 0x07, 0xc5, 0x9a, 0x5c, 0xa3, 0x55, 0x4f, 0xc5, 174 | 0x51, 0x9a, 0x9f, 0x8f, 0xfa, 0xa6, 0x7a, 0x12, 0x77, 0x30, 0xfd, 0x8b, 0x31, 0x16, 0xc7, 0x29, 175 | 0x74, 0x36, 0xda, 0x0a, 0xcb, 0x0b, 0x98, 0x8f, 0xb7, 0x78, 0x6b, 0xb4, 0xa7, 0x47, 0x09, 0xd3, 176 | 0xcd, 0xeb, 0x7a, 0xf9, 0xfc, 0x4e, 0x6e, 0x70, 0xf1, 0x02, 0x73, 0x49, 0x5b, 0xf6, 0x81, 0xdc, 177 | 0x6f, 0x54, 0x2c, 0xaa, 0x54, 0xa5, 0xfa, 
0x5f, 0xe3, 0xfa, 0x72, 0x9f, 0x7f, 0x36, 0x96, 0x07, 178 | 0x6d, 0x9e, 0x3e, 0xf0, 0xf4, 0x1d, 0x00, 0x00, 0xff, 0xff, 0x6a, 0xa0, 0x48, 0xf8, 0x24, 0x01, 179 | 0x00, 0x00, 180 | } 181 | -------------------------------------------------------------------------------- /pkg/api/runtime/vcuda/api.proto: -------------------------------------------------------------------------------- 1 | syntax = 'proto3'; 2 | 3 | package vcuda; 4 | 5 | service VCUDAService { 6 | rpc RegisterVDevice(VDeviceRequest) returns (VDeviceResponse) {} 7 | } 8 | 9 | message VDeviceRequest { 10 | string bus_id = 1; 11 | string pod_uid = 2; 12 | string container_name = 3; 13 | string container_id = 4; 14 | } 15 | 16 | message VDeviceResponse {} -------------------------------------------------------------------------------- /pkg/config/config.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package config 19 | 20 | import ( 21 | "time" 22 | 23 | "tkestack.io/gpu-manager/pkg/types" 24 | ) 25 | 26 | // Config contains the necessary options for the plugin. 
// Config contains the necessary options for the plugin.
//
// NOTE(review): this file only declares the fields; where each one is
// populated and consumed is not visible here, so the per-field meaning should
// be confirmed against the option parsing code before relying on it.
type Config struct {
	Driver                   string
	ExtraConfigPath          string
	QueryPort                int
	QueryAddr                string
	KubeConfig               string
	SamplePeriod             time.Duration
	Hostname                 string
	NodeLabels               map[string]string
	VirtualManagerPath       string
	DevicePluginPath         string
	VolumeConfigPath         string
	EnableShare              bool
	AllocationCheckPeriod    time.Duration
	CheckpointPath           string
	ContainerRuntimeEndpoint string
	CgroupDriver             string
	RequestTimeout           time.Duration

	// VCudaRequestsQueue is a channel carrying *types.VCudaRequest values.
	VCudaRequestsQueue chan *types.VCudaRequest
}

// ExtraConfig contains extra options other than Config.
type ExtraConfig struct {
	// Devices is serialized under the JSON key "devices" and omitted when empty.
	Devices []string `json:"devices,omitempty"`
}
16 | */ 17 | 18 | package dummy 19 | 20 | import ( 21 | "tkestack.io/gpu-manager/pkg/config" 22 | "tkestack.io/gpu-manager/pkg/device" 23 | ) 24 | 25 | func init() { 26 | device.Register("dummy", NewDummyTree) 27 | } 28 | 29 | //DummyTree represents dummy tree struct 30 | type DummyTree struct { 31 | } 32 | 33 | var _ device.GPUTree = &DummyTree{} 34 | 35 | //NewDummyTree creates a new DummyTree 36 | func NewDummyTree(_ *config.Config) device.GPUTree { 37 | return &DummyTree{} 38 | } 39 | 40 | //Init a DummyTree 41 | func (t *DummyTree) Init(_ string) { 42 | } 43 | 44 | //Update a DummyTree 45 | func (t *DummyTree) Update() { 46 | 47 | } 48 | -------------------------------------------------------------------------------- /pkg/device/nvidia/node.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package nvidia 19 | 20 | import ( 21 | "fmt" 22 | "math/bits" 23 | 24 | "k8s.io/klog" 25 | 26 | "tkestack.io/nvml" 27 | ) 28 | 29 | //SchedulerCache contains allocatable resource of GPU 30 | type SchedulerCache struct { 31 | Cores int64 32 | Memory int64 33 | } 34 | 35 | //DeviceMeta contains metadata of GPU device 36 | type DeviceMeta struct { 37 | ID int 38 | MinorID int 39 | UsedMemory uint64 40 | TotalMemory uint64 41 | Pids []uint 42 | BusId string 43 | Utilization uint 44 | UUID string 45 | } 46 | 47 | //NvidiaNode represents a node of Nvidia GPU 48 | type NvidiaNode struct { 49 | Meta DeviceMeta 50 | AllocatableMeta SchedulerCache 51 | 52 | Parent *NvidiaNode 53 | Children []*NvidiaNode 54 | Mask uint32 55 | 56 | pendingReset bool 57 | vchildren map[int]*NvidiaNode 58 | ntype nvml.GpuTopologyLevel 59 | tree *NvidiaTree 60 | } 61 | 62 | var ( 63 | /** test only */ 64 | nodeIndex = 0 65 | ) 66 | 67 | //NewNvidiaNode returns a new NvidiaNode 68 | func NewNvidiaNode(t *NvidiaTree) *NvidiaNode { 69 | node := &NvidiaNode{ 70 | vchildren: make(map[int]*NvidiaNode), 71 | ntype: nvml.TOPOLOGY_UNKNOWN, 72 | tree: t, 73 | Meta: DeviceMeta{ 74 | ID: nodeIndex, 75 | }, 76 | } 77 | 78 | nodeIndex++ 79 | 80 | return node 81 | } 82 | 83 | func (n *NvidiaNode) setParent(p *NvidiaNode) { 84 | n.Parent = p 85 | p.vchildren[n.Meta.ID] = n 86 | } 87 | 88 | //MinorName returns MinorID of this NvidiaNode 89 | func (n *NvidiaNode) MinorName() string { 90 | return fmt.Sprintf(NamePattern, n.Meta.MinorID) 91 | } 92 | 93 | //Type returns GpuTopologyLevel of this NvidiaNode 94 | func (n *NvidiaNode) Type() int { 95 | return int(n.ntype) 96 | } 97 | 98 | //GetAvailableLeaves returns leaves of this NvidiaNode 99 | //which available for allocating. 
100 | func (n *NvidiaNode) GetAvailableLeaves() []*NvidiaNode { 101 | var leaves []*NvidiaNode 102 | 103 | mask := n.Mask 104 | 105 | for mask != 0 { 106 | id := uint32(bits.TrailingZeros32(mask)) 107 | klog.V(2).Infof("Pick up %d mask %b", id, n.tree.leaves[id].Mask) 108 | leaves = append(leaves, n.tree.leaves[id]) 109 | mask ^= one << id 110 | } 111 | 112 | return leaves 113 | } 114 | 115 | //Available returns conut of available leaves 116 | //of this NvidiaNode. 117 | func (n *NvidiaNode) Available() int { 118 | return bits.OnesCount32(n.Mask) 119 | } 120 | 121 | func (n *NvidiaNode) String() string { 122 | switch n.ntype { 123 | case nvml.TOPOLOGY_INTERNAL: 124 | return fmt.Sprintf("GPU%d", n.Meta.ID) 125 | case nvml.TOPOLOGY_SINGLE: 126 | return "PIX" 127 | case nvml.TOPOLOGY_MULTIPLE: 128 | return "PXB" 129 | case nvml.TOPOLOGY_HOSTBRIDGE: 130 | return "PHB" 131 | case nvml.TOPOLOGY_CPU: 132 | return "CPU" 133 | case nvml.TOPOLOGY_SYSTEM: 134 | return "SYS" 135 | } 136 | 137 | return "ROOT" 138 | } 139 | -------------------------------------------------------------------------------- /pkg/device/nvidia/sort.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package nvidia 19 | 20 | import ( 21 | "sort" 22 | 23 | "tkestack.io/gpu-manager/pkg/types" 24 | ) 25 | 26 | //LessFunc represents funcion to compare two NvidiaNode 27 | type LessFunc func(p1, p2 *NvidiaNode) bool 28 | 29 | var ( 30 | //ByType compares two NvidiaNode by GpuTopologyLevel 31 | ByType = func(p1, p2 *NvidiaNode) bool { 32 | return p1.Type() < p2.Type() 33 | } 34 | 35 | //ByAvailable compares two NvidiaNode by count of available leaves 36 | ByAvailable = func(p1, p2 *NvidiaNode) bool { 37 | return p1.Available() < p2.Available() 38 | } 39 | 40 | //ByID compares two NvidiaNode by ID 41 | ByID = func(p1, p2 *NvidiaNode) bool { 42 | return p1.Meta.ID < p2.Meta.ID 43 | } 44 | 45 | //ByMinorID compares two NvidiaNode by minor ID 46 | ByMinorID = func(p1, p2 *NvidiaNode) bool { 47 | return p1.Meta.MinorID < p2.Meta.MinorID 48 | } 49 | 50 | //ByMemory compares two NvidiaNode by memory already used 51 | ByMemory = func(p1, p2 *NvidiaNode) bool { 52 | return p1.Meta.UsedMemory < p2.Meta.UsedMemory 53 | } 54 | 55 | //ByPids compares two NvidiaNode by length of PIDs running on node 56 | ByPids = func(p1, p2 *NvidiaNode) bool { 57 | return len(p1.Meta.Pids) < len(p2.Meta.Pids) 58 | } 59 | 60 | //ByAllocatableCores compares two NvidiaNode by available cores 61 | ByAllocatableCores = func(p1, p2 *NvidiaNode) bool { 62 | return p1.AllocatableMeta.Cores < p2.AllocatableMeta.Cores 63 | } 64 | 65 | //ByAllocatableMemory compares two NvidiaNode by available memory 66 | ByAllocatableMemory = func(p1, p2 *NvidiaNode) bool { 67 | return p1.AllocatableMeta.Memory/types.MemoryBlockSize < p2.AllocatableMeta.Memory/types.MemoryBlockSize 68 | } 69 | 70 | //PrintSorter is used to sort nodes when printing them out 71 | PrintSorter = &printSort{ 72 | less: []LessFunc{ByType, ByAvailable, ByMinorID}, 73 | } 74 | ) 75 | 76 | type printSort struct { 77 | data []*NvidiaNode 78 | less []LessFunc 79 | } 80 | 81 | func (p *printSort) Sort(d []*NvidiaNode) { 82 | p.data = 
d 83 | sort.Sort(p) 84 | } 85 | 86 | func (p *printSort) Len() int { 87 | return len(p.data) 88 | } 89 | 90 | func (p *printSort) Swap(i, j int) { 91 | p.data[i], p.data[j] = p.data[j], p.data[i] 92 | } 93 | 94 | func (p *printSort) Less(i, j int) bool { 95 | var k int 96 | 97 | for k = 0; k < len(p.less)-1; k++ { 98 | less := p.less[k] 99 | switch { 100 | case less(p.data[i], p.data[j]): 101 | return true 102 | case less(p.data[j], p.data[i]): 103 | return false 104 | } 105 | } 106 | 107 | return p.less[k](p.data[i], p.data[j]) 108 | } 109 | -------------------------------------------------------------------------------- /pkg/device/nvidia/sort_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package nvidia 19 | 20 | import ( 21 | "flag" 22 | "testing" 23 | 24 | "tkestack.io/gpu-manager/pkg/types" 25 | ) 26 | 27 | func init() { 28 | flag.Set("v", "4") 29 | flag.Set("logtostderr", "true") 30 | } 31 | 32 | func TestSort(t *testing.T) { 33 | flag.Parse() 34 | //init tree 35 | obj := NewNvidiaTree(nil) 36 | tree, _ := obj.(*NvidiaTree) 37 | testCase1 := 38 | ` GPU0 GPU1 GPU2 GPU3 GPU4 GPU5 39 | GPU0 X PIX PHB PHB SOC SOC 40 | GPU1 PIX X PHB PHB SOC SOC 41 | GPU2 PHB PHB X PIX SOC SOC 42 | GPU3 PHB PHB PIX X SOC SOC 43 | GPU4 SOC SOC SOC SOC X PIX 44 | GPU5 SOC SOC SOC SOC PIX X 45 | ` 46 | tree.Init(testCase1) 47 | for idx, n := range tree.Leaves() { 48 | n.AllocatableMeta.Cores = HundredCore 49 | n.AllocatableMeta.Memory = 1024 - int64(idx) 50 | } 51 | 52 | //test sort 53 | expectLeaves := []string{"GPU5", "GPU0", "GPU1", "GPU2", "GPU3", "GPU4"} 54 | leaves := tree.Leaves() 55 | tree.MarkOccupied(leaves[5], 100, 1*types.MemoryBlockSize) 56 | ps := &printSort{ 57 | less: []LessFunc{ByAllocatableCores, 58 | ByAvailable, 59 | ByType, 60 | ByAllocatableMemory, 61 | ByMinorID, 62 | ByPids, 63 | ByMemory}, 64 | } 65 | ps.Sort(leaves) 66 | for i, s := range expectLeaves { 67 | if s != leaves[i].String() { 68 | t.Fatalf("sort went wrong") 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /pkg/device/nvidia/tree_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. 
You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package nvidia 19 | 20 | import ( 21 | "flag" 22 | "testing" 23 | 24 | "tkestack.io/gpu-manager/pkg/types" 25 | ) 26 | 27 | func init() { 28 | flag.Set("v", "4") 29 | flag.Set("logtostderr", "true") 30 | } 31 | 32 | func TestTree(t *testing.T) { 33 | flag.Parse() 34 | testCase1 := 35 | ` GPU0 GPU1 GPU2 GPU3 GPU4 GPU5 36 | GPU0 X PIX PHB PHB SOC SOC 37 | GPU1 PIX X PHB PHB SOC SOC 38 | GPU2 PHB PHB X PIX SOC SOC 39 | GPU3 PHB PHB PIX X SOC SOC 40 | GPU4 SOC SOC SOC SOC X PIX 41 | GPU5 SOC SOC SOC SOC PIX X 42 | ` 43 | testTree(t, testCase1, 6) 44 | 45 | testCase2 := 46 | ` GPU0 47 | GPU0 x` 48 | testTree(t, testCase2, 1) 49 | } 50 | 51 | func testTree(t *testing.T, testCase string, nodeNum int) { 52 | //init tree 53 | obj := NewNvidiaTree(nil) 54 | tree, _ := obj.(*NvidiaTree) 55 | tree.Init(testCase) 56 | for _, n := range tree.Leaves() { 57 | n.AllocatableMeta.Cores = HundredCore 58 | n.AllocatableMeta.Memory = 1024 59 | } 60 | 61 | //test Leaves(), Total() and Available() 62 | leaves := tree.Leaves() 63 | if tree.Available() != nodeNum || len(leaves) != nodeNum || tree.Total() != nodeNum { 64 | t.Fatalf("available leaves number wrong") 65 | } 66 | 67 | //test Root() and GetAvailableLeaves() 68 | root := tree.Root() 69 | availableLeaves := root.GetAvailableLeaves() 70 | for i, l := range availableLeaves { 71 | if l != leaves[i] { 72 | t.Fatalf("get available leaves wrong") 73 | } 74 | } 75 | 76 | //test MarkOccupied() and MarkFree() with half core 77 | tree.MarkOccupied(leaves[0], 50, 1*types.MemoryBlockSize) 78 | if 
tree.Available() != (nodeNum - 1) { 79 | t.Fatalf("available leaves number wrong after MarkOccupied") 80 | } 81 | 82 | tree.MarkFree(leaves[0], 50, 1*types.MemoryBlockSize) 83 | if tree.Available() != nodeNum { 84 | t.Fatalf("available leaves number wrong after MarkFree") 85 | } 86 | 87 | //test MarkOccupied() and MarkFree() with one core 88 | tree.MarkOccupied(leaves[0], 100, 1*types.MemoryBlockSize) 89 | if tree.Available() != (nodeNum - 1) { 90 | t.Fatalf("available leaves number wrong after MarkOccupied") 91 | } 92 | 93 | tree.MarkFree(leaves[0], 100, 1*types.MemoryBlockSize) 94 | if tree.Available() != nodeNum { 95 | t.Fatalf("available leaves number wrong after MarkFree") 96 | } 97 | 98 | //test Query() 99 | if len(leaves) > 0 && tree.Query("/dev/nvidia0") != leaves[0] { 100 | t.Fatalf("method Query get wrong node") 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /pkg/device/nvidia/tree_util.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package nvidia 19 | 20 | import ( 21 | "strings" 22 | 23 | "tkestack.io/nvml" 24 | ) 25 | 26 | func parseToGpuTopologyLevel(str string) nvml.GpuTopologyLevel { 27 | switch str { 28 | case "PIX": 29 | return nvml.TOPOLOGY_SINGLE 30 | case "PXB": 31 | return nvml.TOPOLOGY_MULTIPLE 32 | case "PHB": 33 | return nvml.TOPOLOGY_HOSTBRIDGE 34 | case "SOC": 35 | return nvml.TOPOLOGY_CPU 36 | } 37 | 38 | if strings.HasPrefix(str, "GPU") { 39 | return nvml.TOPOLOGY_INTERNAL 40 | } 41 | 42 | return nvml.TOPOLOGY_UNKNOWN 43 | } 44 | -------------------------------------------------------------------------------- /pkg/device/register/register.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package register 19 | 20 | import ( 21 | // Register test device 22 | _ "tkestack.io/gpu-manager/pkg/device/dummy" 23 | // Register nvidia device 24 | _ "tkestack.io/gpu-manager/pkg/device/nvidia" 25 | ) 26 | -------------------------------------------------------------------------------- /pkg/device/types.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 
3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package device 19 | 20 | import ( 21 | "tkestack.io/gpu-manager/pkg/config" 22 | 23 | "k8s.io/klog" 24 | ) 25 | 26 | //GPUTree is an interface for GPU tree structure 27 | type GPUTree interface { 28 | Init(input string) 29 | Update() 30 | } 31 | 32 | //NewFunc is a function to create GPUTree 33 | type NewFunc func(cfg *config.Config) GPUTree 34 | 35 | var ( 36 | factory = make(map[string]NewFunc) 37 | ) 38 | 39 | //Register NewFunc with name, which can be get 40 | //by calling NewFuncForName() later. 41 | func Register(name string, item NewFunc) { 42 | if _, ok := factory[name]; ok { 43 | return 44 | } 45 | 46 | klog.V(2).Infof("Register NewFunc with name %s", name) 47 | 48 | factory[name] = item 49 | } 50 | 51 | //NewFuncForName tries to find functions with specific name 52 | //from factory, return nil if not found. 53 | func NewFuncForName(name string) NewFunc { 54 | if item, ok := factory[name]; ok { 55 | return item 56 | } 57 | 58 | klog.V(2).Infof("Can not find NewFunc with name %s", name) 59 | 60 | return nil 61 | } 62 | -------------------------------------------------------------------------------- /pkg/flags/flags.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 
3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package flags 19 | 20 | import ( 21 | goflag "flag" 22 | "strings" 23 | 24 | "github.com/spf13/pflag" 25 | ) 26 | 27 | // WordSepNormalizeFunc changes all flags that contain "_" separators 28 | func WordSepNormalizeFunc(f *pflag.FlagSet, name string) pflag.NormalizedName { 29 | if strings.Contains(name, "_") { 30 | return pflag.NormalizedName(strings.Replace(name, "_", "-", -1)) 31 | } 32 | return pflag.NormalizedName(name) 33 | } 34 | 35 | // InitFlags normalizes and parses the command line flags 36 | func InitFlags() { 37 | pflag.CommandLine.SetNormalizeFunc(WordSepNormalizeFunc) 38 | // Only klog flags will be added 39 | goflag.CommandLine.VisitAll(func(goflag *goflag.Flag) { 40 | switch goflag.Name { 41 | case "logtostderr", "alsologtostderr", 42 | "v", "stderrthreshold", "vmodule", "log_backtrace_at", "log_dir": 43 | pflag.CommandLine.AddGoFlag(goflag) 44 | } 45 | }) 46 | 47 | pflag.Parse() 48 | } 49 | -------------------------------------------------------------------------------- /pkg/logs/logs.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 
5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package logs 19 | 20 | import ( 21 | "log" 22 | "time" 23 | 24 | "google.golang.org/grpc/grpclog" 25 | "k8s.io/klog" 26 | ) 27 | 28 | // klogWriter serves as a bridge between the standard log package and the klog package. 29 | type klogWriter struct{} 30 | 31 | // Write implements the io.Writer interface. 32 | func (gw klogWriter) Write(data []byte) (n int, err error) { 33 | klog.Info(string(data)) 34 | return len(data), nil 35 | } 36 | 37 | // InitLogs initializes logs the way we want for kubernetes. 38 | func InitLogs() { 39 | logger := klogWriter{} 40 | log.SetOutput(logger) 41 | log.SetFlags(0) 42 | 43 | grpclog.SetLogger(logger) 44 | // The default klog flush interval is 30 seconds, which is frighteningly long. 45 | go func() { 46 | for range time.Tick(time.Second) { 47 | klog.Flush() 48 | } 49 | }() 50 | } 51 | 52 | //FlushLogs calls klog.Flush to flush all pending log I/O 53 | func FlushLogs() { 54 | klog.Flush() 55 | } 56 | 57 | //Fatal wraps klog.FatalDepth 58 | func (gw klogWriter) Fatal(args ...interface{}) { 59 | klog.FatalDepth(1, args...) 60 | } 61 | 62 | //Fatalf wraps klog.Fatalf 63 | func (gw klogWriter) Fatalf(format string, args ...interface{}) { 64 | klog.Fatalf(format, args...) 65 | } 66 | 67 | //Fatalln wraps klog.Fatalln 68 | func (gw klogWriter) Fatalln(args ...interface{}) { 69 | klog.Fatalln(args...) 
70 | } 71 | 72 | //Print wraps klog.InfoDepth 73 | func (gw klogWriter) Print(args ...interface{}) { 74 | klog.InfoDepth(1, args...) 75 | } 76 | 77 | //Printf wraps klog.V(2).Infof 78 | func (gw klogWriter) Printf(format string, args ...interface{}) { 79 | klog.V(2).Infof(format, args...) 80 | } 81 | 82 | //Println wraps klog.Info 83 | func (gw klogWriter) Println(args ...interface{}) { 84 | klog.Info(args...) 85 | } 86 | -------------------------------------------------------------------------------- /pkg/runtime/runtime.go: -------------------------------------------------------------------------------- 1 | package runtime 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "os" 7 | "path/filepath" 8 | "strconv" 9 | "strings" 10 | "time" 11 | 12 | "golang.org/x/net/context" 13 | "google.golang.org/grpc" 14 | v1 "k8s.io/api/core/v1" 15 | criapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" 16 | "k8s.io/klog" 17 | "k8s.io/kubectl/pkg/util/qos" 18 | 19 | "tkestack.io/gpu-manager/pkg/services/watchdog" 20 | "tkestack.io/gpu-manager/pkg/types" 21 | "tkestack.io/gpu-manager/pkg/utils" 22 | "tkestack.io/gpu-manager/pkg/utils/cgroup" 23 | ) 24 | 25 | type ContainerRuntimeInterface interface { 26 | // Get pids in the given container id 27 | GetPidsInContainers(containerID string) ([]int, error) 28 | // InspectContainer returns the container information by the given name 29 | InspectContainer(containerID string) (*criapi.ContainerStatus, error) 30 | // RuntimeName returns the container runtime name 31 | RuntimeName() string 32 | } 33 | 34 | type containerRuntimeManager struct { 35 | cgroupDriver string 36 | runtimeName string 37 | requestTimeout time.Duration 38 | client criapi.RuntimeServiceClient 39 | } 40 | 41 | var _ ContainerRuntimeInterface = (*containerRuntimeManager)(nil) 42 | 43 | var ( 44 | containerRoot = cgroup.NewCgroupName([]string{}, "kubepods") 45 | ) 46 | 47 | func NewContainerRuntimeManager(cgroupDriver, endpoint string, requestTimeout time.Duration) 
(*containerRuntimeManager, error) { 48 | dialOptions := []grpc.DialOption{grpc.WithInsecure(), grpc.WithDialer(utils.UnixDial), grpc.WithBlock(), grpc.WithTimeout(time.Second * 5)} 49 | conn, err := grpc.Dial(endpoint, dialOptions...) 50 | if err != nil { 51 | return nil, err 52 | } 53 | 54 | client := criapi.NewRuntimeServiceClient(conn) 55 | 56 | m := &containerRuntimeManager{ 57 | cgroupDriver: cgroupDriver, 58 | client: client, 59 | requestTimeout: requestTimeout, 60 | } 61 | 62 | ctx, cancel := context.WithTimeout(context.Background(), m.requestTimeout) 63 | defer cancel() 64 | resp, err := client.Version(ctx, &criapi.VersionRequest{Version: "0.1.0"}) 65 | if err != nil { 66 | return nil, err 67 | } 68 | 69 | klog.V(2).Infof("Container runtime is %s", resp.RuntimeName) 70 | m.runtimeName = resp.RuntimeName 71 | 72 | return m, nil 73 | } 74 | 75 | func (m *containerRuntimeManager) GetPidsInContainers(containerID string) ([]int, error) { 76 | req := &criapi.ContainerStatusRequest{ 77 | ContainerId: containerID, 78 | } 79 | 80 | ctx, cancel := context.WithTimeout(context.Background(), m.requestTimeout) 81 | defer cancel() 82 | 83 | resp, err := m.client.ContainerStatus(ctx, req) 84 | if err != nil { 85 | klog.Errorf("can't get container %s status, %v", containerID, err) 86 | return nil, err 87 | } 88 | 89 | ns := resp.Status.Labels[types.PodNamespaceLabelKey] 90 | podName := resp.Status.Labels[types.PodNameLabelKey] 91 | 92 | pod, err := watchdog.GetPod(ns, podName) 93 | if err != nil { 94 | klog.Errorf("can't get pod %s/%s, %v", ns, podName, err) 95 | return nil, err 96 | } 97 | 98 | cgroupPath, err := m.getCgroupName(pod, containerID) 99 | if err != nil { 100 | klog.Errorf("can't get cgroup parent, %v", err) 101 | return nil, err 102 | } 103 | 104 | pids := make([]int, 0) 105 | baseDir := filepath.Clean(filepath.Join(types.CGROUP_BASE, cgroupPath)) 106 | filepath.Walk(baseDir, func(path string, info os.FileInfo, err error) error { 107 | if info == nil { 108 | 
return nil 109 | } 110 | if info.IsDir() || info.Name() != types.CGROUP_PROCS { 111 | return nil 112 | } 113 | 114 | p, err := readProcsFile(path) 115 | if err == nil { 116 | pids = append(pids, p...) 117 | } 118 | 119 | return nil 120 | }) 121 | 122 | return pids, nil 123 | } 124 | 125 | func readProcsFile(file string) ([]int, error) { 126 | f, err := os.Open(file) 127 | if err != nil { 128 | klog.Errorf("can't read %s, %v", file, err) 129 | return nil, nil 130 | } 131 | defer f.Close() 132 | 133 | scanner := bufio.NewScanner(f) 134 | pids := make([]int, 0) 135 | for scanner.Scan() { 136 | line := scanner.Text() 137 | if pid, err := strconv.Atoi(line); err == nil { 138 | pids = append(pids, pid) 139 | } 140 | } 141 | 142 | klog.V(4).Infof("Read from %s, pids: %v", file, pids) 143 | return pids, nil 144 | } 145 | 146 | func (m *containerRuntimeManager) getCgroupName(pod *v1.Pod, containerID string) (string, error) { 147 | podQos := pod.Status.QOSClass 148 | if len(podQos) == 0 { 149 | podQos = qos.GetPodQOS(pod) 150 | } 151 | 152 | var parentContainer cgroup.CgroupName 153 | switch podQos { 154 | case v1.PodQOSGuaranteed: 155 | parentContainer = cgroup.NewCgroupName(containerRoot) 156 | case v1.PodQOSBurstable: 157 | parentContainer = cgroup.NewCgroupName(containerRoot, strings.ToLower(string(v1.PodQOSBurstable))) 158 | case v1.PodQOSBestEffort: 159 | parentContainer = cgroup.NewCgroupName(containerRoot, strings.ToLower(string(v1.PodQOSBestEffort))) 160 | } 161 | 162 | podContainer := types.PodCgroupNamePrefix + string(pod.UID) 163 | cgroupName := cgroup.NewCgroupName(parentContainer, podContainer) 164 | 165 | switch m.cgroupDriver { 166 | case "systemd": 167 | return fmt.Sprintf("%s/%s-%s.scope", cgroupName.ToSystemd(), cgroup.SystemdPathPrefixOfRuntime(m.runtimeName), containerID), nil 168 | case "cgroupfs": 169 | return fmt.Sprintf("%s/%s", cgroupName.ToCgroupfs(), containerID), nil 170 | default: 171 | } 172 | 173 | return "", fmt.Errorf("unsupported cgroup 
driver") 174 | } 175 | 176 | func (m *containerRuntimeManager) InspectContainer(containerID string) (*criapi.ContainerStatus, error) { 177 | req := &criapi.ContainerStatusRequest{ 178 | ContainerId: containerID, 179 | } 180 | 181 | ctx, cancel := context.WithTimeout(context.Background(), m.requestTimeout) 182 | defer cancel() 183 | 184 | resp, err := m.client.ContainerStatus(ctx, req) 185 | if err != nil { 186 | return nil, err 187 | } 188 | 189 | return resp.Status, nil 190 | } 191 | 192 | func (m *containerRuntimeManager) RuntimeName() string { return m.runtimeName } 193 | -------------------------------------------------------------------------------- /pkg/runtime/runtime_stub.go: -------------------------------------------------------------------------------- 1 | package runtime 2 | 3 | import ( 4 | criapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" 5 | ) 6 | 7 | type containerRuntimeManagerStub struct { 8 | } 9 | 10 | var _ ContainerRuntimeInterface = (*containerRuntimeManagerStub)(nil) 11 | 12 | func NewContainerRuntimeManagerStub() *containerRuntimeManagerStub { 13 | return &containerRuntimeManagerStub{} 14 | } 15 | 16 | func (m *containerRuntimeManagerStub) GetPidsInContainers(containerID string) ([]int, error) { 17 | return nil, nil 18 | } 19 | 20 | func (m *containerRuntimeManagerStub) InspectContainer(containerID string) (*criapi.ContainerStatus, error) { 21 | return nil, nil 22 | } 23 | 24 | func (m *containerRuntimeManagerStub) RuntimeName() string { return "" } 25 | -------------------------------------------------------------------------------- /pkg/server/server_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 
5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package server 19 | 20 | import ( 21 | "context" 22 | "flag" 23 | "fmt" 24 | "io/ioutil" 25 | "net" 26 | "os" 27 | "path/filepath" 28 | "reflect" 29 | "strconv" 30 | "sync" 31 | "testing" 32 | "time" 33 | 34 | "tkestack.io/gpu-manager/cmd/manager/options" 35 | "tkestack.io/gpu-manager/pkg/config" 36 | deviceFactory "tkestack.io/gpu-manager/pkg/device" 37 | "tkestack.io/gpu-manager/pkg/device/nvidia" 38 | "tkestack.io/gpu-manager/pkg/runtime" 39 | allocFactory "tkestack.io/gpu-manager/pkg/services/allocator" 40 | "tkestack.io/gpu-manager/pkg/services/response" 41 | virtual_manager "tkestack.io/gpu-manager/pkg/services/virtual-manager" 42 | "tkestack.io/gpu-manager/pkg/services/watchdog" 43 | "tkestack.io/gpu-manager/pkg/types" 44 | "tkestack.io/gpu-manager/pkg/utils" 45 | 46 | "github.com/pkg/errors" 47 | "google.golang.org/grpc" 48 | corev1 "k8s.io/api/core/v1" 49 | "k8s.io/apimachinery/pkg/api/resource" 50 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 51 | k8stypes "k8s.io/apimachinery/pkg/types" 52 | "k8s.io/client-go/kubernetes/fake" 53 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" 54 | ) 55 | 56 | func init() { 57 | flag.Set("v", "4") 58 | flag.Set("logtostderr", "true") 59 | } 60 | 61 | type kubeletStub struct { 62 | sync.Mutex 63 | socket string 64 | pluginEndpoints map[string]string 65 | server *grpc.Server 66 | } 67 | 68 | type podRawInfo struct { 69 | Name 
string 70 | UID string 71 | Containers []containerRawInfo 72 | } 73 | 74 | type containerRawInfo struct { 75 | Name string 76 | Cores int 77 | Memory int 78 | } 79 | 80 | // newKubeletStub returns an initialized kubeletStub for testing purpose. 81 | func newKubeletStub(socket string) *kubeletStub { 82 | return &kubeletStub{ 83 | socket: socket, 84 | pluginEndpoints: make(map[string]string), 85 | } 86 | } 87 | 88 | // Minimal implementation of deviceplugin.RegistrationServer interface 89 | func (k *kubeletStub) Register(ctx context.Context, r *pluginapi.RegisterRequest) (*pluginapi.Empty, error) { 90 | k.Lock() 91 | defer k.Unlock() 92 | k.pluginEndpoints[r.ResourceName] = r.Endpoint 93 | return &pluginapi.Empty{}, nil 94 | } 95 | 96 | func (k *kubeletStub) start() error { 97 | os.Remove(k.socket) 98 | s, err := net.Listen("unix", k.socket) 99 | if err != nil { 100 | return errors.Wrap(err, "Can't listen at the socket") 101 | } 102 | 103 | k.server = grpc.NewServer() 104 | 105 | pluginapi.RegisterRegistrationServer(k.server, k) 106 | go k.server.Serve(s) 107 | 108 | // Wait till the grpcServer is ready to serve services. 
109 | return utils.WaitForServer(k.socket) 110 | } 111 | 112 | //stop servers and clean up 113 | func stopServer(srv *managerImpl) { 114 | for _, s := range srv.bundleServer { 115 | s.Stop() 116 | } 117 | srv.srv.Stop() 118 | os.RemoveAll(srv.config.VirtualManagerPath) 119 | } 120 | 121 | func TestServer(t *testing.T) { 122 | flag.Parse() 123 | tempDir, _ := ioutil.TempDir("", "gpu-manager") 124 | 125 | //init opt and cfg 126 | opt := options.NewOptions() 127 | opt.VirtualManagerPath = filepath.Clean(filepath.Join(tempDir, "vm")) 128 | opt.DevicePluginPath = tempDir 129 | opt.EnableShare = true 130 | opt.HostnameOverride = "testnode" 131 | cfg := &config.Config{ 132 | Driver: opt.Driver, 133 | QueryPort: opt.QueryPort, 134 | QueryAddr: opt.QueryAddr, 135 | KubeConfig: opt.KubeConfigFile, 136 | SamplePeriod: time.Duration(opt.SamplePeriod) * time.Second, 137 | VCudaRequestsQueue: make(chan *types.VCudaRequest, 10), 138 | DevicePluginPath: opt.DevicePluginPath, 139 | VirtualManagerPath: opt.VirtualManagerPath, 140 | VolumeConfigPath: opt.VolumeConfigPath, 141 | EnableShare: opt.EnableShare, 142 | Hostname: opt.HostnameOverride, 143 | AllocationCheckPeriod: 5 * time.Second, 144 | } 145 | 146 | defer func() { 147 | os.RemoveAll(tempDir) 148 | }() 149 | 150 | //init kubletstub 151 | kubeletSocket := filepath.Join(cfg.DevicePluginPath, "kubelet.sock") 152 | kubelet := newKubeletStub(kubeletSocket) 153 | err := kubelet.start() 154 | if err != nil { 155 | t.Fatalf("%+v", err) 156 | } 157 | defer kubelet.server.Stop() 158 | 159 | // init manager 160 | srv, _ := NewManager(cfg).(*managerImpl) 161 | fakeRuntimeManager := runtime.NewContainerRuntimeManagerStub() 162 | srv.virtualManager = virtual_manager.NewVirtualManagerForTest(cfg, fakeRuntimeManager, response.NewFakeResponseManager()) 163 | srv.virtualManager.Run() 164 | defer stopServer(srv) 165 | 166 | treeInitFn := deviceFactory.NewFuncForName(cfg.Driver) 167 | obj := treeInitFn(cfg) 168 | tree, _ := 
obj.(*nvidia.NvidiaTree) 169 | 170 | testCase1 := 171 | ` GPU0 GPU1 GPU2 GPU3 GPU4 GPU5 172 | GPU0 X PIX PHB PHB SOC SOC 173 | GPU1 PIX X PHB PHB SOC SOC 174 | GPU2 PHB PHB X PIX SOC SOC 175 | GPU3 PHB PHB PIX X SOC SOC 176 | GPU4 SOC SOC SOC SOC X PIX 177 | GPU5 SOC SOC SOC SOC PIX X 178 | ` 179 | tree.Init(testCase1) 180 | for _, n := range tree.Leaves() { 181 | n.AllocatableMeta.Cores = nvidia.HundredCore 182 | n.AllocatableMeta.Memory = 1024 * 1024 * 1024 183 | n.Meta.TotalMemory = 1024 * 1024 * 1024 184 | } 185 | 186 | k8sClient := fake.NewSimpleClientset() 187 | watchdog.NewPodCacheForTest(k8sClient) 188 | initAllocator := allocFactory.NewFuncForName(cfg.Driver + "_test") 189 | srv.allocator = initAllocator(cfg, tree, k8sClient, response.NewFakeResponseManager()) 190 | srv.setupGRPCService() 191 | srv.RegisterToKubelet() 192 | for _, rs := range srv.bundleServer { 193 | go rs.Run() 194 | if err := utils.WaitForServer(rs.SocketName()); err != nil { 195 | t.Fatalf("%s failed to start: %+v", rs.SocketName(), err) 196 | } 197 | } 198 | 199 | //check if bundleServers register to kublet correctly 200 | expectEndpoints := make(map[string]string) 201 | expectEndpoints[types.VCoreAnnotation] = vcoreSocketName 202 | expectEndpoints[types.VMemoryAnnotation] = vmemorySocketName 203 | if !reflect.DeepEqual(expectEndpoints, kubelet.pluginEndpoints) { 204 | t.Fatalf("register to kublet wrong, expect %v, got %v", expectEndpoints, kubelet.pluginEndpoints) 205 | } 206 | 207 | //check if bundleServer work correctly 208 | pluginSocket := filepath.Join(opt.DevicePluginPath, kubelet.pluginEndpoints[types.VCoreAnnotation]) 209 | conn, err := grpc.Dial(pluginSocket, utils.DefaultDialOptions...) 
210 | if err != nil { 211 | t.Fatalf("Failed to get connection: %+v", err) 212 | } 213 | defer conn.Close() 214 | 215 | //create pod with gpu resource required 216 | testCases := []podRawInfo{ 217 | { 218 | Name: "pod-0", 219 | UID: "uid-0", 220 | Containers: []containerRawInfo{ 221 | { 222 | Name: "container-0", 223 | Cores: 10, 224 | Memory: 1, 225 | }, 226 | { 227 | Name: "container-1", 228 | Cores: 10, 229 | Memory: 1, 230 | }, 231 | }, 232 | }, 233 | } 234 | for _, cs := range testCases { 235 | containers := []corev1.Container{} 236 | for _, c := range cs.Containers { 237 | container := corev1.Container{ 238 | Name: c.Name, 239 | Resources: corev1.ResourceRequirements{ 240 | Limits: corev1.ResourceList{ 241 | types.VCoreAnnotation: resource.MustParse(fmt.Sprintf("%d", c.Cores)), 242 | types.VMemoryAnnotation: resource.MustParse(fmt.Sprintf("%d", c.Memory)), 243 | }, 244 | }, 245 | } 246 | containers = append(containers, container) 247 | } 248 | pod := &corev1.Pod{ 249 | ObjectMeta: metav1.ObjectMeta{ 250 | Name: cs.Name, 251 | UID: k8stypes.UID(cs.UID), 252 | Annotations: make(map[string]string), 253 | }, 254 | Spec: corev1.PodSpec{ 255 | Containers: containers, 256 | }, 257 | Status: corev1.PodStatus{ 258 | Phase: corev1.PodPending, 259 | }, 260 | } 261 | pod.Annotations[types.PredicateTimeAnnotation] = "1" 262 | pod.Annotations[types.GPUAssigned] = "false" 263 | for i := range pod.Spec.Containers { 264 | pod.Annotations[types.PredicateGPUIndexPrefix+strconv.Itoa(i)] = "0" 265 | } 266 | pod, _ = k8sClient.CoreV1().Pods("test-ns").Create(pod) 267 | 268 | // wait for podLister to sync 269 | time.Sleep(time.Second * 2) 270 | 271 | client := pluginapi.NewDevicePluginClient(conn) 272 | for _, c := range pod.Spec.Containers { 273 | devicesIDs := []string{} 274 | vcore := c.Resources.Limits[types.VCoreAnnotation] 275 | for i := 0; i < int(vcore.Value()); i++ { 276 | devicesIDs = append(devicesIDs, types.VCoreAnnotation) 277 | } 278 | _, err = 
client.Allocate(context.Background(), &pluginapi.AllocateRequest{ 279 | ContainerRequests: []*pluginapi.ContainerAllocateRequest{ 280 | { 281 | DevicesIDs: devicesIDs, 282 | }, 283 | }, 284 | }) 285 | if err != nil { 286 | t.Errorf("Failed to allocate for container %s due to %+v", c.Name, err) 287 | } 288 | } 289 | } 290 | } 291 | -------------------------------------------------------------------------------- /pkg/server/types.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
// Manager is the api of the top-level GPU manager server.
// NOTE(review): the implementation (managerImpl) is not visible in this part
// of the file; the method notes below are inferred from the names and from how
// the server test uses them - confirm against the implementation.
type Manager interface {
	// Ready presumably reports whether the manager can serve requests.
	Ready() bool
	// Run presumably starts the manager.
	Run() error
	// RegisterToKubelet announces the manager's resource servers to the
	// kubelet registration socket (the server test checks one endpoint per
	// resource name after calling it).
	RegisterToKubelet() error
}

// ResourceServer is the api the manager uses to drive one device-plugin
// server per resource. The vcore and vmemory servers in this package
// implement it.
type ResourceServer interface {
	// Run registers the device plugin service, listens on the server's unix
	// socket and blocks serving gRPC on it (see the vcore/vmemory servers).
	Run() error
	// Stop stops the underlying gRPC server.
	Stop()
	// SocketName returns the full path of the server's unix socket file.
	SocketName() string
	// ResourceName returns the name of the resource this server serves.
	ResourceName() string
}

// resourceServerImpl holds the state shared by the concrete resource servers.
type resourceServerImpl struct {
	// srv is the gRPC server the device plugin service is registered on.
	srv *grpc.Server
	// socketFile is the path of the unix socket the server listens on.
	socketFile string

	// mgr is the owning manager that device plugin calls are delegated to.
	mgr *managerImpl
}
16 | */ 17 | 18 | package server 19 | 20 | import ( 21 | "context" 22 | "net" 23 | "os" 24 | "path/filepath" 25 | "syscall" 26 | 27 | "google.golang.org/grpc" 28 | "k8s.io/klog" 29 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" 30 | 31 | "tkestack.io/gpu-manager/pkg/types" 32 | ) 33 | 34 | const ( 35 | vcoreSocketName = "vcore.sock" 36 | ) 37 | 38 | type vcoreResourceServer struct { 39 | resourceServerImpl 40 | } 41 | 42 | var _ pluginapi.DevicePluginServer = &vcoreResourceServer{} 43 | var _ ResourceServer = &vcoreResourceServer{} 44 | 45 | func newVcoreServer(manager *managerImpl) ResourceServer { 46 | socketFile := filepath.Join(manager.config.DevicePluginPath, vcoreSocketName) 47 | 48 | return &vcoreResourceServer{ 49 | resourceServerImpl: resourceServerImpl{ 50 | srv: grpc.NewServer(), 51 | socketFile: socketFile, 52 | mgr: manager, 53 | }, 54 | } 55 | } 56 | 57 | func (vr *vcoreResourceServer) SocketName() string { 58 | return vr.socketFile 59 | } 60 | 61 | func (vr *vcoreResourceServer) ResourceName() string { 62 | return types.VCoreAnnotation 63 | } 64 | 65 | func (vr *vcoreResourceServer) Stop() { 66 | vr.srv.Stop() 67 | } 68 | 69 | func (vr *vcoreResourceServer) Run() error { 70 | pluginapi.RegisterDevicePluginServer(vr.srv, vr) 71 | 72 | err := syscall.Unlink(vr.socketFile) 73 | if err != nil && !os.IsNotExist(err) { 74 | return err 75 | } 76 | 77 | l, err := net.Listen("unix", vr.socketFile) 78 | if err != nil { 79 | return err 80 | } 81 | 82 | klog.V(2).Infof("Server %s is ready at %s", types.VCoreAnnotation, vr.socketFile) 83 | 84 | return vr.srv.Serve(l) 85 | } 86 | 87 | /** device plugin interface */ 88 | func (vr *vcoreResourceServer) Allocate(ctx context.Context, reqs *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) { 89 | klog.V(2).Infof("%+v allocation request for vcore", reqs) 90 | return vr.mgr.Allocate(ctx, reqs) 91 | } 92 | 93 | func (vr *vcoreResourceServer) ListAndWatch(e *pluginapi.Empty, s 
pluginapi.DevicePlugin_ListAndWatchServer) error { 94 | klog.V(2).Infof("ListAndWatch request for vcore") 95 | return vr.mgr.ListAndWatchWithResourceName(types.VCoreAnnotation, e, s) 96 | } 97 | 98 | func (vr *vcoreResourceServer) GetDevicePluginOptions(ctx context.Context, e *pluginapi.Empty) (*pluginapi.DevicePluginOptions, error) { 99 | klog.V(2).Infof("GetDevicePluginOptions request for vcore") 100 | return vr.mgr.GetDevicePluginOptions(ctx, e) 101 | } 102 | 103 | func (vr *vcoreResourceServer) PreStartContainer(ctx context.Context, req *pluginapi.PreStartContainerRequest) (*pluginapi.PreStartContainerResponse, error) { 104 | klog.V(2).Infof("PreStartContainer request for vcore") 105 | return vr.mgr.PreStartContainer(ctx, req) 106 | } 107 | -------------------------------------------------------------------------------- /pkg/server/vmemory.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package server 19 | 20 | import ( 21 | "context" 22 | "net" 23 | "os" 24 | "path/filepath" 25 | "syscall" 26 | 27 | "google.golang.org/grpc" 28 | "k8s.io/klog" 29 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" 30 | 31 | "tkestack.io/gpu-manager/pkg/types" 32 | ) 33 | 34 | const ( 35 | vmemorySocketName = "vmemory.sock" 36 | ) 37 | 38 | type vmemoryResourceServer struct { 39 | resourceServerImpl 40 | } 41 | 42 | var _ pluginapi.DevicePluginServer = &vmemoryResourceServer{} 43 | var _ ResourceServer = &vmemoryResourceServer{} 44 | 45 | func newVmemoryServer(manager *managerImpl) ResourceServer { 46 | socketFile := filepath.Join(manager.config.DevicePluginPath, vmemorySocketName) 47 | return &vmemoryResourceServer{ 48 | resourceServerImpl: resourceServerImpl{ 49 | srv: grpc.NewServer(), 50 | socketFile: socketFile, 51 | mgr: manager, 52 | }, 53 | } 54 | } 55 | 56 | func (vr *vmemoryResourceServer) SocketName() string { 57 | return vr.socketFile 58 | } 59 | 60 | func (vr *vmemoryResourceServer) ResourceName() string { 61 | return types.VMemoryAnnotation 62 | } 63 | 64 | func (vr *vmemoryResourceServer) Stop() { 65 | vr.srv.Stop() 66 | } 67 | 68 | func (vr *vmemoryResourceServer) Run() error { 69 | pluginapi.RegisterDevicePluginServer(vr.srv, vr) 70 | 71 | err := syscall.Unlink(vr.socketFile) 72 | if err != nil && !os.IsNotExist(err) { 73 | return err 74 | } 75 | 76 | l, err := net.Listen("unix", vr.socketFile) 77 | if err != nil { 78 | return err 79 | } 80 | 81 | klog.V(2).Infof("Server %s is ready at %s", types.VMemoryAnnotation, vr.socketFile) 82 | 83 | return vr.srv.Serve(l) 84 | } 85 | 86 | /** device plugin interface */ 87 | func (vr *vmemoryResourceServer) Allocate(ctx context.Context, reqs *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) { 88 | klog.V(2).Infof("%+v allocation request for vmemory", reqs) 89 | fakeData := make([]*pluginapi.ContainerAllocateResponse, 0) 90 | fakeData = append(fakeData, 
&pluginapi.ContainerAllocateResponse{}) 91 | 92 | return &pluginapi.AllocateResponse{ 93 | ContainerResponses: fakeData, 94 | }, nil 95 | } 96 | 97 | func (vr *vmemoryResourceServer) ListAndWatch(e *pluginapi.Empty, s pluginapi.DevicePlugin_ListAndWatchServer) error { 98 | klog.V(2).Infof("ListAndWatch request for vmemory") 99 | return vr.mgr.ListAndWatchWithResourceName(types.VMemoryAnnotation, e, s) 100 | } 101 | 102 | func (vr *vmemoryResourceServer) GetDevicePluginOptions(ctx context.Context, e *pluginapi.Empty) (*pluginapi.DevicePluginOptions, error) { 103 | klog.V(2).Infof("GetDevicePluginOptions request for vmemory") 104 | return &pluginapi.DevicePluginOptions{}, nil 105 | } 106 | 107 | func (vr *vmemoryResourceServer) PreStartContainer(ctx context.Context, req *pluginapi.PreStartContainerRequest) (*pluginapi.PreStartContainerResponse, error) { 108 | klog.V(2).Infof("PreStartContainer request for vmemory") 109 | return &pluginapi.PreStartContainerResponse{}, nil 110 | } 111 | -------------------------------------------------------------------------------- /pkg/services/allocator/cache/cache.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
// Info contains information about the GPU resources recorded for a container.
type Info struct {
	Devices []string
	Cores   int64
	Memory  int64
}

// containerToInfo maps a container name to its Info.
type containerToInfo map[string]*Info

// PodCache represents a list of pod to GPU mappings, keyed by pod UID and then
// by container name.
type PodCache struct {
	PodGPUMapping map[string]containerToInfo
}

// NewAllocateCache creates a new, empty PodCache.
func NewAllocateCache() *PodCache {
	return &PodCache{
		PodGPUMapping: make(map[string]containerToInfo),
	}
}

// Pods returns the UIDs of all pods in the PodCache, in no particular order.
// The result is never nil.
func (pgpu *PodCache) Pods() []string {
	ret := make([]string, 0, len(pgpu.PodGPUMapping))
	for k := range pgpu.PodGPUMapping {
		ret = append(ret, k)
	}
	return ret
}

// Insert records cache for container contName of pod podUID, replacing any
// entry already stored for that container.
func (pgpu *PodCache) Insert(podUID, contName string, cache *Info) {
	// PodGPUMapping is exported, so a PodCache may exist without having gone
	// through NewAllocateCache; writing to a nil map would panic.
	if pgpu.PodGPUMapping == nil {
		pgpu.PodGPUMapping = make(map[string]containerToInfo)
	}
	if _, exists := pgpu.PodGPUMapping[podUID]; !exists {
		pgpu.PodGPUMapping[podUID] = make(containerToInfo)
	}
	pgpu.PodGPUMapping[podUID][contName] = cache
}

// GetCache returns the container-name-to-Info mapping of pod podUID, or nil if
// the pod is not in the cache.
func (pgpu *PodCache) GetCache(podUID string) map[string]*Info {
	containers, exists := pgpu.PodGPUMapping[podUID]
	if !exists {
		return nil
	}

	return containers
}

// Delete removes all GPU info of pod uid from the PodCache. Deleting an
// unknown pod is a no-op.
func (pgpu *PodCache) Delete(uid string) {
	delete(pgpu.PodGPUMapping, uid)
}
You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package checkpoint 19 | 20 | import ( 21 | "fmt" 22 | "io/ioutil" 23 | "os" 24 | "path/filepath" 25 | ) 26 | 27 | const ( 28 | // Name prefix for the temporary files. 29 | tmpPrefix = "." 30 | ) 31 | 32 | var ( 33 | // ErrKeyNotFound is the error returned if key is not found in Store. 34 | ErrKeyNotFound = fmt.Errorf("key is not found") 35 | ) 36 | 37 | // CheckpointManager stores checkpoint in file. 38 | type Manager struct { 39 | // Absolute path to the base directory for storing checkpoint files. 40 | directoryPath string 41 | // File name of the storing checkpoint file. 42 | file string 43 | } 44 | 45 | // NewManager returns an instance of CheckpointManager. 46 | func NewManager(path string, file string) (*Manager, error) { 47 | if err := ensureDirectory(path); err != nil { 48 | return nil, err 49 | } 50 | 51 | return &Manager{directoryPath: path, file: file}, nil 52 | } 53 | 54 | // Write writes the given checkpoint to file. 55 | func (f *Manager) Write(data []byte) error { 56 | if err := ensureDirectory(f.directoryPath); err != nil { 57 | return err 58 | } 59 | 60 | return writeFile(f.getPathOfFile(), data) 61 | } 62 | 63 | // Read reads the checkpoint from the file. 64 | func (f *Manager) Read() ([]byte, error) { 65 | bytes, err := ioutil.ReadFile(f.getPathOfFile()) 66 | if os.IsNotExist(err) { 67 | return bytes, ErrKeyNotFound 68 | } 69 | return bytes, err 70 | } 71 | 72 | // Delete deletes the file. 
73 | func (f *Manager) Delete() error { 74 | return removePath(f.getPathOfFile()) 75 | } 76 | 77 | // getPathOfFile returns the full path of the file. 78 | func (f *Manager) getPathOfFile() string { 79 | return filepath.Join(f.directoryPath, f.file) 80 | } 81 | 82 | // ensureDirectory creates the directory if it does not exist. 83 | func ensureDirectory(path string) error { 84 | if _, err := os.Stat(path); err != nil { 85 | // MkdirAll returns nil if directory already exists. 86 | return os.MkdirAll(path, 0755) 87 | } 88 | return nil 89 | } 90 | 91 | // writeFile writes checkpoint to path in a single transaction. 92 | func writeFile(path string, data []byte) (retErr error) { 93 | // Create a temporary file in the base directory of `path` with a prefix. 94 | tmpFile, err := ioutil.TempFile(filepath.Dir(path), tmpPrefix) 95 | if err != nil { 96 | return err 97 | } 98 | 99 | tmpPath := tmpFile.Name() 100 | shouldClose := true 101 | 102 | defer func() { 103 | // Close the file. 104 | if shouldClose { 105 | if err := tmpFile.Close(); err != nil { 106 | if retErr == nil { 107 | retErr = fmt.Errorf("close error: %v", err) 108 | } else { 109 | retErr = fmt.Errorf("failed to close temp file after error %v; close error: %v", retErr, err) 110 | } 111 | } 112 | } 113 | 114 | // Clean up the temp file on error. 115 | if retErr != nil && tmpPath != "" { 116 | if err := removePath(tmpPath); err != nil { 117 | retErr = fmt.Errorf("failed to remove the temporary file (%q) after error %v; remove error: %v", tmpPath, retErr, err) 118 | } 119 | } 120 | }() 121 | 122 | // Write checkpoint. 123 | if _, err := tmpFile.Write(data); err != nil { 124 | return err 125 | } 126 | 127 | // Sync file. 128 | if err := tmpFile.Sync(); err != nil { 129 | return err 130 | } 131 | 132 | // Closing the file before renaming. 
133 | err = tmpFile.Close() 134 | shouldClose = false 135 | if err != nil { 136 | return err 137 | } 138 | 139 | return os.Rename(tmpPath, path) 140 | } 141 | 142 | func removePath(path string) error { 143 | if err := os.Remove(path); err != nil && !os.IsNotExist(err) { 144 | return err 145 | } 146 | return nil 147 | } 148 | -------------------------------------------------------------------------------- /pkg/services/allocator/dummy/allocator.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package dummy 19 | 20 | import ( 21 | "context" 22 | "fmt" 23 | "time" 24 | 25 | "tkestack.io/gpu-manager/pkg/config" 26 | "tkestack.io/gpu-manager/pkg/device" 27 | "tkestack.io/gpu-manager/pkg/services/response" 28 | 29 | // Register test allocator controller 30 | _ "tkestack.io/gpu-manager/pkg/device/dummy" 31 | "tkestack.io/gpu-manager/pkg/services/allocator" 32 | 33 | "k8s.io/client-go/kubernetes" 34 | "k8s.io/klog" 35 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" 36 | ) 37 | 38 | func init() { 39 | allocator.Register("dummy", NewDummyAllocator) 40 | } 41 | 42 | //DummyAllocator is a struct{} 43 | type DummyAllocator struct { 44 | } 45 | 46 | var _ allocator.GPUTopoService = &DummyAllocator{} 47 | 48 | //NewDummyAllocator returns a new DummyAllocator 49 | func NewDummyAllocator(_ *config.Config, _ device.GPUTree, _ kubernetes.Interface, _ response.Manager) allocator.GPUTopoService { 50 | return &DummyAllocator{} 51 | } 52 | 53 | //Allocate returns /dev/fuse for dummy device 54 | func (ta *DummyAllocator) Allocate(_ context.Context, reqs *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) { 55 | resps := &pluginapi.AllocateResponse{} 56 | for range reqs.ContainerRequests { 57 | resps.ContainerResponses = append(resps.ContainerResponses, &pluginapi.ContainerAllocateResponse{ 58 | Devices: []*pluginapi.DeviceSpec{ 59 | { 60 | // We use /dev/fuse for dummy device 61 | ContainerPath: "/dev/fuse", 62 | HostPath: "/dev/fuse", 63 | Permissions: "mrw", 64 | }, 65 | }, 66 | }) 67 | } 68 | 69 | return resps, nil 70 | } 71 | 72 | //ListAndWatch not implement 73 | func (ta *DummyAllocator) ListAndWatch(e *pluginapi.Empty, s pluginapi.DevicePlugin_ListAndWatchServer) error { 74 | return fmt.Errorf("not implement") 75 | } 76 | 77 | //ListAndWatchWithResourceName sends dummy device back to server 78 | func (ta *DummyAllocator) ListAndWatchWithResourceName(resourceName string, e *pluginapi.Empty, s 
pluginapi.DevicePlugin_ListAndWatchServer) error { 79 | devs := []*pluginapi.Device{ 80 | { 81 | ID: fmt.Sprintf("dummy-%s-0", resourceName), 82 | Health: pluginapi.Healthy, 83 | }, 84 | } 85 | 86 | s.Send(&pluginapi.ListAndWatchResponse{Devices: devs}) 87 | 88 | // We don't send unhealthy state 89 | for { 90 | time.Sleep(time.Second) 91 | } 92 | 93 | klog.V(2).Infof("ListAndWatch %s exit", resourceName) 94 | 95 | return nil 96 | } 97 | 98 | //GetDevicePluginOptions returns empty DevicePluginOptions 99 | func (ta *DummyAllocator) GetDevicePluginOptions(ctx context.Context, e *pluginapi.Empty) (*pluginapi.DevicePluginOptions, error) { 100 | return &pluginapi.DevicePluginOptions{}, nil 101 | } 102 | 103 | //PreStartContainer returns empty PreStartContainerResponse 104 | func (ta *DummyAllocator) PreStartContainer(ctx context.Context, req *pluginapi.PreStartContainerRequest) (*pluginapi.PreStartContainerResponse, error) { 105 | return &pluginapi.PreStartContainerResponse{}, nil 106 | } 107 | -------------------------------------------------------------------------------- /pkg/services/allocator/nvidia/evaluator.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
// Evaluator is the api a scheduling algorithm implements.
type Evaluator interface {
	// Evaluate takes a requested amount of cores and memory and returns the
	// device nodes selected for it.
	// NOTE(review): units of cores/memory and the meaning of an empty result
	// are not visible here - confirm against the implementations.
	Evaluate(cores int64, memory int64) []*node.NvidiaNode
}
You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package allocator 19 | 20 | import ( 21 | "tkestack.io/gpu-manager/pkg/config" 22 | "tkestack.io/gpu-manager/pkg/device" 23 | "tkestack.io/gpu-manager/pkg/services/response" 24 | 25 | "k8s.io/client-go/kubernetes" 26 | "k8s.io/klog" 27 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" 28 | ) 29 | 30 | //GPUTopoService is server api for GPU topology service 31 | type GPUTopoService interface { 32 | pluginapi.DevicePluginServer 33 | ListAndWatchWithResourceName(string, *pluginapi.Empty, pluginapi.DevicePlugin_ListAndWatchServer) error 34 | } 35 | 36 | //NewFunc represents function for creating new GPUTopoService 37 | type NewFunc func(cfg *config.Config, 38 | tree device.GPUTree, 39 | k8sClient kubernetes.Interface, 40 | responseManager response.Manager) GPUTopoService 41 | 42 | var ( 43 | factory = make(map[string]NewFunc) 44 | ) 45 | 46 | //Register stores NewFunc in factory 47 | func Register(name string, item NewFunc) { 48 | if _, ok := factory[name]; ok { 49 | return 50 | } 51 | 52 | klog.V(2).Infof("Register NewFunc with name %s", name) 53 | 54 | factory[name] = item 55 | } 56 | 57 | //NewFuncForName tries to find NewFunc by name, return nil if not found 58 | func NewFuncForName(name string) NewFunc { 59 | if item, ok := factory[name]; ok { 60 | return item 61 | } 62 | 63 | klog.V(2).Infof("Can not find NewFunc with name %s", name) 64 | 65 | return nil 66 | } 67 | -------------------------------------------------------------------------------- /pkg/services/display/helper.go: 
// containerToCgroup maps a container name to the cgroup string recorded for it.
type containerToCgroup map[string]string

// podGPUs keeps, for every pod UID, the container-name -> cgroup entries that
// were inserted for that pod.
type podGPUs struct {
	podGPUMapping map[string]containerToCgroup
}

// newPodGPUs returns an empty, ready-to-use podGPUs.
func newPodGPUs() *podGPUs {
	return &podGPUs{podGPUMapping: map[string]containerToCgroup{}}
}

// pods returns the UIDs of all known pods in map iteration order. The result
// is never nil, even when no pod is known.
func (pgpu *podGPUs) pods() []string {
	uids := []string{}
	for uid := range pgpu.podGPUMapping {
		uids = append(uids, uid)
	}
	return uids
}

// insert records cgroup for container contName of pod podUID, creating the
// per-pod map on first use and overwriting any previous value.
func (pgpu *podGPUs) insert(podUID, contName string, cgroup string) {
	containers, known := pgpu.podGPUMapping[podUID]
	if !known {
		containers = make(containerToCgroup)
		pgpu.podGPUMapping[podUID] = containers
	}
	containers[contName] = cgroup
}

// getCgroup returns the cgroup recorded for podUID/contName, or "" when the
// pod or the container is unknown.
func (pgpu *podGPUs) getCgroup(podUID, contName string) string {
	// Indexing a missing (nil) inner map yields the zero value, so a single
	// chained lookup covers both "unknown pod" and "unknown container".
	return pgpu.podGPUMapping[podUID][contName]
}

// delete forgets pod uid and returns the cgroup values that were recorded for
// its containers (nil when the pod was unknown or had no containers).
func (pgpu *podGPUs) delete(uid string) []string {
	containers := pgpu.podGPUMapping[uid]

	var removed []string
	for name := range containers {
		removed = append(removed, containers[name])
	}

	delete(pgpu.podGPUMapping, uid)

	return removed
}
66 | 67 | return resp 68 | } 69 | 70 | func (m *fakeResponseManager) ListAll() map[string]containerResponseDataMapping { 71 | return m.data 72 | } 73 | -------------------------------------------------------------------------------- /pkg/services/response/manager.go: -------------------------------------------------------------------------------- 1 | package response 2 | 3 | import ( 4 | "sync" 5 | 6 | "k8s.io/klog" 7 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" 8 | 9 | "tkestack.io/gpu-manager/pkg/types" 10 | "tkestack.io/gpu-manager/pkg/utils" 11 | ) 12 | 13 | type Manager interface { 14 | InsertResp(podUID, containerName string, resp *pluginapi.ContainerAllocateResponse) 15 | DeleteResp(podUID, containerName string) 16 | GetResp(podUID, containerName string) *pluginapi.ContainerAllocateResponse 17 | ListAll() map[string]containerResponseDataMapping 18 | LoadFromFile(path string) error 19 | } 20 | 21 | var _ Manager = (*responseManager)(nil) 22 | 23 | type responseManager struct { 24 | l sync.Mutex 25 | data map[string]containerResponseDataMapping 26 | } 27 | 28 | type containerResponseDataMapping map[string]*pluginapi.ContainerAllocateResponse 29 | 30 | func NewResponseManager() *responseManager { 31 | return &responseManager{ 32 | data: make(map[string]containerResponseDataMapping), 33 | } 34 | } 35 | 36 | func (m *responseManager) LoadFromFile(path string) error { 37 | cp, err := utils.GetCheckpointData(path) 38 | if err != nil { 39 | return err 40 | } 41 | 42 | for _, item := range cp.PodDeviceEntries { 43 | // Only vcore resource has valid response data 44 | if item.ResourceName == types.VCoreAnnotation { 45 | allocResp := &pluginapi.ContainerAllocateResponse{} 46 | if err := allocResp.Unmarshal(item.AllocResp); err != nil { 47 | return err 48 | } 49 | 50 | m.InsertResp(item.PodUID, item.ContainerName, allocResp) 51 | } 52 | } 53 | 54 | return nil 55 | } 56 | 57 | func (m *responseManager) InsertResp(podUID, containerName string, allocResp 
*pluginapi.ContainerAllocateResponse) { 58 | m.l.Lock() 59 | defer m.l.Unlock() 60 | 61 | podData, ok := m.data[podUID] 62 | if !ok { 63 | podData = make(containerResponseDataMapping) 64 | m.data[podUID] = podData 65 | } 66 | 67 | podData[containerName] = allocResp 68 | 69 | klog.V(2).Infof("Insert %s/%s allocResp", podUID, containerName) 70 | } 71 | 72 | func (m *responseManager) DeleteResp(podUID string, containerName string) { 73 | m.l.Lock() 74 | defer m.l.Unlock() 75 | 76 | podData, ok := m.data[podUID] 77 | if !ok { 78 | return 79 | } 80 | 81 | _, ok = podData[containerName] 82 | if !ok { 83 | return 84 | } 85 | 86 | klog.V(2).Infof("Delete %s/%s allocResp", podUID, containerName) 87 | 88 | delete(podData, containerName) 89 | 90 | if len(podData) == 0 { 91 | delete(m.data, podUID) 92 | } 93 | } 94 | 95 | func (m *responseManager) GetResp(podUID string, containerName string) *pluginapi.ContainerAllocateResponse { 96 | m.l.Lock() 97 | defer m.l.Unlock() 98 | 99 | podData, ok := m.data[podUID] 100 | if !ok { 101 | return nil 102 | } 103 | 104 | resp, ok := podData[containerName] 105 | if !ok { 106 | return nil 107 | } 108 | 109 | return resp 110 | } 111 | 112 | func (m *responseManager) ListAll() map[string]containerResponseDataMapping { 113 | m.l.Lock() 114 | defer m.l.Unlock() 115 | 116 | snapshot := make(map[string]containerResponseDataMapping) 117 | for uid, containerMapping := range m.data { 118 | podData, ok := snapshot[uid] 119 | if !ok { 120 | podData = make(containerResponseDataMapping) 121 | snapshot[uid] = podData 122 | } 123 | 124 | for name, resp := range containerMapping { 125 | podData[name] = resp 126 | } 127 | } 128 | 129 | return snapshot 130 | } 131 | -------------------------------------------------------------------------------- /pkg/services/volume/ldcache/ldcache.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack 
available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package ldcache 19 | 20 | import ( 21 | "bytes" 22 | "encoding/binary" 23 | "errors" 24 | "os" 25 | "path/filepath" 26 | "syscall" 27 | "unsafe" 28 | ) 29 | 30 | const ldcachePath = "/etc/ld.so.cache" 31 | 32 | const ( 33 | magicString1 = "ld.so-1.7.0" 34 | magicString2 = "glibc-ld.so.cache" 35 | magicVersion = "1.1" 36 | ) 37 | 38 | const ( 39 | flagTypeMask = 0x00ff 40 | flagTypeELF = 0x0001 41 | 42 | flagArchMask = 0xff00 43 | flagArchI386 = 0x0000 44 | flagArchX8664 = 0x0300 45 | flagArchX32 = 0x0800 46 | flagArchPpc64le = 0x0500 47 | ) 48 | 49 | var ErrInvalidCache = errors.New("invalid ld.so.cache file") 50 | 51 | type Header1 struct { 52 | Magic [len(magicString1) + 1]byte // include null delimiter 53 | NLibs uint32 54 | } 55 | 56 | type Entry1 struct { 57 | Flags int32 58 | Key, Value uint32 59 | } 60 | 61 | type Header2 struct { 62 | Magic [len(magicString2)]byte 63 | Version [len(magicVersion)]byte 64 | NLibs uint32 65 | TableSize uint32 66 | _ [3]uint32 // unused 67 | _ uint64 // force 8 byte alignment 68 | } 69 | 70 | type Entry2 struct { 71 | Flags int32 72 | Key, Value uint32 73 | OSVersion uint32 74 | HWCap uint64 75 | } 76 | 77 | type LDCache struct { 78 | *bytes.Reader 79 | 80 | data, libs []byte 81 | header Header2 82 | entries []Entry2 83 | } 84 | 85 | func Open() (*LDCache, error) { 86 | 
f, err := os.Open(ldcachePath) 87 | if err != nil { 88 | return nil, err 89 | } 90 | defer f.Close() 91 | 92 | fi, err := f.Stat() 93 | if err != nil { 94 | return nil, err 95 | } 96 | d, err := syscall.Mmap(int(f.Fd()), 0, int(fi.Size()), 97 | syscall.PROT_READ, syscall.MAP_PRIVATE) 98 | if err != nil { 99 | return nil, err 100 | } 101 | 102 | cache := &LDCache{data: d, Reader: bytes.NewReader(d)} 103 | return cache, cache.parse() 104 | } 105 | 106 | func (c *LDCache) Close() error { 107 | return syscall.Munmap(c.data) 108 | } 109 | 110 | func (c *LDCache) Magic() string { 111 | return string(c.header.Magic[:]) 112 | } 113 | 114 | func (c *LDCache) Version() string { 115 | return string(c.header.Version[:]) 116 | } 117 | 118 | func strn(b []byte, n int) string { 119 | return string(b[:n]) 120 | } 121 | 122 | func (c *LDCache) parse() error { 123 | var header Header1 124 | 125 | // Check for the old format (< glibc-2.2) 126 | if c.Len() <= int(unsafe.Sizeof(header)) { 127 | return ErrInvalidCache 128 | } 129 | if strn(c.data, len(magicString1)) == magicString1 { 130 | if err := binary.Read(c, binary.LittleEndian, &header); err != nil { 131 | return err 132 | } 133 | n := int64(header.NLibs) * int64(unsafe.Sizeof(Entry1{})) 134 | offset, err := c.Seek(n, 1) // skip old entries 135 | if err != nil { 136 | return err 137 | } 138 | n = (-offset) & int64(unsafe.Alignof(c.header)-1) 139 | _, err = c.Seek(n, 1) // skip padding 140 | if err != nil { 141 | return err 142 | } 143 | } 144 | 145 | c.libs = c.data[c.Size()-int64(c.Len()):] // kv offsets start here 146 | if err := binary.Read(c, binary.LittleEndian, &c.header); err != nil { 147 | return err 148 | } 149 | if c.Magic() != magicString2 || c.Version() != magicVersion { 150 | return ErrInvalidCache 151 | } 152 | c.entries = make([]Entry2, c.header.NLibs) 153 | return binary.Read(c, binary.LittleEndian, &c.entries) 154 | } 155 | 156 | func (c *LDCache) Lookup(libs ...string) (paths32, paths64 []string) { 157 | type 
void struct{} 158 | var paths *[]string 159 | 160 | set := make(map[string]void) 161 | prefix := make([][]byte, len(libs)) 162 | 163 | for i := range libs { 164 | prefix[i] = []byte(libs[i]) 165 | } 166 | for _, e := range c.entries { 167 | if ((e.Flags & flagTypeMask) & flagTypeELF) == 0 { 168 | continue 169 | } 170 | switch e.Flags & flagArchMask { 171 | case flagArchX8664: 172 | fallthrough 173 | case flagArchPpc64le: 174 | paths = &paths64 175 | case flagArchX32: 176 | fallthrough 177 | case flagArchI386: 178 | paths = &paths32 179 | default: 180 | continue 181 | } 182 | if e.Key > uint32(len(c.libs)) || e.Value > uint32(len(c.libs)) { 183 | continue 184 | } 185 | lib := c.libs[e.Key:] 186 | value := c.libs[e.Value:] 187 | 188 | for _, p := range prefix { 189 | if bytes.HasPrefix(lib, p) { 190 | n := bytes.IndexByte(value, 0) 191 | if n < 0 { 192 | break 193 | } 194 | path, err := filepath.EvalSymlinks(strn(value, n)) 195 | if err != nil { 196 | break 197 | } 198 | if _, ok := set[path]; ok { 199 | break 200 | } 201 | set[path] = void{} 202 | *paths = append(*paths, path) 203 | break 204 | } 205 | } 206 | } 207 | return 208 | } 209 | -------------------------------------------------------------------------------- /pkg/services/volume/util.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. 
See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package volume 19 | 20 | import ( 21 | "bufio" 22 | "bytes" 23 | "debug/elf" 24 | "encoding/binary" 25 | "io" 26 | "os" 27 | "os/exec" 28 | "path" 29 | "path/filepath" 30 | "regexp" 31 | "strings" 32 | ) 33 | 34 | func which(bins ...string) ([]string, error) { 35 | paths := make([]string, 0, len(bins)) 36 | 37 | out, _ := exec.Command("which", bins...).Output() 38 | r := bufio.NewReader(bytes.NewBuffer(out)) 39 | for { 40 | p, err := r.ReadString('\n') 41 | if err == io.EOF { 42 | break 43 | } 44 | if err != nil { 45 | return nil, err 46 | } 47 | if p = strings.TrimSpace(p); !path.IsAbs(p) { 48 | continue 49 | } 50 | realPath, err := filepath.EvalSymlinks(p) 51 | if err != nil { 52 | return nil, err 53 | } 54 | paths = append(paths, realPath) 55 | } 56 | return paths, nil 57 | } 58 | 59 | func clone(src, dst string) error { 60 | // Prefer hard link, fallback to copy 61 | err := os.Link(src, dst) 62 | if err != nil { 63 | err = fallbackCopy(src, dst) 64 | } 65 | return err 66 | } 67 | 68 | func fallbackCopy(src, dst string) error { 69 | s, err := os.Open(src) 70 | if err != nil { 71 | return err 72 | } 73 | defer s.Close() 74 | 75 | fi, err := s.Stat() 76 | if err != nil { 77 | return err 78 | } 79 | 80 | d, err := os.Create(dst) 81 | if err != nil { 82 | return err 83 | } 84 | 85 | if _, err := io.Copy(d, s); err != nil { 86 | d.Close() 87 | return err 88 | } 89 | 90 | if err := d.Chmod(fi.Mode()); err != nil { 91 | d.Close() 92 | return err 93 | } 94 | 95 | return d.Close() 96 | } 97 | 98 | func blacklisted(file string, obj *elf.File) (bool, error) { 99 | lib := regexp.MustCompile(`^.*/lib([\w-]+)\.so[\d.]*$`) 100 | glcore := regexp.MustCompile(`libnvidia-e?glcore\.so`) 101 | gldispatch := regexp.MustCompile(`libGLdispatch\.so`) 102 | 103 | if m := lib.FindStringSubmatch(file); m != nil { 104 | switch m[1] { 105 | // Blacklist EGL/OpenGL 
libraries issued by other vendors 106 | case "EGL": 107 | fallthrough 108 | case "GLESv1_CM": 109 | fallthrough 110 | case "GLESv2": 111 | fallthrough 112 | case "GL": 113 | deps, err := obj.DynString(elf.DT_NEEDED) 114 | if err != nil { 115 | return false, err 116 | } 117 | for _, d := range deps { 118 | if glcore.MatchString(d) || gldispatch.MatchString(d) { 119 | return false, nil 120 | } 121 | } 122 | return true, nil 123 | 124 | // Blacklist TLS libraries using the old ABI (!= 2.3.99) 125 | case "nvidia-tls": 126 | const abi = 0x6300000003 127 | s, err := obj.Section(".note.ABI-tag").Data() 128 | if err != nil { 129 | return false, err 130 | } 131 | return binary.LittleEndian.Uint64(s[24:]) != abi, nil 132 | } 133 | } 134 | return false, nil 135 | } 136 | -------------------------------------------------------------------------------- /pkg/services/volume/volume.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package volume 19 | 20 | import ( 21 | "debug/elf" 22 | "encoding/json" 23 | "fmt" 24 | "os" 25 | "path" 26 | "path/filepath" 27 | "strconv" 28 | "strings" 29 | 30 | "tkestack.io/gpu-manager/pkg/services/volume/ldcache" 31 | "tkestack.io/gpu-manager/pkg/types" 32 | 33 | "k8s.io/klog" 34 | ) 35 | 36 | //VolumeManager manages volumes used by containers running GPU application 37 | type VolumeManager struct { 38 | Config []Config `json:"volume,omitempty"` 39 | cfgPath string 40 | 41 | cudaControlFile string 42 | cudaSoname map[string]string 43 | mlSoName map[string]string 44 | share bool 45 | } 46 | 47 | type components map[string][]string 48 | 49 | //Config contains volume details in config file 50 | type Config struct { 51 | Name string `json:"name,omitempty"` 52 | Mode string `json:"mode,omitempty"` 53 | Components components `json:"components,omitempty"` 54 | BasePath string `json:"base,omitempty"` 55 | } 56 | 57 | const ( 58 | binDir = "bin" 59 | lib32Dir = "lib" 60 | lib64Dir = "lib64" 61 | ) 62 | 63 | type volumeDir struct { 64 | name string 65 | files []string 66 | } 67 | 68 | //Volume contains directory and file info of volume 69 | type Volume struct { 70 | Path string 71 | dirs []volumeDir 72 | } 73 | 74 | //VolumeMap stores Volume for each type 75 | type VolumeMap map[string]*Volume 76 | 77 | //NewVolumeManager returns a new VolumeManager 78 | func NewVolumeManager(config string, share bool) (*VolumeManager, error) { 79 | f, err := os.Open(config) 80 | if err != nil { 81 | return nil, err 82 | } 83 | 84 | defer f.Close() 85 | 86 | volumeManager := &VolumeManager{ 87 | cfgPath: filepath.Dir(config), 88 | cudaSoname: make(map[string]string), 89 | mlSoName: make(map[string]string), 90 | share: share, 91 | } 92 | 93 | if err := json.NewDecoder(f).Decode(volumeManager); err != nil { 94 | return nil, err 95 | } 96 | 97 | return volumeManager, nil 98 | } 99 | 100 | //Run starts a VolumeManager 101 | func (vm *VolumeManager) Run() (err error) { 
102 | cache, err := ldcache.Open() 103 | if err != nil { 104 | return err 105 | } 106 | 107 | defer func() { 108 | if e := cache.Close(); err == nil { 109 | err = e 110 | } 111 | }() 112 | 113 | vols := make(VolumeMap) 114 | for _, cfg := range vm.Config { 115 | vol := &Volume{ 116 | Path: path.Join(cfg.BasePath, cfg.Name), 117 | } 118 | 119 | if cfg.Name == "nvidia" { 120 | types.DriverLibraryPath = filepath.Join(cfg.BasePath, cfg.Name) 121 | } else { 122 | types.DriverOriginLibraryPath = filepath.Join(cfg.BasePath, cfg.Name) 123 | } 124 | 125 | for t, c := range cfg.Components { 126 | switch t { 127 | case "binaries": 128 | bins, err := which(c...) 129 | if err != nil { 130 | return err 131 | } 132 | 133 | klog.V(2).Infof("Find binaries: %+v", bins) 134 | 135 | vol.dirs = append(vol.dirs, volumeDir{binDir, bins}) 136 | case "libraries": 137 | libs32, libs64 := cache.Lookup(c...) 138 | klog.V(2).Infof("Find 32bit libraries: %+v", libs32) 139 | klog.V(2).Infof("Find 64bit libraries: %+v", libs64) 140 | 141 | vol.dirs = append(vol.dirs, volumeDir{lib32Dir, libs32}, volumeDir{lib64Dir, libs64}) 142 | } 143 | 144 | vols[cfg.Name] = vol 145 | } 146 | } 147 | 148 | if err := vm.mirror(vols); err != nil { 149 | return err 150 | } 151 | 152 | klog.V(2).Infof("Volume manager is running") 153 | 154 | return nil 155 | } 156 | 157 | // #lizard forgives 158 | func (vm *VolumeManager) mirror(vols VolumeMap) error { 159 | for driver, vol := range vols { 160 | if exist, _ := vol.exist(); !exist { 161 | if err := os.MkdirAll(vol.Path, 0755); err != nil { 162 | return err 163 | } 164 | } 165 | 166 | for _, d := range vol.dirs { 167 | vpath := path.Join(vol.Path, d.name) 168 | if err := os.MkdirAll(vpath, 0755); err != nil { 169 | return err 170 | } 171 | 172 | // For each file matching the volume components (blacklist excluded), create a hardlink/copy 173 | // of it inside the volume directory. 
We also need to create soname symlinks similar to what 174 | // ldconfig does since our volume will only show up at runtime. 175 | for _, f := range d.files { 176 | klog.V(2).Infof("Mirror %s to %s", f, vpath) 177 | if err := vm.mirrorFiles(driver, vpath, f); err != nil { 178 | return err 179 | } 180 | 181 | if strings.HasPrefix(path.Base(f), "libcuda.so") { 182 | driverStr := strings.SplitN(strings.TrimPrefix(path.Base(f), "libcuda.so."), ".", 2) 183 | types.DriverVersionMajor, _ = strconv.Atoi(driverStr[0]) 184 | types.DriverVersionMinor, _ = strconv.Atoi(driverStr[1]) 185 | klog.V(2).Infof("Driver version: %d.%d", types.DriverVersionMajor, types.DriverVersionMinor) 186 | } 187 | 188 | if strings.HasPrefix(path.Base(f), "libcuda-control.so") { 189 | vm.cudaControlFile = f 190 | } 191 | } 192 | } 193 | } 194 | 195 | vCudaFileFn := func(soFile string) error { 196 | if err := os.Remove(soFile); err != nil { 197 | if !os.IsNotExist(err) { 198 | return err 199 | } 200 | } 201 | if err := clone(vm.cudaControlFile, soFile); err != nil { 202 | return err 203 | } 204 | 205 | klog.V(2).Infof("Vcuda %s to %s", vm.cudaControlFile, soFile) 206 | 207 | l := strings.TrimRight(soFile, ".0123456789") 208 | if err := os.Remove(l); err != nil { 209 | if !os.IsNotExist(err) { 210 | return err 211 | } 212 | } 213 | if err := clone(vm.cudaControlFile, l); err != nil { 214 | return err 215 | } 216 | klog.V(2).Infof("Vcuda %s to %s", vm.cudaControlFile, l) 217 | return nil 218 | } 219 | 220 | if vm.share && len(vm.cudaControlFile) > 0 { 221 | if len(vm.cudaSoname) > 0 { 222 | for _, f := range vm.cudaSoname { 223 | if err := vCudaFileFn(f); err != nil { 224 | return err 225 | } 226 | } 227 | } 228 | 229 | if len(vm.mlSoName) > 0 { 230 | for _, f := range vm.mlSoName { 231 | if err := vCudaFileFn(f); err != nil { 232 | return err 233 | } 234 | } 235 | } 236 | } 237 | 238 | return nil 239 | } 240 | 241 | // #lizard forgives 242 | func (vm *VolumeManager) mirrorFiles(driver, vpath string, 
file string) error { 243 | obj, err := elf.Open(file) 244 | if err != nil { 245 | return fmt.Errorf("%s: %v", file, err) 246 | } 247 | defer obj.Close() 248 | 249 | ok, err := blacklisted(file, obj) 250 | if err != nil { 251 | return fmt.Errorf("%s: %v", file, err) 252 | } 253 | 254 | if ok { 255 | return nil 256 | } 257 | 258 | l := path.Join(vpath, path.Base(file)) 259 | if err := removeFile(l); err != nil { 260 | return err 261 | } 262 | 263 | if err := clone(file, l); err != nil { 264 | return err 265 | } 266 | 267 | soname, err := obj.DynString(elf.DT_SONAME) 268 | if err != nil { 269 | return fmt.Errorf("%s: %v", file, err) 270 | } 271 | 272 | if len(soname) > 0 { 273 | l = path.Join(vpath, soname[0]) 274 | if err := linkIfNotSameName(path.Base(file), l); err != nil && !os.IsExist(err) { 275 | return err 276 | } 277 | 278 | // XXX Many applications (wrongly) assume that libcuda.so exists (e.g. with dlopen) 279 | // Hardcode the libcuda symlink for the time being. 280 | if strings.Contains(driver, "nvidia") { 281 | // Remove libcuda symbol link 282 | if vm.share && driver == "nvidia" && strings.HasPrefix(soname[0], "libcuda.so") { 283 | os.Remove(l) 284 | vm.cudaSoname[l] = l 285 | } 286 | 287 | // Remove libnvidia-ml symbol link 288 | if vm.share && driver == "nvidia" && strings.HasPrefix(soname[0], "libnvidia-ml.so") { 289 | os.Remove(l) 290 | vm.mlSoName[l] = l 291 | } 292 | 293 | // XXX GLVND requires this symlink for indirect GLX support 294 | // It won't be needed once we have an indirect GLX vendor neutral library. 
295 | if strings.HasPrefix(soname[0], "libGLX_nvidia") { 296 | l = strings.Replace(l, "GLX_nvidia", "GLX_indirect", 1) 297 | if err := linkIfNotSameName(path.Base(file), l); err != nil && !os.IsExist(err) { 298 | return err 299 | } 300 | } 301 | } 302 | } 303 | 304 | return nil 305 | } 306 | 307 | func (v *Volume) exist() (bool, error) { 308 | _, err := os.Stat(v.Path) 309 | if os.IsNotExist(err) { 310 | return false, nil 311 | } 312 | 313 | return true, err 314 | } 315 | 316 | func (v *Volume) remove() error { 317 | return os.RemoveAll(v.Path) 318 | } 319 | 320 | func removeFile(file string) error { 321 | if err := os.Remove(file); err != nil { 322 | if !os.IsNotExist(err) { 323 | return err 324 | } 325 | } 326 | 327 | return nil 328 | } 329 | 330 | func linkIfNotSameName(src, dst string) error { 331 | if path.Base(src) != path.Base(dst) { 332 | if err := removeFile(dst); err != nil { 333 | if !os.IsNotExist(err) { 334 | return err 335 | } 336 | } 337 | 338 | l := strings.TrimRight(dst, ".0123456789") 339 | if err := removeFile(l); err != nil { 340 | if !os.IsExist(err) { 341 | return err 342 | } 343 | } 344 | 345 | if err := os.Symlink(src, l); err != nil && !os.IsExist(err) { 346 | return err 347 | } 348 | 349 | if err := os.Symlink(src, dst); err != nil && !os.IsExist(err) { 350 | return err 351 | } 352 | } 353 | 354 | return nil 355 | } 356 | -------------------------------------------------------------------------------- /pkg/services/watchdog/label.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. 
You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package watchdog 19 | 20 | import ( 21 | "os" 22 | "regexp" 23 | "time" 24 | 25 | "k8s.io/apimachinery/pkg/api/errors" 26 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 27 | "k8s.io/apimachinery/pkg/util/wait" 28 | v1core "k8s.io/client-go/kubernetes/typed/core/v1" 29 | "k8s.io/klog" 30 | "tkestack.io/nvml" 31 | ) 32 | 33 | const ( 34 | gpuModelLabel = "gaia.tencent.com/gpu-model" 35 | ) 36 | 37 | type labelFunc interface { 38 | GetLabel() string 39 | } 40 | 41 | type nodeLabeler struct { 42 | hostName string 43 | client v1core.CoreV1Interface 44 | labelMapper map[string]labelFunc 45 | } 46 | 47 | type modelFunc struct{} 48 | type stringFunc string 49 | 50 | var modelFn = modelFunc{} 51 | 52 | func (m modelFunc) GetLabel() (model string) { 53 | if err := nvml.Init(); err != nil { 54 | klog.Warningf("Can't initialize nvml library, %v", err) 55 | return 56 | } 57 | 58 | defer nvml.Shutdown() 59 | 60 | // Assume all devices on this node are the same model 61 | dev, err := nvml.DeviceGetHandleByIndex(0) 62 | if err != nil { 63 | klog.Warningf("Can't get device 0 information, %v", err) 64 | return 65 | } 66 | 67 | rawName, err := dev.DeviceGetName() 68 | if err != nil { 69 | klog.Warningf("Can't get device name, %v", err) 70 | return 71 | } 72 | 73 | klog.V(4).Infof("GPU name: %s", rawName) 74 | 75 | return getTypeName(rawName) 76 | } 77 | 78 | func (s stringFunc) GetLabel() string { 79 | return string(s) 80 | } 81 | 82 | var modelNameSplitPattern = regexp.MustCompile("\\s+") 83 | 84 | func getTypeName(name string) string { 85 | 
splits := modelNameSplitPattern.Split(name, -1) 86 | 87 | if len(splits) > 2 { 88 | return splits[1] 89 | } 90 | 91 | klog.V(4).Infof("GPU name splits: %v", splits) 92 | 93 | return "" 94 | } 95 | 96 | //NewNodeLabeler returns a new nodeLabeler 97 | func NewNodeLabeler(client v1core.CoreV1Interface, hostname string, labels map[string]string) *nodeLabeler { 98 | if len(hostname) == 0 { 99 | hostname, _ = os.Hostname() 100 | } 101 | 102 | klog.V(2).Infof("Labeler for hostname %s", hostname) 103 | 104 | labelMapper := make(map[string]labelFunc) 105 | for k, v := range labels { 106 | if k == gpuModelLabel { 107 | labelMapper[k] = modelFn 108 | } else { 109 | labelMapper[k] = stringFunc(v) 110 | } 111 | } 112 | 113 | return &nodeLabeler{ 114 | hostName: hostname, 115 | client: client, 116 | labelMapper: labelMapper, 117 | } 118 | } 119 | 120 | func (nl *nodeLabeler) Run() error { 121 | err := wait.PollImmediate(time.Second, time.Minute, func() (bool, error) { 122 | node, err := nl.client.Nodes().Get(nl.hostName, metav1.GetOptions{}) 123 | if err != nil { 124 | return false, err 125 | } 126 | 127 | for k, fn := range nl.labelMapper { 128 | l := fn.GetLabel() 129 | if len(l) == 0 { 130 | klog.Warningf("Empty label for %s", k) 131 | continue 132 | } 133 | 134 | klog.V(2).Infof("Label %s %s=%s", nl.hostName, k, l) 135 | node.Labels[k] = l 136 | } 137 | 138 | _, updateErr := nl.client.Nodes().Update(node) 139 | if updateErr != nil { 140 | if errors.IsConflict(updateErr) { 141 | return false, nil 142 | } 143 | return true, updateErr 144 | } 145 | 146 | return true, nil 147 | }) 148 | 149 | if err != nil { 150 | return err 151 | } 152 | 153 | klog.V(2).Infof("Auto label is running") 154 | 155 | return nil 156 | } 157 | -------------------------------------------------------------------------------- /pkg/services/watchdog/label_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source 
community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package watchdog 19 | 20 | import ( 21 | "flag" 22 | "testing" 23 | "time" 24 | 25 | "k8s.io/api/core/v1" 26 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 27 | "k8s.io/apimachinery/pkg/util/wait" 28 | "k8s.io/client-go/kubernetes/fake" 29 | ) 30 | 31 | func init() { 32 | flag.Set("v", "4") 33 | flag.Set("logtostderr", "true") 34 | } 35 | 36 | func TestNodeLabeler(t *testing.T) { 37 | flag.Parse() 38 | nodeName := "testnode" 39 | testKey := "testkey" 40 | testValue := "testvalue" 41 | labels := make(map[string]string) 42 | labels[testKey] = testValue 43 | 44 | // create node with fake client 45 | k8sclient := fake.NewSimpleClientset() 46 | node := &v1.Node{ 47 | ObjectMeta: metav1.ObjectMeta{ 48 | Name: nodeName, 49 | Labels: make(map[string]string), 50 | }, 51 | } 52 | k8sclient.CoreV1().Nodes().Create(node) 53 | 54 | // create nodeLabeler and run 55 | nodeLabeler := NewNodeLabeler(k8sclient.CoreV1(), nodeName, labels) 56 | go nodeLabeler.Run() 57 | 58 | // check if nodeLabeler work well 59 | err := wait.PollImmediate(time.Second, time.Minute, func() (bool, error) { 60 | node, err := k8sclient.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{}) 61 | if err != nil { 62 | return false, err 63 | } 64 | if v, ok := node.Labels[testKey]; !ok || v != testValue { 65 | return false, 
nil 66 | } 67 | return true, nil 68 | }) 69 | if err != nil { 70 | t.Fatalf("test failed: %s", err.Error()) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /pkg/services/watchdog/watchdog.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package watchdog 19 | 20 | import ( 21 | "fmt" 22 | "time" 23 | 24 | "tkestack.io/gpu-manager/pkg/utils" 25 | 26 | "k8s.io/api/core/v1" 27 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 28 | "k8s.io/apimachinery/pkg/fields" 29 | "k8s.io/client-go/informers" 30 | informerCore "k8s.io/client-go/informers/core/v1" 31 | "k8s.io/client-go/kubernetes" 32 | "k8s.io/klog" 33 | ) 34 | 35 | const ( 36 | podHostField = "spec.nodeName" 37 | ) 38 | 39 | //PodCache contains a podInformer of pod 40 | type PodCache struct { 41 | podInformer informerCore.PodInformer 42 | } 43 | 44 | var ( 45 | podCache *PodCache 46 | ) 47 | 48 | //NewPodCache creates a new podCache 49 | func NewPodCache(client kubernetes.Interface, hostName string) { 50 | podCache = new(PodCache) 51 | 52 | factory := informers.NewSharedInformerFactoryWithOptions(client, time.Minute, 53 | informers.WithTweakListOptions(func(options *metav1.ListOptions) { 54 | options.FieldSelector = fields.OneTermEqualSelector(podHostField, hostName).String() 55 | })) 56 | podCache.podInformer = factory.Core().V1().Pods() 57 | 58 | ch := make(chan struct{}) 59 | go podCache.podInformer.Informer().Run(ch) 60 | 61 | for !podCache.podInformer.Informer().HasSynced() { 62 | time.Sleep(time.Second) 63 | } 64 | klog.V(2).Infof("Pod cache is running") 65 | } 66 | 67 | //NewPodCacheForTest creates a new podCache for testing 68 | func NewPodCacheForTest(client kubernetes.Interface) { 69 | podCache = new(PodCache) 70 | 71 | informers := informers.NewSharedInformerFactory(client, 0) 72 | podCache.podInformer = informers.Core().V1().Pods() 73 | podCache.podInformer.Informer().AddEventHandler(podCache) 74 | ch := make(chan struct{}) 75 | informers.Start(ch) 76 | 77 | for !podCache.podInformer.Informer().HasSynced() { 78 | time.Sleep(time.Second) 79 | } 80 | klog.V(2).Infof("Pod cache is running") 81 | } 82 | 83 | //OnAdd is a callback function for podInformer, do nothing for now. 
84 | func (p *PodCache) OnAdd(obj interface{}) {} 85 | 86 | //OnUpdate is a callback function for podInformer, do nothing for now. 87 | func (p *PodCache) OnUpdate(oldObj, newObj interface{}) {} 88 | 89 | //OnDelete is a callback function for podInformer, do nothing for now. 90 | func (p *PodCache) OnDelete(obj interface{}) {} 91 | 92 | //GetActivePods get all active pods from podCache and returns them. 93 | func GetActivePods() map[string]*v1.Pod { 94 | if podCache == nil { 95 | return nil 96 | } 97 | 98 | activePods := make(map[string]*v1.Pod) 99 | 100 | for _, item := range podCache.podInformer.Informer().GetStore().List() { 101 | pod, ok := item.(*v1.Pod) 102 | if !ok { 103 | continue 104 | } 105 | 106 | if podIsTerminated(pod) { 107 | continue 108 | } 109 | 110 | if !utils.IsGPURequiredPod(pod) { 111 | continue 112 | } 113 | 114 | activePods[string(pod.UID)] = pod 115 | } 116 | 117 | return activePods 118 | } 119 | 120 | func GetPod(namespace, name string) (*v1.Pod, error) { 121 | pod, err := podCache.podInformer.Lister().Pods(namespace).Get(name) 122 | if err != nil { 123 | return nil, err 124 | } 125 | 126 | if podIsTerminated(pod) { 127 | return nil, fmt.Errorf("terminated pod") 128 | } 129 | 130 | if !utils.IsGPURequiredPod(pod) { 131 | return nil, fmt.Errorf("no gpu pod") 132 | } 133 | 134 | return pod, nil 135 | } 136 | 137 | func podIsTerminated(pod *v1.Pod) bool { 138 | return pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded || (pod.DeletionTimestamp != nil && notRunning(pod.Status.ContainerStatuses)) 139 | } 140 | 141 | // notRunning returns true if every status is terminated or waiting, or the status list 142 | // is empty. 
143 | func notRunning(statuses []v1.ContainerStatus) bool { 144 | for _, status := range statuses { 145 | if status.State.Terminated == nil && status.State.Waiting == nil { 146 | return false 147 | } 148 | } 149 | return true 150 | } 151 | -------------------------------------------------------------------------------- /pkg/services/watchdog/watchdog_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
16 | */ 17 | 18 | package watchdog 19 | 20 | import ( 21 | "flag" 22 | "fmt" 23 | "testing" 24 | "time" 25 | 26 | "tkestack.io/gpu-manager/pkg/types" 27 | 28 | "k8s.io/api/core/v1" 29 | "k8s.io/apimachinery/pkg/api/resource" 30 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 31 | k8stypes "k8s.io/apimachinery/pkg/types" 32 | "k8s.io/apimachinery/pkg/util/wait" 33 | "k8s.io/client-go/kubernetes/fake" 34 | ) 35 | 36 | func init() { 37 | flag.Set("v", "4") 38 | flag.Set("logtostderr", "true") 39 | } 40 | 41 | func TestWatchdog(t *testing.T) { 42 | flag.Parse() 43 | podName := "testpod" 44 | podUID := "testuid" 45 | ns := "test-ns" 46 | containerName := "test-container" 47 | // create pod with fake client 48 | k8sclient := fake.NewSimpleClientset() 49 | pod := &v1.Pod{ 50 | ObjectMeta: metav1.ObjectMeta{ 51 | Name: podName, 52 | UID: k8stypes.UID(podUID), 53 | }, 54 | Spec: v1.PodSpec{Containers: []v1.Container{ 55 | { 56 | Name: containerName, 57 | Resources: v1.ResourceRequirements{ 58 | Limits: v1.ResourceList{ 59 | types.VCoreAnnotation: resource.MustParse(fmt.Sprintf("%d", 1)), 60 | types.VMemoryAnnotation: resource.MustParse(fmt.Sprintf("%d", 1)), 61 | }, 62 | }, 63 | }, 64 | }}, 65 | Status: v1.PodStatus{Phase: v1.PodRunning}, 66 | } 67 | k8sclient.CoreV1().Pods(ns).Create(pod) 68 | 69 | // create watchdog and run 70 | NewPodCacheForTest(k8sclient) 71 | 72 | // check if watchdog work well 73 | err := wait.PollImmediate(time.Second, time.Minute, func() (bool, error) { 74 | activepods := GetActivePods() 75 | if v, ok := activepods[podUID]; !ok || v.Name != podName { 76 | t.Logf("can't find pod %s", podName) 77 | return false, nil 78 | } 79 | return true, nil 80 | }) 81 | if err != nil { 82 | t.Fatalf("test failed: %s", err.Error()) 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /pkg/types/types.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to 
// Resource names, annotation keys, paths and message fragments shared across
// the gpu-manager packages.
const (
	// VDeviceAnnotation is the key (matched by suffix in utils.GetGPUData)
	// whose value is a comma-separated list of device names.
	VDeviceAnnotation = "tencent.com/vcuda-device"
	// VCoreAnnotation is used both as a container resource-limit name and as
	// an annotation key suffix carrying an integer core amount.
	VCoreAnnotation = "tencent.com/vcuda-core"
	// NOTE(review): usage of VCoreLimitAnnotation is not visible here — confirm.
	VCoreLimitAnnotation = "tencent.com/vcuda-core-limit"
	// VMemoryAnnotation is used both as a container resource-limit name and
	// as an annotation key suffix carrying an integer memory amount.
	VMemoryAnnotation = "tencent.com/vcuda-memory"
	// PredicateTimeAnnotation is the pod annotation holding an unsigned
	// base-10 integer timestamp (parsed by utils.GetPredicateTimeOfPod).
	PredicateTimeAnnotation = "tencent.com/predicate-time"
	// NOTE(review): presumably suffixed with a container index — confirm against callers.
	PredicateGPUIndexPrefix = "tencent.com/predicate-gpu-idx-"
	// GPUAssigned is the pod annotation whose value is compared against
	// "true"/"false" to decide whether the pod has been assigned.
	GPUAssigned = "tencent.com/gpu-assigned"
	// NOTE(review): usage of ClusterNameAnnotation is not visible here — confirm.
	ClusterNameAnnotation = "clusterName"

	// VCUDA_MOUNTPOINT is the container path that identifies the vcuda mount
	// in an allocate response (see utils.GetVirtualControllerMountPath).
	VCUDA_MOUNTPOINT = "/etc/vcuda"

	// MemoryBlockSize equals 256 * 1024 * 1024 (256 MiB expressed in bytes).
	MemoryBlockSize = 268435456

	// NOTE(review): socket file names; the directories they live in are not visible here.
	KubeletSocket = "kubelet.sock"
	VDeviceSocket = "vcuda.sock"
	// CheckPointFileName is joined with the device plugin directory to locate
	// the checkpoint file read by utils.GetCheckpointData.
	CheckPointFileName = "kubelet_internal_checkpoint"
	// PreStartContainerCheckErrMsg is searched for in a container's waiting
	// message by utils.ShouldDelete.
	PreStartContainerCheckErrMsg = "PreStartContainer check failed"
	// NOTE(review): usage of PreStartContainerCheckErrType is not visible here — confirm.
	PreStartContainerCheckErrType = "PreStartContainerCheckErr"
	// UnexpectedAdmissionErrType is compared against pod.Status.Reason by
	// utils.ShouldDelete.
	UnexpectedAdmissionErrType = "UnexpectedAdmissionError"
)
// VCudaRequest bundles the data of one vcuda request for a container.
// NOTE(review): the field descriptions below are inferred from names and
// types only; confirm them against the code that produces and consumes
// this type.
type VCudaRequest struct {
	// PodUID — presumably the UID of the pod the request belongs to.
	PodUID string
	// AllocateResponse is a device plugin (v1beta1) container allocate response.
	AllocateResponse *pluginapi.ContainerAllocateResponse
	// ContainerName — presumably the name of the container inside the pod.
	ContainerName string
	//Deprecated
	Cores int64
	//Deprecated
	Memory int64
	// Done carries an error value; presumably used to report the outcome of
	// handling the request back to the sender — TODO confirm.
	Done chan error
}
name of a cgroup prior to any driver specific conversion. 14 | // It is specified as a list of strings from its individual components, such as: 15 | // {"kubepods", "burstable", "pod1234-abcd-5678-efgh"} 16 | type CgroupName []string 17 | 18 | const ( 19 | // systemdSuffix is the cgroup name suffix for systemd 20 | systemdSuffix string = ".slice" 21 | ) 22 | 23 | // NewCgroupName composes a new cgroup name. 24 | // Use RootCgroupName as base to start at the root. 25 | // This function does some basic check for invalid characters at the name. 26 | func NewCgroupName(base CgroupName, components ...string) CgroupName { 27 | for _, component := range components { 28 | // Forbit using "_" in internal names. When remapping internal 29 | // names to systemd cgroup driver, we want to remap "-" => "_", 30 | // so we forbid "_" so that we can always reverse the mapping. 31 | if strings.Contains(component, "/") || strings.Contains(component, "_") { 32 | panic(fmt.Errorf("invalid character in component [%q] of CgroupName", component)) 33 | } 34 | } 35 | // copy data from the base cgroup to eliminate cases where CgroupNames share underlying slices. See #68416 36 | baseCopy := make([]string, len(base)) 37 | copy(baseCopy, base) 38 | return CgroupName(append(baseCopy, components...)) 39 | } 40 | 41 | // cgroupName.ToSystemd converts the internal cgroup name to a systemd name. 42 | // For example, the name {"kubepods", "burstable", "pod1234-abcd-5678-efgh"} becomes 43 | // "/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod1234_abcd_5678_efgh.slice" 44 | // This function always expands the systemd name into the cgroupfs form. If only 45 | // the last part is needed, use path.Base(...) on it to discard the rest. 
// escapeSystemdCgroupName rewrites one cgroup name component for use in a
// systemd slice name by turning every "-" into "_".
func escapeSystemdCgroupName(part string) string {
	escaped := strings.ReplaceAll(part, "-", "_")
	return escaped
}
See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package utils 19 | 20 | import ( 21 | "context" 22 | "encoding/json" 23 | "fmt" 24 | "io/ioutil" 25 | "net" 26 | "path/filepath" 27 | "regexp" 28 | "sort" 29 | "strconv" 30 | "strings" 31 | "time" 32 | 33 | pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" 34 | 35 | nvtree "tkestack.io/gpu-manager/pkg/device/nvidia" 36 | "tkestack.io/gpu-manager/pkg/types" 37 | 38 | "github.com/fsnotify/fsnotify" 39 | "github.com/pkg/errors" 40 | "google.golang.org/grpc" 41 | v1 "k8s.io/api/core/v1" 42 | apierr "k8s.io/apimachinery/pkg/api/errors" 43 | "k8s.io/klog" 44 | ) 45 | 46 | //constants used in this package 47 | const ( 48 | TruncateLen = 31 49 | kubePrefix = "k8s" 50 | ) 51 | 52 | var ( 53 | //DefaultDialOptions contains default dial options used in grpc dial 54 | DefaultDialOptions = []grpc.DialOption{grpc.WithInsecure(), grpc.WithDialer(UnixDial), grpc.WithBlock()} 55 | ) 56 | 57 | //UnixDial dials to a unix socket using net.DialTimeout 58 | func UnixDial(addr string, timeout time.Duration) (net.Conn, error) { 59 | return net.DialTimeout("unix", addr, timeout) 60 | } 61 | 62 | //IsValidGPUPath checks if path is valid Nvidia GPU device path 63 | func IsValidGPUPath(path string) bool { 64 | return regexp.MustCompile(types.NvidiaFullpathRE).MatchString(path) 65 | } 66 | 67 | //GetGPUMinorID returns id in Nvidia GPU device path 68 | func GetGPUMinorID(path string) (int, error) { 69 | str := regexp.MustCompile(types.NvidiaFullpathRE).FindStringSubmatch(path) 70 | 71 | if len(str) != 2 { 72 | return -1, fmt.Errorf("not match pattern %s", types.NvidiaFullpathRE) 73 | } 74 | 75 | id, _ := strconv.ParseInt(str[1], 10, 32) 76 | 77 | return int(id), nil 78 | } 79 | 80 | //GetGPUData get cores, memory and device names from annotations 81 | func GetGPUData(annotations map[string]string) (gpuUtil int64, gpuMemory int64, deviceNames []string) { 82 | for k, 
v := range annotations { 83 | switch { 84 | case strings.HasSuffix(k, types.VCoreAnnotation): 85 | gpuUtil, _ = strconv.ParseInt(v, 10, 64) 86 | case strings.HasSuffix(k, types.VMemoryAnnotation): 87 | gpuMemory, _ = strconv.ParseInt(v, 10, 64) 88 | case strings.HasSuffix(k, types.VDeviceAnnotation): 89 | deviceNames = strings.Split(annotations[k], ",") 90 | } 91 | } 92 | 93 | return gpuUtil, gpuMemory, deviceNames 94 | } 95 | 96 | //NewFSWatcher returns a file watcher created by fsnotify.NewWatcher 97 | func NewFSWatcher(files ...string) (*fsnotify.Watcher, error) { 98 | watcher, err := fsnotify.NewWatcher() 99 | if err != nil { 100 | return nil, err 101 | } 102 | 103 | for _, f := range files { 104 | err = watcher.Add(f) 105 | if err != nil { 106 | watcher.Close() 107 | return nil, err 108 | } 109 | } 110 | 111 | return watcher, nil 112 | } 113 | 114 | // WaitForServer checks if grpc server is alive 115 | // by making grpc blocking connection to the server socket 116 | func WaitForServer(socket string) error { 117 | conn, err := grpc.DialContext(context.Background(), socket, DefaultDialOptions...) 
118 | if err == nil { 119 | conn.Close() 120 | return nil 121 | } 122 | return errors.Wrapf(err, "Failed dial context at %s", socket) 123 | } 124 | 125 | func GetCheckpointData(devicePluginPath string) (*types.Checkpoint, error) { 126 | cpFile := filepath.Join(devicePluginPath, types.CheckPointFileName) 127 | data, err := ioutil.ReadFile(cpFile) 128 | if err != nil { 129 | return nil, err 130 | } 131 | klog.V(4).Infof("Try NUMA checkpoint data format") 132 | cpNUMAData := &types.CheckpointDataNUMA{} 133 | err = json.Unmarshal(data, cpNUMAData) 134 | if err != nil { 135 | klog.V(4).Infof("Failed NUMA checkpoint data format") 136 | } else { // flat deviceids 137 | v2DeivcesEntryies := make([]types.PodDevicesEntry, len(cpNUMAData.Data.PodDeviceEntries)) 138 | for i, v := range cpNUMAData.Data.PodDeviceEntries { 139 | v2PodDevicesEntry := types.PodDevicesEntry{ 140 | PodUID: v.PodUID, 141 | ContainerName: v.ContainerName, 142 | ResourceName: v.ResourceName, 143 | DeviceIDs: make([]string, 0), 144 | AllocResp: v.AllocResp, 145 | } 146 | for _, devices := range v.DeviceIDs { 147 | v2PodDevicesEntry.DeviceIDs = append(v2PodDevicesEntry.DeviceIDs, devices...) 
148 | } 149 | v2DeivcesEntryies[i] = v2PodDevicesEntry 150 | } 151 | cpV1Data := &types.Checkpoint{} 152 | cpV1Data.RegisteredDevices = cpNUMAData.Data.RegisteredDevices 153 | cpV1Data.PodDeviceEntries = v2DeivcesEntryies 154 | return cpV1Data, nil 155 | } 156 | 157 | klog.V(4).Infof("Try v2 checkpoint data format") 158 | cpV2Data := &types.CheckpointData{} 159 | err = json.Unmarshal(data, cpV2Data) 160 | if err != nil { 161 | return nil, err 162 | } 163 | 164 | if cpV2Data.Data != nil { 165 | return cpV2Data.Data, nil 166 | } 167 | 168 | klog.V(4).Infof("Try v1 checkpoint data format") 169 | cpV1Data := &types.Checkpoint{} 170 | err = json.Unmarshal(data, cpV1Data) 171 | if err != nil { 172 | return nil, err 173 | } 174 | 175 | return cpV1Data, nil 176 | } 177 | 178 | func IsStringSliceEqual(a, b []string) bool { 179 | if len(a) != len(b) { 180 | return false 181 | } 182 | sort.Strings(a) 183 | sort.Strings(b) 184 | for i, v := range a { 185 | if v != b[i] { 186 | return false 187 | } 188 | } 189 | return true 190 | } 191 | 192 | func ShouldRetry(err error) bool { 193 | return apierr.IsConflict(err) || apierr.IsServerTimeout(err) 194 | } 195 | 196 | func MakeContainerNamePrefix(containerName string) string { 197 | return fmt.Sprintf("/%s_%s_", kubePrefix, containerName) 198 | } 199 | 200 | func IsGPURequiredPod(pod *v1.Pod) bool { 201 | vcore := GetGPUResourceOfPod(pod, types.VCoreAnnotation) 202 | vmemory := GetGPUResourceOfPod(pod, types.VMemoryAnnotation) 203 | 204 | // Check if pod request for GPU resource 205 | if vcore <= 0 || (vcore < nvtree.HundredCore && vmemory <= 0) { 206 | klog.V(4).Infof("Pod %s in namespace %s does not Request for GPU resource", 207 | pod.Name, 208 | pod.Namespace) 209 | return false 210 | } 211 | 212 | return true 213 | } 214 | 215 | func IsGPURequiredContainer(c *v1.Container) bool { 216 | klog.V(4).Infof("Determine if the container %s needs GPU resource", c.Name) 217 | 218 | vcore := GetGPUResourceOfContainer(c, 
types.VCoreAnnotation) 219 | vmemory := GetGPUResourceOfContainer(c, types.VMemoryAnnotation) 220 | 221 | // Check if container request for GPU resource 222 | if vcore <= 0 || (vcore < nvtree.HundredCore && vmemory <= 0) { 223 | klog.V(4).Infof("Container %s does not Request for GPU resource", c.Name) 224 | return false 225 | } 226 | 227 | return true 228 | } 229 | 230 | func GetGPUResourceOfPod(pod *v1.Pod, resourceName v1.ResourceName) uint { 231 | var total uint 232 | containers := pod.Spec.Containers 233 | for _, container := range containers { 234 | if val, ok := container.Resources.Limits[resourceName]; ok { 235 | total += uint(val.Value()) 236 | } 237 | } 238 | return total 239 | } 240 | 241 | func ShouldDelete(pod *v1.Pod) bool { 242 | for _, status := range pod.Status.ContainerStatuses { 243 | if status.State.Waiting != nil && 244 | strings.Contains(status.State.Waiting.Message, types.PreStartContainerCheckErrMsg) { 245 | return true 246 | } 247 | } 248 | if pod.Status.Reason == types.UnexpectedAdmissionErrType { 249 | return true 250 | } 251 | return false 252 | } 253 | 254 | func IsGPUPredicatedPod(pod *v1.Pod) (predicated bool) { 255 | klog.V(4).Infof("Determine if the pod %s needs GPU resource", pod.Name) 256 | var ok bool 257 | 258 | // Check if pod request for GPU resource 259 | if GetGPUResourceOfPod(pod, types.VCoreAnnotation) <= 0 || GetGPUResourceOfPod(pod, types.VMemoryAnnotation) <= 0 { 260 | klog.V(4).Infof("Pod %s in namespace %s does not Request for GPU resource", 261 | pod.Name, 262 | pod.Namespace) 263 | return predicated 264 | } 265 | 266 | // Check if pod already has predicate time 267 | if _, ok = pod.ObjectMeta.Annotations[types.PredicateTimeAnnotation]; !ok { 268 | klog.V(4).Infof("No predicate time for pod %s in namespace %s", 269 | pod.Name, 270 | pod.Namespace) 271 | return predicated 272 | } 273 | 274 | // Check if pod has already been assigned 275 | if assigned, ok := pod.ObjectMeta.Annotations[types.GPUAssigned]; !ok { 276 | 
klog.V(4).Infof("No assigned flag for pod %s in namespace %s", 277 | pod.Name, 278 | pod.Namespace) 279 | return predicated 280 | } else if assigned == "true" { 281 | klog.V(4).Infof("pod %s in namespace %s has already been assigned", 282 | pod.Name, 283 | pod.Namespace) 284 | return predicated 285 | } 286 | predicated = true 287 | return predicated 288 | } 289 | 290 | // Check if pod has already been assigned 291 | func IsGPUAssignedPod(pod *v1.Pod) bool { 292 | if assigned, ok := pod.ObjectMeta.Annotations[types.GPUAssigned]; !ok { 293 | klog.V(4).Infof("No assigned flag for pod %s in namespace %s", 294 | pod.Name, 295 | pod.Namespace) 296 | return false 297 | } else if assigned == "false" { 298 | klog.V(4).Infof("pod %s in namespace %s has not been assigned", 299 | pod.Name, 300 | pod.Namespace) 301 | return false 302 | } 303 | 304 | return true 305 | } 306 | 307 | func GetPredicateTimeOfPod(pod *v1.Pod) (predicateTime uint64) { 308 | if predicateTimeStr, ok := pod.ObjectMeta.Annotations[types.PredicateTimeAnnotation]; ok { 309 | u64, err := strconv.ParseUint(predicateTimeStr, 10, 64) 310 | if err != nil { 311 | klog.Warningf("Failed to parse predicate Timestamp %s due to %v", predicateTimeStr, err) 312 | } else { 313 | predicateTime = u64 314 | } 315 | } else { 316 | // If predicate time not found, use createionTimestamp instead 317 | predicateTime = uint64(pod.ObjectMeta.CreationTimestamp.UnixNano()) 318 | } 319 | 320 | return predicateTime 321 | } 322 | 323 | func GetGPUResourceOfContainer(container *v1.Container, resourceName v1.ResourceName) uint { 324 | var count uint 325 | if val, ok := container.Resources.Limits[resourceName]; ok { 326 | count = uint(val.Value()) 327 | } 328 | return count 329 | } 330 | 331 | func GetContainerIndexByName(pod *v1.Pod, containerName string) (int, error) { 332 | containerIndex := -1 333 | for i, c := range pod.Spec.Containers { 334 | if c.Name == containerName { 335 | containerIndex = i 336 | break 337 | } 338 | } 339 | 340 
| if containerIndex == -1 { 341 | return containerIndex, fmt.Errorf("failed to get index of container %s in pod %s", containerName, pod.UID) 342 | } 343 | return containerIndex, nil 344 | } 345 | 346 | func GetVirtualControllerMountPath(resp *pluginapi.ContainerAllocateResponse) string { 347 | for _, mnt := range resp.Mounts { 348 | if mnt.ContainerPath == types.VCUDA_MOUNTPOINT { 349 | return mnt.HostPath 350 | } 351 | } 352 | 353 | return "" 354 | } 355 | -------------------------------------------------------------------------------- /pkg/version/.gitattributes: -------------------------------------------------------------------------------- 1 | base.go export-subst 2 | -------------------------------------------------------------------------------- /pkg/version/base.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 
// Version components reported by this package.
// NOTE(review): the package's .gitattributes marks base.go as export-subst,
// and these are variables rather than constants; presumably they are meant
// to be substituted or overridden at export/build time — confirm against
// the build scripts.
var (
	// gitMajor is the major version number, joined with gitMinor by Get.
	gitMajor = "0"
	// gitMinor is the minor version number, joined with gitMajor by Get.
	gitMinor = "1"
	// gitCommit is the commit hash returned as Info.Commit by Get.
	gitCommit = "8cd842ed00fca0efbf7907fc40ee3f6085187f5c"
)
// versionValue is the value type behind the version flag. Besides the usual
// boolean states it has a third state selected by the literal "raw".
type versionValue int

// The three states a versionValue can take.
const (
	VersionFalse versionValue = iota
	VersionTrue
	VersionRaw
)

// strRawVersion is the flag argument that selects VersionRaw.
const strRawVersion = "raw"

// IsBoolFlag always returns true, marking the flag as boolean-style.
func (v *versionValue) IsBoolFlag() bool {
	return true
}

// Get returns the current value as a versionValue wrapped in an interface.
func (v *versionValue) Get() interface{} {
	current := *v
	return current
}

// Set stores the state described by s. "raw" selects VersionRaw; any other
// input is parsed with strconv.ParseBool. When parsing fails the value is
// reset to VersionFalse and the parse error is returned.
func (v *versionValue) Set(s string) error {
	if s == strRawVersion {
		*v = VersionRaw
		return nil
	}

	enabled, err := strconv.ParseBool(s)
	*v = VersionFalse
	if enabled {
		*v = VersionTrue
	}
	return err
}

// String renders the value as "raw", "true" or "false".
func (v *versionValue) String() string {
	switch *v {
	case VersionRaw:
		return strRawVersion
	default:
		return strconv.FormatBool(*v == VersionTrue)
	}
}

// The type of the flag as required by the pflag.Value interface
func (v *versionValue) Type() string {
	return "version"
}
91 | func PrintAndExitIfRequested() { 92 | if *versionFlag == VersionRaw { 93 | fmt.Printf("%#v\n", Get()) 94 | os.Exit(0) 95 | } else if *versionFlag == VersionTrue { 96 | fmt.Printf("Nvidia Manager %s\n", Get()) 97 | os.Exit(0) 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /pkg/version/version.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | package version 19 | 20 | import ( 21 | "fmt" 22 | ) 23 | 24 | // Info contains version information 25 | type Info struct { 26 | Version string 27 | Commit string 28 | } 29 | 30 | // String returns info as a human-friend version string. 31 | func (info Info) String() string { 32 | return info.Commit 33 | } 34 | 35 | // Get returns the overall codebase version. It's for detecting 36 | // what code a binary was built from. 
37 | func Get() Info { 38 | return Info{ 39 | Version: fmt.Sprintf("%s.%s", gitMajor, gitMinor), 40 | Commit: gitCommit, 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /revive.toml: -------------------------------------------------------------------------------- 1 | # When set to false, ignores files with "GENERATED" header, similar to golint 2 | ignoreGeneratedHeader = true 3 | severity = "warning" 4 | confidence = 0.8 5 | errorCode = 0 6 | warningCode = 0 7 | 8 | [rule.context-as-argument] 9 | [rule.context-keys-type] 10 | [rule.dot-imports] 11 | [rule.error-return] 12 | [rule.error-strings] 13 | [rule.error-naming] 14 | [rule.if-return] 15 | [rule.increment-decrement] 16 | [rule.var-declaration] 17 | [rule.package-comments] 18 | [rule.range] 19 | [rule.receiver-naming] 20 | [rule.time-naming] 21 | [rule.indent-error-flow] 22 | [rule.errorf] 23 | [rule.superfluous-else] 24 | [rule.unreachable-code] 25 | [rule.modifies-parameter] 26 | [rule.unnecessary-stmt] 27 | [rule.confusing-results] 28 | [rule.get-return] 29 | [rule.blank-imports] 30 | [rule.redefines-builtin-id] 31 | [rule.empty-lines] 32 | [rule.call-to-gc] 33 | [rule.atomic] 34 | [rule.waitgroup-by-value] 35 | [rule.range-val-in-closure] 36 | [rule.constant-logical-expr] 37 | [rule.modifies-value-receiver] 38 | [rule.bool-literal-in-expr] 39 | [rule.argument-limit] 40 | arguments =[8] 41 | [rule.function-result-limit] 42 | arguments =[8] 43 | [rule.imports-blacklist] 44 | -------------------------------------------------------------------------------- /staging/src/google/protobuf/empty.proto: -------------------------------------------------------------------------------- 1 | // Protocol Buffers - Google's data interchange format 2 | // Copyright 2008 Google Inc. All rights reserved. 
3 | // https://developers.google.com/protocol-buffers/ 4 | // 5 | // Redistribution and use in source and binary forms, with or without 6 | // modification, are permitted provided that the following conditions are 7 | // met: 8 | // 9 | // * Redistributions of source code must retain the above copyright 10 | // notice, this list of conditions and the following disclaimer. 11 | // * Redistributions in binary form must reproduce the above 12 | // copyright notice, this list of conditions and the following disclaimer 13 | // in the documentation and/or other materials provided with the 14 | // distribution. 15 | // * Neither the name of Google Inc. nor the names of its 16 | // contributors may be used to endorse or promote products derived from 17 | // this software without specific prior written permission. 18 | // 19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | 31 | syntax = "proto3"; 32 | 33 | package google.protobuf; 34 | 35 | option csharp_namespace = "Google.Protobuf.WellKnownTypes"; 36 | option go_package = "github.com/golang/protobuf/ptypes/empty"; 37 | option java_package = "com.google.protobuf"; 38 | option java_outer_classname = "EmptyProto"; 39 | option java_multiple_files = true; 40 | option objc_class_prefix = "GPB"; 41 | option cc_enable_arenas = true; 42 | 43 | // A generic empty message that you can re-use to avoid defining duplicated 44 | // empty messages in your APIs. A typical example is to use it as the request 45 | // or the response type of an API method. For instance: 46 | // 47 | // service Foo { 48 | // rpc Bar(google.protobuf.Empty) returns (google.protobuf.Empty); 49 | // } 50 | // 51 | // The JSON representation for `Empty` is empty JSON object `{}`. 52 | message Empty {} --------------------------------------------------------------------------------