├── .chglog
├── CHANGELOG.tpl.md
└── config.yml
├── .gitignore
├── .travis.yml
├── CHANGELOG.md
├── CMakeLists.txt
├── CONTRIBUTING.md
├── Dockerfile
├── LICENSE
├── MAINTAINERS
├── README.md
├── VERSION
├── build-img.sh
├── find_new_lib.sh
├── include
├── cuda-helper.h
├── cuda-subset.h
├── hijack.h
├── nvml-helper.h
└── nvml-subset.h
├── src
├── cuda_originals.c
├── hijack_call.c
├── loader.c
├── nvml_entry.c
└── register.c
├── tools
└── monitor_dockernized.c
└── vcuda.spec
/.chglog/CHANGELOG.tpl.md:
--------------------------------------------------------------------------------
1 | {{ if .Versions -}}
2 |
3 | ## [Unreleased]
4 |
5 | {{ if .Unreleased.CommitGroups -}}
6 | {{ range .Unreleased.CommitGroups -}}
7 | ### {{ .Title }}
8 | {{ range .Commits -}}
9 | - {{ if .Scope }}**{{ .Scope }}:** {{ end }}{{ .Subject }}
10 | {{ end }}
11 | {{ end -}}
12 | {{ end -}}
13 | {{ end -}}
14 |
15 | {{ range .Versions }}
16 |
17 | ## {{ if .Tag.Previous }}[{{ .Tag.Name }}]{{ else }}{{ .Tag.Name }}{{ end }} - {{ datetime "2006-01-02" .Tag.Date }}
18 | {{ range .CommitGroups -}}
19 | ### {{ .Title }}
20 | {{ range .Commits -}}
21 | - {{ if .Scope }}**{{ .Scope }}:** {{ end }}{{ .Subject }}
22 | {{ end }}
23 | {{ end -}}
24 |
25 | {{- if .RevertCommits -}}
26 | ### Reverts
27 | {{ range .RevertCommits -}}
28 | - {{ .Revert.Header }}
29 | {{ end }}
30 | {{ end -}}
31 |
32 | {{- if .MergeCommits -}}
33 | ### Pull Requests
34 | {{ range .MergeCommits -}}
35 | - {{ .Header }}
36 | {{ end }}
37 | {{ end -}}
38 |
39 | {{- if .NoteGroups -}}
40 | {{ range .NoteGroups -}}
41 | ### {{ .Title }}
42 | {{ range .Notes }}
43 | {{ .Body }}
44 | {{ end }}
45 | {{ end -}}
46 | {{ end -}}
47 | {{ end -}}
48 |
49 | {{- if .Versions }}
50 | [Unreleased]: {{ .Info.RepositoryURL }}/compare/{{ $latest := index .Versions 0 }}{{ $latest.Tag.Name }}...HEAD
51 | {{ range .Versions -}}
52 | {{ if .Tag.Previous -}}
53 | [{{ .Tag.Name }}]: {{ $.Info.RepositoryURL }}/compare/{{ .Tag.Previous.Name }}...{{ .Tag.Name }}
54 | {{ end -}}
55 | {{ end -}}
56 | {{ end -}}
--------------------------------------------------------------------------------
/.chglog/config.yml:
--------------------------------------------------------------------------------
1 | style: github
2 | template: CHANGELOG.tpl.md
3 | info:
4 | title: CHANGELOG
5 | repository_url: https://github.com/tkestack/vcuda-controller
6 | options:
7 | commits:
8 | # filters:
9 | # Type:
10 | # - feat
11 | # - fix
12 | # - perf
13 | # - refactor
14 | commit_groups:
15 | # title_maps:
16 | # feat: Features
17 | # fix: Bug Fixes
18 | # perf: Performance Improvements
19 | # refactor: Code Refactoring
20 | header:
21 | pattern: "^(\\w*)(?:\\(([\\w\\$\\.\\-\\*\\s]*)\\))?\\:\\s(.*)$"
22 | pattern_maps:
23 | - Type
24 | - Scope
25 | - Subject
26 | notes:
27 | keywords:
28 | - BREAKING CHANGE
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | .idea/
3 | cmake-build-debug/
4 | proto/*.h
5 | proto/*.cc
6 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: c
2 |
3 | services:
4 | - docker
5 |
6 | script:
7 | - ./build-img.sh
8 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 |
2 | ## [Unreleased]
3 |
4 | ### Feat
5 | - add new functions of CUDA 11
6 |
7 |
8 |
9 | ## [v1.0.1] - 2020-05-21
10 | ### Docs
11 | - Add CHANGELOG.md
12 |
13 | ### Feat
14 | - Support build image use host network
15 |
16 | ### Fix
17 | - Support other container runtime
18 |
19 | ### Pull Requests
20 | - Merge pull request [#4](https://github.com/tkestack/vcuda-controller/issues/4) from mYmNeo/dev_ffmpeg
21 |
22 |
23 |
24 | ## v1.0 - 2019-11-27
25 |
26 | [Unreleased]: https://github.com/tkestack/vcuda-controller/compare/v1.0.1...HEAD
27 | [v1.0.1]: https://github.com/tkestack/vcuda-controller/compare/v1.0...v1.0.1
28 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.5)
project(hijack C CXX)

add_definitions(-D_GNU_SOURCE)
add_compile_options(-Wall -Wshadow -Werror -Wno-format)

include_directories(${CMAKE_SOURCE_DIR})

# Test the variable by name: if(${ENABLE_DEBUG}) expands to if() when the
# variable is undefined, which is a CMake syntax error.
if (ENABLE_DEBUG)
  add_compile_options(-g -O0)
else ()
  add_compile_options(-g -O2)
endif ()

set(STATIC_C_LIBRARIES -static-libgcc -static-libstdc++)

# Sources of the controller library; previously this list was duplicated
# verbatim in both branches of the USE_ORIGINAL switch.
set(CUDA_CONTROL_SOURCES
    src/hijack_call.c
    include/hijack.h
    include/cuda-subset.h
    include/nvml-subset.h
    include/cuda-helper.h
    include/nvml-helper.h
    src/cuda_originals.c
    src/nvml_entry.c
    src/loader.c
    src/register.c)

# USE_ORIGINAL: forward calls to the real driver instead of applying limits.
# The definition must be added before the target is created so the
# directory-level COMPILE_DEFINITIONS are picked up.
if (USE_ORIGINAL)
  add_definitions(-DUSE_ORIGINAL)
endif ()

# controller related
add_library(cuda-control SHARED ${CUDA_CONTROL_SOURCES})
target_link_libraries(cuda-control ${STATIC_C_LIBRARIES})

if (NOT USE_ORIGINAL)
  # process monitor — only built for the hijacking variant
  add_executable(nvml-monitor
      tools/monitor_dockernized.c
      src/loader.c
      src/register.c)
  # ${CMAKE_DL_LIBS} is the portable spelling of -ldl
  target_link_libraries(nvml-monitor ${STATIC_C_LIBRARIES} ${CMAKE_DL_LIBS} -lpthread)
endif ()

# Apply -std=c++11 to C++ translation units only.  The original generator
# expression was garbled ($<$:-std=c++11>, which CMake rejects);
# $<COMPILE_LANGUAGE:CXX> is the intended condition for a mixed C/C++ target.
target_compile_options(cuda-control PUBLIC $<$<COMPILE_LANGUAGE:CXX>:-std=c++11>)
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # CONTRIBUTING
2 |
3 | Welcome to [report Issues](https://github.com/tkestack/vcuda-controller/issues) or [pull requests](https://github.com/tkestack/vcuda-controller/pulls). It's recommended to read the following Contributing Guide first before contributing.
4 |
5 | This document provides a set of best practices for open source contributions - bug reports, code submissions / pull requests, etc.
6 |
7 | ## Issues
8 |
9 | We use Github Issues to track public bugs and feature requests.
10 |
11 | ### Due diligence
12 |
13 | Before submitting an issue, please do the following:
14 |
15 | * Perform **basic troubleshooting** steps:
16 | * Make sure you’re on the latest version. If you’re not on the most recent version, your problem may have been solved already! Upgrading is always the best first step.
17 | * Try older versions. If you’re already on the latest release, try rolling back a few minor versions (e.g. if on 1.7, try 1.5 or 1.6) and see if the problem goes away. This will help the devs narrow down when the problem first arose in the commit log.
18 | * Try switching up dependency versions. If the software in question has dependencies (other libraries, etc) try upgrading/downgrading those as well.
19 | * Search the project’s bug/issue tracker to make sure it’s not a known issue.
20 | * If you don’t find a pre-existing issue, consider checking with the mailing list and/or IRC channel in case the problem is non-bug-related.
21 |
22 | ### What to put in your bug report
23 |
24 | Make sure your report gets the attention it deserves: bug reports with missing information may be ignored or punted back to you, delaying a fix. The below constitutes a bare minimum; more info is almost always better:
25 |
26 | * What version of the core programming language interpreter/compiler are you using? For example, if it’s a Golang project, are you using Golang 1.13? Golang 1.12?
27 | * What operating system are you on? Windows? (32-bit? 64-bit?) Mac OS X? (10.14? 10.10?) Linux? (Which distro? Which version of that distro? 32 or 64 bits?) Again, more detail is better.
28 | * Which version or versions of the software are you using? Ideally, you followed the advice above and have ruled out (or verified that the problem exists in) a few different versions.
29 | * How can the developers recreate the bug on their end? If possible, include a copy of your code, the command you used to invoke it, and the full output of your run (if applicable.) A common tactic is to pare down your code until a simple (but still bug-causing) “base case” remains. Not only can this help you identify problems which aren’t real bugs, but it means the developer can get to fixing the bug faster.
30 |
31 | ## Pull Requests
32 |
33 | We strongly welcome your pull request to make TKEStack project better.
34 |
35 | ### Licensing of contributed material
36 |
37 | Keep in mind as you contribute, that code, docs and other material submitted to open source projects are usually considered licensed under the same terms as the rest of the work.
38 |
39 | Anything submitted to a project falls under the licensing terms in the repository’s top level LICENSE file. Per-file copyright/license headers are typically extraneous and undesirable. Please don’t add your own copyright headers to new files unless the project’s license actually requires them!
40 |
41 | ### Branch Management
42 |
43 | There are three main branches here:
44 |
45 | 1. `master` branch.
46 | 1. It is the latest (pre-)release branch. We use `master` for tags, with version number `1.1.0`, `1.2.0`, `1.3.0`...
47 | 2. **Don't submit any PR on `master` branch.**
48 | 2. `dev` branch.
49 | 1. It is our stable developing branch. After full testing, `dev` will be merged to `master` branch for the next release.
50 | 2. **You are recommended to submit bugfix or feature PR on `dev` branch.**
51 | 3. `hotfix` branch.
52 | 1. It is the latest tag version for hot fix. If we accept your pull request, we may just tag with version number `1.1.1`, `1.2.3`.
53 | 2. **Only submit urgent PR on `hotfix` branch for next specific release.**
54 |
55 | Normal bugfix or feature request should be submitted to `dev` branch. After full testing, we will merge them to `master` branch for the next release.
56 |
57 | If you have an urgent bugfix for a published version, but the `master` branch has already moved far ahead of the latest tag version, you can submit a PR on the `hotfix` branch. It will be cherry-picked to the `dev` branch if possible.
58 |
59 | ```
60 | master
61 | ↑
62 | dev <--- hotfix PR
63 | ↑
64 | feature/bugfix PR
65 | ```
66 |
67 | ### Make Pull Requests
68 |
69 | The code team will monitor all pull requests; we run code checks and tests on each one. After all tests pass, we will accept the PR. However, it will not be merged to the `master` branch immediately — there may be some delay.
70 |
71 | Before submitting a pull request, please make sure the followings are done:
72 |
73 | 1. Fork the repo and create your branch from `master` or `hotfix`.
74 | 2. Update code or documentation if you have changed APIs.
75 | 3. Add the copyright notice to the top of any new files you've added.
76 | 4. Check your code lints and checkstyles.
77 | 5. Test and test again your code.
78 | 6. Now, you can submit your pull request on `dev` or `hotfix` branch.
79 |
80 | ## Code Conventions
81 |
82 | Use [Kubernetes Code Conventions](https://github.com/kubernetes/community/blob/master/contributors/guide/coding-conventions.md) for all projects in the TKEStack organization.
83 |
84 | ## Documentation isn’t optional
85 |
86 | It’s not! Patches without documentation will be returned to sender. By “documentation” we mean:
87 |
88 | * Docstrings must be created or updated for public API functions/methods/etc. (This step is optional for some bugfixes.)
89 | * New features should ideally include updates to prose documentation, including useful example code snippets.
90 | * All submissions should have a changelog entry crediting the contributor and/or any individuals instrumental in identifying the problem.
91 |
92 | ## Tests aren’t optional
93 |
94 | Any bugfix that doesn’t include a test proving the existence of the bug being fixed, may be suspect. Ditto for new features that can’t prove they actually work.
95 |
96 | We’ve found that test-first development really helps make features better architected and identifies potential edge cases earlier instead of later. Writing tests before the implementation is strongly encouraged.
97 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
# stage 1: build vcuda from the source archive produced by build-img.sh
FROM nvidia/cuda:11.2.0-devel-ubuntu18.04 as build

RUN apt update && apt install -y --no-install-recommends \
    curl

# Trust NVIDIA's package-signing key (libnvidia-container repository).
RUN curl -s -L https://nvidia.github.io/libnvidia-container/gpgkey | apt-key add -

RUN apt install -y --no-install-recommends \
    cmake libvdpau-dev && \
    rm -rf /var/lib/apt/lists/*

# cuda-control.tar is created by `git archive` in build-img.sh.
COPY cuda-control.tar /tmp

ARG version

# Out-of-source build in a version-named directory so the packaged tree
# ends up named vcuda-${version}.
RUN cd /tmp && tar xvf /tmp/cuda-control.tar && \
    cd /tmp/cuda-control && mkdir vcuda-${version} && \
    cd vcuda-${version} && cmake -DCMAKE_BUILD_TYPE=Release .. && \
    make

# NOTE(review): despite the .tar.gz name this archive is NOT gzipped
# (`cf`, not `czf`), and the trailing `-c` repeats the create flag.
# rpmbuild's %setup auto-detects the format, so this appears to work;
# confirm against vcuda.spec before changing the flags or the name.
RUN cd /tmp/cuda-control && tar cf /tmp/vcuda.tar.gz -c vcuda-${version}

# stage 2: package the build output as an RPM
FROM centos:7 as rpmpkg

RUN yum install -y rpm-build
RUN mkdir -p /root/rpmbuild/{SPECS,SOURCES}

COPY vcuda.spec /root/rpmbuild/SPECS
COPY --from=build /tmp/vcuda.tar.gz /root/rpmbuild/SOURCES

# Disable post-install processing and debuginfo sub-packages to keep the
# rpmbuild output minimal.
RUN echo '%_topdir /root/rpmbuild' > /root/.rpmmacros \
    && echo '%__os_install_post %{nil}' >> /root/.rpmmacros \
    && echo '%debug_package %{nil}' >> /root/.rpmmacros

WORKDIR /root/rpmbuild/SPECS

ARG version
ARG commit

RUN rpmbuild -bb --quiet \
    --define 'version '${version}'' \
    --define 'commit '${commit}'' \
    vcuda.spec

# stage 3: final image simply installs the RPM built in stage 2
FROM centos:7

ARG version
ARG commit

COPY --from=rpmpkg /root/rpmbuild/RPMS/x86_64/vcuda-${version}-${commit}.el7.x86_64.rpm /tmp
RUN rpm -ivh /tmp/vcuda-${version}-${commit}.el7.x86_64.rpm && rm -rf /tmp/vcuda-${version}-${commit}.el7.x86_64.rpm
--------------------------------------------------------------------------------
/MAINTAINERS:
--------------------------------------------------------------------------------
1 | Thomas Song @mYmNeo
2 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # vcuda-controller
2 |
3 | [](https://travis-ci.org/tkestack/vcuda-controller)
4 | [](https://lgtm.com/projects/g/tkestack/vcuda-controller/alerts/)
5 | [](https://lgtm.com/projects/g/tkestack/vcuda-controller/context:cpp)
6 |
 7 | This project is a wrapper of the NVIDIA driver library. It is a component
 8 | of [gpu-manager](https://github.com/tkestack/gpu-manager), which enables Kubernetes to not only run more than one Pod on
 9 | the same GPU, but also guarantee QoS for each Pod. For more details, please refer to our
10 | paper [here](https://ieeexplore.ieee.org/abstract/document/8672318).
11 |
12 | ## Build
13 |
14 | ```
15 | IMAGE_FILE= ./build-img.sh
16 | ```
17 |
18 | ## CUDA/GPU support information
19 |
20 | CUDA 11.5.1 and earlier are supported
21 |
22 | Any GPU architecture after Kepler is supported
--------------------------------------------------------------------------------
/VERSION:
--------------------------------------------------------------------------------
1 | master
2 |
--------------------------------------------------------------------------------
/build-img.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Build the vcuda controller Docker image.
#
# Environment:
#   IMAGE_FILE  - target image tag (default: tkestack.io/gaia/vcuda:latest)
#   BUILD_FLAGS - extra flags passed to `docker build` (optional)
set -o errexit
set -o pipefail
set -o nounset
set -o xtrace

ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)
IMAGE_FILE=${IMAGE_FILE:-"tkestack.io/gaia/vcuda:latest"}

# Remove the temporary source archive on exit or interruption.
# NOTE(review): this removes ${ROOT}/cuda-control.tar while build_img writes
# the archive to ${ROOT}/build/cuda-control.tar — confirm the intended path.
function cleanup() {
  rm -rf "${ROOT}/cuda-control.tar"
}

trap cleanup EXIT SIGTERM SIGINT

function build_img() {
  # The original used `readonly local var=$(...)`, which (a) also creates a
  # readonly variable literally named "local", (b) is not function-scoped,
  # and (c) masks the command substitution's exit status under errexit.
  # Declare, assign, then mark readonly instead.
  local commit version
  commit=$(git log --oneline | wc -l | sed -e 's,^[ \t]*,,')
  version=$(<"${ROOT}/VERSION")
  readonly commit version

  rm -rf "${ROOT}/build"
  mkdir "${ROOT}/build"
  # Pack the committed tree (HEAD) so uncommitted changes never leak into
  # the image.
  git archive -o "${ROOT}/build/cuda-control.tar" --format=tar --prefix=cuda-control/ HEAD
  cp "${ROOT}/vcuda.spec" "${ROOT}/build"
  cp "${ROOT}/Dockerfile" "${ROOT}/build"
  (
    cd "${ROOT}/build"
    # BUILD_FLAGS is intentionally unquoted: it may hold several flags.
    docker build ${BUILD_FLAGS:-} --build-arg "version=${version}" --build-arg "commit=${commit}" -t "${IMAGE_FILE}" .
  )
}

build_img
--------------------------------------------------------------------------------
/find_new_lib.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# List driver-library entry points that are not yet declared in the
# corresponding helper header, formatted as *_ENTRY_ENUM entries ready to
# paste into the header.
#
# Usage: ./find_new_lib.sh <path to libcuda.so> <path to libnvidia-ml.so>

set -o errexit
set -o nounset
set -o pipefail

CUDA_LIBRARY=$1
ML_LIBRARY=$2

echo "find new library"

# Compare the exported text symbols ("T") of the CUDA driver library against
# include/cuda-helper.h; print every entry missing from the header.
while read -r item; do
  # -F: the candidate contains '(' and ')' — match it as a fixed string,
  # not as a regular expression; -- guards against option-like input.
  grep -qF -- "${item}" include/cuda-helper.h || echo "${item},"
done < <(nm -D "${CUDA_LIBRARY}" | grep " T " | awk '{print "CUDA_ENTRY_ENUM("$3")"}')

echo ""

# Same check for the NVML library against include/nvml-helper.h.
while read -r item; do
  grep -qF -- "${item}" include/nvml-helper.h || echo "${item},"
done < <(nm -D "${ML_LIBRARY}" | grep " T " | awk '{print "NVML_ENTRY_ENUM("$3")"}')
--------------------------------------------------------------------------------
/include/cuda-helper.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Tencent is pleased to support the open source community by making TKEStack
3 | * available.
4 | *
5 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved.
6 | *
7 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
8 | * use this file except in compliance with the License. You may obtain a copy of
9 | * the License at
10 | *
11 | * https://opensource.org/licenses/Apache-2.0
12 | *
13 | * Unless required by applicable law or agreed to in writing, software
14 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
15 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations under the License.
17 | */
18 |
19 | #ifndef HIJACK_CUDA_HELPER_H
20 | #define HIJACK_CUDA_HELPER_H
21 |
22 | #ifdef __cplusplus
23 | extern "C" {
24 | #endif
25 |
26 | #include
27 |
28 | #include "cuda-subset.h"
29 | #include "hijack.h"
30 |
/**
 * CUDA library prefix
 * (file-name stem used when locating the real libcuda.so)
 */
#define CUDA_LIBRARY_PREFIX "libcuda.so"

/* Maps a driver symbol name to its slot identifier in the entry table. */
#define CUDA_ENTRY_ENUM(x) ENTRY_##x

/* Looks up the cached function pointer for `sym` in `table`.
 * Uses a GNU statement expression ({ ... }). */
#define CUDA_FIND_ENTRY(table, sym) ({ (table)[CUDA_ENTRY_ENUM(sym)].fn_ptr; })

/* Invokes the real driver entry point `sym` from `table` with the given
 * arguments; cuda_sym_t is declared elsewhere in the project. */
#define CUDA_ENTRY_CALL(table, sym, ...)                                       \
  ({                                                                           \
    cuda_sym_t _entry = CUDA_FIND_ENTRY(table, sym);                           \
    _entry(__VA_ARGS__);                                                       \
  })

/* Same as CUDA_ENTRY_CALL but for debug entry points returning void
 * (cuda_debug_void_sym_t is declared elsewhere in the project). */
#define CUDA_ENTRY_DEBUG_VOID_CALL(table, sym, ...)                            \
  ({                                                                           \
    cuda_debug_void_sym_t _entry = CUDA_FIND_ENTRY(table, sym);                \
    _entry(__VA_ARGS__);                                                       \
  })

/* Same as CUDA_ENTRY_CALL but for debug entry points that return a result
 * (cuda_debug_result_sym_t is declared elsewhere in the project). */
#define CUDA_ENTRY_DEBUG_RESULT_CALL(table, sym, ...)                          \
  ({                                                                           \
    cuda_debug_result_sym_t _entry = CUDA_FIND_ENTRY(table, sym);              \
    _entry(__VA_ARGS__);                                                       \
  })
57 |
58 | /**
59 | * CUDA library enumerator entry
60 | */
61 | typedef enum {
62 | /** cuInit */
63 | CUDA_ENTRY_ENUM(cuInit),
64 | /** cuDeviceGet */
65 | CUDA_ENTRY_ENUM(cuDeviceGet),
66 | /** cuDeviceGetCount */
67 | CUDA_ENTRY_ENUM(cuDeviceGetCount),
68 | /** cuDeviceGetName */
69 | CUDA_ENTRY_ENUM(cuDeviceGetName),
70 | /** cuDeviceTotalMem_v2 */
71 | CUDA_ENTRY_ENUM(cuDeviceTotalMem_v2),
72 | /** cuDeviceGetAttribute */
73 | CUDA_ENTRY_ENUM(cuDeviceGetAttribute),
74 | /** cuDeviceGetP2PAttribute */
75 | CUDA_ENTRY_ENUM(cuDeviceGetP2PAttribute),
76 | /** cuDriverGetVersion */
77 | CUDA_ENTRY_ENUM(cuDriverGetVersion),
78 | /** cuDeviceGetByPCIBusId */
79 | CUDA_ENTRY_ENUM(cuDeviceGetByPCIBusId),
80 | /** cuDeviceGetPCIBusId */
81 | CUDA_ENTRY_ENUM(cuDeviceGetPCIBusId),
82 | /** cuDevicePrimaryCtxRetain */
83 | CUDA_ENTRY_ENUM(cuDevicePrimaryCtxRetain),
84 | /** cuDevicePrimaryCtxRelease */
85 | CUDA_ENTRY_ENUM(cuDevicePrimaryCtxRelease),
86 | /** cuDevicePrimaryCtxSetFlags */
87 | CUDA_ENTRY_ENUM(cuDevicePrimaryCtxSetFlags),
88 | /** cuDevicePrimaryCtxGetState */
89 | CUDA_ENTRY_ENUM(cuDevicePrimaryCtxGetState),
90 | /** cuDevicePrimaryCtxReset */
91 | CUDA_ENTRY_ENUM(cuDevicePrimaryCtxReset),
92 | /** cuCtxCreate_v2 */
93 | CUDA_ENTRY_ENUM(cuCtxCreate_v2),
94 | /** cuCtxGetFlags */
95 | CUDA_ENTRY_ENUM(cuCtxGetFlags),
96 | /** cuCtxSetCurrent */
97 | CUDA_ENTRY_ENUM(cuCtxSetCurrent),
98 | /** cuCtxGetCurrent */
99 | CUDA_ENTRY_ENUM(cuCtxGetCurrent),
100 | /** cuCtxDetach */
101 | CUDA_ENTRY_ENUM(cuCtxDetach),
102 | /** cuCtxGetApiVersion */
103 | CUDA_ENTRY_ENUM(cuCtxGetApiVersion),
104 | /** cuCtxGetDevice */
105 | CUDA_ENTRY_ENUM(cuCtxGetDevice),
106 | /** cuCtxGetLimit */
107 | CUDA_ENTRY_ENUM(cuCtxGetLimit),
108 | /** cuCtxSetLimit */
109 | CUDA_ENTRY_ENUM(cuCtxSetLimit),
110 | /** cuCtxGetCacheConfig */
111 | CUDA_ENTRY_ENUM(cuCtxGetCacheConfig),
112 | /** cuCtxSetCacheConfig */
113 | CUDA_ENTRY_ENUM(cuCtxSetCacheConfig),
114 | /** cuCtxGetSharedMemConfig */
115 | CUDA_ENTRY_ENUM(cuCtxGetSharedMemConfig),
116 | /** cuCtxGetStreamPriorityRange */
117 | CUDA_ENTRY_ENUM(cuCtxGetStreamPriorityRange),
118 | /** cuCtxSetSharedMemConfig */
119 | CUDA_ENTRY_ENUM(cuCtxSetSharedMemConfig),
120 | /** cuCtxSynchronize */
121 | CUDA_ENTRY_ENUM(cuCtxSynchronize),
122 | /** cuModuleLoad */
123 | CUDA_ENTRY_ENUM(cuModuleLoad),
124 | /** cuModuleLoadData */
125 | CUDA_ENTRY_ENUM(cuModuleLoadData),
126 | /** cuModuleLoadFatBinary */
127 | CUDA_ENTRY_ENUM(cuModuleLoadFatBinary),
128 | /** cuModuleUnload */
129 | CUDA_ENTRY_ENUM(cuModuleUnload),
130 | /** cuModuleGetFunction */
131 | CUDA_ENTRY_ENUM(cuModuleGetFunction),
132 | /** cuModuleGetGlobal_v2 */
133 | CUDA_ENTRY_ENUM(cuModuleGetGlobal_v2),
134 | /** cuModuleGetTexRef */
135 | CUDA_ENTRY_ENUM(cuModuleGetTexRef),
136 | /** cuModuleGetSurfRef */
137 | CUDA_ENTRY_ENUM(cuModuleGetSurfRef),
138 | /** cuLinkCreate */
139 | CUDA_ENTRY_ENUM(cuLinkCreate),
140 | /** cuLinkAddData */
141 | CUDA_ENTRY_ENUM(cuLinkAddData),
142 | /** cuLinkAddFile */
143 | CUDA_ENTRY_ENUM(cuLinkAddFile),
144 | /** cuLinkComplete */
145 | CUDA_ENTRY_ENUM(cuLinkComplete),
146 | /** cuLinkDestroy */
147 | CUDA_ENTRY_ENUM(cuLinkDestroy),
148 | /** cuMemGetInfo_v2 */
149 | CUDA_ENTRY_ENUM(cuMemGetInfo_v2),
150 | /** cuMemAllocManaged */
151 | CUDA_ENTRY_ENUM(cuMemAllocManaged),
152 | /** cuMemAlloc_v2 */
153 | CUDA_ENTRY_ENUM(cuMemAlloc_v2),
154 | /** cuMemAllocPitch_v2 */
155 | CUDA_ENTRY_ENUM(cuMemAllocPitch_v2),
156 | /** cuMemFree_v2 */
157 | CUDA_ENTRY_ENUM(cuMemFree_v2),
158 | /** cuMemGetAddressRange_v2 */
159 | CUDA_ENTRY_ENUM(cuMemGetAddressRange_v2),
160 | /** cuMemFreeHost */
161 | CUDA_ENTRY_ENUM(cuMemFreeHost),
162 | /** cuMemHostAlloc */
163 | CUDA_ENTRY_ENUM(cuMemHostAlloc),
164 | /** cuMemHostGetDevicePointer_v2 */
165 | CUDA_ENTRY_ENUM(cuMemHostGetDevicePointer_v2),
166 | /** cuMemHostGetFlags */
167 | CUDA_ENTRY_ENUM(cuMemHostGetFlags),
168 | /** cuMemHostRegister_v2 */
169 | CUDA_ENTRY_ENUM(cuMemHostRegister_v2),
170 | /** cuMemHostUnregister */
171 | CUDA_ENTRY_ENUM(cuMemHostUnregister),
172 | /** cuPointerGetAttribute */
173 | CUDA_ENTRY_ENUM(cuPointerGetAttribute),
174 | /** cuPointerGetAttributes */
175 | CUDA_ENTRY_ENUM(cuPointerGetAttributes),
176 | /** cuMemcpy */
177 | CUDA_ENTRY_ENUM(cuMemcpy),
178 | /** cuMemcpy_ptds */
179 | CUDA_ENTRY_ENUM(cuMemcpy_ptds),
180 | /** cuMemcpyAsync */
181 | CUDA_ENTRY_ENUM(cuMemcpyAsync),
182 | /** cuMemcpyAsync_ptsz */
183 | CUDA_ENTRY_ENUM(cuMemcpyAsync_ptsz),
184 | /** cuMemcpyPeer */
185 | CUDA_ENTRY_ENUM(cuMemcpyPeer),
186 | /** cuMemcpyPeer_ptds */
187 | CUDA_ENTRY_ENUM(cuMemcpyPeer_ptds),
188 | /** cuMemcpyPeerAsync */
189 | CUDA_ENTRY_ENUM(cuMemcpyPeerAsync),
190 | /** cuMemcpyPeerAsync_ptsz */
191 | CUDA_ENTRY_ENUM(cuMemcpyPeerAsync_ptsz),
192 | /** cuMemcpyHtoD_v2 */
193 | CUDA_ENTRY_ENUM(cuMemcpyHtoD_v2),
194 | /** cuMemcpyHtoD_v2_ptds */
195 | CUDA_ENTRY_ENUM(cuMemcpyHtoD_v2_ptds),
196 | /** cuMemcpyHtoDAsync_v2 */
197 | CUDA_ENTRY_ENUM(cuMemcpyHtoDAsync_v2),
198 | /** cuMemcpyHtoDAsync_v2_ptsz */
199 | CUDA_ENTRY_ENUM(cuMemcpyHtoDAsync_v2_ptsz),
200 | /** cuMemcpyDtoH_v2 */
201 | CUDA_ENTRY_ENUM(cuMemcpyDtoH_v2),
202 | /** cuMemcpyDtoH_v2_ptds */
203 | CUDA_ENTRY_ENUM(cuMemcpyDtoH_v2_ptds),
204 | /** cuMemcpyDtoHAsync_v2 */
205 | CUDA_ENTRY_ENUM(cuMemcpyDtoHAsync_v2),
206 | /** cuMemcpyDtoHAsync_v2_ptsz */
207 | CUDA_ENTRY_ENUM(cuMemcpyDtoHAsync_v2_ptsz),
208 | /** cuMemcpyDtoD_v2 */
209 | CUDA_ENTRY_ENUM(cuMemcpyDtoD_v2),
210 | /** cuMemcpyDtoD_v2_ptds */
211 | CUDA_ENTRY_ENUM(cuMemcpyDtoD_v2_ptds),
212 | /** cuMemcpyDtoDAsync_v2 */
213 | CUDA_ENTRY_ENUM(cuMemcpyDtoDAsync_v2),
214 | /** cuMemcpyDtoDAsync_v2_ptsz */
215 | CUDA_ENTRY_ENUM(cuMemcpyDtoDAsync_v2_ptsz),
216 | /** cuMemcpy2DUnaligned_v2 */
217 | CUDA_ENTRY_ENUM(cuMemcpy2DUnaligned_v2),
218 | /** cuMemcpy2DUnaligned_v2_ptds */
219 | CUDA_ENTRY_ENUM(cuMemcpy2DUnaligned_v2_ptds),
220 | /** cuMemcpy2DAsync_v2 */
221 | CUDA_ENTRY_ENUM(cuMemcpy2DAsync_v2),
222 | /** cuMemcpy2DAsync_v2_ptsz */
223 | CUDA_ENTRY_ENUM(cuMemcpy2DAsync_v2_ptsz),
224 | /** cuMemcpy3D_v2 */
225 | CUDA_ENTRY_ENUM(cuMemcpy3D_v2),
226 | /** cuMemcpy3D_v2_ptds */
227 | CUDA_ENTRY_ENUM(cuMemcpy3D_v2_ptds),
228 | /** cuMemcpy3DAsync_v2 */
229 | CUDA_ENTRY_ENUM(cuMemcpy3DAsync_v2),
230 | /** cuMemcpy3DAsync_v2_ptsz */
231 | CUDA_ENTRY_ENUM(cuMemcpy3DAsync_v2_ptsz),
232 | /** cuMemcpy3DPeer */
233 | CUDA_ENTRY_ENUM(cuMemcpy3DPeer),
234 | /** cuMemcpy3DPeer_ptds */
235 | CUDA_ENTRY_ENUM(cuMemcpy3DPeer_ptds),
236 | /** cuMemcpy3DPeerAsync */
237 | CUDA_ENTRY_ENUM(cuMemcpy3DPeerAsync),
238 | /** cuMemcpy3DPeerAsync_ptsz */
239 | CUDA_ENTRY_ENUM(cuMemcpy3DPeerAsync_ptsz),
240 | /** cuMemsetD8_v2 */
241 | CUDA_ENTRY_ENUM(cuMemsetD8_v2),
242 | /** cuMemsetD8_v2_ptds */
243 | CUDA_ENTRY_ENUM(cuMemsetD8_v2_ptds),
244 | /** cuMemsetD8Async */
245 | CUDA_ENTRY_ENUM(cuMemsetD8Async),
246 | /** cuMemsetD8Async_ptsz */
247 | CUDA_ENTRY_ENUM(cuMemsetD8Async_ptsz),
248 | /** cuMemsetD2D8_v2 */
249 | CUDA_ENTRY_ENUM(cuMemsetD2D8_v2),
250 | /** cuMemsetD2D8_v2_ptds */
251 | CUDA_ENTRY_ENUM(cuMemsetD2D8_v2_ptds),
252 | /** cuMemsetD2D8Async */
253 | CUDA_ENTRY_ENUM(cuMemsetD2D8Async),
254 | /** cuMemsetD2D8Async_ptsz */
255 | CUDA_ENTRY_ENUM(cuMemsetD2D8Async_ptsz),
256 | /** cuFuncSetCacheConfig */
257 | CUDA_ENTRY_ENUM(cuFuncSetCacheConfig),
258 | /** cuFuncSetSharedMemConfig */
259 | CUDA_ENTRY_ENUM(cuFuncSetSharedMemConfig),
260 | /** cuFuncGetAttribute */
261 | CUDA_ENTRY_ENUM(cuFuncGetAttribute),
262 | /** cuArrayCreate_v2 */
263 | CUDA_ENTRY_ENUM(cuArrayCreate_v2),
264 | /** cuArrayGetDescriptor_v2 */
265 | CUDA_ENTRY_ENUM(cuArrayGetDescriptor_v2),
266 | /** cuArray3DCreate_v2 */
267 | CUDA_ENTRY_ENUM(cuArray3DCreate_v2),
268 | /** cuArray3DGetDescriptor_v2 */
269 | CUDA_ENTRY_ENUM(cuArray3DGetDescriptor_v2),
270 | /** cuArrayDestroy */
271 | CUDA_ENTRY_ENUM(cuArrayDestroy),
272 | /** cuMipmappedArrayCreate */
273 | CUDA_ENTRY_ENUM(cuMipmappedArrayCreate),
274 | /** cuMipmappedArrayGetLevel */
275 | CUDA_ENTRY_ENUM(cuMipmappedArrayGetLevel),
276 | /** cuMipmappedArrayDestroy */
277 | CUDA_ENTRY_ENUM(cuMipmappedArrayDestroy),
278 | /** cuTexRefCreate */
279 | CUDA_ENTRY_ENUM(cuTexRefCreate),
280 | /** cuTexRefDestroy */
281 | CUDA_ENTRY_ENUM(cuTexRefDestroy),
282 | /** cuTexRefSetArray */
283 | CUDA_ENTRY_ENUM(cuTexRefSetArray),
284 | /** cuTexRefSetMipmappedArray */
285 | CUDA_ENTRY_ENUM(cuTexRefSetMipmappedArray),
286 | /** cuTexRefSetAddress_v2 */
287 | CUDA_ENTRY_ENUM(cuTexRefSetAddress_v2),
288 | /** cuTexRefSetAddress2D_v3 */
289 | CUDA_ENTRY_ENUM(cuTexRefSetAddress2D_v3),
290 | /** cuTexRefSetFormat */
291 | CUDA_ENTRY_ENUM(cuTexRefSetFormat),
292 | /** cuTexRefSetAddressMode */
293 | CUDA_ENTRY_ENUM(cuTexRefSetAddressMode),
294 | /** cuTexRefSetFilterMode */
295 | CUDA_ENTRY_ENUM(cuTexRefSetFilterMode),
296 | /** cuTexRefSetMipmapFilterMode */
297 | CUDA_ENTRY_ENUM(cuTexRefSetMipmapFilterMode),
298 | /** cuTexRefSetMipmapLevelBias */
299 | CUDA_ENTRY_ENUM(cuTexRefSetMipmapLevelBias),
300 | /** cuTexRefSetMipmapLevelClamp */
301 | CUDA_ENTRY_ENUM(cuTexRefSetMipmapLevelClamp),
302 | /** cuTexRefSetMaxAnisotropy */
303 | CUDA_ENTRY_ENUM(cuTexRefSetMaxAnisotropy),
304 | /** cuTexRefSetFlags */
305 | CUDA_ENTRY_ENUM(cuTexRefSetFlags),
306 | /** cuTexRefSetBorderColor */
307 | CUDA_ENTRY_ENUM(cuTexRefSetBorderColor),
308 | /** cuTexRefGetBorderColor */
309 | CUDA_ENTRY_ENUM(cuTexRefGetBorderColor),
310 | /** cuSurfRefSetArray */
311 | CUDA_ENTRY_ENUM(cuSurfRefSetArray),
312 | /** cuTexObjectCreate */
313 | CUDA_ENTRY_ENUM(cuTexObjectCreate),
314 | /** cuTexObjectDestroy */
315 | CUDA_ENTRY_ENUM(cuTexObjectDestroy),
316 | /** cuTexObjectGetResourceDesc */
317 | CUDA_ENTRY_ENUM(cuTexObjectGetResourceDesc),
318 | /** cuTexObjectGetTextureDesc */
319 | CUDA_ENTRY_ENUM(cuTexObjectGetTextureDesc),
320 | /** cuTexObjectGetResourceViewDesc */
321 | CUDA_ENTRY_ENUM(cuTexObjectGetResourceViewDesc),
322 | /** cuSurfObjectCreate */
323 | CUDA_ENTRY_ENUM(cuSurfObjectCreate),
324 | /** cuSurfObjectDestroy */
325 | CUDA_ENTRY_ENUM(cuSurfObjectDestroy),
326 | /** cuSurfObjectGetResourceDesc */
327 | CUDA_ENTRY_ENUM(cuSurfObjectGetResourceDesc),
328 | /** cuLaunchKernel */
329 | CUDA_ENTRY_ENUM(cuLaunchKernel),
330 | /** cuLaunchKernel_ptsz */
331 | CUDA_ENTRY_ENUM(cuLaunchKernel_ptsz),
332 | /** cuEventCreate */
333 | CUDA_ENTRY_ENUM(cuEventCreate),
334 | /** cuEventRecord */
335 | CUDA_ENTRY_ENUM(cuEventRecord),
336 | /** cuEventRecord_ptsz */
337 | CUDA_ENTRY_ENUM(cuEventRecord_ptsz),
338 | /** cuEventQuery */
339 | CUDA_ENTRY_ENUM(cuEventQuery),
340 | /** cuEventSynchronize */
341 | CUDA_ENTRY_ENUM(cuEventSynchronize),
342 | /** cuEventDestroy_v2 */
343 | CUDA_ENTRY_ENUM(cuEventDestroy_v2),
344 | /** cuEventElapsedTime */
345 | CUDA_ENTRY_ENUM(cuEventElapsedTime),
346 | /** cuStreamWaitValue32 */
347 | CUDA_ENTRY_ENUM(cuStreamWaitValue32),
348 | /** cuStreamWaitValue32_ptsz */
349 | CUDA_ENTRY_ENUM(cuStreamWaitValue32_ptsz),
350 | /** cuStreamWriteValue32 */
351 | CUDA_ENTRY_ENUM(cuStreamWriteValue32),
352 | /** cuStreamWriteValue32_ptsz */
353 | CUDA_ENTRY_ENUM(cuStreamWriteValue32_ptsz),
354 | /** cuStreamBatchMemOp */
355 | CUDA_ENTRY_ENUM(cuStreamBatchMemOp),
356 | /** cuStreamBatchMemOp_ptsz */
357 | CUDA_ENTRY_ENUM(cuStreamBatchMemOp_ptsz),
358 | /** cuStreamCreate */
359 | CUDA_ENTRY_ENUM(cuStreamCreate),
360 | /** cuStreamCreateWithPriority */
361 | CUDA_ENTRY_ENUM(cuStreamCreateWithPriority),
362 | /** cuStreamGetPriority */
363 | CUDA_ENTRY_ENUM(cuStreamGetPriority),
364 | /** cuStreamGetPriority_ptsz */
365 | CUDA_ENTRY_ENUM(cuStreamGetPriority_ptsz),
366 | /** cuStreamGetFlags */
367 | CUDA_ENTRY_ENUM(cuStreamGetFlags),
368 | /** cuStreamGetFlags_ptsz */
369 | CUDA_ENTRY_ENUM(cuStreamGetFlags_ptsz),
370 | /** cuStreamDestroy_v2 */
371 | CUDA_ENTRY_ENUM(cuStreamDestroy_v2),
372 | /** cuStreamWaitEvent */
373 | CUDA_ENTRY_ENUM(cuStreamWaitEvent),
374 | /** cuStreamWaitEvent_ptsz */
375 | CUDA_ENTRY_ENUM(cuStreamWaitEvent_ptsz),
376 | /** cuStreamAddCallback */
377 | CUDA_ENTRY_ENUM(cuStreamAddCallback),
378 | /** cuStreamAddCallback_ptsz */
379 | CUDA_ENTRY_ENUM(cuStreamAddCallback_ptsz),
380 | /** cuStreamSynchronize */
381 | CUDA_ENTRY_ENUM(cuStreamSynchronize),
382 | /** cuStreamSynchronize_ptsz */
383 | CUDA_ENTRY_ENUM(cuStreamSynchronize_ptsz),
384 | /** cuStreamQuery */
385 | CUDA_ENTRY_ENUM(cuStreamQuery),
386 | /** cuStreamQuery_ptsz */
387 | CUDA_ENTRY_ENUM(cuStreamQuery_ptsz),
388 | /** cuStreamAttachMemAsync */
389 | CUDA_ENTRY_ENUM(cuStreamAttachMemAsync),
390 | /** cuStreamAttachMemAsync_ptsz */
391 | CUDA_ENTRY_ENUM(cuStreamAttachMemAsync_ptsz),
392 | /** cuDeviceCanAccessPeer */
393 | CUDA_ENTRY_ENUM(cuDeviceCanAccessPeer),
394 | /** cuCtxEnablePeerAccess */
395 | CUDA_ENTRY_ENUM(cuCtxEnablePeerAccess),
396 | /** cuCtxDisablePeerAccess */
397 | CUDA_ENTRY_ENUM(cuCtxDisablePeerAccess),
398 | /** cuIpcGetEventHandle */
399 | CUDA_ENTRY_ENUM(cuIpcGetEventHandle),
400 | /** cuIpcOpenEventHandle */
401 | CUDA_ENTRY_ENUM(cuIpcOpenEventHandle),
402 | /** cuIpcGetMemHandle */
403 | CUDA_ENTRY_ENUM(cuIpcGetMemHandle),
404 | /** cuIpcOpenMemHandle */
405 | CUDA_ENTRY_ENUM(cuIpcOpenMemHandle),
406 | /** cuIpcCloseMemHandle */
407 | CUDA_ENTRY_ENUM(cuIpcCloseMemHandle),
408 | /** cuGLCtxCreate_v2 */
409 | CUDA_ENTRY_ENUM(cuGLCtxCreate_v2),
410 | /** cuGLInit */
411 | CUDA_ENTRY_ENUM(cuGLInit),
412 | /** cuGLGetDevices */
413 | CUDA_ENTRY_ENUM(cuGLGetDevices),
414 | /** cuGLRegisterBufferObject */
415 | CUDA_ENTRY_ENUM(cuGLRegisterBufferObject),
416 | /** cuGLMapBufferObject_v2 */
417 | CUDA_ENTRY_ENUM(cuGLMapBufferObject_v2),
418 | /** cuGLMapBufferObject_v2_ptds */
419 | CUDA_ENTRY_ENUM(cuGLMapBufferObject_v2_ptds),
420 | /** cuGLMapBufferObjectAsync_v2 */
421 | CUDA_ENTRY_ENUM(cuGLMapBufferObjectAsync_v2),
422 | /** cuGLMapBufferObjectAsync_v2_ptsz */
423 | CUDA_ENTRY_ENUM(cuGLMapBufferObjectAsync_v2_ptsz),
424 | /** cuGLUnmapBufferObject */
425 | CUDA_ENTRY_ENUM(cuGLUnmapBufferObject),
426 | /** cuGLUnmapBufferObjectAsync */
427 | CUDA_ENTRY_ENUM(cuGLUnmapBufferObjectAsync),
428 | /** cuGLUnregisterBufferObject */
429 | CUDA_ENTRY_ENUM(cuGLUnregisterBufferObject),
430 | /** cuGLSetBufferObjectMapFlags */
431 | CUDA_ENTRY_ENUM(cuGLSetBufferObjectMapFlags),
432 | /** cuGraphicsGLRegisterImage */
433 | CUDA_ENTRY_ENUM(cuGraphicsGLRegisterImage),
434 | /** cuGraphicsGLRegisterBuffer */
435 | CUDA_ENTRY_ENUM(cuGraphicsGLRegisterBuffer),
436 | /** cuGraphicsUnregisterResource */
437 | CUDA_ENTRY_ENUM(cuGraphicsUnregisterResource),
438 | /** cuGraphicsMapResources */
439 | CUDA_ENTRY_ENUM(cuGraphicsMapResources),
440 | /** cuGraphicsMapResources_ptsz */
441 | CUDA_ENTRY_ENUM(cuGraphicsMapResources_ptsz),
442 | /** cuGraphicsUnmapResources */
443 | CUDA_ENTRY_ENUM(cuGraphicsUnmapResources),
444 | /** cuGraphicsUnmapResources_ptsz */
445 | CUDA_ENTRY_ENUM(cuGraphicsUnmapResources_ptsz),
446 | /** cuGraphicsResourceSetMapFlags_v2 */
447 | CUDA_ENTRY_ENUM(cuGraphicsResourceSetMapFlags_v2),
448 | /** cuGraphicsSubResourceGetMappedArray */
449 | CUDA_ENTRY_ENUM(cuGraphicsSubResourceGetMappedArray),
450 | /** cuGraphicsResourceGetMappedMipmappedArray */
451 | CUDA_ENTRY_ENUM(cuGraphicsResourceGetMappedMipmappedArray),
452 | /** cuGraphicsResourceGetMappedPointer_v2 */
453 | CUDA_ENTRY_ENUM(cuGraphicsResourceGetMappedPointer_v2),
454 | /** cuProfilerInitialize */
455 | CUDA_ENTRY_ENUM(cuProfilerInitialize),
456 | /** cuProfilerStart */
457 | CUDA_ENTRY_ENUM(cuProfilerStart),
458 | /** cuProfilerStop */
459 | CUDA_ENTRY_ENUM(cuProfilerStop),
460 | /** cuVDPAUGetDevice */
461 | CUDA_ENTRY_ENUM(cuVDPAUGetDevice),
462 | /** cuVDPAUCtxCreate_v2 */
463 | CUDA_ENTRY_ENUM(cuVDPAUCtxCreate_v2),
464 | /** cuGraphicsVDPAURegisterVideoSurface */
465 | CUDA_ENTRY_ENUM(cuGraphicsVDPAURegisterVideoSurface),
466 | /** cuGraphicsVDPAURegisterOutputSurface */
467 | CUDA_ENTRY_ENUM(cuGraphicsVDPAURegisterOutputSurface),
468 | /** cuGetExportTable */
469 | CUDA_ENTRY_ENUM(cuGetExportTable),
470 | /** cuOccupancyMaxActiveBlocksPerMultiprocessor */
471 | CUDA_ENTRY_ENUM(cuOccupancyMaxActiveBlocksPerMultiprocessor),
472 | /** cuMemAdvise */
473 | CUDA_ENTRY_ENUM(cuMemAdvise),
474 | /** cuMemPrefetchAsync */
475 | CUDA_ENTRY_ENUM(cuMemPrefetchAsync),
476 | /** cuMemPrefetchAsync_ptsz */
477 | CUDA_ENTRY_ENUM(cuMemPrefetchAsync_ptsz),
478 | /** cuMemRangeGetAttribute */
479 | CUDA_ENTRY_ENUM(cuMemRangeGetAttribute),
480 | /** cuMemRangeGetAttributes */
481 | CUDA_ENTRY_ENUM(cuMemRangeGetAttributes),
482 | /** cuGetErrorString */
483 | CUDA_ENTRY_ENUM(cuGetErrorString),
484 | /** cuGetErrorName */
485 | CUDA_ENTRY_ENUM(cuGetErrorName),
486 | /** cuArray3DCreate */
487 | CUDA_ENTRY_ENUM(cuArray3DCreate),
488 | /** cuArray3DGetDescriptor */
489 | CUDA_ENTRY_ENUM(cuArray3DGetDescriptor),
490 | /** cuArrayCreate */
491 | CUDA_ENTRY_ENUM(cuArrayCreate),
492 | /** cuArrayGetDescriptor */
493 | CUDA_ENTRY_ENUM(cuArrayGetDescriptor),
494 | /** cuCtxAttach */
495 | CUDA_ENTRY_ENUM(cuCtxAttach),
496 | /** cuCtxCreate */
497 | CUDA_ENTRY_ENUM(cuCtxCreate),
498 | /** cuCtxDestroy */
499 | CUDA_ENTRY_ENUM(cuCtxDestroy),
500 | /** cuCtxDestroy_v2 */
501 | CUDA_ENTRY_ENUM(cuCtxDestroy_v2),
502 | /** cuCtxPopCurrent */
503 | CUDA_ENTRY_ENUM(cuCtxPopCurrent),
504 | /** cuCtxPopCurrent_v2 */
505 | CUDA_ENTRY_ENUM(cuCtxPopCurrent_v2),
506 | /** cuCtxPushCurrent */
507 | CUDA_ENTRY_ENUM(cuCtxPushCurrent),
508 | /** cuCtxPushCurrent_v2 */
509 | CUDA_ENTRY_ENUM(cuCtxPushCurrent_v2),
510 | /** cudbgApiAttach */
511 | CUDA_ENTRY_ENUM(cudbgApiAttach),
512 | /** cudbgApiDetach */
513 | CUDA_ENTRY_ENUM(cudbgApiDetach),
514 | /** cudbgApiInit */
515 | CUDA_ENTRY_ENUM(cudbgApiInit),
516 | /** cudbgGetAPI */
517 | CUDA_ENTRY_ENUM(cudbgGetAPI),
518 | /** cudbgGetAPIVersion */
519 | CUDA_ENTRY_ENUM(cudbgGetAPIVersion),
520 | /** cudbgMain */
521 | CUDA_ENTRY_ENUM(cudbgMain),
522 | /** cudbgReportDriverApiError */
523 | CUDA_ENTRY_ENUM(cudbgReportDriverApiError),
524 | /** cudbgReportDriverInternalError */
525 | CUDA_ENTRY_ENUM(cudbgReportDriverInternalError),
526 | /** cuDeviceComputeCapability */
527 | CUDA_ENTRY_ENUM(cuDeviceComputeCapability),
528 | /** cuDeviceGetProperties */
529 | CUDA_ENTRY_ENUM(cuDeviceGetProperties),
530 | /** cuDeviceTotalMem */
531 | CUDA_ENTRY_ENUM(cuDeviceTotalMem),
532 | /** cuEGLInit */
533 | CUDA_ENTRY_ENUM(cuEGLInit),
534 | /** cuEGLStreamConsumerAcquireFrame */
535 | CUDA_ENTRY_ENUM(cuEGLStreamConsumerAcquireFrame),
536 | /** cuEGLStreamConsumerConnect */
537 | CUDA_ENTRY_ENUM(cuEGLStreamConsumerConnect),
538 | /** cuEGLStreamConsumerConnectWithFlags */
539 | CUDA_ENTRY_ENUM(cuEGLStreamConsumerConnectWithFlags),
540 | /** cuEGLStreamConsumerDisconnect */
541 | CUDA_ENTRY_ENUM(cuEGLStreamConsumerDisconnect),
542 | /** cuEGLStreamConsumerReleaseFrame */
543 | CUDA_ENTRY_ENUM(cuEGLStreamConsumerReleaseFrame),
544 | /** cuEGLStreamProducerConnect */
545 | CUDA_ENTRY_ENUM(cuEGLStreamProducerConnect),
546 | /** cuEGLStreamProducerDisconnect */
547 | CUDA_ENTRY_ENUM(cuEGLStreamProducerDisconnect),
548 | /** cuEGLStreamProducerPresentFrame */
549 | CUDA_ENTRY_ENUM(cuEGLStreamProducerPresentFrame),
550 | /** cuEGLStreamProducerReturnFrame */
551 | CUDA_ENTRY_ENUM(cuEGLStreamProducerReturnFrame),
552 | /** cuEventDestroy */
553 | CUDA_ENTRY_ENUM(cuEventDestroy),
554 | /** cuFuncSetAttribute */
555 | CUDA_ENTRY_ENUM(cuFuncSetAttribute),
556 | /** cuFuncSetBlockShape */
557 | CUDA_ENTRY_ENUM(cuFuncSetBlockShape),
558 | /** cuFuncSetSharedSize */
559 | CUDA_ENTRY_ENUM(cuFuncSetSharedSize),
560 | /** cuGLCtxCreate */
561 | CUDA_ENTRY_ENUM(cuGLCtxCreate),
562 | /** cuGLGetDevices_v2 */
563 | CUDA_ENTRY_ENUM(cuGLGetDevices_v2),
564 | /** cuGLMapBufferObject */
565 | CUDA_ENTRY_ENUM(cuGLMapBufferObject),
566 | /** cuGLMapBufferObjectAsync */
567 | CUDA_ENTRY_ENUM(cuGLMapBufferObjectAsync),
568 | /** cuGraphicsEGLRegisterImage */
569 | CUDA_ENTRY_ENUM(cuGraphicsEGLRegisterImage),
570 | /** cuGraphicsResourceGetMappedEglFrame */
571 | CUDA_ENTRY_ENUM(cuGraphicsResourceGetMappedEglFrame),
572 | /** cuGraphicsResourceGetMappedPointer */
573 | CUDA_ENTRY_ENUM(cuGraphicsResourceGetMappedPointer),
574 | /** cuGraphicsResourceSetMapFlags */
575 | CUDA_ENTRY_ENUM(cuGraphicsResourceSetMapFlags),
576 | /** cuLaunch */
577 | CUDA_ENTRY_ENUM(cuLaunch),
578 | /** cuLaunchCooperativeKernel */
579 | CUDA_ENTRY_ENUM(cuLaunchCooperativeKernel),
580 | /** cuLaunchCooperativeKernelMultiDevice */
581 | CUDA_ENTRY_ENUM(cuLaunchCooperativeKernelMultiDevice),
582 | /** cuLaunchCooperativeKernel_ptsz */
583 | CUDA_ENTRY_ENUM(cuLaunchCooperativeKernel_ptsz),
584 | /** cuLaunchGrid */
585 | CUDA_ENTRY_ENUM(cuLaunchGrid),
586 | /** cuLaunchGridAsync */
587 | CUDA_ENTRY_ENUM(cuLaunchGridAsync),
588 | /** cuLinkAddData_v2 */
589 | CUDA_ENTRY_ENUM(cuLinkAddData_v2),
590 | /** cuLinkAddFile_v2 */
591 | CUDA_ENTRY_ENUM(cuLinkAddFile_v2),
592 | /** cuLinkCreate_v2 */
593 | CUDA_ENTRY_ENUM(cuLinkCreate_v2),
594 | /** cuMemAlloc */
595 | CUDA_ENTRY_ENUM(cuMemAlloc),
596 | /** cuMemAllocHost */
597 | CUDA_ENTRY_ENUM(cuMemAllocHost),
598 | /** cuMemAllocHost_v2 */
599 | CUDA_ENTRY_ENUM(cuMemAllocHost_v2),
600 | /** cuMemAllocPitch */
601 | CUDA_ENTRY_ENUM(cuMemAllocPitch),
602 | /** cuMemcpy2D */
603 | CUDA_ENTRY_ENUM(cuMemcpy2D),
604 | /** cuMemcpy2DAsync */
605 | CUDA_ENTRY_ENUM(cuMemcpy2DAsync),
606 | /** cuMemcpy2DUnaligned */
607 | CUDA_ENTRY_ENUM(cuMemcpy2DUnaligned),
608 | /** cuMemcpy2D_v2 */
609 | CUDA_ENTRY_ENUM(cuMemcpy2D_v2),
610 | /** cuMemcpy2D_v2_ptds */
611 | CUDA_ENTRY_ENUM(cuMemcpy2D_v2_ptds),
612 | /** cuMemcpy3D */
613 | CUDA_ENTRY_ENUM(cuMemcpy3D),
614 | /** cuMemcpy3DAsync */
615 | CUDA_ENTRY_ENUM(cuMemcpy3DAsync),
616 | /** cuMemcpyAtoA */
617 | CUDA_ENTRY_ENUM(cuMemcpyAtoA),
618 | /** cuMemcpyAtoA_v2 */
619 | CUDA_ENTRY_ENUM(cuMemcpyAtoA_v2),
620 | /** cuMemcpyAtoA_v2_ptds */
621 | CUDA_ENTRY_ENUM(cuMemcpyAtoA_v2_ptds),
622 | /** cuMemcpyAtoD */
623 | CUDA_ENTRY_ENUM(cuMemcpyAtoD),
624 | /** cuMemcpyAtoD_v2 */
625 | CUDA_ENTRY_ENUM(cuMemcpyAtoD_v2),
626 | /** cuMemcpyAtoD_v2_ptds */
627 | CUDA_ENTRY_ENUM(cuMemcpyAtoD_v2_ptds),
628 | /** cuMemcpyAtoH */
629 | CUDA_ENTRY_ENUM(cuMemcpyAtoH),
630 | /** cuMemcpyAtoHAsync */
631 | CUDA_ENTRY_ENUM(cuMemcpyAtoHAsync),
632 | /** cuMemcpyAtoHAsync_v2 */
633 | CUDA_ENTRY_ENUM(cuMemcpyAtoHAsync_v2),
634 | /** cuMemcpyAtoHAsync_v2_ptsz */
635 | CUDA_ENTRY_ENUM(cuMemcpyAtoHAsync_v2_ptsz),
636 | /** cuMemcpyAtoH_v2 */
637 | CUDA_ENTRY_ENUM(cuMemcpyAtoH_v2),
638 | /** cuMemcpyAtoH_v2_ptds */
639 | CUDA_ENTRY_ENUM(cuMemcpyAtoH_v2_ptds),
640 | /** cuMemcpyDtoA */
641 | CUDA_ENTRY_ENUM(cuMemcpyDtoA),
642 | /** cuMemcpyDtoA_v2 */
643 | CUDA_ENTRY_ENUM(cuMemcpyDtoA_v2),
644 | /** cuMemcpyDtoA_v2_ptds */
645 | CUDA_ENTRY_ENUM(cuMemcpyDtoA_v2_ptds),
646 | /** cuMemcpyDtoD */
647 | CUDA_ENTRY_ENUM(cuMemcpyDtoD),
648 | /** cuMemcpyDtoDAsync */
649 | CUDA_ENTRY_ENUM(cuMemcpyDtoDAsync),
650 | /** cuMemcpyDtoH */
651 | CUDA_ENTRY_ENUM(cuMemcpyDtoH),
652 | /** cuMemcpyDtoHAsync */
653 | CUDA_ENTRY_ENUM(cuMemcpyDtoHAsync),
654 | /** cuMemcpyHtoA */
655 | CUDA_ENTRY_ENUM(cuMemcpyHtoA),
656 | /** cuMemcpyHtoAAsync */
657 | CUDA_ENTRY_ENUM(cuMemcpyHtoAAsync),
658 | /** cuMemcpyHtoAAsync_v2 */
659 | CUDA_ENTRY_ENUM(cuMemcpyHtoAAsync_v2),
660 | /** cuMemcpyHtoAAsync_v2_ptsz */
661 | CUDA_ENTRY_ENUM(cuMemcpyHtoAAsync_v2_ptsz),
662 | /** cuMemcpyHtoA_v2 */
663 | CUDA_ENTRY_ENUM(cuMemcpyHtoA_v2),
664 | /** cuMemcpyHtoA_v2_ptds */
665 | CUDA_ENTRY_ENUM(cuMemcpyHtoA_v2_ptds),
666 | /** cuMemcpyHtoD */
667 | CUDA_ENTRY_ENUM(cuMemcpyHtoD),
668 | /** cuMemcpyHtoDAsync */
669 | CUDA_ENTRY_ENUM(cuMemcpyHtoDAsync),
670 | /** cuMemFree */
671 | CUDA_ENTRY_ENUM(cuMemFree),
672 | /** cuMemGetAddressRange */
673 | CUDA_ENTRY_ENUM(cuMemGetAddressRange),
674 | // Deprecated
675 | // CUDA_ENTRY_ENUM(cuMemGetAttribute),
676 | // CUDA_ENTRY_ENUM(cuMemGetAttribute_v2),
677 | /** cuMemGetInfo */
678 | CUDA_ENTRY_ENUM(cuMemGetInfo),
679 | /** cuMemHostGetDevicePointer */
680 | CUDA_ENTRY_ENUM(cuMemHostGetDevicePointer),
681 | /** cuMemHostRegister */
682 | CUDA_ENTRY_ENUM(cuMemHostRegister),
683 | /** cuMemsetD16 */
684 | CUDA_ENTRY_ENUM(cuMemsetD16),
685 | /** cuMemsetD16Async */
686 | CUDA_ENTRY_ENUM(cuMemsetD16Async),
687 | /** cuMemsetD16Async_ptsz */
688 | CUDA_ENTRY_ENUM(cuMemsetD16Async_ptsz),
689 | /** cuMemsetD16_v2 */
690 | CUDA_ENTRY_ENUM(cuMemsetD16_v2),
691 | /** cuMemsetD16_v2_ptds */
692 | CUDA_ENTRY_ENUM(cuMemsetD16_v2_ptds),
693 | /** cuMemsetD2D16 */
694 | CUDA_ENTRY_ENUM(cuMemsetD2D16),
695 | /** cuMemsetD2D16Async */
696 | CUDA_ENTRY_ENUM(cuMemsetD2D16Async),
697 | /** cuMemsetD2D16Async_ptsz */
698 | CUDA_ENTRY_ENUM(cuMemsetD2D16Async_ptsz),
699 | /** cuMemsetD2D16_v2 */
700 | CUDA_ENTRY_ENUM(cuMemsetD2D16_v2),
701 | /** cuMemsetD2D16_v2_ptds */
702 | CUDA_ENTRY_ENUM(cuMemsetD2D16_v2_ptds),
703 | /** cuMemsetD2D32 */
704 | CUDA_ENTRY_ENUM(cuMemsetD2D32),
705 | /** cuMemsetD2D32Async */
706 | CUDA_ENTRY_ENUM(cuMemsetD2D32Async),
707 | /** cuMemsetD2D32Async_ptsz */
708 | CUDA_ENTRY_ENUM(cuMemsetD2D32Async_ptsz),
709 | /** cuMemsetD2D32_v2 */
710 | CUDA_ENTRY_ENUM(cuMemsetD2D32_v2),
711 | /** cuMemsetD2D32_v2_ptds */
712 | CUDA_ENTRY_ENUM(cuMemsetD2D32_v2_ptds),
713 | /** cuMemsetD2D8 */
714 | CUDA_ENTRY_ENUM(cuMemsetD2D8),
715 | /** cuMemsetD32 */
716 | CUDA_ENTRY_ENUM(cuMemsetD32),
717 | /** cuMemsetD32Async */
718 | CUDA_ENTRY_ENUM(cuMemsetD32Async),
719 | /** cuMemsetD32Async_ptsz */
720 | CUDA_ENTRY_ENUM(cuMemsetD32Async_ptsz),
721 | /** cuMemsetD32_v2 */
722 | CUDA_ENTRY_ENUM(cuMemsetD32_v2),
723 | /** cuMemsetD32_v2_ptds */
724 | CUDA_ENTRY_ENUM(cuMemsetD32_v2_ptds),
725 | /** cuMemsetD8 */
726 | CUDA_ENTRY_ENUM(cuMemsetD8),
727 | /** cuModuleGetGlobal */
728 | CUDA_ENTRY_ENUM(cuModuleGetGlobal),
729 | /** cuModuleLoadDataEx */
730 | CUDA_ENTRY_ENUM(cuModuleLoadDataEx),
731 | /** cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags */
732 | CUDA_ENTRY_ENUM(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags),
733 | /** cuOccupancyMaxPotentialBlockSize */
734 | CUDA_ENTRY_ENUM(cuOccupancyMaxPotentialBlockSize),
735 | /** cuOccupancyMaxPotentialBlockSizeWithFlags */
736 | CUDA_ENTRY_ENUM(cuOccupancyMaxPotentialBlockSizeWithFlags),
737 | /** cuParamSetf */
738 | CUDA_ENTRY_ENUM(cuParamSetf),
739 | /** cuParamSeti */
740 | CUDA_ENTRY_ENUM(cuParamSeti),
741 | /** cuParamSetSize */
742 | CUDA_ENTRY_ENUM(cuParamSetSize),
743 | /** cuParamSetTexRef */
744 | CUDA_ENTRY_ENUM(cuParamSetTexRef),
745 | /** cuParamSetv */
746 | CUDA_ENTRY_ENUM(cuParamSetv),
747 | /** cuPointerSetAttribute */
748 | CUDA_ENTRY_ENUM(cuPointerSetAttribute),
749 | /** cuStreamDestroy */
750 | CUDA_ENTRY_ENUM(cuStreamDestroy),
751 | /** cuStreamWaitValue64 */
752 | CUDA_ENTRY_ENUM(cuStreamWaitValue64),
753 | /** cuStreamWaitValue64_ptsz */
754 | CUDA_ENTRY_ENUM(cuStreamWaitValue64_ptsz),
755 | /** cuStreamWriteValue64 */
756 | CUDA_ENTRY_ENUM(cuStreamWriteValue64),
757 | /** cuStreamWriteValue64_ptsz */
758 | CUDA_ENTRY_ENUM(cuStreamWriteValue64_ptsz),
759 | /** cuSurfRefGetArray */
760 | CUDA_ENTRY_ENUM(cuSurfRefGetArray),
761 | /** cuTexRefGetAddress */
762 | CUDA_ENTRY_ENUM(cuTexRefGetAddress),
763 | /** cuTexRefGetAddressMode */
764 | CUDA_ENTRY_ENUM(cuTexRefGetAddressMode),
765 | /** cuTexRefGetAddress_v2 */
766 | CUDA_ENTRY_ENUM(cuTexRefGetAddress_v2),
767 | /** cuTexRefGetArray */
768 | CUDA_ENTRY_ENUM(cuTexRefGetArray),
769 | /** cuTexRefGetFilterMode */
770 | CUDA_ENTRY_ENUM(cuTexRefGetFilterMode),
771 | /** cuTexRefGetFlags */
772 | CUDA_ENTRY_ENUM(cuTexRefGetFlags),
773 | /** cuTexRefGetFormat */
774 | CUDA_ENTRY_ENUM(cuTexRefGetFormat),
775 | /** cuTexRefGetMaxAnisotropy */
776 | CUDA_ENTRY_ENUM(cuTexRefGetMaxAnisotropy),
777 | /** cuTexRefGetMipmapFilterMode */
778 | CUDA_ENTRY_ENUM(cuTexRefGetMipmapFilterMode),
779 | /** cuTexRefGetMipmapLevelBias */
780 | CUDA_ENTRY_ENUM(cuTexRefGetMipmapLevelBias),
781 | /** cuTexRefGetMipmapLevelClamp */
782 | CUDA_ENTRY_ENUM(cuTexRefGetMipmapLevelClamp),
783 | /** cuTexRefGetMipmappedArray */
784 | CUDA_ENTRY_ENUM(cuTexRefGetMipmappedArray),
785 | /** cuTexRefSetAddress */
786 | CUDA_ENTRY_ENUM(cuTexRefSetAddress),
787 | /** cuTexRefSetAddress2D */
788 | CUDA_ENTRY_ENUM(cuTexRefSetAddress2D),
789 | /** cuTexRefSetAddress2D_v2 */
790 | CUDA_ENTRY_ENUM(cuTexRefSetAddress2D_v2),
791 | /** cuVDPAUCtxCreate */
792 | CUDA_ENTRY_ENUM(cuVDPAUCtxCreate),
793 | /** cuEGLApiInit */
794 | CUDA_ENTRY_ENUM(cuEGLApiInit),
795 | /** cuDestroyExternalMemory */
796 | CUDA_ENTRY_ENUM(cuDestroyExternalMemory),
797 | /** cuDestroyExternalSemaphore */
798 | CUDA_ENTRY_ENUM(cuDestroyExternalSemaphore),
799 | /** cuDeviceGetUuid */
800 | CUDA_ENTRY_ENUM(cuDeviceGetUuid),
801 | /** cuExternalMemoryGetMappedBuffer */
802 | CUDA_ENTRY_ENUM(cuExternalMemoryGetMappedBuffer),
803 | /** cuExternalMemoryGetMappedMipmappedArray */
804 | CUDA_ENTRY_ENUM(cuExternalMemoryGetMappedMipmappedArray),
805 | /** cuGraphAddChildGraphNode */
806 | CUDA_ENTRY_ENUM(cuGraphAddChildGraphNode),
807 | /** cuGraphAddDependencies */
808 | CUDA_ENTRY_ENUM(cuGraphAddDependencies),
809 | /** cuGraphAddEmptyNode */
810 | CUDA_ENTRY_ENUM(cuGraphAddEmptyNode),
811 | /** cuGraphAddHostNode */
812 | CUDA_ENTRY_ENUM(cuGraphAddHostNode),
813 | /** cuGraphAddKernelNode */
814 | CUDA_ENTRY_ENUM(cuGraphAddKernelNode),
815 | /** cuGraphAddMemcpyNode */
816 | CUDA_ENTRY_ENUM(cuGraphAddMemcpyNode),
817 | /** cuGraphAddMemsetNode */
818 | CUDA_ENTRY_ENUM(cuGraphAddMemsetNode),
819 | /** cuGraphChildGraphNodeGetGraph */
820 | CUDA_ENTRY_ENUM(cuGraphChildGraphNodeGetGraph),
821 | /** cuGraphClone */
822 | CUDA_ENTRY_ENUM(cuGraphClone),
823 | /** cuGraphCreate */
824 | CUDA_ENTRY_ENUM(cuGraphCreate),
825 | /** cuGraphDestroy */
826 | CUDA_ENTRY_ENUM(cuGraphDestroy),
827 | /** cuGraphDestroyNode */
828 | CUDA_ENTRY_ENUM(cuGraphDestroyNode),
829 | /** cuGraphExecDestroy */
830 | CUDA_ENTRY_ENUM(cuGraphExecDestroy),
831 | /** cuGraphGetEdges */
832 | CUDA_ENTRY_ENUM(cuGraphGetEdges),
833 | /** cuGraphGetNodes */
834 | CUDA_ENTRY_ENUM(cuGraphGetNodes),
835 | /** cuGraphGetRootNodes */
836 | CUDA_ENTRY_ENUM(cuGraphGetRootNodes),
837 | /** cuGraphHostNodeGetParams */
838 | CUDA_ENTRY_ENUM(cuGraphHostNodeGetParams),
839 | /** cuGraphHostNodeSetParams */
840 | CUDA_ENTRY_ENUM(cuGraphHostNodeSetParams),
841 | /** cuGraphInstantiate */
842 | CUDA_ENTRY_ENUM(cuGraphInstantiate),
843 | /** cuGraphKernelNodeGetParams */
844 | CUDA_ENTRY_ENUM(cuGraphKernelNodeGetParams),
845 | /** cuGraphKernelNodeSetParams */
846 | CUDA_ENTRY_ENUM(cuGraphKernelNodeSetParams),
847 | /** cuGraphLaunch */
848 | CUDA_ENTRY_ENUM(cuGraphLaunch),
849 | /** cuGraphLaunch_ptsz */
850 | CUDA_ENTRY_ENUM(cuGraphLaunch_ptsz),
851 | /** cuGraphMemcpyNodeGetParams */
852 | CUDA_ENTRY_ENUM(cuGraphMemcpyNodeGetParams),
853 | /** cuGraphMemcpyNodeSetParams */
854 | CUDA_ENTRY_ENUM(cuGraphMemcpyNodeSetParams),
855 | /** cuGraphMemsetNodeGetParams */
856 | CUDA_ENTRY_ENUM(cuGraphMemsetNodeGetParams),
857 | /** cuGraphMemsetNodeSetParams */
858 | CUDA_ENTRY_ENUM(cuGraphMemsetNodeSetParams),
859 | /** cuGraphNodeFindInClone */
860 | CUDA_ENTRY_ENUM(cuGraphNodeFindInClone),
861 | /** cuGraphNodeGetDependencies */
862 | CUDA_ENTRY_ENUM(cuGraphNodeGetDependencies),
863 | /** cuGraphNodeGetDependentNodes */
864 | CUDA_ENTRY_ENUM(cuGraphNodeGetDependentNodes),
865 | /** cuGraphNodeGetType */
866 | CUDA_ENTRY_ENUM(cuGraphNodeGetType),
867 | /** cuGraphRemoveDependencies */
868 | CUDA_ENTRY_ENUM(cuGraphRemoveDependencies),
869 | /** cuImportExternalMemory */
870 | CUDA_ENTRY_ENUM(cuImportExternalMemory),
871 | /** cuImportExternalSemaphore */
872 | CUDA_ENTRY_ENUM(cuImportExternalSemaphore),
873 | /** cuLaunchHostFunc */
874 | CUDA_ENTRY_ENUM(cuLaunchHostFunc),
875 | /** cuLaunchHostFunc_ptsz */
876 | CUDA_ENTRY_ENUM(cuLaunchHostFunc_ptsz),
877 | /** cuSignalExternalSemaphoresAsync */
878 | CUDA_ENTRY_ENUM(cuSignalExternalSemaphoresAsync),
879 | /** cuSignalExternalSemaphoresAsync_ptsz */
880 | CUDA_ENTRY_ENUM(cuSignalExternalSemaphoresAsync_ptsz),
881 | /** cuStreamBeginCapture */
882 | CUDA_ENTRY_ENUM(cuStreamBeginCapture),
883 | /** cuStreamBeginCapture_ptsz */
884 | CUDA_ENTRY_ENUM(cuStreamBeginCapture_ptsz),
885 | /** cuStreamEndCapture */
886 | CUDA_ENTRY_ENUM(cuStreamEndCapture),
887 | /** cuStreamEndCapture_ptsz */
888 | CUDA_ENTRY_ENUM(cuStreamEndCapture_ptsz),
889 | /** cuStreamGetCtx */
890 | CUDA_ENTRY_ENUM(cuStreamGetCtx),
891 | /** cuStreamGetCtx_ptsz */
892 | CUDA_ENTRY_ENUM(cuStreamGetCtx_ptsz),
893 | /** cuStreamIsCapturing */
894 | CUDA_ENTRY_ENUM(cuStreamIsCapturing),
895 | /** cuStreamIsCapturing_ptsz */
896 | CUDA_ENTRY_ENUM(cuStreamIsCapturing_ptsz),
897 | /** cuWaitExternalSemaphoresAsync */
898 | CUDA_ENTRY_ENUM(cuWaitExternalSemaphoresAsync),
899 | /** cuWaitExternalSemaphoresAsync_ptsz */
900 | CUDA_ENTRY_ENUM(cuWaitExternalSemaphoresAsync_ptsz),
901 | /** cuGraphExecKernelNodeSetParams */
902 | CUDA_ENTRY_ENUM(cuGraphExecKernelNodeSetParams),
903 | /** cuStreamBeginCapture_v2 */
904 | CUDA_ENTRY_ENUM(cuStreamBeginCapture_v2),
905 | /** cuStreamBeginCapture_v2_ptsz */
906 | CUDA_ENTRY_ENUM(cuStreamBeginCapture_v2_ptsz),
907 | /** cuStreamGetCaptureInfo */
908 | CUDA_ENTRY_ENUM(cuStreamGetCaptureInfo),
909 | /** cuStreamGetCaptureInfo_ptsz */
910 | CUDA_ENTRY_ENUM(cuStreamGetCaptureInfo_ptsz),
911 | /** cuThreadExchangeStreamCaptureMode */
912 | CUDA_ENTRY_ENUM(cuThreadExchangeStreamCaptureMode),
913 | /** cuDeviceGetNvSciSyncAttributes */
914 | CUDA_ENTRY_ENUM(cuDeviceGetNvSciSyncAttributes),
915 | /** cuGraphExecHostNodeSetParams */
916 | CUDA_ENTRY_ENUM(cuGraphExecHostNodeSetParams),
917 | /** cuGraphExecMemcpyNodeSetParams */
918 | CUDA_ENTRY_ENUM(cuGraphExecMemcpyNodeSetParams),
919 | /** cuGraphExecMemsetNodeSetParams */
920 | CUDA_ENTRY_ENUM(cuGraphExecMemsetNodeSetParams),
921 | /** cuGraphExecUpdate */
922 | CUDA_ENTRY_ENUM(cuGraphExecUpdate),
923 | /** cuMemAddressFree */
924 | CUDA_ENTRY_ENUM(cuMemAddressFree),
925 | /** cuMemAddressReserve */
926 | CUDA_ENTRY_ENUM(cuMemAddressReserve),
927 | /** cuMemCreate */
928 | CUDA_ENTRY_ENUM(cuMemCreate),
929 | /** cuMemExportToShareableHandle */
930 | CUDA_ENTRY_ENUM(cuMemExportToShareableHandle),
931 | /** cuMemGetAccess */
932 | CUDA_ENTRY_ENUM(cuMemGetAccess),
933 | /** cuMemGetAllocationGranularity */
934 | CUDA_ENTRY_ENUM(cuMemGetAllocationGranularity),
935 | /** cuMemGetAllocationPropertiesFromHandle */
936 | CUDA_ENTRY_ENUM(cuMemGetAllocationPropertiesFromHandle),
937 | /** cuMemImportFromShareableHandle */
938 | CUDA_ENTRY_ENUM(cuMemImportFromShareableHandle),
939 | /** cuMemMap */
940 | CUDA_ENTRY_ENUM(cuMemMap),
941 | /** cuMemRelease */
942 | CUDA_ENTRY_ENUM(cuMemRelease),
943 | /** cuMemSetAccess */
944 | CUDA_ENTRY_ENUM(cuMemSetAccess),
945 | /** cuMemUnmap */
946 | CUDA_ENTRY_ENUM(cuMemUnmap),
947 | /** cuCtxResetPersistingL2Cache */
948 | CUDA_ENTRY_ENUM(cuCtxResetPersistingL2Cache),
949 | /** cuDevicePrimaryCtxRelease_v2 */
950 | CUDA_ENTRY_ENUM(cuDevicePrimaryCtxRelease_v2),
951 | /** cuDevicePrimaryCtxReset_v2 */
952 | CUDA_ENTRY_ENUM(cuDevicePrimaryCtxReset_v2),
953 | /** cuDevicePrimaryCtxSetFlags_v2 */
954 | CUDA_ENTRY_ENUM(cuDevicePrimaryCtxSetFlags_v2),
955 | /** cuFuncGetModule */
956 | CUDA_ENTRY_ENUM(cuFuncGetModule),
957 | /** cuGraphInstantiate_v2 */
958 | CUDA_ENTRY_ENUM(cuGraphInstantiate_v2),
959 | /** cuGraphKernelNodeCopyAttributes */
960 | CUDA_ENTRY_ENUM(cuGraphKernelNodeCopyAttributes),
961 | /** cuGraphKernelNodeGetAttribute */
962 | CUDA_ENTRY_ENUM(cuGraphKernelNodeGetAttribute),
963 | /** cuGraphKernelNodeSetAttribute */
964 | CUDA_ENTRY_ENUM(cuGraphKernelNodeSetAttribute),
965 | /** cuMemRetainAllocationHandle */
966 | CUDA_ENTRY_ENUM(cuMemRetainAllocationHandle),
967 | /** cuOccupancyAvailableDynamicSMemPerBlock */
968 | CUDA_ENTRY_ENUM(cuOccupancyAvailableDynamicSMemPerBlock),
969 | /** cuStreamCopyAttributes */
970 | CUDA_ENTRY_ENUM(cuStreamCopyAttributes),
971 | /** cuStreamCopyAttributes_ptsz */
972 | CUDA_ENTRY_ENUM(cuStreamCopyAttributes_ptsz),
973 | /** cuStreamGetAttribute */
974 | CUDA_ENTRY_ENUM(cuStreamGetAttribute),
975 | /** cuStreamGetAttribute_ptsz */
976 | CUDA_ENTRY_ENUM(cuStreamGetAttribute_ptsz),
977 | /** cuStreamSetAttribute */
978 | CUDA_ENTRY_ENUM(cuStreamSetAttribute),
979 | /** cuStreamSetAttribute_ptsz */
980 | CUDA_ENTRY_ENUM(cuStreamSetAttribute_ptsz),
981 | /** 11.2 */
982 | /** cuArrayGetPlane */
983 | CUDA_ENTRY_ENUM(cuArrayGetPlane),
984 | /** cuArrayGetSparseProperties */
985 | CUDA_ENTRY_ENUM(cuArrayGetSparseProperties),
986 | /** cuDeviceGetDefaultMemPool */
987 | CUDA_ENTRY_ENUM(cuDeviceGetDefaultMemPool),
988 | /** cuDeviceGetLuid */
989 | CUDA_ENTRY_ENUM(cuDeviceGetLuid),
990 | /** cuDeviceGetMemPool */
991 | CUDA_ENTRY_ENUM(cuDeviceGetMemPool),
992 | /** cuDeviceGetTexture1DLinearMaxWidth */
993 | CUDA_ENTRY_ENUM(cuDeviceGetTexture1DLinearMaxWidth),
994 | /** cuDeviceSetMemPool */
995 | CUDA_ENTRY_ENUM(cuDeviceSetMemPool),
996 | /** cuEventRecordWithFlags */
997 | CUDA_ENTRY_ENUM(cuEventRecordWithFlags),
998 | /** cuEventRecordWithFlags_ptsz */
999 | CUDA_ENTRY_ENUM(cuEventRecordWithFlags_ptsz),
1000 | /** cuGraphAddEventRecordNode */
1001 | CUDA_ENTRY_ENUM(cuGraphAddEventRecordNode),
1002 | /** cuGraphAddEventWaitNode */
1003 | CUDA_ENTRY_ENUM(cuGraphAddEventWaitNode),
1004 | /** cuGraphAddExternalSemaphoresSignalNode */
1005 | CUDA_ENTRY_ENUM(cuGraphAddExternalSemaphoresSignalNode),
1006 | /** cuGraphAddExternalSemaphoresWaitNode */
1007 | CUDA_ENTRY_ENUM(cuGraphAddExternalSemaphoresWaitNode),
1008 | /** cuGraphEventRecordNodeGetEvent */
1009 | CUDA_ENTRY_ENUM(cuGraphEventRecordNodeGetEvent),
1010 | /** cuGraphEventRecordNodeSetEvent */
1011 | CUDA_ENTRY_ENUM(cuGraphEventRecordNodeSetEvent),
1012 | /** cuGraphEventWaitNodeGetEvent */
1013 | CUDA_ENTRY_ENUM(cuGraphEventWaitNodeGetEvent),
1014 | /** cuGraphEventWaitNodeSetEvent */
1015 | CUDA_ENTRY_ENUM(cuGraphEventWaitNodeSetEvent),
1016 | /** cuGraphExecChildGraphNodeSetParams */
1017 | CUDA_ENTRY_ENUM(cuGraphExecChildGraphNodeSetParams),
1018 | /** cuGraphExecEventRecordNodeSetEvent */
1019 | CUDA_ENTRY_ENUM(cuGraphExecEventRecordNodeSetEvent),
1020 | /** cuGraphExecEventWaitNodeSetEvent */
1021 | CUDA_ENTRY_ENUM(cuGraphExecEventWaitNodeSetEvent),
1022 | /** cuGraphExecExternalSemaphoresSignalNodeSetParams */
1023 | CUDA_ENTRY_ENUM(cuGraphExecExternalSemaphoresSignalNodeSetParams),
1024 | /** cuGraphExecExternalSemaphoresWaitNodeSetParams */
1025 | CUDA_ENTRY_ENUM(cuGraphExecExternalSemaphoresWaitNodeSetParams),
1026 | /** cuGraphExternalSemaphoresSignalNodeGetParams */
1027 | CUDA_ENTRY_ENUM(cuGraphExternalSemaphoresSignalNodeGetParams),
1028 | /** cuGraphExternalSemaphoresSignalNodeSetParams */
1029 | CUDA_ENTRY_ENUM(cuGraphExternalSemaphoresSignalNodeSetParams),
1030 | /** cuGraphExternalSemaphoresWaitNodeGetParams */
1031 | CUDA_ENTRY_ENUM(cuGraphExternalSemaphoresWaitNodeGetParams),
1032 | /** cuGraphExternalSemaphoresWaitNodeSetParams */
1033 | CUDA_ENTRY_ENUM(cuGraphExternalSemaphoresWaitNodeSetParams),
1034 | /** cuGraphUpload */
1035 | CUDA_ENTRY_ENUM(cuGraphUpload),
1036 | /** cuGraphUpload_ptsz */
1037 | CUDA_ENTRY_ENUM(cuGraphUpload_ptsz),
1038 | /** cuIpcOpenMemHandle_v2 */
1039 | CUDA_ENTRY_ENUM(cuIpcOpenMemHandle_v2),
1040 | /** memory pool should be concerned ? */
1041 | /** cuMemAllocAsync */
1042 | CUDA_ENTRY_ENUM(cuMemAllocAsync),
1043 | /** cuMemAllocAsync_ptsz */
1044 | CUDA_ENTRY_ENUM(cuMemAllocAsync_ptsz),
1045 | /** cuMemAllocFromPoolAsync */
1046 | CUDA_ENTRY_ENUM(cuMemAllocFromPoolAsync),
1047 | /** cuMemAllocFromPoolAsync_ptsz */
1048 | CUDA_ENTRY_ENUM(cuMemAllocFromPoolAsync_ptsz),
1049 | /** cuMemFreeAsync */
1050 | CUDA_ENTRY_ENUM(cuMemFreeAsync),
1051 | /** cuMemFreeAsync_ptsz */
1052 | CUDA_ENTRY_ENUM(cuMemFreeAsync_ptsz),
1053 | /** cuMemMapArrayAsync */
1054 | CUDA_ENTRY_ENUM(cuMemMapArrayAsync),
1055 | /** cuMemMapArrayAsync_ptsz */
1056 | CUDA_ENTRY_ENUM(cuMemMapArrayAsync_ptsz),
1057 | /** cuMemPoolCreate */
1058 | CUDA_ENTRY_ENUM(cuMemPoolCreate),
1059 | /** cuMemPoolDestroy */
1060 | CUDA_ENTRY_ENUM(cuMemPoolDestroy),
1061 | /** cuMemPoolExportPointer */
1062 | CUDA_ENTRY_ENUM(cuMemPoolExportPointer),
1063 | /** cuMemPoolExportToShareableHandle */
1064 | CUDA_ENTRY_ENUM(cuMemPoolExportToShareableHandle),
1065 | /** cuMemPoolGetAccess */
1066 | CUDA_ENTRY_ENUM(cuMemPoolGetAccess),
1067 | /** cuMemPoolGetAttribute */
1068 | CUDA_ENTRY_ENUM(cuMemPoolGetAttribute),
1069 | /** cuMemPoolImportFromShareableHandle */
1070 | CUDA_ENTRY_ENUM(cuMemPoolImportFromShareableHandle),
1071 | /** cuMemPoolImportPointer */
1072 | CUDA_ENTRY_ENUM(cuMemPoolImportPointer),
1073 | /** cuMemPoolSetAccess */
1074 | CUDA_ENTRY_ENUM(cuMemPoolSetAccess),
1075 | /** cuMemPoolSetAttribute */
1076 | CUDA_ENTRY_ENUM(cuMemPoolSetAttribute),
1077 | /** cuMemPoolTrimTo */
1078 | CUDA_ENTRY_ENUM(cuMemPoolTrimTo),
1079 | /** cuMipmappedArrayGetSparseProperties */
1080 | CUDA_ENTRY_ENUM(cuMipmappedArrayGetSparseProperties),
1081 | CUDA_ENTRY_ENUM(cuCtxCreate_v3),
1082 | CUDA_ENTRY_ENUM(cuCtxGetExecAffinity),
1083 | CUDA_ENTRY_ENUM(cuDeviceGetExecAffinitySupport),
1084 | CUDA_ENTRY_ENUM(cuDeviceGetGraphMemAttribute),
1085 | CUDA_ENTRY_ENUM(cuDeviceGetUuid_v2),
1086 | CUDA_ENTRY_ENUM(cuDeviceGraphMemTrim),
1087 | CUDA_ENTRY_ENUM(cuDeviceSetGraphMemAttribute),
1088 | CUDA_ENTRY_ENUM(cuFlushGPUDirectRDMAWrites),
1089 | CUDA_ENTRY_ENUM(cuGetProcAddress),
1090 | CUDA_ENTRY_ENUM(cuGraphAddMemAllocNode),
1091 | CUDA_ENTRY_ENUM(cuGraphAddMemFreeNode),
1092 | CUDA_ENTRY_ENUM(cuGraphDebugDotPrint),
1093 | CUDA_ENTRY_ENUM(cuGraphInstantiateWithFlags),
1094 | CUDA_ENTRY_ENUM(cuGraphMemAllocNodeGetParams),
1095 | CUDA_ENTRY_ENUM(cuGraphMemFreeNodeGetParams),
1096 | CUDA_ENTRY_ENUM(cuGraphReleaseUserObject),
1097 | CUDA_ENTRY_ENUM(cuGraphRetainUserObject),
1098 | CUDA_ENTRY_ENUM(cuStreamGetCaptureInfo_v2),
1099 | CUDA_ENTRY_ENUM(cuStreamGetCaptureInfo_v2_ptsz),
1100 | CUDA_ENTRY_ENUM(cuStreamUpdateCaptureDependencies),
1101 | CUDA_ENTRY_ENUM(cuStreamUpdateCaptureDependencies_ptsz),
1102 | CUDA_ENTRY_ENUM(cuUserObjectCreate),
1103 | CUDA_ENTRY_ENUM(cuUserObjectRelease),
1104 | CUDA_ENTRY_ENUM(cuUserObjectRetain),
1105 | CUDA_ENTRY_END
1106 | } cuda_entry_enum_t;
1107 |
/**
 * CUDA driver library function pointer returning CUresult. The empty
 * parameter list leaves the argument types unspecified (pre-C23 C
 * semantics), so this single pointer type can hold driver entry points
 * of any signature from the entry table above.
 */
typedef CUresult (*cuda_sym_t)();
1112 |
/**
 * CUDA debug library function pointer returning void. Parameters are
 * intentionally left unspecified (empty parentheses, pre-C23 semantics)
 * so one type covers all void-returning cudbg* entry points.
 */
typedef void (*cuda_debug_void_sym_t)();
1117 |
/**
 * CUDA debug library function pointer returning CUDBGResult. Parameters
 * are intentionally left unspecified (empty parentheses, pre-C23
 * semantics) so one type covers all CUDBGResult-returning entry points.
 */
typedef CUDBGResult (*cuda_debug_result_sym_t)();
1122 |
1123 | #ifdef __cplusplus
1124 | }
1125 | #endif
1126 |
1127 | #endif // HIJACK_CUDA_HELPER_H
1128 |
--------------------------------------------------------------------------------
/include/hijack.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Tencent is pleased to support the open source community by making TKEStack
3 | * available.
4 | *
5 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved.
6 | *
7 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
8 | * use this file except in compliance with the License. You may obtain a copy of
9 | * the License at
10 | *
11 | * https://opensource.org/licenses/Apache-2.0
12 | *
13 | * Unless required by applicable law or agreed to in writing, software
14 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
15 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations under the License.
17 | */
18 |
19 | #ifndef HIJACK_LIBRARY_H
20 | #define HIJACK_LIBRARY_H
21 |
22 | #ifdef __cplusplus
23 | extern "C" {
24 | #endif
25 |
/* NOTE(review): the system header names on the next six #include lines were
 * stripped by angle-bracket-eating markup during extraction. Reconstructed
 * below from visible usage (FILENAME_MAX -> stdio.h, uint64_t -> stdint.h)
 * and common practice — verify against the upstream file. */
#include <limits.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
32 |
33 | #include "nvml-subset.h"
34 |
/**
 * Controller configuration base path (note: already ends with '/').
 */
#define VCUDA_CONFIG_PATH "/etc/vcuda/"

/**
 * Controller pid information file name
 */
#define PIDS_CONFIG_NAME "pids.config"

/**
 * Controller configuration file name
 */
#define CONTROLLER_CONFIG_NAME "vcuda.config"
/* Full paths built by string-literal concatenation. Since VCUDA_CONFIG_PATH
 * already ends with '/', these contain a double slash ("//") — harmless on
 * POSIX paths, so left as-is. */
#define PIDS_CONFIG_PATH (VCUDA_CONFIG_PATH "/" PIDS_CONFIG_NAME)
#define CONTROLLER_CONFIG_PATH (VCUDA_CONFIG_PATH "/" CONTROLLER_CONFIG_NAME)

/**
 * RPC binary base path
 */
#define RPC_CLIENT_PATH "/usr/local/nvidia/bin/"

/**
 * RPC binary file name
 */
#define RPC_CLIENT_NAME "gpu-client"

/**
 * RPC address ("/etc/vcuda/vcuda.sock"). Unlike the *_CONFIG_PATH macros
 * above, no extra "/" is inserted here.
 */
#define RPC_ADDR (VCUDA_CONFIG_PATH "vcuda.sock")

/**
 * Default prefix for cgroup path
 */
#define EMPTY_PREFIX "0xdead"

/**
 * Proc file path for driver version
 */
#define DRIVER_VERSION_PROC_PATH "/proc/driver/nvidia/version"

/**
 * Regular-expression pattern matching a dotted driver version string
 * (one or more digit groups separated by '.', e.g. "460.32.03").
 */
#define DRIVER_VERSION_MATCH_PATTERN "([0-9]+)(\\.[0-9]+)+"

/**
 * Max sample pid size
 */
#define MAX_PIDS (1024)
86 |
87 | #define likely(x) __builtin_expect(!!(x), 1)
88 | #define unlikely(x) __builtin_expect(!!(x), 0)
89 |
90 | #define ROUND_UP(n, base) ((n) % (base) ? (n) + (base) - (n) % (base) : (n))
91 |
92 | #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2 * !!(condition)]))
93 |
94 | #define CAS(ptr, old, new) __sync_bool_compare_and_swap((ptr), (old), (new))
95 | #define UNUSED __attribute__((unused))
96 |
97 | #define MILLISEC (1000UL * 1000UL)
98 |
99 | #define TIME_TICK (10)
100 | #define FACTOR (32)
101 | #define MAX_UTILIZATION (100)
102 | #define CHANGE_LIMIT_INTERVAL (30)
103 | #define USAGE_THRESHOLD (5)
104 |
105 | #define GET_VALID_VALUE(x) (((x) >= 0 && (x) <= 100) ? (x) : 0)
106 | #define CODEC_NORMALIZE(x) (x * 85 / 100)
107 |
/**
 * One row of a symbol-dispatch table: maps an exported symbol name to the
 * function pointer resolved from the real driver library.
 */
typedef struct {
  void *fn_ptr; /* resolved function pointer in the underlying library */
  char *name;   /* symbol name used for the lookup */
} entry_t;
112 |
/**
 * NVIDIA driver version, split into major.minor components.
 * packed + aligned(8): presumably to keep a stable binary layout for the
 * configuration exchanged with the controller — confirm against writer side.
 */
typedef struct {
  int major;
  int minor;
} __attribute__((packed, aligned(8))) version_t;
117 |
/**
 * Controller configuration data format
 *
 * packed + aligned(8): binary layout is load-bearing — this struct is read
 * from CONTROLLER_CONFIG_PATH, so fields must not be reordered or resized.
 */
typedef struct {
  char pod_uid[48];                 /* Kubernetes pod UID */
  int limit;                        /* NOTE(review): unit not visible here — confirm */
  char occupied[4044];              /* NOTE(review): opaque blob; semantics defined by writer */
  char container_name[FILENAME_MAX];
  char bus_id[NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE]; /* PCI bus id of the GPU */
  uint64_t gpu_memory;              /* GPU memory quota, bytes presumably — confirm */
  int utilization;                  /* target GPU utilization (percent) */
  int hard_limit;                   /* non-zero -> enforce utilization as a hard cap */
  version_t driver_version;         /* host NVIDIA driver version */
  int enable;                       /* non-zero -> vcuda limiting enabled */
} __attribute__((packed, aligned(8))) resource_data_t;
133 |
/**
 * Log severity levels for LOGGER(): a message prints when its level is
 * <= the configured LOGGER_LEVEL (default 3, see LOGGER).
 * NOTE(review): ordering is unusual — ERROR (1) ranks as more severe than
 * WARNING (2), and VERBOSE (4) is less severe than FATAL (3).
 */
typedef enum {
  INFO = 0,
  ERROR = 1,
  WARNING = 2,
  FATAL = 3,
  VERBOSE = 4,
} log_level_enum_t;
141 |
/**
 * Logging macro (GNU statement expression).
 *
 * Prints "file:line <message>\n" to stderr when `level` is <= the
 * LOGGER_LEVEL environment variable; the default threshold is 3, so
 * VERBOSE (4) messages are suppressed unless LOGGER_LEVEL >= 4.
 * A FATAL message additionally terminates the process via exit(-1).
 * Note: getenv() is re-read on every invocation, so the level can be
 * changed between calls (at the cost of a lookup per log statement).
 */
#define LOGGER(level, format, ...)                                             \
  ({                                                                           \
    char *_print_level_str = getenv("LOGGER_LEVEL");                           \
    int _print_level = 3;                                                      \
    if (_print_level_str) {                                                    \
      _print_level = (int)strtoul(_print_level_str, NULL, 10);                 \
      _print_level = _print_level < 0 ? 3 : _print_level;                      \
    }                                                                          \
    if (level <= _print_level) {                                               \
      fprintf(stderr, "%s:%d " format "\n", __FILE__, __LINE__,                \
              ##__VA_ARGS__);                                                  \
    }                                                                          \
    if (level == FATAL) {                                                      \
      exit(-1);                                                                \
    }                                                                          \
  })
158 |
/**
 * Read controller configuration from \aCONTROLLER_CONFIG_PATH
 *
 * @return 0 -> success
 */
int read_controller_configuration();

/**
 * Load library and initialize some data
 */
void load_necessary_data();

/**
 * Register data to remote controller to retrieve configuration
 *
 * @param bus_id bus id of the GPU card
 * @param pod_uid pod uid of Pod
 * @param container_name container name of Pod
 */
void register_to_remote_with_data(const char *bus_id, const char *pod_uid,
                                  const char *container_name);

/**
 * Tell whether a custom (new-style) controller configuration path is in use
 * rather than the legacy default location.
 *
 * @return 1 -> using new (custom) path, 0 -> using old path
 */
int is_custom_config_path();
187 |
188 | #ifdef __cplusplus
189 | }
190 | #endif
191 |
192 | #endif
193 |
--------------------------------------------------------------------------------
/include/nvml-helper.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Tencent is pleased to support the open source community by making TKEStack
3 | * available.
4 | *
5 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved.
6 | *
7 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
8 | * use this file except in compliance with the License. You may obtain a copy of
9 | * the License at
10 | *
11 | * https://opensource.org/licenses/Apache-2.0
12 | *
13 | * Unless required by applicable law or agreed to in writing, software
14 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
15 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations under the License.
17 | */
18 |
19 | #ifndef HIJACK_NVML_HELPER_H
20 | #define HIJACK_NVML_HELPER_H
21 |
22 | #ifdef __cplusplus
23 | extern "C" {
24 | #endif
25 |
26 | #include
27 |
28 | #include "hijack.h"
29 | #include "nvml-subset.h"
30 |
31 | /**
32 | * NVML management library prefix
33 | */
34 | #define DRIVER_ML_LIBRARY_PREFIX "libnvidia-ml.so"
35 |
36 | #define NVML_ENTRY_ENUM(x) ENTRY_##x
37 |
38 | #define NVML_FIND_ENTRY(table, sym) ({ (table)[NVML_ENTRY_ENUM(sym)].fn_ptr; })
39 |
40 | #define NVML_ENTRY_CALL(table, sym, ...) \
41 | ({ \
42 | LOGGER(5, "Hijacking %s\n", #sym); \
43 | driver_sym_t _entry = NVML_FIND_ENTRY(table, sym); \
44 | _entry(__VA_ARGS__); \
45 | })
46 |
47 | typedef nvmlReturn_t (*driver_sym_t)();
48 |
/**
 * NVML management library enumerator entry
 *
 * Each enumerator is the index of the corresponding resolved symbol in
 * nvml_library_entry[] (presumably populated at load time — defining TU not
 * visible here). Positions are therefore load-bearing: do NOT reorder or
 * remove entries; append new ones immediately before NVML_ENTRY_END.
 */
typedef enum {
  /** nvmlInit */
  NVML_ENTRY_ENUM(nvmlInit),
  /** nvmlShutdown */
  NVML_ENTRY_ENUM(nvmlShutdown),
  /** nvmlErrorString */
  NVML_ENTRY_ENUM(nvmlErrorString),
  /** nvmlDeviceGetHandleByIndex */
  NVML_ENTRY_ENUM(nvmlDeviceGetHandleByIndex),
  /** nvmlDeviceGetComputeRunningProcesses */
  NVML_ENTRY_ENUM(nvmlDeviceGetComputeRunningProcesses),
  /** nvmlDeviceGetPciInfo */
  NVML_ENTRY_ENUM(nvmlDeviceGetPciInfo),
  /** nvmlDeviceGetProcessUtilization */
  NVML_ENTRY_ENUM(nvmlDeviceGetProcessUtilization),
  /** nvmlDeviceGetCount */
  NVML_ENTRY_ENUM(nvmlDeviceGetCount),
  /** nvmlDeviceClearAccountingPids */
  NVML_ENTRY_ENUM(nvmlDeviceClearAccountingPids),
  /** nvmlDeviceClearCpuAffinity */
  NVML_ENTRY_ENUM(nvmlDeviceClearCpuAffinity),
  /** nvmlDeviceClearEccErrorCounts */
  NVML_ENTRY_ENUM(nvmlDeviceClearEccErrorCounts),
  /** nvmlDeviceDiscoverGpus */
  NVML_ENTRY_ENUM(nvmlDeviceDiscoverGpus),
  /** nvmlDeviceFreezeNvLinkUtilizationCounter */
  NVML_ENTRY_ENUM(nvmlDeviceFreezeNvLinkUtilizationCounter),
  /** nvmlDeviceGetAccountingBufferSize */
  NVML_ENTRY_ENUM(nvmlDeviceGetAccountingBufferSize),
  /** nvmlDeviceGetAccountingMode */
  NVML_ENTRY_ENUM(nvmlDeviceGetAccountingMode),
  /** nvmlDeviceGetAccountingPids */
  NVML_ENTRY_ENUM(nvmlDeviceGetAccountingPids),
  /** nvmlDeviceGetAccountingStats */
  NVML_ENTRY_ENUM(nvmlDeviceGetAccountingStats),
  /** nvmlDeviceGetActiveVgpus */
  NVML_ENTRY_ENUM(nvmlDeviceGetActiveVgpus),
  /** nvmlDeviceGetAPIRestriction */
  NVML_ENTRY_ENUM(nvmlDeviceGetAPIRestriction),
  /** nvmlDeviceGetApplicationsClock */
  NVML_ENTRY_ENUM(nvmlDeviceGetApplicationsClock),
  /** nvmlDeviceGetAutoBoostedClocksEnabled */
  NVML_ENTRY_ENUM(nvmlDeviceGetAutoBoostedClocksEnabled),
  /** nvmlDeviceGetBAR1MemoryInfo */
  NVML_ENTRY_ENUM(nvmlDeviceGetBAR1MemoryInfo),
  /** nvmlDeviceGetBoardId */
  NVML_ENTRY_ENUM(nvmlDeviceGetBoardId),
  /** nvmlDeviceGetBoardPartNumber */
  NVML_ENTRY_ENUM(nvmlDeviceGetBoardPartNumber),
  /** nvmlDeviceGetBrand */
  NVML_ENTRY_ENUM(nvmlDeviceGetBrand),
  /** nvmlDeviceGetBridgeChipInfo */
  NVML_ENTRY_ENUM(nvmlDeviceGetBridgeChipInfo),
  /** nvmlDeviceGetClock */
  NVML_ENTRY_ENUM(nvmlDeviceGetClock),
  /** nvmlDeviceGetClockInfo */
  NVML_ENTRY_ENUM(nvmlDeviceGetClockInfo),
  /** nvmlDeviceGetComputeMode */
  NVML_ENTRY_ENUM(nvmlDeviceGetComputeMode),
  /** nvmlDeviceGetCount_v2 */
  NVML_ENTRY_ENUM(nvmlDeviceGetCount_v2),
  /** nvmlDeviceGetCpuAffinity */
  NVML_ENTRY_ENUM(nvmlDeviceGetCpuAffinity),
  /** nvmlDeviceGetCreatableVgpus */
  NVML_ENTRY_ENUM(nvmlDeviceGetCreatableVgpus),
  /** nvmlDeviceGetCudaComputeCapability */
  NVML_ENTRY_ENUM(nvmlDeviceGetCudaComputeCapability),
  /** nvmlDeviceGetCurrentClocksThrottleReasons */
  NVML_ENTRY_ENUM(nvmlDeviceGetCurrentClocksThrottleReasons),
  /** nvmlDeviceGetCurrPcieLinkGeneration */
  NVML_ENTRY_ENUM(nvmlDeviceGetCurrPcieLinkGeneration),
  /** nvmlDeviceGetCurrPcieLinkWidth */
  NVML_ENTRY_ENUM(nvmlDeviceGetCurrPcieLinkWidth),
  /** nvmlDeviceGetDecoderUtilization */
  NVML_ENTRY_ENUM(nvmlDeviceGetDecoderUtilization),
  /** nvmlDeviceGetDefaultApplicationsClock */
  NVML_ENTRY_ENUM(nvmlDeviceGetDefaultApplicationsClock),
  /** nvmlDeviceGetDetailedEccErrors */
  NVML_ENTRY_ENUM(nvmlDeviceGetDetailedEccErrors),
  /** nvmlDeviceGetDisplayActive */
  NVML_ENTRY_ENUM(nvmlDeviceGetDisplayActive),
  /** nvmlDeviceGetDisplayMode */
  NVML_ENTRY_ENUM(nvmlDeviceGetDisplayMode),
  /** nvmlDeviceGetDriverModel */
  NVML_ENTRY_ENUM(nvmlDeviceGetDriverModel),
  /** nvmlDeviceGetEccMode */
  NVML_ENTRY_ENUM(nvmlDeviceGetEccMode),
  /** nvmlDeviceGetEncoderCapacity */
  NVML_ENTRY_ENUM(nvmlDeviceGetEncoderCapacity),
  /** nvmlDeviceGetEncoderSessions */
  NVML_ENTRY_ENUM(nvmlDeviceGetEncoderSessions),
  /** nvmlDeviceGetEncoderStats */
  NVML_ENTRY_ENUM(nvmlDeviceGetEncoderStats),
  /** nvmlDeviceGetEncoderUtilization */
  NVML_ENTRY_ENUM(nvmlDeviceGetEncoderUtilization),
  /** nvmlDeviceGetEnforcedPowerLimit */
  NVML_ENTRY_ENUM(nvmlDeviceGetEnforcedPowerLimit),
  /** nvmlDeviceGetFanSpeed */
  NVML_ENTRY_ENUM(nvmlDeviceGetFanSpeed),
  /** nvmlDeviceGetFanSpeed_v2 */
  NVML_ENTRY_ENUM(nvmlDeviceGetFanSpeed_v2),
  /** nvmlDeviceGetFieldValues */
  NVML_ENTRY_ENUM(nvmlDeviceGetFieldValues),
  /** nvmlDeviceGetGpuOperationMode */
  NVML_ENTRY_ENUM(nvmlDeviceGetGpuOperationMode),
  /** nvmlDeviceGetGraphicsRunningProcesses */
  NVML_ENTRY_ENUM(nvmlDeviceGetGraphicsRunningProcesses),
  /** nvmlDeviceGetGridLicensableFeatures */
  NVML_ENTRY_ENUM(nvmlDeviceGetGridLicensableFeatures),
  /** nvmlDeviceGetHandleByIndex_v2 */
  NVML_ENTRY_ENUM(nvmlDeviceGetHandleByIndex_v2),
  /** nvmlDeviceGetHandleByPciBusId */
  NVML_ENTRY_ENUM(nvmlDeviceGetHandleByPciBusId),
  /** nvmlDeviceGetHandleByPciBusId_v2 */
  NVML_ENTRY_ENUM(nvmlDeviceGetHandleByPciBusId_v2),
  /** nvmlDeviceGetHandleBySerial */
  NVML_ENTRY_ENUM(nvmlDeviceGetHandleBySerial),
  /** nvmlDeviceGetHandleByUUID */
  NVML_ENTRY_ENUM(nvmlDeviceGetHandleByUUID),
  /** nvmlDeviceGetIndex */
  NVML_ENTRY_ENUM(nvmlDeviceGetIndex),
  /** nvmlDeviceGetInforomConfigurationChecksum */
  NVML_ENTRY_ENUM(nvmlDeviceGetInforomConfigurationChecksum),
  /** nvmlDeviceGetInforomImageVersion */
  NVML_ENTRY_ENUM(nvmlDeviceGetInforomImageVersion),
  /** nvmlDeviceGetInforomVersion */
  NVML_ENTRY_ENUM(nvmlDeviceGetInforomVersion),
  /** nvmlDeviceGetMaxClockInfo */
  NVML_ENTRY_ENUM(nvmlDeviceGetMaxClockInfo),
  /** nvmlDeviceGetMaxCustomerBoostClock */
  NVML_ENTRY_ENUM(nvmlDeviceGetMaxCustomerBoostClock),
  /** nvmlDeviceGetMaxPcieLinkGeneration */
  NVML_ENTRY_ENUM(nvmlDeviceGetMaxPcieLinkGeneration),
  /** nvmlDeviceGetMaxPcieLinkWidth */
  NVML_ENTRY_ENUM(nvmlDeviceGetMaxPcieLinkWidth),
  /** nvmlDeviceGetMemoryErrorCounter */
  NVML_ENTRY_ENUM(nvmlDeviceGetMemoryErrorCounter),
  /** nvmlDeviceGetMemoryInfo */
  NVML_ENTRY_ENUM(nvmlDeviceGetMemoryInfo),
  /** nvmlDeviceGetMinorNumber */
  NVML_ENTRY_ENUM(nvmlDeviceGetMinorNumber),
  /** nvmlDeviceGetMPSComputeRunningProcesses */
  NVML_ENTRY_ENUM(nvmlDeviceGetMPSComputeRunningProcesses),
  /** nvmlDeviceGetMultiGpuBoard */
  NVML_ENTRY_ENUM(nvmlDeviceGetMultiGpuBoard),
  /** nvmlDeviceGetName */
  NVML_ENTRY_ENUM(nvmlDeviceGetName),
  /** nvmlDeviceGetNvLinkCapability */
  NVML_ENTRY_ENUM(nvmlDeviceGetNvLinkCapability),
  /** nvmlDeviceGetNvLinkErrorCounter */
  NVML_ENTRY_ENUM(nvmlDeviceGetNvLinkErrorCounter),
  /** nvmlDeviceGetNvLinkRemotePciInfo */
  NVML_ENTRY_ENUM(nvmlDeviceGetNvLinkRemotePciInfo),
  /** nvmlDeviceGetNvLinkRemotePciInfo_v2 */
  NVML_ENTRY_ENUM(nvmlDeviceGetNvLinkRemotePciInfo_v2),
  /** nvmlDeviceGetNvLinkState */
  NVML_ENTRY_ENUM(nvmlDeviceGetNvLinkState),
  /** nvmlDeviceGetNvLinkUtilizationControl */
  NVML_ENTRY_ENUM(nvmlDeviceGetNvLinkUtilizationControl),
  /** nvmlDeviceGetNvLinkUtilizationCounter */
  NVML_ENTRY_ENUM(nvmlDeviceGetNvLinkUtilizationCounter),
  /** nvmlDeviceGetNvLinkVersion */
  NVML_ENTRY_ENUM(nvmlDeviceGetNvLinkVersion),
  /** nvmlDeviceGetP2PStatus */
  NVML_ENTRY_ENUM(nvmlDeviceGetP2PStatus),
  /** nvmlDeviceGetPcieReplayCounter */
  NVML_ENTRY_ENUM(nvmlDeviceGetPcieReplayCounter),
  /** nvmlDeviceGetPcieThroughput */
  NVML_ENTRY_ENUM(nvmlDeviceGetPcieThroughput),
  /** nvmlDeviceGetPciInfo_v2 */
  NVML_ENTRY_ENUM(nvmlDeviceGetPciInfo_v2),
  /** nvmlDeviceGetPciInfo_v3 */
  NVML_ENTRY_ENUM(nvmlDeviceGetPciInfo_v3),
  /** nvmlDeviceGetPerformanceState */
  NVML_ENTRY_ENUM(nvmlDeviceGetPerformanceState),
  /** nvmlDeviceGetPersistenceMode */
  NVML_ENTRY_ENUM(nvmlDeviceGetPersistenceMode),
  /** nvmlDeviceGetPowerManagementDefaultLimit */
  NVML_ENTRY_ENUM(nvmlDeviceGetPowerManagementDefaultLimit),
  /** nvmlDeviceGetPowerManagementLimit */
  NVML_ENTRY_ENUM(nvmlDeviceGetPowerManagementLimit),
  /** nvmlDeviceGetPowerManagementLimitConstraints */
  NVML_ENTRY_ENUM(nvmlDeviceGetPowerManagementLimitConstraints),
  /** nvmlDeviceGetPowerManagementMode */
  NVML_ENTRY_ENUM(nvmlDeviceGetPowerManagementMode),
  /** nvmlDeviceGetPowerState */
  NVML_ENTRY_ENUM(nvmlDeviceGetPowerState),
  /** nvmlDeviceGetPowerUsage */
  NVML_ENTRY_ENUM(nvmlDeviceGetPowerUsage),
  /** nvmlDeviceGetRetiredPages */
  NVML_ENTRY_ENUM(nvmlDeviceGetRetiredPages),
  /** nvmlDeviceGetRetiredPagesPendingStatus */
  NVML_ENTRY_ENUM(nvmlDeviceGetRetiredPagesPendingStatus),
  /** nvmlDeviceGetSamples */
  NVML_ENTRY_ENUM(nvmlDeviceGetSamples),
  /** nvmlDeviceGetSerial */
  NVML_ENTRY_ENUM(nvmlDeviceGetSerial),
  /** nvmlDeviceGetSupportedClocksThrottleReasons */
  NVML_ENTRY_ENUM(nvmlDeviceGetSupportedClocksThrottleReasons),
  /** nvmlDeviceGetSupportedEventTypes */
  NVML_ENTRY_ENUM(nvmlDeviceGetSupportedEventTypes),
  /** nvmlDeviceGetSupportedGraphicsClocks */
  NVML_ENTRY_ENUM(nvmlDeviceGetSupportedGraphicsClocks),
  /** nvmlDeviceGetSupportedMemoryClocks */
  NVML_ENTRY_ENUM(nvmlDeviceGetSupportedMemoryClocks),
  /** nvmlDeviceGetSupportedVgpus */
  NVML_ENTRY_ENUM(nvmlDeviceGetSupportedVgpus),
  /** nvmlDeviceGetTemperature */
  NVML_ENTRY_ENUM(nvmlDeviceGetTemperature),
  /** nvmlDeviceGetTemperatureThreshold */
  NVML_ENTRY_ENUM(nvmlDeviceGetTemperatureThreshold),
  /** nvmlDeviceGetTopologyCommonAncestor */
  NVML_ENTRY_ENUM(nvmlDeviceGetTopologyCommonAncestor),
  /** nvmlDeviceGetTopologyNearestGpus */
  NVML_ENTRY_ENUM(nvmlDeviceGetTopologyNearestGpus),
  /** nvmlDeviceGetTotalEccErrors */
  NVML_ENTRY_ENUM(nvmlDeviceGetTotalEccErrors),
  /** nvmlDeviceGetTotalEnergyConsumption */
  NVML_ENTRY_ENUM(nvmlDeviceGetTotalEnergyConsumption),
  /** nvmlDeviceGetUtilizationRates */
  NVML_ENTRY_ENUM(nvmlDeviceGetUtilizationRates),
  /** nvmlDeviceGetUUID */
  NVML_ENTRY_ENUM(nvmlDeviceGetUUID),
  /** nvmlDeviceGetVbiosVersion */
  NVML_ENTRY_ENUM(nvmlDeviceGetVbiosVersion),
  /** nvmlDeviceGetVgpuMetadata */
  NVML_ENTRY_ENUM(nvmlDeviceGetVgpuMetadata),
  /** nvmlDeviceGetVgpuProcessUtilization */
  NVML_ENTRY_ENUM(nvmlDeviceGetVgpuProcessUtilization),
  /** nvmlDeviceGetVgpuUtilization */
  NVML_ENTRY_ENUM(nvmlDeviceGetVgpuUtilization),
  /** nvmlDeviceGetViolationStatus */
  NVML_ENTRY_ENUM(nvmlDeviceGetViolationStatus),
  /** nvmlDeviceGetVirtualizationMode */
  NVML_ENTRY_ENUM(nvmlDeviceGetVirtualizationMode),
  /** nvmlDeviceModifyDrainState */
  NVML_ENTRY_ENUM(nvmlDeviceModifyDrainState),
  /** nvmlDeviceOnSameBoard */
  NVML_ENTRY_ENUM(nvmlDeviceOnSameBoard),
  /** nvmlDeviceQueryDrainState */
  NVML_ENTRY_ENUM(nvmlDeviceQueryDrainState),
  /** nvmlDeviceRegisterEvents */
  NVML_ENTRY_ENUM(nvmlDeviceRegisterEvents),
  /** nvmlDeviceRemoveGpu */
  NVML_ENTRY_ENUM(nvmlDeviceRemoveGpu),
  /** nvmlDeviceRemoveGpu_v2 */
  NVML_ENTRY_ENUM(nvmlDeviceRemoveGpu_v2),
  /** nvmlDeviceResetApplicationsClocks */
  NVML_ENTRY_ENUM(nvmlDeviceResetApplicationsClocks),
  /** nvmlDeviceResetNvLinkErrorCounters */
  NVML_ENTRY_ENUM(nvmlDeviceResetNvLinkErrorCounters),
  /** nvmlDeviceResetNvLinkUtilizationCounter */
  NVML_ENTRY_ENUM(nvmlDeviceResetNvLinkUtilizationCounter),
  /** nvmlDeviceSetAccountingMode */
  NVML_ENTRY_ENUM(nvmlDeviceSetAccountingMode),
  /** nvmlDeviceSetAPIRestriction */
  NVML_ENTRY_ENUM(nvmlDeviceSetAPIRestriction),
  /** nvmlDeviceSetApplicationsClocks */
  NVML_ENTRY_ENUM(nvmlDeviceSetApplicationsClocks),
  /** nvmlDeviceSetAutoBoostedClocksEnabled */
  NVML_ENTRY_ENUM(nvmlDeviceSetAutoBoostedClocksEnabled),
  /** nvmlDeviceSetComputeMode */
  NVML_ENTRY_ENUM(nvmlDeviceSetComputeMode),
  /** nvmlDeviceSetCpuAffinity */
  NVML_ENTRY_ENUM(nvmlDeviceSetCpuAffinity),
  /** nvmlDeviceSetDefaultAutoBoostedClocksEnabled */
  NVML_ENTRY_ENUM(nvmlDeviceSetDefaultAutoBoostedClocksEnabled),
  /** nvmlDeviceSetDriverModel */
  NVML_ENTRY_ENUM(nvmlDeviceSetDriverModel),
  /** nvmlDeviceSetEccMode */
  NVML_ENTRY_ENUM(nvmlDeviceSetEccMode),
  /** nvmlDeviceSetGpuOperationMode */
  NVML_ENTRY_ENUM(nvmlDeviceSetGpuOperationMode),
  /** nvmlDeviceSetNvLinkUtilizationControl */
  NVML_ENTRY_ENUM(nvmlDeviceSetNvLinkUtilizationControl),
  /** nvmlDeviceSetPersistenceMode */
  NVML_ENTRY_ENUM(nvmlDeviceSetPersistenceMode),
  /** nvmlDeviceSetPowerManagementLimit */
  NVML_ENTRY_ENUM(nvmlDeviceSetPowerManagementLimit),
  /** nvmlDeviceSetVirtualizationMode */
  NVML_ENTRY_ENUM(nvmlDeviceSetVirtualizationMode),
  /** nvmlDeviceValidateInforom */
  NVML_ENTRY_ENUM(nvmlDeviceValidateInforom),
  /** nvmlEventSetCreate */
  NVML_ENTRY_ENUM(nvmlEventSetCreate),
  /** nvmlEventSetFree */
  NVML_ENTRY_ENUM(nvmlEventSetFree),
  /** nvmlEventSetWait */
  NVML_ENTRY_ENUM(nvmlEventSetWait),
  /** nvmlGetVgpuCompatibility */
  NVML_ENTRY_ENUM(nvmlGetVgpuCompatibility),
  /** nvmlInit_v2 */
  NVML_ENTRY_ENUM(nvmlInit_v2),
  /** nvmlInitWithFlags */
  NVML_ENTRY_ENUM(nvmlInitWithFlags),
  /** nvmlInternalGetExportTable */
  NVML_ENTRY_ENUM(nvmlInternalGetExportTable),
  /** nvmlSystemGetCudaDriverVersion */
  NVML_ENTRY_ENUM(nvmlSystemGetCudaDriverVersion),
  /** nvmlSystemGetCudaDriverVersion_v2 */
  NVML_ENTRY_ENUM(nvmlSystemGetCudaDriverVersion_v2),
  /** nvmlSystemGetDriverVersion */
  NVML_ENTRY_ENUM(nvmlSystemGetDriverVersion),
  /** nvmlSystemGetHicVersion */
  NVML_ENTRY_ENUM(nvmlSystemGetHicVersion),
  /** nvmlSystemGetNVMLVersion */
  NVML_ENTRY_ENUM(nvmlSystemGetNVMLVersion),
  /** nvmlSystemGetProcessName */
  NVML_ENTRY_ENUM(nvmlSystemGetProcessName),
  /** nvmlSystemGetTopologyGpuSet */
  NVML_ENTRY_ENUM(nvmlSystemGetTopologyGpuSet),
  /** nvmlUnitGetCount */
  NVML_ENTRY_ENUM(nvmlUnitGetCount),
  /** nvmlUnitGetDevices */
  NVML_ENTRY_ENUM(nvmlUnitGetDevices),
  /** nvmlUnitGetFanSpeedInfo */
  NVML_ENTRY_ENUM(nvmlUnitGetFanSpeedInfo),
  /** nvmlUnitGetHandleByIndex */
  NVML_ENTRY_ENUM(nvmlUnitGetHandleByIndex),
  /** nvmlUnitGetLedState */
  NVML_ENTRY_ENUM(nvmlUnitGetLedState),
  /** nvmlUnitGetPsuInfo */
  NVML_ENTRY_ENUM(nvmlUnitGetPsuInfo),
  /** nvmlUnitGetTemperature */
  NVML_ENTRY_ENUM(nvmlUnitGetTemperature),
  /** nvmlUnitGetUnitInfo */
  NVML_ENTRY_ENUM(nvmlUnitGetUnitInfo),
  /** nvmlUnitSetLedState */
  NVML_ENTRY_ENUM(nvmlUnitSetLedState),
  /** nvmlVgpuInstanceGetEncoderCapacity */
  NVML_ENTRY_ENUM(nvmlVgpuInstanceGetEncoderCapacity),
  /** nvmlVgpuInstanceGetEncoderSessions */
  NVML_ENTRY_ENUM(nvmlVgpuInstanceGetEncoderSessions),
  /** nvmlVgpuInstanceGetEncoderStats */
  NVML_ENTRY_ENUM(nvmlVgpuInstanceGetEncoderStats),
  /** nvmlVgpuInstanceGetFbUsage */
  NVML_ENTRY_ENUM(nvmlVgpuInstanceGetFbUsage),
  /** nvmlVgpuInstanceGetFrameRateLimit */
  NVML_ENTRY_ENUM(nvmlVgpuInstanceGetFrameRateLimit),
  /** nvmlVgpuInstanceGetLicenseStatus */
  NVML_ENTRY_ENUM(nvmlVgpuInstanceGetLicenseStatus),
  /** nvmlVgpuInstanceGetMetadata */
  NVML_ENTRY_ENUM(nvmlVgpuInstanceGetMetadata),
  /** nvmlVgpuInstanceGetType */
  NVML_ENTRY_ENUM(nvmlVgpuInstanceGetType),
  /** nvmlVgpuInstanceGetUUID */
  NVML_ENTRY_ENUM(nvmlVgpuInstanceGetUUID),
  /** nvmlVgpuInstanceGetVmDriverVersion */
  NVML_ENTRY_ENUM(nvmlVgpuInstanceGetVmDriverVersion),
  /** nvmlVgpuInstanceGetVmID */
  NVML_ENTRY_ENUM(nvmlVgpuInstanceGetVmID),
  /** nvmlVgpuInstanceSetEncoderCapacity */
  NVML_ENTRY_ENUM(nvmlVgpuInstanceSetEncoderCapacity),
  /** nvmlVgpuTypeGetClass */
  NVML_ENTRY_ENUM(nvmlVgpuTypeGetClass),
  /** nvmlVgpuTypeGetDeviceID */
  NVML_ENTRY_ENUM(nvmlVgpuTypeGetDeviceID),
  /** nvmlVgpuTypeGetFramebufferSize */
  NVML_ENTRY_ENUM(nvmlVgpuTypeGetFramebufferSize),
  /** nvmlVgpuTypeGetFrameRateLimit */
  NVML_ENTRY_ENUM(nvmlVgpuTypeGetFrameRateLimit),
  /** nvmlVgpuTypeGetLicense */
  NVML_ENTRY_ENUM(nvmlVgpuTypeGetLicense),
  /** nvmlVgpuTypeGetMaxInstances */
  NVML_ENTRY_ENUM(nvmlVgpuTypeGetMaxInstances),
  /** nvmlVgpuTypeGetName */
  NVML_ENTRY_ENUM(nvmlVgpuTypeGetName),
  /** nvmlVgpuTypeGetNumDisplayHeads */
  NVML_ENTRY_ENUM(nvmlVgpuTypeGetNumDisplayHeads),
  /** nvmlVgpuTypeGetResolution */
  NVML_ENTRY_ENUM(nvmlVgpuTypeGetResolution),
  /** nvmlDeviceGetFBCSessions */
  NVML_ENTRY_ENUM(nvmlDeviceGetFBCSessions),
  /** nvmlDeviceGetFBCStats */
  NVML_ENTRY_ENUM(nvmlDeviceGetFBCStats),
  /** nvmlDeviceGetGridLicensableFeatures_v2 */
  NVML_ENTRY_ENUM(nvmlDeviceGetGridLicensableFeatures_v2),
  /** nvmlDeviceGetRetiredPages_v2 */
  NVML_ENTRY_ENUM(nvmlDeviceGetRetiredPages_v2),
  /** nvmlDeviceResetGpuLockedClocks */
  NVML_ENTRY_ENUM(nvmlDeviceResetGpuLockedClocks),
  /** nvmlDeviceSetGpuLockedClocks */
  NVML_ENTRY_ENUM(nvmlDeviceSetGpuLockedClocks),
  /** nvmlGetBlacklistDeviceCount */
  NVML_ENTRY_ENUM(nvmlGetBlacklistDeviceCount),
  /** nvmlGetBlacklistDeviceInfoByIndex */
  NVML_ENTRY_ENUM(nvmlGetBlacklistDeviceInfoByIndex),
  /** nvmlVgpuInstanceGetAccountingMode */
  NVML_ENTRY_ENUM(nvmlVgpuInstanceGetAccountingMode),
  /** nvmlVgpuInstanceGetAccountingPids */
  NVML_ENTRY_ENUM(nvmlVgpuInstanceGetAccountingPids),
  /** nvmlVgpuInstanceGetAccountingStats */
  NVML_ENTRY_ENUM(nvmlVgpuInstanceGetAccountingStats),
  /** nvmlVgpuInstanceGetFBCSessions */
  NVML_ENTRY_ENUM(nvmlVgpuInstanceGetFBCSessions),
  /** nvmlVgpuInstanceGetFBCStats */
  NVML_ENTRY_ENUM(nvmlVgpuInstanceGetFBCStats),
  /** nvmlVgpuTypeGetMaxInstancesPerVm */
  NVML_ENTRY_ENUM(nvmlVgpuTypeGetMaxInstancesPerVm),
  /** nvmlGetVgpuVersion */
  NVML_ENTRY_ENUM(nvmlGetVgpuVersion),
  /** nvmlSetVgpuVersion */
  NVML_ENTRY_ENUM(nvmlSetVgpuVersion),
  /** nvmlDeviceGetGridLicensableFeatures_v3 */
  NVML_ENTRY_ENUM(nvmlDeviceGetGridLicensableFeatures_v3),
  /** nvmlDeviceGetHostVgpuMode */
  NVML_ENTRY_ENUM(nvmlDeviceGetHostVgpuMode),
  /** nvmlDeviceGetPgpuMetadataString */
  NVML_ENTRY_ENUM(nvmlDeviceGetPgpuMetadataString),
  /** nvmlVgpuInstanceGetEccMode */
  NVML_ENTRY_ENUM(nvmlVgpuInstanceGetEccMode),
  /** nvmlComputeInstanceDestroy */
  NVML_ENTRY_ENUM(nvmlComputeInstanceDestroy),
  /** nvmlComputeInstanceGetInfo */
  NVML_ENTRY_ENUM(nvmlComputeInstanceGetInfo),
  /** nvmlDeviceCreateGpuInstance */
  NVML_ENTRY_ENUM(nvmlDeviceCreateGpuInstance),
  /** nvmlDeviceGetArchitecture */
  NVML_ENTRY_ENUM(nvmlDeviceGetArchitecture),
  /** nvmlDeviceGetAttributes */
  NVML_ENTRY_ENUM(nvmlDeviceGetAttributes),
  /** nvmlDeviceGetAttributes_v2 */
  NVML_ENTRY_ENUM(nvmlDeviceGetAttributes_v2),
  /** nvmlDeviceGetComputeInstanceId */
  NVML_ENTRY_ENUM(nvmlDeviceGetComputeInstanceId),
  /** nvmlDeviceGetCpuAffinityWithinScope */
  NVML_ENTRY_ENUM(nvmlDeviceGetCpuAffinityWithinScope),
  /** nvmlDeviceGetDeviceHandleFromMigDeviceHandle */
  NVML_ENTRY_ENUM(nvmlDeviceGetDeviceHandleFromMigDeviceHandle),
  /** nvmlDeviceGetGpuInstanceById */
  NVML_ENTRY_ENUM(nvmlDeviceGetGpuInstanceById),
  /** nvmlDeviceGetGpuInstanceId */
  NVML_ENTRY_ENUM(nvmlDeviceGetGpuInstanceId),
  /** nvmlDeviceGetGpuInstancePossiblePlacements */
  NVML_ENTRY_ENUM(nvmlDeviceGetGpuInstancePossiblePlacements),
  /** nvmlDeviceGetGpuInstanceProfileInfo */
  NVML_ENTRY_ENUM(nvmlDeviceGetGpuInstanceProfileInfo),
  /** nvmlDeviceGetGpuInstanceRemainingCapacity */
  NVML_ENTRY_ENUM(nvmlDeviceGetGpuInstanceRemainingCapacity),
  /** nvmlDeviceGetGpuInstances */
  NVML_ENTRY_ENUM(nvmlDeviceGetGpuInstances),
  /** nvmlDeviceGetMaxMigDeviceCount */
  NVML_ENTRY_ENUM(nvmlDeviceGetMaxMigDeviceCount),
  /** nvmlDeviceGetMemoryAffinity */
  NVML_ENTRY_ENUM(nvmlDeviceGetMemoryAffinity),
  /** nvmlDeviceGetMigDeviceHandleByIndex */
  NVML_ENTRY_ENUM(nvmlDeviceGetMigDeviceHandleByIndex),
  /** nvmlDeviceGetMigMode */
  NVML_ENTRY_ENUM(nvmlDeviceGetMigMode),
  /** nvmlDeviceGetRemappedRows */
  NVML_ENTRY_ENUM(nvmlDeviceGetRemappedRows),
  /** nvmlDeviceGetRowRemapperHistogram */
  NVML_ENTRY_ENUM(nvmlDeviceGetRowRemapperHistogram),
  /** nvmlDeviceIsMigDeviceHandle */
  NVML_ENTRY_ENUM(nvmlDeviceIsMigDeviceHandle),
  /** nvmlDeviceSetMigMode */
  NVML_ENTRY_ENUM(nvmlDeviceSetMigMode),
  /** nvmlEventSetWait_v2 */
  NVML_ENTRY_ENUM(nvmlEventSetWait_v2),
  /** nvmlGpuInstanceCreateComputeInstance */
  NVML_ENTRY_ENUM(nvmlGpuInstanceCreateComputeInstance),
  /** nvmlGpuInstanceDestroy */
  NVML_ENTRY_ENUM(nvmlGpuInstanceDestroy),
  /** nvmlGpuInstanceGetComputeInstanceById */
  NVML_ENTRY_ENUM(nvmlGpuInstanceGetComputeInstanceById),
  /** nvmlGpuInstanceGetComputeInstanceProfileInfo */
  NVML_ENTRY_ENUM(nvmlGpuInstanceGetComputeInstanceProfileInfo),
  /** nvmlGpuInstanceGetComputeInstanceRemainingCapacity */
  NVML_ENTRY_ENUM(nvmlGpuInstanceGetComputeInstanceRemainingCapacity),
  /** nvmlGpuInstanceGetComputeInstances */
  NVML_ENTRY_ENUM(nvmlGpuInstanceGetComputeInstances),
  /** nvmlGpuInstanceGetInfo */
  NVML_ENTRY_ENUM(nvmlGpuInstanceGetInfo),
  /** nvmlVgpuInstanceClearAccountingPids */
  NVML_ENTRY_ENUM(nvmlVgpuInstanceClearAccountingPids),
  /** nvmlVgpuInstanceGetMdevUUID */
  NVML_ENTRY_ENUM(nvmlVgpuInstanceGetMdevUUID),
  /** nvmlComputeInstanceGetInfo_v2 */
  NVML_ENTRY_ENUM(nvmlComputeInstanceGetInfo_v2),
  /** nvmlDeviceGetComputeRunningProcesses_v2 */
  NVML_ENTRY_ENUM(nvmlDeviceGetComputeRunningProcesses_v2),
  /** nvmlDeviceGetGraphicsRunningProcesses_v2 */
  NVML_ENTRY_ENUM(nvmlDeviceGetGraphicsRunningProcesses_v2),
  /** nvmlDeviceSetTemperatureThreshold */
  NVML_ENTRY_ENUM(nvmlDeviceSetTemperatureThreshold),
  /** nvmlRetry_NvRmControl */
  NVML_ENTRY_ENUM(nvmlRetry_NvRmControl),
  /** nvmlVgpuInstanceGetGpuInstanceId */
  NVML_ENTRY_ENUM(nvmlVgpuInstanceGetGpuInstanceId),
  /** nvmlVgpuTypeGetGpuInstanceProfileId */
  NVML_ENTRY_ENUM(nvmlVgpuTypeGetGpuInstanceProfileId),
  /** nvmlDeviceCreateGpuInstanceWithPlacement */
  NVML_ENTRY_ENUM(nvmlDeviceCreateGpuInstanceWithPlacement),
  /** nvmlDeviceGetBusType */
  NVML_ENTRY_ENUM(nvmlDeviceGetBusType),
  /** nvmlDeviceGetClkMonStatus */
  NVML_ENTRY_ENUM(nvmlDeviceGetClkMonStatus),
  /** nvmlDeviceGetGpuInstancePossiblePlacements_v2 */
  NVML_ENTRY_ENUM(nvmlDeviceGetGpuInstancePossiblePlacements_v2),
  /** nvmlDeviceGetGridLicensableFeatures_v4 */
  NVML_ENTRY_ENUM(nvmlDeviceGetGridLicensableFeatures_v4),
  /** nvmlDeviceGetIrqNum */
  NVML_ENTRY_ENUM(nvmlDeviceGetIrqNum),
  /** nvmlDeviceGetMPSComputeRunningProcesses_v2 */
  NVML_ENTRY_ENUM(nvmlDeviceGetMPSComputeRunningProcesses_v2),
  /** nvmlDeviceGetNvLinkRemoteDeviceType */
  NVML_ENTRY_ENUM(nvmlDeviceGetNvLinkRemoteDeviceType),
  /** nvmlDeviceResetMemoryLockedClocks */
  NVML_ENTRY_ENUM(nvmlDeviceResetMemoryLockedClocks),
  /** nvmlDeviceSetMemoryLockedClocks */
  NVML_ENTRY_ENUM(nvmlDeviceSetMemoryLockedClocks),
  /** nvmlGetExcludedDeviceCount */
  NVML_ENTRY_ENUM(nvmlGetExcludedDeviceCount),
  /** nvmlGetExcludedDeviceInfoByIndex */
  NVML_ENTRY_ENUM(nvmlGetExcludedDeviceInfoByIndex),
  /** nvmlVgpuInstanceGetLicenseInfo */
  NVML_ENTRY_ENUM(nvmlVgpuInstanceGetLicenseInfo),
  /** sentinel: total number of entries (also the entry-table size) */
  NVML_ENTRY_END
} nvml_entry_enum_t;
558 |
559 | #ifdef __cplusplus
560 | }
561 | #endif
562 |
563 | #endif // HIJACK_NVML_HELPER_H
564 |
--------------------------------------------------------------------------------
/src/hijack_call.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Tencent is pleased to support the open source community by making TKEStack
3 | * available.
4 | *
5 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved.
6 | *
7 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
8 | * use this file except in compliance with the License. You may obtain a copy of
9 | * the License at
10 | *
11 | * https://opensource.org/licenses/Apache-2.0
12 | *
13 | * Unless required by applicable law or agreed to in writing, software
14 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
15 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations under the License.
17 | */
18 |
19 | #include
20 | #include
21 | #include
22 | #include
23 | #include
24 | #include
25 | #include
26 | #include
27 |
28 | #include "include/cuda-helper.h"
29 | #include "include/hijack.h"
30 | #include "include/nvml-helper.h"
31 |
/* Shared state defined in other translation units of this library
 * (NOTE(review): defining files not visible in this chunk). */
extern resource_data_t g_vcuda_config;
extern entry_t cuda_library_entry[];
extern entry_t nvml_library_entry[];
extern char pid_path[];

/* Callback signature used with atomic_action(); the int argument is an fd
 * presumably — confirm against atomic_action's definition. */
typedef void (*atomic_fn_ptr)(int, void *);

/* One-shot guards for initialization() and register_to_remote(). */
static pthread_once_t g_init_set = PTHREAD_ONCE_INIT;
static pthread_once_t g_register_set = PTHREAD_ONCE_INIT;

/* Remaining vs. total token count for the CUDA-core rate limiter
 * (see rate_limiter/change_token below). */
static volatile int g_cur_cuda_cores = 0;
static volatile int g_total_cuda_cores = 0;

/* Device capacity sampled at initialization(). */
static int g_max_thread_per_sm = 0;
static int g_sm_num = 0;

/* Cached block dimensions, written under g_block_locker (spin flag used
 * with CAS — NOTE(review): locking protocol not visible in this chunk). */
static int g_block_x = 1, g_block_y = 1, g_block_z = 1;
static uint32_t g_block_locker = 0;

/* Polling period: TIME_TICK milliseconds expressed in nanoseconds. */
static const struct timespec g_cycle = {
    .tv_sec = 0,
    .tv_nsec = TIME_TICK * MILLISEC,
};

/* Back-off period of 120 milliseconds. */
static const struct timespec g_wait = {
    .tv_sec = 0,
    .tv_nsec = 120 * MILLISEC,
};

/** pid mapping related */
static int g_pids_table[MAX_PIDS];   /* pids read from the pids config file */
static int g_pids_table_size;        /* number of valid entries in g_pids_table */

/** internal function definition */
static void register_to_remote();

static void atomic_action(const char *, atomic_fn_ptr, void *);

static void active_utilization_notifier();

static void *utilization_watcher(void *);

static void load_pids_table(int, void *);

static void get_used_gpu_memory(int, void *);

static void get_used_gpu_utilization(int, void *);

static void initialization();

static void rate_limiter(int, int);

static void change_token(int);

/* Error-to-string helpers for NVML and CUDA return codes. */
static const char *nvml_error(nvmlReturn_t);

static const char *cuda_error(CUresult, const char **);

/* qsort/bsearch comparator for ints. */
static int int_match(const void *, const void *);

static int delta(int, int, int);
94 | /** export function definition */
95 | CUresult cuDriverGetVersion(int *driverVersion);
96 | CUresult cuInit(unsigned int flag);
97 | CUresult cuGetProcAddress(const char *symbol, void **pfn, int cudaVersion,
98 | cuuint64_t flags);
99 | CUresult cuMemAllocManaged(CUdeviceptr *dptr, size_t bytesize,
100 | unsigned int flags);
101 | CUresult cuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize);
102 | CUresult cuMemAlloc(CUdeviceptr *dptr, size_t bytesize);
103 | CUresult cuMemAllocPitch_v2(CUdeviceptr *dptr, size_t *pPitch,
104 | size_t WidthInBytes, size_t Height,
105 | unsigned int ElementSizeBytes);
106 | CUresult cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch, size_t WidthInBytes,
107 | size_t Height, unsigned int ElementSizeBytes);
108 | CUresult cuArrayCreate_v2(CUarray *pHandle,
109 | const CUDA_ARRAY_DESCRIPTOR *pAllocateArray);
110 | CUresult cuArrayCreate(CUarray *pHandle,
111 | const CUDA_ARRAY_DESCRIPTOR *pAllocateArray);
112 | CUresult cuArray3DCreate_v2(CUarray *pHandle,
113 | const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray);
114 | CUresult cuArray3DCreate(CUarray *pHandle,
115 | const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray);
116 | CUresult
117 | cuMipmappedArrayCreate(CUmipmappedArray *pHandle,
118 | const CUDA_ARRAY3D_DESCRIPTOR *pMipmappedArrayDesc,
119 | unsigned int numMipmapLevels);
120 | CUresult cuDeviceTotalMem_v2(size_t *bytes, CUdevice dev);
121 | CUresult cuDeviceTotalMem(size_t *bytes, CUdevice dev);
122 | CUresult cuMemGetInfo_v2(size_t *free, size_t *total);
123 | CUresult cuMemGetInfo(size_t *free, size_t *total);
124 | CUresult cuLaunchKernel_ptsz(CUfunction f, unsigned int gridDimX,
125 | unsigned int gridDimY, unsigned int gridDimZ,
126 | unsigned int blockDimX, unsigned int blockDimY,
127 | unsigned int blockDimZ,
128 | unsigned int sharedMemBytes, CUstream hStream,
129 | void **kernelParams, void **extra);
130 | CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX,
131 | unsigned int gridDimY, unsigned int gridDimZ,
132 | unsigned int blockDimX, unsigned int blockDimY,
133 | unsigned int blockDimZ, unsigned int sharedMemBytes,
134 | CUstream hStream, void **kernelParams, void **extra);
135 | CUresult cuLaunch(CUfunction f);
136 | CUresult cuLaunchCooperativeKernel_ptsz(
137 | CUfunction f, unsigned int gridDimX, unsigned int gridDimY,
138 | unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY,
139 | unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream,
140 | void **kernelParams);
141 | CUresult cuLaunchCooperativeKernel(CUfunction f, unsigned int gridDimX,
142 | unsigned int gridDimY, unsigned int gridDimZ,
143 | unsigned int blockDimX,
144 | unsigned int blockDimY,
145 | unsigned int blockDimZ,
146 | unsigned int sharedMemBytes,
147 | CUstream hStream, void **kernelParams);
148 | CUresult cuLaunchGrid(CUfunction f, int grid_width, int grid_height);
149 | CUresult cuLaunchGridAsync(CUfunction f, int grid_width, int grid_height,
150 | CUstream hStream);
151 | CUresult cuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z);
152 |
/* Driver-API symbols this library intercepts.  cuGetProcAddress() scans the
 * table linearly (strcmp on .name) and substitutes .fn_ptr for the real
 * driver entry point. */
entry_t cuda_hooks_entry[] = {
    {.name = "cuDriverGetVersion", .fn_ptr = cuDriverGetVersion},
    {.name = "cuInit", .fn_ptr = cuInit},
    {.name = "cuGetProcAddress", .fn_ptr = cuGetProcAddress},
    {.name = "cuMemAllocManaged", .fn_ptr = cuMemAllocManaged},
    {.name = "cuMemAlloc_v2", .fn_ptr = cuMemAlloc_v2},
    {.name = "cuMemAlloc", .fn_ptr = cuMemAlloc},
    {.name = "cuMemAllocPitch_v2", .fn_ptr = cuMemAllocPitch_v2},
    {.name = "cuMemAllocPitch", .fn_ptr = cuMemAllocPitch},
    {.name = "cuArrayCreate_v2", .fn_ptr = cuArrayCreate_v2},
    {.name = "cuArrayCreate", .fn_ptr = cuArrayCreate},
    {.name = "cuArray3DCreate_v2", .fn_ptr = cuArray3DCreate_v2},
    {.name = "cuArray3DCreate", .fn_ptr = cuArray3DCreate},
    {.name = "cuMipmappedArrayCreate", .fn_ptr = cuMipmappedArrayCreate},
    {.name = "cuDeviceTotalMem_v2", .fn_ptr = cuDeviceTotalMem_v2},
    {.name = "cuDeviceTotalMem", .fn_ptr = cuDeviceTotalMem},
    {.name = "cuMemGetInfo_v2", .fn_ptr = cuMemGetInfo_v2},
    {.name = "cuMemGetInfo", .fn_ptr = cuMemGetInfo},
    {.name = "cuLaunchKernel_ptsz", .fn_ptr = cuLaunchKernel_ptsz},
    {.name = "cuLaunchKernel", .fn_ptr = cuLaunchKernel},
    {.name = "cuLaunch", .fn_ptr = cuLaunch},
    {.name = "cuLaunchCooperativeKernel_ptsz",
     .fn_ptr = cuLaunchCooperativeKernel_ptsz},
    {.name = "cuLaunchCooperativeKernel", .fn_ptr = cuLaunchCooperativeKernel},
    {.name = "cuLaunchGrid", .fn_ptr = cuLaunchGrid},
    {.name = "cuLaunchGridAsync", .fn_ptr = cuLaunchGridAsync},
    {.name = "cuFuncSetBlockShape", .fn_ptr = cuFuncSetBlockShape},
};

/* Number of entries in cuda_hooks_entry. */
const int cuda_hook_nums =
    sizeof(cuda_hooks_entry) / sizeof(cuda_hooks_entry[0]);
184 |
/** dynamic rate control: one GPU utilization sample, filled by
 * get_used_gpu_utilization(). */
typedef struct {
  int user_current;    /* sm + normalized codec util of pids in g_pids_table */
  int sys_current;     /* sm + normalized codec util of all sampled processes */
  int valid;           /* set once a sample newer than checktime is observed */
  uint64_t checktime;  /* microsecond timestamp the sample window starts at */
  int sys_process_num; /* compute processes currently on the device */
} utilization_t;
193 |
194 | /** helper function */
/*
 * qsort/bsearch comparator for int keys.
 * Returns -1, 0, or 1 as *a is less than, equal to, or greater than *b.
 */
int int_match(const void *a, const void *b) {
  int lhs = *(const int *)a;
  int rhs = *(const int *)b;

  return (lhs > rhs) - (lhs < rhs);
}
209 |
210 | static void atomic_action(const char *filename, atomic_fn_ptr fn_ptr,
211 | void *arg) {
212 | int fd;
213 |
214 | fd = open(filename, O_RDONLY);
215 | if (unlikely(fd == -1)) {
216 | LOGGER(FATAL, "can't open %s, error %s", filename, strerror(errno));
217 | }
218 |
219 | fn_ptr(fd, arg);
220 |
221 | close(fd);
222 | }
223 |
224 | const char *nvml_error(nvmlReturn_t code) {
225 | const char *(*err_fn)(nvmlReturn_t) = NULL;
226 |
227 | err_fn = nvml_library_entry[NVML_ENTRY_ENUM(nvmlErrorString)].fn_ptr;
228 | if (unlikely(!err_fn)) {
229 | LOGGER(FATAL, "can't find nvmlErrorString");
230 | }
231 |
232 | return err_fn(code);
233 | }
234 |
/*
 * Translate a CUresult into the driver's error description via the real
 * cuGetErrorString, writing the string pointer through `p` and returning it.
 * NOTE(review): the CUDA_ENTRY_CALL result is not checked; if the lookup
 * fails, *p is returned as the caller left it (callers in this file
 * initialize it to NULL) — confirm callers tolerate a NULL return.
 */
const char *cuda_error(CUresult code, const char **p) {
  CUDA_ENTRY_CALL(cuda_library_entry, cuGetErrorString, code, p);

  return *p;
}
240 |
/*
 * Atomically add `delta` tokens to the bucket (g_cur_cuda_cores), clamping
 * at the capacity g_total_cuda_cores.  Standard CAS retry loop: snapshot,
 * compute, swap; recompute from a fresh snapshot on contention.  There is
 * deliberately no lower clamp — rate_limiter() lets the balance go negative
 * and simply waits while it is below zero.
 */
static void change_token(int delta) {
  int cuda_cores_before = 0, cuda_cores_after = 0;

  LOGGER(5, "delta: %d, curr: %d", delta, g_cur_cuda_cores);
  do {
    cuda_cores_before = g_cur_cuda_cores;
    cuda_cores_after = cuda_cores_before + delta;

    if (unlikely(cuda_cores_after > g_total_cuda_cores)) {
      cuda_cores_after = g_total_cuda_cores;
    }
  } while (!CAS(&g_cur_cuda_cores, cuda_cores_before, cuda_cores_after));
}
254 |
/*
 * Charge `grids` tokens to the bucket before a kernel launch (only when
 * vcuda limiting is enabled).  While the balance is negative the caller
 * sleeps g_cycle per spin and re-checks; the CAS retry loop then debits
 * kernel_size, which may itself drive the balance negative — that debt
 * delays subsequent launches.  `blocks` is currently used for logging only.
 */
static void rate_limiter(int grids, int blocks) {
  int before_cuda_cores = 0;
  int after_cuda_cores = 0;
  int kernel_size = grids;

  LOGGER(5, "grid: %d, blocks: %d", grids, blocks);
  LOGGER(5, "launch kernel %d, curr core: %d", kernel_size, g_cur_cuda_cores);
  if (g_vcuda_config.enable) {
    do {
    CHECK:
      before_cuda_cores = g_cur_cuda_cores;
      LOGGER(8, "current core: %d", g_cur_cuda_cores);
      if (before_cuda_cores < 0) {
        /* Bucket in debt: sleep one tick and re-sample before retrying. */
        nanosleep(&g_cycle, NULL);
        goto CHECK;
      }
      after_cuda_cores = before_cuda_cores - kernel_size;
    } while (!CAS(&g_cur_cuda_cores, before_cuda_cores, after_cuda_cores));
  }
}
275 |
276 | static int delta(int up_limit, int user_current, int share) {
277 | int utilization_diff =
278 | abs(up_limit - user_current) < 5 ? 5 : abs(up_limit - user_current);
279 | int increment =
280 | g_sm_num * g_sm_num * g_max_thread_per_sm / 256 * utilization_diff / 10;
281 |
282 | /* Accelerate cuda cores allocation when utilization vary widely */
283 | if (utilization_diff > up_limit / 2) {
284 | increment = increment * utilization_diff * 2 / (up_limit + 1);
285 | }
286 |
287 | if (unlikely(increment < 0)) {
288 | LOGGER(3, "overflow: %d, current sm: %d, thread_per_sm: %d, diff: %d",
289 | increment, g_sm_num, g_max_thread_per_sm, utilization_diff);
290 | }
291 |
292 | if (user_current <= up_limit) {
293 | share = share + increment > g_total_cuda_cores ? g_total_cuda_cores
294 | : share + increment;
295 | } else {
296 | share = share - increment < 0 ? 0 : share - increment;
297 | }
298 |
299 | return share;
300 | }
301 |
// #lizard forgives
/*
 * Background thread body: every g_wait interval, sample GPU utilization and
 * refill the rate-limiter token bucket via change_token(delta(...)).
 *
 * hard_limit mode pins the target at g_vcuda_config.utilization; otherwise
 * the target (up_limit) floats between utilization and limit based on how
 * much system-wide headroom (sys_free) was observed over
 * CHANGE_LIMIT_INTERVAL samples.  Runs forever; never returns.
 */
static void *utilization_watcher(void *arg UNUSED) {
  utilization_t top_result = {
      .user_current = 0,
      .sys_current = 0,
      .sys_process_num = 0,
  };
  int sys_free = 0;
  int share = 0;
  int i = 0;
  int avg_sys_free = 0;
  int pre_sys_process_num = 1;
  int up_limit = g_vcuda_config.utilization;

  LOGGER(5, "start %s", __FUNCTION__);
  LOGGER(4, "sm: %d, thread per sm: %d", g_sm_num, g_max_thread_per_sm);
  while (1) {
    nanosleep(&g_wait, NULL);
    /* Re-sample until a window with fresh (valid) data is obtained. */
    do {
      atomic_action(pid_path, get_used_gpu_utilization, (void *)&top_result);
    } while (!top_result.valid);

    sys_free = MAX_UTILIZATION - top_result.sys_current;

    if (g_vcuda_config.hard_limit) {
      /* Avoid usage jitter when application is initialized*/
      if (top_result.sys_process_num == 1 &&
          top_result.user_current < up_limit / 10) {
        g_cur_cuda_cores =
            delta(g_vcuda_config.utilization, top_result.user_current, share);
        continue;
      }
      share = delta(g_vcuda_config.utilization, top_result.user_current, share);
    } else {
      if (pre_sys_process_num != top_result.sys_process_num) {
        /* When a new process comes, all processes are reset to initial value*/
        if (pre_sys_process_num < top_result.sys_process_num) {
          share = g_max_thread_per_sm;
          up_limit = g_vcuda_config.utilization;
          i = 0;
          avg_sys_free = 0;
        }
        pre_sys_process_num = top_result.sys_process_num;
      }

      /* 1.Only one process on the GPU
       * Allocate cuda cores according to the limit value.
       *
       * 2.Multiple processes on the GPU
       * First, change the up_limit of the process according to the
       * historical resource utilization. Second, allocate the cuda
       * cores according to the changed limit value.*/
      if (top_result.sys_process_num == 1) {
        share = delta(g_vcuda_config.limit, top_result.user_current, share);
      } else {
        i++;
        avg_sys_free += sys_free;
        if (i % CHANGE_LIMIT_INTERVAL == 0) {
          if (avg_sys_free * 2 / CHANGE_LIMIT_INTERVAL > USAGE_THRESHOLD) {
            /* Enough headroom: raise the target by utilization/10, capped
             * at the configured limit. */
            up_limit = up_limit + g_vcuda_config.utilization / 10 >
                               g_vcuda_config.limit
                           ? g_vcuda_config.limit
                           : up_limit + g_vcuda_config.utilization / 10;
          }
          i = 0;
        }
        avg_sys_free = i % (CHANGE_LIMIT_INTERVAL / 2) == 0 ? 0 : avg_sys_free;
        share = delta(up_limit, top_result.user_current, share);
      }
    }

    change_token(share);

    LOGGER(4, "util: %d, up_limit: %d, share: %d, cur: %d",
           top_result.user_current, up_limit, share, g_cur_cuda_cores);
  }
}
379 |
380 | static void active_utilization_notifier() {
381 | pthread_t tid;
382 |
383 | pthread_create(&tid, NULL, utilization_watcher, NULL);
384 |
385 | #ifdef __APPLE__
386 | pthread_setname_np("utilization_watcher");
387 | #else
388 | pthread_setname_np(tid, "utilization_watcher");
389 | #endif
390 | }
391 |
/*
 * atomic_action() callback: fill a utilization_t (*arg) from NVML.
 *
 * Reads process samples since one second ago (cur.tv_sec - 1, converted to
 * microseconds) and accumulates sm + normalized codec utilization into
 * sys_current (all processes) and user_current (only pids present in
 * g_pids_table, i.e. this container).  `valid` is set once any sample newer
 * than checktime is seen.  On any NVML error the function logs and returns,
 * leaving the previous contents of *arg in place.
 *
 * NOTE(review): bsearch assumes g_pids_table is sorted ascending;
 * load_pids_table() reads it verbatim from pid_path — confirm the writer
 * of that file sorts the pids.
 */
static void get_used_gpu_utilization(int fd, void *arg) {
  nvmlProcessUtilizationSample_t processes_sample[MAX_PIDS];
  int processes_num = MAX_PIDS;
  unsigned int running_processes = MAX_PIDS;
  nvmlProcessInfo_t pids_on_device[MAX_PIDS];
  nvmlDevice_t dev;
  utilization_t *top_result = (utilization_t *)arg;
  nvmlReturn_t ret;
  struct timeval cur;
  size_t microsec;
  int codec_util = 0;

  int i;

  ret =
      NVML_ENTRY_CALL(nvml_library_entry, nvmlDeviceGetHandleByIndex, 0, &dev);
  if (unlikely(ret)) {
    LOGGER(4, "nvmlDeviceGetHandleByIndex: %s", nvml_error(ret));
    return;
  }

  ret =
      NVML_ENTRY_CALL(nvml_library_entry, nvmlDeviceGetComputeRunningProcesses,
                      dev, &running_processes, pids_on_device);
  if (unlikely(ret)) {
    LOGGER(4, "nvmlDeviceGetComputeRunningProcesses: %s", nvml_error(ret));
    return;
  }

  top_result->sys_process_num = running_processes;

  load_pids_table(fd, NULL);
  gettimeofday(&cur, NULL);
  /* Look back one second: NVML returns samples newer than this timestamp. */
  microsec = (cur.tv_sec - 1) * 1000UL * 1000UL + cur.tv_usec;
  top_result->checktime = microsec;
  ret = NVML_ENTRY_CALL(nvml_library_entry, nvmlDeviceGetProcessUtilization,
                        dev, processes_sample, &processes_num, microsec);
  if (unlikely(ret)) {
    LOGGER(4, "nvmlDeviceGetProcessUtilization: %s", nvml_error(ret));
    return;
  }

  top_result->user_current = 0;
  top_result->sys_current = 0;
  for (i = 0; i < processes_num; i++) {
    if (processes_sample[i].timeStamp >= top_result->checktime) {
      top_result->valid = 1;
      top_result->sys_current += GET_VALID_VALUE(processes_sample[i].smUtil);

      codec_util = GET_VALID_VALUE(processes_sample[i].encUtil) +
                   GET_VALID_VALUE(processes_sample[i].decUtil);
      top_result->sys_current += CODEC_NORMALIZE(codec_util);

      LOGGER(8, "try to find %d from pid tables", processes_sample[i].pid);
      if (likely(bsearch(&processes_sample[i].pid, g_pids_table,
                         (size_t)g_pids_table_size, sizeof(int), int_match))) {
        top_result->user_current += GET_VALID_VALUE(processes_sample[i].smUtil);

        codec_util = GET_VALID_VALUE(processes_sample[i].encUtil) +
                     GET_VALID_VALUE(processes_sample[i].decUtil);
        top_result->user_current += CODEC_NORMALIZE(codec_util);
      }
    }
  }

  LOGGER(5, "sys utilization: %d", top_result->sys_current);
  LOGGER(5, "used utilization: %d", top_result->user_current);
}
460 |
461 | static void load_pids_table(int fd, void *arg UNUSED) {
462 | int item = 0;
463 | int rsize = 0;
464 | int i = 0;
465 |
466 | for (item = 0; item < MAX_PIDS; item++) {
467 | rsize = (int)read(fd, g_pids_table + item, sizeof(int));
468 | if (unlikely(rsize != sizeof(int))) {
469 | break;
470 | }
471 | }
472 |
473 | for (i = 0; i < item; i++) {
474 | LOGGER(8, "pid: %d", g_pids_table[i]);
475 | }
476 |
477 | g_pids_table_size = item;
478 |
479 | LOGGER(8, "read %d items from %s", g_pids_table_size, pid_path);
480 | }
481 |
/*
 * atomic_action() callback: accumulate into *(size_t *)arg the GPU memory
 * used by processes whose pid appears in g_pids_table (this container).
 * On any NVML error the whole quota (g_vcuda_config.gpu_memory) is written
 * instead, which makes subsequent allocation checks fail closed.
 * NOTE(review): like get_used_gpu_utilization, the bsearch relies on
 * g_pids_table being sorted — confirm the pid_path writer sorts it.
 */
static void get_used_gpu_memory(int fd, void *arg) {
  size_t *used_memory = arg;

  nvmlDevice_t dev;
  nvmlProcessInfo_t pids_on_device[MAX_PIDS];
  unsigned int size_on_device = MAX_PIDS;
  int ret;

  unsigned int i;

  load_pids_table(fd, NULL);

  ret =
      NVML_ENTRY_CALL(nvml_library_entry, nvmlDeviceGetHandleByIndex, 0, &dev);
  if (unlikely(ret)) {
    LOGGER(4, "nvmlDeviceGetHandleByIndex can't find device 0, return %d", ret);
    *used_memory = g_vcuda_config.gpu_memory;
    return;
  }

  ret =
      NVML_ENTRY_CALL(nvml_library_entry, nvmlDeviceGetComputeRunningProcesses,
                      dev, &size_on_device, pids_on_device);
  if (unlikely(ret)) {
    LOGGER(4,
           "nvmlDeviceGetComputeRunningProcesses can't get pids on device 0, "
           "return %d",
           ret);
    *used_memory = g_vcuda_config.gpu_memory;
    return;
  }

  for (i = 0; i < size_on_device; i++) {
    LOGGER(4, "summary: %d used %lld", pids_on_device[i].pid,
           pids_on_device[i].usedGpuMemory);
  }

  /* Sum only the processes that belong to this container. */
  for (i = 0; i < size_on_device; i++) {
    if (bsearch(&pids_on_device[i].pid, g_pids_table, (size_t)g_pids_table_size,
                sizeof(int), int_match)) {
      LOGGER(4, "%d use memory: %lld", pids_on_device[i].pid,
             pids_on_device[i].usedGpuMemory);
      *used_memory += pids_on_device[i].usedGpuMemory;
    }
  }

  LOGGER(4, "total used memory: %zu", *used_memory);
}
530 |
// #lizard forgives
/*
 * pthread_once target: look up device 0's PCI bus id via NVML, copy it into
 * g_vcuda_config.bus_id, and register this container with the remote
 * manager.  Any NVML failure is fatal.
 * NOTE(review): strncpy with NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE does not
 * guarantee NUL-termination if busId fills the buffer — confirm bus_id is
 * at least that size and that busId is always shorter.
 */
static void register_to_remote() {
  nvmlPciInfo_t pci_info;
  nvmlDevice_t nvml_dev;
  int ret;

  ret = NVML_ENTRY_CALL(nvml_library_entry, nvmlDeviceGetHandleByIndex, 0,
                        &nvml_dev);
  if (unlikely(ret)) {
    LOGGER(FATAL, "can't find device 0, error %s",
           nvml_error((nvmlReturn_t)ret));
  }

  ret = NVML_ENTRY_CALL(nvml_library_entry, nvmlDeviceGetPciInfo, nvml_dev,
                        &pci_info);
  if (unlikely(ret)) {
    LOGGER(FATAL, "can't find device 0, error %s",
           nvml_error((nvmlReturn_t)ret));
  }

  strncpy(g_vcuda_config.bus_id, pci_info.busId,
          NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE);

  register_to_remote_with_data(g_vcuda_config.bus_id, g_vcuda_config.pod_uid,
                               g_vcuda_config.container_name);
}
557 |
/*
 * pthread_once target: initialize the real CUDA driver, read the device
 * shape (SM count and max threads per SM) for device 0, size the token
 * bucket (g_total_cuda_cores = threads_per_sm * sm_num * FACTOR), and start
 * the utilization watcher thread.  Any driver failure is fatal.
 */
static void initialization() {
  int ret;
  const char *cuda_err_string = NULL;

  ret = CUDA_ENTRY_CALL(cuda_library_entry, cuInit, 0);
  if (unlikely(ret)) {
    LOGGER(FATAL, "cuInit error %s",
           cuda_error((CUresult)ret, &cuda_err_string));
  }

  ret = CUDA_ENTRY_CALL(cuda_library_entry, cuDeviceGetAttribute, &g_sm_num,
                        CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, 0);
  if (unlikely(ret)) {
    LOGGER(FATAL, "can't get processor number, error %s",
           cuda_error((CUresult)ret, &cuda_err_string));
  }

  ret = CUDA_ENTRY_CALL(cuda_library_entry, cuDeviceGetAttribute,
                        &g_max_thread_per_sm,
                        CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, 0);
  if (unlikely(ret)) {
    LOGGER(FATAL, "can't get max thread per processor, error %s",
           cuda_error((CUresult)ret, &cuda_err_string));
  }

  g_total_cuda_cores = g_max_thread_per_sm * g_sm_num * FACTOR;
  LOGGER(4, "total cuda cores: %d", g_total_cuda_cores);
  active_utilization_notifier();
}
587 |
588 | /** hijack entrypoint */
589 | CUresult cuDriverGetVersion(int *driverVersion) {
590 | CUresult ret;
591 |
592 | load_necessary_data();
593 | if (!is_custom_config_path()) {
594 | pthread_once(&g_register_set, register_to_remote);
595 | }
596 | pthread_once(&g_init_set, initialization);
597 |
598 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuDriverGetVersion, driverVersion);
599 | if (unlikely(ret)) {
600 | goto DONE;
601 | }
602 |
603 | DONE:
604 | return ret;
605 | }
606 |
607 | CUresult cuInit(unsigned int flag) {
608 | CUresult ret;
609 |
610 | load_necessary_data();
611 | if (!is_custom_config_path()) {
612 | pthread_once(&g_register_set, register_to_remote);
613 | }
614 | pthread_once(&g_init_set, initialization);
615 |
616 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuInit, flag);
617 |
618 | if (unlikely(ret)) {
619 | goto DONE;
620 | }
621 |
622 | DONE:
623 | return ret;
624 | }
625 |
626 | CUresult cuGetProcAddress(const char *symbol, void **pfn, int cudaVersion,
627 | cuuint64_t flags) {
628 | CUresult ret;
629 | int i;
630 |
631 | load_necessary_data();
632 | if (!is_custom_config_path()) {
633 | pthread_once(&g_register_set, register_to_remote);
634 | }
635 | pthread_once(&g_init_set, initialization);
636 |
637 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuGetProcAddress, symbol, pfn,
638 | cudaVersion, flags);
639 | if (ret == CUDA_SUCCESS) {
640 | for (i = 0; i < cuda_hook_nums; i++) {
641 | if (!strcmp(symbol, cuda_hooks_entry[i].name)) {
642 | LOGGER(5, "Match hook %s", symbol);
643 | *pfn = cuda_hooks_entry[i].fn_ptr;
644 | break;
645 | }
646 | }
647 | }
648 |
649 | return ret;
650 | }
651 |
652 | CUresult cuMemAllocManaged(CUdeviceptr *dptr, size_t bytesize,
653 | unsigned int flags) {
654 | size_t used = 0;
655 | size_t request_size = bytesize;
656 | CUresult ret;
657 |
658 | if (g_vcuda_config.enable) {
659 | atomic_action(pid_path, get_used_gpu_memory, (void *)&used);
660 |
661 | if (unlikely(used + request_size > g_vcuda_config.gpu_memory)) {
662 | ret = CUDA_ERROR_OUT_OF_MEMORY;
663 | goto DONE;
664 | }
665 | }
666 |
667 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuMemAllocManaged, dptr, bytesize,
668 | flags);
669 | DONE:
670 | return ret;
671 | }
672 |
673 | CUresult cuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize) {
674 | size_t used = 0;
675 | size_t request_size = bytesize;
676 | CUresult ret;
677 |
678 | if (g_vcuda_config.enable) {
679 | atomic_action(pid_path, get_used_gpu_memory, (void *)&used);
680 |
681 | if (unlikely(used + request_size > g_vcuda_config.gpu_memory)) {
682 | ret = CUDA_ERROR_OUT_OF_MEMORY;
683 | goto DONE;
684 | }
685 | }
686 |
687 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuMemAlloc_v2, dptr, bytesize);
688 | DONE:
689 | return ret;
690 | }
691 |
692 | CUresult cuMemAlloc(CUdeviceptr *dptr, size_t bytesize) {
693 | size_t used = 0;
694 | size_t request_size = bytesize;
695 | CUresult ret;
696 |
697 | if (g_vcuda_config.enable) {
698 | atomic_action(pid_path, get_used_gpu_memory, (void *)&used);
699 |
700 | if (unlikely(used + request_size > g_vcuda_config.gpu_memory)) {
701 | ret = CUDA_ERROR_OUT_OF_MEMORY;
702 | goto DONE;
703 | }
704 | }
705 |
706 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuMemAlloc, dptr, bytesize);
707 | DONE:
708 | return ret;
709 | }
710 |
711 | CUresult cuMemAllocPitch_v2(CUdeviceptr *dptr, size_t *pPitch,
712 | size_t WidthInBytes, size_t Height,
713 | unsigned int ElementSizeBytes) {
714 | size_t used = 0;
715 | size_t request_size = ROUND_UP(WidthInBytes * Height, ElementSizeBytes);
716 | CUresult ret;
717 |
718 | if (g_vcuda_config.enable) {
719 | atomic_action(pid_path, get_used_gpu_memory, (void *)&used);
720 |
721 | if (unlikely(used + request_size > g_vcuda_config.gpu_memory)) {
722 | ret = CUDA_ERROR_OUT_OF_MEMORY;
723 | goto DONE;
724 | }
725 | }
726 |
727 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuMemAllocPitch_v2, dptr, pPitch,
728 | WidthInBytes, Height, ElementSizeBytes);
729 | DONE:
730 | return ret;
731 | }
732 |
733 | CUresult cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch, size_t WidthInBytes,
734 | size_t Height, unsigned int ElementSizeBytes) {
735 | size_t used = 0;
736 | size_t request_size = ROUND_UP(WidthInBytes * Height, ElementSizeBytes);
737 | CUresult ret;
738 |
739 | if (g_vcuda_config.enable) {
740 | atomic_action(pid_path, get_used_gpu_memory, (void *)&used);
741 |
742 | if (unlikely(used + request_size > g_vcuda_config.gpu_memory)) {
743 | ret = CUDA_ERROR_OUT_OF_MEMORY;
744 | goto DONE;
745 | }
746 | }
747 |
748 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuMemAllocPitch, dptr, pPitch,
749 | WidthInBytes, Height, ElementSizeBytes);
750 | DONE:
751 | return ret;
752 | }
753 |
754 | static size_t get_array_base_size(int format) {
755 | size_t base_size = 0;
756 |
757 | switch (format) {
758 | case CU_AD_FORMAT_UNSIGNED_INT8:
759 | case CU_AD_FORMAT_SIGNED_INT8:
760 | base_size = 8;
761 | break;
762 | case CU_AD_FORMAT_UNSIGNED_INT16:
763 | case CU_AD_FORMAT_SIGNED_INT16:
764 | case CU_AD_FORMAT_HALF:
765 | base_size = 16;
766 | break;
767 | case CU_AD_FORMAT_UNSIGNED_INT32:
768 | case CU_AD_FORMAT_SIGNED_INT32:
769 | case CU_AD_FORMAT_FLOAT:
770 | base_size = 32;
771 | break;
772 | default:
773 | base_size = 32;
774 | }
775 |
776 | return base_size;
777 | }
778 |
779 | static CUresult
780 | cuArrayCreate_helper(const CUDA_ARRAY_DESCRIPTOR *pAllocateArray) {
781 | size_t used = 0;
782 | size_t base_size = 0;
783 | size_t request_size = 0;
784 | CUresult ret = CUDA_SUCCESS;
785 |
786 | if (g_vcuda_config.enable) {
787 | base_size = get_array_base_size(pAllocateArray->Format);
788 | request_size = base_size * pAllocateArray->NumChannels *
789 | pAllocateArray->Height * pAllocateArray->Width;
790 |
791 | atomic_action(pid_path, get_used_gpu_memory, (void *)&used);
792 |
793 | if (unlikely(used + request_size > g_vcuda_config.gpu_memory)) {
794 | ret = CUDA_ERROR_OUT_OF_MEMORY;
795 | goto DONE;
796 | }
797 | }
798 |
799 | DONE:
800 | return ret;
801 | }
802 |
803 | CUresult cuArrayCreate_v2(CUarray *pHandle,
804 | const CUDA_ARRAY_DESCRIPTOR *pAllocateArray) {
805 | CUresult ret;
806 |
807 | ret = cuArrayCreate_helper(pAllocateArray);
808 | if (ret != CUDA_SUCCESS) {
809 | goto DONE;
810 | }
811 |
812 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuArrayCreate_v2, pHandle,
813 | pAllocateArray);
814 | DONE:
815 | return ret;
816 | }
817 |
818 | CUresult cuArrayCreate(CUarray *pHandle,
819 | const CUDA_ARRAY_DESCRIPTOR *pAllocateArray) {
820 | CUresult ret;
821 |
822 | ret = cuArrayCreate_helper(pAllocateArray);
823 | if (ret != CUDA_SUCCESS) {
824 | goto DONE;
825 | }
826 |
827 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuArrayCreate, pHandle,
828 | pAllocateArray);
829 | DONE:
830 | return ret;
831 | }
832 |
833 | static CUresult
834 | cuArray3DCreate_helper(const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray) {
835 | size_t used = 0;
836 | size_t base_size = 0;
837 | size_t request_size = 0;
838 | CUresult ret = CUDA_SUCCESS;
839 |
840 | if (g_vcuda_config.enable) {
841 | base_size = get_array_base_size(pAllocateArray->Format);
842 | request_size = base_size * pAllocateArray->NumChannels *
843 | pAllocateArray->Height * pAllocateArray->Width *
844 | pAllocateArray->Depth;
845 |
846 | atomic_action(pid_path, get_used_gpu_memory, (void *)&used);
847 |
848 | if (unlikely(used + request_size > g_vcuda_config.gpu_memory)) {
849 | ret = CUDA_ERROR_OUT_OF_MEMORY;
850 | goto DONE;
851 | }
852 | }
853 |
854 | DONE:
855 | return ret;
856 | }
857 |
858 | CUresult cuArray3DCreate_v2(CUarray *pHandle,
859 | const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray) {
860 | CUresult ret;
861 |
862 | ret = cuArray3DCreate_helper(pAllocateArray);
863 | if (ret != CUDA_SUCCESS) {
864 | goto DONE;
865 | }
866 |
867 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuArray3DCreate_v2, pHandle,
868 | pAllocateArray);
869 | DONE:
870 | return ret;
871 | }
872 |
873 | CUresult cuArray3DCreate(CUarray *pHandle,
874 | const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray) {
875 | CUresult ret;
876 |
877 | ret = cuArray3DCreate_helper(pAllocateArray);
878 | if (ret != CUDA_SUCCESS) {
879 | goto DONE;
880 | }
881 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuArray3DCreate, pHandle,
882 | pAllocateArray);
883 | DONE:
884 | return ret;
885 | }
886 |
887 | CUresult
888 | cuMipmappedArrayCreate(CUmipmappedArray *pHandle,
889 | const CUDA_ARRAY3D_DESCRIPTOR *pMipmappedArrayDesc,
890 | unsigned int numMipmapLevels) {
891 | size_t used = 0;
892 | size_t base_size = 0;
893 | size_t request_size = 0;
894 | CUresult ret;
895 |
896 | if (g_vcuda_config.enable) {
897 | base_size = get_array_base_size(pMipmappedArrayDesc->Format);
898 | request_size = base_size * pMipmappedArrayDesc->NumChannels *
899 | pMipmappedArrayDesc->Height * pMipmappedArrayDesc->Width *
900 | pMipmappedArrayDesc->Depth;
901 |
902 | atomic_action(pid_path, get_used_gpu_memory, (void *)&used);
903 |
904 | if (unlikely(used + request_size > g_vcuda_config.gpu_memory)) {
905 | ret = CUDA_ERROR_OUT_OF_MEMORY;
906 | goto DONE;
907 | }
908 | }
909 |
910 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuMipmappedArrayCreate, pHandle,
911 | pMipmappedArrayDesc, numMipmapLevels);
912 | DONE:
913 | return ret;
914 | }
915 |
916 | CUresult cuDeviceTotalMem_v2(size_t *bytes, CUdevice dev) {
917 | if (g_vcuda_config.enable) {
918 | *bytes = g_vcuda_config.gpu_memory;
919 |
920 | return CUDA_SUCCESS;
921 | }
922 |
923 | return CUDA_ENTRY_CALL(cuda_library_entry, cuDeviceTotalMem_v2, bytes, dev);
924 | }
925 |
926 | CUresult cuDeviceTotalMem(size_t *bytes, CUdevice dev) {
927 | if (g_vcuda_config.enable) {
928 | *bytes = g_vcuda_config.gpu_memory;
929 |
930 | return CUDA_SUCCESS;
931 | }
932 |
933 | return CUDA_ENTRY_CALL(cuda_library_entry, cuDeviceTotalMem, bytes, dev);
934 | }
935 |
936 | CUresult cuMemGetInfo_v2(size_t *free, size_t *total) {
937 | size_t used = 0;
938 |
939 | if (g_vcuda_config.enable) {
940 | atomic_action(pid_path, get_used_gpu_memory, (void *)&used);
941 |
942 | *total = g_vcuda_config.gpu_memory;
943 | *free =
944 | used > g_vcuda_config.gpu_memory ? 0 : g_vcuda_config.gpu_memory - used;
945 |
946 | return CUDA_SUCCESS;
947 | }
948 |
949 | return CUDA_ENTRY_CALL(cuda_library_entry, cuMemGetInfo_v2, free, total);
950 | }
951 |
952 | CUresult cuMemGetInfo(size_t *free, size_t *total) {
953 | size_t used = 0;
954 |
955 | if (g_vcuda_config.enable) {
956 | atomic_action(pid_path, get_used_gpu_memory, (void *)&used);
957 |
958 | *total = g_vcuda_config.gpu_memory;
959 | *free =
960 | used > g_vcuda_config.gpu_memory ? 0 : g_vcuda_config.gpu_memory - used;
961 |
962 | return CUDA_SUCCESS;
963 | }
964 |
965 | return CUDA_ENTRY_CALL(cuda_library_entry, cuMemGetInfo, free, total);
966 | }
967 |
968 | CUresult cuLaunchKernel_ptsz(CUfunction f, unsigned int gridDimX,
969 | unsigned int gridDimY, unsigned int gridDimZ,
970 | unsigned int blockDimX, unsigned int blockDimY,
971 | unsigned int blockDimZ,
972 | unsigned int sharedMemBytes, CUstream hStream,
973 | void **kernelParams, void **extra) {
974 | rate_limiter(gridDimX * gridDimY * gridDimZ,
975 | blockDimX * blockDimY * blockDimZ);
976 |
977 | return CUDA_ENTRY_CALL(cuda_library_entry, cuLaunchKernel_ptsz, f, gridDimX,
978 | gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ,
979 | sharedMemBytes, hStream, kernelParams, extra);
980 | }
981 |
982 | CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX,
983 | unsigned int gridDimY, unsigned int gridDimZ,
984 | unsigned int blockDimX, unsigned int blockDimY,
985 | unsigned int blockDimZ, unsigned int sharedMemBytes,
986 | CUstream hStream, void **kernelParams, void **extra) {
987 | rate_limiter(gridDimX * gridDimY * gridDimZ,
988 | blockDimX * blockDimY * blockDimZ);
989 |
990 | return CUDA_ENTRY_CALL(cuda_library_entry, cuLaunchKernel, f, gridDimX,
991 | gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ,
992 | sharedMemBytes, hStream, kernelParams, extra);
993 | }
994 |
995 | CUresult cuLaunch(CUfunction f) {
996 | rate_limiter(1, g_block_x * g_block_y * g_block_z);
997 | return CUDA_ENTRY_CALL(cuda_library_entry, cuLaunch, f);
998 | }
999 |
1000 | CUresult cuLaunchCooperativeKernel_ptsz(
1001 | CUfunction f, unsigned int gridDimX, unsigned int gridDimY,
1002 | unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY,
1003 | unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream,
1004 | void **kernelParams) {
1005 | rate_limiter(gridDimX * gridDimY * gridDimZ,
1006 | blockDimX * blockDimY * blockDimZ);
1007 | return CUDA_ENTRY_CALL(cuda_library_entry, cuLaunchCooperativeKernel_ptsz, f,
1008 | gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY,
1009 | blockDimZ, sharedMemBytes, hStream, kernelParams);
1010 | }
1011 |
1012 | CUresult cuLaunchCooperativeKernel(CUfunction f, unsigned int gridDimX,
1013 | unsigned int gridDimY, unsigned int gridDimZ,
1014 | unsigned int blockDimX,
1015 | unsigned int blockDimY,
1016 | unsigned int blockDimZ,
1017 | unsigned int sharedMemBytes,
1018 | CUstream hStream, void **kernelParams) {
1019 | rate_limiter(gridDimX * gridDimY * gridDimZ,
1020 | blockDimX * blockDimY * blockDimZ);
1021 | return CUDA_ENTRY_CALL(cuda_library_entry, cuLaunchCooperativeKernel, f,
1022 | gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY,
1023 | blockDimZ, sharedMemBytes, hStream, kernelParams);
1024 | }
1025 |
1026 | CUresult cuLaunchGrid(CUfunction f, int grid_width, int grid_height) {
1027 | rate_limiter(grid_width * grid_height, g_block_x * g_block_y * g_block_z);
1028 | return CUDA_ENTRY_CALL(cuda_library_entry, cuLaunchGrid, f, grid_width,
1029 | grid_height);
1030 | }
1031 |
1032 | CUresult cuLaunchGridAsync(CUfunction f, int grid_width, int grid_height,
1033 | CUstream hStream) {
1034 | rate_limiter(grid_width * grid_height, g_block_x * g_block_y * g_block_z);
1035 | return CUDA_ENTRY_CALL(cuda_library_entry, cuLaunchGridAsync, f, grid_width,
1036 | grid_height, hStream);
1037 | }
1038 |
/*
 * Hijacked cuFuncSetBlockShape: record the block shape into g_block_x/y/z
 * so that later dimension-less launches (cuLaunch, cuLaunchGrid*) can
 * charge the rate limiter with an appropriate thread count, then forward
 * the call to the real driver.
 *
 * The globals are only updated when vcuda limiting is enabled; updates are
 * serialized with a CAS spin lock (g_block_locker) so concurrent callers
 * never publish a mixed x/y/z triple.
 */
CUresult cuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z) {
  if (g_vcuda_config.enable) {
    /* Acquire: spin until the lock flips 0 -> 1. */
    while (!CAS(&g_block_locker, 0, 1)) {
    }

    g_block_x = x;
    g_block_y = y;
    g_block_z = z;

    LOGGER(5, "Set block shape: %d, %d, %d", x, y, z);

    /* Release: flip the lock back 1 -> 0. */
    while (!CAS(&g_block_locker, 1, 0)) {
    }
  }
  return CUDA_ENTRY_CALL(cuda_library_entry, cuFuncSetBlockShape, hfunc, x, y,
                         z);
}
1056 |
--------------------------------------------------------------------------------
/src/loader.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Tencent is pleased to support the open source community by making TKEStack
3 | * available.
4 | *
5 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved.
6 | *
7 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
8 | * use this file except in compliance with the License. You may obtain a copy of
9 | * the License at
10 | *
11 | * https://opensource.org/licenses/Apache-2.0
12 | *
13 | * Unless required by applicable law or agreed to in writing, software
14 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
15 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations under the License.
17 | */
18 |
19 | //
20 | // Created by thomas on 6/15/18.
21 | //
22 | #include
23 | #include
24 | #include
25 | #include
26 | #include
27 | #include
28 | #include
29 |
30 | #include "include/cuda-helper.h"
31 | #include "include/hijack.h"
32 | #include "include/nvml-helper.h"
33 |
/*
 * Name table of every CUDA driver (libcuda) symbol this shim knows about.
 * Each entry starts with only its exported name; the function pointers are
 * presumably filled in at load time by the loader (via dlsym against the
 * real driver library — TODO confirm in the resolution code below) and are
 * then dispatched through the CUDA_ENTRY_CALL macro from hijack_call.c /
 * cuda_originals.c.
 *
 * NOTE(review): do not reorder or remove entries — lookup appears to be
 * tied to this table's layout; confirm against cuda-helper.h before
 * touching the ordering.
 */
entry_t cuda_library_entry[] = {
    {.name = "cuInit"},
    {.name = "cuDeviceGet"},
    {.name = "cuDeviceGetCount"},
    {.name = "cuDeviceGetName"},
    {.name = "cuDeviceTotalMem_v2"},
    {.name = "cuDeviceGetAttribute"},
    {.name = "cuDeviceGetP2PAttribute"},
    {.name = "cuDriverGetVersion"},
    {.name = "cuDeviceGetByPCIBusId"},
    {.name = "cuDeviceGetPCIBusId"},
    {.name = "cuDevicePrimaryCtxRetain"},
    {.name = "cuDevicePrimaryCtxRelease"},
    {.name = "cuDevicePrimaryCtxSetFlags"},
    {.name = "cuDevicePrimaryCtxGetState"},
    {.name = "cuDevicePrimaryCtxReset"},
    {.name = "cuCtxCreate_v2"},
    {.name = "cuCtxGetFlags"},
    {.name = "cuCtxSetCurrent"},
    {.name = "cuCtxGetCurrent"},
    {.name = "cuCtxDetach"},
    {.name = "cuCtxGetApiVersion"},
    {.name = "cuCtxGetDevice"},
    {.name = "cuCtxGetLimit"},
    {.name = "cuCtxSetLimit"},
    {.name = "cuCtxGetCacheConfig"},
    {.name = "cuCtxSetCacheConfig"},
    {.name = "cuCtxGetSharedMemConfig"},
    {.name = "cuCtxGetStreamPriorityRange"},
    {.name = "cuCtxSetSharedMemConfig"},
    {.name = "cuCtxSynchronize"},
    /* Module / linker management. */
    {.name = "cuModuleLoad"},
    {.name = "cuModuleLoadData"},
    {.name = "cuModuleLoadFatBinary"},
    {.name = "cuModuleUnload"},
    {.name = "cuModuleGetFunction"},
    {.name = "cuModuleGetGlobal_v2"},
    {.name = "cuModuleGetTexRef"},
    {.name = "cuModuleGetSurfRef"},
    {.name = "cuLinkCreate"},
    {.name = "cuLinkAddData"},
    {.name = "cuLinkAddFile"},
    {.name = "cuLinkComplete"},
    {.name = "cuLinkDestroy"},
    /* Memory allocation and copies (incl. _v2 / _ptds / _ptsz variants). */
    {.name = "cuMemGetInfo_v2"},
    {.name = "cuMemAllocManaged"},
    {.name = "cuMemAlloc_v2"},
    {.name = "cuMemAllocPitch_v2"},
    {.name = "cuMemFree_v2"},
    {.name = "cuMemGetAddressRange_v2"},
    {.name = "cuMemFreeHost"},
    {.name = "cuMemHostAlloc"},
    {.name = "cuMemHostGetDevicePointer_v2"},
    {.name = "cuMemHostGetFlags"},
    {.name = "cuMemHostRegister_v2"},
    {.name = "cuMemHostUnregister"},
    {.name = "cuPointerGetAttribute"},
    {.name = "cuPointerGetAttributes"},
    {.name = "cuMemcpy"},
    {.name = "cuMemcpy_ptds"},
    {.name = "cuMemcpyAsync"},
    {.name = "cuMemcpyAsync_ptsz"},
    {.name = "cuMemcpyPeer"},
    {.name = "cuMemcpyPeer_ptds"},
    {.name = "cuMemcpyPeerAsync"},
    {.name = "cuMemcpyPeerAsync_ptsz"},
    {.name = "cuMemcpyHtoD_v2"},
    {.name = "cuMemcpyHtoD_v2_ptds"},
    {.name = "cuMemcpyHtoDAsync_v2"},
    {.name = "cuMemcpyHtoDAsync_v2_ptsz"},
    {.name = "cuMemcpyDtoH_v2"},
    {.name = "cuMemcpyDtoH_v2_ptds"},
    {.name = "cuMemcpyDtoHAsync_v2"},
    {.name = "cuMemcpyDtoHAsync_v2_ptsz"},
    {.name = "cuMemcpyDtoD_v2"},
    {.name = "cuMemcpyDtoD_v2_ptds"},
    {.name = "cuMemcpyDtoDAsync_v2"},
    {.name = "cuMemcpyDtoDAsync_v2_ptsz"},
    {.name = "cuMemcpy2DUnaligned_v2"},
    {.name = "cuMemcpy2DUnaligned_v2_ptds"},
    {.name = "cuMemcpy2DAsync_v2"},
    {.name = "cuMemcpy2DAsync_v2_ptsz"},
    {.name = "cuMemcpy3D_v2"},
    {.name = "cuMemcpy3D_v2_ptds"},
    {.name = "cuMemcpy3DAsync_v2"},
    {.name = "cuMemcpy3DAsync_v2_ptsz"},
    {.name = "cuMemcpy3DPeer"},
    {.name = "cuMemcpy3DPeer_ptds"},
    {.name = "cuMemcpy3DPeerAsync"},
    {.name = "cuMemcpy3DPeerAsync_ptsz"},
    {.name = "cuMemsetD8_v2"},
    {.name = "cuMemsetD8_v2_ptds"},
    {.name = "cuMemsetD8Async"},
    {.name = "cuMemsetD8Async_ptsz"},
    {.name = "cuMemsetD2D8_v2"},
    {.name = "cuMemsetD2D8_v2_ptds"},
    {.name = "cuMemsetD2D8Async"},
    {.name = "cuMemsetD2D8Async_ptsz"},
    {.name = "cuFuncSetCacheConfig"},
    {.name = "cuFuncSetSharedMemConfig"},
    {.name = "cuFuncGetAttribute"},
    /* Arrays, textures and surfaces. */
    {.name = "cuArrayCreate_v2"},
    {.name = "cuArrayGetDescriptor_v2"},
    {.name = "cuArray3DCreate_v2"},
    {.name = "cuArray3DGetDescriptor_v2"},
    {.name = "cuArrayDestroy"},
    {.name = "cuMipmappedArrayCreate"},
    {.name = "cuMipmappedArrayGetLevel"},
    {.name = "cuMipmappedArrayDestroy"},
    {.name = "cuTexRefCreate"},
    {.name = "cuTexRefDestroy"},
    {.name = "cuTexRefSetArray"},
    {.name = "cuTexRefSetMipmappedArray"},
    {.name = "cuTexRefSetAddress_v2"},
    {.name = "cuTexRefSetAddress2D_v3"},
    {.name = "cuTexRefSetFormat"},
    {.name = "cuTexRefSetAddressMode"},
    {.name = "cuTexRefSetFilterMode"},
    {.name = "cuTexRefSetMipmapFilterMode"},
    {.name = "cuTexRefSetMipmapLevelBias"},
    {.name = "cuTexRefSetMipmapLevelClamp"},
    {.name = "cuTexRefSetMaxAnisotropy"},
    {.name = "cuTexRefSetFlags"},
    {.name = "cuTexRefSetBorderColor"},
    {.name = "cuTexRefGetBorderColor"},
    {.name = "cuSurfRefSetArray"},
    {.name = "cuTexObjectCreate"},
    {.name = "cuTexObjectDestroy"},
    {.name = "cuTexObjectGetResourceDesc"},
    {.name = "cuTexObjectGetTextureDesc"},
    {.name = "cuTexObjectGetResourceViewDesc"},
    {.name = "cuSurfObjectCreate"},
    {.name = "cuSurfObjectDestroy"},
    {.name = "cuSurfObjectGetResourceDesc"},
    /* Kernel launch entry points — several of these are hijacked in
     * hijack_call.c for rate limiting. */
    {.name = "cuLaunchKernel"},
    {.name = "cuLaunchKernel_ptsz"},
    {.name = "cuEventCreate"},
    {.name = "cuEventRecord"},
    {.name = "cuEventRecord_ptsz"},
    {.name = "cuEventQuery"},
    {.name = "cuEventSynchronize"},
    {.name = "cuEventDestroy_v2"},
    {.name = "cuEventElapsedTime"},
    {.name = "cuStreamWaitValue32"},
    {.name = "cuStreamWaitValue32_ptsz"},
    {.name = "cuStreamWriteValue32"},
    {.name = "cuStreamWriteValue32_ptsz"},
    {.name = "cuStreamBatchMemOp"},
    {.name = "cuStreamBatchMemOp_ptsz"},
    {.name = "cuStreamCreate"},
    {.name = "cuStreamCreateWithPriority"},
    {.name = "cuStreamGetPriority"},
    {.name = "cuStreamGetPriority_ptsz"},
    {.name = "cuStreamGetFlags"},
    {.name = "cuStreamGetFlags_ptsz"},
    {.name = "cuStreamDestroy_v2"},
    {.name = "cuStreamWaitEvent"},
    {.name = "cuStreamWaitEvent_ptsz"},
    {.name = "cuStreamAddCallback"},
    {.name = "cuStreamAddCallback_ptsz"},
    {.name = "cuStreamSynchronize"},
    {.name = "cuStreamSynchronize_ptsz"},
    {.name = "cuStreamQuery"},
    {.name = "cuStreamQuery_ptsz"},
    {.name = "cuStreamAttachMemAsync"},
    {.name = "cuStreamAttachMemAsync_ptsz"},
    {.name = "cuDeviceCanAccessPeer"},
    {.name = "cuCtxEnablePeerAccess"},
    {.name = "cuCtxDisablePeerAccess"},
    {.name = "cuIpcGetEventHandle"},
    {.name = "cuIpcOpenEventHandle"},
    {.name = "cuIpcGetMemHandle"},
    {.name = "cuIpcOpenMemHandle"},
    {.name = "cuIpcCloseMemHandle"},
    /* OpenGL / graphics interop. */
    {.name = "cuGLCtxCreate_v2"},
    {.name = "cuGLInit"},
    {.name = "cuGLGetDevices"},
    {.name = "cuGLRegisterBufferObject"},
    {.name = "cuGLMapBufferObject_v2"},
    {.name = "cuGLMapBufferObject_v2_ptds"},
    {.name = "cuGLMapBufferObjectAsync_v2"},
    {.name = "cuGLMapBufferObjectAsync_v2_ptsz"},
    {.name = "cuGLUnmapBufferObject"},
    {.name = "cuGLUnmapBufferObjectAsync"},
    {.name = "cuGLUnregisterBufferObject"},
    {.name = "cuGLSetBufferObjectMapFlags"},
    {.name = "cuGraphicsGLRegisterImage"},
    {.name = "cuGraphicsGLRegisterBuffer"},
    {.name = "cuGraphicsUnregisterResource"},
    {.name = "cuGraphicsMapResources"},
    {.name = "cuGraphicsMapResources_ptsz"},
    {.name = "cuGraphicsUnmapResources"},
    {.name = "cuGraphicsUnmapResources_ptsz"},
    {.name = "cuGraphicsResourceSetMapFlags_v2"},
    {.name = "cuGraphicsSubResourceGetMappedArray"},
    {.name = "cuGraphicsResourceGetMappedMipmappedArray"},
    {.name = "cuGraphicsResourceGetMappedPointer_v2"},
    {.name = "cuProfilerInitialize"},
    {.name = "cuProfilerStart"},
    {.name = "cuProfilerStop"},
    {.name = "cuVDPAUGetDevice"},
    {.name = "cuVDPAUCtxCreate_v2"},
    {.name = "cuGraphicsVDPAURegisterVideoSurface"},
    {.name = "cuGraphicsVDPAURegisterOutputSurface"},
    {.name = "cuGetExportTable"},
    {.name = "cuOccupancyMaxActiveBlocksPerMultiprocessor"},
    {.name = "cuMemAdvise"},
    {.name = "cuMemPrefetchAsync"},
    {.name = "cuMemPrefetchAsync_ptsz"},
    {.name = "cuMemRangeGetAttribute"},
    {.name = "cuMemRangeGetAttributes"},
    {.name = "cuGetErrorString"},
    {.name = "cuGetErrorName"},
    /* Legacy (pre-_v2) entry points kept for old-toolkit binaries. */
    {.name = "cuArray3DCreate"},
    {.name = "cuArray3DGetDescriptor"},
    {.name = "cuArrayCreate"},
    {.name = "cuArrayGetDescriptor"},
    {.name = "cuCtxAttach"},
    {.name = "cuCtxCreate"},
    {.name = "cuCtxDestroy"},
    {.name = "cuCtxDestroy_v2"},
    {.name = "cuCtxPopCurrent"},
    {.name = "cuCtxPopCurrent_v2"},
    {.name = "cuCtxPushCurrent"},
    {.name = "cuCtxPushCurrent_v2"},
    /* Debugger API exports. */
    {.name = "cudbgApiAttach"},
    {.name = "cudbgApiDetach"},
    {.name = "cudbgApiInit"},
    {.name = "cudbgGetAPI"},
    {.name = "cudbgGetAPIVersion"},
    {.name = "cudbgMain"},
    {.name = "cudbgReportDriverApiError"},
    {.name = "cudbgReportDriverInternalError"},
    {.name = "cuDeviceComputeCapability"},
    {.name = "cuDeviceGetProperties"},
    {.name = "cuDeviceTotalMem"},
    /* EGL interop. */
    {.name = "cuEGLInit"},
    {.name = "cuEGLStreamConsumerAcquireFrame"},
    {.name = "cuEGLStreamConsumerConnect"},
    {.name = "cuEGLStreamConsumerConnectWithFlags"},
    {.name = "cuEGLStreamConsumerDisconnect"},
    {.name = "cuEGLStreamConsumerReleaseFrame"},
    {.name = "cuEGLStreamProducerConnect"},
    {.name = "cuEGLStreamProducerDisconnect"},
    {.name = "cuEGLStreamProducerPresentFrame"},
    {.name = "cuEGLStreamProducerReturnFrame"},
    {.name = "cuEventDestroy"},
    {.name = "cuFuncSetAttribute"},
    {.name = "cuFuncSetBlockShape"},
    {.name = "cuFuncSetSharedSize"},
    {.name = "cuGLCtxCreate"},
    {.name = "cuGLGetDevices_v2"},
    {.name = "cuGLMapBufferObject"},
    {.name = "cuGLMapBufferObjectAsync"},
    {.name = "cuGraphicsEGLRegisterImage"},
    {.name = "cuGraphicsResourceGetMappedEglFrame"},
    {.name = "cuGraphicsResourceGetMappedPointer"},
    {.name = "cuGraphicsResourceSetMapFlags"},
    {.name = "cuLaunch"},
    {.name = "cuLaunchCooperativeKernel"},
    {.name = "cuLaunchCooperativeKernelMultiDevice"},
    {.name = "cuLaunchCooperativeKernel_ptsz"},
    {.name = "cuLaunchGrid"},
    {.name = "cuLaunchGridAsync"},
    {.name = "cuLinkAddData_v2"},
    {.name = "cuLinkAddFile_v2"},
    {.name = "cuLinkCreate_v2"},
    {.name = "cuMemAlloc"},
    {.name = "cuMemAllocHost"},
    {.name = "cuMemAllocHost_v2"},
    {.name = "cuMemAllocPitch"},
    {.name = "cuMemcpy2D"},
    {.name = "cuMemcpy2DAsync"},
    {.name = "cuMemcpy2DUnaligned"},
    {.name = "cuMemcpy2D_v2"},
    {.name = "cuMemcpy2D_v2_ptds"},
    {.name = "cuMemcpy3D"},
    {.name = "cuMemcpy3DAsync"},
    {.name = "cuMemcpyAtoA"},
    {.name = "cuMemcpyAtoA_v2"},
    {.name = "cuMemcpyAtoA_v2_ptds"},
    {.name = "cuMemcpyAtoD"},
    {.name = "cuMemcpyAtoD_v2"},
    {.name = "cuMemcpyAtoD_v2_ptds"},
    {.name = "cuMemcpyAtoH"},
    {.name = "cuMemcpyAtoHAsync"},
    {.name = "cuMemcpyAtoHAsync_v2"},
    {.name = "cuMemcpyAtoHAsync_v2_ptsz"},
    {.name = "cuMemcpyAtoH_v2"},
    {.name = "cuMemcpyAtoH_v2_ptds"},
    {.name = "cuMemcpyDtoA"},
    {.name = "cuMemcpyDtoA_v2"},
    {.name = "cuMemcpyDtoA_v2_ptds"},
    {.name = "cuMemcpyDtoD"},
    {.name = "cuMemcpyDtoDAsync"},
    {.name = "cuMemcpyDtoH"},
    {.name = "cuMemcpyDtoHAsync"},
    {.name = "cuMemcpyHtoA"},
    {.name = "cuMemcpyHtoAAsync"},
    {.name = "cuMemcpyHtoAAsync_v2"},
    {.name = "cuMemcpyHtoAAsync_v2_ptsz"},
    {.name = "cuMemcpyHtoA_v2"},
    {.name = "cuMemcpyHtoA_v2_ptds"},
    {.name = "cuMemcpyHtoD"},
    {.name = "cuMemcpyHtoDAsync"},
    {.name = "cuMemFree"},
    {.name = "cuMemGetAddressRange"},
    //{.name = "cuMemGetAttribute"},
    //{.name = "cuMemGetAttribute_v2"},
    {.name = "cuMemGetInfo"},
    {.name = "cuMemHostGetDevicePointer"},
    {.name = "cuMemHostRegister"},
    {.name = "cuMemsetD16"},
    {.name = "cuMemsetD16Async"},
    {.name = "cuMemsetD16Async_ptsz"},
    {.name = "cuMemsetD16_v2"},
    {.name = "cuMemsetD16_v2_ptds"},
    {.name = "cuMemsetD2D16"},
    {.name = "cuMemsetD2D16Async"},
    {.name = "cuMemsetD2D16Async_ptsz"},
    {.name = "cuMemsetD2D16_v2"},
    {.name = "cuMemsetD2D16_v2_ptds"},
    {.name = "cuMemsetD2D32"},
    {.name = "cuMemsetD2D32Async"},
    {.name = "cuMemsetD2D32Async_ptsz"},
    {.name = "cuMemsetD2D32_v2"},
    {.name = "cuMemsetD2D32_v2_ptds"},
    {.name = "cuMemsetD2D8"},
    {.name = "cuMemsetD32"},
    {.name = "cuMemsetD32Async"},
    {.name = "cuMemsetD32Async_ptsz"},
    {.name = "cuMemsetD32_v2"},
    {.name = "cuMemsetD32_v2_ptds"},
    {.name = "cuMemsetD8"},
    {.name = "cuModuleGetGlobal"},
    {.name = "cuModuleLoadDataEx"},
    {.name = "cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"},
    {.name = "cuOccupancyMaxPotentialBlockSize"},
    {.name = "cuOccupancyMaxPotentialBlockSizeWithFlags"},
    {.name = "cuParamSetf"},
    {.name = "cuParamSeti"},
    {.name = "cuParamSetSize"},
    {.name = "cuParamSetTexRef"},
    {.name = "cuParamSetv"},
    {.name = "cuPointerSetAttribute"},
    {.name = "cuStreamDestroy"},
    {.name = "cuStreamWaitValue64"},
    {.name = "cuStreamWaitValue64_ptsz"},
    {.name = "cuStreamWriteValue64"},
    {.name = "cuStreamWriteValue64_ptsz"},
    {.name = "cuSurfRefGetArray"},
    {.name = "cuTexRefGetAddress"},
    {.name = "cuTexRefGetAddressMode"},
    {.name = "cuTexRefGetAddress_v2"},
    {.name = "cuTexRefGetArray"},
    {.name = "cuTexRefGetFilterMode"},
    {.name = "cuTexRefGetFlags"},
    {.name = "cuTexRefGetFormat"},
    {.name = "cuTexRefGetMaxAnisotropy"},
    {.name = "cuTexRefGetMipmapFilterMode"},
    {.name = "cuTexRefGetMipmapLevelBias"},
    {.name = "cuTexRefGetMipmapLevelClamp"},
    {.name = "cuTexRefGetMipmappedArray"},
    {.name = "cuTexRefSetAddress"},
    {.name = "cuTexRefSetAddress2D"},
    {.name = "cuTexRefSetAddress2D_v2"},
    {.name = "cuVDPAUCtxCreate"},
    {.name = "cuEGLApiInit"},
    /* CUDA 10.x additions: external memory/semaphores and graphs. */
    {.name = "cuDestroyExternalMemory"},
    {.name = "cuDestroyExternalSemaphore"},
    {.name = "cuDeviceGetUuid"},
    {.name = "cuExternalMemoryGetMappedBuffer"},
    {.name = "cuExternalMemoryGetMappedMipmappedArray"},
    {.name = "cuGraphAddChildGraphNode"},
    {.name = "cuGraphAddDependencies"},
    {.name = "cuGraphAddEmptyNode"},
    {.name = "cuGraphAddHostNode"},
    {.name = "cuGraphAddKernelNode"},
    {.name = "cuGraphAddMemcpyNode"},
    {.name = "cuGraphAddMemsetNode"},
    {.name = "cuGraphChildGraphNodeGetGraph"},
    {.name = "cuGraphClone"},
    {.name = "cuGraphCreate"},
    {.name = "cuGraphDestroy"},
    {.name = "cuGraphDestroyNode"},
    {.name = "cuGraphExecDestroy"},
    {.name = "cuGraphGetEdges"},
    {.name = "cuGraphGetNodes"},
    {.name = "cuGraphGetRootNodes"},
    {.name = "cuGraphHostNodeGetParams"},
    {.name = "cuGraphHostNodeSetParams"},
    {.name = "cuGraphInstantiate"},
    {.name = "cuGraphKernelNodeGetParams"},
    {.name = "cuGraphKernelNodeSetParams"},
    {.name = "cuGraphLaunch"},
    {.name = "cuGraphLaunch_ptsz"},
    {.name = "cuGraphMemcpyNodeGetParams"},
    {.name = "cuGraphMemcpyNodeSetParams"},
    {.name = "cuGraphMemsetNodeGetParams"},
    {.name = "cuGraphMemsetNodeSetParams"},
    {.name = "cuGraphNodeFindInClone"},
    {.name = "cuGraphNodeGetDependencies"},
    {.name = "cuGraphNodeGetDependentNodes"},
    {.name = "cuGraphNodeGetType"},
    {.name = "cuGraphRemoveDependencies"},
    {.name = "cuImportExternalMemory"},
    {.name = "cuImportExternalSemaphore"},
    {.name = "cuLaunchHostFunc"},
    {.name = "cuLaunchHostFunc_ptsz"},
    {.name = "cuSignalExternalSemaphoresAsync"},
    {.name = "cuSignalExternalSemaphoresAsync_ptsz"},
    {.name = "cuStreamBeginCapture"},
    {.name = "cuStreamBeginCapture_ptsz"},
    {.name = "cuStreamEndCapture"},
    {.name = "cuStreamEndCapture_ptsz"},
    {.name = "cuStreamGetCtx"},
    {.name = "cuStreamGetCtx_ptsz"},
    {.name = "cuStreamIsCapturing"},
    {.name = "cuStreamIsCapturing_ptsz"},
    {.name = "cuWaitExternalSemaphoresAsync"},
    {.name = "cuWaitExternalSemaphoresAsync_ptsz"},
    {.name = "cuGraphExecKernelNodeSetParams"},
    {.name = "cuStreamBeginCapture_v2"},
    {.name = "cuStreamBeginCapture_v2_ptsz"},
    {.name = "cuStreamGetCaptureInfo"},
    {.name = "cuStreamGetCaptureInfo_ptsz"},
    {.name = "cuThreadExchangeStreamCaptureMode"},
    {.name = "cuDeviceGetNvSciSyncAttributes"},
    {.name = "cuGraphExecHostNodeSetParams"},
    {.name = "cuGraphExecMemcpyNodeSetParams"},
    {.name = "cuGraphExecMemsetNodeSetParams"},
    {.name = "cuGraphExecUpdate"},
    /* Virtual memory management API. */
    {.name = "cuMemAddressFree"},
    {.name = "cuMemAddressReserve"},
    {.name = "cuMemCreate"},
    {.name = "cuMemExportToShareableHandle"},
    {.name = "cuMemGetAccess"},
    {.name = "cuMemGetAllocationGranularity"},
    {.name = "cuMemGetAllocationPropertiesFromHandle"},
    {.name = "cuMemImportFromShareableHandle"},
    {.name = "cuMemMap"},
    {.name = "cuMemRelease"},
    {.name = "cuMemSetAccess"},
    {.name = "cuMemUnmap"},
    {.name = "cuCtxResetPersistingL2Cache"},
    {.name = "cuDevicePrimaryCtxRelease_v2"},
    {.name = "cuDevicePrimaryCtxReset_v2"},
    {.name = "cuDevicePrimaryCtxSetFlags_v2"},
    {.name = "cuFuncGetModule"},
    {.name = "cuGraphInstantiate_v2"},
    {.name = "cuGraphKernelNodeCopyAttributes"},
    {.name = "cuGraphKernelNodeGetAttribute"},
    {.name = "cuGraphKernelNodeSetAttribute"},
    {.name = "cuMemRetainAllocationHandle"},
    {.name = "cuOccupancyAvailableDynamicSMemPerBlock"},
    {.name = "cuStreamCopyAttributes"},
    {.name = "cuStreamCopyAttributes_ptsz"},
    {.name = "cuStreamGetAttribute"},
    {.name = "cuStreamGetAttribute_ptsz"},
    {.name = "cuStreamSetAttribute"},
    {.name = "cuStreamSetAttribute_ptsz"},
    {.name = "cuArrayGetPlane"},
    {.name = "cuArrayGetSparseProperties"},
    /* Memory pools / stream-ordered allocation. */
    {.name = "cuDeviceGetDefaultMemPool"},
    {.name = "cuDeviceGetLuid"},
    {.name = "cuDeviceGetMemPool"},
    {.name = "cuDeviceGetTexture1DLinearMaxWidth"},
    {.name = "cuDeviceSetMemPool"},
    {.name = "cuEventRecordWithFlags"},
    {.name = "cuEventRecordWithFlags_ptsz"},
    {.name = "cuGraphAddEventRecordNode"},
    {.name = "cuGraphAddEventWaitNode"},
    {.name = "cuGraphAddExternalSemaphoresSignalNode"},
    {.name = "cuGraphAddExternalSemaphoresWaitNode"},
    {.name = "cuGraphEventRecordNodeGetEvent"},
    {.name = "cuGraphEventRecordNodeSetEvent"},
    {.name = "cuGraphEventWaitNodeGetEvent"},
    {.name = "cuGraphEventWaitNodeSetEvent"},
    {.name = "cuGraphExecChildGraphNodeSetParams"},
    {.name = "cuGraphExecEventRecordNodeSetEvent"},
    {.name = "cuGraphExecEventWaitNodeSetEvent"},
    {.name = "cuGraphExecExternalSemaphoresSignalNodeSetParams"},
    {.name = "cuGraphExecExternalSemaphoresWaitNodeSetParams"},
    {.name = "cuGraphExternalSemaphoresSignalNodeGetParams"},
    {.name = "cuGraphExternalSemaphoresSignalNodeSetParams"},
    {.name = "cuGraphExternalSemaphoresWaitNodeGetParams"},
    {.name = "cuGraphExternalSemaphoresWaitNodeSetParams"},
    {.name = "cuGraphUpload"},
    {.name = "cuGraphUpload_ptsz"},
    {.name = "cuIpcOpenMemHandle_v2"},
    {.name = "cuMemAllocAsync"},
    {.name = "cuMemAllocAsync_ptsz"},
    {.name = "cuMemAllocFromPoolAsync"},
    {.name = "cuMemAllocFromPoolAsync_ptsz"},
    {.name = "cuMemFreeAsync"},
    {.name = "cuMemFreeAsync_ptsz"},
    {.name = "cuMemMapArrayAsync"},
    {.name = "cuMemMapArrayAsync_ptsz"},
    {.name = "cuMemPoolCreate"},
    {.name = "cuMemPoolDestroy"},
    {.name = "cuMemPoolExportPointer"},
    {.name = "cuMemPoolExportToShareableHandle"},
    {.name = "cuMemPoolGetAccess"},
    {.name = "cuMemPoolGetAttribute"},
    {.name = "cuMemPoolImportFromShareableHandle"},
    {.name = "cuMemPoolImportPointer"},
    {.name = "cuMemPoolSetAccess"},
    {.name = "cuMemPoolSetAttribute"},
    {.name = "cuMemPoolTrimTo"},
    {.name = "cuMipmappedArrayGetSparseProperties"},
    /* CUDA 11.x additions. */
    {.name = "cuCtxCreate_v3"},
    {.name = "cuCtxGetExecAffinity"},
    {.name = "cuDeviceGetExecAffinitySupport"},
    {.name = "cuDeviceGetGraphMemAttribute"},
    {.name = "cuDeviceGetUuid_v2"},
    {.name = "cuDeviceGraphMemTrim"},
    {.name = "cuDeviceSetGraphMemAttribute"},
    {.name = "cuFlushGPUDirectRDMAWrites"},
    {.name = "cuGetProcAddress"},
    {.name = "cuGraphAddMemAllocNode"},
    {.name = "cuGraphAddMemFreeNode"},
    {.name = "cuGraphDebugDotPrint"},
    {.name = "cuGraphInstantiateWithFlags"},
    {.name = "cuGraphMemAllocNodeGetParams"},
    {.name = "cuGraphMemFreeNodeGetParams"},
    {.name = "cuGraphReleaseUserObject"},
    {.name = "cuGraphRetainUserObject"},
    {.name = "cuStreamGetCaptureInfo_v2"},
    {.name = "cuStreamGetCaptureInfo_v2_ptsz"},
    {.name = "cuStreamUpdateCaptureDependencies"},
    {.name = "cuStreamUpdateCaptureDependencies_ptsz"},
    {.name = "cuUserObjectCreate"},
    {.name = "cuUserObjectRelease"},
    {.name = "cuUserObjectRetain"},
};
569 |
570 | entry_t nvml_library_entry[] = {
571 | {.name = "nvmlInit"},
572 | {.name = "nvmlShutdown"},
573 | {.name = "nvmlErrorString"},
574 | {.name = "nvmlDeviceGetHandleByIndex"},
575 | {.name = "nvmlDeviceGetComputeRunningProcesses"},
576 | {.name = "nvmlDeviceGetPciInfo"},
577 | {.name = "nvmlDeviceGetProcessUtilization"},
578 | {.name = "nvmlDeviceGetCount"},
579 | {.name = "nvmlDeviceClearAccountingPids"},
580 | {.name = "nvmlDeviceClearCpuAffinity"},
581 | {.name = "nvmlDeviceClearEccErrorCounts"},
582 | {.name = "nvmlDeviceDiscoverGpus"},
583 | {.name = "nvmlDeviceFreezeNvLinkUtilizationCounter"},
584 | {.name = "nvmlDeviceGetAccountingBufferSize"},
585 | {.name = "nvmlDeviceGetAccountingMode"},
586 | {.name = "nvmlDeviceGetAccountingPids"},
587 | {.name = "nvmlDeviceGetAccountingStats"},
588 | {.name = "nvmlDeviceGetActiveVgpus"},
589 | {.name = "nvmlDeviceGetAPIRestriction"},
590 | {.name = "nvmlDeviceGetApplicationsClock"},
591 | {.name = "nvmlDeviceGetAutoBoostedClocksEnabled"},
592 | {.name = "nvmlDeviceGetBAR1MemoryInfo"},
593 | {.name = "nvmlDeviceGetBoardId"},
594 | {.name = "nvmlDeviceGetBoardPartNumber"},
595 | {.name = "nvmlDeviceGetBrand"},
596 | {.name = "nvmlDeviceGetBridgeChipInfo"},
597 | {.name = "nvmlDeviceGetClock"},
598 | {.name = "nvmlDeviceGetClockInfo"},
599 | {.name = "nvmlDeviceGetComputeMode"},
600 | {.name = "nvmlDeviceGetCount_v2"},
601 | {.name = "nvmlDeviceGetCpuAffinity"},
602 | {.name = "nvmlDeviceGetCreatableVgpus"},
603 | {.name = "nvmlDeviceGetCudaComputeCapability"},
604 | {.name = "nvmlDeviceGetCurrentClocksThrottleReasons"},
605 | {.name = "nvmlDeviceGetCurrPcieLinkGeneration"},
606 | {.name = "nvmlDeviceGetCurrPcieLinkWidth"},
607 | {.name = "nvmlDeviceGetDecoderUtilization"},
608 | {.name = "nvmlDeviceGetDefaultApplicationsClock"},
609 | {.name = "nvmlDeviceGetDetailedEccErrors"},
610 | {.name = "nvmlDeviceGetDisplayActive"},
611 | {.name = "nvmlDeviceGetDisplayMode"},
612 | {.name = "nvmlDeviceGetDriverModel"},
613 | {.name = "nvmlDeviceGetEccMode"},
614 | {.name = "nvmlDeviceGetEncoderCapacity"},
615 | {.name = "nvmlDeviceGetEncoderSessions"},
616 | {.name = "nvmlDeviceGetEncoderStats"},
617 | {.name = "nvmlDeviceGetEncoderUtilization"},
618 | {.name = "nvmlDeviceGetEnforcedPowerLimit"},
619 | {.name = "nvmlDeviceGetFanSpeed"},
620 | {.name = "nvmlDeviceGetFanSpeed_v2"},
621 | {.name = "nvmlDeviceGetFieldValues"},
622 | {.name = "nvmlDeviceGetGpuOperationMode"},
623 | {.name = "nvmlDeviceGetGraphicsRunningProcesses"},
624 | {.name = "nvmlDeviceGetGridLicensableFeatures"},
625 | {.name = "nvmlDeviceGetHandleByIndex_v2"},
626 | {.name = "nvmlDeviceGetHandleByPciBusId"},
627 | {.name = "nvmlDeviceGetHandleByPciBusId_v2"},
628 | {.name = "nvmlDeviceGetHandleBySerial"},
629 | {.name = "nvmlDeviceGetHandleByUUID"},
630 | {.name = "nvmlDeviceGetIndex"},
631 | {.name = "nvmlDeviceGetInforomConfigurationChecksum"},
632 | {.name = "nvmlDeviceGetInforomImageVersion"},
633 | {.name = "nvmlDeviceGetInforomVersion"},
634 | {.name = "nvmlDeviceGetMaxClockInfo"},
635 | {.name = "nvmlDeviceGetMaxCustomerBoostClock"},
636 | {.name = "nvmlDeviceGetMaxPcieLinkGeneration"},
637 | {.name = "nvmlDeviceGetMaxPcieLinkWidth"},
638 | {.name = "nvmlDeviceGetMemoryErrorCounter"},
639 | {.name = "nvmlDeviceGetMemoryInfo"},
640 | {.name = "nvmlDeviceGetMinorNumber"},
641 | {.name = "nvmlDeviceGetMPSComputeRunningProcesses"},
642 | {.name = "nvmlDeviceGetMultiGpuBoard"},
643 | {.name = "nvmlDeviceGetName"},
644 | {.name = "nvmlDeviceGetNvLinkCapability"},
645 | {.name = "nvmlDeviceGetNvLinkErrorCounter"},
646 | {.name = "nvmlDeviceGetNvLinkRemotePciInfo"},
647 | {.name = "nvmlDeviceGetNvLinkRemotePciInfo_v2"},
648 | {.name = "nvmlDeviceGetNvLinkState"},
649 | {.name = "nvmlDeviceGetNvLinkUtilizationControl"},
650 | {.name = "nvmlDeviceGetNvLinkUtilizationCounter"},
651 | {.name = "nvmlDeviceGetNvLinkVersion"},
652 | {.name = "nvmlDeviceGetP2PStatus"},
653 | {.name = "nvmlDeviceGetPcieReplayCounter"},
654 | {.name = "nvmlDeviceGetPcieThroughput"},
655 | {.name = "nvmlDeviceGetPciInfo_v2"},
656 | {.name = "nvmlDeviceGetPciInfo_v3"},
657 | {.name = "nvmlDeviceGetPerformanceState"},
658 | {.name = "nvmlDeviceGetPersistenceMode"},
659 | {.name = "nvmlDeviceGetPowerManagementDefaultLimit"},
660 | {.name = "nvmlDeviceGetPowerManagementLimit"},
661 | {.name = "nvmlDeviceGetPowerManagementLimitConstraints"},
662 | {.name = "nvmlDeviceGetPowerManagementMode"},
663 | {.name = "nvmlDeviceGetPowerState"},
664 | {.name = "nvmlDeviceGetPowerUsage"},
665 | {.name = "nvmlDeviceGetRetiredPages"},
666 | {.name = "nvmlDeviceGetRetiredPagesPendingStatus"},
667 | {.name = "nvmlDeviceGetSamples"},
668 | {.name = "nvmlDeviceGetSerial"},
669 | {.name = "nvmlDeviceGetSupportedClocksThrottleReasons"},
670 | {.name = "nvmlDeviceGetSupportedEventTypes"},
671 | {.name = "nvmlDeviceGetSupportedGraphicsClocks"},
672 | {.name = "nvmlDeviceGetSupportedMemoryClocks"},
673 | {.name = "nvmlDeviceGetSupportedVgpus"},
674 | {.name = "nvmlDeviceGetTemperature"},
675 | {.name = "nvmlDeviceGetTemperatureThreshold"},
676 | {.name = "nvmlDeviceGetTopologyCommonAncestor"},
677 | {.name = "nvmlDeviceGetTopologyNearestGpus"},
678 | {.name = "nvmlDeviceGetTotalEccErrors"},
679 | {.name = "nvmlDeviceGetTotalEnergyConsumption"},
680 | {.name = "nvmlDeviceGetUtilizationRates"},
681 | {.name = "nvmlDeviceGetUUID"},
682 | {.name = "nvmlDeviceGetVbiosVersion"},
683 | {.name = "nvmlDeviceGetVgpuMetadata"},
684 | {.name = "nvmlDeviceGetVgpuProcessUtilization"},
685 | {.name = "nvmlDeviceGetVgpuUtilization"},
686 | {.name = "nvmlDeviceGetViolationStatus"},
687 | {.name = "nvmlDeviceGetVirtualizationMode"},
688 | {.name = "nvmlDeviceModifyDrainState"},
689 | {.name = "nvmlDeviceOnSameBoard"},
690 | {.name = "nvmlDeviceQueryDrainState"},
691 | {.name = "nvmlDeviceRegisterEvents"},
692 | {.name = "nvmlDeviceRemoveGpu"},
693 | {.name = "nvmlDeviceRemoveGpu_v2"},
694 | {.name = "nvmlDeviceResetApplicationsClocks"},
695 | {.name = "nvmlDeviceResetNvLinkErrorCounters"},
696 | {.name = "nvmlDeviceResetNvLinkUtilizationCounter"},
697 | {.name = "nvmlDeviceSetAccountingMode"},
698 | {.name = "nvmlDeviceSetAPIRestriction"},
699 | {.name = "nvmlDeviceSetApplicationsClocks"},
700 | {.name = "nvmlDeviceSetAutoBoostedClocksEnabled"},
    /** We hijack this call. */
702 | {.name = "nvmlDeviceSetComputeMode"},
703 | {.name = "nvmlDeviceSetCpuAffinity"},
704 | {.name = "nvmlDeviceSetDefaultAutoBoostedClocksEnabled"},
705 | {.name = "nvmlDeviceSetDriverModel"},
706 | {.name = "nvmlDeviceSetEccMode"},
707 | {.name = "nvmlDeviceSetGpuOperationMode"},
708 | {.name = "nvmlDeviceSetNvLinkUtilizationControl"},
709 | {.name = "nvmlDeviceSetPersistenceMode"},
710 | {.name = "nvmlDeviceSetPowerManagementLimit"},
711 | {.name = "nvmlDeviceSetVirtualizationMode"},
712 | {.name = "nvmlDeviceValidateInforom"},
713 | {.name = "nvmlEventSetCreate"},
714 | {.name = "nvmlEventSetFree"},
715 | {.name = "nvmlEventSetWait"},
716 | {.name = "nvmlGetVgpuCompatibility"},
717 | {.name = "nvmlInit_v2"},
718 | {.name = "nvmlInitWithFlags"},
719 | {.name = "nvmlInternalGetExportTable"},
720 | {.name = "nvmlSystemGetCudaDriverVersion"},
721 | {.name = "nvmlSystemGetCudaDriverVersion_v2"},
722 | {.name = "nvmlSystemGetDriverVersion"},
723 | {.name = "nvmlSystemGetHicVersion"},
724 | {.name = "nvmlSystemGetNVMLVersion"},
725 | {.name = "nvmlSystemGetProcessName"},
726 | {.name = "nvmlSystemGetTopologyGpuSet"},
727 | {.name = "nvmlUnitGetCount"},
728 | {.name = "nvmlUnitGetDevices"},
729 | {.name = "nvmlUnitGetFanSpeedInfo"},
730 | {.name = "nvmlUnitGetHandleByIndex"},
731 | {.name = "nvmlUnitGetLedState"},
732 | {.name = "nvmlUnitGetPsuInfo"},
733 | {.name = "nvmlUnitGetTemperature"},
734 | {.name = "nvmlUnitGetUnitInfo"},
735 | {.name = "nvmlUnitSetLedState"},
736 | {.name = "nvmlVgpuInstanceGetEncoderCapacity"},
737 | {.name = "nvmlVgpuInstanceGetEncoderSessions"},
738 | {.name = "nvmlVgpuInstanceGetEncoderStats"},
739 | {.name = "nvmlVgpuInstanceGetFbUsage"},
740 | {.name = "nvmlVgpuInstanceGetFrameRateLimit"},
741 | {.name = "nvmlVgpuInstanceGetLicenseStatus"},
742 | {.name = "nvmlVgpuInstanceGetMetadata"},
743 | {.name = "nvmlVgpuInstanceGetType"},
744 | {.name = "nvmlVgpuInstanceGetUUID"},
745 | {.name = "nvmlVgpuInstanceGetVmDriverVersion"},
746 | {.name = "nvmlVgpuInstanceGetVmID"},
747 | {.name = "nvmlVgpuInstanceSetEncoderCapacity"},
748 | {.name = "nvmlVgpuTypeGetClass"},
749 | {.name = "nvmlVgpuTypeGetDeviceID"},
750 | {.name = "nvmlVgpuTypeGetFramebufferSize"},
751 | {.name = "nvmlVgpuTypeGetFrameRateLimit"},
752 | {.name = "nvmlVgpuTypeGetLicense"},
753 | {.name = "nvmlVgpuTypeGetMaxInstances"},
754 | {.name = "nvmlVgpuTypeGetName"},
755 | {.name = "nvmlVgpuTypeGetNumDisplayHeads"},
756 | {.name = "nvmlVgpuTypeGetResolution"},
757 | {.name = "nvmlDeviceGetFBCSessions"},
758 | {.name = "nvmlDeviceGetFBCStats"},
759 | {.name = "nvmlDeviceGetGridLicensableFeatures_v2"},
760 | {.name = "nvmlDeviceGetRetiredPages_v2"},
761 | {.name = "nvmlDeviceResetGpuLockedClocks"},
762 | {.name = "nvmlDeviceSetGpuLockedClocks"},
763 | {.name = "nvmlGetBlacklistDeviceCount"},
764 | {.name = "nvmlGetBlacklistDeviceInfoByIndex"},
765 | {.name = "nvmlVgpuInstanceGetAccountingMode"},
766 | {.name = "nvmlVgpuInstanceGetAccountingPids"},
767 | {.name = "nvmlVgpuInstanceGetAccountingStats"},
768 | {.name = "nvmlVgpuInstanceGetFBCSessions"},
769 | {.name = "nvmlVgpuInstanceGetFBCStats"},
770 | {.name = "nvmlVgpuTypeGetMaxInstancesPerVm"},
771 | {.name = "nvmlGetVgpuVersion"},
772 | {.name = "nvmlSetVgpuVersion"},
773 | {.name = "nvmlDeviceGetGridLicensableFeatures_v3"},
774 | {.name = "nvmlDeviceGetHostVgpuMode"},
775 | {.name = "nvmlDeviceGetPgpuMetadataString"},
776 | {.name = "nvmlVgpuInstanceGetEccMode"},
777 | {.name = "nvmlComputeInstanceDestroy"},
778 | {.name = "nvmlComputeInstanceGetInfo"},
779 | {.name = "nvmlDeviceCreateGpuInstance"},
780 | {.name = "nvmlDeviceGetArchitecture"},
781 | {.name = "nvmlDeviceGetAttributes"},
782 | {.name = "nvmlDeviceGetAttributes_v2"},
783 | {.name = "nvmlDeviceGetComputeInstanceId"},
784 | {.name = "nvmlDeviceGetCpuAffinityWithinScope"},
785 | {.name = "nvmlDeviceGetDeviceHandleFromMigDeviceHandle"},
786 | {.name = "nvmlDeviceGetGpuInstanceById"},
787 | {.name = "nvmlDeviceGetGpuInstanceId"},
788 | {.name = "nvmlDeviceGetGpuInstancePossiblePlacements"},
789 | {.name = "nvmlDeviceGetGpuInstanceProfileInfo"},
790 | {.name = "nvmlDeviceGetGpuInstanceRemainingCapacity"},
791 | {.name = "nvmlDeviceGetGpuInstances"},
792 | {.name = "nvmlDeviceGetMaxMigDeviceCount"},
793 | {.name = "nvmlDeviceGetMemoryAffinity"},
794 | {.name = "nvmlDeviceGetMigDeviceHandleByIndex"},
795 | {.name = "nvmlDeviceGetMigMode"},
796 | {.name = "nvmlDeviceGetRemappedRows"},
797 | {.name = "nvmlDeviceGetRowRemapperHistogram"},
798 | {.name = "nvmlDeviceIsMigDeviceHandle"},
799 | {.name = "nvmlDeviceSetMigMode"},
800 | {.name = "nvmlEventSetWait_v2"},
801 | {.name = "nvmlGpuInstanceCreateComputeInstance"},
802 | {.name = "nvmlGpuInstanceDestroy"},
803 | {.name = "nvmlGpuInstanceGetComputeInstanceById"},
804 | {.name = "nvmlGpuInstanceGetComputeInstanceProfileInfo"},
805 | {.name = "nvmlGpuInstanceGetComputeInstanceRemainingCapacity"},
806 | {.name = "nvmlGpuInstanceGetComputeInstances"},
807 | {.name = "nvmlGpuInstanceGetInfo"},
808 | {.name = "nvmlVgpuInstanceClearAccountingPids"},
809 | {.name = "nvmlVgpuInstanceGetMdevUUID"},
810 | {.name = "nvmlComputeInstanceGetInfo_v2"},
811 | {.name = "nvmlDeviceGetComputeRunningProcesses_v2"},
812 | {.name = "nvmlDeviceGetGraphicsRunningProcesses_v2"},
813 | {.name = "nvmlDeviceSetTemperatureThreshold"},
814 | {.name = "nvmlRetry_NvRmControl"},
815 | {.name = "nvmlVgpuInstanceGetGpuInstanceId"},
816 | {.name = "nvmlVgpuTypeGetGpuInstanceProfileId"},
817 | {.name = "nvmlDeviceCreateGpuInstanceWithPlacement"},
818 | {.name = "nvmlDeviceGetBusType"},
819 | {.name = "nvmlDeviceGetClkMonStatus"},
820 | {.name = "nvmlDeviceGetGpuInstancePossiblePlacements_v2"},
821 | {.name = "nvmlDeviceGetGridLicensableFeatures_v4"},
822 | {.name = "nvmlDeviceGetIrqNum"},
823 | {.name = "nvmlDeviceGetMPSComputeRunningProcesses_v2"},
824 | {.name = "nvmlDeviceGetNvLinkRemoteDeviceType"},
825 | {.name = "nvmlDeviceResetMemoryLockedClocks"},
826 | {.name = "nvmlDeviceSetMemoryLockedClocks"},
827 | {.name = "nvmlGetExcludedDeviceCount"},
828 | {.name = "nvmlGetExcludedDeviceInfoByIndex"},
829 | {.name = "nvmlVgpuInstanceGetLicenseInfo"},
830 | };
831 |
832 | static void UNUSED bug_on() {
833 | BUILD_BUG_ON((sizeof(nvml_library_entry) / sizeof(nvml_library_entry[0])) !=
834 | NVML_ENTRY_END);
835 |
836 | BUILD_BUG_ON((sizeof(cuda_library_entry) / sizeof(cuda_library_entry[0])) !=
837 | CUDA_ENTRY_END);
838 | }
839 |
/** register once set */
// pthread_once guards: the CUDA and driver (NVML) entry tables are each
// resolved at most once per process (see load_necessary_data()).
static pthread_once_t g_cuda_set = PTHREAD_ONCE_INIT;
static pthread_once_t g_driver_set = PTHREAD_ONCE_INIT;
843 |
// Process-wide vGPU resource configuration.  These defaults are overwritten
// by read_controller_configuration() from the on-disk controller file.
resource_data_t g_vcuda_config = {
    .pod_uid = "",
    .limit = 0,
    .container_name = "",
    .utilization = 0,
    .gpu_memory = 0,
    .enable = 1,  // control enabled by default
};
852 |
// Per-container config directory; stays EMPTY_PREFIX until
// get_path_by_cgroup() fills it in (tested by is_custom_config_path()).
static char base_dir[FILENAME_MAX] = EMPTY_PREFIX;
// Path of the controller configuration file (a serialized resource_data_t).
char config_path[FILENAME_MAX] = CONTROLLER_CONFIG_PATH;
// Path of the file listing pids to monitor (read by the monitor tool).
char pid_path[FILENAME_MAX] = PIDS_CONFIG_PATH;
// Driver version string parsed from the NVRM line of the proc file by
// read_version_from_proc(); used to build versioned library filenames.
char driver_version[FILENAME_MAX] = "";
857 |
858 | static void load_driver_libraries() {
859 | void *table = NULL;
860 | char driver_filename[FILENAME_MAX];
861 | int i;
862 |
863 | snprintf(driver_filename, FILENAME_MAX - 1, "%s.%s", DRIVER_ML_LIBRARY_PREFIX,
864 | driver_version);
865 | driver_filename[FILENAME_MAX - 1] = '\0';
866 |
867 | table = dlopen(driver_filename, RTLD_NOW | RTLD_NODELETE);
868 | if (unlikely(!table)) {
869 | LOGGER(FATAL, "can't find library %s", driver_filename);
870 | }
871 |
872 | for (i = 0; i < NVML_ENTRY_END; i++) {
873 | nvml_library_entry[i].fn_ptr = dlsym(table, nvml_library_entry[i].name);
874 | if (unlikely(!nvml_library_entry[i].fn_ptr)) {
875 | LOGGER(4, "can't find function %s in %s", nvml_library_entry[i].name,
876 | driver_filename);
877 | }
878 | }
879 |
880 | dlclose(table);
881 |
882 | // Initialize the ml driver
883 | if (NVML_FIND_ENTRY(nvml_library_entry, nvmlInitWithFlags)) {
884 | NVML_ENTRY_CALL(nvml_library_entry, nvmlInitWithFlags, 0);
885 | } else if (NVML_FIND_ENTRY(nvml_library_entry, nvmlInit_v2)) {
886 | NVML_ENTRY_CALL(nvml_library_entry, nvmlInit_v2);
887 | } else {
888 | NVML_ENTRY_CALL(nvml_library_entry, nvmlInit);
889 | }
890 | }
891 |
892 | static void load_cuda_single_library(int idx) {
893 | void *table = NULL;
894 | char cuda_filename[FILENAME_MAX];
895 |
896 | snprintf(cuda_filename, FILENAME_MAX - 1, "%s.%s", CUDA_LIBRARY_PREFIX,
897 | driver_version);
898 | cuda_filename[FILENAME_MAX - 1] = '\0';
899 |
900 | table = dlopen(cuda_filename, RTLD_NOW | RTLD_NODELETE);
901 | if (unlikely(!table)) {
902 | LOGGER(FATAL, "can't find library %s", cuda_filename);
903 | }
904 |
905 | cuda_library_entry[idx].fn_ptr = dlsym(table, cuda_library_entry[idx].name);
906 | if (unlikely(!cuda_library_entry[idx].fn_ptr)) {
907 | LOGGER(4, "can't find function %s in %s", cuda_library_entry[idx].name,
908 | cuda_filename);
909 | }
910 |
911 | dlclose(table);
912 | }
913 |
914 | void load_cuda_libraries() {
915 | void *table = NULL;
916 | int i = 0;
917 | char cuda_filename[FILENAME_MAX];
918 |
919 | LOGGER(4, "Start hijacking");
920 |
921 | snprintf(cuda_filename, FILENAME_MAX - 1, "%s.%s", CUDA_LIBRARY_PREFIX,
922 | driver_version);
923 | cuda_filename[FILENAME_MAX - 1] = '\0';
924 | cuda_filename[FILENAME_MAX - 1] = '\0';
925 |
926 | table = dlopen(cuda_filename, RTLD_NOW | RTLD_NODELETE);
927 | if (unlikely(!table)) {
928 | LOGGER(FATAL, "can't find library %s", cuda_filename);
929 | }
930 |
931 | for (i = 0; i < CUDA_ENTRY_END; i++) {
932 | cuda_library_entry[i].fn_ptr = dlsym(table, cuda_library_entry[i].name);
933 | if (unlikely(!cuda_library_entry[i].fn_ptr)) {
934 | LOGGER(4, "can't find function %s in %s", cuda_library_entry[i].name,
935 | cuda_filename);
936 | }
937 | }
938 |
939 | dlclose(table);
940 | }
941 |
// #lizard forgives
/**
 * Parse a /proc/<pid>/cgroup style file and extract the kubernetes pod uid
 * and container id from the "memory" cgroup line.
 *
 * Handles both the plain cgroupfs layout
 *   /kubepods/besteffort/pod<uid>/<container-id>
 * and the systemd layout (".scope" suffix on the last segment), e.g.
 *   /kubepods.slice/kubepods-pod<uid>.slice/docker-<id>.scope
 *
 * @param pid_cgroup   path of the cgroup file to parse
 * @param pod_uid      out: pod uid; '_' mapped to '-' in the systemd layout
 * @param container_id out: bare container id (runtime prefix/suffix pruned)
 * @param size         capacity of each output buffer
 * @return 0 on success, 1 on any I/O or parse failure
 */
int get_cgroup_data(const char *pid_cgroup, char *pod_uid, char *container_id,
                    size_t size) {
  int ret = 1;
  FILE *cgroup_fd = NULL;
  char *token = NULL, *last_ptr = NULL, *last_second = NULL;
  char *cgroup_ptr = NULL;
  char buffer[4096];
  int is_systemd = 0;
  char *prune_pos = NULL;
  size_t len;

  cgroup_fd = fopen(pid_cgroup, "r");
  if (unlikely(!cgroup_fd)) {
    LOGGER(4, "can't open %s, error %s", pid_cgroup, strerror(errno));
    goto DONE;
  }

  /**
   * find memory cgroup name
   */
  while (!feof(cgroup_fd)) {
    buffer[0] = '\0';
    if (unlikely(!fgets(buffer, sizeof(buffer), cgroup_fd))) {
      LOGGER(4, "can't get line from %s", pid_cgroup);
      goto DONE;
    }

    // Strip only a trailing newline.  The old unconditional
    // buffer[strlen(buffer) - 1] = '\0' underflowed on an empty string and
    // chopped a real character when the line had no newline.
    len = strlen(buffer);
    if (len > 0 && buffer[len - 1] == '\n') {
      buffer[len - 1] = '\0';
    }

    last_ptr = NULL;
    token = buffer;
    for (token = strtok_r(token, ":", &last_ptr); token;
         token = NULL, token = strtok_r(token, ":", &last_ptr)) {
      if (!strcmp(token, "memory")) {
        // The remainder of the line after "memory:" is the cgroup path.
        cgroup_ptr = strtok_r(NULL, ":", &last_ptr);
        break;
      }
    }

    if (cgroup_ptr) {
      break;
    }
  }

  if (!cgroup_ptr) {
    LOGGER(4, "can't find memory cgroup from %s", pid_cgroup);
    goto DONE;
  }

  /**
   * find container id: substring after the last '/', remembering the
   * second-to-last '/' for the pod segment
   */
  last_ptr = NULL;
  last_second = NULL;
  token = cgroup_ptr;
  while (*token) {
    if (*token == '/') {
      last_second = last_ptr;
      last_ptr = token;
    }
    ++token;
  }

  if (!last_ptr) {
    goto DONE;
  }

  strncpy(container_id, last_ptr + 1, size);
  container_id[size - 1] = '\0';

  /**
   * if cgroup is systemd, cgroup pattern should be like
   * /kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-pod27882189_b4d9_11e9_b287_ec0d9ae89a20.slice/docker-4aa615892ab2a014d52178bdf3da1c4a45c8ddfb5171dd6e39dc910f96693e14.scope
   * /kubepods.slice/kubepods-pod019c1fe8_0d92_4aa0_b61c_4df58bdde71c.slice/cri-containerd-9e073649debeec6d511391c9ec7627ee67ce3a3fb508b0fa0437a97f8e58ba98.scope
   */
  if ((prune_pos = strstr(container_id, ".scope"))) {
    is_systemd = 1;
    *prune_pos = '\0';
  }

  /**
   * find pod uid: terminate the path at the last '/' so the segment that
   * starts at last_second is the pod segment
   */
  *last_ptr = '\0';
  if (!last_second) {
    goto DONE;
  }

  strncpy(pod_uid, last_second, size);
  pod_uid[size - 1] = '\0';

  if (is_systemd && (prune_pos = strstr(pod_uid, ".slice"))) {
    *prune_pos = '\0';
  }

  /**
   * remove unnecessary chars from $container_id and $pod_uid
   */
  if (is_systemd) {
    /**
     * For this kind of cgroup path, we need to find the last appearance of
     * dash, e.g. "docker-<id>" or "cri-containerd-<id>"
     */
    prune_pos = NULL;
    token = container_id;
    while (*token) {
      if (*token == '-') {
        prune_pos = token;
      }
      ++token;
    }

    if (!prune_pos) {
      LOGGER(4, "no - prefix");
      goto DONE;
    }

    // Move the remainder plus its NUL terminator.  The old length of
    // strlen(container_id) copied bytes past the end of the source string.
    len = strlen(prune_pos + 1);
    memmove(container_id, prune_pos + 1, len + 1);

    prune_pos = strstr(pod_uid, "-pod");
    if (!prune_pos) {
      LOGGER(4, "no pod string");
      goto DONE;
    }
    prune_pos += strlen("-pod");
    // Compute the length BEFORE the overlapping move: the old code called
    // strlen(prune_pos) again afterwards, when the bytes at prune_pos may
    // already have been altered by the memmove.
    len = strlen(prune_pos);
    memmove(pod_uid, prune_pos, len);
    pod_uid[len] = '\0';
    prune_pos = pod_uid;
    while (*prune_pos) {
      if (*prune_pos == '_') {
        *prune_pos = '-';
      }
      ++prune_pos;
    }
  } else {
    // Drop the leading "/pod" prefix in place (terminator included).
    len = strlen(pod_uid + strlen("/pod"));
    memmove(pod_uid, pod_uid + strlen("/pod"), len + 1);
  }

  ret = 0;
DONE:
  if (cgroup_fd) {
    fclose(cgroup_fd);
  }
  return ret;
}
1088 |
1089 | static int get_path_by_cgroup(const char *pid_cgroup) {
1090 | int ret = 1;
1091 | char pod_uid[4096], container_id[4096];
1092 |
1093 | if (is_custom_config_path()) {
1094 | return 0;
1095 | }
1096 |
1097 | if (unlikely(get_cgroup_data(pid_cgroup, pod_uid, container_id,
1098 | sizeof(container_id)))) {
1099 | LOGGER(4, "can't find container id from %s", pid_cgroup);
1100 | goto DONE;
1101 | }
1102 |
1103 | snprintf(base_dir, sizeof(base_dir), "%s%s", VCUDA_CONFIG_PATH, container_id);
1104 | snprintf(config_path, sizeof(config_path), "%s/%s", base_dir,
1105 | CONTROLLER_CONFIG_NAME);
1106 | snprintf(pid_path, sizeof(pid_path), "%s/%s", base_dir, PIDS_CONFIG_NAME);
1107 |
1108 | LOGGER(4, "config file: %s", config_path);
1109 | LOGGER(4, "pid file: %s", pid_path);
1110 | ret = 0;
1111 |
1112 | LOGGER(4, "register to remote: pod uid: %s, cont id: %s", pod_uid,
1113 | container_id);
1114 | register_to_remote_with_data("", pod_uid, container_id);
1115 | DONE:
1116 | return ret;
1117 | }
1118 |
1119 | static int is_default_config_path() {
1120 | int fd = -1;
1121 |
1122 | fd = open(config_path, O_RDONLY);
1123 | if (fd == -1) {
1124 | return 0;
1125 | }
1126 |
1127 | close(fd);
1128 |
1129 | return 1;
1130 | }
1131 |
/**
 * Run an extended POSIX regex over matchString and copy the first match
 * into version (NUL-terminated).  On compile failure or no match, version
 * is left untouched.
 *
 * @param pattern     extended regex to compile
 * @param matchString string to search
 * @param version     out buffer for the matched substring
 */
static void matchRegex(const char *pattern, const char *matchString,
                       char *version) {
  regex_t regex;
  int reti;
  regmatch_t matches[1];
  char msgbuf[512];

  reti = regcomp(&regex, pattern, REG_EXTENDED);
  if (reti) {
    // Report the pattern that actually failed: the old message always
    // printed DRIVER_VERSION_MATCH_PATTERN regardless of the argument.
    LOGGER(4, "Could not compile regex: %s", pattern);
    return;
  }

  reti = regexec(&regex, matchString, 1, matches, 0);
  switch (reti) {
  case 0:
    // Copy the whole-match span [rm_so, rm_eo) and terminate it.
    strncpy(version, matchString + matches[0].rm_so,
            matches[0].rm_eo - matches[0].rm_so);
    version[matches[0].rm_eo - matches[0].rm_so] = '\0';
    break;
  case REG_NOMATCH:
    LOGGER(4, "Regex does not match for string: %s", matchString);
    break;
  default:
    regerror(reti, &regex, msgbuf, sizeof(msgbuf));
    LOGGER(4, "Regex match failed: %s", msgbuf);
  }

  regfree(&regex);
  return;
}
1163 |
1164 | static void read_version_from_proc(char *version) {
1165 | char *line = NULL;
1166 | size_t len = 0;
1167 |
1168 | FILE *fp = fopen(DRIVER_VERSION_PROC_PATH, "r");
1169 | if (fp == NULL) {
1170 | LOGGER(4, "can't open %s, error %s", DRIVER_VERSION_PROC_PATH,
1171 | strerror(errno));
1172 | return;
1173 | }
1174 |
1175 | while ((getline(&line, &len, fp) != -1)) {
1176 | if (strncmp(line, "NVRM", 4) == 0) {
1177 | matchRegex(DRIVER_VERSION_MATCH_PATTERN, line, version);
1178 | break;
1179 | }
1180 | }
1181 | fclose(fp);
1182 | }
1183 |
1184 | int read_controller_configuration() {
1185 | int fd = 0;
1186 | int rsize;
1187 | int ret = 1;
1188 |
1189 | if (!is_default_config_path()) {
1190 | if (get_path_by_cgroup("/proc/self/cgroup")) {
1191 | LOGGER(FATAL, "can't get config file path");
1192 | }
1193 | }
1194 |
1195 | fd = open(config_path, O_RDONLY);
1196 | if (unlikely(fd == -1)) {
1197 | LOGGER(4, "can't open %s, error %s", config_path, strerror(errno));
1198 | goto DONE;
1199 | }
1200 |
1201 | rsize = (int)read(fd, (void *)&g_vcuda_config, sizeof(resource_data_t));
1202 | if (unlikely(rsize != sizeof(g_vcuda_config))) {
1203 | LOGGER(4, "can't read %s, need %zu but got %d", CONTROLLER_CONFIG_PATH,
1204 | sizeof(resource_data_t), rsize);
1205 | goto DONE;
1206 | }
1207 |
1208 | read_version_from_proc(driver_version);
1209 | ret = 0;
1210 |
1211 | LOGGER(4, "pod uid : %s", g_vcuda_config.pod_uid);
1212 | LOGGER(4, "limit : %d", g_vcuda_config.limit);
1213 | LOGGER(4, "container name : %s", g_vcuda_config.container_name);
1214 | LOGGER(4, "total utilization: %d", g_vcuda_config.utilization);
1215 | LOGGER(4, "total gpu memory : %" PRIu64, g_vcuda_config.gpu_memory);
1216 | LOGGER(4, "driver version : %s", driver_version);
1217 | LOGGER(4, "hard limit mode : %d", g_vcuda_config.hard_limit);
1218 | LOGGER(4, "enable mode : %d", g_vcuda_config.enable);
1219 | DONE:
1220 | if (likely(fd)) {
1221 | close(fd);
1222 | }
1223 |
1224 | return ret;
1225 | }
1226 |
/**
 * Initialization entry used by the hijack layer: load the controller
 * configuration, resolve cuDriverGetVersion eagerly, then resolve the full
 * CUDA and NVML entry tables exactly once per process via pthread_once.
 */
void load_necessary_data() {
  read_controller_configuration();
  // Resolve this single entry up front — presumably needed before the full
  // table load completes; confirm against callers.
  load_cuda_single_library(CUDA_ENTRY_ENUM(cuDriverGetVersion));

  pthread_once(&g_cuda_set, load_cuda_libraries);
  pthread_once(&g_driver_set, load_driver_libraries);
}
1234 |
1235 | int is_custom_config_path() { return strcmp(base_dir, EMPTY_PREFIX) != 0; }
1236 |
--------------------------------------------------------------------------------
/src/register.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Tencent is pleased to support the open source community by making TKEStack available.
3 | *
4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved.
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use
7 | * this file except in compliance with the License. You may obtain a copy of the
8 | * License at
9 | *
10 | * https://opensource.org/licenses/Apache-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the
15 | * specific language governing permissions and limitations under the License.
16 | */
17 |
18 | //
19 | // Created by Thomas Song on 2019-04-15.
20 | //
21 | #include
22 | #include
23 | #include
24 | #include
25 | #include
26 |
27 | #include "include/hijack.h"
28 |
// Sleep interval (TIME_TICK * MILLISEC nanoseconds) used by the pipe
// handshake loops in register_to_remote_with_data() below.
static const struct timespec g_cycle = {
    .tv_sec = 0,
    .tv_nsec = TIME_TICK * MILLISEC,
};
33 |
// #lizard forgives
/**
 * Register this container with the remote vcuda manager by fork+exec'ing
 * the RPC client binary (RPC_CLIENT_PATH RPC_CLIENT_NAME) and waiting for
 * it to exit.
 *
 * The pipe acts as a parent->child handshake: the child blocks until the
 * parent has written a pid into the pipe, so the exec happens only after
 * the parent is ready to waitpid() on the child.
 *
 * @param bus_id    GPU bus id passed as --bus-id (may be "")
 * @param pod_uid   pod uid passed as --pod-uid
 * @param container passed as --cont-id when a custom config path is in
 *                  effect, otherwise as --cont-name
 */
void register_to_remote_with_data(const char* bus_id, const char* pod_uid,
                                  const char* container) {
  pid_t register_pid;
  int wstatus = 0, wret = 0;
  pid_t child_pid;
  int pipe_fd[2];
  int ret = -1;

  ret = pipe(pipe_fd);
  if (unlikely(ret)) {
    LOGGER(FATAL, "create pipe failed, error %s", strerror(errno));
  }

  register_pid = fork();
  if (!register_pid) {
    // Child: close the write end, then spin until the parent writes a pid.
    // NOTE(review): read() returning -1 also leaves this loop; only a 0
    // (no data / EOF) return keeps polling.
    close(pipe_fd[1]);
    while (read(pipe_fd[0], &child_pid, sizeof(pid_t)) == 0) {
      nanosleep(&g_cycle, NULL);
    }

    // child
    if (is_custom_config_path()) {
      // Custom config path: identify the container by id (--cont-id).
      ret = execl((RPC_CLIENT_PATH RPC_CLIENT_NAME), RPC_CLIENT_NAME, "--addr",
                  RPC_ADDR, "--bus-id", bus_id, "--pod-uid", pod_uid,
                  "--cont-id", container, (char*)NULL);
    } else {
      // Default path: identify the container by name (--cont-name).
      ret = execl((RPC_CLIENT_PATH RPC_CLIENT_NAME), RPC_CLIENT_NAME, "--addr",
                  RPC_ADDR, "--bus-id", bus_id, "--pod-uid", pod_uid,
                  "--cont-name", container, (char*)NULL);
    }
    // execl only returns on failure.
    if (unlikely(ret == -1)) {
      LOGGER(FATAL, "can't register to manager, error %s", strerror(errno));
    }

    close(pipe_fd[0]);
    _exit(EXIT_SUCCESS);
  } else {
    // Parent: close the read end, hand the child its pid over the pipe.
    close(pipe_fd[0]);

    while (write(pipe_fd[1], &register_pid, sizeof(pid_t)) == 0) {
      nanosleep(&g_cycle, NULL);
    }

    // Reap the child, skipping stop/continue notifications.
    do {
      wret = waitpid(register_pid, &wstatus, WUNTRACED | WCONTINUED);
      if (unlikely(wret == -1)) {
        LOGGER(FATAL, "waitpid failed, error %s", strerror(errno));
      }
    } while (!WIFEXITED(wstatus) && !WIFSIGNALED(wstatus));

    // A non-zero exit from the RPC client is fatal for registration.
    ret = WEXITSTATUS(wstatus);
    if (unlikely(ret)) {
      LOGGER(FATAL, "rpc client exit with %d", ret);
    }

    close(pipe_fd[1]);
  }
}
93 |
--------------------------------------------------------------------------------
/tools/monitor_dockernized.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Tencent is pleased to support the open source community by making TKEStack available.
3 | *
4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved.
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use
7 | * this file except in compliance with the License. You may obtain a copy of the
8 | * License at
9 | *
10 | * https://opensource.org/licenses/Apache-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the
15 | * specific language governing permissions and limitations under the License.
16 | */
17 |
18 | //
19 | // Created by thomas on 5/17/18.
20 | //
21 |
22 | #include
23 | #include
24 | #include
25 | #include
26 |
27 | #include "include/hijack.h"
28 | #include "include/nvml-helper.h"
29 |
// Shared state provided by the hijack library (see src/loader.c): the NVML
// entry table, the pid-list file path, and the parsed driver version.
extern entry_t nvml_library_entry[];
extern char pid_path[];
extern char driver_version[];
33 |
34 | static void load_driver_libraries() {
35 | void *table = NULL;
36 | char driver_filename[FILENAME_MAX];
37 | int i;
38 |
39 | snprintf(driver_filename, FILENAME_MAX - 1, "%s.%s", DRIVER_ML_LIBRARY_PREFIX,
40 | driver_version);
41 | driver_filename[FILENAME_MAX - 1] = '\0';
42 |
43 | table = dlopen(driver_filename, RTLD_NOW | RTLD_NODELETE);
44 | if (unlikely(!table)) {
45 | LOGGER(FATAL, "can't find library %s", driver_filename);
46 | }
47 |
48 | for (i = 0; i < NVML_ENTRY_END; i++) {
49 | nvml_library_entry[i].fn_ptr = dlsym(table, nvml_library_entry[i].name);
50 | if (unlikely(!nvml_library_entry[i].fn_ptr)) {
51 | LOGGER(4, "can't find function %s in %s", nvml_library_entry[i].name,
52 | driver_filename);
53 | }
54 | }
55 |
56 | dlclose(table);
57 | }
58 |
/**
 * Standalone monitor: for every GPU device, prints utilization and memory
 * usage of the pids listed (as raw binary ints) in pid_path.
 *
 * Flow: read pid list -> read controller configuration -> load the NVML
 * entry table -> per device, join the pid list against the device's running
 * processes and utilization samples and print one line per matching pid.
 */
int main(void) {
  int ret = 0;

  int fd = 0;
  int item = 0;
  int rsize = 0;

  int i = 0, j = 0, k = 0;

  // pids read from pid_path; at most MAX_PIDS entries
  int pids_table[MAX_PIDS];
  int pids_table_size = 0;

  int device_num = 0;
  nvmlDevice_t dev;
  nvmlProcessInfo_t pids_on_device[MAX_PIDS];
  unsigned int size_on_device = MAX_PIDS;

  struct timeval cur;
  size_t microsec;
  nvmlProcessUtilizationSample_t processes_sample[MAX_PIDS];
  int processes_num = MAX_PIDS;

  int sm_util = 0;
  uint64_t memory = 0;
  nvmlProcessInfo_t *process_match = NULL;
  nvmlProcessUtilizationSample_t *sample_match = NULL;

  fd = open(pid_path, O_RDONLY);
  if (unlikely(fd == -1)) {
    // A missing pid file means "nothing to monitor", not an error.
    LOGGER(5, "can't open %s", pid_path);
    return 0;
  }

  // The pid file is a flat array of binary ints; read until a short read.
  for (item = 0; item < MAX_PIDS; item++) {
    rsize = (int) read(fd, pids_table + item, sizeof(int));
    if (unlikely(rsize != sizeof(int))) {
      break;
    }
  }

  for (i = 0; i < item; i++) {
    LOGGER(5, "pid: %d", pids_table[i]);
  }

  pids_table_size = item;

  LOGGER(5, "read %d items from %s", pids_table_size, pid_path);

  if (unlikely(read_controller_configuration())) {
    LOGGER(5, "can't read controller file");
    return 1;
  }

  load_driver_libraries();

  NVML_ENTRY_CALL(nvml_library_entry, nvmlInit);

  ret = NVML_ENTRY_CALL(nvml_library_entry, nvmlDeviceGetCount, &device_num);
  if (unlikely(ret)) {
    LOGGER(ERROR, "Get device number return %d", ret);
    return 1;
  }

  for (i = 0; i < device_num; i++) {
    ret = NVML_ENTRY_CALL(nvml_library_entry, nvmlDeviceGetHandleByIndex, i,
                          &dev);
    if (unlikely(ret)) {
      // Skip devices we cannot query; keep going with the rest.
      LOGGER(ERROR, "Get device %d return %d", i, ret);
      continue;
    }

    // Processes currently running compute work on this device.
    size_on_device = MAX_PIDS;
    ret = NVML_ENTRY_CALL(nvml_library_entry,
                          nvmlDeviceGetComputeRunningProcesses, dev,
                          &size_on_device, pids_on_device);
    if (unlikely(ret)) {
      LOGGER(ERROR, "Get process gpu memory return %d", ret);
      continue;
    }

    // NOTE(review): j is int, size_on_device is unsigned — a signed/unsigned
    // comparison; harmless while counts stay small.
    for (j = 0; j < size_on_device; j++) {
      LOGGER(4, "summary: %d used %lld", pids_on_device[j].pid,
             pids_on_device[j].usedGpuMemory);
    }

    // Per-process utilization samples since one second ago.
    processes_num = MAX_PIDS;
    gettimeofday(&cur, NULL);
    microsec = (cur.tv_sec - 1) * 1000UL * 1000UL + cur.tv_usec;
    ret = NVML_ENTRY_CALL(nvml_library_entry, nvmlDeviceGetProcessUtilization,
                          dev, processes_sample, &processes_num, microsec);
    if (unlikely(ret)) {
      LOGGER(ERROR, "Get process utilization return %d", ret);
      continue;
    }

    for (j = 0; j < processes_num; j++) {
      LOGGER(4, "summary: %d util %d", processes_sample[j].pid,
             processes_sample[j].smUtil);
    }

    // Join the monitored pid list against both NVML result sets (linear
    // scans; bounded by MAX_PIDS).
    fprintf(stderr, "Device\tProcess\tUtilization\tMemory\n");
    for (j = 0; j < pids_table_size; j++) {
      process_match = NULL;
      sample_match = NULL;

      for (k = 0; k < size_on_device; k++) {
        if (pids_on_device[k].pid == pids_table[j]) {
          process_match = &pids_on_device[k];
        }
      }

      for (k = 0; k < processes_num; k++) {
        if (processes_sample[k].pid == pids_table[j]) {
          sample_match = &processes_sample[k];
        }
      }

      if (process_match) {
        memory = process_match->usedGpuMemory;
        // >> 20: scale to MB (assumes usedGpuMemory is in bytes — TODO
        // confirm against the NVML subset header).
        memory >>= 20;
        if (sample_match) {
          sm_util = sample_match->smUtil;
        } else {
          // pid has memory on the device but no utilization sample
          sm_util = 0;
        }
        fprintf(stderr, "%-6d\txxx\t%-11d\t%-6" PRIu64 " MB\n", i, sm_util,
                memory);
      }
    }
  }

  NVML_ENTRY_CALL(nvml_library_entry, nvmlShutdown);
  close(fd);
}
193 |
--------------------------------------------------------------------------------
/vcuda.spec:
--------------------------------------------------------------------------------
1 | Name: vcuda
2 | Version: %{version}
3 | Release: %{commit}%{?dist}
4 | Summary: GPU virtual device library
5 |
License:        Apache-2.0
7 | Source: vcuda.tar.gz
8 |
9 | Requires: systemd-units
10 |
11 | %define pkgname %{name}-%{version}-%{release}
12 |
13 | %description
14 | GPU virtual device library
15 |
16 | %prep
17 | %setup
18 |
19 | %install
20 | install -d $RPM_BUILD_ROOT/%{_libdir}
21 | install -d $RPM_BUILD_ROOT/%{_bindir}
22 |
23 | install -p -m 755 libcuda-control.so $RPM_BUILD_ROOT/%{_libdir}/
24 | install -p -m 755 nvml-monitor $RPM_BUILD_ROOT/%{_bindir}/
25 |
26 | %clean
27 | rm -rf $RPM_BUILD_ROOT
28 |
29 | %files
30 | /%{_libdir}/libcuda-control.so
31 | /%{_bindir}/nvml-monitor
32 |
33 | %post
34 | ldconfig
35 |
36 | %postun
37 | ldconfig
38 |
--------------------------------------------------------------------------------