├── .chglog ├── CHANGELOG.tpl.md └── config.yml ├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── CMakeLists.txt ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── MAINTAINERS ├── README.md ├── VERSION ├── build-img.sh ├── find_new_lib.sh ├── include ├── cuda-helper.h ├── cuda-subset.h ├── hijack.h ├── nvml-helper.h └── nvml-subset.h ├── src ├── cuda_originals.c ├── hijack_call.c ├── loader.c ├── nvml_entry.c └── register.c ├── tools └── monitor_dockernized.c └── vcuda.spec /.chglog/CHANGELOG.tpl.md: -------------------------------------------------------------------------------- 1 | {{ if .Versions -}} 2 | 3 | ## [Unreleased] 4 | 5 | {{ if .Unreleased.CommitGroups -}} 6 | {{ range .Unreleased.CommitGroups -}} 7 | ### {{ .Title }} 8 | {{ range .Commits -}} 9 | - {{ if .Scope }}**{{ .Scope }}:** {{ end }}{{ .Subject }} 10 | {{ end }} 11 | {{ end -}} 12 | {{ end -}} 13 | {{ end -}} 14 | 15 | {{ range .Versions }} 16 | 17 | ## {{ if .Tag.Previous }}[{{ .Tag.Name }}]{{ else }}{{ .Tag.Name }}{{ end }} - {{ datetime "2006-01-02" .Tag.Date }} 18 | {{ range .CommitGroups -}} 19 | ### {{ .Title }} 20 | {{ range .Commits -}} 21 | - {{ if .Scope }}**{{ .Scope }}:** {{ end }}{{ .Subject }} 22 | {{ end }} 23 | {{ end -}} 24 | 25 | {{- if .RevertCommits -}} 26 | ### Reverts 27 | {{ range .RevertCommits -}} 28 | - {{ .Revert.Header }} 29 | {{ end }} 30 | {{ end -}} 31 | 32 | {{- if .MergeCommits -}} 33 | ### Pull Requests 34 | {{ range .MergeCommits -}} 35 | - {{ .Header }} 36 | {{ end }} 37 | {{ end -}} 38 | 39 | {{- if .NoteGroups -}} 40 | {{ range .NoteGroups -}} 41 | ### {{ .Title }} 42 | {{ range .Notes }} 43 | {{ .Body }} 44 | {{ end }} 45 | {{ end -}} 46 | {{ end -}} 47 | {{ end -}} 48 | 49 | {{- if .Versions }} 50 | [Unreleased]: {{ .Info.RepositoryURL }}/compare/{{ $latest := index .Versions 0 }}{{ $latest.Tag.Name }}...HEAD 51 | {{ range .Versions -}} 52 | {{ if .Tag.Previous -}} 53 | [{{ .Tag.Name }}]: {{ $.Info.RepositoryURL }}/compare/{{ .Tag.Previous.Name }}...{{ 
.Tag.Name }} 54 | {{ end -}} 55 | {{ end -}} 56 | {{ end -}} -------------------------------------------------------------------------------- /.chglog/config.yml: -------------------------------------------------------------------------------- 1 | style: github 2 | template: CHANGELOG.tpl.md 3 | info: 4 | title: CHANGELOG 5 | repository_url: https://github.com/tkestack/vcuda-controller 6 | options: 7 | commits: 8 | # filters: 9 | # Type: 10 | # - feat 11 | # - fix 12 | # - perf 13 | # - refactor 14 | commit_groups: 15 | # title_maps: 16 | # feat: Features 17 | # fix: Bug Fixes 18 | # perf: Performance Improvements 19 | # refactor: Code Refactoring 20 | header: 21 | pattern: "^(\\w*)(?:\\(([\\w\\$\\.\\-\\*\\s]*)\\))?\\:\\s(.*)$" 22 | pattern_maps: 23 | - Type 24 | - Scope 25 | - Subject 26 | notes: 27 | keywords: 28 | - BREAKING CHANGE -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | .idea/ 3 | cmake-build-debug/ 4 | proto/*.h 5 | proto/*.cc 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | 3 | services: 4 | - docker 5 | 6 | script: 7 | - ./build-img.sh 8 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 2 | ## [Unreleased] 3 | 4 | ### Feat 5 | - add new functions of CUDA 11 6 | 7 | 8 | 9 | ## [v1.0.1] - 2020-05-21 10 | ### Docs 11 | - Add CHANGELOG.md 12 | 13 | ### Feat 14 | - Support build image use host network 15 | 16 | ### Fix 17 | - Support other container runtime 18 | 19 | ### Pull Requests 20 | - Merge pull request [#4](https://github.com/tkestack/vcuda-controller/issues/4) from mYmNeo/dev_ffmpeg 21 | 22 | 23 | 24 | ## v1.0 - 2019-11-27 25 | 
26 | [Unreleased]: https://github.com/tkestack/vcuda-controller/compare/v1.0.1...HEAD 27 | [v1.0.1]: https://github.com/tkestack/vcuda-controller/compare/v1.0...v1.0.1 28 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | project(hijack C CXX) 3 | 4 | add_definitions(-D_GNU_SOURCE) 5 | add_compile_options(-Wall -Wshadow -Werror -Wno-format) 6 | 7 | include_directories(${CMAKE_SOURCE_DIR}) 8 | 9 | if (${ENABLE_DEBUG}) 10 | add_compile_options(-g -O0) 11 | else () 12 | add_compile_options(-g -O2) 13 | endif () 14 | 15 | set(STATIC_C_LIBRARIES -static-libgcc -static-libstdc++) 16 | 17 | if (${USE_ORIGINAL}) 18 | add_definitions(-DUSE_ORIGINAL) 19 | # controller related 20 | add_library(cuda-control SHARED 21 | src/hijack_call.c 22 | include/hijack.h 23 | include/cuda-subset.h 24 | include/nvml-subset.h 25 | include/cuda-helper.h 26 | include/nvml-helper.h 27 | src/cuda_originals.c 28 | src/nvml_entry.c 29 | src/loader.c 30 | src/register.c) 31 | target_link_libraries(cuda-control ${STATIC_C_LIBRARIES}) 32 | else (NOT ${USE_ORIGINAL}) 33 | # controller related 34 | add_library(cuda-control SHARED 35 | src/hijack_call.c 36 | include/hijack.h 37 | include/cuda-subset.h 38 | include/nvml-subset.h 39 | include/cuda-helper.h 40 | include/nvml-helper.h 41 | src/cuda_originals.c 42 | src/nvml_entry.c 43 | src/loader.c 44 | src/register.c) 45 | 46 | target_link_libraries(cuda-control ${STATIC_C_LIBRARIES}) 47 | 48 | # process monitor 49 | add_executable(nvml-monitor 50 | tools/monitor_dockernized.c 51 | src/loader.c 52 | src/register.c) 53 | target_link_libraries(nvml-monitor ${STATIC_C_LIBRARIES} -ldl -lpthread) 54 | endif () 55 | 56 | target_compile_options(cuda-control PUBLIC $<$:-std=c++11>) 57 | 58 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: 
-------------------------------------------------------------------------------- 1 | # CONTRIBUTING 2 | 3 | Welcome to [report Issues](https://github.com/tkestack/vcuda-controller/issues) or [pull requests](https://github.com/tkestack/vcuda-controller/pulls). It's recommended to read the following Contributing Guide first before contributing. 4 | 5 | This document provides a set of best practices for open source contributions - bug reports, code submissions / pull requests, etc. 6 | 7 | ## Issues 8 | 9 | We use Github Issues to track public bugs and feature requests. 10 | 11 | ### Due diligence 12 | 13 | Before submitting a issue, please do the following: 14 | 15 | * Perform **basic troubleshooting** steps: 16 | * Make sure you’re on the latest version. If you’re not on the most recent version, your problem may have been solved already! Upgrading is always the best first step. 17 | * Try older versions. If you’re already on the latest release, try rolling back a few minor versions (e.g. if on 1.7, try 1.5 or 1.6) and see if the problem goes away. This will help the devs narrow down when the problem first arose in the commit log. 18 | * Try switching up dependency versions. If the software in question has dependencies (other libraries, etc) try upgrading/downgrading those as well. 19 | * Search the project’s bug/issue tracker to make sure it’s not a known issue. 20 | * If you don’t find a pre-existing issue, consider checking with the mailing list and/or IRC channel in case the problem is non-bug-related. 21 | 22 | ### What to put in your bug report 23 | 24 | Make sure your report gets the attention it deserves: bug reports with missing information may be ignored or punted back to you, delaying a fix. The below constitutes a bare minimum; more info is almost always better: 25 | 26 | * What version of the core programming language interpreter/compiler are you using? For example, if it’s a Golang project, are you using Golang 1.13? Golang 1.12? 
27 | * What operating system are you on? Windows? (32-bit? 64-bit?) Mac OS X? (10.14? 10.10?) Linux? (Which distro? Which version of that distro? 32 or 64 bits?) Again, more detail is better. 28 | * Which version or versions of the software are you using? Ideally, you followed the advice above and have ruled out (or verified that the problem exists in) a few different versions. 29 | * How can the developers recreate the bug on their end? If possible, include a copy of your code, the command you used to invoke it, and the full output of your run (if applicable.) A common tactic is to pare down your code until a simple (but still bug-causing) “base case” remains. Not only can this help you identify problems which aren’t real bugs, but it means the developer can get to fixing the bug faster. 30 | 31 | ## Pull Requests 32 | 33 | We strongly welcome your pull request to make TKEStack project better. 34 | 35 | ### Licensing of contributed material 36 | 37 | Keep in mind as you contribute, that code, docs and other material submitted to open source projects are usually considered licensed under the same terms as the rest of the work. 38 | 39 | Anything submitted to a project falls under the licensing terms in the repository’s top level LICENSE file. Per-file copyright/license headers are typically extraneous and undesirable. Please don’t add your own copyright headers to new files unless the project’s license actually requires them! 40 | 41 | ### Branch Management 42 | 43 | There are three main branches here: 44 | 45 | 1. `master` branch. 46 | 1. It is the latest (pre-)release branch. We use `master` for tags, with version number `1.1.0`, `1.2.0`, `1.3.0`... 47 | 2. **Don't submit any PR on `master` branch.** 48 | 2. `dev` branch. 49 | 1. It is our stable developing branch. After full testing, `dev` will be merged to `master` branch for the next release. 50 | 2. **You are recommended to submit bugfix or feature PR on `dev` branch.** 51 | 3. `hotfix` branch. 52 | 1. 
It is the latest tag version for hot fix. If we accept your pull request, we may just tag with version number `1.1.1`, `1.2.3`. 53 | 2. **Only submit urgent PR on `hotfix` branch for next specific release.** 54 | 55 | Normal bugfix or feature request should be submitted to `dev` branch. After full testing, we will merge them to `master` branch for the next release. 56 | 57 | If you have some urgent bugfixes on a published version, but the `master` branch has already diverged far from the latest tag version, you can submit a PR on hotfix. And it will be cherry-picked to `dev` branch if it is possible. 58 | 59 | ``` 60 | master 61 | ↑ 62 | dev <--- hotfix PR 63 | ↑ 64 | feature/bugfix PR 65 | ``` 66 | 67 | ### Make Pull Requests 68 | 69 | The code team will monitor all pull requests; we run some code checks and tests on them. After all tests have passed, we will accept the PR. But it won't be merged to the `master` branch at once, so there may be some delay. 70 | 71 | Before submitting a pull request, please make sure the following are done: 72 | 73 | 1. Fork the repo and create your branch from `master` or `hotfix`. 74 | 2. Update code or documentation if you have changed APIs. 75 | 3. Add the copyright notice to the top of any new files you've added. 76 | 4. Check your code lints and checkstyles. 77 | 5. Test your code, and then test it again. 78 | 6. Now, you can submit your pull request on `dev` or `hotfix` branch. 79 | 80 | ## Code Conventions 81 | 82 | Use [Kubernetes Code Conventions](https://github.com/kubernetes/community/blob/master/contributors/guide/coding-conventions.md) for all projects in the TKEStack organization. 83 | 84 | ## Documentation isn’t optional 85 | 86 | It’s not! Patches without documentation will be returned to sender. By “documentation” we mean: 87 | 88 | * Docstrings must be created or updated for public API functions/methods/etc. (This step is optional for some bugfixes.) 
89 | * New features should ideally include updates to prose documentation, including useful example code snippets. 90 | * All submissions should have a changelog entry crediting the contributor and/or any individuals instrumental in identifying the problem. 91 | 92 | ## Tests aren’t optional 93 | 94 | Any bugfix that doesn’t include a test proving the existence of the bug being fixed, may be suspect. Ditto for new features that can’t prove they actually work. 95 | 96 | We’ve found that test-first development really helps make features better architected and identifies potential edge cases earlier instead of later. Writing tests before the implementation is strongly encouraged. 97 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # stage 1 2 | FROM nvidia/cuda:11.2.0-devel-ubuntu18.04 as build 3 | 4 | RUN apt update && apt install -y --no-install-recommends \ 5 | curl 6 | 7 | RUN curl -s -L https://nvidia.github.io/libnvidia-container/gpgkey | apt-key add - 8 | 9 | RUN apt install -y --no-install-recommends \ 10 | cmake libvdpau-dev && \ 11 | rm -rf /var/lib/apt/lists/* 12 | 13 | COPY cuda-control.tar /tmp 14 | 15 | ARG version 16 | 17 | RUN cd /tmp && tar xvf /tmp/cuda-control.tar && \ 18 | cd /tmp/cuda-control && mkdir vcuda-${version} && \ 19 | cd vcuda-${version} && cmake -DCMAKE_BUILD_TYPE=Release .. 
&& \ 20 | make 21 | 22 | RUN cd /tmp/cuda-control && tar cf /tmp/vcuda.tar.gz -c vcuda-${version} 23 | 24 | # stage 2 25 | FROM centos:7 as rpmpkg 26 | 27 | RUN yum install -y rpm-build 28 | RUN mkdir -p /root/rpmbuild/{SPECS,SOURCES} 29 | 30 | COPY vcuda.spec /root/rpmbuild/SPECS 31 | COPY --from=build /tmp/vcuda.tar.gz /root/rpmbuild/SOURCES 32 | 33 | RUN echo '%_topdir /root/rpmbuild' > /root/.rpmmacros \ 34 | && echo '%__os_install_post %{nil}' >> /root/.rpmmacros \ 35 | && echo '%debug_package %{nil}' >> /root/.rpmmacros 36 | 37 | WORKDIR /root/rpmbuild/SPECS 38 | 39 | ARG version 40 | ARG commit 41 | 42 | RUN rpmbuild -bb --quiet \ 43 | --define 'version '${version}'' \ 44 | --define 'commit '${commit}'' \ 45 | vcuda.spec 46 | 47 | # stage 3 48 | FROM centos:7 49 | 50 | ARG version 51 | ARG commit 52 | 53 | COPY --from=rpmpkg /root/rpmbuild/RPMS/x86_64/vcuda-${version}-${commit}.el7.x86_64.rpm /tmp 54 | RUN rpm -ivh /tmp/vcuda-${version}-${commit}.el7.x86_64.rpm && rm -rf /tmp/vcuda-${version}-${commit}.el7.x86_64.rpm 55 | -------------------------------------------------------------------------------- /MAINTAINERS: -------------------------------------------------------------------------------- 1 | Thomas Song @mYmNeo 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # vcuda-controller 2 | 3 | [![Build Status](https://travis-ci.org/tkestack/vcuda-controller.svg?branch=master)](https://travis-ci.org/tkestack/vcuda-controller) 4 | [![Total alerts](https://img.shields.io/lgtm/alerts/g/tkestack/vcuda-controller.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/tkestack/vcuda-controller/alerts/) 5 | [![Language grade: C/C++](https://img.shields.io/lgtm/grade/cpp/g/tkestack/vcuda-controller.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/tkestack/vcuda-controller/context:cpp) 6 | 7 | This project is a wrapper of NVIDIA 
driver library, it's a component 8 | of [gpu-manager](https://github.com/tkestack/gpu-manager) which enables Kubernetes to not only run more than one Pod on 9 | the same GPU, but also provide a QoS guarantee for each Pod. For more details, please refer to our 10 | paper [here](https://ieeexplore.ieee.org/abstract/document/8672318). 11 | 12 | ## Build 13 | 14 | ``` 15 | IMAGE_FILE= ./build-img.sh 16 | ``` 17 | 18 | ## CUDA/GPU support information 19 | 20 | CUDA 11.5.1 and earlier are supported 21 | 22 | Any GPU architecture after Kepler is supported -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | master 2 | -------------------------------------------------------------------------------- /build-img.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -o errexit 3 | set -o pipefail 4 | set -o nounset 5 | set -o xtrace 6 | 7 | ROOT=$(cd $(dirname ${BASH_SOURCE[0]}) && pwd -P) 8 | IMAGE_FILE=${IMAGE_FILE:-"tkestack.io/gaia/vcuda:latest"} 9 | 10 | function cleanup() { 11 | rm -rf ${ROOT}/cuda-control.tar 12 | } 13 | 14 | trap cleanup EXIT SIGTERM SIGINT 15 | 16 | function build_img() { 17 | readonly local commit=$(git log --oneline | wc -l | sed -e 's,^[ \t]*,,') 18 | readonly local version=$(<"${ROOT}/VERSION") 19 | 20 | rm -rf ${ROOT}/build 21 | mkdir ${ROOT}/build 22 | git archive -o ${ROOT}/build/cuda-control.tar --format=tar --prefix=cuda-control/ HEAD 23 | cp ${ROOT}/vcuda.spec ${ROOT}/build 24 | cp ${ROOT}/Dockerfile ${ROOT}/build 25 | ( 26 | cd ${ROOT}/build 27 | docker build ${BUILD_FLAGS:-} --build-arg version=${version} --build-arg commit=${commit} -t ${IMAGE_FILE} . 
28 | ) 29 | } 30 | 31 | build_img 32 | -------------------------------------------------------------------------------- /find_new_lib.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o nounset 5 | set -o pipefail 6 | 7 | CUDA_LIBRARY=$1 8 | ML_LIBRARY=$2 9 | 10 | echo "find new library" 11 | 12 | while read item; do 13 | grep -q ${item} include/cuda-helper.h || echo "$item," 14 | done < <(nm -D ${CUDA_LIBRARY} | grep " T " | awk '{print "CUDA_ENTRY_ENUM("$3")"}') 15 | 16 | echo "" 17 | 18 | while read item; do 19 | grep -q ${item} include/nvml-helper.h || echo "$item," 20 | done < <(nm -D ${ML_LIBRARY} | grep " T " | awk '{print "NVML_ENTRY_ENUM("$3")"}') 21 | -------------------------------------------------------------------------------- /include/cuda-helper.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack 3 | * available. 4 | * 5 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 6 | * 7 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 8 | * use this file except in compliance with the License. You may obtain a copy of 9 | * the License at 10 | * 11 | * https://opensource.org/licenses/Apache-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 15 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations under the License. 
17 | */ 18 | 19 | #ifndef HIJACK_CUDA_HELPER_H 20 | #define HIJACK_CUDA_HELPER_H 21 | 22 | #ifdef __cplusplus 23 | extern "C" { 24 | #endif 25 | 26 | #include 27 | 28 | #include "cuda-subset.h" 29 | #include "hijack.h" 30 | 31 | /** 32 | * CUDA library prefix 33 | */ 34 | #define CUDA_LIBRARY_PREFIX "libcuda.so" 35 | 36 | #define CUDA_ENTRY_ENUM(x) ENTRY_##x 37 | 38 | #define CUDA_FIND_ENTRY(table, sym) ({ (table)[CUDA_ENTRY_ENUM(sym)].fn_ptr; }) 39 | 40 | #define CUDA_ENTRY_CALL(table, sym, ...) \ 41 | ({ \ 42 | cuda_sym_t _entry = CUDA_FIND_ENTRY(table, sym); \ 43 | _entry(__VA_ARGS__); \ 44 | }) 45 | 46 | #define CUDA_ENTRY_DEBUG_VOID_CALL(table, sym, ...) \ 47 | ({ \ 48 | cuda_debug_void_sym_t _entry = CUDA_FIND_ENTRY(table, sym); \ 49 | _entry(__VA_ARGS__); \ 50 | }) 51 | 52 | #define CUDA_ENTRY_DEBUG_RESULT_CALL(table, sym, ...) \ 53 | ({ \ 54 | cuda_debug_result_sym_t _entry = CUDA_FIND_ENTRY(table, sym); \ 55 | _entry(__VA_ARGS__); \ 56 | }) 57 | 58 | /** 59 | * CUDA library enumerator entry 60 | */ 61 | typedef enum { 62 | /** cuInit */ 63 | CUDA_ENTRY_ENUM(cuInit), 64 | /** cuDeviceGet */ 65 | CUDA_ENTRY_ENUM(cuDeviceGet), 66 | /** cuDeviceGetCount */ 67 | CUDA_ENTRY_ENUM(cuDeviceGetCount), 68 | /** cuDeviceGetName */ 69 | CUDA_ENTRY_ENUM(cuDeviceGetName), 70 | /** cuDeviceTotalMem_v2 */ 71 | CUDA_ENTRY_ENUM(cuDeviceTotalMem_v2), 72 | /** cuDeviceGetAttribute */ 73 | CUDA_ENTRY_ENUM(cuDeviceGetAttribute), 74 | /** cuDeviceGetP2PAttribute */ 75 | CUDA_ENTRY_ENUM(cuDeviceGetP2PAttribute), 76 | /** cuDriverGetVersion */ 77 | CUDA_ENTRY_ENUM(cuDriverGetVersion), 78 | /** cuDeviceGetByPCIBusId */ 79 | CUDA_ENTRY_ENUM(cuDeviceGetByPCIBusId), 80 | /** cuDeviceGetPCIBusId */ 81 | CUDA_ENTRY_ENUM(cuDeviceGetPCIBusId), 82 | /** cuDevicePrimaryCtxRetain */ 83 | CUDA_ENTRY_ENUM(cuDevicePrimaryCtxRetain), 84 | /** cuDevicePrimaryCtxRelease */ 85 | CUDA_ENTRY_ENUM(cuDevicePrimaryCtxRelease), 86 | /** cuDevicePrimaryCtxSetFlags */ 87 | 
CUDA_ENTRY_ENUM(cuDevicePrimaryCtxSetFlags), 88 | /** cuDevicePrimaryCtxGetState */ 89 | CUDA_ENTRY_ENUM(cuDevicePrimaryCtxGetState), 90 | /** cuDevicePrimaryCtxReset */ 91 | CUDA_ENTRY_ENUM(cuDevicePrimaryCtxReset), 92 | /** cuCtxCreate_v2 */ 93 | CUDA_ENTRY_ENUM(cuCtxCreate_v2), 94 | /** cuCtxGetFlags */ 95 | CUDA_ENTRY_ENUM(cuCtxGetFlags), 96 | /** cuCtxSetCurrent */ 97 | CUDA_ENTRY_ENUM(cuCtxSetCurrent), 98 | /** cuCtxGetCurrent */ 99 | CUDA_ENTRY_ENUM(cuCtxGetCurrent), 100 | /** cuCtxDetach */ 101 | CUDA_ENTRY_ENUM(cuCtxDetach), 102 | /** cuCtxGetApiVersion */ 103 | CUDA_ENTRY_ENUM(cuCtxGetApiVersion), 104 | /** cuCtxGetDevice */ 105 | CUDA_ENTRY_ENUM(cuCtxGetDevice), 106 | /** cuCtxGetLimit */ 107 | CUDA_ENTRY_ENUM(cuCtxGetLimit), 108 | /** cuCtxSetLimit */ 109 | CUDA_ENTRY_ENUM(cuCtxSetLimit), 110 | /** cuCtxGetCacheConfig */ 111 | CUDA_ENTRY_ENUM(cuCtxGetCacheConfig), 112 | /** cuCtxSetCacheConfig */ 113 | CUDA_ENTRY_ENUM(cuCtxSetCacheConfig), 114 | /** cuCtxGetSharedMemConfig */ 115 | CUDA_ENTRY_ENUM(cuCtxGetSharedMemConfig), 116 | /** cuCtxGetStreamPriorityRange */ 117 | CUDA_ENTRY_ENUM(cuCtxGetStreamPriorityRange), 118 | /** cuCtxSetSharedMemConfig */ 119 | CUDA_ENTRY_ENUM(cuCtxSetSharedMemConfig), 120 | /** cuCtxSynchronize */ 121 | CUDA_ENTRY_ENUM(cuCtxSynchronize), 122 | /** cuModuleLoad */ 123 | CUDA_ENTRY_ENUM(cuModuleLoad), 124 | /** cuModuleLoadData */ 125 | CUDA_ENTRY_ENUM(cuModuleLoadData), 126 | /** cuModuleLoadFatBinary */ 127 | CUDA_ENTRY_ENUM(cuModuleLoadFatBinary), 128 | /** cuModuleUnload */ 129 | CUDA_ENTRY_ENUM(cuModuleUnload), 130 | /** cuModuleGetFunction */ 131 | CUDA_ENTRY_ENUM(cuModuleGetFunction), 132 | /** cuModuleGetGlobal_v2 */ 133 | CUDA_ENTRY_ENUM(cuModuleGetGlobal_v2), 134 | /** cuModuleGetTexRef */ 135 | CUDA_ENTRY_ENUM(cuModuleGetTexRef), 136 | /** cuModuleGetSurfRef */ 137 | CUDA_ENTRY_ENUM(cuModuleGetSurfRef), 138 | /** cuLinkCreate */ 139 | CUDA_ENTRY_ENUM(cuLinkCreate), 140 | /** cuLinkAddData */ 141 | 
CUDA_ENTRY_ENUM(cuLinkAddData), 142 | /** cuLinkAddFile */ 143 | CUDA_ENTRY_ENUM(cuLinkAddFile), 144 | /** cuLinkComplete */ 145 | CUDA_ENTRY_ENUM(cuLinkComplete), 146 | /** cuLinkDestroy */ 147 | CUDA_ENTRY_ENUM(cuLinkDestroy), 148 | /** cuMemGetInfo_v2 */ 149 | CUDA_ENTRY_ENUM(cuMemGetInfo_v2), 150 | /** cuMemAllocManaged */ 151 | CUDA_ENTRY_ENUM(cuMemAllocManaged), 152 | /** cuMemAlloc_v2 */ 153 | CUDA_ENTRY_ENUM(cuMemAlloc_v2), 154 | /** cuMemAllocPitch_v2 */ 155 | CUDA_ENTRY_ENUM(cuMemAllocPitch_v2), 156 | /** cuMemFree_v2 */ 157 | CUDA_ENTRY_ENUM(cuMemFree_v2), 158 | /** cuMemGetAddressRange_v2 */ 159 | CUDA_ENTRY_ENUM(cuMemGetAddressRange_v2), 160 | /** cuMemFreeHost */ 161 | CUDA_ENTRY_ENUM(cuMemFreeHost), 162 | /** cuMemHostAlloc */ 163 | CUDA_ENTRY_ENUM(cuMemHostAlloc), 164 | /** cuMemHostGetDevicePointer_v2 */ 165 | CUDA_ENTRY_ENUM(cuMemHostGetDevicePointer_v2), 166 | /** cuMemHostGetFlags */ 167 | CUDA_ENTRY_ENUM(cuMemHostGetFlags), 168 | /** cuMemHostRegister_v2 */ 169 | CUDA_ENTRY_ENUM(cuMemHostRegister_v2), 170 | /** cuMemHostUnregister */ 171 | CUDA_ENTRY_ENUM(cuMemHostUnregister), 172 | /** cuPointerGetAttribute */ 173 | CUDA_ENTRY_ENUM(cuPointerGetAttribute), 174 | /** cuPointerGetAttributes */ 175 | CUDA_ENTRY_ENUM(cuPointerGetAttributes), 176 | /** cuMemcpy */ 177 | CUDA_ENTRY_ENUM(cuMemcpy), 178 | /** cuMemcpy_ptds */ 179 | CUDA_ENTRY_ENUM(cuMemcpy_ptds), 180 | /** cuMemcpyAsync */ 181 | CUDA_ENTRY_ENUM(cuMemcpyAsync), 182 | /** cuMemcpyAsync_ptsz */ 183 | CUDA_ENTRY_ENUM(cuMemcpyAsync_ptsz), 184 | /** cuMemcpyPeer */ 185 | CUDA_ENTRY_ENUM(cuMemcpyPeer), 186 | /** cuMemcpyPeer_ptds */ 187 | CUDA_ENTRY_ENUM(cuMemcpyPeer_ptds), 188 | /** cuMemcpyPeerAsync */ 189 | CUDA_ENTRY_ENUM(cuMemcpyPeerAsync), 190 | /** cuMemcpyPeerAsync_ptsz */ 191 | CUDA_ENTRY_ENUM(cuMemcpyPeerAsync_ptsz), 192 | /** cuMemcpyHtoD_v2 */ 193 | CUDA_ENTRY_ENUM(cuMemcpyHtoD_v2), 194 | /** cuMemcpyHtoD_v2_ptds */ 195 | CUDA_ENTRY_ENUM(cuMemcpyHtoD_v2_ptds), 196 | /** 
cuMemcpyHtoDAsync_v2 */ 197 | CUDA_ENTRY_ENUM(cuMemcpyHtoDAsync_v2), 198 | /** cuMemcpyHtoDAsync_v2_ptsz */ 199 | CUDA_ENTRY_ENUM(cuMemcpyHtoDAsync_v2_ptsz), 200 | /** cuMemcpyDtoH_v2 */ 201 | CUDA_ENTRY_ENUM(cuMemcpyDtoH_v2), 202 | /** cuMemcpyDtoH_v2_ptds */ 203 | CUDA_ENTRY_ENUM(cuMemcpyDtoH_v2_ptds), 204 | /** cuMemcpyDtoHAsync_v2 */ 205 | CUDA_ENTRY_ENUM(cuMemcpyDtoHAsync_v2), 206 | /** cuMemcpyDtoHAsync_v2_ptsz */ 207 | CUDA_ENTRY_ENUM(cuMemcpyDtoHAsync_v2_ptsz), 208 | /** cuMemcpyDtoD_v2 */ 209 | CUDA_ENTRY_ENUM(cuMemcpyDtoD_v2), 210 | /** cuMemcpyDtoD_v2_ptds */ 211 | CUDA_ENTRY_ENUM(cuMemcpyDtoD_v2_ptds), 212 | /** cuMemcpyDtoDAsync_v2 */ 213 | CUDA_ENTRY_ENUM(cuMemcpyDtoDAsync_v2), 214 | /** cuMemcpyDtoDAsync_v2_ptsz */ 215 | CUDA_ENTRY_ENUM(cuMemcpyDtoDAsync_v2_ptsz), 216 | /** cuMemcpy2DUnaligned_v2 */ 217 | CUDA_ENTRY_ENUM(cuMemcpy2DUnaligned_v2), 218 | /** cuMemcpy2DUnaligned_v2_ptds */ 219 | CUDA_ENTRY_ENUM(cuMemcpy2DUnaligned_v2_ptds), 220 | /** cuMemcpy2DAsync_v2 */ 221 | CUDA_ENTRY_ENUM(cuMemcpy2DAsync_v2), 222 | /** cuMemcpy2DAsync_v2_ptsz */ 223 | CUDA_ENTRY_ENUM(cuMemcpy2DAsync_v2_ptsz), 224 | /** cuMemcpy3D_v2 */ 225 | CUDA_ENTRY_ENUM(cuMemcpy3D_v2), 226 | /** cuMemcpy3D_v2_ptds */ 227 | CUDA_ENTRY_ENUM(cuMemcpy3D_v2_ptds), 228 | /** cuMemcpy3DAsync_v2 */ 229 | CUDA_ENTRY_ENUM(cuMemcpy3DAsync_v2), 230 | /** cuMemcpy3DAsync_v2_ptsz */ 231 | CUDA_ENTRY_ENUM(cuMemcpy3DAsync_v2_ptsz), 232 | /** cuMemcpy3DPeer */ 233 | CUDA_ENTRY_ENUM(cuMemcpy3DPeer), 234 | /** cuMemcpy3DPeer_ptds */ 235 | CUDA_ENTRY_ENUM(cuMemcpy3DPeer_ptds), 236 | /** cuMemcpy3DPeerAsync */ 237 | CUDA_ENTRY_ENUM(cuMemcpy3DPeerAsync), 238 | /** cuMemcpy3DPeerAsync_ptsz */ 239 | CUDA_ENTRY_ENUM(cuMemcpy3DPeerAsync_ptsz), 240 | /** cuMemsetD8_v2 */ 241 | CUDA_ENTRY_ENUM(cuMemsetD8_v2), 242 | /** cuMemsetD8_v2_ptds */ 243 | CUDA_ENTRY_ENUM(cuMemsetD8_v2_ptds), 244 | /** cuMemsetD8Async */ 245 | CUDA_ENTRY_ENUM(cuMemsetD8Async), 246 | /** cuMemsetD8Async_ptsz */ 247 | 
CUDA_ENTRY_ENUM(cuMemsetD8Async_ptsz), 248 | /** cuMemsetD2D8_v2 */ 249 | CUDA_ENTRY_ENUM(cuMemsetD2D8_v2), 250 | /** cuMemsetD2D8_v2_ptds */ 251 | CUDA_ENTRY_ENUM(cuMemsetD2D8_v2_ptds), 252 | /** cuMemsetD2D8Async */ 253 | CUDA_ENTRY_ENUM(cuMemsetD2D8Async), 254 | /** cuMemsetD2D8Async_ptsz */ 255 | CUDA_ENTRY_ENUM(cuMemsetD2D8Async_ptsz), 256 | /** cuFuncSetCacheConfig */ 257 | CUDA_ENTRY_ENUM(cuFuncSetCacheConfig), 258 | /** cuFuncSetSharedMemConfig */ 259 | CUDA_ENTRY_ENUM(cuFuncSetSharedMemConfig), 260 | /** cuFuncGetAttribute */ 261 | CUDA_ENTRY_ENUM(cuFuncGetAttribute), 262 | /** cuArrayCreate_v2 */ 263 | CUDA_ENTRY_ENUM(cuArrayCreate_v2), 264 | /** cuArrayGetDescriptor_v2 */ 265 | CUDA_ENTRY_ENUM(cuArrayGetDescriptor_v2), 266 | /** cuArray3DCreate_v2 */ 267 | CUDA_ENTRY_ENUM(cuArray3DCreate_v2), 268 | /** cuArray3DGetDescriptor_v2 */ 269 | CUDA_ENTRY_ENUM(cuArray3DGetDescriptor_v2), 270 | /** cuArrayDestroy */ 271 | CUDA_ENTRY_ENUM(cuArrayDestroy), 272 | /** cuMipmappedArrayCreate */ 273 | CUDA_ENTRY_ENUM(cuMipmappedArrayCreate), 274 | /** cuMipmappedArrayGetLevel */ 275 | CUDA_ENTRY_ENUM(cuMipmappedArrayGetLevel), 276 | /** cuMipmappedArrayDestroy */ 277 | CUDA_ENTRY_ENUM(cuMipmappedArrayDestroy), 278 | /** cuTexRefCreate */ 279 | CUDA_ENTRY_ENUM(cuTexRefCreate), 280 | /** cuTexRefDestroy */ 281 | CUDA_ENTRY_ENUM(cuTexRefDestroy), 282 | /** cuTexRefSetArray */ 283 | CUDA_ENTRY_ENUM(cuTexRefSetArray), 284 | /** cuTexRefSetMipmappedArray */ 285 | CUDA_ENTRY_ENUM(cuTexRefSetMipmappedArray), 286 | /** cuTexRefSetAddress_v2 */ 287 | CUDA_ENTRY_ENUM(cuTexRefSetAddress_v2), 288 | /** cuTexRefSetAddress2D_v3 */ 289 | CUDA_ENTRY_ENUM(cuTexRefSetAddress2D_v3), 290 | /** cuTexRefSetFormat */ 291 | CUDA_ENTRY_ENUM(cuTexRefSetFormat), 292 | /** cuTexRefSetAddressMode */ 293 | CUDA_ENTRY_ENUM(cuTexRefSetAddressMode), 294 | /** cuTexRefSetFilterMode */ 295 | CUDA_ENTRY_ENUM(cuTexRefSetFilterMode), 296 | /** cuTexRefSetMipmapFilterMode */ 297 | 
CUDA_ENTRY_ENUM(cuTexRefSetMipmapFilterMode), 298 | /** cuTexRefSetMipmapLevelBias */ 299 | CUDA_ENTRY_ENUM(cuTexRefSetMipmapLevelBias), 300 | /** cuTexRefSetMipmapLevelClamp */ 301 | CUDA_ENTRY_ENUM(cuTexRefSetMipmapLevelClamp), 302 | /** cuTexRefSetMaxAnisotropy */ 303 | CUDA_ENTRY_ENUM(cuTexRefSetMaxAnisotropy), 304 | /** cuTexRefSetFlags */ 305 | CUDA_ENTRY_ENUM(cuTexRefSetFlags), 306 | /** cuTexRefSetBorderColor */ 307 | CUDA_ENTRY_ENUM(cuTexRefSetBorderColor), 308 | /** cuTexRefGetBorderColor */ 309 | CUDA_ENTRY_ENUM(cuTexRefGetBorderColor), 310 | /** cuSurfRefSetArray */ 311 | CUDA_ENTRY_ENUM(cuSurfRefSetArray), 312 | /** cuTexObjectCreate */ 313 | CUDA_ENTRY_ENUM(cuTexObjectCreate), 314 | /** cuTexObjectDestroy */ 315 | CUDA_ENTRY_ENUM(cuTexObjectDestroy), 316 | /** cuTexObjectGetResourceDesc */ 317 | CUDA_ENTRY_ENUM(cuTexObjectGetResourceDesc), 318 | /** cuTexObjectGetTextureDesc */ 319 | CUDA_ENTRY_ENUM(cuTexObjectGetTextureDesc), 320 | /** cuTexObjectGetResourceViewDesc */ 321 | CUDA_ENTRY_ENUM(cuTexObjectGetResourceViewDesc), 322 | /** cuSurfObjectCreate */ 323 | CUDA_ENTRY_ENUM(cuSurfObjectCreate), 324 | /** cuSurfObjectDestroy */ 325 | CUDA_ENTRY_ENUM(cuSurfObjectDestroy), 326 | /** cuSurfObjectGetResourceDesc */ 327 | CUDA_ENTRY_ENUM(cuSurfObjectGetResourceDesc), 328 | /** cuLaunchKernel */ 329 | CUDA_ENTRY_ENUM(cuLaunchKernel), 330 | /** cuLaunchKernel_ptsz */ 331 | CUDA_ENTRY_ENUM(cuLaunchKernel_ptsz), 332 | /** cuEventCreate */ 333 | CUDA_ENTRY_ENUM(cuEventCreate), 334 | /** cuEventRecord */ 335 | CUDA_ENTRY_ENUM(cuEventRecord), 336 | /** cuEventRecord_ptsz */ 337 | CUDA_ENTRY_ENUM(cuEventRecord_ptsz), 338 | /** cuEventQuery */ 339 | CUDA_ENTRY_ENUM(cuEventQuery), 340 | /** cuEventSynchronize */ 341 | CUDA_ENTRY_ENUM(cuEventSynchronize), 342 | /** cuEventDestroy_v2 */ 343 | CUDA_ENTRY_ENUM(cuEventDestroy_v2), 344 | /** cuEventElapsedTime */ 345 | CUDA_ENTRY_ENUM(cuEventElapsedTime), 346 | /** cuStreamWaitValue32 */ 347 | 
CUDA_ENTRY_ENUM(cuStreamWaitValue32), 348 | /** cuStreamWaitValue32_ptsz */ 349 | CUDA_ENTRY_ENUM(cuStreamWaitValue32_ptsz), 350 | /** cuStreamWriteValue32 */ 351 | CUDA_ENTRY_ENUM(cuStreamWriteValue32), 352 | /** cuStreamWriteValue32_ptsz */ 353 | CUDA_ENTRY_ENUM(cuStreamWriteValue32_ptsz), 354 | /** cuStreamBatchMemOp */ 355 | CUDA_ENTRY_ENUM(cuStreamBatchMemOp), 356 | /** cuStreamBatchMemOp_ptsz */ 357 | CUDA_ENTRY_ENUM(cuStreamBatchMemOp_ptsz), 358 | /** cuStreamCreate */ 359 | CUDA_ENTRY_ENUM(cuStreamCreate), 360 | /** cuStreamCreateWithPriority */ 361 | CUDA_ENTRY_ENUM(cuStreamCreateWithPriority), 362 | /** cuStreamGetPriority */ 363 | CUDA_ENTRY_ENUM(cuStreamGetPriority), 364 | /** cuStreamGetPriority_ptsz */ 365 | CUDA_ENTRY_ENUM(cuStreamGetPriority_ptsz), 366 | /** cuStreamGetFlags */ 367 | CUDA_ENTRY_ENUM(cuStreamGetFlags), 368 | /** cuStreamGetFlags_ptsz */ 369 | CUDA_ENTRY_ENUM(cuStreamGetFlags_ptsz), 370 | /** cuStreamDestroy_v2 */ 371 | CUDA_ENTRY_ENUM(cuStreamDestroy_v2), 372 | /** cuStreamWaitEvent */ 373 | CUDA_ENTRY_ENUM(cuStreamWaitEvent), 374 | /** cuStreamWaitEvent_ptsz */ 375 | CUDA_ENTRY_ENUM(cuStreamWaitEvent_ptsz), 376 | /** cuStreamAddCallback */ 377 | CUDA_ENTRY_ENUM(cuStreamAddCallback), 378 | /** cuStreamAddCallback_ptsz */ 379 | CUDA_ENTRY_ENUM(cuStreamAddCallback_ptsz), 380 | /** cuStreamSynchronize */ 381 | CUDA_ENTRY_ENUM(cuStreamSynchronize), 382 | /** cuStreamSynchronize_ptsz */ 383 | CUDA_ENTRY_ENUM(cuStreamSynchronize_ptsz), 384 | /** cuStreamQuery */ 385 | CUDA_ENTRY_ENUM(cuStreamQuery), 386 | /** cuStreamQuery_ptsz */ 387 | CUDA_ENTRY_ENUM(cuStreamQuery_ptsz), 388 | /** cuStreamAttachMemAsync */ 389 | CUDA_ENTRY_ENUM(cuStreamAttachMemAsync), 390 | /** cuStreamAttachMemAsync_ptsz */ 391 | CUDA_ENTRY_ENUM(cuStreamAttachMemAsync_ptsz), 392 | /** cuDeviceCanAccessPeer */ 393 | CUDA_ENTRY_ENUM(cuDeviceCanAccessPeer), 394 | /** cuCtxEnablePeerAccess */ 395 | CUDA_ENTRY_ENUM(cuCtxEnablePeerAccess), 396 | /** cuCtxDisablePeerAccess */ 
397 | CUDA_ENTRY_ENUM(cuCtxDisablePeerAccess), 398 | /** cuIpcGetEventHandle */ 399 | CUDA_ENTRY_ENUM(cuIpcGetEventHandle), 400 | /** cuIpcOpenEventHandle */ 401 | CUDA_ENTRY_ENUM(cuIpcOpenEventHandle), 402 | /** cuIpcGetMemHandle */ 403 | CUDA_ENTRY_ENUM(cuIpcGetMemHandle), 404 | /** cuIpcOpenMemHandle */ 405 | CUDA_ENTRY_ENUM(cuIpcOpenMemHandle), 406 | /** cuIpcCloseMemHandle */ 407 | CUDA_ENTRY_ENUM(cuIpcCloseMemHandle), 408 | /** cuGLCtxCreate_v2 */ 409 | CUDA_ENTRY_ENUM(cuGLCtxCreate_v2), 410 | /** cuGLInit */ 411 | CUDA_ENTRY_ENUM(cuGLInit), 412 | /** cuGLGetDevices */ 413 | CUDA_ENTRY_ENUM(cuGLGetDevices), 414 | /** cuGLRegisterBufferObject */ 415 | CUDA_ENTRY_ENUM(cuGLRegisterBufferObject), 416 | /** cuGLMapBufferObject_v2 */ 417 | CUDA_ENTRY_ENUM(cuGLMapBufferObject_v2), 418 | /** cuGLMapBufferObject_v2_ptds */ 419 | CUDA_ENTRY_ENUM(cuGLMapBufferObject_v2_ptds), 420 | /** cuGLMapBufferObjectAsync_v2 */ 421 | CUDA_ENTRY_ENUM(cuGLMapBufferObjectAsync_v2), 422 | /** cuGLMapBufferObjectAsync_v2_ptsz */ 423 | CUDA_ENTRY_ENUM(cuGLMapBufferObjectAsync_v2_ptsz), 424 | /** cuGLUnmapBufferObject */ 425 | CUDA_ENTRY_ENUM(cuGLUnmapBufferObject), 426 | /** cuGLUnmapBufferObjectAsync */ 427 | CUDA_ENTRY_ENUM(cuGLUnmapBufferObjectAsync), 428 | /** cuGLUnregisterBufferObject */ 429 | CUDA_ENTRY_ENUM(cuGLUnregisterBufferObject), 430 | /** cuGLSetBufferObjectMapFlags */ 431 | CUDA_ENTRY_ENUM(cuGLSetBufferObjectMapFlags), 432 | /** cuGraphicsGLRegisterImage */ 433 | CUDA_ENTRY_ENUM(cuGraphicsGLRegisterImage), 434 | /** cuGraphicsGLRegisterBuffer */ 435 | CUDA_ENTRY_ENUM(cuGraphicsGLRegisterBuffer), 436 | /** cuGraphicsUnregisterResource */ 437 | CUDA_ENTRY_ENUM(cuGraphicsUnregisterResource), 438 | /** cuGraphicsMapResources */ 439 | CUDA_ENTRY_ENUM(cuGraphicsMapResources), 440 | /** cuGraphicsMapResources_ptsz */ 441 | CUDA_ENTRY_ENUM(cuGraphicsMapResources_ptsz), 442 | /** cuGraphicsUnmapResources */ 443 | CUDA_ENTRY_ENUM(cuGraphicsUnmapResources), 444 | /** 
cuGraphicsUnmapResources_ptsz */ 445 | CUDA_ENTRY_ENUM(cuGraphicsUnmapResources_ptsz), 446 | /** cuGraphicsResourceSetMapFlags_v2 */ 447 | CUDA_ENTRY_ENUM(cuGraphicsResourceSetMapFlags_v2), 448 | /** cuGraphicsSubResourceGetMappedArray */ 449 | CUDA_ENTRY_ENUM(cuGraphicsSubResourceGetMappedArray), 450 | /** cuGraphicsResourceGetMappedMipmappedArray */ 451 | CUDA_ENTRY_ENUM(cuGraphicsResourceGetMappedMipmappedArray), 452 | /** cuGraphicsResourceGetMappedPointer_v2 */ 453 | CUDA_ENTRY_ENUM(cuGraphicsResourceGetMappedPointer_v2), 454 | /** cuProfilerInitialize */ 455 | CUDA_ENTRY_ENUM(cuProfilerInitialize), 456 | /** cuProfilerStart */ 457 | CUDA_ENTRY_ENUM(cuProfilerStart), 458 | /** cuProfilerStop */ 459 | CUDA_ENTRY_ENUM(cuProfilerStop), 460 | /** cuVDPAUGetDevice */ 461 | CUDA_ENTRY_ENUM(cuVDPAUGetDevice), 462 | /** cuVDPAUCtxCreate_v2 */ 463 | CUDA_ENTRY_ENUM(cuVDPAUCtxCreate_v2), 464 | /** cuGraphicsVDPAURegisterVideoSurface */ 465 | CUDA_ENTRY_ENUM(cuGraphicsVDPAURegisterVideoSurface), 466 | /** cuGraphicsVDPAURegisterOutputSurface */ 467 | CUDA_ENTRY_ENUM(cuGraphicsVDPAURegisterOutputSurface), 468 | /** cuGetExportTable */ 469 | CUDA_ENTRY_ENUM(cuGetExportTable), 470 | /** cuOccupancyMaxActiveBlocksPerMultiprocessor */ 471 | CUDA_ENTRY_ENUM(cuOccupancyMaxActiveBlocksPerMultiprocessor), 472 | /** cuMemAdvise */ 473 | CUDA_ENTRY_ENUM(cuMemAdvise), 474 | /** cuMemPrefetchAsync */ 475 | CUDA_ENTRY_ENUM(cuMemPrefetchAsync), 476 | /** cuMemPrefetchAsync_ptsz */ 477 | CUDA_ENTRY_ENUM(cuMemPrefetchAsync_ptsz), 478 | /** cuMemRangeGetAttribute */ 479 | CUDA_ENTRY_ENUM(cuMemRangeGetAttribute), 480 | /** cuMemRangeGetAttributes */ 481 | CUDA_ENTRY_ENUM(cuMemRangeGetAttributes), 482 | /** cuGetErrorString */ 483 | CUDA_ENTRY_ENUM(cuGetErrorString), 484 | /** cuGetErrorName */ 485 | CUDA_ENTRY_ENUM(cuGetErrorName), 486 | /** cuArray3DCreate */ 487 | CUDA_ENTRY_ENUM(cuArray3DCreate), 488 | /** cuArray3DGetDescriptor */ 489 | CUDA_ENTRY_ENUM(cuArray3DGetDescriptor), 490 | 
/** cuArrayCreate */ 491 | CUDA_ENTRY_ENUM(cuArrayCreate), 492 | /** cuArrayGetDescriptor */ 493 | CUDA_ENTRY_ENUM(cuArrayGetDescriptor), 494 | /** cuCtxAttach */ 495 | CUDA_ENTRY_ENUM(cuCtxAttach), 496 | /** cuCtxCreate */ 497 | CUDA_ENTRY_ENUM(cuCtxCreate), 498 | /** cuCtxDestroy */ 499 | CUDA_ENTRY_ENUM(cuCtxDestroy), 500 | /** cuCtxDestroy_v2 */ 501 | CUDA_ENTRY_ENUM(cuCtxDestroy_v2), 502 | /** cuCtxPopCurrent */ 503 | CUDA_ENTRY_ENUM(cuCtxPopCurrent), 504 | /** cuCtxPopCurrent_v2 */ 505 | CUDA_ENTRY_ENUM(cuCtxPopCurrent_v2), 506 | /** cuCtxPushCurrent */ 507 | CUDA_ENTRY_ENUM(cuCtxPushCurrent), 508 | /** cuCtxPushCurrent_v2 */ 509 | CUDA_ENTRY_ENUM(cuCtxPushCurrent_v2), 510 | /** cudbgApiAttach */ 511 | CUDA_ENTRY_ENUM(cudbgApiAttach), 512 | /** cudbgApiDetach */ 513 | CUDA_ENTRY_ENUM(cudbgApiDetach), 514 | /** cudbgApiInit */ 515 | CUDA_ENTRY_ENUM(cudbgApiInit), 516 | /** cudbgGetAPI */ 517 | CUDA_ENTRY_ENUM(cudbgGetAPI), 518 | /** cudbgGetAPIVersion */ 519 | CUDA_ENTRY_ENUM(cudbgGetAPIVersion), 520 | /** cudbgMain */ 521 | CUDA_ENTRY_ENUM(cudbgMain), 522 | /** cudbgReportDriverApiError */ 523 | CUDA_ENTRY_ENUM(cudbgReportDriverApiError), 524 | /** cudbgReportDriverInternalError */ 525 | CUDA_ENTRY_ENUM(cudbgReportDriverInternalError), 526 | /** cuDeviceComputeCapability */ 527 | CUDA_ENTRY_ENUM(cuDeviceComputeCapability), 528 | /** cuDeviceGetProperties */ 529 | CUDA_ENTRY_ENUM(cuDeviceGetProperties), 530 | /** cuDeviceTotalMem */ 531 | CUDA_ENTRY_ENUM(cuDeviceTotalMem), 532 | /** cuEGLInit */ 533 | CUDA_ENTRY_ENUM(cuEGLInit), 534 | /** cuEGLStreamConsumerAcquireFrame */ 535 | CUDA_ENTRY_ENUM(cuEGLStreamConsumerAcquireFrame), 536 | /** cuEGLStreamConsumerConnect */ 537 | CUDA_ENTRY_ENUM(cuEGLStreamConsumerConnect), 538 | /** cuEGLStreamConsumerConnectWithFlags */ 539 | CUDA_ENTRY_ENUM(cuEGLStreamConsumerConnectWithFlags), 540 | /** cuEGLStreamConsumerDisconnect */ 541 | CUDA_ENTRY_ENUM(cuEGLStreamConsumerDisconnect), 542 | /** cuEGLStreamConsumerReleaseFrame 
*/ 543 | CUDA_ENTRY_ENUM(cuEGLStreamConsumerReleaseFrame), 544 | /** cuEGLStreamProducerConnect */ 545 | CUDA_ENTRY_ENUM(cuEGLStreamProducerConnect), 546 | /** cuEGLStreamProducerDisconnect */ 547 | CUDA_ENTRY_ENUM(cuEGLStreamProducerDisconnect), 548 | /** cuEGLStreamProducerPresentFrame */ 549 | CUDA_ENTRY_ENUM(cuEGLStreamProducerPresentFrame), 550 | /** cuEGLStreamProducerReturnFrame */ 551 | CUDA_ENTRY_ENUM(cuEGLStreamProducerReturnFrame), 552 | /** cuEventDestroy */ 553 | CUDA_ENTRY_ENUM(cuEventDestroy), 554 | /** cuFuncSetAttribute */ 555 | CUDA_ENTRY_ENUM(cuFuncSetAttribute), 556 | /** cuFuncSetBlockShape */ 557 | CUDA_ENTRY_ENUM(cuFuncSetBlockShape), 558 | /** cuFuncSetSharedSize */ 559 | CUDA_ENTRY_ENUM(cuFuncSetSharedSize), 560 | /** cuGLCtxCreate */ 561 | CUDA_ENTRY_ENUM(cuGLCtxCreate), 562 | /** cuGLGetDevices_v2 */ 563 | CUDA_ENTRY_ENUM(cuGLGetDevices_v2), 564 | /** cuGLMapBufferObject */ 565 | CUDA_ENTRY_ENUM(cuGLMapBufferObject), 566 | /** cuGLMapBufferObjectAsync */ 567 | CUDA_ENTRY_ENUM(cuGLMapBufferObjectAsync), 568 | /** cuGraphicsEGLRegisterImage */ 569 | CUDA_ENTRY_ENUM(cuGraphicsEGLRegisterImage), 570 | /** cuGraphicsResourceGetMappedEglFrame */ 571 | CUDA_ENTRY_ENUM(cuGraphicsResourceGetMappedEglFrame), 572 | /** cuGraphicsResourceGetMappedPointer */ 573 | CUDA_ENTRY_ENUM(cuGraphicsResourceGetMappedPointer), 574 | /** cuGraphicsResourceSetMapFlags */ 575 | CUDA_ENTRY_ENUM(cuGraphicsResourceSetMapFlags), 576 | /** cuLaunch */ 577 | CUDA_ENTRY_ENUM(cuLaunch), 578 | /** cuLaunchCooperativeKernel */ 579 | CUDA_ENTRY_ENUM(cuLaunchCooperativeKernel), 580 | /** cuLaunchCooperativeKernelMultiDevice */ 581 | CUDA_ENTRY_ENUM(cuLaunchCooperativeKernelMultiDevice), 582 | /** cuLaunchCooperativeKernel_ptsz */ 583 | CUDA_ENTRY_ENUM(cuLaunchCooperativeKernel_ptsz), 584 | /** cuLaunchGrid */ 585 | CUDA_ENTRY_ENUM(cuLaunchGrid), 586 | /** cuLaunchGridAsync */ 587 | CUDA_ENTRY_ENUM(cuLaunchGridAsync), 588 | /** cuLinkAddData_v2 */ 589 | 
CUDA_ENTRY_ENUM(cuLinkAddData_v2), 590 | /** cuLinkAddFile_v2 */ 591 | CUDA_ENTRY_ENUM(cuLinkAddFile_v2), 592 | /** cuLinkCreate_v2 */ 593 | CUDA_ENTRY_ENUM(cuLinkCreate_v2), 594 | /** cuMemAlloc */ 595 | CUDA_ENTRY_ENUM(cuMemAlloc), 596 | /** cuMemAllocHost */ 597 | CUDA_ENTRY_ENUM(cuMemAllocHost), 598 | /** cuMemAllocHost_v2 */ 599 | CUDA_ENTRY_ENUM(cuMemAllocHost_v2), 600 | /** cuMemAllocPitch */ 601 | CUDA_ENTRY_ENUM(cuMemAllocPitch), 602 | /** cuMemcpy2D */ 603 | CUDA_ENTRY_ENUM(cuMemcpy2D), 604 | /** cuMemcpy2DAsync */ 605 | CUDA_ENTRY_ENUM(cuMemcpy2DAsync), 606 | /** cuMemcpy2DUnaligned */ 607 | CUDA_ENTRY_ENUM(cuMemcpy2DUnaligned), 608 | /** cuMemcpy2D_v2 */ 609 | CUDA_ENTRY_ENUM(cuMemcpy2D_v2), 610 | /** cuMemcpy2D_v2_ptds */ 611 | CUDA_ENTRY_ENUM(cuMemcpy2D_v2_ptds), 612 | /** cuMemcpy3D */ 613 | CUDA_ENTRY_ENUM(cuMemcpy3D), 614 | /** cuMemcpy3DAsync */ 615 | CUDA_ENTRY_ENUM(cuMemcpy3DAsync), 616 | /** cuMemcpyAtoA */ 617 | CUDA_ENTRY_ENUM(cuMemcpyAtoA), 618 | /** cuMemcpyAtoA_v2 */ 619 | CUDA_ENTRY_ENUM(cuMemcpyAtoA_v2), 620 | /** cuMemcpyAtoA_v2_ptds */ 621 | CUDA_ENTRY_ENUM(cuMemcpyAtoA_v2_ptds), 622 | /** cuMemcpyAtoD */ 623 | CUDA_ENTRY_ENUM(cuMemcpyAtoD), 624 | /** cuMemcpyAtoD_v2 */ 625 | CUDA_ENTRY_ENUM(cuMemcpyAtoD_v2), 626 | /** cuMemcpyAtoD_v2_ptds */ 627 | CUDA_ENTRY_ENUM(cuMemcpyAtoD_v2_ptds), 628 | /** cuMemcpyAtoH */ 629 | CUDA_ENTRY_ENUM(cuMemcpyAtoH), 630 | /** cuMemcpyAtoHAsync */ 631 | CUDA_ENTRY_ENUM(cuMemcpyAtoHAsync), 632 | /** cuMemcpyAtoHAsync_v2 */ 633 | CUDA_ENTRY_ENUM(cuMemcpyAtoHAsync_v2), 634 | /** cuMemcpyAtoHAsync_v2_ptsz */ 635 | CUDA_ENTRY_ENUM(cuMemcpyAtoHAsync_v2_ptsz), 636 | /** cuMemcpyAtoH_v2 */ 637 | CUDA_ENTRY_ENUM(cuMemcpyAtoH_v2), 638 | /** cuMemcpyAtoH_v2_ptds */ 639 | CUDA_ENTRY_ENUM(cuMemcpyAtoH_v2_ptds), 640 | /** cuMemcpyDtoA */ 641 | CUDA_ENTRY_ENUM(cuMemcpyDtoA), 642 | /** cuMemcpyDtoA_v2 */ 643 | CUDA_ENTRY_ENUM(cuMemcpyDtoA_v2), 644 | /** cuMemcpyDtoA_v2_ptds */ 645 | 
CUDA_ENTRY_ENUM(cuMemcpyDtoA_v2_ptds), 646 | /** cuMemcpyDtoD */ 647 | CUDA_ENTRY_ENUM(cuMemcpyDtoD), 648 | /** cuMemcpyDtoDAsync */ 649 | CUDA_ENTRY_ENUM(cuMemcpyDtoDAsync), 650 | /** cuMemcpyDtoH */ 651 | CUDA_ENTRY_ENUM(cuMemcpyDtoH), 652 | /** cuMemcpyDtoHAsync */ 653 | CUDA_ENTRY_ENUM(cuMemcpyDtoHAsync), 654 | /** cuMemcpyHtoA */ 655 | CUDA_ENTRY_ENUM(cuMemcpyHtoA), 656 | /** cuMemcpyHtoAAsync */ 657 | CUDA_ENTRY_ENUM(cuMemcpyHtoAAsync), 658 | /** cuMemcpyHtoAAsync_v2 */ 659 | CUDA_ENTRY_ENUM(cuMemcpyHtoAAsync_v2), 660 | /** cuMemcpyHtoAAsync_v2_ptsz */ 661 | CUDA_ENTRY_ENUM(cuMemcpyHtoAAsync_v2_ptsz), 662 | /** cuMemcpyHtoA_v2 */ 663 | CUDA_ENTRY_ENUM(cuMemcpyHtoA_v2), 664 | /** cuMemcpyHtoA_v2_ptds */ 665 | CUDA_ENTRY_ENUM(cuMemcpyHtoA_v2_ptds), 666 | /** cuMemcpyHtoD */ 667 | CUDA_ENTRY_ENUM(cuMemcpyHtoD), 668 | /** cuMemcpyHtoDAsync */ 669 | CUDA_ENTRY_ENUM(cuMemcpyHtoDAsync), 670 | /** cuMemFree */ 671 | CUDA_ENTRY_ENUM(cuMemFree), 672 | /** cuMemGetAddressRange */ 673 | CUDA_ENTRY_ENUM(cuMemGetAddressRange), 674 | // Deprecated 675 | // CUDA_ENTRY_ENUM(cuMemGetAttribute), 676 | // CUDA_ENTRY_ENUM(cuMemGetAttribute_v2), 677 | /** cuMemGetInfo */ 678 | CUDA_ENTRY_ENUM(cuMemGetInfo), 679 | /** cuMemHostGetDevicePointer */ 680 | CUDA_ENTRY_ENUM(cuMemHostGetDevicePointer), 681 | /** cuMemHostRegister */ 682 | CUDA_ENTRY_ENUM(cuMemHostRegister), 683 | /** cuMemsetD16 */ 684 | CUDA_ENTRY_ENUM(cuMemsetD16), 685 | /** cuMemsetD16Async */ 686 | CUDA_ENTRY_ENUM(cuMemsetD16Async), 687 | /** cuMemsetD16Async_ptsz */ 688 | CUDA_ENTRY_ENUM(cuMemsetD16Async_ptsz), 689 | /** cuMemsetD16_v2 */ 690 | CUDA_ENTRY_ENUM(cuMemsetD16_v2), 691 | /** cuMemsetD16_v2_ptds */ 692 | CUDA_ENTRY_ENUM(cuMemsetD16_v2_ptds), 693 | /** cuMemsetD2D16 */ 694 | CUDA_ENTRY_ENUM(cuMemsetD2D16), 695 | /** cuMemsetD2D16Async */ 696 | CUDA_ENTRY_ENUM(cuMemsetD2D16Async), 697 | /** cuMemsetD2D16Async_ptsz */ 698 | CUDA_ENTRY_ENUM(cuMemsetD2D16Async_ptsz), 699 | /** cuMemsetD2D16_v2 */ 700 | 
CUDA_ENTRY_ENUM(cuMemsetD2D16_v2), 701 | /** cuMemsetD2D16_v2_ptds */ 702 | CUDA_ENTRY_ENUM(cuMemsetD2D16_v2_ptds), 703 | /** cuMemsetD2D32 */ 704 | CUDA_ENTRY_ENUM(cuMemsetD2D32), 705 | /** cuMemsetD2D32Async */ 706 | CUDA_ENTRY_ENUM(cuMemsetD2D32Async), 707 | /** cuMemsetD2D32Async_ptsz */ 708 | CUDA_ENTRY_ENUM(cuMemsetD2D32Async_ptsz), 709 | /** cuMemsetD2D32_v2 */ 710 | CUDA_ENTRY_ENUM(cuMemsetD2D32_v2), 711 | /** cuMemsetD2D32_v2_ptds */ 712 | CUDA_ENTRY_ENUM(cuMemsetD2D32_v2_ptds), 713 | /** cuMemsetD2D8 */ 714 | CUDA_ENTRY_ENUM(cuMemsetD2D8), 715 | /** cuMemsetD32 */ 716 | CUDA_ENTRY_ENUM(cuMemsetD32), 717 | /** cuMemsetD32Async */ 718 | CUDA_ENTRY_ENUM(cuMemsetD32Async), 719 | /** cuMemsetD32Async_ptsz */ 720 | CUDA_ENTRY_ENUM(cuMemsetD32Async_ptsz), 721 | /** cuMemsetD32_v2 */ 722 | CUDA_ENTRY_ENUM(cuMemsetD32_v2), 723 | /** cuMemsetD32_v2_ptds */ 724 | CUDA_ENTRY_ENUM(cuMemsetD32_v2_ptds), 725 | /** cuMemsetD8 */ 726 | CUDA_ENTRY_ENUM(cuMemsetD8), 727 | /** cuModuleGetGlobal */ 728 | CUDA_ENTRY_ENUM(cuModuleGetGlobal), 729 | /** cuModuleLoadDataEx */ 730 | CUDA_ENTRY_ENUM(cuModuleLoadDataEx), 731 | /** cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags */ 732 | CUDA_ENTRY_ENUM(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags), 733 | /** cuOccupancyMaxPotentialBlockSize */ 734 | CUDA_ENTRY_ENUM(cuOccupancyMaxPotentialBlockSize), 735 | /** cuOccupancyMaxPotentialBlockSizeWithFlags */ 736 | CUDA_ENTRY_ENUM(cuOccupancyMaxPotentialBlockSizeWithFlags), 737 | /** cuParamSetf */ 738 | CUDA_ENTRY_ENUM(cuParamSetf), 739 | /** cuParamSeti */ 740 | CUDA_ENTRY_ENUM(cuParamSeti), 741 | /** cuParamSetSize */ 742 | CUDA_ENTRY_ENUM(cuParamSetSize), 743 | /** cuParamSetTexRef */ 744 | CUDA_ENTRY_ENUM(cuParamSetTexRef), 745 | /** cuParamSetv */ 746 | CUDA_ENTRY_ENUM(cuParamSetv), 747 | /** cuPointerSetAttribute */ 748 | CUDA_ENTRY_ENUM(cuPointerSetAttribute), 749 | /** cuStreamDestroy */ 750 | CUDA_ENTRY_ENUM(cuStreamDestroy), 751 | /** cuStreamWaitValue64 */ 752 | 
CUDA_ENTRY_ENUM(cuStreamWaitValue64), 753 | /** cuStreamWaitValue64_ptsz */ 754 | CUDA_ENTRY_ENUM(cuStreamWaitValue64_ptsz), 755 | /** cuStreamWriteValue64 */ 756 | CUDA_ENTRY_ENUM(cuStreamWriteValue64), 757 | /** cuStreamWriteValue64_ptsz */ 758 | CUDA_ENTRY_ENUM(cuStreamWriteValue64_ptsz), 759 | /** cuSurfRefGetArray */ 760 | CUDA_ENTRY_ENUM(cuSurfRefGetArray), 761 | /** cuTexRefGetAddress */ 762 | CUDA_ENTRY_ENUM(cuTexRefGetAddress), 763 | /** cuTexRefGetAddressMode */ 764 | CUDA_ENTRY_ENUM(cuTexRefGetAddressMode), 765 | /** cuTexRefGetAddress_v2 */ 766 | CUDA_ENTRY_ENUM(cuTexRefGetAddress_v2), 767 | /** cuTexRefGetArray */ 768 | CUDA_ENTRY_ENUM(cuTexRefGetArray), 769 | /** cuTexRefGetFilterMode */ 770 | CUDA_ENTRY_ENUM(cuTexRefGetFilterMode), 771 | /** cuTexRefGetFlags */ 772 | CUDA_ENTRY_ENUM(cuTexRefGetFlags), 773 | /** cuTexRefGetFormat */ 774 | CUDA_ENTRY_ENUM(cuTexRefGetFormat), 775 | /** cuTexRefGetMaxAnisotropy */ 776 | CUDA_ENTRY_ENUM(cuTexRefGetMaxAnisotropy), 777 | /** cuTexRefGetMipmapFilterMode */ 778 | CUDA_ENTRY_ENUM(cuTexRefGetMipmapFilterMode), 779 | /** cuTexRefGetMipmapLevelBias */ 780 | CUDA_ENTRY_ENUM(cuTexRefGetMipmapLevelBias), 781 | /** cuTexRefGetMipmapLevelClamp */ 782 | CUDA_ENTRY_ENUM(cuTexRefGetMipmapLevelClamp), 783 | /** cuTexRefGetMipmappedArray */ 784 | CUDA_ENTRY_ENUM(cuTexRefGetMipmappedArray), 785 | /** cuTexRefSetAddress */ 786 | CUDA_ENTRY_ENUM(cuTexRefSetAddress), 787 | /** cuTexRefSetAddress2D */ 788 | CUDA_ENTRY_ENUM(cuTexRefSetAddress2D), 789 | /** cuTexRefSetAddress2D_v2 */ 790 | CUDA_ENTRY_ENUM(cuTexRefSetAddress2D_v2), 791 | /** cuVDPAUCtxCreate */ 792 | CUDA_ENTRY_ENUM(cuVDPAUCtxCreate), 793 | /** cuEGLApiInit */ 794 | CUDA_ENTRY_ENUM(cuEGLApiInit), 795 | /** cuDestroyExternalMemory */ 796 | CUDA_ENTRY_ENUM(cuDestroyExternalMemory), 797 | /** cuDestroyExternalSemaphore */ 798 | CUDA_ENTRY_ENUM(cuDestroyExternalSemaphore), 799 | /** cuDeviceGetUuid */ 800 | CUDA_ENTRY_ENUM(cuDeviceGetUuid), 801 | /** 
cuExternalMemoryGetMappedBuffer */ 802 | CUDA_ENTRY_ENUM(cuExternalMemoryGetMappedBuffer), 803 | /** cuExternalMemoryGetMappedMipmappedArray */ 804 | CUDA_ENTRY_ENUM(cuExternalMemoryGetMappedMipmappedArray), 805 | /** cuGraphAddChildGraphNode */ 806 | CUDA_ENTRY_ENUM(cuGraphAddChildGraphNode), 807 | /** cuGraphAddDependencies */ 808 | CUDA_ENTRY_ENUM(cuGraphAddDependencies), 809 | /** cuGraphAddEmptyNode */ 810 | CUDA_ENTRY_ENUM(cuGraphAddEmptyNode), 811 | /** cuGraphAddHostNode */ 812 | CUDA_ENTRY_ENUM(cuGraphAddHostNode), 813 | /** cuGraphAddKernelNode */ 814 | CUDA_ENTRY_ENUM(cuGraphAddKernelNode), 815 | /** cuGraphAddMemcpyNode */ 816 | CUDA_ENTRY_ENUM(cuGraphAddMemcpyNode), 817 | /** cuGraphAddMemsetNode */ 818 | CUDA_ENTRY_ENUM(cuGraphAddMemsetNode), 819 | /** cuGraphChildGraphNodeGetGraph */ 820 | CUDA_ENTRY_ENUM(cuGraphChildGraphNodeGetGraph), 821 | /** cuGraphClone */ 822 | CUDA_ENTRY_ENUM(cuGraphClone), 823 | /** cuGraphCreate */ 824 | CUDA_ENTRY_ENUM(cuGraphCreate), 825 | /** cuGraphDestroy */ 826 | CUDA_ENTRY_ENUM(cuGraphDestroy), 827 | /** cuGraphDestroyNode */ 828 | CUDA_ENTRY_ENUM(cuGraphDestroyNode), 829 | /** cuGraphExecDestroy */ 830 | CUDA_ENTRY_ENUM(cuGraphExecDestroy), 831 | /** cuGraphGetEdges */ 832 | CUDA_ENTRY_ENUM(cuGraphGetEdges), 833 | /** cuGraphGetNodes */ 834 | CUDA_ENTRY_ENUM(cuGraphGetNodes), 835 | /** cuGraphGetRootNodes */ 836 | CUDA_ENTRY_ENUM(cuGraphGetRootNodes), 837 | /** cuGraphHostNodeGetParams */ 838 | CUDA_ENTRY_ENUM(cuGraphHostNodeGetParams), 839 | /** cuGraphHostNodeSetParams */ 840 | CUDA_ENTRY_ENUM(cuGraphHostNodeSetParams), 841 | /** cuGraphInstantiate */ 842 | CUDA_ENTRY_ENUM(cuGraphInstantiate), 843 | /** cuGraphKernelNodeGetParams */ 844 | CUDA_ENTRY_ENUM(cuGraphKernelNodeGetParams), 845 | /** cuGraphKernelNodeSetParams */ 846 | CUDA_ENTRY_ENUM(cuGraphKernelNodeSetParams), 847 | /** cuGraphLaunch */ 848 | CUDA_ENTRY_ENUM(cuGraphLaunch), 849 | /** cuGraphLaunch_ptsz */ 850 | CUDA_ENTRY_ENUM(cuGraphLaunch_ptsz), 851 
| /** cuGraphMemcpyNodeGetParams */ 852 | CUDA_ENTRY_ENUM(cuGraphMemcpyNodeGetParams), 853 | /** cuGraphMemcpyNodeSetParams */ 854 | CUDA_ENTRY_ENUM(cuGraphMemcpyNodeSetParams), 855 | /** cuGraphMemsetNodeGetParams */ 856 | CUDA_ENTRY_ENUM(cuGraphMemsetNodeGetParams), 857 | /** cuGraphMemsetNodeSetParams */ 858 | CUDA_ENTRY_ENUM(cuGraphMemsetNodeSetParams), 859 | /** cuGraphNodeFindInClone */ 860 | CUDA_ENTRY_ENUM(cuGraphNodeFindInClone), 861 | /** cuGraphNodeGetDependencies */ 862 | CUDA_ENTRY_ENUM(cuGraphNodeGetDependencies), 863 | /** cuGraphNodeGetDependentNodes */ 864 | CUDA_ENTRY_ENUM(cuGraphNodeGetDependentNodes), 865 | /** cuGraphNodeGetType */ 866 | CUDA_ENTRY_ENUM(cuGraphNodeGetType), 867 | /** cuGraphRemoveDependencies */ 868 | CUDA_ENTRY_ENUM(cuGraphRemoveDependencies), 869 | /** cuImportExternalMemory */ 870 | CUDA_ENTRY_ENUM(cuImportExternalMemory), 871 | /** cuImportExternalSemaphore */ 872 | CUDA_ENTRY_ENUM(cuImportExternalSemaphore), 873 | /** cuLaunchHostFunc */ 874 | CUDA_ENTRY_ENUM(cuLaunchHostFunc), 875 | /** cuLaunchHostFunc_ptsz */ 876 | CUDA_ENTRY_ENUM(cuLaunchHostFunc_ptsz), 877 | /** cuSignalExternalSemaphoresAsync */ 878 | CUDA_ENTRY_ENUM(cuSignalExternalSemaphoresAsync), 879 | /** cuSignalExternalSemaphoresAsync_ptsz */ 880 | CUDA_ENTRY_ENUM(cuSignalExternalSemaphoresAsync_ptsz), 881 | /** cuStreamBeginCapture */ 882 | CUDA_ENTRY_ENUM(cuStreamBeginCapture), 883 | /** cuStreamBeginCapture_ptsz */ 884 | CUDA_ENTRY_ENUM(cuStreamBeginCapture_ptsz), 885 | /** cuStreamEndCapture */ 886 | CUDA_ENTRY_ENUM(cuStreamEndCapture), 887 | /** cuStreamEndCapture_ptsz */ 888 | CUDA_ENTRY_ENUM(cuStreamEndCapture_ptsz), 889 | /** cuStreamGetCtx */ 890 | CUDA_ENTRY_ENUM(cuStreamGetCtx), 891 | /** cuStreamGetCtx_ptsz */ 892 | CUDA_ENTRY_ENUM(cuStreamGetCtx_ptsz), 893 | /** cuStreamIsCapturing */ 894 | CUDA_ENTRY_ENUM(cuStreamIsCapturing), 895 | /** cuStreamIsCapturing_ptsz */ 896 | CUDA_ENTRY_ENUM(cuStreamIsCapturing_ptsz), 897 | /** 
cuWaitExternalSemaphoresAsync */ 898 | CUDA_ENTRY_ENUM(cuWaitExternalSemaphoresAsync), 899 | /** cuWaitExternalSemaphoresAsync_ptsz */ 900 | CUDA_ENTRY_ENUM(cuWaitExternalSemaphoresAsync_ptsz), 901 | /** cuGraphExecKernelNodeSetParams */ 902 | CUDA_ENTRY_ENUM(cuGraphExecKernelNodeSetParams), 903 | /** cuStreamBeginCapture_v2 */ 904 | CUDA_ENTRY_ENUM(cuStreamBeginCapture_v2), 905 | /** cuStreamBeginCapture_v2_ptsz */ 906 | CUDA_ENTRY_ENUM(cuStreamBeginCapture_v2_ptsz), 907 | /** cuStreamGetCaptureInfo */ 908 | CUDA_ENTRY_ENUM(cuStreamGetCaptureInfo), 909 | /** cuStreamGetCaptureInfo_ptsz */ 910 | CUDA_ENTRY_ENUM(cuStreamGetCaptureInfo_ptsz), 911 | /** cuThreadExchangeStreamCaptureMode */ 912 | CUDA_ENTRY_ENUM(cuThreadExchangeStreamCaptureMode), 913 | /** cuDeviceGetNvSciSyncAttributes */ 914 | CUDA_ENTRY_ENUM(cuDeviceGetNvSciSyncAttributes), 915 | /** cuGraphExecHostNodeSetParams */ 916 | CUDA_ENTRY_ENUM(cuGraphExecHostNodeSetParams), 917 | /** cuGraphExecMemcpyNodeSetParams */ 918 | CUDA_ENTRY_ENUM(cuGraphExecMemcpyNodeSetParams), 919 | /** cuGraphExecMemsetNodeSetParams */ 920 | CUDA_ENTRY_ENUM(cuGraphExecMemsetNodeSetParams), 921 | /** cuGraphExecUpdate */ 922 | CUDA_ENTRY_ENUM(cuGraphExecUpdate), 923 | /** cuMemAddressFree */ 924 | CUDA_ENTRY_ENUM(cuMemAddressFree), 925 | /** cuMemAddressReserve */ 926 | CUDA_ENTRY_ENUM(cuMemAddressReserve), 927 | /** cuMemCreate */ 928 | CUDA_ENTRY_ENUM(cuMemCreate), 929 | /** cuMemExportToShareableHandle */ 930 | CUDA_ENTRY_ENUM(cuMemExportToShareableHandle), 931 | /** cuMemGetAccess */ 932 | CUDA_ENTRY_ENUM(cuMemGetAccess), 933 | /** cuMemGetAllocationGranularity */ 934 | CUDA_ENTRY_ENUM(cuMemGetAllocationGranularity), 935 | /** cuMemGetAllocationPropertiesFromHandle */ 936 | CUDA_ENTRY_ENUM(cuMemGetAllocationPropertiesFromHandle), 937 | /** cuMemImportFromShareableHandle */ 938 | CUDA_ENTRY_ENUM(cuMemImportFromShareableHandle), 939 | /** cuMemMap */ 940 | CUDA_ENTRY_ENUM(cuMemMap), 941 | /** cuMemRelease */ 942 | 
CUDA_ENTRY_ENUM(cuMemRelease), 943 | /** cuMemSetAccess */ 944 | CUDA_ENTRY_ENUM(cuMemSetAccess), 945 | /** cuMemUnmap */ 946 | CUDA_ENTRY_ENUM(cuMemUnmap), 947 | /** cuCtxResetPersistingL2Cache */ 948 | CUDA_ENTRY_ENUM(cuCtxResetPersistingL2Cache), 949 | /** cuDevicePrimaryCtxRelease_v2 */ 950 | CUDA_ENTRY_ENUM(cuDevicePrimaryCtxRelease_v2), 951 | /** cuDevicePrimaryCtxReset_v2 */ 952 | CUDA_ENTRY_ENUM(cuDevicePrimaryCtxReset_v2), 953 | /** cuDevicePrimaryCtxSetFlags_v2 */ 954 | CUDA_ENTRY_ENUM(cuDevicePrimaryCtxSetFlags_v2), 955 | /** cuFuncGetModule */ 956 | CUDA_ENTRY_ENUM(cuFuncGetModule), 957 | /** cuGraphInstantiate_v2 */ 958 | CUDA_ENTRY_ENUM(cuGraphInstantiate_v2), 959 | /** cuGraphKernelNodeCopyAttributes */ 960 | CUDA_ENTRY_ENUM(cuGraphKernelNodeCopyAttributes), 961 | /** cuGraphKernelNodeGetAttribute */ 962 | CUDA_ENTRY_ENUM(cuGraphKernelNodeGetAttribute), 963 | /** cuGraphKernelNodeSetAttribute */ 964 | CUDA_ENTRY_ENUM(cuGraphKernelNodeSetAttribute), 965 | /** cuMemRetainAllocationHandle */ 966 | CUDA_ENTRY_ENUM(cuMemRetainAllocationHandle), 967 | /** cuOccupancyAvailableDynamicSMemPerBlock */ 968 | CUDA_ENTRY_ENUM(cuOccupancyAvailableDynamicSMemPerBlock), 969 | /** cuStreamCopyAttributes */ 970 | CUDA_ENTRY_ENUM(cuStreamCopyAttributes), 971 | /** cuStreamCopyAttributes_ptsz */ 972 | CUDA_ENTRY_ENUM(cuStreamCopyAttributes_ptsz), 973 | /** cuStreamGetAttribute */ 974 | CUDA_ENTRY_ENUM(cuStreamGetAttribute), 975 | /** cuStreamGetAttribute_ptsz */ 976 | CUDA_ENTRY_ENUM(cuStreamGetAttribute_ptsz), 977 | /** cuStreamSetAttribute */ 978 | CUDA_ENTRY_ENUM(cuStreamSetAttribute), 979 | /** cuStreamSetAttribute_ptsz */ 980 | CUDA_ENTRY_ENUM(cuStreamSetAttribute_ptsz), 981 | /** 11.2 */ 982 | /** cuArrayGetPlane */ 983 | CUDA_ENTRY_ENUM(cuArrayGetPlane), 984 | /** cuArrayGetSparseProperties */ 985 | CUDA_ENTRY_ENUM(cuArrayGetSparseProperties), 986 | /** cuDeviceGetDefaultMemPool */ 987 | CUDA_ENTRY_ENUM(cuDeviceGetDefaultMemPool), 988 | /** cuDeviceGetLuid */ 989 
| CUDA_ENTRY_ENUM(cuDeviceGetLuid), 990 | /** cuDeviceGetMemPool */ 991 | CUDA_ENTRY_ENUM(cuDeviceGetMemPool), 992 | /** cuDeviceGetTexture1DLinearMaxWidth */ 993 | CUDA_ENTRY_ENUM(cuDeviceGetTexture1DLinearMaxWidth), 994 | /** cuDeviceSetMemPool */ 995 | CUDA_ENTRY_ENUM(cuDeviceSetMemPool), 996 | /** cuEventRecordWithFlags */ 997 | CUDA_ENTRY_ENUM(cuEventRecordWithFlags), 998 | /** cuEventRecordWithFlags_ptsz */ 999 | CUDA_ENTRY_ENUM(cuEventRecordWithFlags_ptsz), 1000 | /** cuGraphAddEventRecordNode */ 1001 | CUDA_ENTRY_ENUM(cuGraphAddEventRecordNode), 1002 | /** cuGraphAddEventWaitNode */ 1003 | CUDA_ENTRY_ENUM(cuGraphAddEventWaitNode), 1004 | /** cuGraphAddExternalSemaphoresSignalNode */ 1005 | CUDA_ENTRY_ENUM(cuGraphAddExternalSemaphoresSignalNode), 1006 | /** cuGraphAddExternalSemaphoresWaitNode */ 1007 | CUDA_ENTRY_ENUM(cuGraphAddExternalSemaphoresWaitNode), 1008 | /** cuGraphEventRecordNodeGetEvent */ 1009 | CUDA_ENTRY_ENUM(cuGraphEventRecordNodeGetEvent), 1010 | /** cuGraphEventRecordNodeSetEvent */ 1011 | CUDA_ENTRY_ENUM(cuGraphEventRecordNodeSetEvent), 1012 | /** cuGraphEventWaitNodeGetEvent */ 1013 | CUDA_ENTRY_ENUM(cuGraphEventWaitNodeGetEvent), 1014 | /** cuGraphEventWaitNodeSetEvent */ 1015 | CUDA_ENTRY_ENUM(cuGraphEventWaitNodeSetEvent), 1016 | /** cuGraphExecChildGraphNodeSetParams */ 1017 | CUDA_ENTRY_ENUM(cuGraphExecChildGraphNodeSetParams), 1018 | /** cuGraphExecEventRecordNodeSetEvent */ 1019 | CUDA_ENTRY_ENUM(cuGraphExecEventRecordNodeSetEvent), 1020 | /** cuGraphExecEventWaitNodeSetEvent */ 1021 | CUDA_ENTRY_ENUM(cuGraphExecEventWaitNodeSetEvent), 1022 | /** cuGraphExecExternalSemaphoresSignalNodeSetParams */ 1023 | CUDA_ENTRY_ENUM(cuGraphExecExternalSemaphoresSignalNodeSetParams), 1024 | /** cuGraphExecExternalSemaphoresWaitNodeSetParams */ 1025 | CUDA_ENTRY_ENUM(cuGraphExecExternalSemaphoresWaitNodeSetParams), 1026 | /** cuGraphExternalSemaphoresSignalNodeGetParams */ 1027 | CUDA_ENTRY_ENUM(cuGraphExternalSemaphoresSignalNodeGetParams), 1028 
| /** cuGraphExternalSemaphoresSignalNodeSetParams */ 1029 | CUDA_ENTRY_ENUM(cuGraphExternalSemaphoresSignalNodeSetParams), 1030 | /** cuGraphExternalSemaphoresWaitNodeGetParams */ 1031 | CUDA_ENTRY_ENUM(cuGraphExternalSemaphoresWaitNodeGetParams), 1032 | /** cuGraphExternalSemaphoresWaitNodeSetParams */ 1033 | CUDA_ENTRY_ENUM(cuGraphExternalSemaphoresWaitNodeSetParams), 1034 | /** cuGraphUpload */ 1035 | CUDA_ENTRY_ENUM(cuGraphUpload), 1036 | /** cuGraphUpload_ptsz */ 1037 | CUDA_ENTRY_ENUM(cuGraphUpload_ptsz), 1038 | /** cuIpcOpenMemHandle_v2 */ 1039 | CUDA_ENTRY_ENUM(cuIpcOpenMemHandle_v2), 1040 | /** memory pool should be concerned ? */ 1041 | /** cuMemAllocAsync */ 1042 | CUDA_ENTRY_ENUM(cuMemAllocAsync), 1043 | /** cuMemAllocAsync_ptsz */ 1044 | CUDA_ENTRY_ENUM(cuMemAllocAsync_ptsz), 1045 | /** cuMemAllocFromPoolAsync */ 1046 | CUDA_ENTRY_ENUM(cuMemAllocFromPoolAsync), 1047 | /** cuMemAllocFromPoolAsync_ptsz */ 1048 | CUDA_ENTRY_ENUM(cuMemAllocFromPoolAsync_ptsz), 1049 | /** cuMemFreeAsync */ 1050 | CUDA_ENTRY_ENUM(cuMemFreeAsync), 1051 | /** cuMemFreeAsync_ptsz */ 1052 | CUDA_ENTRY_ENUM(cuMemFreeAsync_ptsz), 1053 | /** cuMemMapArrayAsync */ 1054 | CUDA_ENTRY_ENUM(cuMemMapArrayAsync), 1055 | /** cuMemMapArrayAsync_ptsz */ 1056 | CUDA_ENTRY_ENUM(cuMemMapArrayAsync_ptsz), 1057 | /** cuMemPoolCreate */ 1058 | CUDA_ENTRY_ENUM(cuMemPoolCreate), 1059 | /** cuMemPoolDestroy */ 1060 | CUDA_ENTRY_ENUM(cuMemPoolDestroy), 1061 | /** cuMemPoolExportPointer */ 1062 | CUDA_ENTRY_ENUM(cuMemPoolExportPointer), 1063 | /** cuMemPoolExportToShareableHandle */ 1064 | CUDA_ENTRY_ENUM(cuMemPoolExportToShareableHandle), 1065 | /** cuMemPoolGetAccess */ 1066 | CUDA_ENTRY_ENUM(cuMemPoolGetAccess), 1067 | /** cuMemPoolGetAttribute */ 1068 | CUDA_ENTRY_ENUM(cuMemPoolGetAttribute), 1069 | /** cuMemPoolImportFromShareableHandle */ 1070 | CUDA_ENTRY_ENUM(cuMemPoolImportFromShareableHandle), 1071 | /** cuMemPoolImportPointer */ 1072 | CUDA_ENTRY_ENUM(cuMemPoolImportPointer), 1073 | /** 
cuMemPoolSetAccess */ 1074 | CUDA_ENTRY_ENUM(cuMemPoolSetAccess), 1075 | /** cuMemPoolSetAttribute */ 1076 | CUDA_ENTRY_ENUM(cuMemPoolSetAttribute), 1077 | /** cuMemPoolTrimTo */ 1078 | CUDA_ENTRY_ENUM(cuMemPoolTrimTo), 1079 | /** cuMipmappedArrayGetSparseProperties */ 1080 | CUDA_ENTRY_ENUM(cuMipmappedArrayGetSparseProperties), 1081 | CUDA_ENTRY_ENUM(cuCtxCreate_v3), 1082 | CUDA_ENTRY_ENUM(cuCtxGetExecAffinity), 1083 | CUDA_ENTRY_ENUM(cuDeviceGetExecAffinitySupport), 1084 | CUDA_ENTRY_ENUM(cuDeviceGetGraphMemAttribute), 1085 | CUDA_ENTRY_ENUM(cuDeviceGetUuid_v2), 1086 | CUDA_ENTRY_ENUM(cuDeviceGraphMemTrim), 1087 | CUDA_ENTRY_ENUM(cuDeviceSetGraphMemAttribute), 1088 | CUDA_ENTRY_ENUM(cuFlushGPUDirectRDMAWrites), 1089 | CUDA_ENTRY_ENUM(cuGetProcAddress), 1090 | CUDA_ENTRY_ENUM(cuGraphAddMemAllocNode), 1091 | CUDA_ENTRY_ENUM(cuGraphAddMemFreeNode), 1092 | CUDA_ENTRY_ENUM(cuGraphDebugDotPrint), 1093 | CUDA_ENTRY_ENUM(cuGraphInstantiateWithFlags), 1094 | CUDA_ENTRY_ENUM(cuGraphMemAllocNodeGetParams), 1095 | CUDA_ENTRY_ENUM(cuGraphMemFreeNodeGetParams), 1096 | CUDA_ENTRY_ENUM(cuGraphReleaseUserObject), 1097 | CUDA_ENTRY_ENUM(cuGraphRetainUserObject), 1098 | CUDA_ENTRY_ENUM(cuStreamGetCaptureInfo_v2), 1099 | CUDA_ENTRY_ENUM(cuStreamGetCaptureInfo_v2_ptsz), 1100 | CUDA_ENTRY_ENUM(cuStreamUpdateCaptureDependencies), 1101 | CUDA_ENTRY_ENUM(cuStreamUpdateCaptureDependencies_ptsz), 1102 | CUDA_ENTRY_ENUM(cuUserObjectCreate), 1103 | CUDA_ENTRY_ENUM(cuUserObjectRelease), 1104 | CUDA_ENTRY_ENUM(cuUserObjectRetain), 1105 | CUDA_ENTRY_END 1106 | } cuda_entry_enum_t; 1107 | 1108 | /** 1109 | * CUDA library function pointer 1110 | */ 1111 | typedef CUresult (*cuda_sym_t)(); 1112 | 1113 | /** 1114 | * CUDA library debug function pointer 1115 | */ 1116 | typedef void (*cuda_debug_void_sym_t)(); 1117 | 1118 | /** 1119 | * CUDA library debug result function pointer 1120 | */ 1121 | typedef CUDBGResult (*cuda_debug_result_sym_t)(); 1122 | 1123 | #ifdef __cplusplus 1124 | } 1125 | #endif 

#endif // HIJACK_CUDA_HELPER_H
--------------------------------------------------------------------------------
/include/hijack.h:
--------------------------------------------------------------------------------
/*
 * Tencent is pleased to support the open source community by making TKEStack
 * available.
 *
 * Copyright (C) 2012-2019 Tencent. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * https://opensource.org/licenses/Apache-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */

#ifndef HIJACK_LIBRARY_H
#define HIJACK_LIBRARY_H

#ifdef __cplusplus
extern "C" {
#endif

/* NOTE(review): the six system header names below were lost during text
 * extraction (angle-bracketed names stripped by the dump tool); restore them
 * from the original source before compiling. */
#include
#include
#include
#include
#include
#include

#include "nvml-subset.h"

/**
 * Controller configuration base path
 */
#define VCUDA_CONFIG_PATH "/etc/vcuda/"

/**
 * Controller pid information file name
 */
#define PIDS_CONFIG_NAME "pids.config"

/**
 * Controller configuration file name
 */
#define CONTROLLER_CONFIG_NAME "vcuda.config"
/* Full paths built from the base path above. */
#define PIDS_CONFIG_PATH (VCUDA_CONFIG_PATH "/" PIDS_CONFIG_NAME)
#define CONTROLLER_CONFIG_PATH (VCUDA_CONFIG_PATH "/" CONTROLLER_CONFIG_NAME)

/**
 * RPC binary base path
 */
#define RPC_CLIENT_PATH "/usr/local/nvidia/bin/"

/**
 * RPC binary file name
 */
#define RPC_CLIENT_NAME "gpu-client"

/**
 * RPC address (unix domain socket under the vcuda config directory)
 */
#define RPC_ADDR (VCUDA_CONFIG_PATH "vcuda.sock")

/**
 * Default prefix for cgroup path
 */
#define EMPTY_PREFIX "0xdead"

/**
 * Proc file path for driver version
 */
#define DRIVER_VERSION_PROC_PATH "/proc/driver/nvidia/version"

/**
 * Driver regular expression pattern (matches a dotted numeric version,
 * e.g. "418.67")
 */
#define DRIVER_VERSION_MATCH_PATTERN "([0-9]+)(\\.[0-9]+)+"

/**
 * Max sample pid size
 */
#define MAX_PIDS (1024)

/* Branch-prediction hints (GCC __builtin_expect). */
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)

/* Round n up to the next multiple of base (n unchanged when already aligned). */
#define ROUND_UP(n, base) ((n) % (base) ? (n) + (base) - (n) % (base) : (n))

/* Compile-time assertion: produces a negative array size when condition holds. */
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2 * !!(condition)]))

/* Atomic compare-and-swap via GCC __sync builtin. */
#define CAS(ptr, old, new) __sync_bool_compare_and_swap((ptr), (old), (new))
#define UNUSED __attribute__((unused))

/* 1e6 — NOTE(review): despite the name this is the ns-per-ms scale factor;
 * confirm against its use sites before relying on it. */
#define MILLISEC (1000UL * 1000UL)

/* Throttling/utilization tunables — NOTE(review): semantics inferred from
 * names only; confirm against hijack_call.c. */
#define TIME_TICK (10)
#define FACTOR (32)
#define MAX_UTILIZATION (100)
#define CHANGE_LIMIT_INTERVAL (30)
#define USAGE_THRESHOLD (5)

/* Clamp a percentage to [0, 100]; anything out of range maps to 0. */
#define GET_VALID_VALUE(x) (((x) >= 0 && (x) <= 100) ? (x) : 0)
/* Scale x to 85% — NOTE(review): rationale for 85 not visible here. */
#define CODEC_NORMALIZE(x) (x * 85 / 100)

/**
 * One dispatch-table entry: a symbol name and its resolved address.
 */
typedef struct {
  void *fn_ptr; /* resolved function pointer */
  char *name;   /* symbol name */
} entry_t;

/**
 * Driver version as major.minor (see resource_data_t.driver_version).
 */
typedef struct {
  int major;
  int minor;
} __attribute__((packed, aligned(8))) version_t;

/**
 * Controller configuration data format
 *
 * NOTE(review): packed + aligned(8) suggests this layout is a serialized
 * format shared with the controller (read from CONTROLLER_CONFIG_PATH);
 * do not reorder or resize fields without checking the writer side.
 */
typedef struct {
  char pod_uid[48];                                  /* pod uid of the Pod */
  int limit;
  char occupied[4044];
  char container_name[FILENAME_MAX];                 /* container name */
  char bus_id[NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE];   /* GPU PCI bus id */
  uint64_t gpu_memory;                               /* memory quota in bytes — TODO confirm unit */
  int utilization;                                   /* utilization quota */
  int hard_limit;
  version_t driver_version;
  int enable;
} __attribute__((packed, aligned(8))) resource_data_t;

/**
 * Log severity levels; numeric value is compared against the LOGGER_LEVEL
 * threshold in LOGGER (note: VERBOSE is numerically highest, so it is the
 * most easily filtered).
 */
typedef enum {
  INFO = 0,
  ERROR = 1,
  WARNING = 2,
  FATAL = 3,
  VERBOSE = 4,
} log_level_enum_t;

/**
 * Leveled logger to stderr. The threshold is read from the LOGGER_LEVEL
 * environment variable on every invocation (default 3; negative values fall
 * back to 3). Messages with level <= threshold are printed with file/line
 * prefix; FATAL additionally terminates the process via exit(-1).
 * Implemented as a GCC statement expression.
 */
#define LOGGER(level, format, ...)                                \
  ({                                                              \
    char *_print_level_str = getenv("LOGGER_LEVEL");              \
    int _print_level = 3;                                         \
    if (_print_level_str) {                                       \
      _print_level = (int)strtoul(_print_level_str, NULL, 10);    \
      _print_level = _print_level < 0 ? 3 : _print_level;         \
    }                                                             \
    if (level <= _print_level) {                                  \
      fprintf(stderr, "%s:%d " format "\n", __FILE__, __LINE__,   \
              ##__VA_ARGS__);                                     \
    }                                                             \
    if (level == FATAL) {                                         \
      exit(-1);                                                   \
    }                                                             \
  })

/**
 * Read controller configuration from \aCONTROLLER_CONFIG_PATH
 *
 * @return 0 -> success
 */
int read_controller_configuration();

/**
 * Load library and initialize some data
 */
void load_necessary_data();

/**
 * Register data to remote controller to retrieve configuration
 *
 * @param bus_id bus id of GPU card
 * @param pod_uid pod uid of Pod
 * @param container_name container name of Pod
 */
void register_to_remote_with_data(const char *bus_id, const char *pod_uid,
                                  const char *container_name);

/**
 * Tell whether a custom (new-style) controller configuration path is in use,
 * as opposed to the old lookup method.
 *
 * @return 1 -> using new, 0 -> using old
 */
int is_custom_config_path();

#ifdef __cplusplus
}
#endif

#endif
--------------------------------------------------------------------------------
/include/nvml-helper.h:
--------------------------------------------------------------------------------
/*
 * Tencent is pleased to support the open source community by making TKEStack
 * available.
 *
 * Copyright (C) 2012-2019 Tencent. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * https://opensource.org/licenses/Apache-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OF ANY KIND, either express or implied.
See the License for the 16 | * specific language governing permissions and limitations under the License. 17 | */ 18 | 19 | #ifndef HIJACK_NVML_HELPER_H 20 | #define HIJACK_NVML_HELPER_H 21 | 22 | #ifdef __cplusplus 23 | extern "C" { 24 | #endif 25 | 26 | #include 27 | 28 | #include "hijack.h" 29 | #include "nvml-subset.h" 30 | 31 | /** 32 | * NVML management library prefix 33 | */ 34 | #define DRIVER_ML_LIBRARY_PREFIX "libnvidia-ml.so" 35 | 36 | #define NVML_ENTRY_ENUM(x) ENTRY_##x 37 | 38 | #define NVML_FIND_ENTRY(table, sym) ({ (table)[NVML_ENTRY_ENUM(sym)].fn_ptr; }) 39 | 40 | #define NVML_ENTRY_CALL(table, sym, ...) \ 41 | ({ \ 42 | LOGGER(5, "Hijacking %s\n", #sym); \ 43 | driver_sym_t _entry = NVML_FIND_ENTRY(table, sym); \ 44 | _entry(__VA_ARGS__); \ 45 | }) 46 | 47 | typedef nvmlReturn_t (*driver_sym_t)(); 48 | 49 | /** 50 | * NVML management library enumerator entry 51 | */ 52 | typedef enum { 53 | /** nvmlInit */ 54 | NVML_ENTRY_ENUM(nvmlInit), 55 | /** nvmlShutdown */ 56 | NVML_ENTRY_ENUM(nvmlShutdown), 57 | /** nvmlErrorString */ 58 | NVML_ENTRY_ENUM(nvmlErrorString), 59 | /** nvmlDeviceGetHandleByIndex */ 60 | NVML_ENTRY_ENUM(nvmlDeviceGetHandleByIndex), 61 | /** nvmlDeviceGetComputeRunningProcesses */ 62 | NVML_ENTRY_ENUM(nvmlDeviceGetComputeRunningProcesses), 63 | /** nvmlDeviceGetPciInfo */ 64 | NVML_ENTRY_ENUM(nvmlDeviceGetPciInfo), 65 | /** nvmlDeviceGetProcessUtilization */ 66 | NVML_ENTRY_ENUM(nvmlDeviceGetProcessUtilization), 67 | /** nvmlDeviceGetCount */ 68 | NVML_ENTRY_ENUM(nvmlDeviceGetCount), 69 | /** nvmlDeviceClearAccountingPids */ 70 | NVML_ENTRY_ENUM(nvmlDeviceClearAccountingPids), 71 | /** nvmlDeviceClearCpuAffinity */ 72 | NVML_ENTRY_ENUM(nvmlDeviceClearCpuAffinity), 73 | /** nvmlDeviceClearEccErrorCounts */ 74 | NVML_ENTRY_ENUM(nvmlDeviceClearEccErrorCounts), 75 | /** nvmlDeviceDiscoverGpus */ 76 | NVML_ENTRY_ENUM(nvmlDeviceDiscoverGpus), 77 | /** nvmlDeviceFreezeNvLinkUtilizationCounter */ 78 | 
NVML_ENTRY_ENUM(nvmlDeviceFreezeNvLinkUtilizationCounter), 79 | /** nvmlDeviceGetAccountingBufferSize */ 80 | NVML_ENTRY_ENUM(nvmlDeviceGetAccountingBufferSize), 81 | /** nvmlDeviceGetAccountingMode */ 82 | NVML_ENTRY_ENUM(nvmlDeviceGetAccountingMode), 83 | /** nvmlDeviceGetAccountingPids */ 84 | NVML_ENTRY_ENUM(nvmlDeviceGetAccountingPids), 85 | /** nvmlDeviceGetAccountingStats */ 86 | NVML_ENTRY_ENUM(nvmlDeviceGetAccountingStats), 87 | /** nvmlDeviceGetActiveVgpus */ 88 | NVML_ENTRY_ENUM(nvmlDeviceGetActiveVgpus), 89 | /** nvmlDeviceGetAPIRestriction */ 90 | NVML_ENTRY_ENUM(nvmlDeviceGetAPIRestriction), 91 | /** nvmlDeviceGetApplicationsClock */ 92 | NVML_ENTRY_ENUM(nvmlDeviceGetApplicationsClock), 93 | /** nvmlDeviceGetAutoBoostedClocksEnabled */ 94 | NVML_ENTRY_ENUM(nvmlDeviceGetAutoBoostedClocksEnabled), 95 | /** nvmlDeviceGetBAR1MemoryInfo */ 96 | NVML_ENTRY_ENUM(nvmlDeviceGetBAR1MemoryInfo), 97 | /** nvmlDeviceGetBoardId */ 98 | NVML_ENTRY_ENUM(nvmlDeviceGetBoardId), 99 | /** nvmlDeviceGetBoardPartNumber */ 100 | NVML_ENTRY_ENUM(nvmlDeviceGetBoardPartNumber), 101 | /** nvmlDeviceGetBrand */ 102 | NVML_ENTRY_ENUM(nvmlDeviceGetBrand), 103 | /** nvmlDeviceGetBridgeChipInfo */ 104 | NVML_ENTRY_ENUM(nvmlDeviceGetBridgeChipInfo), 105 | /** nvmlDeviceGetClock */ 106 | NVML_ENTRY_ENUM(nvmlDeviceGetClock), 107 | /** nvmlDeviceGetClockInfo */ 108 | NVML_ENTRY_ENUM(nvmlDeviceGetClockInfo), 109 | /** nvmlDeviceGetComputeMode */ 110 | NVML_ENTRY_ENUM(nvmlDeviceGetComputeMode), 111 | /** nvmlDeviceGetCount_v2 */ 112 | NVML_ENTRY_ENUM(nvmlDeviceGetCount_v2), 113 | /** nvmlDeviceGetCpuAffinity */ 114 | NVML_ENTRY_ENUM(nvmlDeviceGetCpuAffinity), 115 | /** nvmlDeviceGetCreatableVgpus */ 116 | NVML_ENTRY_ENUM(nvmlDeviceGetCreatableVgpus), 117 | /** nvmlDeviceGetCudaComputeCapability */ 118 | NVML_ENTRY_ENUM(nvmlDeviceGetCudaComputeCapability), 119 | /** nvmlDeviceGetCurrentClocksThrottleReasons */ 120 | NVML_ENTRY_ENUM(nvmlDeviceGetCurrentClocksThrottleReasons), 121 | /** 
nvmlDeviceGetCurrPcieLinkGeneration */ 122 | NVML_ENTRY_ENUM(nvmlDeviceGetCurrPcieLinkGeneration), 123 | /** nvmlDeviceGetCurrPcieLinkWidth */ 124 | NVML_ENTRY_ENUM(nvmlDeviceGetCurrPcieLinkWidth), 125 | /** nvmlDeviceGetDecoderUtilization */ 126 | NVML_ENTRY_ENUM(nvmlDeviceGetDecoderUtilization), 127 | /** nvmlDeviceGetDefaultApplicationsClock */ 128 | NVML_ENTRY_ENUM(nvmlDeviceGetDefaultApplicationsClock), 129 | /** nvmlDeviceGetDetailedEccErrors */ 130 | NVML_ENTRY_ENUM(nvmlDeviceGetDetailedEccErrors), 131 | /** nvmlDeviceGetDisplayActive */ 132 | NVML_ENTRY_ENUM(nvmlDeviceGetDisplayActive), 133 | /** nvmlDeviceGetDisplayMode */ 134 | NVML_ENTRY_ENUM(nvmlDeviceGetDisplayMode), 135 | /** nvmlDeviceGetDriverModel */ 136 | NVML_ENTRY_ENUM(nvmlDeviceGetDriverModel), 137 | /** nvmlDeviceGetEccMode */ 138 | NVML_ENTRY_ENUM(nvmlDeviceGetEccMode), 139 | /** nvmlDeviceGetEncoderCapacity */ 140 | NVML_ENTRY_ENUM(nvmlDeviceGetEncoderCapacity), 141 | /** nvmlDeviceGetEncoderSessions */ 142 | NVML_ENTRY_ENUM(nvmlDeviceGetEncoderSessions), 143 | /** nvmlDeviceGetEncoderStats */ 144 | NVML_ENTRY_ENUM(nvmlDeviceGetEncoderStats), 145 | /** nvmlDeviceGetEncoderUtilization */ 146 | NVML_ENTRY_ENUM(nvmlDeviceGetEncoderUtilization), 147 | /** nvmlDeviceGetEnforcedPowerLimit */ 148 | NVML_ENTRY_ENUM(nvmlDeviceGetEnforcedPowerLimit), 149 | /** nvmlDeviceGetFanSpeed */ 150 | NVML_ENTRY_ENUM(nvmlDeviceGetFanSpeed), 151 | /** nvmlDeviceGetFanSpeed_v2 */ 152 | NVML_ENTRY_ENUM(nvmlDeviceGetFanSpeed_v2), 153 | /** nvmlDeviceGetFieldValues */ 154 | NVML_ENTRY_ENUM(nvmlDeviceGetFieldValues), 155 | /** nvmlDeviceGetGpuOperationMode */ 156 | NVML_ENTRY_ENUM(nvmlDeviceGetGpuOperationMode), 157 | /** nvmlDeviceGetGraphicsRunningProcesses */ 158 | NVML_ENTRY_ENUM(nvmlDeviceGetGraphicsRunningProcesses), 159 | /** nvmlDeviceGetGridLicensableFeatures */ 160 | NVML_ENTRY_ENUM(nvmlDeviceGetGridLicensableFeatures), 161 | /** nvmlDeviceGetHandleByIndex_v2 */ 162 | 
NVML_ENTRY_ENUM(nvmlDeviceGetHandleByIndex_v2), 163 | /** nvmlDeviceGetHandleByPciBusId */ 164 | NVML_ENTRY_ENUM(nvmlDeviceGetHandleByPciBusId), 165 | /** nvmlDeviceGetHandleByPciBusId_v2 */ 166 | NVML_ENTRY_ENUM(nvmlDeviceGetHandleByPciBusId_v2), 167 | /** nvmlDeviceGetHandleBySerial */ 168 | NVML_ENTRY_ENUM(nvmlDeviceGetHandleBySerial), 169 | /** nvmlDeviceGetHandleByUUID */ 170 | NVML_ENTRY_ENUM(nvmlDeviceGetHandleByUUID), 171 | /** nvmlDeviceGetIndex */ 172 | NVML_ENTRY_ENUM(nvmlDeviceGetIndex), 173 | /** nvmlDeviceGetInforomConfigurationChecksum */ 174 | NVML_ENTRY_ENUM(nvmlDeviceGetInforomConfigurationChecksum), 175 | /** nvmlDeviceGetInforomImageVersion */ 176 | NVML_ENTRY_ENUM(nvmlDeviceGetInforomImageVersion), 177 | /** nvmlDeviceGetInforomVersion */ 178 | NVML_ENTRY_ENUM(nvmlDeviceGetInforomVersion), 179 | /** nvmlDeviceGetMaxClockInfo */ 180 | NVML_ENTRY_ENUM(nvmlDeviceGetMaxClockInfo), 181 | /** nvmlDeviceGetMaxCustomerBoostClock */ 182 | NVML_ENTRY_ENUM(nvmlDeviceGetMaxCustomerBoostClock), 183 | /** nvmlDeviceGetMaxPcieLinkGeneration */ 184 | NVML_ENTRY_ENUM(nvmlDeviceGetMaxPcieLinkGeneration), 185 | /** nvmlDeviceGetMaxPcieLinkWidth */ 186 | NVML_ENTRY_ENUM(nvmlDeviceGetMaxPcieLinkWidth), 187 | /** nvmlDeviceGetMemoryErrorCounter */ 188 | NVML_ENTRY_ENUM(nvmlDeviceGetMemoryErrorCounter), 189 | /** nvmlDeviceGetMemoryInfo */ 190 | NVML_ENTRY_ENUM(nvmlDeviceGetMemoryInfo), 191 | /** nvmlDeviceGetMinorNumber */ 192 | NVML_ENTRY_ENUM(nvmlDeviceGetMinorNumber), 193 | /** nvmlDeviceGetMPSComputeRunningProcesses */ 194 | NVML_ENTRY_ENUM(nvmlDeviceGetMPSComputeRunningProcesses), 195 | /** nvmlDeviceGetMultiGpuBoard */ 196 | NVML_ENTRY_ENUM(nvmlDeviceGetMultiGpuBoard), 197 | /** nvmlDeviceGetName */ 198 | NVML_ENTRY_ENUM(nvmlDeviceGetName), 199 | /** nvmlDeviceGetNvLinkCapability */ 200 | NVML_ENTRY_ENUM(nvmlDeviceGetNvLinkCapability), 201 | /** nvmlDeviceGetNvLinkErrorCounter */ 202 | NVML_ENTRY_ENUM(nvmlDeviceGetNvLinkErrorCounter), 203 | /** 
nvmlDeviceGetNvLinkRemotePciInfo */ 204 | NVML_ENTRY_ENUM(nvmlDeviceGetNvLinkRemotePciInfo), 205 | /** nvmlDeviceGetNvLinkRemotePciInfo_v2 */ 206 | NVML_ENTRY_ENUM(nvmlDeviceGetNvLinkRemotePciInfo_v2), 207 | /** nvmlDeviceGetNvLinkState */ 208 | NVML_ENTRY_ENUM(nvmlDeviceGetNvLinkState), 209 | /** nvmlDeviceGetNvLinkUtilizationControl */ 210 | NVML_ENTRY_ENUM(nvmlDeviceGetNvLinkUtilizationControl), 211 | /** nvmlDeviceGetNvLinkUtilizationCounter */ 212 | NVML_ENTRY_ENUM(nvmlDeviceGetNvLinkUtilizationCounter), 213 | /** nvmlDeviceGetNvLinkVersion */ 214 | NVML_ENTRY_ENUM(nvmlDeviceGetNvLinkVersion), 215 | /** nvmlDeviceGetP2PStatus */ 216 | NVML_ENTRY_ENUM(nvmlDeviceGetP2PStatus), 217 | /** nvmlDeviceGetPcieReplayCounter */ 218 | NVML_ENTRY_ENUM(nvmlDeviceGetPcieReplayCounter), 219 | /** nvmlDeviceGetPcieThroughput */ 220 | NVML_ENTRY_ENUM(nvmlDeviceGetPcieThroughput), 221 | /** nvmlDeviceGetPciInfo_v2 */ 222 | NVML_ENTRY_ENUM(nvmlDeviceGetPciInfo_v2), 223 | /** nvmlDeviceGetPciInfo_v3 */ 224 | NVML_ENTRY_ENUM(nvmlDeviceGetPciInfo_v3), 225 | /** nvmlDeviceGetPerformanceState */ 226 | NVML_ENTRY_ENUM(nvmlDeviceGetPerformanceState), 227 | /** nvmlDeviceGetPersistenceMode */ 228 | NVML_ENTRY_ENUM(nvmlDeviceGetPersistenceMode), 229 | /** nvmlDeviceGetPowerManagementDefaultLimit */ 230 | NVML_ENTRY_ENUM(nvmlDeviceGetPowerManagementDefaultLimit), 231 | /** nvmlDeviceGetPowerManagementLimit */ 232 | NVML_ENTRY_ENUM(nvmlDeviceGetPowerManagementLimit), 233 | /** nvmlDeviceGetPowerManagementLimitConstraints */ 234 | NVML_ENTRY_ENUM(nvmlDeviceGetPowerManagementLimitConstraints), 235 | /** nvmlDeviceGetPowerManagementMode */ 236 | NVML_ENTRY_ENUM(nvmlDeviceGetPowerManagementMode), 237 | /** nvmlDeviceGetPowerState */ 238 | NVML_ENTRY_ENUM(nvmlDeviceGetPowerState), 239 | /** nvmlDeviceGetPowerUsage */ 240 | NVML_ENTRY_ENUM(nvmlDeviceGetPowerUsage), 241 | /** nvmlDeviceGetRetiredPages */ 242 | NVML_ENTRY_ENUM(nvmlDeviceGetRetiredPages), 243 | /** 
nvmlDeviceGetRetiredPagesPendingStatus */ 244 | NVML_ENTRY_ENUM(nvmlDeviceGetRetiredPagesPendingStatus), 245 | /** nvmlDeviceGetSamples */ 246 | NVML_ENTRY_ENUM(nvmlDeviceGetSamples), 247 | /** nvmlDeviceGetSerial */ 248 | NVML_ENTRY_ENUM(nvmlDeviceGetSerial), 249 | /** nvmlDeviceGetSupportedClocksThrottleReasons */ 250 | NVML_ENTRY_ENUM(nvmlDeviceGetSupportedClocksThrottleReasons), 251 | /** nvmlDeviceGetSupportedEventTypes */ 252 | NVML_ENTRY_ENUM(nvmlDeviceGetSupportedEventTypes), 253 | /** nvmlDeviceGetSupportedGraphicsClocks */ 254 | NVML_ENTRY_ENUM(nvmlDeviceGetSupportedGraphicsClocks), 255 | /** nvmlDeviceGetSupportedMemoryClocks */ 256 | NVML_ENTRY_ENUM(nvmlDeviceGetSupportedMemoryClocks), 257 | /** nvmlDeviceGetSupportedVgpus */ 258 | NVML_ENTRY_ENUM(nvmlDeviceGetSupportedVgpus), 259 | /** nvmlDeviceGetTemperature */ 260 | NVML_ENTRY_ENUM(nvmlDeviceGetTemperature), 261 | /** nvmlDeviceGetTemperatureThreshold */ 262 | NVML_ENTRY_ENUM(nvmlDeviceGetTemperatureThreshold), 263 | /** nvmlDeviceGetTopologyCommonAncestor */ 264 | NVML_ENTRY_ENUM(nvmlDeviceGetTopologyCommonAncestor), 265 | /** nvmlDeviceGetTopologyNearestGpus */ 266 | NVML_ENTRY_ENUM(nvmlDeviceGetTopologyNearestGpus), 267 | /** nvmlDeviceGetTotalEccErrors */ 268 | NVML_ENTRY_ENUM(nvmlDeviceGetTotalEccErrors), 269 | /** nvmlDeviceGetTotalEnergyConsumption */ 270 | NVML_ENTRY_ENUM(nvmlDeviceGetTotalEnergyConsumption), 271 | /** nvmlDeviceGetUtilizationRates */ 272 | NVML_ENTRY_ENUM(nvmlDeviceGetUtilizationRates), 273 | /** nvmlDeviceGetUUID */ 274 | NVML_ENTRY_ENUM(nvmlDeviceGetUUID), 275 | /** nvmlDeviceGetVbiosVersion */ 276 | NVML_ENTRY_ENUM(nvmlDeviceGetVbiosVersion), 277 | /** nvmlDeviceGetVgpuMetadata */ 278 | NVML_ENTRY_ENUM(nvmlDeviceGetVgpuMetadata), 279 | /** nvmlDeviceGetVgpuProcessUtilization */ 280 | NVML_ENTRY_ENUM(nvmlDeviceGetVgpuProcessUtilization), 281 | /** nvmlDeviceGetVgpuUtilization */ 282 | NVML_ENTRY_ENUM(nvmlDeviceGetVgpuUtilization), 283 | /** nvmlDeviceGetViolationStatus */ 
284 | NVML_ENTRY_ENUM(nvmlDeviceGetViolationStatus), 285 | /** nvmlDeviceGetVirtualizationMode */ 286 | NVML_ENTRY_ENUM(nvmlDeviceGetVirtualizationMode), 287 | /** nvmlDeviceModifyDrainState */ 288 | NVML_ENTRY_ENUM(nvmlDeviceModifyDrainState), 289 | /** nvmlDeviceOnSameBoard */ 290 | NVML_ENTRY_ENUM(nvmlDeviceOnSameBoard), 291 | /** nvmlDeviceQueryDrainState */ 292 | NVML_ENTRY_ENUM(nvmlDeviceQueryDrainState), 293 | /** nvmlDeviceRegisterEvents */ 294 | NVML_ENTRY_ENUM(nvmlDeviceRegisterEvents), 295 | /** nvmlDeviceRemoveGpu */ 296 | NVML_ENTRY_ENUM(nvmlDeviceRemoveGpu), 297 | /** nvmlDeviceRemoveGpu_v2 */ 298 | NVML_ENTRY_ENUM(nvmlDeviceRemoveGpu_v2), 299 | /** nvmlDeviceResetApplicationsClocks */ 300 | NVML_ENTRY_ENUM(nvmlDeviceResetApplicationsClocks), 301 | /** nvmlDeviceResetNvLinkErrorCounters */ 302 | NVML_ENTRY_ENUM(nvmlDeviceResetNvLinkErrorCounters), 303 | /** nvmlDeviceResetNvLinkUtilizationCounter */ 304 | NVML_ENTRY_ENUM(nvmlDeviceResetNvLinkUtilizationCounter), 305 | /** nvmlDeviceSetAccountingMode */ 306 | NVML_ENTRY_ENUM(nvmlDeviceSetAccountingMode), 307 | /** nvmlDeviceSetAPIRestriction */ 308 | NVML_ENTRY_ENUM(nvmlDeviceSetAPIRestriction), 309 | /** nvmlDeviceSetApplicationsClocks */ 310 | NVML_ENTRY_ENUM(nvmlDeviceSetApplicationsClocks), 311 | /** nvmlDeviceSetAutoBoostedClocksEnabled */ 312 | NVML_ENTRY_ENUM(nvmlDeviceSetAutoBoostedClocksEnabled), 313 | /** nvmlDeviceSetComputeMode */ 314 | NVML_ENTRY_ENUM(nvmlDeviceSetComputeMode), 315 | /** nvmlDeviceSetCpuAffinity */ 316 | NVML_ENTRY_ENUM(nvmlDeviceSetCpuAffinity), 317 | /** nvmlDeviceSetDefaultAutoBoostedClocksEnabled */ 318 | NVML_ENTRY_ENUM(nvmlDeviceSetDefaultAutoBoostedClocksEnabled), 319 | /** nvmlDeviceSetDriverModel */ 320 | NVML_ENTRY_ENUM(nvmlDeviceSetDriverModel), 321 | /** nvmlDeviceSetEccMode */ 322 | NVML_ENTRY_ENUM(nvmlDeviceSetEccMode), 323 | /** nvmlDeviceSetGpuOperationMode */ 324 | NVML_ENTRY_ENUM(nvmlDeviceSetGpuOperationMode), 325 | /** 
nvmlDeviceSetNvLinkUtilizationControl */ 326 | NVML_ENTRY_ENUM(nvmlDeviceSetNvLinkUtilizationControl), 327 | /** nvmlDeviceSetPersistenceMode */ 328 | NVML_ENTRY_ENUM(nvmlDeviceSetPersistenceMode), 329 | /** nvmlDeviceSetPowerManagementLimit */ 330 | NVML_ENTRY_ENUM(nvmlDeviceSetPowerManagementLimit), 331 | /** nvmlDeviceSetVirtualizationMode */ 332 | NVML_ENTRY_ENUM(nvmlDeviceSetVirtualizationMode), 333 | /** nvmlDeviceValidateInforom */ 334 | NVML_ENTRY_ENUM(nvmlDeviceValidateInforom), 335 | /** nvmlEventSetCreate */ 336 | NVML_ENTRY_ENUM(nvmlEventSetCreate), 337 | /** nvmlEventSetFree */ 338 | NVML_ENTRY_ENUM(nvmlEventSetFree), 339 | /** nvmlEventSetWait */ 340 | NVML_ENTRY_ENUM(nvmlEventSetWait), 341 | /** nvmlGetVgpuCompatibility */ 342 | NVML_ENTRY_ENUM(nvmlGetVgpuCompatibility), 343 | /** nvmlInit_v2 */ 344 | NVML_ENTRY_ENUM(nvmlInit_v2), 345 | /** nvmlInitWithFlags */ 346 | NVML_ENTRY_ENUM(nvmlInitWithFlags), 347 | /** nvmlInternalGetExportTable */ 348 | NVML_ENTRY_ENUM(nvmlInternalGetExportTable), 349 | /** nvmlSystemGetCudaDriverVersion */ 350 | NVML_ENTRY_ENUM(nvmlSystemGetCudaDriverVersion), 351 | /** nvmlSystemGetCudaDriverVersion_v2 */ 352 | NVML_ENTRY_ENUM(nvmlSystemGetCudaDriverVersion_v2), 353 | /** nvmlSystemGetDriverVersion */ 354 | NVML_ENTRY_ENUM(nvmlSystemGetDriverVersion), 355 | /** nvmlSystemGetHicVersion */ 356 | NVML_ENTRY_ENUM(nvmlSystemGetHicVersion), 357 | /** nvmlSystemGetNVMLVersion */ 358 | NVML_ENTRY_ENUM(nvmlSystemGetNVMLVersion), 359 | /** nvmlSystemGetProcessName */ 360 | NVML_ENTRY_ENUM(nvmlSystemGetProcessName), 361 | /** nvmlSystemGetTopologyGpuSet */ 362 | NVML_ENTRY_ENUM(nvmlSystemGetTopologyGpuSet), 363 | /** nvmlUnitGetCount */ 364 | NVML_ENTRY_ENUM(nvmlUnitGetCount), 365 | /** nvmlUnitGetDevices */ 366 | NVML_ENTRY_ENUM(nvmlUnitGetDevices), 367 | /** nvmlUnitGetFanSpeedInfo */ 368 | NVML_ENTRY_ENUM(nvmlUnitGetFanSpeedInfo), 369 | /** nvmlUnitGetHandleByIndex */ 370 | NVML_ENTRY_ENUM(nvmlUnitGetHandleByIndex), 371 | /** 
nvmlUnitGetLedState */ 372 | NVML_ENTRY_ENUM(nvmlUnitGetLedState), 373 | /** nvmlUnitGetPsuInfo */ 374 | NVML_ENTRY_ENUM(nvmlUnitGetPsuInfo), 375 | /** nvmlUnitGetTemperature */ 376 | NVML_ENTRY_ENUM(nvmlUnitGetTemperature), 377 | /** nvmlUnitGetUnitInfo */ 378 | NVML_ENTRY_ENUM(nvmlUnitGetUnitInfo), 379 | /** nvmlUnitSetLedState */ 380 | NVML_ENTRY_ENUM(nvmlUnitSetLedState), 381 | /** nvmlVgpuInstanceGetEncoderCapacity */ 382 | NVML_ENTRY_ENUM(nvmlVgpuInstanceGetEncoderCapacity), 383 | /** nvmlVgpuInstanceGetEncoderSessions */ 384 | NVML_ENTRY_ENUM(nvmlVgpuInstanceGetEncoderSessions), 385 | /** nvmlVgpuInstanceGetEncoderStats */ 386 | NVML_ENTRY_ENUM(nvmlVgpuInstanceGetEncoderStats), 387 | /** nvmlVgpuInstanceGetFbUsage */ 388 | NVML_ENTRY_ENUM(nvmlVgpuInstanceGetFbUsage), 389 | /** nvmlVgpuInstanceGetFrameRateLimit */ 390 | NVML_ENTRY_ENUM(nvmlVgpuInstanceGetFrameRateLimit), 391 | /** nvmlVgpuInstanceGetLicenseStatus */ 392 | NVML_ENTRY_ENUM(nvmlVgpuInstanceGetLicenseStatus), 393 | /** nvmlVgpuInstanceGetMetadata */ 394 | NVML_ENTRY_ENUM(nvmlVgpuInstanceGetMetadata), 395 | /** nvmlVgpuInstanceGetType */ 396 | NVML_ENTRY_ENUM(nvmlVgpuInstanceGetType), 397 | /** nvmlVgpuInstanceGetUUID */ 398 | NVML_ENTRY_ENUM(nvmlVgpuInstanceGetUUID), 399 | /** nvmlVgpuInstanceGetVmDriverVersion */ 400 | NVML_ENTRY_ENUM(nvmlVgpuInstanceGetVmDriverVersion), 401 | /** nvmlVgpuInstanceGetVmID */ 402 | NVML_ENTRY_ENUM(nvmlVgpuInstanceGetVmID), 403 | /** nvmlVgpuInstanceSetEncoderCapacity */ 404 | NVML_ENTRY_ENUM(nvmlVgpuInstanceSetEncoderCapacity), 405 | /** nvmlVgpuTypeGetClass */ 406 | NVML_ENTRY_ENUM(nvmlVgpuTypeGetClass), 407 | /** nvmlVgpuTypeGetDeviceID */ 408 | NVML_ENTRY_ENUM(nvmlVgpuTypeGetDeviceID), 409 | /** nvmlVgpuTypeGetFramebufferSize */ 410 | NVML_ENTRY_ENUM(nvmlVgpuTypeGetFramebufferSize), 411 | /** nvmlVgpuTypeGetFrameRateLimit */ 412 | NVML_ENTRY_ENUM(nvmlVgpuTypeGetFrameRateLimit), 413 | /** nvmlVgpuTypeGetLicense */ 414 | NVML_ENTRY_ENUM(nvmlVgpuTypeGetLicense), 
415 | /** nvmlVgpuTypeGetMaxInstances */ 416 | NVML_ENTRY_ENUM(nvmlVgpuTypeGetMaxInstances), 417 | /** nvmlVgpuTypeGetName */ 418 | NVML_ENTRY_ENUM(nvmlVgpuTypeGetName), 419 | /** nvmlVgpuTypeGetNumDisplayHeads */ 420 | NVML_ENTRY_ENUM(nvmlVgpuTypeGetNumDisplayHeads), 421 | /** nvmlVgpuTypeGetResolution */ 422 | NVML_ENTRY_ENUM(nvmlVgpuTypeGetResolution), 423 | /** nvmlDeviceGetFBCSessions */ 424 | NVML_ENTRY_ENUM(nvmlDeviceGetFBCSessions), 425 | /** nvmlDeviceGetFBCStats */ 426 | NVML_ENTRY_ENUM(nvmlDeviceGetFBCStats), 427 | /** nvmlDeviceGetGridLicensableFeatures_v2 */ 428 | NVML_ENTRY_ENUM(nvmlDeviceGetGridLicensableFeatures_v2), 429 | /** nvmlDeviceGetRetiredPages_v2 */ 430 | NVML_ENTRY_ENUM(nvmlDeviceGetRetiredPages_v2), 431 | /** nvmlDeviceResetGpuLockedClocks */ 432 | NVML_ENTRY_ENUM(nvmlDeviceResetGpuLockedClocks), 433 | /** nvmlDeviceSetGpuLockedClocks */ 434 | NVML_ENTRY_ENUM(nvmlDeviceSetGpuLockedClocks), 435 | /** nvmlGetBlacklistDeviceCount */ 436 | NVML_ENTRY_ENUM(nvmlGetBlacklistDeviceCount), 437 | /** nvmlGetBlacklistDeviceInfoByIndex */ 438 | NVML_ENTRY_ENUM(nvmlGetBlacklistDeviceInfoByIndex), 439 | /** nvmlVgpuInstanceGetAccountingMode */ 440 | NVML_ENTRY_ENUM(nvmlVgpuInstanceGetAccountingMode), 441 | /** nvmlVgpuInstanceGetAccountingPids */ 442 | NVML_ENTRY_ENUM(nvmlVgpuInstanceGetAccountingPids), 443 | /** nvmlVgpuInstanceGetAccountingStats */ 444 | NVML_ENTRY_ENUM(nvmlVgpuInstanceGetAccountingStats), 445 | /** nvmlVgpuInstanceGetFBCSessions */ 446 | NVML_ENTRY_ENUM(nvmlVgpuInstanceGetFBCSessions), 447 | /** nvmlVgpuInstanceGetFBCStats */ 448 | NVML_ENTRY_ENUM(nvmlVgpuInstanceGetFBCStats), 449 | /** nvmlVgpuTypeGetMaxInstancesPerVm */ 450 | NVML_ENTRY_ENUM(nvmlVgpuTypeGetMaxInstancesPerVm), 451 | /** nvmlGetVgpuVersion */ 452 | NVML_ENTRY_ENUM(nvmlGetVgpuVersion), 453 | /** nvmlSetVgpuVersion */ 454 | NVML_ENTRY_ENUM(nvmlSetVgpuVersion), 455 | /** nvmlDeviceGetGridLicensableFeatures_v3 */ 456 | 
NVML_ENTRY_ENUM(nvmlDeviceGetGridLicensableFeatures_v3), 457 | /** nvmlDeviceGetHostVgpuMode */ 458 | NVML_ENTRY_ENUM(nvmlDeviceGetHostVgpuMode), 459 | /** nvmlDeviceGetPgpuMetadataString */ 460 | NVML_ENTRY_ENUM(nvmlDeviceGetPgpuMetadataString), 461 | /** nvmlVgpuInstanceGetEccMode */ 462 | NVML_ENTRY_ENUM(nvmlVgpuInstanceGetEccMode), 463 | /** nvmlComputeInstanceDestroy */ 464 | NVML_ENTRY_ENUM(nvmlComputeInstanceDestroy), 465 | /** nvmlComputeInstanceGetInfo */ 466 | NVML_ENTRY_ENUM(nvmlComputeInstanceGetInfo), 467 | /** nvmlDeviceCreateGpuInstance */ 468 | NVML_ENTRY_ENUM(nvmlDeviceCreateGpuInstance), 469 | /** nvmlDeviceGetArchitecture */ 470 | NVML_ENTRY_ENUM(nvmlDeviceGetArchitecture), 471 | /** nvmlDeviceGetAttributes */ 472 | NVML_ENTRY_ENUM(nvmlDeviceGetAttributes), 473 | /** nvmlDeviceGetAttributes_v2 */ 474 | NVML_ENTRY_ENUM(nvmlDeviceGetAttributes_v2), 475 | /** nvmlDeviceGetComputeInstanceId */ 476 | NVML_ENTRY_ENUM(nvmlDeviceGetComputeInstanceId), 477 | /** nvmlDeviceGetCpuAffinityWithinScope */ 478 | NVML_ENTRY_ENUM(nvmlDeviceGetCpuAffinityWithinScope), 479 | /** nvmlDeviceGetDeviceHandleFromMigDeviceHandle */ 480 | NVML_ENTRY_ENUM(nvmlDeviceGetDeviceHandleFromMigDeviceHandle), 481 | /** nvmlDeviceGetGpuInstanceById */ 482 | NVML_ENTRY_ENUM(nvmlDeviceGetGpuInstanceById), 483 | /** nvmlDeviceGetGpuInstanceId */ 484 | NVML_ENTRY_ENUM(nvmlDeviceGetGpuInstanceId), 485 | /** nvmlDeviceGetGpuInstancePossiblePlacements */ 486 | NVML_ENTRY_ENUM(nvmlDeviceGetGpuInstancePossiblePlacements), 487 | /** nvmlDeviceGetGpuInstanceProfileInfo */ 488 | NVML_ENTRY_ENUM(nvmlDeviceGetGpuInstanceProfileInfo), 489 | /** nvmlDeviceGetGpuInstanceRemainingCapacity */ 490 | NVML_ENTRY_ENUM(nvmlDeviceGetGpuInstanceRemainingCapacity), 491 | /** nvmlDeviceGetGpuInstances */ 492 | NVML_ENTRY_ENUM(nvmlDeviceGetGpuInstances), 493 | /** nvmlDeviceGetMaxMigDeviceCount */ 494 | NVML_ENTRY_ENUM(nvmlDeviceGetMaxMigDeviceCount), 495 | /** nvmlDeviceGetMemoryAffinity */ 496 | 
NVML_ENTRY_ENUM(nvmlDeviceGetMemoryAffinity), 497 | /** nvmlDeviceGetMigDeviceHandleByIndex */ 498 | NVML_ENTRY_ENUM(nvmlDeviceGetMigDeviceHandleByIndex), 499 | /** nvmlDeviceGetMigMode */ 500 | NVML_ENTRY_ENUM(nvmlDeviceGetMigMode), 501 | /** nvmlDeviceGetRemappedRows */ 502 | NVML_ENTRY_ENUM(nvmlDeviceGetRemappedRows), 503 | /** nvmlDeviceGetRowRemapperHistogram */ 504 | NVML_ENTRY_ENUM(nvmlDeviceGetRowRemapperHistogram), 505 | /** nvmlDeviceIsMigDeviceHandle */ 506 | NVML_ENTRY_ENUM(nvmlDeviceIsMigDeviceHandle), 507 | /** nvmlDeviceSetMigMode */ 508 | NVML_ENTRY_ENUM(nvmlDeviceSetMigMode), 509 | /** nvmlEventSetWait_v2 */ 510 | NVML_ENTRY_ENUM(nvmlEventSetWait_v2), 511 | /** nvmlGpuInstanceCreateComputeInstance */ 512 | NVML_ENTRY_ENUM(nvmlGpuInstanceCreateComputeInstance), 513 | /** nvmlGpuInstanceDestroy */ 514 | NVML_ENTRY_ENUM(nvmlGpuInstanceDestroy), 515 | /** nvmlGpuInstanceGetComputeInstanceById */ 516 | NVML_ENTRY_ENUM(nvmlGpuInstanceGetComputeInstanceById), 517 | /** nvmlGpuInstanceGetComputeInstanceProfileInfo */ 518 | NVML_ENTRY_ENUM(nvmlGpuInstanceGetComputeInstanceProfileInfo), 519 | /** nvmlGpuInstanceGetComputeInstanceRemainingCapacity */ 520 | NVML_ENTRY_ENUM(nvmlGpuInstanceGetComputeInstanceRemainingCapacity), 521 | /** nvmlGpuInstanceGetComputeInstances */ 522 | NVML_ENTRY_ENUM(nvmlGpuInstanceGetComputeInstances), 523 | /** nvmlGpuInstanceGetInfo */ 524 | NVML_ENTRY_ENUM(nvmlGpuInstanceGetInfo), 525 | /** nvmlVgpuInstanceClearAccountingPids */ 526 | NVML_ENTRY_ENUM(nvmlVgpuInstanceClearAccountingPids), 527 | /** nvmlVgpuInstanceGetMdevUUID */ 528 | NVML_ENTRY_ENUM(nvmlVgpuInstanceGetMdevUUID), 529 | /** nvmlComputeInstanceGetInfo_v2 */ 530 | NVML_ENTRY_ENUM(nvmlComputeInstanceGetInfo_v2), 531 | /** nvmlDeviceGetComputeRunningProcesses_v2 */ 532 | NVML_ENTRY_ENUM(nvmlDeviceGetComputeRunningProcesses_v2), 533 | /** nvmlDeviceGetGraphicsRunningProcesses_v2 */ 534 | NVML_ENTRY_ENUM(nvmlDeviceGetGraphicsRunningProcesses_v2), 535 | /** 
nvmlDeviceSetTemperatureThreshold */ 536 | NVML_ENTRY_ENUM(nvmlDeviceSetTemperatureThreshold), 537 | /** nvmlRetry_NvRmControl */ 538 | NVML_ENTRY_ENUM(nvmlRetry_NvRmControl), 539 | /** nvmlVgpuInstanceGetGpuInstanceId */ 540 | NVML_ENTRY_ENUM(nvmlVgpuInstanceGetGpuInstanceId), 541 | /** nvmlVgpuTypeGetGpuInstanceProfileId */ 542 | NVML_ENTRY_ENUM(nvmlVgpuTypeGetGpuInstanceProfileId), 543 | NVML_ENTRY_ENUM(nvmlDeviceCreateGpuInstanceWithPlacement), 544 | NVML_ENTRY_ENUM(nvmlDeviceGetBusType), 545 | NVML_ENTRY_ENUM(nvmlDeviceGetClkMonStatus), 546 | NVML_ENTRY_ENUM(nvmlDeviceGetGpuInstancePossiblePlacements_v2), 547 | NVML_ENTRY_ENUM(nvmlDeviceGetGridLicensableFeatures_v4), 548 | NVML_ENTRY_ENUM(nvmlDeviceGetIrqNum), 549 | NVML_ENTRY_ENUM(nvmlDeviceGetMPSComputeRunningProcesses_v2), 550 | NVML_ENTRY_ENUM(nvmlDeviceGetNvLinkRemoteDeviceType), 551 | NVML_ENTRY_ENUM(nvmlDeviceResetMemoryLockedClocks), 552 | NVML_ENTRY_ENUM(nvmlDeviceSetMemoryLockedClocks), 553 | NVML_ENTRY_ENUM(nvmlGetExcludedDeviceCount), 554 | NVML_ENTRY_ENUM(nvmlGetExcludedDeviceInfoByIndex), 555 | NVML_ENTRY_ENUM(nvmlVgpuInstanceGetLicenseInfo), 556 | NVML_ENTRY_END 557 | } nvml_entry_enum_t; 558 | 559 | #ifdef __cplusplus 560 | } 561 | #endif 562 | 563 | #endif // HIJACK_NVML_HELPER_H 564 | -------------------------------------------------------------------------------- /src/hijack_call.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack 3 | * available. 4 | * 5 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 6 | * 7 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 8 | * use this file except in compliance with the License. 
You may obtain a copy of 9 | * the License at 10 | * 11 | * https://opensource.org/licenses/Apache-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 15 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations under the License. 17 | */ 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | #include "include/cuda-helper.h" 29 | #include "include/hijack.h" 30 | #include "include/nvml-helper.h" 31 | 32 | extern resource_data_t g_vcuda_config; 33 | extern entry_t cuda_library_entry[]; 34 | extern entry_t nvml_library_entry[]; 35 | extern char pid_path[]; 36 | 37 | typedef void (*atomic_fn_ptr)(int, void *); 38 | 39 | static pthread_once_t g_init_set = PTHREAD_ONCE_INIT; 40 | static pthread_once_t g_register_set = PTHREAD_ONCE_INIT; 41 | 42 | static volatile int g_cur_cuda_cores = 0; 43 | static volatile int g_total_cuda_cores = 0; 44 | 45 | static int g_max_thread_per_sm = 0; 46 | static int g_sm_num = 0; 47 | 48 | static int g_block_x = 1, g_block_y = 1, g_block_z = 1; 49 | static uint32_t g_block_locker = 0; 50 | 51 | static const struct timespec g_cycle = { 52 | .tv_sec = 0, 53 | .tv_nsec = TIME_TICK * MILLISEC, 54 | }; 55 | 56 | static const struct timespec g_wait = { 57 | .tv_sec = 0, 58 | .tv_nsec = 120 * MILLISEC, 59 | }; 60 | 61 | /** pid mapping related */ 62 | static int g_pids_table[MAX_PIDS]; 63 | static int g_pids_table_size; 64 | 65 | /** internal function definition */ 66 | static void register_to_remote(); 67 | 68 | static void atomic_action(const char *, atomic_fn_ptr, void *); 69 | 70 | static void active_utilization_notifier(); 71 | 72 | static void *utilization_watcher(void *); 73 | 74 | static void load_pids_table(int, void *); 75 | 76 | static void get_used_gpu_memory(int, void 
*); 77 | 78 | static void get_used_gpu_utilization(int, void *); 79 | 80 | static void initialization(); 81 | 82 | static void rate_limiter(int, int); 83 | 84 | static void change_token(int); 85 | 86 | static const char *nvml_error(nvmlReturn_t); 87 | 88 | static const char *cuda_error(CUresult, const char **); 89 | 90 | static int int_match(const void *, const void *); 91 | 92 | static int delta(int, int, int); 93 | 94 | /** export function definition */ 95 | CUresult cuDriverGetVersion(int *driverVersion); 96 | CUresult cuInit(unsigned int flag); 97 | CUresult cuGetProcAddress(const char *symbol, void **pfn, int cudaVersion, 98 | cuuint64_t flags); 99 | CUresult cuMemAllocManaged(CUdeviceptr *dptr, size_t bytesize, 100 | unsigned int flags); 101 | CUresult cuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize); 102 | CUresult cuMemAlloc(CUdeviceptr *dptr, size_t bytesize); 103 | CUresult cuMemAllocPitch_v2(CUdeviceptr *dptr, size_t *pPitch, 104 | size_t WidthInBytes, size_t Height, 105 | unsigned int ElementSizeBytes); 106 | CUresult cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch, size_t WidthInBytes, 107 | size_t Height, unsigned int ElementSizeBytes); 108 | CUresult cuArrayCreate_v2(CUarray *pHandle, 109 | const CUDA_ARRAY_DESCRIPTOR *pAllocateArray); 110 | CUresult cuArrayCreate(CUarray *pHandle, 111 | const CUDA_ARRAY_DESCRIPTOR *pAllocateArray); 112 | CUresult cuArray3DCreate_v2(CUarray *pHandle, 113 | const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray); 114 | CUresult cuArray3DCreate(CUarray *pHandle, 115 | const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray); 116 | CUresult 117 | cuMipmappedArrayCreate(CUmipmappedArray *pHandle, 118 | const CUDA_ARRAY3D_DESCRIPTOR *pMipmappedArrayDesc, 119 | unsigned int numMipmapLevels); 120 | CUresult cuDeviceTotalMem_v2(size_t *bytes, CUdevice dev); 121 | CUresult cuDeviceTotalMem(size_t *bytes, CUdevice dev); 122 | CUresult cuMemGetInfo_v2(size_t *free, size_t *total); 123 | CUresult cuMemGetInfo(size_t *free, size_t *total); 124 | 
CUresult cuLaunchKernel_ptsz(CUfunction f, unsigned int gridDimX, 125 | unsigned int gridDimY, unsigned int gridDimZ, 126 | unsigned int blockDimX, unsigned int blockDimY, 127 | unsigned int blockDimZ, 128 | unsigned int sharedMemBytes, CUstream hStream, 129 | void **kernelParams, void **extra); 130 | CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX, 131 | unsigned int gridDimY, unsigned int gridDimZ, 132 | unsigned int blockDimX, unsigned int blockDimY, 133 | unsigned int blockDimZ, unsigned int sharedMemBytes, 134 | CUstream hStream, void **kernelParams, void **extra); 135 | CUresult cuLaunch(CUfunction f); 136 | CUresult cuLaunchCooperativeKernel_ptsz( 137 | CUfunction f, unsigned int gridDimX, unsigned int gridDimY, 138 | unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, 139 | unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, 140 | void **kernelParams); 141 | CUresult cuLaunchCooperativeKernel(CUfunction f, unsigned int gridDimX, 142 | unsigned int gridDimY, unsigned int gridDimZ, 143 | unsigned int blockDimX, 144 | unsigned int blockDimY, 145 | unsigned int blockDimZ, 146 | unsigned int sharedMemBytes, 147 | CUstream hStream, void **kernelParams); 148 | CUresult cuLaunchGrid(CUfunction f, int grid_width, int grid_height); 149 | CUresult cuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, 150 | CUstream hStream); 151 | CUresult cuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z); 152 | 153 | entry_t cuda_hooks_entry[] = { 154 | {.name = "cuDriverGetVersion", .fn_ptr = cuDriverGetVersion}, 155 | {.name = "cuInit", .fn_ptr = cuInit}, 156 | {.name = "cuGetProcAddress", .fn_ptr = cuGetProcAddress}, 157 | {.name = "cuMemAllocManaged", .fn_ptr = cuMemAllocManaged}, 158 | {.name = "cuMemAlloc_v2", .fn_ptr = cuMemAlloc_v2}, 159 | {.name = "cuMemAlloc", .fn_ptr = cuMemAlloc}, 160 | {.name = "cuMemAllocPitch_v2", .fn_ptr = cuMemAllocPitch_v2}, 161 | {.name = "cuMemAllocPitch", .fn_ptr = 
cuMemAllocPitch}, 162 | {.name = "cuArrayCreate_v2", .fn_ptr = cuArrayCreate_v2}, 163 | {.name = "cuArrayCreate", .fn_ptr = cuArrayCreate}, 164 | {.name = "cuArray3DCreate_v2", .fn_ptr = cuArray3DCreate_v2}, 165 | {.name = "cuArray3DCreate", .fn_ptr = cuArray3DCreate}, 166 | {.name = "cuMipmappedArrayCreate", .fn_ptr = cuMipmappedArrayCreate}, 167 | {.name = "cuDeviceTotalMem_v2", .fn_ptr = cuDeviceTotalMem_v2}, 168 | {.name = "cuDeviceTotalMem", .fn_ptr = cuDeviceTotalMem}, 169 | {.name = "cuMemGetInfo_v2", .fn_ptr = cuMemGetInfo_v2}, 170 | {.name = "cuMemGetInfo", .fn_ptr = cuMemGetInfo}, 171 | {.name = "cuLaunchKernel_ptsz", .fn_ptr = cuLaunchKernel_ptsz}, 172 | {.name = "cuLaunchKernel", .fn_ptr = cuLaunchKernel}, 173 | {.name = "cuLaunch", .fn_ptr = cuLaunch}, 174 | {.name = "cuLaunchCooperativeKernel_ptsz", 175 | .fn_ptr = cuLaunchCooperativeKernel_ptsz}, 176 | {.name = "cuLaunchCooperativeKernel", .fn_ptr = cuLaunchCooperativeKernel}, 177 | {.name = "cuLaunchGrid", .fn_ptr = cuLaunchGrid}, 178 | {.name = "cuLaunchGridAsync", .fn_ptr = cuLaunchGridAsync}, 179 | {.name = "cuFuncSetBlockShape", .fn_ptr = cuFuncSetBlockShape}, 180 | }; 181 | 182 | const int cuda_hook_nums = 183 | sizeof(cuda_hooks_entry) / sizeof(cuda_hooks_entry[0]); 184 | 185 | /** dynamic rate control */ 186 | typedef struct { 187 | int user_current; 188 | int sys_current; 189 | int valid; 190 | uint64_t checktime; 191 | int sys_process_num; 192 | } utilization_t; 193 | 194 | /** helper function */ 195 | int int_match(const void *a, const void *b) { 196 | const int *ra = (const int *)a; 197 | const int *rb = (const int *)b; 198 | 199 | if (*ra < *rb) { 200 | return -1; 201 | } 202 | 203 | if (*ra > *rb) { 204 | return 1; 205 | } 206 | 207 | return 0; 208 | } 209 | 210 | static void atomic_action(const char *filename, atomic_fn_ptr fn_ptr, 211 | void *arg) { 212 | int fd; 213 | 214 | fd = open(filename, O_RDONLY); 215 | if (unlikely(fd == -1)) { 216 | LOGGER(FATAL, "can't open %s, error %s", 
filename, strerror(errno)); 217 | } 218 | 219 | fn_ptr(fd, arg); 220 | 221 | close(fd); 222 | } 223 | 224 | const char *nvml_error(nvmlReturn_t code) { 225 | const char *(*err_fn)(nvmlReturn_t) = NULL; 226 | 227 | err_fn = nvml_library_entry[NVML_ENTRY_ENUM(nvmlErrorString)].fn_ptr; 228 | if (unlikely(!err_fn)) { 229 | LOGGER(FATAL, "can't find nvmlErrorString"); 230 | } 231 | 232 | return err_fn(code); 233 | } 234 | 235 | const char *cuda_error(CUresult code, const char **p) { 236 | CUDA_ENTRY_CALL(cuda_library_entry, cuGetErrorString, code, p); 237 | 238 | return *p; 239 | } 240 | 241 | static void change_token(int delta) { 242 | int cuda_cores_before = 0, cuda_cores_after = 0; 243 | 244 | LOGGER(5, "delta: %d, curr: %d", delta, g_cur_cuda_cores); 245 | do { 246 | cuda_cores_before = g_cur_cuda_cores; 247 | cuda_cores_after = cuda_cores_before + delta; 248 | 249 | if (unlikely(cuda_cores_after > g_total_cuda_cores)) { 250 | cuda_cores_after = g_total_cuda_cores; 251 | } 252 | } while (!CAS(&g_cur_cuda_cores, cuda_cores_before, cuda_cores_after)); 253 | } 254 | 255 | static void rate_limiter(int grids, int blocks) { 256 | int before_cuda_cores = 0; 257 | int after_cuda_cores = 0; 258 | int kernel_size = grids; 259 | 260 | LOGGER(5, "grid: %d, blocks: %d", grids, blocks); 261 | LOGGER(5, "launch kernel %d, curr core: %d", kernel_size, g_cur_cuda_cores); 262 | if (g_vcuda_config.enable) { 263 | do { 264 | CHECK: 265 | before_cuda_cores = g_cur_cuda_cores; 266 | LOGGER(8, "current core: %d", g_cur_cuda_cores); 267 | if (before_cuda_cores < 0) { 268 | nanosleep(&g_cycle, NULL); 269 | goto CHECK; 270 | } 271 | after_cuda_cores = before_cuda_cores - kernel_size; 272 | } while (!CAS(&g_cur_cuda_cores, before_cuda_cores, after_cuda_cores)); 273 | } 274 | } 275 | 276 | static int delta(int up_limit, int user_current, int share) { 277 | int utilization_diff = 278 | abs(up_limit - user_current) < 5 ? 
5 : abs(up_limit - user_current); 279 | int increment = 280 | g_sm_num * g_sm_num * g_max_thread_per_sm / 256 * utilization_diff / 10; 281 | 282 | /* Accelerate cuda cores allocation when utilization vary widely */ 283 | if (utilization_diff > up_limit / 2) { 284 | increment = increment * utilization_diff * 2 / (up_limit + 1); 285 | } 286 | 287 | if (unlikely(increment < 0)) { 288 | LOGGER(3, "overflow: %d, current sm: %d, thread_per_sm: %d, diff: %d", 289 | increment, g_sm_num, g_max_thread_per_sm, utilization_diff); 290 | } 291 | 292 | if (user_current <= up_limit) { 293 | share = share + increment > g_total_cuda_cores ? g_total_cuda_cores 294 | : share + increment; 295 | } else { 296 | share = share - increment < 0 ? 0 : share - increment; 297 | } 298 | 299 | return share; 300 | } 301 | 302 | // #lizard forgives 303 | static void *utilization_watcher(void *arg UNUSED) { 304 | utilization_t top_result = { 305 | .user_current = 0, 306 | .sys_current = 0, 307 | .sys_process_num = 0, 308 | }; 309 | int sys_free = 0; 310 | int share = 0; 311 | int i = 0; 312 | int avg_sys_free = 0; 313 | int pre_sys_process_num = 1; 314 | int up_limit = g_vcuda_config.utilization; 315 | 316 | LOGGER(5, "start %s", __FUNCTION__); 317 | LOGGER(4, "sm: %d, thread per sm: %d", g_sm_num, g_max_thread_per_sm); 318 | while (1) { 319 | nanosleep(&g_wait, NULL); 320 | do { 321 | atomic_action(pid_path, get_used_gpu_utilization, (void *)&top_result); 322 | } while (!top_result.valid); 323 | 324 | sys_free = MAX_UTILIZATION - top_result.sys_current; 325 | 326 | if (g_vcuda_config.hard_limit) { 327 | /* Avoid usage jitter when application is initialized*/ 328 | if (top_result.sys_process_num == 1 && 329 | top_result.user_current < up_limit / 10) { 330 | g_cur_cuda_cores = 331 | delta(g_vcuda_config.utilization, top_result.user_current, share); 332 | continue; 333 | } 334 | share = delta(g_vcuda_config.utilization, top_result.user_current, share); 335 | } else { 336 | if (pre_sys_process_num != 
top_result.sys_process_num) { 337 | /* When a new process comes, all processes are reset to initial value*/ 338 | if (pre_sys_process_num < top_result.sys_process_num) { 339 | share = g_max_thread_per_sm; 340 | up_limit = g_vcuda_config.utilization; 341 | i = 0; 342 | avg_sys_free = 0; 343 | } 344 | pre_sys_process_num = top_result.sys_process_num; 345 | } 346 | 347 | /* 1.Only one process on the GPU 348 | * Allocate cuda cores according to the limit value. 349 | * 350 | * 2.Multiple processes on the GPU 351 | * First, change the up_limit of the process according to the 352 | * historical resource utilization. Second, allocate the cuda 353 | * cores according to the changed limit value.*/ 354 | if (top_result.sys_process_num == 1) { 355 | share = delta(g_vcuda_config.limit, top_result.user_current, share); 356 | } else { 357 | i++; 358 | avg_sys_free += sys_free; 359 | if (i % CHANGE_LIMIT_INTERVAL == 0) { 360 | if (avg_sys_free * 2 / CHANGE_LIMIT_INTERVAL > USAGE_THRESHOLD) { 361 | up_limit = up_limit + g_vcuda_config.utilization / 10 > 362 | g_vcuda_config.limit 363 | ? g_vcuda_config.limit 364 | : up_limit + g_vcuda_config.utilization / 10; 365 | } 366 | i = 0; 367 | } 368 | avg_sys_free = i % (CHANGE_LIMIT_INTERVAL / 2) == 0 ? 
0 : avg_sys_free; 369 | share = delta(up_limit, top_result.user_current, share); 370 | } 371 | } 372 | 373 | change_token(share); 374 | 375 | LOGGER(4, "util: %d, up_limit: %d, share: %d, cur: %d", 376 | top_result.user_current, up_limit, share, g_cur_cuda_cores); 377 | } 378 | } 379 | 380 | static void active_utilization_notifier() { 381 | pthread_t tid; 382 | 383 | pthread_create(&tid, NULL, utilization_watcher, NULL); 384 | 385 | #ifdef __APPLE__ 386 | pthread_setname_np("utilization_watcher"); 387 | #else 388 | pthread_setname_np(tid, "utilization_watcher"); 389 | #endif 390 | } 391 | 392 | static void get_used_gpu_utilization(int fd, void *arg) { 393 | nvmlProcessUtilizationSample_t processes_sample[MAX_PIDS]; 394 | int processes_num = MAX_PIDS; 395 | unsigned int running_processes = MAX_PIDS; 396 | nvmlProcessInfo_t pids_on_device[MAX_PIDS]; 397 | nvmlDevice_t dev; 398 | utilization_t *top_result = (utilization_t *)arg; 399 | nvmlReturn_t ret; 400 | struct timeval cur; 401 | size_t microsec; 402 | int codec_util = 0; 403 | 404 | int i; 405 | 406 | ret = 407 | NVML_ENTRY_CALL(nvml_library_entry, nvmlDeviceGetHandleByIndex, 0, &dev); 408 | if (unlikely(ret)) { 409 | LOGGER(4, "nvmlDeviceGetHandleByIndex: %s", nvml_error(ret)); 410 | return; 411 | } 412 | 413 | ret = 414 | NVML_ENTRY_CALL(nvml_library_entry, nvmlDeviceGetComputeRunningProcesses, 415 | dev, &running_processes, pids_on_device); 416 | if (unlikely(ret)) { 417 | LOGGER(4, "nvmlDeviceGetComputeRunningProcesses: %s", nvml_error(ret)); 418 | return; 419 | } 420 | 421 | top_result->sys_process_num = running_processes; 422 | 423 | load_pids_table(fd, NULL); 424 | gettimeofday(&cur, NULL); 425 | microsec = (cur.tv_sec - 1) * 1000UL * 1000UL + cur.tv_usec; 426 | top_result->checktime = microsec; 427 | ret = NVML_ENTRY_CALL(nvml_library_entry, nvmlDeviceGetProcessUtilization, 428 | dev, processes_sample, &processes_num, microsec); 429 | if (unlikely(ret)) { 430 | LOGGER(4, "nvmlDeviceGetProcessUtilization: 
%s", nvml_error(ret)); 431 | return; 432 | } 433 | 434 | top_result->user_current = 0; 435 | top_result->sys_current = 0; 436 | for (i = 0; i < processes_num; i++) { 437 | if (processes_sample[i].timeStamp >= top_result->checktime) { 438 | top_result->valid = 1; 439 | top_result->sys_current += GET_VALID_VALUE(processes_sample[i].smUtil); 440 | 441 | codec_util = GET_VALID_VALUE(processes_sample[i].encUtil) + 442 | GET_VALID_VALUE(processes_sample[i].decUtil); 443 | top_result->sys_current += CODEC_NORMALIZE(codec_util); 444 | 445 | LOGGER(8, "try to find %d from pid tables", processes_sample[i].pid); 446 | if (likely(bsearch(&processes_sample[i].pid, g_pids_table, 447 | (size_t)g_pids_table_size, sizeof(int), int_match))) { 448 | top_result->user_current += GET_VALID_VALUE(processes_sample[i].smUtil); 449 | 450 | codec_util = GET_VALID_VALUE(processes_sample[i].encUtil) + 451 | GET_VALID_VALUE(processes_sample[i].decUtil); 452 | top_result->user_current += CODEC_NORMALIZE(codec_util); 453 | } 454 | } 455 | } 456 | 457 | LOGGER(5, "sys utilization: %d", top_result->sys_current); 458 | LOGGER(5, "used utilization: %d", top_result->user_current); 459 | } 460 | 461 | static void load_pids_table(int fd, void *arg UNUSED) { 462 | int item = 0; 463 | int rsize = 0; 464 | int i = 0; 465 | 466 | for (item = 0; item < MAX_PIDS; item++) { 467 | rsize = (int)read(fd, g_pids_table + item, sizeof(int)); 468 | if (unlikely(rsize != sizeof(int))) { 469 | break; 470 | } 471 | } 472 | 473 | for (i = 0; i < item; i++) { 474 | LOGGER(8, "pid: %d", g_pids_table[i]); 475 | } 476 | 477 | g_pids_table_size = item; 478 | 479 | LOGGER(8, "read %d items from %s", g_pids_table_size, pid_path); 480 | } 481 | 482 | static void get_used_gpu_memory(int fd, void *arg) { 483 | size_t *used_memory = arg; 484 | 485 | nvmlDevice_t dev; 486 | nvmlProcessInfo_t pids_on_device[MAX_PIDS]; 487 | unsigned int size_on_device = MAX_PIDS; 488 | int ret; 489 | 490 | unsigned int i; 491 | 492 | 
load_pids_table(fd, NULL); 493 | 494 | ret = 495 | NVML_ENTRY_CALL(nvml_library_entry, nvmlDeviceGetHandleByIndex, 0, &dev); 496 | if (unlikely(ret)) { 497 | LOGGER(4, "nvmlDeviceGetHandleByIndex can't find device 0, return %d", ret); 498 | *used_memory = g_vcuda_config.gpu_memory; 499 | return; 500 | } 501 | 502 | ret = 503 | NVML_ENTRY_CALL(nvml_library_entry, nvmlDeviceGetComputeRunningProcesses, 504 | dev, &size_on_device, pids_on_device); 505 | if (unlikely(ret)) { 506 | LOGGER(4, 507 | "nvmlDeviceGetComputeRunningProcesses can't get pids on device 0, " 508 | "return %d", 509 | ret); 510 | *used_memory = g_vcuda_config.gpu_memory; 511 | return; 512 | } 513 | 514 | for (i = 0; i < size_on_device; i++) { 515 | LOGGER(4, "summary: %d used %lld", pids_on_device[i].pid, 516 | pids_on_device[i].usedGpuMemory); 517 | } 518 | 519 | for (i = 0; i < size_on_device; i++) { 520 | if (bsearch(&pids_on_device[i].pid, g_pids_table, (size_t)g_pids_table_size, 521 | sizeof(int), int_match)) { 522 | LOGGER(4, "%d use memory: %lld", pids_on_device[i].pid, 523 | pids_on_device[i].usedGpuMemory); 524 | *used_memory += pids_on_device[i].usedGpuMemory; 525 | } 526 | } 527 | 528 | LOGGER(4, "total used memory: %zu", *used_memory); 529 | } 530 | 531 | // #lizard forgives 532 | static void register_to_remote() { 533 | nvmlPciInfo_t pci_info; 534 | nvmlDevice_t nvml_dev; 535 | int ret; 536 | 537 | ret = NVML_ENTRY_CALL(nvml_library_entry, nvmlDeviceGetHandleByIndex, 0, 538 | &nvml_dev); 539 | if (unlikely(ret)) { 540 | LOGGER(FATAL, "can't find device 0, error %s", 541 | nvml_error((nvmlReturn_t)ret)); 542 | } 543 | 544 | ret = NVML_ENTRY_CALL(nvml_library_entry, nvmlDeviceGetPciInfo, nvml_dev, 545 | &pci_info); 546 | if (unlikely(ret)) { 547 | LOGGER(FATAL, "can't find device 0, error %s", 548 | nvml_error((nvmlReturn_t)ret)); 549 | } 550 | 551 | strncpy(g_vcuda_config.bus_id, pci_info.busId, 552 | NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE); 553 | 554 | 
register_to_remote_with_data(g_vcuda_config.bus_id, g_vcuda_config.pod_uid, 555 | g_vcuda_config.container_name); 556 | } 557 | 558 | static void initialization() { 559 | int ret; 560 | const char *cuda_err_string = NULL; 561 | 562 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuInit, 0); 563 | if (unlikely(ret)) { 564 | LOGGER(FATAL, "cuInit error %s", 565 | cuda_error((CUresult)ret, &cuda_err_string)); 566 | } 567 | 568 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuDeviceGetAttribute, &g_sm_num, 569 | CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, 0); 570 | if (unlikely(ret)) { 571 | LOGGER(FATAL, "can't get processor number, error %s", 572 | cuda_error((CUresult)ret, &cuda_err_string)); 573 | } 574 | 575 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuDeviceGetAttribute, 576 | &g_max_thread_per_sm, 577 | CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, 0); 578 | if (unlikely(ret)) { 579 | LOGGER(FATAL, "can't get max thread per processor, error %s", 580 | cuda_error((CUresult)ret, &cuda_err_string)); 581 | } 582 | 583 | g_total_cuda_cores = g_max_thread_per_sm * g_sm_num * FACTOR; 584 | LOGGER(4, "total cuda cores: %d", g_total_cuda_cores); 585 | active_utilization_notifier(); 586 | } 587 | 588 | /** hijack entrypoint */ 589 | CUresult cuDriverGetVersion(int *driverVersion) { 590 | CUresult ret; 591 | 592 | load_necessary_data(); 593 | if (!is_custom_config_path()) { 594 | pthread_once(&g_register_set, register_to_remote); 595 | } 596 | pthread_once(&g_init_set, initialization); 597 | 598 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuDriverGetVersion, driverVersion); 599 | if (unlikely(ret)) { 600 | goto DONE; 601 | } 602 | 603 | DONE: 604 | return ret; 605 | } 606 | 607 | CUresult cuInit(unsigned int flag) { 608 | CUresult ret; 609 | 610 | load_necessary_data(); 611 | if (!is_custom_config_path()) { 612 | pthread_once(&g_register_set, register_to_remote); 613 | } 614 | pthread_once(&g_init_set, initialization); 615 | 616 | ret = CUDA_ENTRY_CALL(cuda_library_entry, 
cuInit, flag); 617 | 618 | if (unlikely(ret)) { 619 | goto DONE; 620 | } 621 | 622 | DONE: 623 | return ret; 624 | } 625 | 626 | CUresult cuGetProcAddress(const char *symbol, void **pfn, int cudaVersion, 627 | cuuint64_t flags) { 628 | CUresult ret; 629 | int i; 630 | 631 | load_necessary_data(); 632 | if (!is_custom_config_path()) { 633 | pthread_once(&g_register_set, register_to_remote); 634 | } 635 | pthread_once(&g_init_set, initialization); 636 | 637 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuGetProcAddress, symbol, pfn, 638 | cudaVersion, flags); 639 | if (ret == CUDA_SUCCESS) { 640 | for (i = 0; i < cuda_hook_nums; i++) { 641 | if (!strcmp(symbol, cuda_hooks_entry[i].name)) { 642 | LOGGER(5, "Match hook %s", symbol); 643 | *pfn = cuda_hooks_entry[i].fn_ptr; 644 | break; 645 | } 646 | } 647 | } 648 | 649 | return ret; 650 | } 651 | 652 | CUresult cuMemAllocManaged(CUdeviceptr *dptr, size_t bytesize, 653 | unsigned int flags) { 654 | size_t used = 0; 655 | size_t request_size = bytesize; 656 | CUresult ret; 657 | 658 | if (g_vcuda_config.enable) { 659 | atomic_action(pid_path, get_used_gpu_memory, (void *)&used); 660 | 661 | if (unlikely(used + request_size > g_vcuda_config.gpu_memory)) { 662 | ret = CUDA_ERROR_OUT_OF_MEMORY; 663 | goto DONE; 664 | } 665 | } 666 | 667 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuMemAllocManaged, dptr, bytesize, 668 | flags); 669 | DONE: 670 | return ret; 671 | } 672 | 673 | CUresult cuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize) { 674 | size_t used = 0; 675 | size_t request_size = bytesize; 676 | CUresult ret; 677 | 678 | if (g_vcuda_config.enable) { 679 | atomic_action(pid_path, get_used_gpu_memory, (void *)&used); 680 | 681 | if (unlikely(used + request_size > g_vcuda_config.gpu_memory)) { 682 | ret = CUDA_ERROR_OUT_OF_MEMORY; 683 | goto DONE; 684 | } 685 | } 686 | 687 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuMemAlloc_v2, dptr, bytesize); 688 | DONE: 689 | return ret; 690 | } 691 | 692 | CUresult 
cuMemAlloc(CUdeviceptr *dptr, size_t bytesize) { 693 | size_t used = 0; 694 | size_t request_size = bytesize; 695 | CUresult ret; 696 | 697 | if (g_vcuda_config.enable) { 698 | atomic_action(pid_path, get_used_gpu_memory, (void *)&used); 699 | 700 | if (unlikely(used + request_size > g_vcuda_config.gpu_memory)) { 701 | ret = CUDA_ERROR_OUT_OF_MEMORY; 702 | goto DONE; 703 | } 704 | } 705 | 706 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuMemAlloc, dptr, bytesize); 707 | DONE: 708 | return ret; 709 | } 710 | 711 | CUresult cuMemAllocPitch_v2(CUdeviceptr *dptr, size_t *pPitch, 712 | size_t WidthInBytes, size_t Height, 713 | unsigned int ElementSizeBytes) { 714 | size_t used = 0; 715 | size_t request_size = ROUND_UP(WidthInBytes * Height, ElementSizeBytes); 716 | CUresult ret; 717 | 718 | if (g_vcuda_config.enable) { 719 | atomic_action(pid_path, get_used_gpu_memory, (void *)&used); 720 | 721 | if (unlikely(used + request_size > g_vcuda_config.gpu_memory)) { 722 | ret = CUDA_ERROR_OUT_OF_MEMORY; 723 | goto DONE; 724 | } 725 | } 726 | 727 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuMemAllocPitch_v2, dptr, pPitch, 728 | WidthInBytes, Height, ElementSizeBytes); 729 | DONE: 730 | return ret; 731 | } 732 | 733 | CUresult cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch, size_t WidthInBytes, 734 | size_t Height, unsigned int ElementSizeBytes) { 735 | size_t used = 0; 736 | size_t request_size = ROUND_UP(WidthInBytes * Height, ElementSizeBytes); 737 | CUresult ret; 738 | 739 | if (g_vcuda_config.enable) { 740 | atomic_action(pid_path, get_used_gpu_memory, (void *)&used); 741 | 742 | if (unlikely(used + request_size > g_vcuda_config.gpu_memory)) { 743 | ret = CUDA_ERROR_OUT_OF_MEMORY; 744 | goto DONE; 745 | } 746 | } 747 | 748 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuMemAllocPitch, dptr, pPitch, 749 | WidthInBytes, Height, ElementSizeBytes); 750 | DONE: 751 | return ret; 752 | } 753 | 754 | static size_t get_array_base_size(int format) { 755 | size_t base_size = 0; 
756 | 757 | switch (format) { 758 | case CU_AD_FORMAT_UNSIGNED_INT8: 759 | case CU_AD_FORMAT_SIGNED_INT8: 760 | base_size = 8; 761 | break; 762 | case CU_AD_FORMAT_UNSIGNED_INT16: 763 | case CU_AD_FORMAT_SIGNED_INT16: 764 | case CU_AD_FORMAT_HALF: 765 | base_size = 16; 766 | break; 767 | case CU_AD_FORMAT_UNSIGNED_INT32: 768 | case CU_AD_FORMAT_SIGNED_INT32: 769 | case CU_AD_FORMAT_FLOAT: 770 | base_size = 32; 771 | break; 772 | default: 773 | base_size = 32; 774 | } 775 | 776 | return base_size; 777 | } 778 | 779 | static CUresult 780 | cuArrayCreate_helper(const CUDA_ARRAY_DESCRIPTOR *pAllocateArray) { 781 | size_t used = 0; 782 | size_t base_size = 0; 783 | size_t request_size = 0; 784 | CUresult ret = CUDA_SUCCESS; 785 | 786 | if (g_vcuda_config.enable) { 787 | base_size = get_array_base_size(pAllocateArray->Format); 788 | request_size = base_size * pAllocateArray->NumChannels * 789 | pAllocateArray->Height * pAllocateArray->Width; 790 | 791 | atomic_action(pid_path, get_used_gpu_memory, (void *)&used); 792 | 793 | if (unlikely(used + request_size > g_vcuda_config.gpu_memory)) { 794 | ret = CUDA_ERROR_OUT_OF_MEMORY; 795 | goto DONE; 796 | } 797 | } 798 | 799 | DONE: 800 | return ret; 801 | } 802 | 803 | CUresult cuArrayCreate_v2(CUarray *pHandle, 804 | const CUDA_ARRAY_DESCRIPTOR *pAllocateArray) { 805 | CUresult ret; 806 | 807 | ret = cuArrayCreate_helper(pAllocateArray); 808 | if (ret != CUDA_SUCCESS) { 809 | goto DONE; 810 | } 811 | 812 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuArrayCreate_v2, pHandle, 813 | pAllocateArray); 814 | DONE: 815 | return ret; 816 | } 817 | 818 | CUresult cuArrayCreate(CUarray *pHandle, 819 | const CUDA_ARRAY_DESCRIPTOR *pAllocateArray) { 820 | CUresult ret; 821 | 822 | ret = cuArrayCreate_helper(pAllocateArray); 823 | if (ret != CUDA_SUCCESS) { 824 | goto DONE; 825 | } 826 | 827 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuArrayCreate, pHandle, 828 | pAllocateArray); 829 | DONE: 830 | return ret; 831 | } 832 | 833 | static 
CUresult 834 | cuArray3DCreate_helper(const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray) { 835 | size_t used = 0; 836 | size_t base_size = 0; 837 | size_t request_size = 0; 838 | CUresult ret = CUDA_SUCCESS; 839 | 840 | if (g_vcuda_config.enable) { 841 | base_size = get_array_base_size(pAllocateArray->Format); 842 | request_size = base_size * pAllocateArray->NumChannels * 843 | pAllocateArray->Height * pAllocateArray->Width * 844 | pAllocateArray->Depth; 845 | 846 | atomic_action(pid_path, get_used_gpu_memory, (void *)&used); 847 | 848 | if (unlikely(used + request_size > g_vcuda_config.gpu_memory)) { 849 | ret = CUDA_ERROR_OUT_OF_MEMORY; 850 | goto DONE; 851 | } 852 | } 853 | 854 | DONE: 855 | return ret; 856 | } 857 | 858 | CUresult cuArray3DCreate_v2(CUarray *pHandle, 859 | const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray) { 860 | CUresult ret; 861 | 862 | ret = cuArray3DCreate_helper(pAllocateArray); 863 | if (ret != CUDA_SUCCESS) { 864 | goto DONE; 865 | } 866 | 867 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuArray3DCreate_v2, pHandle, 868 | pAllocateArray); 869 | DONE: 870 | return ret; 871 | } 872 | 873 | CUresult cuArray3DCreate(CUarray *pHandle, 874 | const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray) { 875 | CUresult ret; 876 | 877 | ret = cuArray3DCreate_helper(pAllocateArray); 878 | if (ret != CUDA_SUCCESS) { 879 | goto DONE; 880 | } 881 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuArray3DCreate, pHandle, 882 | pAllocateArray); 883 | DONE: 884 | return ret; 885 | } 886 | 887 | CUresult 888 | cuMipmappedArrayCreate(CUmipmappedArray *pHandle, 889 | const CUDA_ARRAY3D_DESCRIPTOR *pMipmappedArrayDesc, 890 | unsigned int numMipmapLevels) { 891 | size_t used = 0; 892 | size_t base_size = 0; 893 | size_t request_size = 0; 894 | CUresult ret; 895 | 896 | if (g_vcuda_config.enable) { 897 | base_size = get_array_base_size(pMipmappedArrayDesc->Format); 898 | request_size = base_size * pMipmappedArrayDesc->NumChannels * 899 | pMipmappedArrayDesc->Height * 
pMipmappedArrayDesc->Width * 900 | pMipmappedArrayDesc->Depth; 901 | 902 | atomic_action(pid_path, get_used_gpu_memory, (void *)&used); 903 | 904 | if (unlikely(used + request_size > g_vcuda_config.gpu_memory)) { 905 | ret = CUDA_ERROR_OUT_OF_MEMORY; 906 | goto DONE; 907 | } 908 | } 909 | 910 | ret = CUDA_ENTRY_CALL(cuda_library_entry, cuMipmappedArrayCreate, pHandle, 911 | pMipmappedArrayDesc, numMipmapLevels); 912 | DONE: 913 | return ret; 914 | } 915 | 916 | CUresult cuDeviceTotalMem_v2(size_t *bytes, CUdevice dev) { 917 | if (g_vcuda_config.enable) { 918 | *bytes = g_vcuda_config.gpu_memory; 919 | 920 | return CUDA_SUCCESS; 921 | } 922 | 923 | return CUDA_ENTRY_CALL(cuda_library_entry, cuDeviceTotalMem_v2, bytes, dev); 924 | } 925 | 926 | CUresult cuDeviceTotalMem(size_t *bytes, CUdevice dev) { 927 | if (g_vcuda_config.enable) { 928 | *bytes = g_vcuda_config.gpu_memory; 929 | 930 | return CUDA_SUCCESS; 931 | } 932 | 933 | return CUDA_ENTRY_CALL(cuda_library_entry, cuDeviceTotalMem, bytes, dev); 934 | } 935 | 936 | CUresult cuMemGetInfo_v2(size_t *free, size_t *total) { 937 | size_t used = 0; 938 | 939 | if (g_vcuda_config.enable) { 940 | atomic_action(pid_path, get_used_gpu_memory, (void *)&used); 941 | 942 | *total = g_vcuda_config.gpu_memory; 943 | *free = 944 | used > g_vcuda_config.gpu_memory ? 0 : g_vcuda_config.gpu_memory - used; 945 | 946 | return CUDA_SUCCESS; 947 | } 948 | 949 | return CUDA_ENTRY_CALL(cuda_library_entry, cuMemGetInfo_v2, free, total); 950 | } 951 | 952 | CUresult cuMemGetInfo(size_t *free, size_t *total) { 953 | size_t used = 0; 954 | 955 | if (g_vcuda_config.enable) { 956 | atomic_action(pid_path, get_used_gpu_memory, (void *)&used); 957 | 958 | *total = g_vcuda_config.gpu_memory; 959 | *free = 960 | used > g_vcuda_config.gpu_memory ? 
0 : g_vcuda_config.gpu_memory - used; 961 | 962 | return CUDA_SUCCESS; 963 | } 964 | 965 | return CUDA_ENTRY_CALL(cuda_library_entry, cuMemGetInfo, free, total); 966 | } 967 | 968 | CUresult cuLaunchKernel_ptsz(CUfunction f, unsigned int gridDimX, 969 | unsigned int gridDimY, unsigned int gridDimZ, 970 | unsigned int blockDimX, unsigned int blockDimY, 971 | unsigned int blockDimZ, 972 | unsigned int sharedMemBytes, CUstream hStream, 973 | void **kernelParams, void **extra) { 974 | rate_limiter(gridDimX * gridDimY * gridDimZ, 975 | blockDimX * blockDimY * blockDimZ); 976 | 977 | return CUDA_ENTRY_CALL(cuda_library_entry, cuLaunchKernel_ptsz, f, gridDimX, 978 | gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 979 | sharedMemBytes, hStream, kernelParams, extra); 980 | } 981 | 982 | CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX, 983 | unsigned int gridDimY, unsigned int gridDimZ, 984 | unsigned int blockDimX, unsigned int blockDimY, 985 | unsigned int blockDimZ, unsigned int sharedMemBytes, 986 | CUstream hStream, void **kernelParams, void **extra) { 987 | rate_limiter(gridDimX * gridDimY * gridDimZ, 988 | blockDimX * blockDimY * blockDimZ); 989 | 990 | return CUDA_ENTRY_CALL(cuda_library_entry, cuLaunchKernel, f, gridDimX, 991 | gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 992 | sharedMemBytes, hStream, kernelParams, extra); 993 | } 994 | 995 | CUresult cuLaunch(CUfunction f) { 996 | rate_limiter(1, g_block_x * g_block_y * g_block_z); 997 | return CUDA_ENTRY_CALL(cuda_library_entry, cuLaunch, f); 998 | } 999 | 1000 | CUresult cuLaunchCooperativeKernel_ptsz( 1001 | CUfunction f, unsigned int gridDimX, unsigned int gridDimY, 1002 | unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, 1003 | unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, 1004 | void **kernelParams) { 1005 | rate_limiter(gridDimX * gridDimY * gridDimZ, 1006 | blockDimX * blockDimY * blockDimZ); 1007 | return 
CUDA_ENTRY_CALL(cuda_library_entry, cuLaunchCooperativeKernel_ptsz, f, 1008 | gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, 1009 | blockDimZ, sharedMemBytes, hStream, kernelParams); 1010 | } 1011 | 1012 | CUresult cuLaunchCooperativeKernel(CUfunction f, unsigned int gridDimX, 1013 | unsigned int gridDimY, unsigned int gridDimZ, 1014 | unsigned int blockDimX, 1015 | unsigned int blockDimY, 1016 | unsigned int blockDimZ, 1017 | unsigned int sharedMemBytes, 1018 | CUstream hStream, void **kernelParams) { 1019 | rate_limiter(gridDimX * gridDimY * gridDimZ, 1020 | blockDimX * blockDimY * blockDimZ); 1021 | return CUDA_ENTRY_CALL(cuda_library_entry, cuLaunchCooperativeKernel, f, 1022 | gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, 1023 | blockDimZ, sharedMemBytes, hStream, kernelParams); 1024 | } 1025 | 1026 | CUresult cuLaunchGrid(CUfunction f, int grid_width, int grid_height) { 1027 | rate_limiter(grid_width * grid_height, g_block_x * g_block_y * g_block_z); 1028 | return CUDA_ENTRY_CALL(cuda_library_entry, cuLaunchGrid, f, grid_width, 1029 | grid_height); 1030 | } 1031 | 1032 | CUresult cuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, 1033 | CUstream hStream) { 1034 | rate_limiter(grid_width * grid_height, g_block_x * g_block_y * g_block_z); 1035 | return CUDA_ENTRY_CALL(cuda_library_entry, cuLaunchGridAsync, f, grid_width, 1036 | grid_height, hStream); 1037 | } 1038 | 1039 | CUresult cuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z) { 1040 | if (g_vcuda_config.enable) { 1041 | while (!CAS(&g_block_locker, 0, 1)) { 1042 | } 1043 | 1044 | g_block_x = x; 1045 | g_block_y = y; 1046 | g_block_z = z; 1047 | 1048 | LOGGER(5, "Set block shape: %d, %d, %d", x, y, z); 1049 | 1050 | while (!CAS(&g_block_locker, 1, 0)) { 1051 | } 1052 | } 1053 | return CUDA_ENTRY_CALL(cuda_library_entry, cuFuncSetBlockShape, hfunc, x, y, 1054 | z); 1055 | } 1056 | -------------------------------------------------------------------------------- /src/loader.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack 3 | * available. 4 | * 5 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 6 | * 7 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 8 | * use this file except in compliance with the License. You may obtain a copy of 9 | * the License at 10 | * 11 | * https://opensource.org/licenses/Apache-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 15 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations under the License. 17 | */ 18 | 19 | // 20 | // Created by thomas on 6/15/18. 21 | // 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #include "include/cuda-helper.h" 31 | #include "include/hijack.h" 32 | #include "include/nvml-helper.h" 33 | 34 | entry_t cuda_library_entry[] = { 35 | {.name = "cuInit"}, 36 | {.name = "cuDeviceGet"}, 37 | {.name = "cuDeviceGetCount"}, 38 | {.name = "cuDeviceGetName"}, 39 | {.name = "cuDeviceTotalMem_v2"}, 40 | {.name = "cuDeviceGetAttribute"}, 41 | {.name = "cuDeviceGetP2PAttribute"}, 42 | {.name = "cuDriverGetVersion"}, 43 | {.name = "cuDeviceGetByPCIBusId"}, 44 | {.name = "cuDeviceGetPCIBusId"}, 45 | {.name = "cuDevicePrimaryCtxRetain"}, 46 | {.name = "cuDevicePrimaryCtxRelease"}, 47 | {.name = "cuDevicePrimaryCtxSetFlags"}, 48 | {.name = "cuDevicePrimaryCtxGetState"}, 49 | {.name = "cuDevicePrimaryCtxReset"}, 50 | {.name = "cuCtxCreate_v2"}, 51 | {.name = "cuCtxGetFlags"}, 52 | {.name = "cuCtxSetCurrent"}, 53 | {.name = "cuCtxGetCurrent"}, 54 | {.name = "cuCtxDetach"}, 55 | {.name = "cuCtxGetApiVersion"}, 56 | {.name = "cuCtxGetDevice"}, 57 | {.name = "cuCtxGetLimit"}, 58 | 
{.name = "cuCtxSetLimit"}, 59 | {.name = "cuCtxGetCacheConfig"}, 60 | {.name = "cuCtxSetCacheConfig"}, 61 | {.name = "cuCtxGetSharedMemConfig"}, 62 | {.name = "cuCtxGetStreamPriorityRange"}, 63 | {.name = "cuCtxSetSharedMemConfig"}, 64 | {.name = "cuCtxSynchronize"}, 65 | {.name = "cuModuleLoad"}, 66 | {.name = "cuModuleLoadData"}, 67 | {.name = "cuModuleLoadFatBinary"}, 68 | {.name = "cuModuleUnload"}, 69 | {.name = "cuModuleGetFunction"}, 70 | {.name = "cuModuleGetGlobal_v2"}, 71 | {.name = "cuModuleGetTexRef"}, 72 | {.name = "cuModuleGetSurfRef"}, 73 | {.name = "cuLinkCreate"}, 74 | {.name = "cuLinkAddData"}, 75 | {.name = "cuLinkAddFile"}, 76 | {.name = "cuLinkComplete"}, 77 | {.name = "cuLinkDestroy"}, 78 | {.name = "cuMemGetInfo_v2"}, 79 | {.name = "cuMemAllocManaged"}, 80 | {.name = "cuMemAlloc_v2"}, 81 | {.name = "cuMemAllocPitch_v2"}, 82 | {.name = "cuMemFree_v2"}, 83 | {.name = "cuMemGetAddressRange_v2"}, 84 | {.name = "cuMemFreeHost"}, 85 | {.name = "cuMemHostAlloc"}, 86 | {.name = "cuMemHostGetDevicePointer_v2"}, 87 | {.name = "cuMemHostGetFlags"}, 88 | {.name = "cuMemHostRegister_v2"}, 89 | {.name = "cuMemHostUnregister"}, 90 | {.name = "cuPointerGetAttribute"}, 91 | {.name = "cuPointerGetAttributes"}, 92 | {.name = "cuMemcpy"}, 93 | {.name = "cuMemcpy_ptds"}, 94 | {.name = "cuMemcpyAsync"}, 95 | {.name = "cuMemcpyAsync_ptsz"}, 96 | {.name = "cuMemcpyPeer"}, 97 | {.name = "cuMemcpyPeer_ptds"}, 98 | {.name = "cuMemcpyPeerAsync"}, 99 | {.name = "cuMemcpyPeerAsync_ptsz"}, 100 | {.name = "cuMemcpyHtoD_v2"}, 101 | {.name = "cuMemcpyHtoD_v2_ptds"}, 102 | {.name = "cuMemcpyHtoDAsync_v2"}, 103 | {.name = "cuMemcpyHtoDAsync_v2_ptsz"}, 104 | {.name = "cuMemcpyDtoH_v2"}, 105 | {.name = "cuMemcpyDtoH_v2_ptds"}, 106 | {.name = "cuMemcpyDtoHAsync_v2"}, 107 | {.name = "cuMemcpyDtoHAsync_v2_ptsz"}, 108 | {.name = "cuMemcpyDtoD_v2"}, 109 | {.name = "cuMemcpyDtoD_v2_ptds"}, 110 | {.name = "cuMemcpyDtoDAsync_v2"}, 111 | {.name = "cuMemcpyDtoDAsync_v2_ptsz"}, 112 | {.name 
= "cuMemcpy2DUnaligned_v2"}, 113 | {.name = "cuMemcpy2DUnaligned_v2_ptds"}, 114 | {.name = "cuMemcpy2DAsync_v2"}, 115 | {.name = "cuMemcpy2DAsync_v2_ptsz"}, 116 | {.name = "cuMemcpy3D_v2"}, 117 | {.name = "cuMemcpy3D_v2_ptds"}, 118 | {.name = "cuMemcpy3DAsync_v2"}, 119 | {.name = "cuMemcpy3DAsync_v2_ptsz"}, 120 | {.name = "cuMemcpy3DPeer"}, 121 | {.name = "cuMemcpy3DPeer_ptds"}, 122 | {.name = "cuMemcpy3DPeerAsync"}, 123 | {.name = "cuMemcpy3DPeerAsync_ptsz"}, 124 | {.name = "cuMemsetD8_v2"}, 125 | {.name = "cuMemsetD8_v2_ptds"}, 126 | {.name = "cuMemsetD8Async"}, 127 | {.name = "cuMemsetD8Async_ptsz"}, 128 | {.name = "cuMemsetD2D8_v2"}, 129 | {.name = "cuMemsetD2D8_v2_ptds"}, 130 | {.name = "cuMemsetD2D8Async"}, 131 | {.name = "cuMemsetD2D8Async_ptsz"}, 132 | {.name = "cuFuncSetCacheConfig"}, 133 | {.name = "cuFuncSetSharedMemConfig"}, 134 | {.name = "cuFuncGetAttribute"}, 135 | {.name = "cuArrayCreate_v2"}, 136 | {.name = "cuArrayGetDescriptor_v2"}, 137 | {.name = "cuArray3DCreate_v2"}, 138 | {.name = "cuArray3DGetDescriptor_v2"}, 139 | {.name = "cuArrayDestroy"}, 140 | {.name = "cuMipmappedArrayCreate"}, 141 | {.name = "cuMipmappedArrayGetLevel"}, 142 | {.name = "cuMipmappedArrayDestroy"}, 143 | {.name = "cuTexRefCreate"}, 144 | {.name = "cuTexRefDestroy"}, 145 | {.name = "cuTexRefSetArray"}, 146 | {.name = "cuTexRefSetMipmappedArray"}, 147 | {.name = "cuTexRefSetAddress_v2"}, 148 | {.name = "cuTexRefSetAddress2D_v3"}, 149 | {.name = "cuTexRefSetFormat"}, 150 | {.name = "cuTexRefSetAddressMode"}, 151 | {.name = "cuTexRefSetFilterMode"}, 152 | {.name = "cuTexRefSetMipmapFilterMode"}, 153 | {.name = "cuTexRefSetMipmapLevelBias"}, 154 | {.name = "cuTexRefSetMipmapLevelClamp"}, 155 | {.name = "cuTexRefSetMaxAnisotropy"}, 156 | {.name = "cuTexRefSetFlags"}, 157 | {.name = "cuTexRefSetBorderColor"}, 158 | {.name = "cuTexRefGetBorderColor"}, 159 | {.name = "cuSurfRefSetArray"}, 160 | {.name = "cuTexObjectCreate"}, 161 | {.name = "cuTexObjectDestroy"}, 162 | {.name = 
"cuTexObjectGetResourceDesc"}, 163 | {.name = "cuTexObjectGetTextureDesc"}, 164 | {.name = "cuTexObjectGetResourceViewDesc"}, 165 | {.name = "cuSurfObjectCreate"}, 166 | {.name = "cuSurfObjectDestroy"}, 167 | {.name = "cuSurfObjectGetResourceDesc"}, 168 | {.name = "cuLaunchKernel"}, 169 | {.name = "cuLaunchKernel_ptsz"}, 170 | {.name = "cuEventCreate"}, 171 | {.name = "cuEventRecord"}, 172 | {.name = "cuEventRecord_ptsz"}, 173 | {.name = "cuEventQuery"}, 174 | {.name = "cuEventSynchronize"}, 175 | {.name = "cuEventDestroy_v2"}, 176 | {.name = "cuEventElapsedTime"}, 177 | {.name = "cuStreamWaitValue32"}, 178 | {.name = "cuStreamWaitValue32_ptsz"}, 179 | {.name = "cuStreamWriteValue32"}, 180 | {.name = "cuStreamWriteValue32_ptsz"}, 181 | {.name = "cuStreamBatchMemOp"}, 182 | {.name = "cuStreamBatchMemOp_ptsz"}, 183 | {.name = "cuStreamCreate"}, 184 | {.name = "cuStreamCreateWithPriority"}, 185 | {.name = "cuStreamGetPriority"}, 186 | {.name = "cuStreamGetPriority_ptsz"}, 187 | {.name = "cuStreamGetFlags"}, 188 | {.name = "cuStreamGetFlags_ptsz"}, 189 | {.name = "cuStreamDestroy_v2"}, 190 | {.name = "cuStreamWaitEvent"}, 191 | {.name = "cuStreamWaitEvent_ptsz"}, 192 | {.name = "cuStreamAddCallback"}, 193 | {.name = "cuStreamAddCallback_ptsz"}, 194 | {.name = "cuStreamSynchronize"}, 195 | {.name = "cuStreamSynchronize_ptsz"}, 196 | {.name = "cuStreamQuery"}, 197 | {.name = "cuStreamQuery_ptsz"}, 198 | {.name = "cuStreamAttachMemAsync"}, 199 | {.name = "cuStreamAttachMemAsync_ptsz"}, 200 | {.name = "cuDeviceCanAccessPeer"}, 201 | {.name = "cuCtxEnablePeerAccess"}, 202 | {.name = "cuCtxDisablePeerAccess"}, 203 | {.name = "cuIpcGetEventHandle"}, 204 | {.name = "cuIpcOpenEventHandle"}, 205 | {.name = "cuIpcGetMemHandle"}, 206 | {.name = "cuIpcOpenMemHandle"}, 207 | {.name = "cuIpcCloseMemHandle"}, 208 | {.name = "cuGLCtxCreate_v2"}, 209 | {.name = "cuGLInit"}, 210 | {.name = "cuGLGetDevices"}, 211 | {.name = "cuGLRegisterBufferObject"}, 212 | {.name = 
"cuGLMapBufferObject_v2"}, 213 | {.name = "cuGLMapBufferObject_v2_ptds"}, 214 | {.name = "cuGLMapBufferObjectAsync_v2"}, 215 | {.name = "cuGLMapBufferObjectAsync_v2_ptsz"}, 216 | {.name = "cuGLUnmapBufferObject"}, 217 | {.name = "cuGLUnmapBufferObjectAsync"}, 218 | {.name = "cuGLUnregisterBufferObject"}, 219 | {.name = "cuGLSetBufferObjectMapFlags"}, 220 | {.name = "cuGraphicsGLRegisterImage"}, 221 | {.name = "cuGraphicsGLRegisterBuffer"}, 222 | {.name = "cuGraphicsUnregisterResource"}, 223 | {.name = "cuGraphicsMapResources"}, 224 | {.name = "cuGraphicsMapResources_ptsz"}, 225 | {.name = "cuGraphicsUnmapResources"}, 226 | {.name = "cuGraphicsUnmapResources_ptsz"}, 227 | {.name = "cuGraphicsResourceSetMapFlags_v2"}, 228 | {.name = "cuGraphicsSubResourceGetMappedArray"}, 229 | {.name = "cuGraphicsResourceGetMappedMipmappedArray"}, 230 | {.name = "cuGraphicsResourceGetMappedPointer_v2"}, 231 | {.name = "cuProfilerInitialize"}, 232 | {.name = "cuProfilerStart"}, 233 | {.name = "cuProfilerStop"}, 234 | {.name = "cuVDPAUGetDevice"}, 235 | {.name = "cuVDPAUCtxCreate_v2"}, 236 | {.name = "cuGraphicsVDPAURegisterVideoSurface"}, 237 | {.name = "cuGraphicsVDPAURegisterOutputSurface"}, 238 | {.name = "cuGetExportTable"}, 239 | {.name = "cuOccupancyMaxActiveBlocksPerMultiprocessor"}, 240 | {.name = "cuMemAdvise"}, 241 | {.name = "cuMemPrefetchAsync"}, 242 | {.name = "cuMemPrefetchAsync_ptsz"}, 243 | {.name = "cuMemRangeGetAttribute"}, 244 | {.name = "cuMemRangeGetAttributes"}, 245 | {.name = "cuGetErrorString"}, 246 | {.name = "cuGetErrorName"}, 247 | {.name = "cuArray3DCreate"}, 248 | {.name = "cuArray3DGetDescriptor"}, 249 | {.name = "cuArrayCreate"}, 250 | {.name = "cuArrayGetDescriptor"}, 251 | {.name = "cuCtxAttach"}, 252 | {.name = "cuCtxCreate"}, 253 | {.name = "cuCtxDestroy"}, 254 | {.name = "cuCtxDestroy_v2"}, 255 | {.name = "cuCtxPopCurrent"}, 256 | {.name = "cuCtxPopCurrent_v2"}, 257 | {.name = "cuCtxPushCurrent"}, 258 | {.name = "cuCtxPushCurrent_v2"}, 259 | {.name 
= "cudbgApiAttach"}, 260 | {.name = "cudbgApiDetach"}, 261 | {.name = "cudbgApiInit"}, 262 | {.name = "cudbgGetAPI"}, 263 | {.name = "cudbgGetAPIVersion"}, 264 | {.name = "cudbgMain"}, 265 | {.name = "cudbgReportDriverApiError"}, 266 | {.name = "cudbgReportDriverInternalError"}, 267 | {.name = "cuDeviceComputeCapability"}, 268 | {.name = "cuDeviceGetProperties"}, 269 | {.name = "cuDeviceTotalMem"}, 270 | {.name = "cuEGLInit"}, 271 | {.name = "cuEGLStreamConsumerAcquireFrame"}, 272 | {.name = "cuEGLStreamConsumerConnect"}, 273 | {.name = "cuEGLStreamConsumerConnectWithFlags"}, 274 | {.name = "cuEGLStreamConsumerDisconnect"}, 275 | {.name = "cuEGLStreamConsumerReleaseFrame"}, 276 | {.name = "cuEGLStreamProducerConnect"}, 277 | {.name = "cuEGLStreamProducerDisconnect"}, 278 | {.name = "cuEGLStreamProducerPresentFrame"}, 279 | {.name = "cuEGLStreamProducerReturnFrame"}, 280 | {.name = "cuEventDestroy"}, 281 | {.name = "cuFuncSetAttribute"}, 282 | {.name = "cuFuncSetBlockShape"}, 283 | {.name = "cuFuncSetSharedSize"}, 284 | {.name = "cuGLCtxCreate"}, 285 | {.name = "cuGLGetDevices_v2"}, 286 | {.name = "cuGLMapBufferObject"}, 287 | {.name = "cuGLMapBufferObjectAsync"}, 288 | {.name = "cuGraphicsEGLRegisterImage"}, 289 | {.name = "cuGraphicsResourceGetMappedEglFrame"}, 290 | {.name = "cuGraphicsResourceGetMappedPointer"}, 291 | {.name = "cuGraphicsResourceSetMapFlags"}, 292 | {.name = "cuLaunch"}, 293 | {.name = "cuLaunchCooperativeKernel"}, 294 | {.name = "cuLaunchCooperativeKernelMultiDevice"}, 295 | {.name = "cuLaunchCooperativeKernel_ptsz"}, 296 | {.name = "cuLaunchGrid"}, 297 | {.name = "cuLaunchGridAsync"}, 298 | {.name = "cuLinkAddData_v2"}, 299 | {.name = "cuLinkAddFile_v2"}, 300 | {.name = "cuLinkCreate_v2"}, 301 | {.name = "cuMemAlloc"}, 302 | {.name = "cuMemAllocHost"}, 303 | {.name = "cuMemAllocHost_v2"}, 304 | {.name = "cuMemAllocPitch"}, 305 | {.name = "cuMemcpy2D"}, 306 | {.name = "cuMemcpy2DAsync"}, 307 | {.name = "cuMemcpy2DUnaligned"}, 308 | {.name = 
"cuMemcpy2D_v2"}, 309 | {.name = "cuMemcpy2D_v2_ptds"}, 310 | {.name = "cuMemcpy3D"}, 311 | {.name = "cuMemcpy3DAsync"}, 312 | {.name = "cuMemcpyAtoA"}, 313 | {.name = "cuMemcpyAtoA_v2"}, 314 | {.name = "cuMemcpyAtoA_v2_ptds"}, 315 | {.name = "cuMemcpyAtoD"}, 316 | {.name = "cuMemcpyAtoD_v2"}, 317 | {.name = "cuMemcpyAtoD_v2_ptds"}, 318 | {.name = "cuMemcpyAtoH"}, 319 | {.name = "cuMemcpyAtoHAsync"}, 320 | {.name = "cuMemcpyAtoHAsync_v2"}, 321 | {.name = "cuMemcpyAtoHAsync_v2_ptsz"}, 322 | {.name = "cuMemcpyAtoH_v2"}, 323 | {.name = "cuMemcpyAtoH_v2_ptds"}, 324 | {.name = "cuMemcpyDtoA"}, 325 | {.name = "cuMemcpyDtoA_v2"}, 326 | {.name = "cuMemcpyDtoA_v2_ptds"}, 327 | {.name = "cuMemcpyDtoD"}, 328 | {.name = "cuMemcpyDtoDAsync"}, 329 | {.name = "cuMemcpyDtoH"}, 330 | {.name = "cuMemcpyDtoHAsync"}, 331 | {.name = "cuMemcpyHtoA"}, 332 | {.name = "cuMemcpyHtoAAsync"}, 333 | {.name = "cuMemcpyHtoAAsync_v2"}, 334 | {.name = "cuMemcpyHtoAAsync_v2_ptsz"}, 335 | {.name = "cuMemcpyHtoA_v2"}, 336 | {.name = "cuMemcpyHtoA_v2_ptds"}, 337 | {.name = "cuMemcpyHtoD"}, 338 | {.name = "cuMemcpyHtoDAsync"}, 339 | {.name = "cuMemFree"}, 340 | {.name = "cuMemGetAddressRange"}, 341 | //{.name = "cuMemGetAttribute"}, 342 | //{.name = "cuMemGetAttribute_v2"}, 343 | {.name = "cuMemGetInfo"}, 344 | {.name = "cuMemHostGetDevicePointer"}, 345 | {.name = "cuMemHostRegister"}, 346 | {.name = "cuMemsetD16"}, 347 | {.name = "cuMemsetD16Async"}, 348 | {.name = "cuMemsetD16Async_ptsz"}, 349 | {.name = "cuMemsetD16_v2"}, 350 | {.name = "cuMemsetD16_v2_ptds"}, 351 | {.name = "cuMemsetD2D16"}, 352 | {.name = "cuMemsetD2D16Async"}, 353 | {.name = "cuMemsetD2D16Async_ptsz"}, 354 | {.name = "cuMemsetD2D16_v2"}, 355 | {.name = "cuMemsetD2D16_v2_ptds"}, 356 | {.name = "cuMemsetD2D32"}, 357 | {.name = "cuMemsetD2D32Async"}, 358 | {.name = "cuMemsetD2D32Async_ptsz"}, 359 | {.name = "cuMemsetD2D32_v2"}, 360 | {.name = "cuMemsetD2D32_v2_ptds"}, 361 | {.name = "cuMemsetD2D8"}, 362 | {.name = "cuMemsetD32"}, 
363 | {.name = "cuMemsetD32Async"}, 364 | {.name = "cuMemsetD32Async_ptsz"}, 365 | {.name = "cuMemsetD32_v2"}, 366 | {.name = "cuMemsetD32_v2_ptds"}, 367 | {.name = "cuMemsetD8"}, 368 | {.name = "cuModuleGetGlobal"}, 369 | {.name = "cuModuleLoadDataEx"}, 370 | {.name = "cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"}, 371 | {.name = "cuOccupancyMaxPotentialBlockSize"}, 372 | {.name = "cuOccupancyMaxPotentialBlockSizeWithFlags"}, 373 | {.name = "cuParamSetf"}, 374 | {.name = "cuParamSeti"}, 375 | {.name = "cuParamSetSize"}, 376 | {.name = "cuParamSetTexRef"}, 377 | {.name = "cuParamSetv"}, 378 | {.name = "cuPointerSetAttribute"}, 379 | {.name = "cuStreamDestroy"}, 380 | {.name = "cuStreamWaitValue64"}, 381 | {.name = "cuStreamWaitValue64_ptsz"}, 382 | {.name = "cuStreamWriteValue64"}, 383 | {.name = "cuStreamWriteValue64_ptsz"}, 384 | {.name = "cuSurfRefGetArray"}, 385 | {.name = "cuTexRefGetAddress"}, 386 | {.name = "cuTexRefGetAddressMode"}, 387 | {.name = "cuTexRefGetAddress_v2"}, 388 | {.name = "cuTexRefGetArray"}, 389 | {.name = "cuTexRefGetFilterMode"}, 390 | {.name = "cuTexRefGetFlags"}, 391 | {.name = "cuTexRefGetFormat"}, 392 | {.name = "cuTexRefGetMaxAnisotropy"}, 393 | {.name = "cuTexRefGetMipmapFilterMode"}, 394 | {.name = "cuTexRefGetMipmapLevelBias"}, 395 | {.name = "cuTexRefGetMipmapLevelClamp"}, 396 | {.name = "cuTexRefGetMipmappedArray"}, 397 | {.name = "cuTexRefSetAddress"}, 398 | {.name = "cuTexRefSetAddress2D"}, 399 | {.name = "cuTexRefSetAddress2D_v2"}, 400 | {.name = "cuVDPAUCtxCreate"}, 401 | {.name = "cuEGLApiInit"}, 402 | {.name = "cuDestroyExternalMemory"}, 403 | {.name = "cuDestroyExternalSemaphore"}, 404 | {.name = "cuDeviceGetUuid"}, 405 | {.name = "cuExternalMemoryGetMappedBuffer"}, 406 | {.name = "cuExternalMemoryGetMappedMipmappedArray"}, 407 | {.name = "cuGraphAddChildGraphNode"}, 408 | {.name = "cuGraphAddDependencies"}, 409 | {.name = "cuGraphAddEmptyNode"}, 410 | {.name = "cuGraphAddHostNode"}, 411 | {.name = 
"cuGraphAddKernelNode"}, 412 | {.name = "cuGraphAddMemcpyNode"}, 413 | {.name = "cuGraphAddMemsetNode"}, 414 | {.name = "cuGraphChildGraphNodeGetGraph"}, 415 | {.name = "cuGraphClone"}, 416 | {.name = "cuGraphCreate"}, 417 | {.name = "cuGraphDestroy"}, 418 | {.name = "cuGraphDestroyNode"}, 419 | {.name = "cuGraphExecDestroy"}, 420 | {.name = "cuGraphGetEdges"}, 421 | {.name = "cuGraphGetNodes"}, 422 | {.name = "cuGraphGetRootNodes"}, 423 | {.name = "cuGraphHostNodeGetParams"}, 424 | {.name = "cuGraphHostNodeSetParams"}, 425 | {.name = "cuGraphInstantiate"}, 426 | {.name = "cuGraphKernelNodeGetParams"}, 427 | {.name = "cuGraphKernelNodeSetParams"}, 428 | {.name = "cuGraphLaunch"}, 429 | {.name = "cuGraphLaunch_ptsz"}, 430 | {.name = "cuGraphMemcpyNodeGetParams"}, 431 | {.name = "cuGraphMemcpyNodeSetParams"}, 432 | {.name = "cuGraphMemsetNodeGetParams"}, 433 | {.name = "cuGraphMemsetNodeSetParams"}, 434 | {.name = "cuGraphNodeFindInClone"}, 435 | {.name = "cuGraphNodeGetDependencies"}, 436 | {.name = "cuGraphNodeGetDependentNodes"}, 437 | {.name = "cuGraphNodeGetType"}, 438 | {.name = "cuGraphRemoveDependencies"}, 439 | {.name = "cuImportExternalMemory"}, 440 | {.name = "cuImportExternalSemaphore"}, 441 | {.name = "cuLaunchHostFunc"}, 442 | {.name = "cuLaunchHostFunc_ptsz"}, 443 | {.name = "cuSignalExternalSemaphoresAsync"}, 444 | {.name = "cuSignalExternalSemaphoresAsync_ptsz"}, 445 | {.name = "cuStreamBeginCapture"}, 446 | {.name = "cuStreamBeginCapture_ptsz"}, 447 | {.name = "cuStreamEndCapture"}, 448 | {.name = "cuStreamEndCapture_ptsz"}, 449 | {.name = "cuStreamGetCtx"}, 450 | {.name = "cuStreamGetCtx_ptsz"}, 451 | {.name = "cuStreamIsCapturing"}, 452 | {.name = "cuStreamIsCapturing_ptsz"}, 453 | {.name = "cuWaitExternalSemaphoresAsync"}, 454 | {.name = "cuWaitExternalSemaphoresAsync_ptsz"}, 455 | {.name = "cuGraphExecKernelNodeSetParams"}, 456 | {.name = "cuStreamBeginCapture_v2"}, 457 | {.name = "cuStreamBeginCapture_v2_ptsz"}, 458 | {.name = 
"cuStreamGetCaptureInfo"}, 459 | {.name = "cuStreamGetCaptureInfo_ptsz"}, 460 | {.name = "cuThreadExchangeStreamCaptureMode"}, 461 | {.name = "cuDeviceGetNvSciSyncAttributes"}, 462 | {.name = "cuGraphExecHostNodeSetParams"}, 463 | {.name = "cuGraphExecMemcpyNodeSetParams"}, 464 | {.name = "cuGraphExecMemsetNodeSetParams"}, 465 | {.name = "cuGraphExecUpdate"}, 466 | {.name = "cuMemAddressFree"}, 467 | {.name = "cuMemAddressReserve"}, 468 | {.name = "cuMemCreate"}, 469 | {.name = "cuMemExportToShareableHandle"}, 470 | {.name = "cuMemGetAccess"}, 471 | {.name = "cuMemGetAllocationGranularity"}, 472 | {.name = "cuMemGetAllocationPropertiesFromHandle"}, 473 | {.name = "cuMemImportFromShareableHandle"}, 474 | {.name = "cuMemMap"}, 475 | {.name = "cuMemRelease"}, 476 | {.name = "cuMemSetAccess"}, 477 | {.name = "cuMemUnmap"}, 478 | {.name = "cuCtxResetPersistingL2Cache"}, 479 | {.name = "cuDevicePrimaryCtxRelease_v2"}, 480 | {.name = "cuDevicePrimaryCtxReset_v2"}, 481 | {.name = "cuDevicePrimaryCtxSetFlags_v2"}, 482 | {.name = "cuFuncGetModule"}, 483 | {.name = "cuGraphInstantiate_v2"}, 484 | {.name = "cuGraphKernelNodeCopyAttributes"}, 485 | {.name = "cuGraphKernelNodeGetAttribute"}, 486 | {.name = "cuGraphKernelNodeSetAttribute"}, 487 | {.name = "cuMemRetainAllocationHandle"}, 488 | {.name = "cuOccupancyAvailableDynamicSMemPerBlock"}, 489 | {.name = "cuStreamCopyAttributes"}, 490 | {.name = "cuStreamCopyAttributes_ptsz"}, 491 | {.name = "cuStreamGetAttribute"}, 492 | {.name = "cuStreamGetAttribute_ptsz"}, 493 | {.name = "cuStreamSetAttribute"}, 494 | {.name = "cuStreamSetAttribute_ptsz"}, 495 | {.name = "cuArrayGetPlane"}, 496 | {.name = "cuArrayGetSparseProperties"}, 497 | {.name = "cuDeviceGetDefaultMemPool"}, 498 | {.name = "cuDeviceGetLuid"}, 499 | {.name = "cuDeviceGetMemPool"}, 500 | {.name = "cuDeviceGetTexture1DLinearMaxWidth"}, 501 | {.name = "cuDeviceSetMemPool"}, 502 | {.name = "cuEventRecordWithFlags"}, 503 | {.name = "cuEventRecordWithFlags_ptsz"}, 504 | 
{.name = "cuGraphAddEventRecordNode"}, 505 | {.name = "cuGraphAddEventWaitNode"}, 506 | {.name = "cuGraphAddExternalSemaphoresSignalNode"}, 507 | {.name = "cuGraphAddExternalSemaphoresWaitNode"}, 508 | {.name = "cuGraphEventRecordNodeGetEvent"}, 509 | {.name = "cuGraphEventRecordNodeSetEvent"}, 510 | {.name = "cuGraphEventWaitNodeGetEvent"}, 511 | {.name = "cuGraphEventWaitNodeSetEvent"}, 512 | {.name = "cuGraphExecChildGraphNodeSetParams"}, 513 | {.name = "cuGraphExecEventRecordNodeSetEvent"}, 514 | {.name = "cuGraphExecEventWaitNodeSetEvent"}, 515 | {.name = "cuGraphExecExternalSemaphoresSignalNodeSetParams"}, 516 | {.name = "cuGraphExecExternalSemaphoresWaitNodeSetParams"}, 517 | {.name = "cuGraphExternalSemaphoresSignalNodeGetParams"}, 518 | {.name = "cuGraphExternalSemaphoresSignalNodeSetParams"}, 519 | {.name = "cuGraphExternalSemaphoresWaitNodeGetParams"}, 520 | {.name = "cuGraphExternalSemaphoresWaitNodeSetParams"}, 521 | {.name = "cuGraphUpload"}, 522 | {.name = "cuGraphUpload_ptsz"}, 523 | {.name = "cuIpcOpenMemHandle_v2"}, 524 | {.name = "cuMemAllocAsync"}, 525 | {.name = "cuMemAllocAsync_ptsz"}, 526 | {.name = "cuMemAllocFromPoolAsync"}, 527 | {.name = "cuMemAllocFromPoolAsync_ptsz"}, 528 | {.name = "cuMemFreeAsync"}, 529 | {.name = "cuMemFreeAsync_ptsz"}, 530 | {.name = "cuMemMapArrayAsync"}, 531 | {.name = "cuMemMapArrayAsync_ptsz"}, 532 | {.name = "cuMemPoolCreate"}, 533 | {.name = "cuMemPoolDestroy"}, 534 | {.name = "cuMemPoolExportPointer"}, 535 | {.name = "cuMemPoolExportToShareableHandle"}, 536 | {.name = "cuMemPoolGetAccess"}, 537 | {.name = "cuMemPoolGetAttribute"}, 538 | {.name = "cuMemPoolImportFromShareableHandle"}, 539 | {.name = "cuMemPoolImportPointer"}, 540 | {.name = "cuMemPoolSetAccess"}, 541 | {.name = "cuMemPoolSetAttribute"}, 542 | {.name = "cuMemPoolTrimTo"}, 543 | {.name = "cuMipmappedArrayGetSparseProperties"}, 544 | {.name = "cuCtxCreate_v3"}, 545 | {.name = "cuCtxGetExecAffinity"}, 546 | {.name = 
"cuDeviceGetExecAffinitySupport"}, 547 | {.name = "cuDeviceGetGraphMemAttribute"}, 548 | {.name = "cuDeviceGetUuid_v2"}, 549 | {.name = "cuDeviceGraphMemTrim"}, 550 | {.name = "cuDeviceSetGraphMemAttribute"}, 551 | {.name = "cuFlushGPUDirectRDMAWrites"}, 552 | {.name = "cuGetProcAddress"}, 553 | {.name = "cuGraphAddMemAllocNode"}, 554 | {.name = "cuGraphAddMemFreeNode"}, 555 | {.name = "cuGraphDebugDotPrint"}, 556 | {.name = "cuGraphInstantiateWithFlags"}, 557 | {.name = "cuGraphMemAllocNodeGetParams"}, 558 | {.name = "cuGraphMemFreeNodeGetParams"}, 559 | {.name = "cuGraphReleaseUserObject"}, 560 | {.name = "cuGraphRetainUserObject"}, 561 | {.name = "cuStreamGetCaptureInfo_v2"}, 562 | {.name = "cuStreamGetCaptureInfo_v2_ptsz"}, 563 | {.name = "cuStreamUpdateCaptureDependencies"}, 564 | {.name = "cuStreamUpdateCaptureDependencies_ptsz"}, 565 | {.name = "cuUserObjectCreate"}, 566 | {.name = "cuUserObjectRelease"}, 567 | {.name = "cuUserObjectRetain"}, 568 | }; 569 | 570 | entry_t nvml_library_entry[] = { 571 | {.name = "nvmlInit"}, 572 | {.name = "nvmlShutdown"}, 573 | {.name = "nvmlErrorString"}, 574 | {.name = "nvmlDeviceGetHandleByIndex"}, 575 | {.name = "nvmlDeviceGetComputeRunningProcesses"}, 576 | {.name = "nvmlDeviceGetPciInfo"}, 577 | {.name = "nvmlDeviceGetProcessUtilization"}, 578 | {.name = "nvmlDeviceGetCount"}, 579 | {.name = "nvmlDeviceClearAccountingPids"}, 580 | {.name = "nvmlDeviceClearCpuAffinity"}, 581 | {.name = "nvmlDeviceClearEccErrorCounts"}, 582 | {.name = "nvmlDeviceDiscoverGpus"}, 583 | {.name = "nvmlDeviceFreezeNvLinkUtilizationCounter"}, 584 | {.name = "nvmlDeviceGetAccountingBufferSize"}, 585 | {.name = "nvmlDeviceGetAccountingMode"}, 586 | {.name = "nvmlDeviceGetAccountingPids"}, 587 | {.name = "nvmlDeviceGetAccountingStats"}, 588 | {.name = "nvmlDeviceGetActiveVgpus"}, 589 | {.name = "nvmlDeviceGetAPIRestriction"}, 590 | {.name = "nvmlDeviceGetApplicationsClock"}, 591 | {.name = "nvmlDeviceGetAutoBoostedClocksEnabled"}, 592 | {.name = 
"nvmlDeviceGetBAR1MemoryInfo"}, 593 | {.name = "nvmlDeviceGetBoardId"}, 594 | {.name = "nvmlDeviceGetBoardPartNumber"}, 595 | {.name = "nvmlDeviceGetBrand"}, 596 | {.name = "nvmlDeviceGetBridgeChipInfo"}, 597 | {.name = "nvmlDeviceGetClock"}, 598 | {.name = "nvmlDeviceGetClockInfo"}, 599 | {.name = "nvmlDeviceGetComputeMode"}, 600 | {.name = "nvmlDeviceGetCount_v2"}, 601 | {.name = "nvmlDeviceGetCpuAffinity"}, 602 | {.name = "nvmlDeviceGetCreatableVgpus"}, 603 | {.name = "nvmlDeviceGetCudaComputeCapability"}, 604 | {.name = "nvmlDeviceGetCurrentClocksThrottleReasons"}, 605 | {.name = "nvmlDeviceGetCurrPcieLinkGeneration"}, 606 | {.name = "nvmlDeviceGetCurrPcieLinkWidth"}, 607 | {.name = "nvmlDeviceGetDecoderUtilization"}, 608 | {.name = "nvmlDeviceGetDefaultApplicationsClock"}, 609 | {.name = "nvmlDeviceGetDetailedEccErrors"}, 610 | {.name = "nvmlDeviceGetDisplayActive"}, 611 | {.name = "nvmlDeviceGetDisplayMode"}, 612 | {.name = "nvmlDeviceGetDriverModel"}, 613 | {.name = "nvmlDeviceGetEccMode"}, 614 | {.name = "nvmlDeviceGetEncoderCapacity"}, 615 | {.name = "nvmlDeviceGetEncoderSessions"}, 616 | {.name = "nvmlDeviceGetEncoderStats"}, 617 | {.name = "nvmlDeviceGetEncoderUtilization"}, 618 | {.name = "nvmlDeviceGetEnforcedPowerLimit"}, 619 | {.name = "nvmlDeviceGetFanSpeed"}, 620 | {.name = "nvmlDeviceGetFanSpeed_v2"}, 621 | {.name = "nvmlDeviceGetFieldValues"}, 622 | {.name = "nvmlDeviceGetGpuOperationMode"}, 623 | {.name = "nvmlDeviceGetGraphicsRunningProcesses"}, 624 | {.name = "nvmlDeviceGetGridLicensableFeatures"}, 625 | {.name = "nvmlDeviceGetHandleByIndex_v2"}, 626 | {.name = "nvmlDeviceGetHandleByPciBusId"}, 627 | {.name = "nvmlDeviceGetHandleByPciBusId_v2"}, 628 | {.name = "nvmlDeviceGetHandleBySerial"}, 629 | {.name = "nvmlDeviceGetHandleByUUID"}, 630 | {.name = "nvmlDeviceGetIndex"}, 631 | {.name = "nvmlDeviceGetInforomConfigurationChecksum"}, 632 | {.name = "nvmlDeviceGetInforomImageVersion"}, 633 | {.name = "nvmlDeviceGetInforomVersion"}, 634 | {.name 
= "nvmlDeviceGetMaxClockInfo"}, 635 | {.name = "nvmlDeviceGetMaxCustomerBoostClock"}, 636 | {.name = "nvmlDeviceGetMaxPcieLinkGeneration"}, 637 | {.name = "nvmlDeviceGetMaxPcieLinkWidth"}, 638 | {.name = "nvmlDeviceGetMemoryErrorCounter"}, 639 | {.name = "nvmlDeviceGetMemoryInfo"}, 640 | {.name = "nvmlDeviceGetMinorNumber"}, 641 | {.name = "nvmlDeviceGetMPSComputeRunningProcesses"}, 642 | {.name = "nvmlDeviceGetMultiGpuBoard"}, 643 | {.name = "nvmlDeviceGetName"}, 644 | {.name = "nvmlDeviceGetNvLinkCapability"}, 645 | {.name = "nvmlDeviceGetNvLinkErrorCounter"}, 646 | {.name = "nvmlDeviceGetNvLinkRemotePciInfo"}, 647 | {.name = "nvmlDeviceGetNvLinkRemotePciInfo_v2"}, 648 | {.name = "nvmlDeviceGetNvLinkState"}, 649 | {.name = "nvmlDeviceGetNvLinkUtilizationControl"}, 650 | {.name = "nvmlDeviceGetNvLinkUtilizationCounter"}, 651 | {.name = "nvmlDeviceGetNvLinkVersion"}, 652 | {.name = "nvmlDeviceGetP2PStatus"}, 653 | {.name = "nvmlDeviceGetPcieReplayCounter"}, 654 | {.name = "nvmlDeviceGetPcieThroughput"}, 655 | {.name = "nvmlDeviceGetPciInfo_v2"}, 656 | {.name = "nvmlDeviceGetPciInfo_v3"}, 657 | {.name = "nvmlDeviceGetPerformanceState"}, 658 | {.name = "nvmlDeviceGetPersistenceMode"}, 659 | {.name = "nvmlDeviceGetPowerManagementDefaultLimit"}, 660 | {.name = "nvmlDeviceGetPowerManagementLimit"}, 661 | {.name = "nvmlDeviceGetPowerManagementLimitConstraints"}, 662 | {.name = "nvmlDeviceGetPowerManagementMode"}, 663 | {.name = "nvmlDeviceGetPowerState"}, 664 | {.name = "nvmlDeviceGetPowerUsage"}, 665 | {.name = "nvmlDeviceGetRetiredPages"}, 666 | {.name = "nvmlDeviceGetRetiredPagesPendingStatus"}, 667 | {.name = "nvmlDeviceGetSamples"}, 668 | {.name = "nvmlDeviceGetSerial"}, 669 | {.name = "nvmlDeviceGetSupportedClocksThrottleReasons"}, 670 | {.name = "nvmlDeviceGetSupportedEventTypes"}, 671 | {.name = "nvmlDeviceGetSupportedGraphicsClocks"}, 672 | {.name = "nvmlDeviceGetSupportedMemoryClocks"}, 673 | {.name = "nvmlDeviceGetSupportedVgpus"}, 674 | {.name = 
"nvmlDeviceGetTemperature"}, 675 | {.name = "nvmlDeviceGetTemperatureThreshold"}, 676 | {.name = "nvmlDeviceGetTopologyCommonAncestor"}, 677 | {.name = "nvmlDeviceGetTopologyNearestGpus"}, 678 | {.name = "nvmlDeviceGetTotalEccErrors"}, 679 | {.name = "nvmlDeviceGetTotalEnergyConsumption"}, 680 | {.name = "nvmlDeviceGetUtilizationRates"}, 681 | {.name = "nvmlDeviceGetUUID"}, 682 | {.name = "nvmlDeviceGetVbiosVersion"}, 683 | {.name = "nvmlDeviceGetVgpuMetadata"}, 684 | {.name = "nvmlDeviceGetVgpuProcessUtilization"}, 685 | {.name = "nvmlDeviceGetVgpuUtilization"}, 686 | {.name = "nvmlDeviceGetViolationStatus"}, 687 | {.name = "nvmlDeviceGetVirtualizationMode"}, 688 | {.name = "nvmlDeviceModifyDrainState"}, 689 | {.name = "nvmlDeviceOnSameBoard"}, 690 | {.name = "nvmlDeviceQueryDrainState"}, 691 | {.name = "nvmlDeviceRegisterEvents"}, 692 | {.name = "nvmlDeviceRemoveGpu"}, 693 | {.name = "nvmlDeviceRemoveGpu_v2"}, 694 | {.name = "nvmlDeviceResetApplicationsClocks"}, 695 | {.name = "nvmlDeviceResetNvLinkErrorCounters"}, 696 | {.name = "nvmlDeviceResetNvLinkUtilizationCounter"}, 697 | {.name = "nvmlDeviceSetAccountingMode"}, 698 | {.name = "nvmlDeviceSetAPIRestriction"}, 699 | {.name = "nvmlDeviceSetApplicationsClocks"}, 700 | {.name = "nvmlDeviceSetAutoBoostedClocksEnabled"}, 701 | /** We hijack this call*/ 702 | {.name = "nvmlDeviceSetComputeMode"}, 703 | {.name = "nvmlDeviceSetCpuAffinity"}, 704 | {.name = "nvmlDeviceSetDefaultAutoBoostedClocksEnabled"}, 705 | {.name = "nvmlDeviceSetDriverModel"}, 706 | {.name = "nvmlDeviceSetEccMode"}, 707 | {.name = "nvmlDeviceSetGpuOperationMode"}, 708 | {.name = "nvmlDeviceSetNvLinkUtilizationControl"}, 709 | {.name = "nvmlDeviceSetPersistenceMode"}, 710 | {.name = "nvmlDeviceSetPowerManagementLimit"}, 711 | {.name = "nvmlDeviceSetVirtualizationMode"}, 712 | {.name = "nvmlDeviceValidateInforom"}, 713 | {.name = "nvmlEventSetCreate"}, 714 | {.name = "nvmlEventSetFree"}, 715 | {.name = "nvmlEventSetWait"}, 716 | {.name = 
"nvmlGetVgpuCompatibility"}, 717 | {.name = "nvmlInit_v2"}, 718 | {.name = "nvmlInitWithFlags"}, 719 | {.name = "nvmlInternalGetExportTable"}, 720 | {.name = "nvmlSystemGetCudaDriverVersion"}, 721 | {.name = "nvmlSystemGetCudaDriverVersion_v2"}, 722 | {.name = "nvmlSystemGetDriverVersion"}, 723 | {.name = "nvmlSystemGetHicVersion"}, 724 | {.name = "nvmlSystemGetNVMLVersion"}, 725 | {.name = "nvmlSystemGetProcessName"}, 726 | {.name = "nvmlSystemGetTopologyGpuSet"}, 727 | {.name = "nvmlUnitGetCount"}, 728 | {.name = "nvmlUnitGetDevices"}, 729 | {.name = "nvmlUnitGetFanSpeedInfo"}, 730 | {.name = "nvmlUnitGetHandleByIndex"}, 731 | {.name = "nvmlUnitGetLedState"}, 732 | {.name = "nvmlUnitGetPsuInfo"}, 733 | {.name = "nvmlUnitGetTemperature"}, 734 | {.name = "nvmlUnitGetUnitInfo"}, 735 | {.name = "nvmlUnitSetLedState"}, 736 | {.name = "nvmlVgpuInstanceGetEncoderCapacity"}, 737 | {.name = "nvmlVgpuInstanceGetEncoderSessions"}, 738 | {.name = "nvmlVgpuInstanceGetEncoderStats"}, 739 | {.name = "nvmlVgpuInstanceGetFbUsage"}, 740 | {.name = "nvmlVgpuInstanceGetFrameRateLimit"}, 741 | {.name = "nvmlVgpuInstanceGetLicenseStatus"}, 742 | {.name = "nvmlVgpuInstanceGetMetadata"}, 743 | {.name = "nvmlVgpuInstanceGetType"}, 744 | {.name = "nvmlVgpuInstanceGetUUID"}, 745 | {.name = "nvmlVgpuInstanceGetVmDriverVersion"}, 746 | {.name = "nvmlVgpuInstanceGetVmID"}, 747 | {.name = "nvmlVgpuInstanceSetEncoderCapacity"}, 748 | {.name = "nvmlVgpuTypeGetClass"}, 749 | {.name = "nvmlVgpuTypeGetDeviceID"}, 750 | {.name = "nvmlVgpuTypeGetFramebufferSize"}, 751 | {.name = "nvmlVgpuTypeGetFrameRateLimit"}, 752 | {.name = "nvmlVgpuTypeGetLicense"}, 753 | {.name = "nvmlVgpuTypeGetMaxInstances"}, 754 | {.name = "nvmlVgpuTypeGetName"}, 755 | {.name = "nvmlVgpuTypeGetNumDisplayHeads"}, 756 | {.name = "nvmlVgpuTypeGetResolution"}, 757 | {.name = "nvmlDeviceGetFBCSessions"}, 758 | {.name = "nvmlDeviceGetFBCStats"}, 759 | {.name = "nvmlDeviceGetGridLicensableFeatures_v2"}, 760 | {.name = 
"nvmlDeviceGetRetiredPages_v2"}, 761 | {.name = "nvmlDeviceResetGpuLockedClocks"}, 762 | {.name = "nvmlDeviceSetGpuLockedClocks"}, 763 | {.name = "nvmlGetBlacklistDeviceCount"}, 764 | {.name = "nvmlGetBlacklistDeviceInfoByIndex"}, 765 | {.name = "nvmlVgpuInstanceGetAccountingMode"}, 766 | {.name = "nvmlVgpuInstanceGetAccountingPids"}, 767 | {.name = "nvmlVgpuInstanceGetAccountingStats"}, 768 | {.name = "nvmlVgpuInstanceGetFBCSessions"}, 769 | {.name = "nvmlVgpuInstanceGetFBCStats"}, 770 | {.name = "nvmlVgpuTypeGetMaxInstancesPerVm"}, 771 | {.name = "nvmlGetVgpuVersion"}, 772 | {.name = "nvmlSetVgpuVersion"}, 773 | {.name = "nvmlDeviceGetGridLicensableFeatures_v3"}, 774 | {.name = "nvmlDeviceGetHostVgpuMode"}, 775 | {.name = "nvmlDeviceGetPgpuMetadataString"}, 776 | {.name = "nvmlVgpuInstanceGetEccMode"}, 777 | {.name = "nvmlComputeInstanceDestroy"}, 778 | {.name = "nvmlComputeInstanceGetInfo"}, 779 | {.name = "nvmlDeviceCreateGpuInstance"}, 780 | {.name = "nvmlDeviceGetArchitecture"}, 781 | {.name = "nvmlDeviceGetAttributes"}, 782 | {.name = "nvmlDeviceGetAttributes_v2"}, 783 | {.name = "nvmlDeviceGetComputeInstanceId"}, 784 | {.name = "nvmlDeviceGetCpuAffinityWithinScope"}, 785 | {.name = "nvmlDeviceGetDeviceHandleFromMigDeviceHandle"}, 786 | {.name = "nvmlDeviceGetGpuInstanceById"}, 787 | {.name = "nvmlDeviceGetGpuInstanceId"}, 788 | {.name = "nvmlDeviceGetGpuInstancePossiblePlacements"}, 789 | {.name = "nvmlDeviceGetGpuInstanceProfileInfo"}, 790 | {.name = "nvmlDeviceGetGpuInstanceRemainingCapacity"}, 791 | {.name = "nvmlDeviceGetGpuInstances"}, 792 | {.name = "nvmlDeviceGetMaxMigDeviceCount"}, 793 | {.name = "nvmlDeviceGetMemoryAffinity"}, 794 | {.name = "nvmlDeviceGetMigDeviceHandleByIndex"}, 795 | {.name = "nvmlDeviceGetMigMode"}, 796 | {.name = "nvmlDeviceGetRemappedRows"}, 797 | {.name = "nvmlDeviceGetRowRemapperHistogram"}, 798 | {.name = "nvmlDeviceIsMigDeviceHandle"}, 799 | {.name = "nvmlDeviceSetMigMode"}, 800 | {.name = "nvmlEventSetWait_v2"}, 801 | 
{.name = "nvmlGpuInstanceCreateComputeInstance"}, 802 | {.name = "nvmlGpuInstanceDestroy"}, 803 | {.name = "nvmlGpuInstanceGetComputeInstanceById"}, 804 | {.name = "nvmlGpuInstanceGetComputeInstanceProfileInfo"}, 805 | {.name = "nvmlGpuInstanceGetComputeInstanceRemainingCapacity"}, 806 | {.name = "nvmlGpuInstanceGetComputeInstances"}, 807 | {.name = "nvmlGpuInstanceGetInfo"}, 808 | {.name = "nvmlVgpuInstanceClearAccountingPids"}, 809 | {.name = "nvmlVgpuInstanceGetMdevUUID"}, 810 | {.name = "nvmlComputeInstanceGetInfo_v2"}, 811 | {.name = "nvmlDeviceGetComputeRunningProcesses_v2"}, 812 | {.name = "nvmlDeviceGetGraphicsRunningProcesses_v2"}, 813 | {.name = "nvmlDeviceSetTemperatureThreshold"}, 814 | {.name = "nvmlRetry_NvRmControl"}, 815 | {.name = "nvmlVgpuInstanceGetGpuInstanceId"}, 816 | {.name = "nvmlVgpuTypeGetGpuInstanceProfileId"}, 817 | {.name = "nvmlDeviceCreateGpuInstanceWithPlacement"}, 818 | {.name = "nvmlDeviceGetBusType"}, 819 | {.name = "nvmlDeviceGetClkMonStatus"}, 820 | {.name = "nvmlDeviceGetGpuInstancePossiblePlacements_v2"}, 821 | {.name = "nvmlDeviceGetGridLicensableFeatures_v4"}, 822 | {.name = "nvmlDeviceGetIrqNum"}, 823 | {.name = "nvmlDeviceGetMPSComputeRunningProcesses_v2"}, 824 | {.name = "nvmlDeviceGetNvLinkRemoteDeviceType"}, 825 | {.name = "nvmlDeviceResetMemoryLockedClocks"}, 826 | {.name = "nvmlDeviceSetMemoryLockedClocks"}, 827 | {.name = "nvmlGetExcludedDeviceCount"}, 828 | {.name = "nvmlGetExcludedDeviceInfoByIndex"}, 829 | {.name = "nvmlVgpuInstanceGetLicenseInfo"}, 830 | }; 831 | 832 | static void UNUSED bug_on() { 833 | BUILD_BUG_ON((sizeof(nvml_library_entry) / sizeof(nvml_library_entry[0])) != 834 | NVML_ENTRY_END); 835 | 836 | BUILD_BUG_ON((sizeof(cuda_library_entry) / sizeof(cuda_library_entry[0])) != 837 | CUDA_ENTRY_END); 838 | } 839 | 840 | /** register once set */ 841 | static pthread_once_t g_cuda_set = PTHREAD_ONCE_INIT; 842 | static pthread_once_t g_driver_set = PTHREAD_ONCE_INIT; 843 | 844 | resource_data_t 
g_vcuda_config = { 845 | .pod_uid = "", 846 | .limit = 0, 847 | .container_name = "", 848 | .utilization = 0, 849 | .gpu_memory = 0, 850 | .enable = 1, 851 | }; 852 | 853 | static char base_dir[FILENAME_MAX] = EMPTY_PREFIX; 854 | char config_path[FILENAME_MAX] = CONTROLLER_CONFIG_PATH; 855 | char pid_path[FILENAME_MAX] = PIDS_CONFIG_PATH; 856 | char driver_version[FILENAME_MAX] = ""; 857 | 858 | static void load_driver_libraries() { 859 | void *table = NULL; 860 | char driver_filename[FILENAME_MAX]; 861 | int i; 862 | 863 | snprintf(driver_filename, FILENAME_MAX - 1, "%s.%s", DRIVER_ML_LIBRARY_PREFIX, 864 | driver_version); 865 | driver_filename[FILENAME_MAX - 1] = '\0'; 866 | 867 | table = dlopen(driver_filename, RTLD_NOW | RTLD_NODELETE); 868 | if (unlikely(!table)) { 869 | LOGGER(FATAL, "can't find library %s", driver_filename); 870 | } 871 | 872 | for (i = 0; i < NVML_ENTRY_END; i++) { 873 | nvml_library_entry[i].fn_ptr = dlsym(table, nvml_library_entry[i].name); 874 | if (unlikely(!nvml_library_entry[i].fn_ptr)) { 875 | LOGGER(4, "can't find function %s in %s", nvml_library_entry[i].name, 876 | driver_filename); 877 | } 878 | } 879 | 880 | dlclose(table); 881 | 882 | // Initialize the ml driver 883 | if (NVML_FIND_ENTRY(nvml_library_entry, nvmlInitWithFlags)) { 884 | NVML_ENTRY_CALL(nvml_library_entry, nvmlInitWithFlags, 0); 885 | } else if (NVML_FIND_ENTRY(nvml_library_entry, nvmlInit_v2)) { 886 | NVML_ENTRY_CALL(nvml_library_entry, nvmlInit_v2); 887 | } else { 888 | NVML_ENTRY_CALL(nvml_library_entry, nvmlInit); 889 | } 890 | } 891 | 892 | static void load_cuda_single_library(int idx) { 893 | void *table = NULL; 894 | char cuda_filename[FILENAME_MAX]; 895 | 896 | snprintf(cuda_filename, FILENAME_MAX - 1, "%s.%s", CUDA_LIBRARY_PREFIX, 897 | driver_version); 898 | cuda_filename[FILENAME_MAX - 1] = '\0'; 899 | 900 | table = dlopen(cuda_filename, RTLD_NOW | RTLD_NODELETE); 901 | if (unlikely(!table)) { 902 | LOGGER(FATAL, "can't find library %s", cuda_filename); 
903 | } 904 | 905 | cuda_library_entry[idx].fn_ptr = dlsym(table, cuda_library_entry[idx].name); 906 | if (unlikely(!cuda_library_entry[idx].fn_ptr)) { 907 | LOGGER(4, "can't find function %s in %s", cuda_library_entry[idx].name, 908 | cuda_filename); 909 | } 910 | 911 | dlclose(table); 912 | } 913 | 914 | void load_cuda_libraries() { 915 | void *table = NULL; 916 | int i = 0; 917 | char cuda_filename[FILENAME_MAX]; 918 | 919 | LOGGER(4, "Start hijacking"); 920 | 921 | snprintf(cuda_filename, FILENAME_MAX - 1, "%s.%s", CUDA_LIBRARY_PREFIX, 922 | driver_version); 923 | cuda_filename[FILENAME_MAX - 1] = '\0'; 924 | cuda_filename[FILENAME_MAX - 1] = '\0'; 925 | 926 | table = dlopen(cuda_filename, RTLD_NOW | RTLD_NODELETE); 927 | if (unlikely(!table)) { 928 | LOGGER(FATAL, "can't find library %s", cuda_filename); 929 | } 930 | 931 | for (i = 0; i < CUDA_ENTRY_END; i++) { 932 | cuda_library_entry[i].fn_ptr = dlsym(table, cuda_library_entry[i].name); 933 | if (unlikely(!cuda_library_entry[i].fn_ptr)) { 934 | LOGGER(4, "can't find function %s in %s", cuda_library_entry[i].name, 935 | cuda_filename); 936 | } 937 | } 938 | 939 | dlclose(table); 940 | } 941 | 942 | // #lizard forgives 943 | int get_cgroup_data(const char *pid_cgroup, char *pod_uid, char *container_id, 944 | size_t size) { 945 | int ret = 1; 946 | FILE *cgroup_fd = NULL; 947 | char *token = NULL, *last_ptr = NULL, *last_second = NULL; 948 | char *cgroup_ptr = NULL; 949 | char buffer[4096]; 950 | int is_systemd = 0; 951 | char *prune_pos = NULL; 952 | 953 | cgroup_fd = fopen(pid_cgroup, "r"); 954 | if (unlikely(!cgroup_fd)) { 955 | LOGGER(4, "can't open %s, error %s", pid_cgroup, strerror(errno)); 956 | goto DONE; 957 | } 958 | 959 | /** 960 | * find memory cgroup name 961 | */ 962 | while (!feof(cgroup_fd)) { 963 | buffer[0] = '\0'; 964 | if (unlikely(!fgets(buffer, sizeof(buffer), cgroup_fd))) { 965 | LOGGER(4, "can't get line from %s", pid_cgroup); 966 | goto DONE; 967 | } 968 | 969 | buffer[strlen(buffer) 
- 1] = '\0'; 970 | 971 | last_ptr = NULL; 972 | token = buffer; 973 | for (token = strtok_r(token, ":", &last_ptr); token; 974 | token = NULL, token = strtok_r(token, ":", &last_ptr)) { 975 | if (!strcmp(token, "memory")) { 976 | cgroup_ptr = strtok_r(NULL, ":", &last_ptr); 977 | break; 978 | } 979 | } 980 | 981 | if (cgroup_ptr) { 982 | break; 983 | } 984 | } 985 | 986 | if (!cgroup_ptr) { 987 | LOGGER(4, "can't find memory cgroup from %s", pid_cgroup); 988 | goto DONE; 989 | } 990 | 991 | /** 992 | * find container id 993 | */ 994 | last_ptr = NULL; 995 | last_second = NULL; 996 | token = cgroup_ptr; 997 | while (*token) { 998 | if (*token == '/') { 999 | last_second = last_ptr; 1000 | last_ptr = token; 1001 | } 1002 | ++token; 1003 | } 1004 | 1005 | if (!last_ptr) { 1006 | goto DONE; 1007 | } 1008 | 1009 | strncpy(container_id, last_ptr + 1, size); 1010 | container_id[size - 1] = '\0'; 1011 | 1012 | /** 1013 | * if cgroup is systemd, cgroup pattern should be like 1014 | * /kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-pod27882189_b4d9_11e9_b287_ec0d9ae89a20.slice/docker-4aa615892ab2a014d52178bdf3da1c4a45c8ddfb5171dd6e39dc910f96693e14.scope 1015 | * /kubepods.slice/kubepods-pod019c1fe8_0d92_4aa0_b61c_4df58bdde71c.slice/cri-containerd-9e073649debeec6d511391c9ec7627ee67ce3a3fb508b0fa0437a97f8e58ba98.scope 1016 | */ 1017 | if ((prune_pos = strstr(container_id, ".scope"))) { 1018 | is_systemd = 1; 1019 | *prune_pos = '\0'; 1020 | } 1021 | 1022 | /** 1023 | * find pod uid 1024 | */ 1025 | *last_ptr = '\0'; 1026 | if (!last_second) { 1027 | goto DONE; 1028 | } 1029 | 1030 | strncpy(pod_uid, last_second, size); 1031 | pod_uid[size - 1] = '\0'; 1032 | 1033 | if (is_systemd && (prune_pos = strstr(pod_uid, ".slice"))) { 1034 | *prune_pos = '\0'; 1035 | } 1036 | 1037 | /** 1038 | * remove unnecessary chars from $container_id and $pod_uid 1039 | */ 1040 | if (is_systemd) { 1041 | /** 1042 | * For this kind of cgroup path, we need to find the last appearance of 
1043 | * slash 1044 | * /kubepods.slice/kubepods-pod019c1fe8_0d92_4aa0_b61c_4df58bdde71c.slice/cri-containerd-9e073649debeec6d511391c9ec7627ee67ce3a3fb508b0fa0437a97f8e58ba98.scope 1045 | */ 1046 | prune_pos = NULL; 1047 | token = container_id; 1048 | while (*token) { 1049 | if (*token == '-') { 1050 | prune_pos = token; 1051 | } 1052 | ++token; 1053 | } 1054 | 1055 | if (!prune_pos) { 1056 | LOGGER(4, "no - prefix"); 1057 | goto DONE; 1058 | } 1059 | 1060 | memmove(container_id, prune_pos + 1, strlen(container_id)); 1061 | 1062 | prune_pos = strstr(pod_uid, "-pod"); 1063 | if (!prune_pos) { 1064 | LOGGER(4, "no pod string"); 1065 | goto DONE; 1066 | } 1067 | prune_pos += strlen("-pod"); 1068 | memmove(pod_uid, prune_pos, strlen(prune_pos)); 1069 | pod_uid[strlen(prune_pos)] = '\0'; 1070 | prune_pos = pod_uid; 1071 | while (*prune_pos) { 1072 | if (*prune_pos == '_') { 1073 | *prune_pos = '-'; 1074 | } 1075 | ++prune_pos; 1076 | } 1077 | } else { 1078 | memmove(pod_uid, pod_uid + strlen("/pod"), strlen(pod_uid)); 1079 | } 1080 | 1081 | ret = 0; 1082 | DONE: 1083 | if (cgroup_fd) { 1084 | fclose(cgroup_fd); 1085 | } 1086 | return ret; 1087 | } 1088 | 1089 | static int get_path_by_cgroup(const char *pid_cgroup) { 1090 | int ret = 1; 1091 | char pod_uid[4096], container_id[4096]; 1092 | 1093 | if (is_custom_config_path()) { 1094 | return 0; 1095 | } 1096 | 1097 | if (unlikely(get_cgroup_data(pid_cgroup, pod_uid, container_id, 1098 | sizeof(container_id)))) { 1099 | LOGGER(4, "can't find container id from %s", pid_cgroup); 1100 | goto DONE; 1101 | } 1102 | 1103 | snprintf(base_dir, sizeof(base_dir), "%s%s", VCUDA_CONFIG_PATH, container_id); 1104 | snprintf(config_path, sizeof(config_path), "%s/%s", base_dir, 1105 | CONTROLLER_CONFIG_NAME); 1106 | snprintf(pid_path, sizeof(pid_path), "%s/%s", base_dir, PIDS_CONFIG_NAME); 1107 | 1108 | LOGGER(4, "config file: %s", config_path); 1109 | LOGGER(4, "pid file: %s", pid_path); 1110 | ret = 0; 1111 | 1112 | LOGGER(4, "register 
to remote: pod uid: %s, cont id: %s", pod_uid, 1113 | container_id); 1114 | register_to_remote_with_data("", pod_uid, container_id); 1115 | DONE: 1116 | return ret; 1117 | } 1118 | 1119 | static int is_default_config_path() { 1120 | int fd = -1; 1121 | 1122 | fd = open(config_path, O_RDONLY); 1123 | if (fd == -1) { 1124 | return 0; 1125 | } 1126 | 1127 | close(fd); 1128 | 1129 | return 1; 1130 | } 1131 | 1132 | static void matchRegex(const char *pattern, const char *matchString, 1133 | char *version) { 1134 | regex_t regex; 1135 | int reti; 1136 | regmatch_t matches[1]; 1137 | char msgbuf[512]; 1138 | 1139 | reti = regcomp(®ex, pattern, REG_EXTENDED); 1140 | if (reti) { 1141 | LOGGER(4, "Could not compile regex: %s", DRIVER_VERSION_MATCH_PATTERN); 1142 | return; 1143 | } 1144 | 1145 | reti = regexec(®ex, matchString, 1, matches, 0); 1146 | switch (reti) { 1147 | case 0: 1148 | strncpy(version, matchString + matches[0].rm_so, 1149 | matches[0].rm_eo - matches[0].rm_so); 1150 | version[matches[0].rm_eo - matches[0].rm_so] = '\0'; 1151 | break; 1152 | case REG_NOMATCH: 1153 | LOGGER(4, "Regex does not match for string: %s", matchString); 1154 | break; 1155 | default: 1156 | regerror(reti, ®ex, msgbuf, sizeof(msgbuf)); 1157 | LOGGER(4, "Regex match failed: %s", msgbuf); 1158 | } 1159 | 1160 | regfree(®ex); 1161 | return; 1162 | } 1163 | 1164 | static void read_version_from_proc(char *version) { 1165 | char *line = NULL; 1166 | size_t len = 0; 1167 | 1168 | FILE *fp = fopen(DRIVER_VERSION_PROC_PATH, "r"); 1169 | if (fp == NULL) { 1170 | LOGGER(4, "can't open %s, error %s", DRIVER_VERSION_PROC_PATH, 1171 | strerror(errno)); 1172 | return; 1173 | } 1174 | 1175 | while ((getline(&line, &len, fp) != -1)) { 1176 | if (strncmp(line, "NVRM", 4) == 0) { 1177 | matchRegex(DRIVER_VERSION_MATCH_PATTERN, line, version); 1178 | break; 1179 | } 1180 | } 1181 | fclose(fp); 1182 | } 1183 | 1184 | int read_controller_configuration() { 1185 | int fd = 0; 1186 | int rsize; 1187 | int ret 
= 1; 1188 | 1189 | if (!is_default_config_path()) { 1190 | if (get_path_by_cgroup("/proc/self/cgroup")) { 1191 | LOGGER(FATAL, "can't get config file path"); 1192 | } 1193 | } 1194 | 1195 | fd = open(config_path, O_RDONLY); 1196 | if (unlikely(fd == -1)) { 1197 | LOGGER(4, "can't open %s, error %s", config_path, strerror(errno)); 1198 | goto DONE; 1199 | } 1200 | 1201 | rsize = (int)read(fd, (void *)&g_vcuda_config, sizeof(resource_data_t)); 1202 | if (unlikely(rsize != sizeof(g_vcuda_config))) { 1203 | LOGGER(4, "can't read %s, need %zu but got %d", CONTROLLER_CONFIG_PATH, 1204 | sizeof(resource_data_t), rsize); 1205 | goto DONE; 1206 | } 1207 | 1208 | read_version_from_proc(driver_version); 1209 | ret = 0; 1210 | 1211 | LOGGER(4, "pod uid : %s", g_vcuda_config.pod_uid); 1212 | LOGGER(4, "limit : %d", g_vcuda_config.limit); 1213 | LOGGER(4, "container name : %s", g_vcuda_config.container_name); 1214 | LOGGER(4, "total utilization: %d", g_vcuda_config.utilization); 1215 | LOGGER(4, "total gpu memory : %" PRIu64, g_vcuda_config.gpu_memory); 1216 | LOGGER(4, "driver version : %s", driver_version); 1217 | LOGGER(4, "hard limit mode : %d", g_vcuda_config.hard_limit); 1218 | LOGGER(4, "enable mode : %d", g_vcuda_config.enable); 1219 | DONE: 1220 | if (likely(fd)) { 1221 | close(fd); 1222 | } 1223 | 1224 | return ret; 1225 | } 1226 | 1227 | void load_necessary_data() { 1228 | read_controller_configuration(); 1229 | load_cuda_single_library(CUDA_ENTRY_ENUM(cuDriverGetVersion)); 1230 | 1231 | pthread_once(&g_cuda_set, load_cuda_libraries); 1232 | pthread_once(&g_driver_set, load_driver_libraries); 1233 | } 1234 | 1235 | int is_custom_config_path() { return strcmp(base_dir, EMPTY_PREFIX) != 0; } 1236 | -------------------------------------------------------------------------------- /src/register.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack 
available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | // 19 | // Created by Thomas Song on 2019-04-15. 20 | // 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include "include/hijack.h" 28 | 29 | static const struct timespec g_cycle = { 30 | .tv_sec = 0, 31 | .tv_nsec = TIME_TICK * MILLISEC, 32 | }; 33 | 34 | // #lizard forgives 35 | void register_to_remote_with_data(const char* bus_id, const char* pod_uid, 36 | const char* container) { 37 | pid_t register_pid; 38 | int wstatus = 0, wret = 0; 39 | pid_t child_pid; 40 | int pipe_fd[2]; 41 | int ret = -1; 42 | 43 | ret = pipe(pipe_fd); 44 | if (unlikely(ret)) { 45 | LOGGER(FATAL, "create pipe failed, error %s", strerror(errno)); 46 | } 47 | 48 | register_pid = fork(); 49 | if (!register_pid) { 50 | close(pipe_fd[1]); 51 | while (read(pipe_fd[0], &child_pid, sizeof(pid_t)) == 0) { 52 | nanosleep(&g_cycle, NULL); 53 | } 54 | 55 | // child 56 | if (is_custom_config_path()) { 57 | ret = execl((RPC_CLIENT_PATH RPC_CLIENT_NAME), RPC_CLIENT_NAME, "--addr", 58 | RPC_ADDR, "--bus-id", bus_id, "--pod-uid", pod_uid, 59 | "--cont-id", container, (char*)NULL); 60 | } else { 61 | ret = execl((RPC_CLIENT_PATH RPC_CLIENT_NAME), RPC_CLIENT_NAME, "--addr", 62 | RPC_ADDR, "--bus-id", bus_id, "--pod-uid", pod_uid, 63 | "--cont-name", container, (char*)NULL); 64 | } 65 | if (unlikely(ret == -1)) { 66 
| LOGGER(FATAL, "can't register to manager, error %s", strerror(errno)); 67 | } 68 | 69 | close(pipe_fd[0]); 70 | _exit(EXIT_SUCCESS); 71 | } else { 72 | close(pipe_fd[0]); 73 | 74 | while (write(pipe_fd[1], ®ister_pid, sizeof(pid_t)) == 0) { 75 | nanosleep(&g_cycle, NULL); 76 | } 77 | 78 | do { 79 | wret = waitpid(register_pid, &wstatus, WUNTRACED | WCONTINUED); 80 | if (unlikely(wret == -1)) { 81 | LOGGER(FATAL, "waitpid failed, error %s", strerror(errno)); 82 | } 83 | } while (!WIFEXITED(wstatus) && !WIFSIGNALED(wstatus)); 84 | 85 | ret = WEXITSTATUS(wstatus); 86 | if (unlikely(ret)) { 87 | LOGGER(FATAL, "rpc client exit with %d", ret); 88 | } 89 | 90 | close(pipe_fd[1]); 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /tools/monitor_dockernized.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Tencent is pleased to support the open source community by making TKEStack available. 3 | * 4 | * Copyright (C) 2012-2019 Tencent. All Rights Reserved. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use 7 | * this file except in compliance with the License. You may obtain a copy of the 8 | * License at 9 | * 10 | * https://opensource.org/licenses/Apache-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | * WARRANTIES OF ANY KIND, either express or implied. See the License for the 15 | * specific language governing permissions and limitations under the License. 16 | */ 17 | 18 | // 19 | // Created by thomas on 5/17/18. 
20 | // 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include "include/hijack.h" 28 | #include "include/nvml-helper.h" 29 | 30 | extern entry_t nvml_library_entry[]; 31 | extern char pid_path[]; 32 | extern char driver_version[]; 33 | 34 | static void load_driver_libraries() { 35 | void *table = NULL; 36 | char driver_filename[FILENAME_MAX]; 37 | int i; 38 | 39 | snprintf(driver_filename, FILENAME_MAX - 1, "%s.%s", DRIVER_ML_LIBRARY_PREFIX, 40 | driver_version); 41 | driver_filename[FILENAME_MAX - 1] = '\0'; 42 | 43 | table = dlopen(driver_filename, RTLD_NOW | RTLD_NODELETE); 44 | if (unlikely(!table)) { 45 | LOGGER(FATAL, "can't find library %s", driver_filename); 46 | } 47 | 48 | for (i = 0; i < NVML_ENTRY_END; i++) { 49 | nvml_library_entry[i].fn_ptr = dlsym(table, nvml_library_entry[i].name); 50 | if (unlikely(!nvml_library_entry[i].fn_ptr)) { 51 | LOGGER(4, "can't find function %s in %s", nvml_library_entry[i].name, 52 | driver_filename); 53 | } 54 | } 55 | 56 | dlclose(table); 57 | } 58 | 59 | int main(void) { 60 | int ret = 0; 61 | 62 | int fd = 0; 63 | int item = 0; 64 | int rsize = 0; 65 | 66 | int i = 0, j = 0, k = 0; 67 | 68 | int pids_table[MAX_PIDS]; 69 | int pids_table_size = 0; 70 | 71 | int device_num = 0; 72 | nvmlDevice_t dev; 73 | nvmlProcessInfo_t pids_on_device[MAX_PIDS]; 74 | unsigned int size_on_device = MAX_PIDS; 75 | 76 | struct timeval cur; 77 | size_t microsec; 78 | nvmlProcessUtilizationSample_t processes_sample[MAX_PIDS]; 79 | int processes_num = MAX_PIDS; 80 | 81 | int sm_util = 0; 82 | uint64_t memory = 0; 83 | nvmlProcessInfo_t *process_match = NULL; 84 | nvmlProcessUtilizationSample_t *sample_match = NULL; 85 | 86 | fd = open(pid_path, O_RDONLY); 87 | if (unlikely(fd == -1)) { 88 | LOGGER(5, "can't open %s", pid_path); 89 | return 0; 90 | } 91 | 92 | for (item = 0; item < MAX_PIDS; item++) { 93 | rsize = (int) read(fd, pids_table + item, sizeof(int)); 94 | if (unlikely(rsize != sizeof(int))) { 95 | break; 
96 | } 97 | } 98 | 99 | for (i = 0; i < item; i++) { 100 | LOGGER(5, "pid: %d", pids_table[i]); 101 | } 102 | 103 | pids_table_size = item; 104 | 105 | LOGGER(5, "read %d items from %s", pids_table_size, pid_path); 106 | 107 | if (unlikely(read_controller_configuration())) { 108 | LOGGER(5, "can't read controller file"); 109 | return 1; 110 | } 111 | 112 | load_driver_libraries(); 113 | 114 | NVML_ENTRY_CALL(nvml_library_entry, nvmlInit); 115 | 116 | ret = NVML_ENTRY_CALL(nvml_library_entry, nvmlDeviceGetCount, &device_num); 117 | if (unlikely(ret)) { 118 | LOGGER(ERROR, "Get device number return %d", ret); 119 | return 1; 120 | } 121 | 122 | for (i = 0; i < device_num; i++) { 123 | ret = NVML_ENTRY_CALL(nvml_library_entry, nvmlDeviceGetHandleByIndex, i, 124 | &dev); 125 | if (unlikely(ret)) { 126 | LOGGER(ERROR, "Get device %d return %d", i, ret); 127 | continue; 128 | } 129 | 130 | size_on_device = MAX_PIDS; 131 | ret = NVML_ENTRY_CALL(nvml_library_entry, 132 | nvmlDeviceGetComputeRunningProcesses, dev, 133 | &size_on_device, pids_on_device); 134 | if (unlikely(ret)) { 135 | LOGGER(ERROR, "Get process gpu memory return %d", ret); 136 | continue; 137 | } 138 | 139 | for (j = 0; j < size_on_device; j++) { 140 | LOGGER(4, "summary: %d used %lld", pids_on_device[j].pid, 141 | pids_on_device[j].usedGpuMemory); 142 | } 143 | 144 | processes_num = MAX_PIDS; 145 | gettimeofday(&cur, NULL); 146 | microsec = (cur.tv_sec - 1) * 1000UL * 1000UL + cur.tv_usec; 147 | ret = NVML_ENTRY_CALL(nvml_library_entry, nvmlDeviceGetProcessUtilization, 148 | dev, processes_sample, &processes_num, microsec); 149 | if (unlikely(ret)) { 150 | LOGGER(ERROR, "Get process utilization return %d", ret); 151 | continue; 152 | } 153 | 154 | for (j = 0; j < processes_num; j++) { 155 | LOGGER(4, "summary: %d util %d", processes_sample[j].pid, 156 | processes_sample[j].smUtil); 157 | } 158 | 159 | fprintf(stderr, "Device\tProcess\tUtilization\tMemory\n"); 160 | for (j = 0; j < pids_table_size; j++) { 
161 | process_match = NULL; 162 | sample_match = NULL; 163 | 164 | for (k = 0; k < size_on_device; k++) { 165 | if (pids_on_device[k].pid == pids_table[j]) { 166 | process_match = &pids_on_device[k]; 167 | } 168 | } 169 | 170 | for (k = 0; k < processes_num; k++) { 171 | if (processes_sample[k].pid == pids_table[j]) { 172 | sample_match = &processes_sample[k]; 173 | } 174 | } 175 | 176 | if (process_match) { 177 | memory = process_match->usedGpuMemory; 178 | memory >>= 20; 179 | if (sample_match) { 180 | sm_util = sample_match->smUtil; 181 | } else { 182 | sm_util = 0; 183 | } 184 | fprintf(stderr, "%-6d\txxx\t%-11d\t%-6" PRIu64 " MB\n", i, sm_util, 185 | memory); 186 | } 187 | } 188 | } 189 | 190 | NVML_ENTRY_CALL(nvml_library_entry, nvmlShutdown); 191 | close(fd); 192 | } 193 | -------------------------------------------------------------------------------- /vcuda.spec: -------------------------------------------------------------------------------- 1 | Name: vcuda 2 | Version: %{version} 3 | Release: %{commit}%{?dist} 4 | Summary: GPU virtual device library 5 | 6 | License: MIT 7 | Source: vcuda.tar.gz 8 | 9 | Requires: systemd-units 10 | 11 | %define pkgname %{name}-%{version}-%{release} 12 | 13 | %description 14 | GPU virtual device library 15 | 16 | %prep 17 | %setup 18 | 19 | %install 20 | install -d $RPM_BUILD_ROOT/%{_libdir} 21 | install -d $RPM_BUILD_ROOT/%{_bindir} 22 | 23 | install -p -m 755 libcuda-control.so $RPM_BUILD_ROOT/%{_libdir}/ 24 | install -p -m 755 nvml-monitor $RPM_BUILD_ROOT/%{_bindir}/ 25 | 26 | %clean 27 | rm -rf $RPM_BUILD_ROOT 28 | 29 | %files 30 | /%{_libdir}/libcuda-control.so 31 | /%{_bindir}/nvml-monitor 32 | 33 | %post 34 | ldconfig 35 | 36 | %postun 37 | ldconfig 38 | --------------------------------------------------------------------------------