├── LICENSE
├── Makefile
├── README.md
└── cuda_intercept.cpp

/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2022 Christos Konstantinos Matzoros
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Edit the CUDA_PATH variable for your system
2 | CUDA_PATH?=/usr/local/cuda-9.0
3 | 
4 | 
5 | # Set compilation flags
6 | CXX=g++
7 | CXXFLAGS=-Wall -fPIC -shared
8 | LDLIBS=-ldl
9 | 
10 | all: lib_cuda_intercept.so
11 | 
12 | lib_cuda_intercept.so: cuda_intercept.cpp
13 | 	$(CXX) -I$(CUDA_PATH)/include $(CXXFLAGS) -o lib_cuda_intercept.so cuda_intercept.cpp $(LDLIBS)
14 | 
15 | clean:
16 | 	-rm -f lib_cuda_intercept.so
17 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CUDA-Runtime-API-calls-interception
2 | Shared library for intercepting CUDA Runtime API calls. This code was part of my Bachelor's thesis, "A Study on the Computational Exploitation of Remote Virtualized Graphics Cards" (https://bit.ly/37tIG0D).
3 | 
4 | 
5 | Prerequisites:
6 | - GNU/Linux for compilation
7 | - Set the CUDA_PATH variable in the Makefile to the directory
8 |   where CUDA is installed.
9 | 
10 | Tested on:
11 | gcc-6, gcc-7
12 | 
13 | 
14 | How to compile:
15 | $ make
16 | 
17 | To remove:
18 | $ make clean
19 | 
20 | 
21 | How to run (pass the compiled CUDA executable, not the .cu source):
22 | $ LD_PRELOAD=/full_path_to_the_cuda_intercept_directory/cuda_intercept/lib_cuda_intercept.so /full_path_to_the_directory_of_the_CUDA_Program/your_cuda_program
23 | 
24 | e.g.
25 | LD_PRELOAD=/home/cuda_intercept/lib_cuda_intercept.so /home/NVIDIA_CUDA-9.0_Samples/6_Advanced/transpose/transpose
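26 | 
27 | How it works:
28 | Every wrapper in cuda_intercept.cpp follows the same pattern: it prints a trace
29 | line, resolves the real CUDA Runtime symbol once with dlsym(RTLD_NEXT, ...), and
30 | then forwards the call. A condensed, illustrative sketch of that pattern (taken
31 | from the cudaMalloc wrapper in cuda_intercept.cpp; the assert on the resolved
32 | pointer is omitted here for brevity):
33 | 
34 |     typedef cudaError_t (*cudaMalloc_t)(void ** devPtr, size_t size);
35 |     static cudaMalloc_t native_cudaMalloc = NULL;
36 | 
37 |     extern "C" cudaError_t cudaMalloc(void ** devPtr, size_t size) {
38 |         printf("\n>>cudaMalloc interception\n");   // trace the intercepted call
39 |         if (native_cudaMalloc == NULL)             // resolve the real symbol once
40 |             native_cudaMalloc = (cudaMalloc_t)dlsym(RTLD_NEXT,"cudaMalloc");
41 |         return native_cudaMalloc(devPtr,size);     // forward to the real runtime
42 |     }
43 | 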
--------------------------------------------------------------------------------
/cuda_intercept.cpp:
--------------------------------------------------------------------------------
1 | /*********************
2 | 
3 | MIT License
4 | 
5 | Copyright (c) 2020 Christos Konstantinos Matzoros
6 | 
7 | Permission is hereby granted, free of charge, to any person obtaining a copy
8 | of this software and associated documentation files (the "Software"), to deal
9 | in the Software without restriction, including without limitation the rights
10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | copies of the Software, and to permit persons to whom the Software is
12 | furnished to do so, subject to the following conditions:
13 | 
14 | The above copyright notice and this permission notice shall be included in all
15 | copies or substantial portions of the Software.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | SOFTWARE.
24 | 
25 | ***********************/
26 | 
27 | 
28 | //Headers
29 | #include <stdio.h>
30 | #include <stdlib.h>
31 | #include <string.h>
32 | #include <assert.h>
33 | #include <list>
34 | #include <dlfcn.h> //for dynamic linking
35 | #include <cuda.h>
36 | #include <cuda_runtime.h>
37 | using namespace std;
38 | 
39 | 
40 | //Per-launch information captured by cudaConfigureCall (grid/block dims) and cudaSetupArgument (arguments)
41 | typedef struct {
42 | dim3 gridDim;
43 | dim3 blockDim;
44 | list<void *> arguments;
45 | int counter;
46 | } kernel_info_t;
47 | 
48 | static list<kernel_info_t> kernels_list;
49 | 
50 | kernel_info_t &kernelInfo() {
51 | static kernel_info_t kernelInfo;
52 | return kernelInfo;
53 | }
54 | 
55 | 
56 | 
57 | /////////////////////////
58 | // PRINT FUNCTIONS //
59 | /////////////////////////
60 | 
61 | void print_grid_dimensions(dim3 gridDim){
62 | if (gridDim.y == 1 && gridDim.z == 1) { //1D grid (x)
63 | printf("gridDim=%d ", gridDim.x);
64 | } else if (gridDim.z == 1) { //2D grid (x,y)
65 | printf("gridDim=[%d,%d] ", gridDim.x, gridDim.y);
66 | } else { //3D grid (x,y,z)
67 | printf("gridDim=[%d,%d,%d] ", gridDim.x, gridDim.y, gridDim.z);
68 | }
69 | }
70 | 
71 | void print_block_dimensions(dim3 blockDim){
72 | if (blockDim.y == 1 && blockDim.z == 1) { //1D block (x)
73 | printf("blockDim=%d ", blockDim.x);
74 | } else if (blockDim.z == 1) { //2D block (x,y)
75 | printf("blockDim=[%d,%d] ", blockDim.x, blockDim.y);
76 | } else { //3D block (x,y,z)
77 | printf("blockDim=[%d,%d,%d] ", blockDim.x, blockDim.y, blockDim.z);
78 | }
79 | }
80 | 
81 | void print_dimensions(dim3 gridDim, dim3 blockDim){
82 | print_grid_dimensions(gridDim);
83 | print_block_dimensions(blockDim);
84 | }
85 | 
86 | void print_args(list<void *> arg){
87 | for (std::list<void *>::iterator it = arg.begin(), end = arg.end(); it != end; ++it) {
88 | unsigned i = std::distance(arg.begin(), it);
89 | printf("%u:%d \n", i, *(static_cast<int *>(*it)));
90 | }
91 | }
92 | 
93 | void print_kernel_invocation(const char *entry) {
94 | printf("New kernel invocation\n");
95 | print_dimensions(kernelInfo().gridDim,kernelInfo().blockDim);
96 | //print_args(kernelInfo().arguments);
97 | printf("\n");
98 | }
99 | 
100 | 
101 | 
102 | ////////////////////////////
103 
| // CALLS INTERCEPTION // 104 | //////////////////////////// 105 | 106 | //*******************************************// 107 | // CUDA Runtime API Error Handling // 108 | //*******************************************// 109 | /// cudaGetErrorName /// 110 | typedef const char* (*cudaGetErrorName_t)(cudaError_t error); 111 | static cudaGetErrorName_t native_cudaGetErrorName = NULL; 112 | 113 | extern "C" const char* cudaGetErrorName(cudaError_t error) { 114 | printf("\n>> cudaGetErrorName interception\n"); 115 | 116 | if (native_cudaGetErrorName == NULL) { 117 | native_cudaGetErrorName = (cudaGetErrorName_t)dlsym(RTLD_NEXT,"cudaGetErrorName"); 118 | } 119 | assert(native_cudaGetErrorName != NULL); 120 | return native_cudaGetErrorName(error); 121 | } 122 | 123 | /// cudaGetErrorString /// 124 | typedef const char* (*cudaGetErrorString_t)(cudaError_t error); 125 | static cudaGetErrorString_t native_cudaGetErrorString = NULL; 126 | 127 | extern "C" const char* cudaGetErrorString(cudaError_t error) { 128 | printf("\n>> cudaGetErrorString interception\n"); 129 | 130 | if (native_cudaGetErrorString == NULL) { 131 | native_cudaGetErrorString = (cudaGetErrorString_t)dlsym(RTLD_NEXT,"cudaGetErrorString"); 132 | } 133 | assert(native_cudaGetErrorString != NULL); 134 | return native_cudaGetErrorString(error); 135 | } 136 | 137 | /// cudaGetLastError /// 138 | typedef cudaError_t (*cudaGetLastError_t)(void); 139 | static cudaGetLastError_t native_cudaGetLastError = NULL; 140 | 141 | extern "C" cudaError_t cudaGetLastError(void) { 142 | printf("\n>> cudaGetLastError interception\n"); 143 | 144 | if (native_cudaGetLastError == NULL) { 145 | native_cudaGetLastError = (cudaGetLastError_t)dlsym(RTLD_NEXT,"cudaGetLastError"); 146 | } 147 | assert(native_cudaGetLastError != NULL); 148 | return native_cudaGetLastError(); 149 | } 150 | 151 | /// cudaGetLastError /// 152 | typedef cudaError_t (*cudaPeekAtLastError_t)(void); 153 | static cudaPeekAtLastError_t native_cudaPeekAtLastError = NULL; 154 | 155 | extern "C" cudaError_t cudaPeekAtLastError(void) { 156 | printf("\n>> cudaPeekAtLastError interception\n"); 157 | 158 | if (native_cudaPeekAtLastError== NULL) { 159 | native_cudaPeekAtLastError = (cudaPeekAtLastError_t)dlsym(RTLD_NEXT,"cudaPeekAtLastError"); 160 | } 161 | assert(native_cudaPeekAtLastError != NULL); 162 | return native_cudaPeekAtLastError(); 163 | } 164 | 165 | 166 | //**********************************************// 167 | // CUDA Runtime API Device Management // 168 | //**********************************************// 169 | /// cudaChooseDevice /// 170 | typedef cudaError_t (*cudaChooseDevice_t)(int * device, const struct cudaDeviceProp * prop); 171 | static cudaChooseDevice_t native_cudaChooseDevice = NULL; 172 | 173 | extern "C" cudaError_t cudaChooseDevice(int * device, const struct cudaDeviceProp * prop) { 174 | printf("\n>>cudaChooseDevice interception \n"); 175 | 176 | if (native_cudaChooseDevice == NULL) { 177 | native_cudaChooseDevice = (cudaChooseDevice_t)dlsym(RTLD_NEXT,"cudaChooseDevice"); 178 | } 179 | assert(native_cudaChooseDevice != NULL); 180 | return native_cudaChooseDevice(device,prop); 181 | } 182 | 183 | /// cudaDeviceGetAttribute /// 184 | typedef cudaError_t (*cudaDeviceGetAttribute_t)(int* value, cudaDeviceAttr attr, int device); 185 | static cudaDeviceGetAttribute_t native_cudaDeviceGetAttribute = NULL; 186 | 187 | extern "C" cudaError_t cudaDeviceGetAttribute(int* value, cudaDeviceAttr attr, int device) { 188 | printf("\n>>cudaDeviceGetAttribute interception \n"); 189 | 
190 | if (native_cudaDeviceGetAttribute == NULL) {
191 | native_cudaDeviceGetAttribute = (cudaDeviceGetAttribute_t)dlsym(RTLD_NEXT,"cudaDeviceGetAttribute");
192 | }
193 | assert(native_cudaDeviceGetAttribute != NULL);
194 | return native_cudaDeviceGetAttribute(value,attr,device);
195 | }
196 | 
197 | /// cudaDeviceGetByPCIBusId ///
198 | typedef cudaError_t (*cudaDeviceGetByPCIBusId_t)(int* device, const char* pciBusId);
199 | static cudaDeviceGetByPCIBusId_t native_cudaDeviceGetByPCIBusId = NULL;
200 | 
201 | extern "C" cudaError_t cudaDeviceGetByPCIBusId (int* device, const char* pciBusId) {
202 | printf("\n>>cudaDeviceGetByPCIBusId interception\n");
203 | 
204 | if (native_cudaDeviceGetByPCIBusId == NULL) {
205 | native_cudaDeviceGetByPCIBusId = (cudaDeviceGetByPCIBusId_t)dlsym(RTLD_NEXT,"cudaDeviceGetByPCIBusId");
206 | }
207 | assert(native_cudaDeviceGetByPCIBusId != NULL);
208 | return native_cudaDeviceGetByPCIBusId(device,pciBusId);
209 | }
210 | 
211 | /// cudaDeviceGetCacheConfig ///
212 | typedef cudaError_t (*cudaDeviceGetCacheConfig_t)(cudaFuncCache * pCacheConfig);
213 | static cudaDeviceGetCacheConfig_t native_cudaDeviceGetCacheConfig = NULL;
214 | 
215 | extern "C" cudaError_t cudaDeviceGetCacheConfig (cudaFuncCache * pCacheConfig) {
216 | printf("\n>>cudaDeviceGetCacheConfig interception\n");
217 | 
218 | if (native_cudaDeviceGetCacheConfig == NULL) {
219 | native_cudaDeviceGetCacheConfig = (cudaDeviceGetCacheConfig_t)dlsym(RTLD_NEXT,"cudaDeviceGetCacheConfig");
220 | }
221 | assert(native_cudaDeviceGetCacheConfig != NULL);
222 | return native_cudaDeviceGetCacheConfig(pCacheConfig);
223 | }
224 | 
225 | /// cudaDeviceGetLimit ///
226 | typedef cudaError_t (*cudaDeviceGetLimit_t)(size_t* pValue, cudaLimit limit);
227 | static cudaDeviceGetLimit_t native_cudaDeviceGetLimit = NULL;
228 | 
229 | extern "C" cudaError_t cudaDeviceGetLimit (size_t* pValue, cudaLimit limit) {
230 | printf("\n>>cudaDeviceGetLimit interception\n");
231 | 
232 | if (native_cudaDeviceGetLimit == NULL) {
233 | native_cudaDeviceGetLimit = (cudaDeviceGetLimit_t)dlsym(RTLD_NEXT,"cudaDeviceGetLimit");
234 | }
235 | assert(native_cudaDeviceGetLimit != NULL);
236 | return native_cudaDeviceGetLimit(pValue,limit);
237 | }
238 | 
239 | /// cudaDeviceGetNvSciSyncAttributes ///
240 | typedef cudaError_t (*cudaDeviceGetNvSciSyncAttributes_t)( void* nvSciSyncAttrList, int device, int flags);
241 | static cudaDeviceGetNvSciSyncAttributes_t native_cudaDeviceGetNvSciSyncAttributes = NULL;
242 | 
243 | extern "C" cudaError_t cudaDeviceGetNvSciSyncAttributes ( void* nvSciSyncAttrList, int device, int flags) {
244 | printf("\n>>cudaDeviceGetNvSciSyncAttributes interception\n");
245 | 
246 | if (native_cudaDeviceGetNvSciSyncAttributes == NULL) {
247 | native_cudaDeviceGetNvSciSyncAttributes = (cudaDeviceGetNvSciSyncAttributes_t)dlsym(RTLD_NEXT,"cudaDeviceGetNvSciSyncAttributes");
248 | }
249 | assert(native_cudaDeviceGetNvSciSyncAttributes != NULL);
250 | return native_cudaDeviceGetNvSciSyncAttributes(nvSciSyncAttrList,device,flags);
251 | }
252 | 
253 | /// cudaDeviceGetP2PAttribute ///
254 | typedef cudaError_t (*cudaDeviceGetP2PAttribute_t)(int* value, cudaDeviceP2PAttr attr, int srcDevice, int dstDevice);
255 | static cudaDeviceGetP2PAttribute_t native_cudaDeviceGetP2PAttribute = NULL;
256 | 
257 | extern "C" cudaError_t cudaDeviceGetP2PAttribute (int* value, cudaDeviceP2PAttr attr, int srcDevice, int dstDevice) {
258 | printf("\n>>cudaDeviceGetP2PAttribute interception\n");
259 | 
260 | if (native_cudaDeviceGetP2PAttribute == 
NULL) { 261 | native_cudaDeviceGetP2PAttribute = (cudaDeviceGetP2PAttribute_t)dlsym(RTLD_NEXT,"cudaDeviceGetP2PAttribute"); 262 | } 263 | assert(native_cudaDeviceGetP2PAttribute != NULL); 264 | return native_cudaDeviceGetP2PAttribute(value,attr,srcDevice,dstDevice); 265 | } 266 | 267 | /// cudaDeviceGetPCIBusId /// 268 | typedef cudaError_t (*cudaDeviceGetPCIBusId_t)(char* pciBusId, int len, int device); 269 | static cudaDeviceGetPCIBusId_t native_cudaDeviceGetPCIBusId = NULL; 270 | 271 | extern "C" cudaError_t cudaDeviceGetPCIBusId (char* pciBusId, int len, int device) { 272 | printf("\n>>cudaDeviceGetPCIBusId interception\n"); 273 | 274 | if (native_cudaDeviceGetPCIBusId == NULL) { 275 | native_cudaDeviceGetPCIBusId = (cudaDeviceGetPCIBusId_t)dlsym(RTLD_NEXT,"cudaDeviceGetPCIBusId"); 276 | } 277 | assert(native_cudaDeviceGetPCIBusId != NULL); 278 | return native_cudaDeviceGetPCIBusId(pciBusId,len,device); 279 | } 280 | 281 | /// cudaDeviceGetSharedMemConfig /// 282 | typedef cudaError_t (*cudaDeviceGetSharedMemConfig_t)( cudaSharedMemConfig ** pConfig ); 283 | static cudaDeviceGetSharedMemConfig_t native_cudaDeviceGetSharedMemConfig = NULL; 284 | 285 | extern "C" cudaError_t cudaDeviceGetSharedMemConfig (cudaSharedMemConfig ** pConfig ) { 286 | printf("\n>>cudaDeviceGetSharedMemConfig interception\n"); 287 | 288 | if (native_cudaDeviceGetSharedMemConfig == NULL) { 289 | native_cudaDeviceGetSharedMemConfig = (cudaDeviceGetSharedMemConfig_t)dlsym(RTLD_NEXT,"cudaDeviceGetSharedMemConfig"); 290 | } 291 | assert(native_cudaDeviceGetSharedMemConfig != NULL); 292 | return native_cudaDeviceGetSharedMemConfig(pConfig); 293 | } 294 | 295 | /// cudaDeviceGetStreamPriorityRange /// 296 | typedef cudaError_t (*cudaDeviceGetStreamPriorityRange_t)( int* leastPriority, int* greatestPriority); 297 | static cudaDeviceGetStreamPriorityRange_t native_cudaDeviceGetStreamPriorityRange = NULL; 298 | 299 | extern "C" cudaError_t cudaDeviceGetStreamPriorityRange ( int* leastPriority, int* greatestPriority) { 300 | printf("\n>>cudaDeviceGetStreamPriorityRange interception\n"); 301 | 302 | if (native_cudaDeviceGetStreamPriorityRange == NULL) { 303 | native_cudaDeviceGetStreamPriorityRange = (cudaDeviceGetStreamPriorityRange_t)dlsym(RTLD_NEXT,"cudaDeviceGetStreamPriorityRange"); 304 | } 305 | assert(native_cudaDeviceGetStreamPriorityRange != NULL); 306 | return native_cudaDeviceGetStreamPriorityRange(leastPriority,greatestPriority); 307 | } 308 | 309 | /// cudaMalloc3D /// 310 | typedef cudaError_t (*cudaDeviceSetCacheConfig_t)(cudaFuncCache cacheConfig); 311 | static cudaDeviceSetCacheConfig_t native_cudaDeviceSetCacheConfig = NULL; 312 | 313 | extern "C" cudaError_t cudaDeviceSetCacheConfig (cudaFuncCache cacheConfig) { 314 | printf("\n>>cudaDeviceSetCacheConfig interception\n"); 315 | 316 | if (native_cudaDeviceSetCacheConfig == NULL) { 317 | native_cudaDeviceSetCacheConfig = (cudaDeviceSetCacheConfig_t)dlsym(RTLD_NEXT,"cudaDeviceSetCacheConfig"); 318 | } 319 | assert(native_cudaDeviceSetCacheConfig != NULL); 320 | return native_cudaDeviceSetCacheConfig(cacheConfig); 321 | } 322 | 323 | /// cudaDeviceSetLimit /// 324 | typedef cudaError_t (*cudaDeviceSetLimit_t)(cudaLimit limit, size_t value); 325 | static cudaDeviceSetLimit_t native_cudaDeviceSetLimit = NULL; 326 | 327 | extern "C" cudaError_t cudaDeviceSetLimit (cudaLimit limit, size_t value) { 328 | printf("\n>>cudaDeviceSetLimit interception\n"); 329 | 330 | if (native_cudaDeviceSetLimit == NULL) { 331 | native_cudaDeviceSetLimit = 
(cudaDeviceSetLimit_t)dlsym(RTLD_NEXT,"cudaDeviceSetLimit"); 332 | } 333 | assert(native_cudaDeviceSetLimit != NULL); 334 | return native_cudaDeviceSetLimit(limit,value); 335 | } 336 | 337 | /// cudaDeviceSetSharedMemConfig /// 338 | typedef cudaError_t (*cudaDeviceSetSharedMemConfig_t)(cudaSharedMemConfig config); 339 | static cudaDeviceSetSharedMemConfig_t native_cudaDeviceSetSharedMemConfig = NULL; 340 | 341 | extern "C" cudaError_t cudaDeviceSetSharedMemConfig(cudaSharedMemConfig config) { 342 | printf("\n>>cudaDeviceSetSharedMemConfig interception\n"); 343 | 344 | if (native_cudaDeviceSetSharedMemConfig == NULL) { 345 | native_cudaDeviceSetSharedMemConfig = (cudaDeviceSetSharedMemConfig_t)dlsym(RTLD_NEXT,"cudaDeviceSetSharedMemConfig"); 346 | } 347 | assert(native_cudaDeviceSetSharedMemConfig != NULL); 348 | return native_cudaDeviceSetSharedMemConfig(config); 349 | } 350 | 351 | /// cudaDeviceSynchronize /// 352 | typedef cudaError_t (*cudaDeviceSynchronize_t)(void); 353 | static cudaDeviceSynchronize_t native_cudaDeviceSynchronize = NULL; 354 | 355 | extern "C" cudaError_t cudaDeviceSynchronize (void) { 356 | printf("\n>>cudaDeviceSynchronize interception\n"); 357 | 358 | if (native_cudaDeviceSynchronize == NULL) { 359 | native_cudaDeviceSynchronize = (cudaDeviceSynchronize_t)dlsym(RTLD_NEXT,"cudaDeviceSynchronize"); 360 | } 361 | assert(native_cudaDeviceSynchronize != NULL); 362 | return native_cudaDeviceSynchronize(); 363 | } 364 | 365 | /// cudaGetDevice /// 366 | typedef cudaError_t (*cudaGetDevice_t)(int *device); 367 | static cudaGetDevice_t native_cudaGetDevice = NULL; 368 | 369 | extern "C" cudaError_t cudaGetDevice(int *device){ 370 | printf("\n>>cudaGetDevice \n"); 371 | //call of the real function 372 | if (native_cudaGetDevice == NULL) { 373 | native_cudaGetDevice = (cudaGetDevice_t)dlsym(RTLD_NEXT,"cudaGetDevice"); 374 | } 375 | assert(native_cudaGetDevice != NULL); 376 | return native_cudaGetDevice(device); 377 | } 378 | 379 | /// cudaGetDeviceCount /// 380 | typedef cudaError_t (*cudaGetDeviceCount_t)(int * count); 381 | static cudaGetDeviceCount_t native_cudaGetDeviceCount = NULL; 382 | 383 | extern "C" cudaError_t cudaGetDeviceCount(int * count){ 384 | printf("\n>>cudaGetDeviceCount interception \n"); 385 | 386 | if (native_cudaGetDeviceCount == NULL) { 387 | native_cudaGetDeviceCount = (cudaGetDeviceCount_t)dlsym(RTLD_NEXT,"cudaGetDeviceCount"); 388 | } 389 | assert(native_cudaGetDeviceCount != NULL); 390 | return native_cudaGetDeviceCount(count); 391 | } 392 | 393 | /// cudaGetDeviceFlags /// 394 | typedef cudaError_t (*cudaGetDeviceFlags_t)(unsigned int* flags); 395 | static cudaGetDeviceFlags_t native_cudaGetDeviceFlags = NULL; 396 | 397 | extern "C" cudaError_t cudaGetDeviceFlags (unsigned int* flags) { 398 | printf("\n>>cudaGetDeviceFlags interception\n"); 399 | 400 | if (native_cudaGetDeviceFlags == NULL) { 401 | native_cudaGetDeviceFlags = (cudaGetDeviceFlags_t)dlsym(RTLD_NEXT,"cudaGetDeviceFlags"); 402 | } 403 | assert(native_cudaGetDeviceFlags != NULL); 404 | return native_cudaGetDeviceFlags(flags); 405 | } 406 | 407 | /// cudaGetDeviceProperties /// 408 | typedef cudaError_t (*cudaGetDeviceProperties_t)(struct cudaDeviceProp * prop, int device); 409 | static cudaGetDeviceProperties_t native_cudaGetDeviceProperties = NULL; 410 | 411 | extern "C" cudaError_t cudaGetDeviceProperties(struct cudaDeviceProp * prop, int device){ 412 | printf("\n>>cudaGetDeviceProperties interception \n"); 413 | 414 | if (native_cudaGetDeviceProperties == NULL) { 415 | 
native_cudaGetDeviceProperties = (cudaGetDeviceProperties_t)dlsym(RTLD_NEXT,"cudaGetDeviceProperties"); 416 | } 417 | assert(native_cudaGetDeviceProperties != NULL); 418 | return native_cudaGetDeviceProperties(prop,device); 419 | } 420 | 421 | /// cudaIpcCloseMemHandle /// 422 | typedef cudaError_t (*cudaIpcCloseMemHandle_t)(void* devPtr); 423 | static cudaIpcCloseMemHandle_t native_cudaIpcCloseMemHandle = NULL; 424 | 425 | extern "C" cudaError_t cudaIpcCloseMemHandle (void* devPtr) { 426 | printf("\n>>cudaIpcCloseMemHandle interception\n"); 427 | 428 | if (native_cudaIpcCloseMemHandle == NULL) { 429 | native_cudaIpcCloseMemHandle= (cudaIpcCloseMemHandle_t)dlsym(RTLD_NEXT,"cudaIpcCloseMemHandle"); 430 | } 431 | assert(native_cudaIpcCloseMemHandle != NULL); 432 | return native_cudaIpcCloseMemHandle(devPtr); 433 | } 434 | 435 | /// cudaIpcGetEventHandle /// 436 | typedef cudaError_t (*cudaIpcGetEventHandle_t)(cudaIpcEventHandle_t* handle, cudaEvent_t event); 437 | static cudaIpcGetEventHandle_t native_cudaIpcGetEventHandle = NULL; 438 | 439 | extern "C" cudaError_t cudaIpcGetEventHandle (cudaIpcEventHandle_t* handle, cudaEvent_t event) { 440 | printf("\n>>cudaIpcGetEventHandle interception\n"); 441 | 442 | if (native_cudaIpcGetEventHandle == NULL) { 443 | native_cudaIpcGetEventHandle = (cudaIpcGetEventHandle_t)dlsym(RTLD_NEXT,"cudaIpcGetEventHandle"); 444 | } 445 | assert(native_cudaIpcGetEventHandle != NULL); 446 | return native_cudaIpcGetEventHandle(handle,event); 447 | } 448 | 449 | /// cudaIpcGetMemHandle /// 450 | typedef cudaError_t (*cudaIpcGetMemHandle_t)(cudaIpcMemHandle_t* handle, void* devPtr); 451 | static cudaIpcGetMemHandle_t native_cudaIpcGetMemHandle= NULL; 452 | 453 | extern "C" cudaError_t cudaIpcGetMemHandle (cudaIpcMemHandle_t* handle, void* devPtr) { 454 | printf("\n>>cudaIpcGetMemHandle interception\n"); 455 | 456 | if (native_cudaIpcGetMemHandle == NULL) { 457 | native_cudaIpcGetMemHandle = (cudaIpcGetMemHandle_t)dlsym(RTLD_NEXT,"cudaIpcGetMemHandle"); 458 | } 459 | assert(native_cudaIpcGetMemHandle!= NULL); 460 | return native_cudaIpcGetMemHandle(handle,devPtr); 461 | } 462 | 463 | /// cudaIpcOpenEventHandle /// 464 | typedef cudaError_t (*cudaIpcOpenEventHandle_t)(cudaEvent_t* event, cudaIpcEventHandle_t handle); 465 | static cudaIpcOpenEventHandle_t native_cudaIpcOpenEventHandle = NULL; 466 | 467 | extern "C" cudaError_t cudaIpcOpenEventHandle (cudaEvent_t* event, cudaIpcEventHandle_t handle) { 468 | printf("\n>>cudaIpcOpenEventHandle interception\n"); 469 | 470 | if (native_cudaIpcOpenEventHandle== NULL) { 471 | native_cudaIpcOpenEventHandle = (cudaIpcOpenEventHandle_t)dlsym(RTLD_NEXT,"cudaIpcOpenEventHandle"); 472 | } 473 | assert(native_cudaIpcOpenEventHandle != NULL); 474 | return native_cudaIpcOpenEventHandle(event,handle); 475 | } 476 | 477 | /// cudaIpcOpenMemHandle /// 478 | typedef cudaError_t (*cudaIpcOpenMemHandle_t)(void** devPtr, cudaIpcMemHandle_t handle, unsigned int flags); 479 | static cudaIpcOpenMemHandle_t native_cudaIpcOpenMemHandle = NULL; 480 | 481 | extern "C" cudaError_t cudaIpcOpenMemHandle (void** devPtr, cudaIpcMemHandle_t handle, unsigned int flags) { 482 | printf("\n>>cudaIpcOpenMemHandle interception\n"); 483 | 484 | if (native_cudaIpcOpenMemHandle == NULL) { 485 | native_cudaIpcOpenMemHandle = (cudaIpcOpenMemHandle_t)dlsym(RTLD_NEXT,"cudaIpcOpenMemHandle"); 486 | } 487 | assert(native_cudaIpcOpenMemHandle != NULL); 488 | return native_cudaIpcOpenMemHandle(devPtr,handle,flags); 489 | } 490 | 491 | /// cudaSetDevice /// 492 | typedef 
cudaError_t (*cudaSetDevice_t)(int device); 493 | static cudaSetDevice_t native_cudaSetDevice = NULL; 494 | 495 | extern "C" cudaError_t cudaSetDevice(int device){ 496 | printf("\n>>cudaSetDevice interception \n"); 497 | 498 | if (native_cudaSetDevice == NULL) { 499 | native_cudaSetDevice = (cudaSetDevice_t)dlsym(RTLD_NEXT,"cudaSetDevice"); 500 | } 501 | assert(native_cudaSetDevice != NULL); 502 | return native_cudaSetDevice(device); 503 | } 504 | 505 | /// cudaSetDeviceFlags /// 506 | typedef cudaError_t (*cudaSetDeviceFlags_t)(int flags); 507 | static cudaSetDeviceFlags_t native_cudaSetDeviceFlags = NULL; 508 | 509 | extern "C" cudaError_t cudaSetDeviceFlags(int flags){ 510 | printf("\n>>cudaSetDeviceFlags interception \n"); 511 | 512 | if (native_cudaSetDeviceFlags == NULL) { 513 | native_cudaSetDeviceFlags = (cudaSetDeviceFlags_t)dlsym(RTLD_NEXT,"cudaSetDeviceFlags"); 514 | } 515 | assert(native_cudaSetDeviceFlags != NULL); 516 | return native_cudaSetDeviceFlags(flags); 517 | } 518 | 519 | /// cudaSetValidDevices /// 520 | typedef cudaError_t (*cudaSetValidDevices_t)(int * device_arr, int len); 521 | static cudaSetValidDevices_t native_cudaSetValidDevices = NULL; 522 | 523 | extern "C" cudaError_t cudaSetValidDevices(int * device_arr, int len){ 524 | printf("\n>>cudaSetValidDevices interception \n"); 525 | 526 | if (native_cudaSetValidDevices == NULL) { 527 | native_cudaSetValidDevices = (cudaSetValidDevices_t)dlsym(RTLD_NEXT,"cudaSetValidDevices"); 528 | } 529 | assert(native_cudaSetValidDevices != NULL); 530 | return native_cudaSetValidDevices(device_arr,len); 531 | } 532 | 533 | 534 | 535 | //**********************************************// 536 | // CUDA Runtime API Stream Management // 537 | //**********************************************// 538 | /// cudaStreamAttachMemAsync /// 539 | typedef cudaError_t (*cudaStreamAttachMemAsync_t)(cudaStream_t stream, void* devPtr, size_t length, unsigned int flags); 540 | static cudaStreamAttachMemAsync_t native_cudaStreamAttachMemAsync = NULL; 541 | 542 | extern "C" cudaError_t cudaStreamAttachMemAsync(cudaStream_t stream, void* devPtr, size_t length, unsigned int flags){ 543 | printf("\n>>cudaStreamAttachMemAsync interception \n"); 544 | 545 | if (native_cudaStreamAttachMemAsync == NULL) { 546 | native_cudaStreamAttachMemAsync = (cudaStreamAttachMemAsync_t)dlsym(RTLD_NEXT,"cudaStreamAttachMemAsync"); 547 | } 548 | assert(native_cudaStreamAttachMemAsync != NULL); 549 | return native_cudaStreamAttachMemAsync(stream,devPtr,length,flags); 550 | } 551 | 552 | 553 | /// cudaStreamCreate /// 554 | typedef cudaError_t (*cudaStreamCreate_t)(cudaStream_t * pStream); 555 | static cudaStreamCreate_t native_cudaStreamCreate = NULL; 556 | 557 | extern "C" cudaError_t cudaStreamCreate(cudaStream_t * pStream){ 558 | printf("\n>>cudaStreamCreate interception \n"); 559 | 560 | if (native_cudaStreamCreate == NULL) { 561 | native_cudaStreamCreate = (cudaStreamCreate_t)dlsym(RTLD_NEXT,"cudaStreamCreate"); 562 | } 563 | assert(native_cudaStreamCreate != NULL); 564 | return native_cudaStreamCreate(pStream); 565 | } 566 | 567 | /// cudaStreamCreateWithFlags /// 568 | typedef cudaError_t (*cudaStreamCreateWithFlags_t)(cudaStream_t* pStream, unsigned int flags); 569 | static cudaStreamCreateWithFlags_t native_cudaStreamCreateWithFlags = NULL; 570 | 571 | extern "C" cudaError_t cudaStreamCreateWithFlags(cudaStream_t* pStream, unsigned int flags){ 572 | printf("\n>>cudaStreamCreateWithFlags interception \n"); 573 | 574 | if (native_cudaStreamCreateWithFlags == NULL) { 
575 | native_cudaStreamCreateWithFlags = (cudaStreamCreateWithFlags_t)dlsym(RTLD_NEXT,"cudaStreamCreateWithFlags"); 576 | } 577 | assert(native_cudaStreamCreateWithFlags != NULL); 578 | return native_cudaStreamCreateWithFlags(pStream,flags); 579 | } 580 | 581 | /// cudaStreamCreateWithPriority /// 582 | typedef cudaError_t (*cudaStreamCreateWithPriority_t)(cudaStream_t* pStream, unsigned int flags, int priority); 583 | static cudaStreamCreateWithPriority_t native_cudaStreamCreateWithPriority = NULL; 584 | 585 | extern "C" cudaError_t cudaStreamCreateWithPriority(cudaStream_t* pStream, unsigned int flags, int priority){ 586 | printf("\n>>cudaStreamCreateWithPriority interception \n"); 587 | 588 | if (native_cudaStreamCreateWithPriority == NULL) { 589 | native_cudaStreamCreateWithPriority = (cudaStreamCreateWithPriority_t)dlsym(RTLD_NEXT,"cudaStreamCreateWithPriority"); 590 | } 591 | assert(native_cudaStreamCreateWithPriority != NULL); 592 | return native_cudaStreamCreateWithPriority(pStream,flags,priority); 593 | } 594 | 595 | /// cudaStreamDestroy /// 596 | typedef cudaError_t (*cudaStreamDestroy_t)(cudaStream_t stream); 597 | static cudaStreamDestroy_t native_cudaStreamDestroy = NULL; 598 | 599 | extern "C" cudaError_t cudaStreamDestroy(cudaStream_t stream){ 600 | printf("\n>>cudaStreamDestroy interception \n"); 601 | 602 | if (native_cudaStreamDestroy == NULL) { 603 | native_cudaStreamDestroy = (cudaStreamDestroy_t)dlsym(RTLD_NEXT,"cudaStreamDestroy"); 604 | } 605 | assert(native_cudaStreamDestroy != NULL); 606 | return native_cudaStreamDestroy(stream); 607 | } 608 | 609 | 610 | /// cudaStreamGetFlags /// 611 | typedef cudaError_t (*cudaStreamGetFlags_t)(cudaStream_t hStream, unsigned int* flags); 612 | static cudaStreamGetFlags_t native_cudaStreamGetFlags= NULL; 613 | 614 | extern "C" cudaError_t cudaStreamGetFlags(cudaStream_t hStream, unsigned int* flags){ 615 | printf("\n>>cudaStreamGetFlags interception \n"); 616 | 617 | if (native_cudaStreamGetFlags == NULL) { 618 | native_cudaStreamGetFlags = (cudaStreamGetFlags_t)dlsym(RTLD_NEXT,"cudaStreamGetFlags"); 619 | } 620 | assert(native_cudaStreamGetFlags != NULL); 621 | return native_cudaStreamGetFlags(hStream,flags); 622 | } 623 | 624 | /// cudaStreamGetPriority /// 625 | typedef cudaError_t (*cudaStreamGetPriority_t)(cudaStream_t hStream, int* priority); 626 | static cudaStreamGetPriority_t native_cudaStreamGetPriority = NULL; 627 | 628 | extern "C" cudaError_t cudaStreamGetPriority(cudaStream_t hStream, int* priority){ 629 | printf("\n>>cudaStreamGetPriority interception \n"); 630 | 631 | if (native_cudaStreamGetPriority == NULL) { 632 | native_cudaStreamGetPriority = (cudaStreamGetPriority_t)dlsym(RTLD_NEXT,"cudaStreamGetPriority"); 633 | } 634 | assert(native_cudaStreamGetPriority != NULL); 635 | return native_cudaStreamGetPriority(hStream,priority); 636 | } 637 | 638 | /// cudaStreamQuery /// 639 | typedef cudaError_t (*cudaStreamQuery_t)(cudaStream_t stream); 640 | static cudaStreamQuery_t native_cudaStreamQuery = NULL; 641 | 642 | extern "C" cudaError_t cudaStreamQuery(cudaStream_t stream){ 643 | printf("\n>>cudaStreamQuery interception \n"); 644 | 645 | if (native_cudaStreamQuery == NULL) { 646 | native_cudaStreamQuery = (cudaStreamQuery_t)dlsym(RTLD_NEXT,"cudaStreamQuery"); 647 | } 648 | assert(native_cudaStreamQuery != NULL); 649 | return native_cudaStreamQuery(stream); 650 | } 651 | 652 | /// cudaStreamSynchronize /// 653 | typedef cudaError_t (*cudaStreamSynchronize_t)(cudaStream_t stream); 654 | static 
cudaStreamSynchronize_t native_cudaStreamSynchronize = NULL; 655 | 656 | extern "C" cudaError_t cudaStreamSynchronize(cudaStream_t stream){ 657 | printf("\n>>cudaStreamSynchronize interception \n"); 658 | 659 | if (native_cudaStreamSynchronize== NULL) { 660 | native_cudaStreamSynchronize = (cudaStreamSynchronize_t)dlsym(RTLD_NEXT,"cudaStreamSynchronize"); 661 | } 662 | assert(native_cudaStreamSynchronize != NULL); 663 | return native_cudaStreamSynchronize(stream); 664 | } 665 | 666 | /// cudaStreamWaitEvent /// 667 | typedef cudaError_t (*cudaStreamWaitEvent_t)(cudaStream_t stream, cudaEvent_t event, unsigned int flags); 668 | static cudaStreamWaitEvent_t native_cudaStreamWaitEvent = NULL; 669 | 670 | extern "C" cudaError_t cudaStreamWaitEvent(cudaStream_t stream, cudaEvent_t event, unsigned int flags){ 671 | printf("\n>>cudaStreamWaitEvent interception \n"); 672 | 673 | if (native_cudaStreamWaitEvent == NULL) { 674 | native_cudaStreamWaitEvent = (cudaStreamWaitEvent_t)dlsym(RTLD_NEXT,"cudaStreamWaitEvent"); 675 | } 676 | assert(native_cudaStreamWaitEvent != NULL); 677 | return native_cudaStreamWaitEvent(stream,event,flags); 678 | } 679 | 680 | 681 | 682 | //*********************************************// 683 | // CUDA Runtime API Event Management // 684 | //*********************************************// 685 | /// cudaDriverGetVersion /// 686 | typedef cudaError_t (*cudaEventCreate_t)(cudaEvent_t * event); 687 | static cudaEventCreate_t native_cudaEventCreate = NULL; 688 | 689 | extern "C" cudaError_t cudaEventCreate (cudaEvent_t * event) { 690 | printf("\n>>cudaEventCreate interception\n"); 691 | 692 | if (native_cudaEventCreate == NULL) { 693 | native_cudaEventCreate = (cudaEventCreate_t)dlsym(RTLD_NEXT,"cudaEventCreate"); 694 | } 695 | assert(native_cudaEventCreate != NULL); 696 | return native_cudaEventCreate(event); 697 | } 698 | 699 | /// cudaEventCreateWithFlags /// 700 | typedef cudaError_t (*cudaEventCreateWithFlags_t)(cudaEvent_t * event, int flags); 701 | static cudaEventCreateWithFlags_t native_cudaEventCreateWithFlags = NULL; 702 | 703 | extern "C" cudaError_t cudaEventCreateWithFlags(cudaEvent_t * event, int flags) { 704 | printf("\n>>cudaEventCreateWithFlags interception\n"); 705 | 706 | if (native_cudaEventCreateWithFlags == NULL) { 707 | native_cudaEventCreateWithFlags = (cudaEventCreateWithFlags_t)dlsym(RTLD_NEXT,"cudaEventCreateWithFlags"); 708 | } 709 | assert(native_cudaEventCreateWithFlags != NULL); 710 | return native_cudaEventCreateWithFlags(event,flags); 711 | } 712 | 713 | /// cudaEventDestroy /// 714 | typedef cudaError_t (*cudaEventDestroy_t)(cudaEvent_t event); 715 | static cudaEventDestroy_t native_cudaEventDestroy = NULL; 716 | 717 | extern "C" cudaError_t cudaEventDestroy (cudaEvent_t event) { 718 | printf("\n>>cudaEventDestroy interception\n"); 719 | 720 | if (native_cudaEventDestroy == NULL) { 721 | native_cudaEventDestroy = (cudaEventDestroy_t)dlsym(RTLD_NEXT,"cudaEventDestroy"); 722 | } 723 | 724 | assert(native_cudaEventDestroy != NULL); 725 | return native_cudaEventDestroy(event); 726 | } 727 | 728 | /// cudaEventElapsedTime /// 729 | typedef cudaError_t (*cudaEventElapsedTime_t)(float * ms, cudaEvent_t start, cudaEvent_t end); 730 | static cudaEventElapsedTime_t native_cudaEventElapsedTime = NULL; 731 | 732 | extern "C" cudaError_t cudaEventElapsedTime (float * ms, cudaEvent_t start,cudaEvent_t end) { 733 | printf("\n>>cudaEventElapsedTime interception\n"); 734 | 735 | if (native_cudaEventElapsedTime == NULL) { 736 | native_cudaEventElapsedTime = 
(cudaEventElapsedTime_t)dlsym(RTLD_NEXT,"cudaEventElapsedTime"); 737 | } 738 | assert(native_cudaEventElapsedTime != NULL); 739 | return native_cudaEventElapsedTime(ms,start,end); 740 | } 741 | 742 | /// cudaEventQuery /// 743 | typedef cudaError_t (*cudaEventQuery_t)(cudaEvent_t event); 744 | static cudaEventQuery_t native_cudaEventQuery = NULL; 745 | 746 | extern "C" cudaError_t cudaEventQuery (cudaEvent_t event) { 747 | printf("\n>>cudaEventQuery interception\n"); 748 | 749 | if (native_cudaEventQuery == NULL) { 750 | native_cudaEventQuery = (cudaEventQuery_t)dlsym(RTLD_NEXT,"cudaEventQuery"); 751 | } 752 | assert(native_cudaEventQuery != NULL); 753 | return native_cudaEventQuery(event); 754 | } 755 | 756 | /// cudaEventRecord /// 757 | typedef cudaError_t (*cudaEventRecord_t)(cudaEvent_t event, cudaStream_t stream); 758 | static cudaEventRecord_t native_cudaEventRecord = NULL; 759 | 760 | extern "C" cudaError_t cudaEventRecord(cudaEvent_t event, cudaStream_t stream) { 761 | printf("\n>>cudaEventRecord interception\n"); 762 | 763 | if (native_cudaEventRecord == NULL) { 764 | native_cudaEventRecord = (cudaEventRecord_t)dlsym(RTLD_NEXT,"cudaEventRecord"); 765 | } 766 | assert(native_cudaEventRecord != NULL); 767 | return native_cudaEventRecord(event,stream); 768 | } 769 | 770 | /// cudaEventSynchronize /// 771 | typedef cudaError_t (*cudaEventSynchronize_t)(cudaEvent_t event); 772 | static cudaEventSynchronize_t native_cudaEventSynchronize = NULL; 773 | 774 | extern "C" cudaError_t cudaEventSynchronize (cudaEvent_t event) { 775 | printf("\n>>cudaEventSynchronize interception\n"); 776 | 777 | if (native_cudaEventSynchronize == NULL) { 778 | native_cudaEventSynchronize = (cudaEventSynchronize_t)dlsym(RTLD_NEXT,"cudaEventSynchronize"); 779 | } 780 | assert(native_cudaEventSynchronize != NULL); 781 | return native_cudaEventSynchronize(event); 782 | } 783 | 784 | 785 | //**********************************************// 786 | // CUDA Runtime API Execution Control // 787 | //**********************************************// 788 | // cudaConfigureCall /// 789 | typedef cudaError_t (*cudaConfigureCall_t)(dim3,dim3,size_t,cudaStream_t); 790 | static cudaConfigureCall_t native_CudaConfigureCall = NULL; 791 | 792 | extern "C" cudaError_t cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem=0, cudaStream_t stream=0) { 793 | assert(kernelInfo().counter == 0); 794 | kernelInfo().gridDim = gridDim; 795 | kernelInfo().blockDim = blockDim; 796 | //kernelInfo().counter++; //increase a counter to indicate an expected cudaLaunch to be completed 797 | printf("\n>>cudaConfigureCall interception\n"); 798 | //call of the real function 799 | if (native_CudaConfigureCall == NULL) 800 | native_CudaConfigureCall = (cudaConfigureCall_t)dlsym(RTLD_NEXT,"cudaConfigureCall"); 801 | 802 | assert(native_CudaConfigureCall != NULL); 803 | return native_CudaConfigureCall(gridDim,blockDim,sharedMem,stream); 804 | } 805 | 806 | 807 | /// cudaFuncGetAttributes /// 808 | typedef cudaError_t (*cudaFuncGetAttributes_t)(struct cudaFuncAttributes * attr, const char * func); 809 | static cudaFuncGetAttributes_t native_cudaFuncGetAttributes = NULL; 810 | 811 | extern "C" cudaError_t cudaFuncGetAttributes (struct cudaFuncAttributes * attr, const char * func) { 812 | printf("\n>>cudaFuncGetAttributes interception\n"); 813 | 814 | if (native_cudaFuncGetAttributes == NULL) { 815 | native_cudaFuncGetAttributes = (cudaFuncGetAttributes_t)dlsym(RTLD_NEXT,"cudaFuncGetAttributes"); 816 | } 817 | assert(native_cudaFuncGetAttributes != 
NULL); 818 | return native_cudaFuncGetAttributes(attr,func); 819 | } 820 | 821 | /// cudaFuncSetAttribute /// 822 | typedef cudaError_t (*cudaFuncSetAttribute_t)(const void* func, cudaFuncAttribute attr, int value); 823 | static cudaFuncSetAttribute_t native_cudaFuncSetAttribute = NULL; 824 | 825 | extern "C" cudaError_t cudaFuncSetAttribute (const void* func, cudaFuncAttribute attr, int value) { 826 | printf("\n>>cudaFuncSetAttribute interception\n"); 827 | 828 | if (native_cudaFuncSetAttribute == NULL) { 829 | native_cudaFuncSetAttribute = (cudaFuncSetAttribute_t)dlsym(RTLD_NEXT,"cudaFuncSetAttribute"); 830 | } 831 | assert(native_cudaFuncSetAttribute != NULL); 832 | return native_cudaFuncSetAttribute(func,attr,value); 833 | } 834 | 835 | /// cudaLaunch /// 836 | typedef cudaError_t (*cudaLaunch_t)(const char* entry); 837 | static cudaLaunch_t native_cudaLaunch = NULL; 838 | 839 | extern "C" cudaError_t cudaLaunch( const char* entry){ 840 | //print_kernel_invocation(entry); 841 | //kernelInfo().counter--; 842 | printf("\n>>cudaLaunch interception\n"); 843 | //call of the real function 844 | if (native_cudaLaunch == NULL) { 845 | native_cudaLaunch = (cudaLaunch_t)dlsym(RTLD_NEXT,"cudaLaunch"); 846 | } 847 | assert(native_cudaLaunch != NULL); 848 | return native_cudaLaunch(entry); 849 | } 850 | 851 | 852 | /// cudaFuncSetCacheConfig /// 853 | typedef cudaError_t (*cudaFuncSetCacheConfig_t)(const void* func, cudaFuncCache cacheConfig); 854 | static cudaFuncSetCacheConfig_t native_cudaFuncSetCacheConfig = NULL; 855 | 856 | extern "C" cudaError_t cudaFuncSetCacheConfig (const void* func, cudaFuncCache cacheConfig) { 857 | printf("\n>>cudaFuncSetCacheConfig interception\n"); 858 | 859 | if (native_cudaFuncSetCacheConfig == NULL) { 860 | native_cudaFuncSetCacheConfig = (cudaFuncSetCacheConfig_t)dlsym(RTLD_NEXT,"cudaFuncSetCacheConfig"); 861 | } 862 | assert(native_cudaFuncSetCacheConfig != NULL); 863 | return native_cudaFuncSetCacheConfig(func,cacheConfig); 864 | } 865 | 866 | /// cudaFuncSetSharedMemConfig /// 867 | typedef cudaError_t (*cudaFuncSetSharedMemConfig_t)(const void* func, cudaSharedMemConfig config); 868 | static cudaFuncSetSharedMemConfig_t native_cudaFuncSetSharedMemConfig = NULL; 869 | 870 | extern "C" cudaError_t cudaFuncSetSharedMemConfig (const void* func, cudaSharedMemConfig config) { 871 | printf("\n>>cudaFuncSetSharedMemConfig interception\n"); 872 | 873 | if (native_cudaFuncSetSharedMemConfig == NULL) { 874 | native_cudaFuncSetSharedMemConfig = (cudaFuncSetSharedMemConfig_t)dlsym(RTLD_NEXT,"cudaFuncSetSharedMemConfig"); 875 | } 876 | assert(native_cudaFuncSetSharedMemConfig != NULL); 877 | return native_cudaFuncSetSharedMemConfig(func,config); 878 | } 879 | 880 | /// cudaGetParameterBuffer /// 881 | typedef cudaError_t (*cudaGetParameterBuffer_t)(size_t alignment, size_t size); 882 | static cudaGetParameterBuffer_t native_cudaGetParameterBuffer = NULL; 883 | 884 | extern "C" cudaError_t cudaGetParameterBuffer (size_t alignment, size_t size) { 885 | printf("\n>>cudaGetParameterBuffer interception\n"); 886 | 887 | if (native_cudaGetParameterBuffer == NULL) { 888 | native_cudaGetParameterBuffer = (cudaGetParameterBuffer_t)dlsym(RTLD_NEXT,"cudaGetParameterBuffer"); 889 | } 890 | assert(native_cudaGetParameterBuffer != NULL); 891 | return native_cudaGetParameterBuffer(alignment,size); 892 | } 893 | 894 | /// cudaGetParameterBufferV2 /// 895 | typedef cudaError_t (*cudaGetParameterBufferV2_t)(void* func, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize); 896 
| static cudaGetParameterBufferV2_t native_cudaGetParameterBufferV2 = NULL; 897 | 898 | extern "C" cudaError_t cudaGetParameterBufferV2 (void* func, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize) { 899 | printf("\n>>cudaGetParameterBufferV2 interception\n"); 900 | 901 | if (native_cudaGetParameterBufferV2 == NULL) { 902 | native_cudaGetParameterBufferV2 = (cudaGetParameterBufferV2_t)dlsym(RTLD_NEXT,"cudaGetParameterBufferV2"); 903 | } 904 | assert(native_cudaGetParameterBufferV2 != NULL); 905 | return native_cudaGetParameterBufferV2(func,gridDimension,blockDimension,sharedMemSize); 906 | } 907 | 908 | /// cudaLaunchCooperativeKernel /// 909 | typedef cudaError_t (*cudaLaunchCooperativeKernel_t)(const void* func, dim3 gridDim, dim3 blockDim, void** args, size_t sharedMem, cudaStream_t stream); 910 | static cudaLaunchCooperativeKernel_t native_cudaLaunchCooperativeKernel = NULL; 911 | 912 | extern "C" cudaError_t cudaLaunchCooperativeKernel(const void* func, dim3 gridDim, dim3 blockDim, void** args, size_t sharedMem, cudaStream_t stream) { 913 | printf("\n>>cudaLaunchCooperativeKernel interception\n"); 914 | 915 | if (native_cudaLaunchCooperativeKernel == NULL) { 916 | native_cudaLaunchCooperativeKernel = (cudaLaunchCooperativeKernel_t)dlsym(RTLD_NEXT,"cudaLaunchCooperativeKernel"); 917 | } 918 | assert(native_cudaLaunchCooperativeKernel != NULL); 919 | return native_cudaLaunchCooperativeKernel(func,gridDim,blockDim,args,sharedMem,stream); 920 | } 921 | 922 | /// cudaLaunchCooperativeKernelMultiDevice /// 923 | typedef cudaError_t (*cudaLaunchCooperativeKernelMultiDevice_t)(cudaLaunchParams* launchParamsList, unsigned int numDevices, unsigned int flags); 924 | static cudaLaunchCooperativeKernelMultiDevice_t native_cudaLaunchCooperativeKernelMultiDevice = NULL; 925 | 926 | extern "C" cudaError_t cudaLaunchCooperativeKernelMultiDevice (cudaLaunchParams* launchParamsList, unsigned int numDevices, unsigned int flags) { 927 | printf("\n>>cudaLaunchCooperativeKernelMultiDevice interception\n"); 928 | 929 | if (native_cudaLaunchCooperativeKernelMultiDevice == NULL) { 930 | native_cudaLaunchCooperativeKernelMultiDevice = (cudaLaunchCooperativeKernelMultiDevice_t)dlsym(RTLD_NEXT,"cudaLaunchCooperativeKernelMultiDevice"); 931 | } 932 | assert(native_cudaLaunchCooperativeKernelMultiDevice != NULL); 933 | return native_cudaLaunchCooperativeKernelMultiDevice(launchParamsList,numDevices,flags); 934 | } 935 | 936 | 937 | 938 | /// cudaLaunchKernel /// 939 | typedef cudaError_t (*cudaLaunchKernel_t)(const void* func, dim3 gridDim, dim3 blockDim, void** args, size_t sharedMem, cudaStream_t stream); 940 | static cudaLaunchKernel_t native_cudaLaunchKernel = NULL; 941 | 942 | extern "C" cudaError_t cudaLaunchKernel (const void* func, dim3 gridDim, dim3 blockDim, void** args, size_t sharedMem, cudaStream_t stream) { 943 | printf("\n>>cudaLaunchKernel interception\n"); 944 | 945 | if (native_cudaLaunchKernel == NULL) { 946 | native_cudaLaunchKernel = (cudaLaunchKernel_t)dlsym(RTLD_NEXT,"cudaLaunchKernel"); 947 | } 948 | assert(native_cudaLaunchKernel != NULL); 949 | return native_cudaLaunchKernel(func,gridDim,blockDim,args,sharedMem,stream); 950 | } 951 | 952 | /// cudaSetDoubleForDevice /// 953 | typedef cudaError_t (*cudaSetDoubleForDevice_t)(double *d); 954 | static cudaSetDoubleForDevice_t native_cudaSetDoubleForDevice = NULL; 955 | 956 | extern "C" cudaError_t cudaSetDoubleForDevice (double *d) { 957 | printf("\n>>cudaSetDoubleForDevice interception\n"); 958 | 959 | if 
(native_cudaSetDoubleForDevice == NULL) { 960 | native_cudaSetDoubleForDevice = (cudaSetDoubleForDevice_t)dlsym(RTLD_NEXT,"cudaSetDoubleForDevice"); 961 | } 962 | assert(native_cudaSetDoubleForDevice != NULL); 963 | return native_cudaSetDoubleForDevice(d); 964 | } 965 | 966 | /// cudaSetDoubleForHost /// 967 | typedef cudaError_t (*cudaSetDoubleForHost_t)(double *d); 968 | static cudaSetDoubleForHost_t native_cudaSetDoubleForHost = NULL; 969 | 970 | extern "C" cudaError_t cudaSetDoubleForHost (double *d) { 971 | printf("\n>>cudaSetDoubleForHost interception\n"); 972 | 973 | if (native_cudaSetDoubleForHost == NULL) { 974 | native_cudaSetDoubleForHost = (cudaSetDoubleForHost_t)dlsym(RTLD_NEXT,"cudaSetDoubleForHost"); 975 | } 976 | assert(native_cudaSetDoubleForHost != NULL); 977 | return native_cudaSetDoubleForHost(d); 978 | } 979 | 980 | /* cudaSetupArgument /// 981 | typedef cudaError_t (*cudaSetupArgument_t)(const void *, size_t, size_t); 982 | static cudaSetupArgument_t native_CudaSetupArgument = NULL; 983 | 984 | extern "C" cudaError_t cudaSetupArgument(const void *arg, size_t size, size_t offset) { 985 | kernelInfo().arguments.push_back(const_cast(arg)); 986 | 987 | //call of the real function 988 | if (native_CudaSetupArgument == NULL) { 989 | native_CudaSetupArgument = (cudaSetupArgument_t)dlsym(RTLD_NEXT,"cudaSetupArgument"); 990 | } 991 | assert(native_CudaSetupArgument != NULL); 992 | return native_CudaSetupArgument(arg, size, offset); 993 | } 994 | */ 995 | 996 | 997 | //**********************************************// 998 | // CUDA Runtime API Memory Management // 999 | //**********************************************// 1000 | /// cudaFree /// 1001 | typedef cudaError_t (*cudaFree_t)(void * devPtr); 1002 | static cudaFree_t native_cudaFree = NULL; 1003 | 1004 | extern "C" cudaError_t cudaFree (void * devPtr) { 1005 | printf("\n>>cudaFree interception\n"); 1006 | 1007 | if (native_cudaFree == NULL) { 1008 | native_cudaFree = (cudaFree_t)dlsym(RTLD_NEXT,"cudaFree"); 1009 | } 1010 | assert(native_cudaFree != NULL); 1011 | return native_cudaFree(devPtr); 1012 | } 1013 | 1014 | 1015 | /// cudaFreeArray /// 1016 | typedef cudaError_t (*cudaFreeArray_t)(struct cudaArray * array); 1017 | static cudaFreeArray_t native_cudaFreeArray = NULL; 1018 | 1019 | extern "C" cudaError_t cudaFreeArray (struct cudaArray * array) { 1020 | printf("\n>>cudaFreeArray interception\n"); 1021 | 1022 | if (native_cudaFreeArray == NULL) { 1023 | native_cudaFreeArray = (cudaFreeArray_t)dlsym(RTLD_NEXT,"cudaFreeArray"); 1024 | } 1025 | assert(native_cudaFreeArray != NULL); 1026 | return native_cudaFreeArray(array); 1027 | } 1028 | 1029 | 1030 | /// cudaFreeHost /// 1031 | typedef cudaError_t (*cudaFreeHost_t)(void * ptr); 1032 | static cudaFreeHost_t native_cudaFreeHost = NULL; 1033 | 1034 | extern "C" cudaError_t cudaFreeHost(void * ptr) { 1035 | printf("\n>>cudaFreeHost interception\n"); 1036 | 1037 | if (native_cudaFreeHost == NULL) { 1038 | native_cudaFreeHost = (cudaFreeHost_t)dlsym(RTLD_NEXT,"cudaFreeHost"); 1039 | } 1040 | assert(native_cudaFreeHost != NULL); 1041 | return native_cudaFreeHost(ptr); 1042 | } 1043 | 1044 | 1045 | /// cudaGetSymbolAddress /// 1046 | typedef cudaError_t (*cudaGetSymbolAddress_t)(void ** devPtr, const char * symbol); 1047 | static cudaGetSymbolAddress_t native_cudaGetSymbolAddress = NULL; 1048 | 1049 | extern "C" cudaError_t cudaGetSymbolAddress (void ** devPtr, const char * symbol) { 1050 | printf("\n>>cudaGetSymbolAddress interception\n"); 1051 | 1052 | if 
(native_cudaGetSymbolAddress == NULL) { 1053 | native_cudaGetSymbolAddress = (cudaGetSymbolAddress_t)dlsym(RTLD_NEXT,"cudaGetSymbolAddress"); 1054 | } 1055 | assert(native_cudaGetSymbolAddress != NULL); 1056 | return native_cudaGetSymbolAddress(devPtr,symbol); 1057 | } 1058 | 1059 | 1060 | /// cudaGetSymbolSize /// 1061 | typedef cudaError_t (*cudaGetSymbolSize_t)(size_t * size, const char * symbol); 1062 | static cudaGetSymbolSize_t native_cudaGetSymbolSize = NULL; 1063 | 1064 | extern "C" cudaError_t cudaGetSymbolSize(size_t * size, const char * symbol) { 1065 | printf("\n>>cudaGetSymbolSize interception\n"); 1066 | 1067 | if (native_cudaGetSymbolSize == NULL) { 1068 | native_cudaGetSymbolSize = (cudaGetSymbolSize_t)dlsym(RTLD_NEXT,"cudaGetSymbolSize"); 1069 | } 1070 | assert(native_cudaGetSymbolSize != NULL); 1071 | return native_cudaGetSymbolSize(size,symbol); 1072 | } 1073 | 1074 | 1075 | /// cudaHostAlloc /// 1076 | typedef cudaError_t (*cudaHostAlloc_t)(void ** ptr, size_t size, unsigned int flags); 1077 | static cudaHostAlloc_t native_cudaHostAlloc = NULL; 1078 | 1079 | extern "C" cudaError_t cudaHostAlloc (void ** ptr, size_t size, unsigned int flags) { 1080 | printf("\n>>cudaHostAlloc interception\n"); 1081 | 1082 | if (native_cudaHostAlloc == NULL) { 1083 | native_cudaHostAlloc = (cudaHostAlloc_t)dlsym(RTLD_NEXT,"cudaHostAlloc"); 1084 | } 1085 | assert(native_cudaHostAlloc != NULL); 1086 | return native_cudaHostAlloc(ptr,size,flags); 1087 | } 1088 | 1089 | 1090 | /// cudaHostGetDevicePointer /// 1091 | typedef cudaError_t (*cudaHostGetDevicePointer_t)(void ** pDevice, void * pHost, unsigned int flags); 1092 | static cudaHostGetDevicePointer_t native_cudaHostGetDevicePointer = NULL; 1093 | 1094 | extern "C" cudaError_t cudaHostGetDevicePointer(void ** pDevice, void * pHost, unsigned int flags) { 1095 | printf("\n>>cudaHostGetDevicePointer interception\n"); 1096 | 1097 | if (native_cudaHostGetDevicePointer == NULL) { 1098 | native_cudaHostGetDevicePointer = (cudaHostGetDevicePointer_t)dlsym(RTLD_NEXT,"cudaHostGetDevicePointer"); 1099 | } 1100 | assert(native_cudaHostGetDevicePointer != NULL); 1101 | return native_cudaHostGetDevicePointer(pDevice,pHost,flags); 1102 | } 1103 | 1104 | 1105 | /// cudaHostGetFlags /// 1106 | typedef cudaError_t (*cudaHostGetFlags_t)(unsigned int * pFlags, void * pHost); 1107 | static cudaHostGetFlags_t native_cudaHostGetFlags = NULL; 1108 | 1109 | extern "C" cudaError_t cudaHostGetFlags(unsigned int * pFlags, void * pHost) { 1110 | printf("\n>>cudaHostGetFlags interception\n"); 1111 | 1112 | if (native_cudaHostGetFlags == NULL) { 1113 | native_cudaHostGetFlags = (cudaHostGetFlags_t)dlsym(RTLD_NEXT,"cudaHostGetFlags"); 1114 | } 1115 | assert(native_cudaHostGetFlags != NULL); 1116 | return native_cudaHostGetFlags(pFlags,pHost); 1117 | } 1118 | 1119 | 1120 | /// cudaMalloc /// 1121 | typedef cudaError_t (*cudaMalloc_t)(void ** devPtr, size_t size); 1122 | static cudaMalloc_t native_cudaMalloc = NULL; 1123 | 1124 | extern "C" cudaError_t cudaMalloc(void ** devPtr, size_t size) { 1125 | printf("\n>>cudaMalloc interception\n"); 1126 | 1127 | if (native_cudaMalloc == NULL) { 1128 | native_cudaMalloc = (cudaMalloc_t)dlsym(RTLD_NEXT,"cudaMalloc"); 1129 | } 1130 | assert(native_cudaMalloc != NULL); 1131 | return native_cudaMalloc(devPtr,size); 1132 | } 1133 | 1134 | 1135 | /// cudaMalloc3D /// 1136 | typedef cudaError_t (*cudaMalloc3D_t)(struct cudaPitchedPtr * pitchedDevPtr, struct cudaExtent extent); 1137 | static cudaMalloc3D_t native_cudaMalloc3D = NULL; 
1138 | 1139 | extern "C" cudaError_t cudaMalloc3D (struct cudaPitchedPtr * pitchedDevPtr, struct cudaExtent extent) { 1140 | printf("\n>>cudaMalloc3D interception\n"); 1141 | 1142 | if (native_cudaMalloc3D == NULL) { 1143 | native_cudaMalloc3D = (cudaMalloc3D_t)dlsym(RTLD_NEXT,"cudaMalloc3D"); 1144 | } 1145 | assert(native_cudaMalloc3D != NULL); 1146 | return native_cudaMalloc3D(pitchedDevPtr,extent); 1147 | } 1148 | 1149 | 1150 | /// cudaMalloc3DArray /// 1151 | typedef cudaError_t (*cudaMalloc3DArray_t)(struct cudaArray ** arrayPtr, const struct cudaChannelFormatDesc * desc, struct cudaExtent extent); 1152 | static cudaMalloc3DArray_t native_cudaMalloc3DArray = NULL; 1153 | 1154 | extern "C" cudaError_t cudaMalloc3DArray (struct cudaArray ** arrayPtr, const struct cudaChannelFormatDesc * desc, struct cudaExtent extent) { 1155 | printf("\n>>cudaMalloc3DArray interception\n"); 1156 | 1157 | if (native_cudaMalloc3DArray == NULL) { 1158 | native_cudaMalloc3DArray = (cudaMalloc3DArray_t)dlsym(RTLD_NEXT,"cudaMalloc3DArray"); 1159 | } 1160 | assert(native_cudaMalloc3DArray != NULL); 1161 | return native_cudaMalloc3DArray(arrayPtr,desc,extent); 1162 | } 1163 | 1164 | 1165 | /// cudaMallocArray /// 1166 | typedef cudaError_t (*cudaMallocArray_t)(struct cudaArray ** arrayPtr, const struct cudaChannelFormatDesc * desc, size_t width, size_t height); 1167 | static cudaMallocArray_t native_cudaMallocArray = NULL; 1168 | 1169 | extern "C" cudaError_t cudaMallocArray (struct cudaArray ** arrayPtr, const struct cudaChannelFormatDesc * desc, size_t width, size_t height) { 1170 | printf("\n>>cudaMallocArray interception\n"); 1171 | 1172 | if (native_cudaMallocArray == NULL) { 1173 | native_cudaMallocArray = (cudaMallocArray_t)dlsym(RTLD_NEXT,"cudaMallocArray"); 1174 | } 1175 | assert(native_cudaMallocArray != NULL); 1176 | return native_cudaMallocArray(arrayPtr,desc,width,height); 1177 | } 1178 | 1179 | 1180 | /// cudaMallocHost /// 1181 | typedef cudaError_t (*cudaMallocHost_t)(void ** ptr,size_t size); 1182 | static cudaMallocHost_t native_cudaMallocHost = NULL; 1183 | 1184 | extern "C" cudaError_t cudaMallocHost (void ** ptr,size_t size) { 1185 | printf("\n>>cudaMallocHost interception\n"); 1186 | 1187 | if (native_cudaMallocHost == NULL) { 1188 | native_cudaMallocHost = (cudaMallocHost_t)dlsym(RTLD_NEXT,"cudaMallocHost"); 1189 | } 1190 | assert(native_cudaMallocHost != NULL); 1191 | return native_cudaMallocHost(ptr,size); 1192 | } 1193 | 1194 | 1195 | /// cudaMallocPitch /// 1196 | typedef cudaError_t (*cudaMallocPitch_t)(void ** devPtr, size_t * pitch, size_t width, size_t height); 1197 | static cudaMallocPitch_t native_cudaMallocPitch = NULL; 1198 | 1199 | extern "C" cudaError_t cudaMallocPitch (void ** devPtr, size_t * pitch, size_t width, size_t height) { 1200 | printf("\n>>cudaMallocPitch interception\n"); 1201 | 1202 | if (native_cudaMallocPitch == NULL) { 1203 | native_cudaMallocPitch = (cudaMallocPitch_t)dlsym(RTLD_NEXT,"cudaMallocPitch"); 1204 | } 1205 | assert(native_cudaMallocPitch != NULL); 1206 | return native_cudaMallocPitch(devPtr,pitch,width,height); 1207 | } 1208 | 1209 | 1210 | /// cudaMemcpy /// 1211 | typedef cudaError_t (*cudaMemcpy_t)(void * dst, const void * src, size_t count, enum cudaMemcpyKind kind); 1212 | static cudaMemcpy_t native_cudaMemcpy = NULL; 1213 | 1214 | extern "C" cudaError_t cudaMemcpy (void * dst, const void * src, size_t count, enum cudaMemcpyKind kind) { 1215 | printf("\n>>cudaMemcpy interception\n"); 1216 | 1217 | if (native_cudaMemcpy == NULL) { 1218 | 
native_cudaMemcpy = (cudaMemcpy_t)dlsym(RTLD_NEXT,"cudaMemcpy"); 1219 | } 1220 | assert(native_cudaMemcpy != NULL); 1221 | return native_cudaMemcpy(dst,src,count,kind); 1222 | } 1223 | 1224 | 1225 | /// cudaMemcpy2D /// 1226 | typedef cudaError_t (*cudaMemcpy2D_t)(void * dst, size_t dpitch, const void * src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind); 1227 | static cudaMemcpy2D_t native_cudaMemcpy2D= NULL; 1228 | 1229 | extern "C" cudaError_t cudaMemcpy2D (void * dst, size_t dpitch, const void * src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind) { 1230 | printf("\n>>cudaMemcpy2D interception\n"); 1231 | 1232 | if (native_cudaMemcpy2D == NULL) { 1233 | native_cudaMemcpy2D = (cudaMemcpy2D_t)dlsym(RTLD_NEXT,"cudaMemcpy2D"); 1234 | } 1235 | assert(native_cudaMemcpy2D != NULL); 1236 | return native_cudaMemcpy2D(dst,dpitch,src,spitch,width,height,kind); 1237 | } 1238 | 1239 | 1240 | /// cudaMemcpy2DArrayToArray /// 1241 | typedef cudaError_t (*cudaMemcpy2DArrayToArray_t)(struct cudaArray * dst, 1242 | size_t wOffsetDst, 1243 | size_t hOffsetDst, 1244 | const struct cudaArray * src, 1245 | size_t wOffsetSrc, 1246 | size_t hOffsetSrc, 1247 | size_t width, 1248 | size_t height, 1249 | enum cudaMemcpyKind kind); 1250 | 1251 | static cudaMemcpy2DArrayToArray_t native_cudaMemcpy2DArrayToArray = NULL; 1252 | 1253 | extern "C" cudaError_t cudaMemcpy2DArrayToArray (struct cudaArray * dst, 1254 | size_t wOffsetDst, 1255 | size_t hOffsetDst, 1256 | const struct cudaArray * src, 1257 | size_t wOffsetSrc, 1258 | size_t hOffsetSrc, 1259 | size_t width, 1260 | size_t height, 1261 | enum cudaMemcpyKind kind) { 1262 | printf("\n>>cudaMalloc3D interception\n"); 1263 | 1264 | if (native_cudaMemcpy2DArrayToArray == NULL) { 1265 | native_cudaMemcpy2DArrayToArray = (cudaMemcpy2DArrayToArray_t)dlsym(RTLD_NEXT,"cudaMemcpy2DArrayToArray"); 1266 | } 1267 | assert(native_cudaMemcpy2DArrayToArray != NULL); 1268 | return native_cudaMemcpy2DArrayToArray(dst,wOffsetDst,hOffsetDst,src,wOffsetSrc,hOffsetSrc,width,height,kind); 1269 | } 1270 | 1271 | 1272 | /// cudaMemcpy2DAsync /// 1273 | typedef cudaError_t (*cudaMemcpy2DAsync_t)(void * dst, 1274 | size_t dpitch, 1275 | const void * src, 1276 | size_t spitch, 1277 | size_t width, 1278 | size_t height, 1279 | enum cudaMemcpyKind kind, 1280 | cudaStream_t stream); 1281 | 1282 | static cudaMemcpy2DAsync_t native_cudaMemcpy2DAsync = NULL; 1283 | 1284 | extern "C" cudaError_t cudaMemcpy2DAsync (void * dst, 1285 | size_t dpitch, 1286 | const void * src, 1287 | size_t spitch, 1288 | size_t width, 1289 | size_t height, 1290 | enum cudaMemcpyKind kind, 1291 | cudaStream_t stream) { 1292 | printf("\n>>cudaMemcpy2DAsync interception\n"); 1293 | 1294 | if (native_cudaMemcpy2DAsync == NULL) { 1295 | native_cudaMemcpy2DAsync = (cudaMemcpy2DAsync_t)dlsym(RTLD_NEXT,"cudaMemcpy2DAsync"); 1296 | } 1297 | assert(native_cudaMemcpy2DAsync != NULL); 1298 | return native_cudaMemcpy2DAsync(dst,dpitch,src,spitch,width,height,kind,stream); 1299 | } 1300 | 1301 | 1302 | /// cudaMemcpy2DFromArray /// 1303 | typedef cudaError_t (*cudaMemcpy2DFromArray_t)(void * dst, 1304 | size_t dpitch, 1305 | const struct cudaArray * src, 1306 | size_t wOffset, 1307 | size_t hOffset, 1308 | size_t width, 1309 | size_t height, 1310 | enum cudaMemcpyKind kind); 1311 | 1312 | static cudaMemcpy2DFromArray_t native_cudaMemcpy2DFromArray = NULL; 1313 | 1314 | extern "C" cudaError_t cudaMemcpy2DFromArray (void * dst, 1315 | size_t dpitch, 1316 | const struct cudaArray * src, 
1317 | size_t wOffset, 1318 | size_t hOffset, 1319 | size_t width, 1320 | size_t height, 1321 | enum cudaMemcpyKind kind){ 1322 | 1323 | printf("\n>>cudaMemcpy2DFromArray interception\n"); 1324 | 1325 | if (native_cudaMemcpy2DFromArray == NULL) { 1326 | native_cudaMemcpy2DFromArray = (cudaMemcpy2DFromArray_t)dlsym(RTLD_NEXT,"cudaMemcpy2DFromArray"); 1327 | } 1328 | assert(native_cudaMemcpy2DFromArray != NULL); 1329 | return native_cudaMemcpy2DFromArray(dst,dpitch,src,wOffset,hOffset,width,height,kind); 1330 | } 1331 | 1332 | 1333 | 1334 | /// cudaMemcpy2DFromArrayAsync /// 1335 | typedef cudaError_t (*cudaMemcpy2DFromArrayAsync_t)(void * dst, 1336 | size_t dpitch, 1337 | const struct cudaArray * src, 1338 | size_t wOffset, 1339 | size_t hOffset, 1340 | size_t width, 1341 | size_t height, 1342 | enum cudaMemcpyKind kind, 1343 | cudaStream_t stream); 1344 | 1345 | static cudaMemcpy2DFromArrayAsync_t native_cudaMemcpy2DFromArrayAsync = NULL; 1346 | 1347 | extern "C" cudaError_t cudaMemcpy2DFromArrayAsync (void * dst, 1348 | size_t dpitch, 1349 | const struct cudaArray * src, 1350 | size_t wOffset, 1351 | size_t hOffset, 1352 | size_t width, 1353 | size_t height, 1354 | enum cudaMemcpyKind kind, 1355 | cudaStream_t stream){ 1356 | 1357 | printf("\n>>cudaMemcpy2DFromArrayAsync interception\n"); 1358 | 1359 | if (native_cudaMemcpy2DFromArrayAsync == NULL) { 1360 | native_cudaMemcpy2DFromArrayAsync = (cudaMemcpy2DFromArrayAsync_t)dlsym(RTLD_NEXT,"cudaMemcpy2DFromArrayAsync"); 1361 | } 1362 | assert(native_cudaMemcpy2DFromArrayAsync != NULL); 1363 | return native_cudaMemcpy2DFromArrayAsync(dst,dpitch,src,wOffset,hOffset,width,height,kind,stream); 1364 | } 1365 | 1366 | 1367 | 1368 | 1369 | 1370 | 1371 | 1372 | 1373 | 1374 | 1375 | /// cudaMemcpy2DToArray /// 1376 | typedef cudaError_t (*cudaMemcpy2DToArray_t)(struct cudaArray * dst, 1377 | size_t wOffset, 1378 | size_t hOffset, 1379 | const void * src, 1380 | size_t spitch, 1381 | size_t width, 1382 | size_t height, 1383 | enum cudaMemcpyKind kind); 1384 | 1385 | static cudaMemcpy2DToArray_t native_cudaMemcpy2DToArray= NULL; 1386 | 1387 | extern "C" cudaError_t cudaMemcpy2DToArray (struct cudaArray * dst, 1388 | size_t wOffset, 1389 | size_t hOffset, 1390 | const void * src, 1391 | size_t spitch, 1392 | size_t width, 1393 | size_t height, 1394 | enum cudaMemcpyKind kind) { 1395 | 1396 | printf("\n>>cudaMemcpy2DToArray interception\n"); 1397 | 1398 | if (native_cudaMemcpy2DToArray == NULL) { 1399 | native_cudaMemcpy2DToArray = (cudaMemcpy2DToArray_t)dlsym(RTLD_NEXT,"cudaMemcpy2DToArray"); 1400 | } 1401 | assert(native_cudaMemcpy2DToArray != NULL); 1402 | return native_cudaMemcpy2DToArray(dst,wOffset,hOffset,src,spitch,width,height,kind); 1403 | } 1404 | 1405 | 1406 | /// cudaMemcpy2DToArrayAsync /// 1407 | typedef cudaError_t (*cudaMemcpy2DToArrayAsync_t)(struct cudaArray * dst, 1408 | size_t wOffset, 1409 | size_t hOffset, 1410 | const void * src, 1411 | size_t spitch, 1412 | size_t width, 1413 | size_t height, 1414 | enum cudaMemcpyKind kind, 1415 | cudaStream_t stream); 1416 | 1417 | static cudaMemcpy2DToArrayAsync_t native_cudaMemcpy2DToArrayAsync = NULL; 1418 | 1419 | extern "C" cudaError_t cudaMemcpy2DToArrayAsync (struct cudaArray * dst, 1420 | size_t wOffset, 1421 | size_t hOffset, 1422 | const void * src, 1423 | size_t spitch, 1424 | size_t width, 1425 | size_t height, 1426 | enum cudaMemcpyKind kind, 1427 | cudaStream_t stream) { 1428 | 1429 | printf("\n>>cudaMemcpy2DToArrayAsync interception\n"); 1430 | 1431 | if 
(native_cudaMemcpy2DToArrayAsync == NULL) { 1432 | native_cudaMemcpy2DToArrayAsync = (cudaMemcpy2DToArrayAsync_t)dlsym(RTLD_NEXT,"cudaMemcpy2DToArrayAsync"); 1433 | } 1434 | assert(native_cudaMemcpy2DToArrayAsync != NULL); 1435 | return native_cudaMemcpy2DToArrayAsync(dst,wOffset,hOffset,src,spitch,width,height,kind,stream); 1436 | } 1437 | 1438 | 1439 | /// cudaMemcpy3D /// 1440 | typedef cudaError_t (*cudaMemcpy3D_t)(const struct cudaMemcpy3DParms * p); 1441 | static cudaMemcpy3D_t native_cudaMemcpy3D = NULL; 1442 | 1443 | extern "C" cudaError_t cudaMemcpy3D (const struct cudaMemcpy3DParms * p) { 1444 | printf("\n>>cudaMemcpy3D interception\n"); 1445 | 1446 | if (native_cudaMemcpy3D== NULL) { 1447 | native_cudaMemcpy3D = (cudaMemcpy3D_t)dlsym(RTLD_NEXT,"cudaMemcpy3D"); 1448 | } 1449 | assert(native_cudaMemcpy3D != NULL); 1450 | return native_cudaMemcpy3D(p); 1451 | } 1452 | 1453 | 1454 | /// cudaMemcpy3DAsync /// 1455 | typedef cudaError_t (*cudaMemcpy3DAsync_t)(const struct cudaMemcpy3DParms * p, cudaStream_t stream); 1456 | static cudaMemcpy3DAsync_t native_cudaMemcpy3DAsync = NULL; 1457 | 1458 | extern "C" cudaError_t cudaMemcpy3DAsync (const struct cudaMemcpy3DParms * p, cudaStream_t stream) { 1459 | printf("\n>>cudaMemcpy3DAsync interception\n"); 1460 | 1461 | if (native_cudaMemcpy3DAsync == NULL) { 1462 | native_cudaMemcpy3DAsync = (cudaMemcpy3DAsync_t)dlsym(RTLD_NEXT,"cudaMemcpy3DAsync"); 1463 | } 1464 | assert(native_cudaMemcpy3DAsync != NULL); 1465 | return native_cudaMemcpy3DAsync(p,stream); 1466 | } 1467 | 1468 | 1469 | /// cudaMemcpyArrayToArray /// 1470 | typedef cudaError_t (*cudaMemcpyArrayToArray_t)(struct cudaArray * dst, 1471 | size_t wOffsetDst, 1472 | size_t hOffsetDst, 1473 | const struct cudaArray * src, 1474 | size_t wOffsetSrc, 1475 | size_t hOffsetSrc, 1476 | size_t count, 1477 | enum cudaMemcpyKind kind); 1478 | 1479 | static cudaMemcpyArrayToArray_t native_cudaMemcpyArrayToArray = NULL; 1480 | 1481 | extern "C" cudaError_t cudaMemcpyArrayToArray(struct cudaArray * dst, 1482 | size_t wOffsetDst, 1483 | size_t hOffsetDst, 1484 | const struct cudaArray * src, 1485 | size_t wOffsetSrc, 1486 | size_t hOffsetSrc, 1487 | size_t count, 1488 | enum cudaMemcpyKind kind){ 1489 | 1490 | printf("\n>>cudaMemcpyArrayToArray interception\n"); 1491 | 1492 | if (native_cudaMemcpyArrayToArray == NULL) { 1493 | native_cudaMemcpyArrayToArray = (cudaMemcpyArrayToArray_t)dlsym(RTLD_NEXT,"cudaMemcpyArrayToArray"); 1494 | } 1495 | assert(native_cudaMemcpyArrayToArray != NULL); 1496 | return native_cudaMemcpyArrayToArray(dst,wOffsetDst,hOffsetDst,src,wOffsetSrc,hOffsetSrc,count,kind); 1497 | } 1498 | 1499 | 1500 | /// cudaMemcpyAsync /// 1501 | typedef cudaError_t (*cudaMemcpyAsync_t)(void * dst, 1502 | const void * src, 1503 | size_t count, 1504 | enum cudaMemcpyKind kind, 1505 | cudaStream_t stream); 1506 | 1507 | static cudaMemcpyAsync_t native_cudaMemcpyAsync = NULL; 1508 | 1509 | extern "C" cudaError_t cudaMemcpyAsync (void * dst, 1510 | const void * src, 1511 | size_t count, 1512 | enum cudaMemcpyKind kind, 1513 | cudaStream_t stream) { 1514 | 1515 | printf("\n>>cudaMemcpyAsync interception\n"); 1516 | 1517 | if (native_cudaMemcpyAsync == NULL) { 1518 | native_cudaMemcpyAsync = (cudaMemcpyAsync_t)dlsym(RTLD_NEXT,"cudaMemcpyAsync"); 1519 | } 1520 | assert(native_cudaMemcpyAsync != NULL); 1521 | return native_cudaMemcpyAsync(dst,src,count,kind,stream); 1522 | } 1523 | 1524 | 1525 | /// cudaMemcpyFromArray /// 1526 | typedef cudaError_t (*cudaMemcpyFromArray_t)(void * dst, 1527 | 
const struct cudaArray * src, 1528 | size_t wOffset, 1529 | size_t hOffset, 1530 | size_t count, 1531 | enum cudaMemcpyKind kind); 1532 | 1533 | static cudaMemcpyFromArray_t native_cudaMemcpyFromArray = NULL; 1534 | 1535 | extern "C" cudaError_t cudaMemcpyFromArray (void * dst, 1536 | const struct cudaArray * src, 1537 | size_t wOffset, 1538 | size_t hOffset, 1539 | size_t count, 1540 | enum cudaMemcpyKind kind){ 1541 | 1542 | printf("\n>>cudaMemcpyFromArray interception\n"); 1543 | 1544 | if (native_cudaMemcpyFromArray == NULL) { 1545 | native_cudaMemcpyFromArray = (cudaMemcpyFromArray_t)dlsym(RTLD_NEXT,"cudaMemcpyFromArray"); 1546 | } 1547 | assert(native_cudaMemcpyFromArray != NULL); 1548 | return native_cudaMemcpyFromArray(dst,src,wOffset,hOffset,count,kind); 1549 | } 1550 | 1551 | 1552 | /// cudaMemcpyFromArrayAsync /// 1553 | typedef cudaError_t (*cudaMemcpyFromArrayAsync_t)(void * dst, 1554 | const struct cudaArray * src, 1555 | size_t wOffset, 1556 | size_t hOffset, 1557 | size_t count, 1558 | enum cudaMemcpyKind kind, 1559 | cudaStream_t stream); 1560 | 1561 | static cudaMemcpyFromArrayAsync_t native_cudaMemcpyFromArrayAsync = NULL; 1562 | 1563 | extern "C" cudaError_t cudaMemcpyFromArrayAsync (void * dst, 1564 | const struct cudaArray * src, 1565 | size_t wOffset, 1566 | size_t hOffset, 1567 | size_t count, 1568 | enum cudaMemcpyKind kind, 1569 | cudaStream_t stream){ 1570 | 1571 | printf("\n>>cudaMemcpyFromArrayAsync interception\n"); 1572 | 1573 | if (native_cudaMemcpyFromArrayAsync == NULL) { 1574 | native_cudaMemcpyFromArrayAsync = (cudaMemcpyFromArrayAsync_t)dlsym(RTLD_NEXT,"cudaMemcpyFromArrayAsync"); 1575 | } 1576 | assert(native_cudaMemcpyFromArrayAsync != NULL); 1577 | return native_cudaMemcpyFromArrayAsync(dst,src,wOffset,hOffset,count,kind,stream); 1578 | } 1579 | 1580 | 1581 | /// cudaMemcpyFromSymbol /// 1582 | typedef cudaError_t (*cudaMemcpyFromSymbol_t)(void * dst, 1583 | const char * symbol, 1584 | size_t count, 1585 | size_t offset, 1586 | enum cudaMemcpyKind kind); 1587 | 1588 | static cudaMemcpyFromSymbol_t native_cudaMemcpyFromSymbol = NULL; 1589 | 1590 | extern "C" cudaError_t cudaMemcpyFromSymbol (void * dst, 1591 | const char * symbol, 1592 | size_t count, 1593 | size_t offset, 1594 | enum cudaMemcpyKind kind) { 1595 | 1596 | printf("\n>>cudaMemcpyFromSymbol interception\n"); 1597 | 1598 | if (native_cudaMemcpyFromSymbol == NULL) { 1599 | native_cudaMemcpyFromSymbol = (cudaMemcpyFromSymbol_t)dlsym(RTLD_NEXT,"cudaMemcpyFromSymbol"); 1600 | } 1601 | assert(native_cudaMemcpyFromSymbol != NULL); 1602 | return native_cudaMemcpyFromSymbol(dst,symbol,count,offset,kind); 1603 | } 1604 | 1605 | 1606 | /// cudaMemcpyFromSymbolAsync /// 1607 | typedef cudaError_t (*cudaMemcpyFromSymbolAsync_t)(void * dst, 1608 | const char * symbol, 1609 | size_t count, 1610 | size_t offset, 1611 | enum cudaMemcpyKind kind, 1612 | cudaStream_t stream); 1613 | 1614 | static cudaMemcpyFromSymbolAsync_t native_cudaMemcpyFromSymbolAsync = NULL; 1615 | 1616 | extern "C" cudaError_t cudaMemcpyFromSymbolAsync (void * dst, 1617 | const char * symbol, 1618 | size_t count, 1619 | size_t offset, 1620 | enum cudaMemcpyKind kind, 1621 | cudaStream_t stream) { 1622 | 1623 | printf("\n>>cudaMemcpyFromSymbolAsync interception\n"); 1624 | 1625 | if (native_cudaMemcpyFromSymbolAsync == NULL) { 1626 | native_cudaMemcpyFromSymbolAsync = (cudaMemcpyFromSymbolAsync_t)dlsym(RTLD_NEXT,"cudaMemcpyFromSymbolAsync"); 1627 | } 1628 | assert(native_cudaMemcpyFromSymbolAsync != NULL); 1629 | return 
native_cudaMemcpyFromSymbolAsync(dst,symbol,count,offset,kind,stream); 1630 | } 1631 | 1632 | 1633 | /// cudaMemcpyToArray /// 1634 | typedef cudaError_t (*cudaMemcpyToArray_t)(struct cudaArray * dst, 1635 | size_t wOffset, 1636 | size_t hOffset, 1637 | const void * src, 1638 | size_t count, 1639 | enum cudaMemcpyKind kind); 1640 | 1641 | static cudaMemcpyToArray_t native_cudaMemcpyToArray = NULL; 1642 | 1643 | extern "C" cudaError_t cudaMemcpyToArray (struct cudaArray * dst, 1644 | size_t wOffset, 1645 | size_t hOffset, 1646 | const void * src, 1647 | size_t count, 1648 | enum cudaMemcpyKind kind) { 1649 | 1650 | printf("\n>>cudaMemcpyToArray interception\n"); 1651 | 1652 | if (native_cudaMemcpyToArray == NULL) { 1653 | native_cudaMemcpyToArray = (cudaMemcpyToArray_t)dlsym(RTLD_NEXT,"cudaMemcpyToArray"); 1654 | } 1655 | assert(native_cudaMemcpyToArray != NULL); 1656 | return native_cudaMemcpyToArray(dst,wOffset,hOffset,src,count,kind); 1657 | } 1658 | 1659 | 1660 | /// cudaMemcpyToArrayAsync /// 1661 | typedef cudaError_t (*cudaMemcpyToArrayAsync_t)(struct cudaArray * dst, 1662 | size_t wOffset, 1663 | size_t hOffset, 1664 | const void * src, 1665 | size_t count, 1666 | enum cudaMemcpyKind kind, 1667 | cudaStream_t stream); 1668 | 1669 | static cudaMemcpyToArrayAsync_t native_cudaMemcpyToArrayAsync = NULL; 1670 | 1671 | extern "C" cudaError_t cudaMemcpyToArrayAsync (struct cudaArray * dst, 1672 | size_t wOffset, 1673 | size_t hOffset, 1674 | const void * src, 1675 | size_t count, 1676 | enum cudaMemcpyKind kind, 1677 | cudaStream_t stream) { 1678 | 1679 | printf("\n>>cudaMemcpyToArrayAsync interception\n"); 1680 | 1681 | if (native_cudaMemcpyToArrayAsync == NULL) { 1682 | native_cudaMemcpyToArrayAsync = (cudaMemcpyToArrayAsync_t)dlsym(RTLD_NEXT,"cudaMemcpyToArrayAsync"); 1683 | } 1684 | assert(native_cudaMemcpyToArrayAsync != NULL); 1685 | return native_cudaMemcpyToArrayAsync(dst,wOffset,hOffset,src,count,kind,stream); 1686 | } 1687 | 1688 | 1689 | /// cudaMemcpyToSymbol /// 1690 | typedef cudaError_t (*cudaMemcpyToSymbol_t)(const char * symbol, 1691 | const void * src, 1692 | size_t count, 1693 | size_t offset, 1694 | enum cudaMemcpyKind kind); 1695 | 1696 | static cudaMemcpyToSymbol_t native_cudaMemcpyToSymbol = NULL; 1697 | 1698 | extern "C" cudaError_t cudaMemcpyToSymbol (const char * symbol, 1699 | const void * src, 1700 | size_t count, 1701 | size_t offset, 1702 | enum cudaMemcpyKind kind) { 1703 | 1704 | printf("\n>>cudaMemcpyToSymbol interception\n"); 1705 | 1706 | if (native_cudaMemcpyToSymbol == NULL) { 1707 | native_cudaMemcpyToSymbol = (cudaMemcpyToSymbol_t)dlsym(RTLD_NEXT,"cudaMemcpyToSymbol"); 1708 | } 1709 | assert(native_cudaMemcpyToSymbol != NULL); 1710 | return native_cudaMemcpyToSymbol(symbol,src,count,offset,kind); 1711 | } 1712 | 1713 | 1714 | /// cudaMemcpyToSymbolAsync /// 1715 | typedef cudaError_t (*cudaMemcpyToSymbolAsync_t)(const char * symbol, 1716 | const void * src, 1717 | size_t count, 1718 | size_t offset, 1719 | enum cudaMemcpyKind kind, 1720 | cudaStream_t stream); 1721 | 1722 | static cudaMemcpyToSymbolAsync_t native_cudaMemcpyToSymbolAsync = NULL; 1723 | 1724 | extern "C" cudaError_t cudaMemcpyToSymbolAsync (const char * symbol, 1725 | const void * src, 1726 | size_t count, 1727 | size_t offset, 1728 | enum cudaMemcpyKind kind, 1729 | cudaStream_t stream) { 1730 | 1731 | printf("\n>>cudaMemcpyToSymbolAsync interception\n"); 1732 | 1733 | if (native_cudaMemcpyToSymbolAsync == NULL) { 1734 | native_cudaMemcpyToSymbolAsync = 
(cudaMemcpyToSymbolAsync_t)dlsym(RTLD_NEXT,"cudaMemcpyToSymbolAsync"); 1735 | } 1736 | assert(native_cudaMemcpyToSymbolAsync != NULL); 1737 | return native_cudaMemcpyToSymbolAsync(symbol,src,count,offset,kind,stream); 1738 | } 1739 | 1740 | 1741 | /// cudaMemset /// 1742 | typedef cudaError_t (*cudaMemset_t)(void * devPtr, int value, size_t count); 1743 | static cudaMemset_t native_cudaMemset = NULL; 1744 | 1745 | extern "C" cudaError_t cudaMemset(void * devPtr, int value, size_t count) { 1746 | printf("\n>>cudaMemset interception\n"); 1747 | 1748 | if (native_cudaMemset == NULL) { 1749 | native_cudaMemset = (cudaMemset_t)dlsym(RTLD_NEXT,"cudaMemset"); 1750 | } 1751 | assert(native_cudaMemset != NULL); 1752 | return native_cudaMemset(devPtr,value,count); 1753 | } 1754 | 1755 | 1756 | /// cudaMemset2D /// 1757 | typedef cudaError_t (*cudaMemset2D_t)(void * devPtr, 1758 | size_t pitch, 1759 | int value, 1760 | size_t width, 1761 | size_t height); 1762 | 1763 | static cudaMemset2D_t native_cudaMemset2D = NULL; 1764 | 1765 | extern "C" cudaError_t cudaMemset2D (void * devPtr, 1766 | size_t pitch, 1767 | int value, 1768 | size_t width, 1769 | size_t height) { 1770 | 1771 | printf("\n>>cudaMemset2D interception\n"); 1772 | 1773 | if (native_cudaMemset2D == NULL) { 1774 | native_cudaMemset2D = (cudaMemset2D_t)dlsym(RTLD_NEXT,"cudaMemset2D"); 1775 | } 1776 | assert(native_cudaMemset2D != NULL); 1777 | return native_cudaMemset2D(devPtr,pitch,value,width,height); 1778 | } 1779 | 1780 | 1781 | /// cudaMemset3D /// 1782 | typedef cudaError_t (*cudaMemset3D_t)(struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent); 1783 | 1784 | static cudaMemset3D_t native_cudaMemset3D = NULL; 1785 | 1786 | extern "C" cudaError_t cudaMemset3D (struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent) { 1787 | printf("\n>>cudaMemset3D interception\n"); 1788 | 1789 | if (native_cudaMemset3D == NULL) { 1790 | native_cudaMemset3D = (cudaMemset3D_t)dlsym(RTLD_NEXT,"cudaMemset3D"); 1791 | } 1792 | assert(native_cudaMemset3D != NULL); 1793 | return native_cudaMemset3D(pitchedDevPtr,value,extent); 1794 | } 1795 | 1796 | 1797 | 1798 | //***********************************************// 1799 | // CUDA Runtime API Version Management // 1800 | //***********************************************// 1801 | /// cudaDriverGetVersion /// 1802 | typedef cudaError_t (*cudaDriverGetVersion_t)(int * driverVersion); 1803 | static cudaDriverGetVersion_t native_cudaDriverGetVersion = NULL; 1804 | 1805 | extern "C" cudaError_t cudaDriverGetVersion (int * driverVersion) { 1806 | printf("\n>>cudaDriverGetVersion interception\n"); 1807 | 1808 | if (native_cudaDriverGetVersion == NULL) { 1809 | native_cudaDriverGetVersion = (cudaDriverGetVersion_t)dlsym(RTLD_NEXT,"cudaDriverGetVersion"); 1810 | } 1811 | assert(native_cudaDriverGetVersion != NULL); 1812 | return native_cudaDriverGetVersion(driverVersion); 1813 | } 1814 | 1815 | /// cudaRuntimeGetVersion /// 1816 | typedef cudaError_t (*cudaRuntimeGetVersion_t)(int * runtimeVersion); 1817 | static cudaRuntimeGetVersion_t native_cudaRuntimeGetVersion = NULL; 1818 | 1819 | extern "C" cudaError_t cudaRuntimeGetVersion(int * runtimeVersion) { 1820 | printf("\n>>cudaRuntimeGetVersion interception\n"); 1821 | 1822 | if (native_cudaRuntimeGetVersion == NULL) { 1823 | native_cudaRuntimeGetVersion = (cudaRuntimeGetVersion_t)dlsym(RTLD_NEXT,"cudaRuntimeGetVersion"); 1824 | } 1825 | assert(native_cudaRuntimeGetVersion != NULL); 1826 | return
native_cudaRuntimeGetVersion(runtimeVersion); 1827 | } 1828 | 1829 | 1830 | 1831 | //**********************************************// 1832 | // CUDA Runtime API Thread Management // 1833 | //**********************************************// 1834 | /// cudaThreadExit /// 1835 | typedef cudaError_t (*cudaThreadExit_t)(void); 1836 | static cudaThreadExit_t native_cudaThreadExit = NULL; 1837 | 1838 | extern "C" cudaError_t cudaThreadExit(void) { 1839 | printf("\n>>cudaThreadExit interception\n"); 1840 | 1841 | if (native_cudaThreadExit == NULL) { 1842 | native_cudaThreadExit = (cudaThreadExit_t)dlsym(RTLD_NEXT,"cudaThreadExit"); 1843 | } 1844 | assert(native_cudaThreadExit != NULL); 1845 | return native_cudaThreadExit(); 1846 | } 1847 | 1848 | /// cudaThreadSynchronize /// 1849 | typedef cudaError_t (*cudaThreadSynchronize_t)(void); 1850 | static cudaThreadSynchronize_t native_cudaThreadSynchronize = NULL; 1851 | 1852 | extern "C" cudaError_t cudaThreadSynchronize(void) { 1853 | printf("\n>>cudaThreadSynchronize interception\n"); 1854 | 1855 | if (native_cudaThreadSynchronize == NULL) { 1856 | native_cudaThreadSynchronize = (cudaThreadSynchronize_t)dlsym(RTLD_NEXT,"cudaThreadSynchronize"); 1857 | } 1858 | assert(native_cudaThreadSynchronize != NULL); 1859 | return native_cudaThreadSynchronize(); 1860 | } 1861 | --------------------------------------------------------------------------------
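
A note on the pattern: every wrapper in cuda_intercept.cpp follows the same lazy-resolution scheme. On the first interception the real CUDA Runtime entry point is resolved with dlsym(RTLD_NEXT, "..."), cached in a static function pointer, checked with assert, and the arguments are then forwarded unchanged, so the traced program behaves exactly as before apart from the printed line. A minimal sketch of the same scheme applied to one further Runtime call, cudaMemsetAsync, is shown below; this block is illustrative only, is not part of the repository, and assumes the headers the file already includes (cuda_runtime.h, dlfcn.h, assert.h, cstdio).

/// cudaMemsetAsync (illustrative sketch, not present in the original file) ///
typedef cudaError_t (*cudaMemsetAsync_t)(void * devPtr, int value, size_t count, cudaStream_t stream);
static cudaMemsetAsync_t native_cudaMemsetAsync = NULL;

extern "C" cudaError_t cudaMemsetAsync (void * devPtr, int value, size_t count, cudaStream_t stream) {
    printf("\n>>cudaMemsetAsync interception\n");

    // Resolve the real symbol once and cache it; RTLD_NEXT skips this
    // preloaded library and finds the symbol in the next object in the
    // search order (normally libcudart).
    if (native_cudaMemsetAsync == NULL) {
        native_cudaMemsetAsync = (cudaMemsetAsync_t)dlsym(RTLD_NEXT,"cudaMemsetAsync");
    }
    assert(native_cudaMemsetAsync != NULL);
    return native_cudaMemsetAsync(devPtr,value,count,stream);
}

Caching the resolved pointer in a static variable avoids repeating the dlsym lookup on every call, and the assert catches a misspelled or unresolvable symbol name at the first interception instead of dereferencing a NULL pointer later.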