├── .gitignore ├── .gitmodules ├── ApiTracker ├── ApiTracker.cpp └── Makefile ├── DeviceMalloc ├── DeviceMalloc.cpp ├── DeviceMalloc.h ├── DeviceMallocPatches.cu └── Makefile ├── Initcheck ├── Makefile └── memset_error.cu ├── LICENSE ├── Memcheck ├── Makefile └── memcheck_demo.cu ├── MemoryTracker ├── Makefile ├── MemoryTracker.cpp ├── MemoryTracker.h └── MemoryTrackerPatches.cu ├── NvtxMemoryPool ├── Makefile ├── NvtxMemoryPool.cu ├── NvtxMemoryPool.h └── README.md ├── NvtxNaming ├── Makefile ├── NvtxNaming.cu ├── NvtxNaming.h └── README.md ├── NvtxPermissions ├── Makefile ├── NvtxPermissions.cu ├── NvtxPermissions.h └── README.md ├── README.md ├── Racecheck ├── Makefile ├── block_error.cu └── warp_error.cu ├── Suppressions ├── Makefile ├── suppressions_demo.cu └── suppressions_initcheck_demo.cu └── Synccheck ├── Makefile ├── divergent_threads.cu └── illegal_syncwarp.cu /.gitignore: -------------------------------------------------------------------------------- 1 | *.a 2 | *.o 3 | *.so 4 | *.so.* 5 | *.swp 6 | *.dll 7 | *.pdb 8 | *.lib 9 | .\#* 10 | *~ 11 | \#* 12 | dist 13 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "NVTX"] 2 | path = NVTX 3 | url = https://github.com/NVIDIA/NVTX 4 | branch = dev-mem-api 5 | -------------------------------------------------------------------------------- /ApiTracker/ApiTracker.cpp: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved. 2 | * 3 | * Redistribution and use in source and binary forms, with or without 4 | * modification, are permitted provided that the following conditions 5 | * are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 
8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution. 11 | * * Neither the name of NVIDIA CORPORATION nor the names of its 12 | * contributors may be used to endorse or promote products derived 13 | * from this software without specific prior written permission. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | */ 27 | 28 | #include 29 | 30 | // CUDA include for cudaError_t 31 | #include 32 | 33 | #include 34 | 35 | static void ApiTrackerCallback( 36 | void* userdata, 37 | Sanitizer_CallbackDomain domain, 38 | Sanitizer_CallbackId cbid, 39 | const void* cbdata) 40 | { 41 | if (domain != SANITIZER_CB_DOMAIN_RUNTIME_API) 42 | return; 43 | 44 | auto* pCallbackData = (Sanitizer_CallbackData*)cbdata; 45 | 46 | // ignore entry callback 47 | if (pCallbackData->callbackSite == SANITIZER_API_ENTER) 48 | return; 49 | 50 | auto returnValue = *(cudaError_t*)pCallbackData->functionReturnValue; 51 | 52 | std::cout << "API call to " << pCallbackData->functionName << " (return code " 53 | << returnValue << ")" << std::endl; 54 | } 55 | 56 | int InitializeInjection() 57 | { 58 | Sanitizer_SubscriberHandle handle; 59 | 60 | sanitizerSubscribe(&handle, ApiTrackerCallback, nullptr); 61 | sanitizerEnableDomain(1, handle, SANITIZER_CB_DOMAIN_RUNTIME_API); 62 | 63 | return 0; 64 | } 65 | 66 | int __global_initializer__ = InitializeInjection(); 67 | -------------------------------------------------------------------------------- /ApiTracker/Makefile: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in the 11 | # documentation and/or other materials provided with the distribution. 
12 | # * Neither the name of NVIDIA CORPORATION nor the names of its 13 | # contributors may be used to endorse or promote products derived 14 | # from this software without specific prior written permission. 15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | # 28 | ################################################################################ 29 | 30 | # Location of the CUDA Toolkit 31 | CUDA_PATH ?= /usr/local/cuda 32 | SANITIZER_PATH ?= $(CUDA_PATH)/compute-sanitizer 33 | 34 | HOST_COMPILER ?= g++ 35 | NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER) 36 | 37 | INCLUDE_FLAGS := -I$(CUDA_PATH)/include -I$(SANITIZER_PATH)/include 38 | 39 | LINK_FLAGS := -L$(SANITIZER_PATH) -fPIC -shared 40 | LINK_LIBS := -lsanitizer-public 41 | 42 | # Target rules 43 | all: build 44 | 45 | build: libApiTracker.so 46 | 47 | libApiTracker.so: ApiTracker.cpp 48 | $(HOST_COMPILER) $(INCLUDE_FLAGS) $(LINK_FLAGS) -o $@ $< $(LINK_LIBS) 49 | 50 | clean: 51 | rm -f libApiTracker.so 52 | 53 | clobber: clean 54 | 55 | -------------------------------------------------------------------------------- /DeviceMalloc/DeviceMalloc.cpp: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 2 | * 3 | * Redistribution and use in source and binary forms, with or without 4 | * modification, are permitted provided that the following conditions 5 | * are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution. 11 | * * Neither the name of NVIDIA CORPORATION nor the names of its 12 | * contributors may be used to endorse or promote products derived 13 | * from this software without specific prior written permission. 
14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | */ 27 | 28 | #include "DeviceMalloc.h" 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | 37 | void Work(std::future futureObj); 38 | 39 | struct Worker 40 | { 41 | std::thread t; 42 | std::promise exitSignal; 43 | 44 | Worker() 45 | { 46 | std::future futureObj = exitSignal.get_future(); 47 | t = std::thread(&Work, std::move(futureObj)); 48 | } 49 | 50 | ~Worker() 51 | { 52 | exitSignal.set_value(); 53 | t.join(); 54 | } 55 | }; 56 | 57 | Worker worker; 58 | std::vector trackers; 59 | 60 | void FlushTracker(EventTracker& tracker) 61 | { 62 | for (size_t i = 0; i < tracker.numEvents; ++i) 63 | { 64 | EventData& event = tracker.events[i]; 65 | 66 | switch (event.instructionId) 67 | { 68 | case SANITIZER_INSTRUCTION_DEVICE_SIDE_MALLOC: 69 | std::cout << "malloc(" << event.size << ") = 0x"; 70 | std::cout << std::hex << event.address << std::dec << std::endl; 71 | break; 72 | case SANITIZER_INSTRUCTION_DEVICE_SIDE_FREE: 73 | std::cout << "free(0x"; 74 | std::cout << std::hex << event.address << std::dec << ")" << std::endl; 75 | break; 76 | case SANITIZER_INSTRUCTION_DEVICE_ALIGNED_MALLOC: 77 | std::cout << 
"__nv_aligned_device_malloc(" << event.size << ") = 0x"; 78 | std::cout << std::hex << event.address << std::dec << std::endl; 79 | break; 80 | default: 81 | break; 82 | } 83 | } 84 | } 85 | 86 | void FlushData() 87 | { 88 | for (auto& pTracker : trackers) 89 | { 90 | FlushTracker(*pTracker); 91 | } 92 | } 93 | 94 | void Work(std::future futureObj) 95 | { 96 | while (futureObj.wait_for(std::chrono::milliseconds(1)) == std::future_status::timeout) 97 | { 98 | for (auto pTracker : trackers) 99 | { 100 | if (!pTracker->doorbell) 101 | { 102 | continue; 103 | } 104 | 105 | FlushTracker(*pTracker); 106 | 107 | pTracker->currentIndex = 0; 108 | pTracker->numEvents = 0; 109 | pTracker->doorbell = false; 110 | } 111 | 112 | std::this_thread::sleep_for(std::chrono::milliseconds(1000)); 113 | } 114 | } 115 | 116 | void ModuleLoaded(Sanitizer_ResourceModuleData* pModuleData) 117 | { 118 | // Instrument user code! 119 | if (SANITIZER_SUCCESS != sanitizerAddPatchesFromFile("DeviceMallocPatches.fatbin", 0)) 120 | { 121 | std::cerr << "Failed to load fatbin. 
Please check that it is in the current directory and contains the correct SM architecture" << std::endl; 122 | } 123 | 124 | sanitizerPatchInstructions(SANITIZER_INSTRUCTION_DEVICE_SIDE_MALLOC, pModuleData->module, "DeviceMalloc"); 125 | sanitizerPatchInstructions(SANITIZER_INSTRUCTION_DEVICE_SIDE_FREE, pModuleData->module, "DeviceFree"); 126 | sanitizerPatchInstructions(SANITIZER_INSTRUCTION_DEVICE_ALIGNED_MALLOC, pModuleData->module, "AlignedMalloc"); 127 | sanitizerPatchModule(pModuleData->module); 128 | } 129 | 130 | void LaunchBegin(Sanitizer_LaunchData* pLaunchData) 131 | { 132 | EventTracker* pTracker = nullptr; 133 | sanitizerAllocHost(pLaunchData->context, (void**)&pTracker, sizeof(EventTracker)); 134 | std::memset(pTracker, 0, sizeof(EventTracker)); 135 | 136 | sanitizerSetLaunchCallbackData( 137 | pLaunchData->hLaunch, 138 | pLaunchData->function, 139 | pLaunchData->hStream, 140 | pTracker); 141 | 142 | // not thread-safe! 143 | trackers.push_back(pTracker); 144 | } 145 | 146 | void cbFunction( 147 | void* userdata, 148 | Sanitizer_CallbackDomain domain, 149 | Sanitizer_CallbackId cbid, 150 | const void* cbdata) 151 | { 152 | switch (domain) 153 | { 154 | case SANITIZER_CB_DOMAIN_RESOURCE: 155 | switch (cbid) 156 | { 157 | case SANITIZER_CBID_RESOURCE_MODULE_LOADED: 158 | { 159 | auto* pModuleData = (Sanitizer_ResourceModuleData*)cbdata; 160 | ModuleLoaded(pModuleData); 161 | break; 162 | } 163 | default: 164 | break; 165 | } 166 | break; 167 | case SANITIZER_CB_DOMAIN_LAUNCH: 168 | switch (cbid) 169 | { 170 | case SANITIZER_CBID_LAUNCH_BEGIN: 171 | { 172 | auto* pLaunchData = (Sanitizer_LaunchData*)cbdata; 173 | LaunchBegin(pLaunchData); 174 | break; 175 | } 176 | default: 177 | break; 178 | } 179 | break; 180 | case SANITIZER_CB_DOMAIN_SYNCHRONIZE: 181 | switch (cbid) 182 | { 183 | case SANITIZER_CBID_SYNCHRONIZE_STREAM_SYNCHRONIZED: 184 | case SANITIZER_CBID_SYNCHRONIZE_CONTEXT_SYNCHRONIZED: 185 | FlushData(); 186 | break; 187 | default: 188 | break; 
189 | } 190 | break; 191 | default: 192 | break; 193 | } 194 | } 195 | 196 | int InitializeInjection() 197 | { 198 | Sanitizer_SubscriberHandle handle; 199 | 200 | sanitizerSubscribe(&handle, cbFunction, nullptr); 201 | sanitizerEnableAllDomains(1, handle); 202 | 203 | return 0; 204 | } 205 | 206 | int __global_initializer__ = InitializeInjection(); 207 | 208 | -------------------------------------------------------------------------------- /DeviceMalloc/DeviceMalloc.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 2 | * 3 | * Redistribution and use in source and binary forms, with or without 4 | * modification, are permitted provided that the following conditions 5 | * are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution. 11 | * * Neither the name of NVIDIA CORPORATION nor the names of its 12 | * contributors may be used to endorse or promote products derived 13 | * from this software without specific prior written permission. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | */ 27 | 28 | #pragma once 29 | 30 | #include 31 | #include 32 | 33 | #include 34 | 35 | #include 36 | 37 | static constexpr size_t kMaxEvents = 256; 38 | 39 | struct EventData 40 | { 41 | Sanitizer_InstructionId instructionId; 42 | 43 | uint64_t address; 44 | uint64_t size; // only filled for allocations 45 | }; 46 | 47 | // Main tracking structure that patches get as userdata 48 | struct EventTracker 49 | { 50 | EventData events[kMaxEvents]; 51 | 52 | uint32_t currentIndex; 53 | uint32_t numEvents; 54 | volatile bool doorbell; 55 | }; 56 | 57 | -------------------------------------------------------------------------------- /DeviceMalloc/DeviceMallocPatches.cu: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 2 | * 3 | * Redistribution and use in source and binary forms, with or without 4 | * modification, are permitted provided that the following conditions 5 | * are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution. 
11 | * * Neither the name of NVIDIA CORPORATION nor the names of its 12 | * contributors may be used to endorse or promote products derived 13 | * from this software without specific prior written permission. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "DeviceMalloc.h"

// Hands a full event buffer to the host and waits until it has been consumed.
// Called by the single thread whose event completed the buffer (see
// IncrementNumEvents). The statement order below is load-bearing.
static __device__ __inline__
void FlushData(EventTracker& tracker)
{
    // make sure everything is visible in memory
    __threadfence_system();

    // Ring the doorbell; the host worker prints the events and clears it.
    tracker.doorbell = true;

    while (tracker.doorbell)
    {
    }

    // Reset the completed-event count first, then (after a fence) the slot
    // index: resetting currentIndex is what releases the threads spinning in
    // GetEventIndex.
    tracker.numEvents = 0;
    __threadfence();
    tracker.currentIndex = 0;
}

// Reserves one slot of the event buffer and returns its index.
// If the buffer is full, spins until the flushing thread resets currentIndex
// and then tries again.
static __device__ __inline__
uint32_t GetEventIndex(EventTracker& tracker)
{
    uint32_t idx = kMaxEvents;

    while (idx >= kMaxEvents)
    {
        idx = atomicAdd(&tracker.currentIndex, 1);

        if (idx >= kMaxEvents)
        {
            // buffer is full, wait for last writing thread to flush
            do
            {
            }
            while (*(volatile uint32_t*)&tracker.currentIndex >= kMaxEvents);
        }
    }

    return idx;
}

// Publishes one completely written event. The thread that completes the last
// slot of the buffer performs the flush.
static __device__ inline
void IncrementNumEvents(EventTracker& tracker)
{
    // Order the event's field writes before the counter update.
    __threadfence();
    const uint32_t old = atomicAdd(&tracker.numEvents, 1);

    if (old == kMaxEvents - 1)
    {
        // buffer is full, require a flush
        FlushData(tracker);
    }
}

// Shared implementation of the three patch entry points: reserves a slot,
// fills it in and publishes it.
//
//   userdata      - EventTracker set as launch callback data; a null pointer
//                   is tolerated and records nothing
//   instructionId - id stored in the event
//   ptr           - pointer allocated or freed
//   size          - allocation size; pass 0 for frees so a reused slot never
//                   keeps the size of an earlier event
static __device__ __inline__
void RecordEvent(
    void* userdata,
    Sanitizer_InstructionId instructionId,
    void* ptr,
    uint64_t size)
{
    if (userdata == nullptr)
    {
        return;
    }

    auto& tracker = *(EventTracker*)userdata;

    const uint32_t idx = GetEventIndex(tracker);

    EventData& event = tracker.events[idx];
    event.instructionId = instructionId;
    event.address = (uint64_t)(uintptr_t)ptr;
    event.size = size;

    IncrementNumEvents(tracker);
}

// Patch attached to SANITIZER_INSTRUCTION_DEVICE_SIDE_MALLOC.
// Records the returned pointer and the requested size.
extern "C" __device__ __noinline__
SanitizerPatchResult DeviceMalloc(
    void* userdata,
    uint64_t pc,
    void* allocatedPtr,
    uint64_t allocatedSize)
{
    RecordEvent(userdata, SANITIZER_INSTRUCTION_DEVICE_SIDE_MALLOC, allocatedPtr, allocatedSize);

    return SANITIZER_PATCH_SUCCESS;
}

// Patch attached to SANITIZER_INSTRUCTION_DEVICE_SIDE_FREE.
// Records the freed pointer.
extern "C" __device__ __noinline__
SanitizerPatchResult DeviceFree(
    void* userdata,
    uint64_t pc,
    void* ptr)
{
    RecordEvent(userdata, SANITIZER_INSTRUCTION_DEVICE_SIDE_FREE, ptr, 0);

    return SANITIZER_PATCH_SUCCESS;
}

// Patch attached to SANITIZER_INSTRUCTION_DEVICE_ALIGNED_MALLOC.
// Records the returned pointer and the requested size.
extern "C" __device__ __noinline__
SanitizerPatchResult AlignedMalloc(
    void* userdata,
    uint64_t pc,
    void* allocatedPtr,
    uint64_t allocatedSize)
{
    RecordEvent(userdata, SANITIZER_INSTRUCTION_DEVICE_ALIGNED_MALLOC, allocatedPtr, allocatedSize);

    return SANITIZER_PATCH_SUCCESS;
}
15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | # 28 | ################################################################################ 29 | 30 | # Location of the CUDA Toolkit 31 | CUDA_PATH ?= /usr/local/cuda 32 | SANITIZER_PATH ?= $(CUDA_PATH)/compute-sanitizer 33 | 34 | HOST_COMPILER ?= g++ 35 | NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER) 36 | 37 | INCLUDE_FLAGS := -I$(CUDA_PATH)/include -I$(SANITIZER_PATH)/include 38 | 39 | LINK_FLAGS := -L$(SANITIZER_PATH) -fPIC -shared 40 | LINK_LIBS := -lsanitizer-public 41 | 42 | NVCC_FLAGS := --fatbin --compile-as-tools-patch 43 | NVCC_FLAGS += $(INCLUDE_FLAGS) 44 | 45 | ifeq ($(dbg),1) 46 | NVCC_FLAGS += -g -G 47 | endif 48 | 49 | ################################################################################ 50 | 51 | # architecture 52 | TARGET_ARCH := $(shell uname -m) 53 | 54 | ifeq ($(TARGET_ARCH),aarch64) 55 | SMS ?= 53 61 70 72 75 80 86 87 90 56 | else 57 | SMS ?= 52 60 70 75 80 86 90 58 | endif 59 | 60 | # Generate SASS code for each SM architecture listed in $(SMS) 61 | $(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm))) 62 | 63 | # Generate PTX code from the highest SM architecture in $(SMS) to 
guarantee forward-compatibility 64 | HIGHEST_SM := $(lastword $(sort $(SMS))) 65 | GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) 66 | 67 | ################################################################################ 68 | 69 | # Target rules 70 | all: build 71 | 72 | build: libDeviceMalloc.so DeviceMallocPatches.fatbin 73 | 74 | libDeviceMalloc.so: DeviceMalloc.cpp 75 | $(HOST_COMPILER) $(INCLUDE_FLAGS) $(LINK_FLAGS) -o $@ $< $(LINK_LIBS) 76 | 77 | DeviceMallocPatches.fatbin: DeviceMallocPatches.cu 78 | $(NVCC) $(NVCC_FLAGS) $(GENCODE_FLAGS) -o $@ -c $< 79 | 80 | clean: 81 | rm -f libDeviceMalloc.so DeviceMallocPatches.fatbin 82 | 83 | clobber: clean 84 | 85 | -------------------------------------------------------------------------------- /Initcheck/Makefile: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in the 11 | # documentation and/or other materials provided with the distribution. 12 | # * Neither the name of NVIDIA CORPORATION nor the names of its 13 | # contributors may be used to endorse or promote products derived 14 | # from this software without specific prior written permission. 
15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | # 28 | ################################################################################ 29 | 30 | # Location of the CUDA Toolkit 31 | CUDA_PATH ?= /usr/local/cuda 32 | SANITIZER_PATH ?= $(CUDA_PATH)/compute-sanitizer 33 | SANITIZER_BIN ?= $(SANITIZER_PATH)/compute-sanitizer 34 | 35 | HOST_COMPILER ?= g++ 36 | NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER) 37 | 38 | INCLUDE_FLAGS := -I$(CUDA_PATH)/include 39 | 40 | COMPILE_FLAGS := -MMD 41 | ifeq ($(dbg),1) 42 | COMPILE_FLAGS += -G -Xcompiler -rdynamic 43 | else 44 | COMPILE_FLAGS += -O2 -lineinfo 45 | endif 46 | 47 | # Pattern rules to build binary from .cu file 48 | %.o: %.cu 49 | $(NVCC) $(INCLUDE_FLAGS) $(COMPILE_FLAGS) -c $< 50 | %: %.o 51 | $(NVCC) $(LINK_FLAGS) -o $@ $< 52 | 53 | # Target rules 54 | TARGET_BINARY := memset_error 55 | 56 | all: build 57 | 58 | build: $(TARGET_BINARY) 59 | 60 | clean: 61 | rm -f $(TARGET_BINARY) memset_error.o 62 | 63 | clobber: clean 64 | 65 | # Run sanitizer tools 66 | run_initcheck: $(TARGET_BINARY) 67 | $(SANITIZER_BIN) --tool initcheck $(TARGET_BINARY) 68 | -------------------------------------------------------------------------------- 
#include <cassert>

#define checkCudaErrors(Code) assert((Code) == cudaSuccess)
#define checkCudaLaunch(...) checkCudaErrors((__VA_ARGS__, cudaPeekAtLastError()))

static constexpr int NumThreads = 32;
static constexpr int NumBlocks = 2;

// Adds each thread's global index to its own element of `v`.
// Expects a 1D launch whose total thread count equals the number of elements
// in `v`; there is no bounds check.
__global__
void vectorAdd(int *v)
{
    const int globalIdx = threadIdx.x + blockDim.x * blockIdx.x;

    v[globalIdx] = v[globalIdx] + globalIdx;
}

// Allocates NumBlocks * NumThreads ints, clears only part of them and runs
// vectorAdd over all of them, so the kernel reads uninitialized device memory.
int main()
{
    int *d_vec = nullptr;

    checkCudaErrors(cudaMalloc((void**)&d_vec, sizeof(int) * NumBlocks * NumThreads));

    // The demo's error, kept on purpose: size is missing `* sizeof(int)`
    checkCudaErrors(cudaMemset(d_vec, 0, NumBlocks * NumThreads));

    checkCudaLaunch(vectorAdd<<<NumBlocks, NumThreads>>>(d_vec));
    checkCudaErrors(cudaDeviceSynchronize());

    checkCudaErrors(cudaFree(d_vec));

    return 0;
}
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /Memcheck/Makefile: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in the 11 | # documentation and/or other materials provided with the distribution. 12 | # * Neither the name of NVIDIA CORPORATION nor the names of its 13 | # contributors may be used to endorse or promote products derived 14 | # from this software without specific prior written permission. 15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | # 28 | ################################################################################ 29 | 30 | # Location of the CUDA Toolkit 31 | CUDA_PATH ?= /usr/local/cuda 32 | SANITIZER_PATH ?= $(CUDA_PATH)/compute-sanitizer 33 | SANITIZER_BIN ?= $(SANITIZER_PATH)/compute-sanitizer 34 | 35 | HOST_COMPILER ?= g++ 36 | NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER) 37 | 38 | INCLUDE_FLAGS := -I$(CUDA_PATH)/include 39 | 40 | COMPILE_FLAGS := -MMD 41 | ifeq ($(dbg),1) 42 | COMPILE_FLAGS += -G -Xcompiler -rdynamic 43 | endif 44 | 45 | # Pattern rules to build binary from .cu file 46 | %.o: %.cu 47 | $(NVCC) $(INCLUDE_FLAGS) $(COMPILE_FLAGS) -c $< 48 | %: %.o 49 | $(NVCC) $(LINK_FLAGS) -o $@ $< 50 | 51 | # Target rules 52 | TARGET_BINARY := memcheck_demo 53 | 54 | all: build 55 | 56 | build: $(TARGET_BINARY) 57 | 58 | clean: 59 | rm -f $(TARGET_BINARY) memcheck_demo.o 60 | 61 | clobber: clean 62 | 63 | # Run sanitizer tools 64 | run_memcheck: $(TARGET_BINARY) 65 | $(SANITIZER_BIN) --destroy-on-device-error kernel $(TARGET_BINARY) 66 | 67 | run_leakcheck: $(TARGET_BINARY) 68 | $(SANITIZER_BIN) --destroy-on-device-error kernel --leak-check=full $(TARGET_BINARY) 69 | -------------------------------------------------------------------------------- /Memcheck/memcheck_demo.cu: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2021-2023, NVIDIA CORPORATION. 
All rights reserved. 2 | * 3 | * Redistribution and use in source and binary forms, with or without 4 | * modification, are permitted provided that the following conditions 5 | * are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution. 11 | * * Neither the name of NVIDIA CORPORATION nor the names of its 12 | * contributors may be used to endorse or promote products derived 13 | * from this software without specific prior written permission. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | */ 27 | 28 | #include 29 | /* Device-global int; unaligned_kernel derives a misaligned address from it. */ 30 | __device__ int x; 31 | /* Deliberately stores an int through the address one byte past the start of x, i.e. a misaligned int write. */ 32 | __global__ void unaligned_kernel(void) 33 | { 34 | *(int*) ((char*)&x + 1) = 42; 35 | } 36 | /* Deliberately stores an int to the hard-coded address 0x87654320. NOTE(review): presumably chosen to lie outside every allocation; nothing in this file allocates it. */ 37 | __device__ void out_of_bounds_function(void) 38 | { 39 | *(int*) 0x87654320 = 42; 40 | } 41 | /* Kernel wrapper whose only statement is the call to out_of_bounds_function(). */ 42 | __global__ void out_of_bounds_kernel(void) 43 | { 44 | out_of_bounds_function(); 45 | } 46 | /* Launches unaligned_kernel with one block of one thread and prints the error string for the result of cudaDeviceSynchronize(). */ 47 | static void run_unaligned(void) 48 | { 49 | std::cout << "Running unaligned_kernel: "; 50 | unaligned_kernel<<<1,1>>>(); 51 | std::cout << cudaGetErrorString(cudaDeviceSynchronize()) << std::endl; 52 | } 53 | /* Same as run_unaligned() but for out_of_bounds_kernel. */ 54 | static void run_out_of_bounds(void) 55 | { 56 | std::cout << "Running out_of_bounds_kernel: "; 57 | out_of_bounds_kernel<<<1,1>>>(); 58 | std::cout << cudaGetErrorString(cudaDeviceSynchronize()) << std::endl; 59 | } 60 | /* Allocates 1024 bytes of device memory (return code not checked), runs both faulty kernels, and returns without freeing devMem; the free is left out on purpose to demo leakcheck, as the comment near the end says. */ 61 | int main() { 62 | int *devMem = nullptr; 63 | 64 | std::cout << "Mallocing memory" << std::endl; 65 | cudaMalloc((void**)&devMem, 1024); 66 | 67 | run_unaligned(); 68 | run_out_of_bounds(); 69 | 70 | // Omitted to demo leakcheck 71 | // cudaFree(devMem); 72 | 73 | return 0; 74 | } 75 | -------------------------------------------------------------------------------- /MemoryTracker/Makefile: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in the 11 | # documentation and/or other materials provided with the distribution. 
12 | # * Neither the name of NVIDIA CORPORATION nor the names of its 13 | # contributors may be used to endorse or promote products derived 14 | # from this software without specific prior written permission. 15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | # 28 | ################################################################################ 29 | 30 | # Location of the CUDA Toolkit 31 | CUDA_PATH ?= /usr/local/cuda 32 | SANITIZER_PATH ?= $(CUDA_PATH)/compute-sanitizer 33 | 34 | HOST_COMPILER ?= g++ 35 | NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER) 36 | 37 | INCLUDE_FLAGS := -I$(CUDA_PATH)/include -I$(SANITIZER_PATH)/include 38 | 39 | LINK_FLAGS := -L$(SANITIZER_PATH) -fPIC -shared 40 | LINK_LIBS := -lsanitizer-public 41 | 42 | NVCC_FLAGS := --fatbin --compile-as-tools-patch 43 | NVCC_FLAGS += $(INCLUDE_FLAGS) 44 | 45 | ifeq ($(dbg),1) 46 | NVCC_FLAGS += -g -G 47 | endif 48 | 49 | ################################################################################ 50 | 51 | # architecture 52 | TARGET_ARCH := $(shell uname -m) 53 | 54 | ifeq ($(TARGET_ARCH),aarch64) 55 | SMS ?= 53 61 70 72 75 80 86 87 90 56 | else 57 | SMS ?= 52 60 70 75 80 86 90 58 | endif 59 | 60 | # Generate SASS code for each SM architecture listed in $(SMS) 61 | $(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm))) 62 | 63 | # Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility 64 | HIGHEST_SM := $(lastword $(sort $(SMS))) 65 | GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) 66 | 67 | ################################################################################ 68 | 69 | # Target rules 70 | all: build 71 | 72 | build: libMemoryTracker.so MemoryTrackerPatches.fatbin 73 | 74 | libMemoryTracker.so: MemoryTracker.cpp 75 | $(HOST_COMPILER) $(INCLUDE_FLAGS) $(LINK_FLAGS) -o $@ $< $(LINK_LIBS) 76 | 77 | MemoryTrackerPatches.fatbin: MemoryTrackerPatches.cu 78 | $(NVCC) $(NVCC_FLAGS) $(GENCODE_FLAGS) -o $@ -c $< 79 | 80 | clean: 81 | rm -f libMemoryTracker.so MemoryTrackerPatches.fatbin 82 | 83 | clobber: clean 84 | -------------------------------------------------------------------------------- /MemoryTracker/MemoryTracker.cpp: 
-------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved. 2 | * 3 | * Redistribution and use in source and binary forms, with or without 4 | * modification, are permitted provided that the following conditions 5 | * are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution. 11 | * * Neither the name of NVIDIA CORPORATION nor the names of its 12 | * contributors may be used to endorse or promote products derived 13 | * from this software without specific prior written permission. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | */ 27 | 28 | #include "MemoryTracker.h" 29 | 30 | #include 31 | 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | /* One recorded kernel launch: the kernel name and the device-side tracker that LaunchBegin() allocated for it. */ 38 | struct LaunchData 39 | { 40 | std::string functionName; 41 | MemoryAccessTracker* pTracker; 42 | }; 43 | /* Launch records are indexed as memoryTrackers[context][stream]. NOTE(review): the template arguments of these aliases are missing from this copy of the file. */ 44 | using LaunchVector = std::vector; 45 | using StreamMap = std::map; 46 | using ContextMap = std::map; 47 | /* State handed to the sanitizer callback as userdata: the output stream (the file named by the OUT_FILE_NAME environment variable when it is set, otherwise std::cout) and the per-context, per-stream launch records. */ 48 | struct CallbackTracker 49 | { 50 | std::ostream* out = nullptr; 51 | std::shared_ptr outFile = nullptr; 52 | 53 | ContextMap memoryTrackers; 54 | 55 | CallbackTracker() 56 | { 57 | const char *pOutName = std::getenv("OUT_FILE_NAME"); 58 | if (pOutName) 59 | { 60 | outFile = std::make_shared(pOutName); 61 | out = outFile.get(); 62 | } 63 | else 64 | { 65 | out = &std::cout; 66 | } 67 | } 68 | 69 | // very basic singleton 70 | static CallbackTracker& GetInstance() 71 | { 72 | static CallbackTracker instance; 73 | return instance; 74 | } 75 | }; 76 | /* ModuleLoaded: invoked for the module-loaded resource callback. Loads MemoryTrackerPatches.fatbin, attaches the four patch callbacks (global, shared, and local memory accesses plus memcpy-async), then patches the module. NOTE(review): when the fatbin fails to load only a message is printed and patching is still attempted; the results of the patch calls are not checked either. The two functions that follow it: GetMemAccessSize returns the per-launch record capacity, which is std::stoi of MEM_ACCESS_SIZE when that variable is set and 1024 otherwise. NOTE(review): std::stoi throws on non-numeric or out-of-range text and a negative value converts to a very large size_t; neither case is handled. LaunchBegin allocates and zeroes a device array of that many MemoryAccess records, uploads a MemoryAccessTracker describing it, registers the tracker as callback data for the launched function, and appends {functionName, tracker} to the list for [context][stream]; the sanitizer calls it makes are unchecked. */ 77 | void ModuleLoaded(Sanitizer_ResourceModuleData* pModuleData) 78 | { 79 | // Instrument user code! 80 | if (SANITIZER_SUCCESS != sanitizerAddPatchesFromFile("MemoryTrackerPatches.fatbin", 0)) 81 | { 82 | std::cerr << "Failed to load fatbin. 
Please check that it is in the current directory and contains the correct SM architecture" << std::endl; 83 | } 84 | 85 | sanitizerPatchInstructions(SANITIZER_INSTRUCTION_GLOBAL_MEMORY_ACCESS, pModuleData->module, "MemoryGlobalAccessCallback"); 86 | sanitizerPatchInstructions(SANITIZER_INSTRUCTION_SHARED_MEMORY_ACCESS, pModuleData->module, "MemorySharedAccessCallback"); 87 | sanitizerPatchInstructions(SANITIZER_INSTRUCTION_LOCAL_MEMORY_ACCESS, pModuleData->module, "MemoryLocalAccessCallback"); 88 | sanitizerPatchInstructions(SANITIZER_INSTRUCTION_MEMCPY_ASYNC, pModuleData->module, "MemcpyAsyncCallback"); 89 | sanitizerPatchModule(pModuleData->module); 90 | } 91 | 92 | static size_t GetMemAccessSize() 93 | { 94 | constexpr size_t MemAccessDefaultSize = 1024; 95 | 96 | const char* pValue = std::getenv("MEM_ACCESS_SIZE"); 97 | if (!pValue) 98 | { 99 | return MemAccessDefaultSize; 100 | } 101 | 102 | return std::stoi(pValue); 103 | } 104 | 105 | void LaunchBegin( 106 | CallbackTracker* pCallbackTracker, 107 | CUcontext context, 108 | CUfunction function, 109 | std::string functionName, 110 | Sanitizer_StreamHandle stream) 111 | { 112 | const size_t MemAccessSize = GetMemAccessSize(); 113 | 114 | // alloc MemoryAccess array 115 | MemoryAccess* accesses = nullptr; 116 | sanitizerAlloc(context, (void**)&accesses, sizeof(MemoryAccess) * MemAccessSize); 117 | sanitizerMemset(accesses, 0, sizeof(MemoryAccess) * MemAccessSize, stream); 118 | 119 | MemoryAccessTracker hTracker; 120 | hTracker.currentEntry = 0; 121 | hTracker.maxEntry = MemAccessSize; 122 | hTracker.accesses = accesses; 123 | 124 | MemoryAccessTracker* dTracker = nullptr; 125 | sanitizerAlloc(context, (void**)&dTracker, sizeof(*dTracker)); 126 | sanitizerMemcpyHostToDeviceAsync(dTracker, &hTracker, sizeof(*dTracker), stream); 127 | 128 | sanitizerSetCallbackData(function, dTracker); 129 | 130 | LaunchData launchData = {functionName, dTracker}; 131 | std::vector& deviceTrackers = 
pCallbackTracker->memoryTrackers[context][stream]; 132 | deviceTrackers.push_back(launchData); 133 | } 134 | /* Maps access flags to a label: WRITE and READ both set gives Atomic, READ alone gives Read, WRITE alone gives Write, neither gives Unknown. */ 135 | static std::string GetMemoryRWString(uint32_t flags) 136 | { 137 | const bool isWrite = !!(flags & SANITIZER_MEMORY_DEVICE_FLAG_WRITE); 138 | const bool isRead = !!(flags & SANITIZER_MEMORY_DEVICE_FLAG_READ); 139 | 140 | if (isWrite && isRead) 141 | { 142 | return "Atomic"; 143 | } 144 | else if (isRead) 145 | { 146 | return "Read"; 147 | } 148 | else if (isWrite) 149 | { 150 | return "Write"; 151 | } 152 | else 153 | { 154 | return "Unknown"; 155 | } 156 | } 157 | /* Maps a MemoryAccessType to its lower-case name; any value other than Local or Shared is reported as global. */ 158 | static std::string GetMemoryTypeString(MemoryAccessType type) 159 | { 160 | if (type == MemoryAccessType::Local) 161 | { 162 | return "local"; 163 | } 164 | else if (type == MemoryAccessType::Shared) 165 | { 166 | return "shared"; 167 | } 168 | else 169 | { 170 | return "global"; 171 | } 172 | } 173 | /* Invoked for the stream-synchronized callback (and by ContextSynchronized): for every launch recorded on [context][stream], copies the tracker and then min(currentEntry, maxEntry) access records back to the host, prints them to the configured output, frees both device allocations, and finally clears the launch list. The min is needed because the device patch keeps incrementing currentEntry after the buffer is full. Sanitizer return codes are not checked. */ 174 | void StreamSynchronized( 175 | CallbackTracker* pCallbackTracker, 176 | CUcontext context, 177 | Sanitizer_StreamHandle stream) 178 | { 179 | MemoryAccessTracker hTracker = {0}; 180 | 181 | std::vector& deviceTrackers = pCallbackTracker->memoryTrackers[context][stream]; 182 | 183 | for (auto& tracker : deviceTrackers) 184 | { 185 | *pCallbackTracker->out << "Kernel Launch: " << tracker.functionName << std::endl; 186 | 187 | sanitizerMemcpyDeviceToHost(&hTracker, tracker.pTracker, sizeof(*tracker.pTracker), stream); 188 | 189 | uint32_t numEntries = std::min(hTracker.currentEntry, hTracker.maxEntry); 190 | 191 | *pCallbackTracker->out << " Memory accesses: " << numEntries << std::endl; 192 | 193 | std::vector accesses(numEntries); 194 | sanitizerMemcpyDeviceToHost(accesses.data(), hTracker.accesses, sizeof(MemoryAccess) * numEntries, stream); 195 | 196 | for (uint32_t i = 0; i < numEntries; ++i) 197 | { 198 | MemoryAccess& access = accesses[i]; 199 | 200 | *pCallbackTracker->out << " [" << i << "] " << GetMemoryRWString(access.flags) 201 | << " access of " << 
GetMemoryTypeString(access.type) 202 | << " memory by thread (" << access.threadId.x 203 | << "," << access.threadId.y 204 | << "," << access.threadId.z 205 | << ") at address 0x" << std::hex << access.address << std::dec 206 | << " (size is " << access.accessSize << " bytes)" << std::endl; 207 | } 208 | 209 | sanitizerFree(context, hTracker.accesses); 210 | sanitizerFree(context, tracker.pTracker); 211 | } 212 | 213 | deviceTrackers.clear(); 214 | } 215 | /* Invoked for the context-synchronized callback: runs StreamSynchronized() for every stream that has an entry under this context. */ 216 | void ContextSynchronized(CallbackTracker* pCallbackTracker, CUcontext context) 217 | { 218 | auto& contextTracker = pCallbackTracker->memoryTrackers[context]; 219 | 220 | for (auto& streamTracker : contextTracker) 221 | { 222 | StreamSynchronized(pCallbackTracker, context, streamTracker.first); 223 | } 224 | } 225 | /* Single sanitizer callback entry point. Dispatches on (domain, cbid): module loaded goes to ModuleLoaded, launch begin to LaunchBegin, stream synchronized to StreamSynchronized, context synchronized to ContextSynchronized; every other combination is ignored. userdata is the CallbackTracker registered in InitializeInjection(). */ 226 | void MemoryTrackerCallback( 227 | void* userdata, 228 | Sanitizer_CallbackDomain domain, 229 | Sanitizer_CallbackId cbid, 230 | const void* cbdata) 231 | { 232 | auto* callbackTracker = (CallbackTracker*)userdata; 233 | 234 | switch (domain) 235 | { 236 | case SANITIZER_CB_DOMAIN_RESOURCE: 237 | switch (cbid) 238 | { 239 | case SANITIZER_CBID_RESOURCE_MODULE_LOADED: 240 | { 241 | auto* pModuleData = (Sanitizer_ResourceModuleData*)cbdata; 242 | ModuleLoaded(pModuleData); 243 | break; 244 | } 245 | default: 246 | break; 247 | } 248 | break; 249 | case SANITIZER_CB_DOMAIN_LAUNCH: 250 | switch (cbid) 251 | { 252 | case SANITIZER_CBID_LAUNCH_BEGIN: 253 | { 254 | auto* pLaunchData = (Sanitizer_LaunchData*)cbdata; 255 | LaunchBegin(callbackTracker, pLaunchData->context, pLaunchData->function, pLaunchData->functionName, pLaunchData->hStream); 256 | break; 257 | } 258 | default: 259 | break; 260 | } 261 | break; 262 | case SANITIZER_CB_DOMAIN_SYNCHRONIZE: 263 | switch (cbid) 264 | { 265 | case SANITIZER_CBID_SYNCHRONIZE_STREAM_SYNCHRONIZED: 266 | { 267 | auto* pSyncData = (Sanitizer_SynchronizeData*)cbdata; 268 | StreamSynchronized(callbackTracker, pSyncData->context, pSyncData->hStream); 269 | 
break; 270 | } 271 | case SANITIZER_CBID_SYNCHRONIZE_CONTEXT_SYNCHRONIZED: 272 | { 273 | auto* pSyncData = (Sanitizer_SynchronizeData*)cbdata; 274 | ContextSynchronized(callbackTracker, pSyncData->context); 275 | break; 276 | } 277 | default: 278 | break; 279 | } 280 | break; 281 | default: 282 | break; 283 | } 284 | } 285 | /* Subscribes MemoryTrackerCallback with the singleton CallbackTracker as userdata and enables all callback domains for that subscriber. Always returns 0; the sanitizer return codes are not checked. */ 286 | int InitializeInjection() 287 | { 288 | Sanitizer_SubscriberHandle handle; 289 | CallbackTracker& tracker = CallbackTracker::GetInstance(); 290 | 291 | sanitizerSubscribe(&handle, MemoryTrackerCallback, &tracker); 292 | sanitizerEnableAllDomains(1, handle); 293 | 294 | return 0; 295 | } 296 | /* Namespace-scope variable whose initializer calls InitializeInjection() during static initialization. */ 297 | int __global_initializer__ = InitializeInjection(); 298 | -------------------------------------------------------------------------------- /MemoryTracker/MemoryTracker.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved. 2 | * 3 | * Redistribution and use in source and binary forms, with or without 4 | * modification, are permitted provided that the following conditions 5 | * are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution. 11 | * * Neither the name of NVIDIA CORPORATION nor the names of its 12 | * contributors may be used to endorse or promote products derived 13 | * from this software without specific prior written permission. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | */ 27 | 28 | #pragma once 29 | 30 | #include 31 | 32 | #include 33 | /* Kind of memory an access touched; the device patch callbacks tag each record with one of these values. */ 34 | enum class MemoryAccessType 35 | { 36 | Global, 37 | Shared, 38 | Local, 39 | }; 40 | 41 | // Information regarding a memory access 42 | struct MemoryAccess 43 | { 44 | uint64_t address; /* accessed address stored as an integer */ 45 | uint32_t accessSize; /* size of the access; the host prints it as bytes */ 46 | uint32_t flags; /* read and write flag bits as passed to the patch callback */ 47 | dim3 threadId; /* threadIdx of the accessing thread */ 48 | MemoryAccessType type; 49 | }; 50 | 51 | // Main tracking structure that patches get as userdata 52 | struct MemoryAccessTracker 53 | { 54 | uint32_t currentEntry; /* accesses counted so far; the patch keeps incrementing it past maxEntry */ 55 | uint32_t maxEntry; /* capacity of the accesses array */ 56 | MemoryAccess* accesses; /* array the patch writes records into */ 57 | }; 58 | -------------------------------------------------------------------------------- /MemoryTracker/MemoryTrackerPatches.cu: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved. 2 | * 3 | * Redistribution and use in source and binary forms, with or without 4 | * modification, are permitted provided that the following conditions 5 | * are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution. 
11 | * * Neither the name of NVIDIA CORPORATION nor the names of its 12 | * contributors may be used to endorse or promote products derived 13 | * from this software without specific prior written permission. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | */ 27 | 28 | #include "MemoryTracker.h" 29 | 30 | #include 31 | /* Shared body of the patch callbacks. Atomically takes the next slot index from the MemoryAccessTracker passed as userdata; when the index is below maxEntry it stores the address, size, flags, thread index, and memory type in that slot, otherwise the access is dropped (currentEntry has still been incremented). pc is not used. Always returns SANITIZER_PATCH_SUCCESS. */ 32 | static __device__ 33 | SanitizerPatchResult CommonCallback( 34 | void* userdata, 35 | uint64_t pc, 36 | void* ptr, 37 | uint32_t accessSize, 38 | uint32_t flags, 39 | MemoryAccessType type) 40 | { 41 | auto* pTracker = (MemoryAccessTracker*)userdata; 42 | 43 | uint32_t old = atomicAdd(&(pTracker->currentEntry), 1); 44 | 45 | // no more space! 
46 | if (old >= pTracker->maxEntry) 47 | return SANITIZER_PATCH_SUCCESS; 48 | 49 | MemoryAccess& access = pTracker->accesses[old]; 50 | access.address = (uint64_t)(uintptr_t)ptr; 51 | access.accessSize = accessSize; 52 | access.flags = flags; 53 | access.threadId = threadIdx; 54 | access.type = type; 55 | 56 | return SANITIZER_PATCH_SUCCESS; 57 | } 58 | /* Patch entry point named MemoryGlobalAccessCallback; forwards its arguments to CommonCallback tagged as MemoryAccessType::Global. */ 59 | extern "C" __device__ __noinline__ 60 | SanitizerPatchResult MemoryGlobalAccessCallback( 61 | void* userdata, 62 | uint64_t pc, 63 | void* ptr, 64 | uint32_t accessSize, 65 | uint32_t flags) 66 | { 67 | return CommonCallback(userdata, pc, ptr, accessSize, flags, MemoryAccessType::Global); 68 | } 69 | /* Same forwarding, tagged as MemoryAccessType::Shared. */ 70 | extern "C" __device__ __noinline__ 71 | SanitizerPatchResult MemorySharedAccessCallback( 72 | void* userdata, 73 | uint64_t pc, 74 | void* ptr, 75 | uint32_t accessSize, 76 | uint32_t flags) 77 | { 78 | return CommonCallback(userdata, pc, ptr, accessSize, flags, MemoryAccessType::Shared); 79 | } 80 | /* Same forwarding, tagged as MemoryAccessType::Local. */ 81 | extern "C" __device__ __noinline__ 82 | SanitizerPatchResult MemoryLocalAccessCallback( 83 | void* userdata, 84 | uint64_t pc, 85 | void* ptr, 86 | uint32_t accessSize, 87 | uint32_t flags) 88 | { 89 | return CommonCallback(userdata, pc, ptr, accessSize, flags, MemoryAccessType::Local); 90 | } 91 | /* Patch entry point named MemcpyAsyncCallback: records a Global read of src when src is non-null, then a Shared write of dst, and returns the result of the second record. dst arrives as a 32-bit value and is cast to a pointer only for recording; totalShmemSize is not used. */ 92 | extern "C" __device__ __noinline__ 93 | SanitizerPatchResult MemcpyAsyncCallback(void* userdata, uint64_t pc, void* src, uint32_t dst, uint32_t accessSize, uint32_t totalShmemSize) 94 | { 95 | if (src) 96 | { 97 | CommonCallback(userdata, pc, src, accessSize, SANITIZER_MEMORY_DEVICE_FLAG_READ, MemoryAccessType::Global); 98 | } 99 | 100 | return CommonCallback(userdata, pc, (void*)dst, accessSize, SANITIZER_MEMORY_DEVICE_FLAG_WRITE, MemoryAccessType::Shared); 101 | } 102 | -------------------------------------------------------------------------------- /NvtxMemoryPool/Makefile: -------------------------------------------------------------------------------- 1 | 
################################################################################ 2 | # Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in the 11 | # documentation and/or other materials provided with the distribution. 12 | # * Neither the name of NVIDIA CORPORATION nor the names of its 13 | # contributors may be used to endorse or promote products derived 14 | # from this software without specific prior written permission. 15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | # 28 | ################################################################################ 29 | 30 | # Location of the CUDA Toolkit & NVTX 31 | CUDA_PATH ?= /usr/local/cuda 32 | NVTX_PATH ?= ../NVTX/c 33 | 34 | HOST_COMPILER ?= g++ 35 | NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER) 36 | 37 | INCLUDE_FLAGS := -I$(CUDA_PATH)/include -I$(NVTX_PATH)/include 38 | 39 | COMPILE_FLAGS := -MMD 40 | 41 | # Pattern rules to build binary from .cu file 42 | %.o: %.cu 43 | $(NVCC) $(INCLUDE_FLAGS) $(COMPILE_FLAGS) -c $< 44 | %: %.o 45 | $(NVCC) $(LINK_FLAGS) -o $@ $< $(LINK_LIBS) 46 | 47 | # Target rules 48 | all: build 49 | 50 | build: NvtxMemoryPool 51 | 52 | clean: 53 | rm -f NvtxMemoryPool NvtxMemoryPool.o NvtxMemoryPool.d 54 | 55 | clobber: clean 56 | 57 | # Tracks header dependencies (will rebuild on header changes) 58 | -include NvtxMemoryPool.d 59 | -------------------------------------------------------------------------------- /NvtxMemoryPool/NvtxMemoryPool.cu: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. 2 | * 3 | * Redistribution and use in source and binary forms, with or without 4 | * modification, are permitted provided that the following conditions 5 | * are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution. 11 | * * Neither the name of NVIDIA CORPORATION nor the names of its 12 | * contributors may be used to endorse or promote products derived 13 | * from this software without specific prior written permission. 
14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | */ 27 | 28 | #include "NvtxMemoryPool.h" 29 | 30 | #include 31 | 32 | #include 33 | #include 34 | #include 35 | /* checkCudaErrors asserts that a CUDA call returned cudaSuccess; checkCudaLaunch evaluates a kernel launch expression and then checks cudaPeekAtLastError(). NOTE(review): the call itself sits inside assert(), so a build with NDEBUG would drop the call, not just the check. */ 36 | #define checkCudaErrors(Code) assert((Code) == cudaSuccess) 37 | #define checkCudaLaunch(...) checkCudaErrors((__VA_ARGS__, cudaPeekAtLastError())) 38 | /* Kernel: thread i (flat index over blocks and threads) stores i, converted to a byte, in v[i]. There is no bounds check. */ 39 | __global__ void Iota(uint8_t* v) 40 | { 41 | int i = blockIdx.x * blockDim.x + threadIdx.x; 42 | 43 | v[i] = static_cast(i); 44 | } 45 | /* Demo driver: allocates a 4096-byte device pool, wraps it in an NV::Suballocator, registers a 63-byte region at offset 16, and launches Iota against that region in the cases the inline comments label as success or violation (one thread too many, after Unregister, after Reset). The pool is freed once the suballocator has gone out of scope. */ 46 | int main(void) 47 | { 48 | constexpr size_t PoolSize = 4096 * sizeof(uint8_t); 49 | constexpr size_t NumThreads = 63; 50 | constexpr size_t AllocSize = NumThreads * sizeof(uint8_t); 51 | 52 | auto nvtxDomain = nvtxDomainCreateA("my-domain"); 53 | void *pool; 54 | checkCudaErrors(cudaMalloc(&pool, PoolSize)); 55 | 56 | { 57 | // Suballocator object creation (c.f. 
NvtxMemoryPool.h) 58 | auto suballocator = NV::Suballocator(nvtxDomain, pool, PoolSize); 59 | 60 | // Create a suballocation of size AllocSize at offset 16 61 | auto alloc = (uint8_t*)pool + 16; 62 | suballocator.Register(alloc, AllocSize); 63 | 64 | // Success: allocation is valid 65 | checkCudaLaunch(Iota<<<1, NumThreads>>>(alloc)); 66 | checkCudaErrors(cudaDeviceSynchronize()); 67 | 68 | // Violation: last byte out of bounds 69 | checkCudaLaunch(Iota<<<1, NumThreads + 1>>>(alloc)); 70 | checkCudaErrors(cudaDeviceSynchronize()); 71 | 72 | // Success: resizing 73 | suballocator.Resize(alloc, AllocSize + 1); 74 | checkCudaLaunch(Iota<<<1, NumThreads + 1>>>(alloc)); 75 | checkCudaErrors(cudaDeviceSynchronize()); 76 | 77 | // Violation: access after free 78 | suballocator.Unregister(alloc); 79 | checkCudaLaunch(Iota<<<1, 1>>>(alloc)); 80 | checkCudaErrors(cudaDeviceSynchronize()); 81 | 82 | // Violation: access after reset 83 | suballocator.Register(alloc, AllocSize); 84 | suballocator.Reset(); 85 | checkCudaLaunch(Iota<<<1, 1>>>(alloc)); 86 | checkCudaErrors(cudaDeviceSynchronize()); 87 | } 88 | 89 | checkCudaErrors(cudaFree(pool)); 90 | } 91 | -------------------------------------------------------------------------------- /NvtxMemoryPool/NvtxMemoryPool.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. 2 | * 3 | * Redistribution and use in source and binary forms, with or without 4 | * modification, are permitted provided that the following conditions 5 | * are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution. 
11 | * * Neither the name of NVIDIA CORPORATION nor the names of its 12 | * contributors may be used to endorse or promote products derived 13 | * from this software without specific prior written permission. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | */ 27 | 28 | #pragma once 29 | 30 | #if !defined(__cplusplus) 31 | #error "C++ only header. Please use a C++ compiler." 
32 | #endif 33 | 34 | #include 35 | 36 | #include 37 | #include 38 | 39 | namespace NV { 40 | 41 | // Class designed to handle memory pool management 42 | class Suballocator 43 | { 44 | public: 45 | Suballocator(nvtxDomainHandle_t nvtxDomain, void *start, size_t capacity) 46 | : m_nvtxDomain(nvtxDomain), m_start(start), m_capacity(capacity) 47 | { 48 | nvtxMemVirtualRangeDesc_t nvtxRangeDesc = {}; 49 | nvtxRangeDesc.size = m_capacity; 50 | nvtxRangeDesc.ptr = m_start; 51 | 52 | nvtxMemHeapDesc_t nvtxHeapDesc = {}; 53 | nvtxHeapDesc.extCompatID = NVTX_EXT_COMPATID_MEM; 54 | nvtxHeapDesc.structSize = sizeof(nvtxHeapDesc); 55 | nvtxHeapDesc.usage = NVTX_MEM_HEAP_USAGE_TYPE_SUB_ALLOCATOR; 56 | nvtxHeapDesc.type = NVTX_MEM_TYPE_VIRTUAL_ADDRESS; 57 | nvtxHeapDesc.typeSpecificDescSize = sizeof(nvtxRangeDesc); 58 | nvtxHeapDesc.typeSpecificDesc = &nvtxRangeDesc; 59 | 60 | m_nvtxPool = nvtxMemHeapRegister( 61 | nvtxDomain, 62 | &nvtxHeapDesc); 63 | 64 | if (!m_nvtxPool) 65 | { 66 | throw std::runtime_error("Memory pool registration failed."); 67 | } 68 | } 69 | 70 | ~Suballocator() 71 | { 72 | nvtxMemHeapUnregister(m_nvtxDomain, m_nvtxPool); 73 | } 74 | 75 | void Reset() 76 | { 77 | nvtxMemHeapReset(m_nvtxDomain, m_nvtxPool); 78 | } 79 | 80 | void Register(void *ptr, size_t size) 81 | { 82 | nvtxMemVirtualRangeDesc_t nvtxRangeDesc = {}; 83 | nvtxRangeDesc.size = size; 84 | nvtxRangeDesc.ptr = ptr; 85 | 86 | nvtxMemRegionsRegisterBatch_t nvtxRegionsDesc = {}; 87 | nvtxRegionsDesc.extCompatID = NVTX_EXT_COMPATID_MEM; 88 | nvtxRegionsDesc.structSize = sizeof(nvtxRegionsDesc); 89 | nvtxRegionsDesc.regionType = NVTX_MEM_TYPE_VIRTUAL_ADDRESS; 90 | nvtxRegionsDesc.heap = m_nvtxPool; 91 | nvtxRegionsDesc.regionCount = 1; 92 | nvtxRegionsDesc.regionDescElementSize = sizeof(nvtxRangeDesc); 93 | nvtxRegionsDesc.regionDescElements = &nvtxRangeDesc; 94 | 95 | nvtxMemRegionsRegister(m_nvtxDomain, &nvtxRegionsDesc); 96 | } 97 | 98 | void Resize(void *ptr, size_t newSize) 99 | { 100 | 
nvtxMemVirtualRangeDesc_t nvtxRangeDesc = {}; 101 | nvtxRangeDesc.size = newSize; 102 | nvtxRangeDesc.ptr = ptr; 103 | 104 | nvtxMemRegionsResizeBatch_t nvtxRegionsDesc = {}; 105 | nvtxRegionsDesc.extCompatID = NVTX_EXT_COMPATID_MEM; 106 | nvtxRegionsDesc.structSize = sizeof(nvtxRegionsDesc); 107 | nvtxRegionsDesc.regionType = NVTX_MEM_TYPE_VIRTUAL_ADDRESS; 108 | nvtxRegionsDesc.regionDescCount = 1; 109 | nvtxRegionsDesc.regionDescElementSize = sizeof(nvtxRangeDesc); 110 | nvtxRegionsDesc.regionDescElements = &nvtxRangeDesc; 111 | 112 | nvtxMemRegionsResize(m_nvtxDomain, &nvtxRegionsDesc); 113 | } 114 | 115 | void Unregister(void *ptr) 116 | { 117 | nvtxMemRegionRef_t nvtxRegionRef; 118 | nvtxRegionRef.pointer = ptr; 119 | 120 | nvtxMemRegionsUnregisterBatch_t nvtxRegionsDesc = {}; 121 | nvtxRegionsDesc.extCompatID = NVTX_EXT_COMPATID_MEM; 122 | nvtxRegionsDesc.structSize = sizeof(nvtxRegionsDesc); 123 | nvtxRegionsDesc.refType = NVTX_MEM_REGION_REF_TYPE_POINTER; 124 | nvtxRegionsDesc.refCount = 1; 125 | nvtxRegionsDesc.refElementSize = sizeof(nvtxRegionRef); 126 | nvtxRegionsDesc.refElements = &nvtxRegionRef; 127 | 128 | nvtxMemRegionsUnregister(m_nvtxDomain, &nvtxRegionsDesc); 129 | } 130 | 131 | private: 132 | nvtxDomainHandle_t const m_nvtxDomain; 133 | void* const m_start; 134 | size_t const m_capacity; 135 | nvtxMemHeapHandle_t m_nvtxPool; 136 | }; 137 | 138 | } // namespace NV 139 | -------------------------------------------------------------------------------- /NvtxMemoryPool/README.md: -------------------------------------------------------------------------------- 1 | # NVTX Suballocation sample 2 | 3 | Sample demonstrating how to use NVTX Suballocation API: 4 | * `nvtxMemHeapRegister` 5 | * `nvtxMemHeapUnregister` 6 | * `nvtxMemHeapReset` 7 | * `nvtxMemRegionsRegister` 8 | * `nvtxMemRegionsResize` 9 | * `nvtxMemRegionsUnregister` 10 | 11 | ``` 12 | $ make --quiet 13 | $ compute-sanitizer --nvtx=yes --destroy-on-device-error=kernel --show-backtrace=no 
./NvtxMemoryPool 14 | ========= COMPUTE-SANITIZER 15 | ========= Invalid __global__ write of size 1 bytes 16 | ========= at 0x60 in Iota(unsigned char*) 17 | ========= by thread (63,0,0) in block (0,0,0) 18 | ========= Address 0x7f004700004f is out of bounds 19 | ========= 20 | ========= Invalid __global__ write of size 1 bytes 21 | ========= at 0x60 in Iota(unsigned char*) 22 | ========= by thread (0,0,0) in block (0,0,0) 23 | ========= Address 0x7f0047000010 is out of bounds 24 | ========= 25 | ========= Invalid __global__ write of size 1 bytes 26 | ========= at 0x60 in Iota(unsigned char*) 27 | ========= by thread (0,0,0) in block (0,0,0) 28 | ========= Address 0x7f0047000010 is out of bounds 29 | ========= 30 | ========= ERROR SUMMARY: 3 errors 31 | ``` 32 | -------------------------------------------------------------------------------- /NvtxNaming/Makefile: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in the 11 | # documentation and/or other materials provided with the distribution. 12 | # * Neither the name of NVIDIA CORPORATION nor the names of its 13 | # contributors may be used to endorse or promote products derived 14 | # from this software without specific prior written permission. 
15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | # 28 | ################################################################################ 29 | 30 | # Location of the CUDA Toolkit & NVTX 31 | CUDA_PATH ?= /usr/local/cuda 32 | NVTX_PATH ?= ../NVTX/c 33 | 34 | HOST_COMPILER ?= g++ 35 | NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER) 36 | 37 | INCLUDE_FLAGS := -I$(CUDA_PATH)/include -I$(NVTX_PATH)/include 38 | 39 | COMPILE_FLAGS := -MMD 40 | 41 | # Pattern rules to build binary from .cu file 42 | %.o: %.cu 43 | $(NVCC) $(INCLUDE_FLAGS) $(COMPILE_FLAGS) -c $< 44 | %: %.o 45 | $(NVCC) $(LINK_FLAGS) -o $@ $< $(LINK_LIBS) 46 | 47 | # Target rules 48 | all: build 49 | 50 | build: NvtxNaming 51 | 52 | clean: 53 | rm -f NvtxNaming NvtxNaming.o NvtxNaming.d 54 | 55 | clobber: clean 56 | 57 | # Tracks header dependencies (will rebuild on header changes) 58 | -include NvtxNaming.d 59 | -------------------------------------------------------------------------------- /NvtxNaming/NvtxNaming.cu: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. 
#include "NvtxNaming.h"

#define checkCudaErrors(Code) assert((Code) == cudaSuccess)

// Allocates one byte of device memory, labels it through NVTX, then resets
// the device without freeing or writing it, so the sanitizer has a named
// allocation to report.
int main(void)
{
    // Domain under which the label is published.
    nvtxDomainHandle_t nvtxDomain = nvtxDomainCreateA("my-domain");

    // One-byte device allocation, labelled "My allocation".
    void* devicePtr;
    checkCudaErrors(cudaMalloc(&devicePtr, 1));
    NV::LabelMemory(nvtxDomain, devicePtr, "My allocation");

    // Reset the device without cudaFree: the single byte is leaked and was
    // never initialized.
    checkCudaErrors(cudaDeviceReset());
    return 0;
}
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | */ 27 | 28 | #pragma once 29 | 30 | #if !defined(__cplusplus) 31 | #error "C++ only header. Please use a C++ compiler." 32 | #endif 33 | 34 | #include 35 | 36 | namespace NV { 37 | 38 | void LabelMemory(nvtxDomainHandle_t nvtxDomain, const void* ptr, const char* name) 39 | { 40 | nvtxMemRegionNameDesc_t nvtxLabelDesc; 41 | nvtxLabelDesc.regionRefType = NVTX_MEM_REGION_REF_TYPE_POINTER; 42 | nvtxLabelDesc.nameType = NVTX_MESSAGE_TYPE_ASCII; 43 | nvtxLabelDesc.region.pointer = ptr; 44 | nvtxLabelDesc.name.ascii = name; 45 | 46 | nvtxMemRegionsNameBatch_t nvtxRegionsDesc = {}; 47 | nvtxRegionsDesc.extCompatID = NVTX_EXT_COMPATID_MEM; 48 | nvtxRegionsDesc.structSize = sizeof(nvtxRegionsDesc); 49 | nvtxRegionsDesc.regionCount = 1; 50 | nvtxRegionsDesc.regionElementSize = sizeof(nvtxLabelDesc); 51 | nvtxRegionsDesc.regionElements = &nvtxLabelDesc; 52 | 53 | nvtxMemRegionsName(nvtxDomain, &nvtxRegionsDesc); 54 | } 55 | 56 | } // namespace NV 57 | -------------------------------------------------------------------------------- /NvtxNaming/README.md: -------------------------------------------------------------------------------- 1 | # NVTX memory pool sample 2 | 3 | Sample demonstrating how to use `nvtxMemRegionsName`. 
4 | 5 | ``` 6 | $ make --quiet 7 | $ compute-sanitizer --nvtx=yes --leak-check=full --destroy-on-device-error=kernel --show-backtrace=no ./NvtxNaming 8 | ========= COMPUTE-SANITIZER 9 | ========= Leaked 1 bytes at 0x7f26c3000000 called My allocation 10 | ========= 11 | ========= LEAK SUMMARY: 1 bytes leaked in 1 allocations 12 | ========= ERROR SUMMARY: 1 error 13 | $ compute-sanitizer --nvtx=yes --tool=initcheck --track-unused-memory=yes --destroy-on-device-error=kernel --show-backtrace=no ./NvtxNaming 14 | ========= COMPUTE-SANITIZER 15 | ========= Unused memory in allocation 0x7efc4d000000 called My allocation of size 1 16 | ========= Not written any memory. 17 | ========= 100% of allocation were unused. 18 | ========= 19 | ========= ERROR SUMMARY: 1 error 20 | ``` 21 | -------------------------------------------------------------------------------- /NvtxPermissions/Makefile: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in the 11 | # documentation and/or other materials provided with the distribution. 12 | # * Neither the name of NVIDIA CORPORATION nor the names of its 13 | # contributors may be used to endorse or promote products derived 14 | # from this software without specific prior written permission. 
15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | # 28 | ################################################################################ 29 | 30 | # Location of the CUDA Toolkit & NVTX 31 | CUDA_PATH ?= /usr/local/cuda 32 | NVTX_PATH ?= ../NVTX/c 33 | 34 | HOST_COMPILER ?= g++ 35 | NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER) 36 | 37 | INCLUDE_FLAGS := -I$(CUDA_PATH)/include -I$(NVTX_PATH)/include 38 | 39 | COMPILE_FLAGS := -MMD 40 | 41 | # Pattern rules to build binary from .cu file 42 | %.o: %.cu 43 | $(NVCC) $(INCLUDE_FLAGS) $(COMPILE_FLAGS) -c $< 44 | %: %.o 45 | $(NVCC) $(LINK_FLAGS) -o $@ $< $(LINK_LIBS) 46 | 47 | # Target rules 48 | all: build 49 | 50 | build: NvtxPermissions 51 | 52 | clean: 53 | rm -f NvtxPermissions NvtxPermissions.o NvtxPermissions.d 54 | 55 | clobber: clean 56 | 57 | # Tracks header dependencies (will rebuild on header changes) 58 | -include NvtxPermissions.d 59 | -------------------------------------------------------------------------------- /NvtxPermissions/NvtxPermissions.cu: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. 
#include "NvtxPermissions.h"

#define checkCudaErrors(Code) assert((Code) == cudaSuccess)
#define checkCudaLaunch(...) checkCudaErrors((__VA_ARGS__, cudaPeekAtLastError()))

// Touches *v with one plain read, one plain write and one atomic update, so
// each kind of access can be checked against the assigned permissions.
__global__ void IncrementTwice(unsigned int* v)
{
    const unsigned int current = v[0];  // plain read
    v[0] = current + 1u;                // plain write
    atomicAdd(v, 1u);                   // atomic read-modify-write
}

// Launches IncrementTwice on a single thread and waits for it to finish.
static void RunIncrementTwice(unsigned int* counter)
{
    checkCudaLaunch(IncrementTwice<<<1, 1>>>(counter));
    checkCudaErrors(cudaDeviceSynchronize());
}

// Runs the same kernel under successively different NVTX permissions.
int main()
{
    auto nvtxDomain = nvtxDomainCreateA("my-domain");

    unsigned int* ptr;
    checkCudaErrors(cudaMalloc((void**)&ptr, sizeof(unsigned int)));
    checkCudaErrors(cudaMemset(ptr, 0, sizeof(unsigned int)));

    // Success: the allocation is still readable and writable.
    RunIncrementTwice(ptr);

    // Violation: 4-byte write to a read-only allocation.
    NV::PermissionsAssign(nvtxDomain, ptr, NV::PERMISSIONS_READ);
    RunIncrementTwice(ptr);

    // Violation: 4-byte read from a write-only allocation.
    NV::PermissionsAssign(nvtxDomain, ptr, NV::PERMISSIONS_WRITE);
    RunIncrementTwice(ptr);

    // Violation: 4-byte read from an allocation without any permission.
    NV::PermissionsAssign(nvtxDomain, ptr, NV::PERMISSIONS_NONE);
    RunIncrementTwice(ptr);

    // Violation: 4-byte atomic on an allocation that lacks the atomic permission.
    NV::PermissionsAssign(nvtxDomain, ptr, NV::PERMISSIONS_READ | NV::PERMISSIONS_WRITE);
    RunIncrementTwice(ptr);

    checkCudaErrors(cudaFree(ptr));
}
2 | * 3 | * Redistribution and use in source and binary forms, with or without 4 | * modification, are permitted provided that the following conditions 5 | * are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution. 11 | * * Neither the name of NVIDIA CORPORATION nor the names of its 12 | * contributors may be used to endorse or promote products derived 13 | * from this software without specific prior written permission. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | */ 27 | 28 | #pragma once 29 | 30 | #if !defined(__cplusplus) 31 | #error "C++ only header. Please use a C++ compiler." 
32 | #endif 33 | 34 | #include 35 | #include 36 | 37 | #include 38 | 39 | namespace NV { 40 | 41 | enum Permissions 42 | { 43 | PERMISSIONS_NONE = NVTX_MEM_PERMISSIONS_REGION_FLAGS_NONE, 44 | PERMISSIONS_READ = NVTX_MEM_PERMISSIONS_REGION_FLAGS_READ, 45 | PERMISSIONS_WRITE = NVTX_MEM_PERMISSIONS_REGION_FLAGS_WRITE, 46 | PERMISSIONS_ATOMIC = NVTX_MEM_PERMISSIONS_REGION_FLAGS_ATOMIC, 47 | PERMISSIONS_RESET = NVTX_MEM_PERMISSIONS_REGION_FLAGS_RESET 48 | }; 49 | 50 | void PermissionsAssign(nvtxDomainHandle_t nvtxDomain, const void* ptr, uint32_t flags, nvtxMemPermissionsHandle_t handle) 51 | { 52 | nvtxMemPermissionsAssignRegionDesc_t nvtxPermDesc; 53 | nvtxPermDesc.flags = flags; 54 | nvtxPermDesc.regionRefType = NVTX_MEM_REGION_REF_TYPE_POINTER; 55 | nvtxPermDesc.region.pointer = ptr; 56 | 57 | nvtxMemPermissionsAssignBatch_t nvtxRegionsDesc = {}; 58 | nvtxRegionsDesc.extCompatID = NVTX_EXT_COMPATID_MEM; 59 | nvtxRegionsDesc.structSize = sizeof(nvtxRegionsDesc); 60 | nvtxRegionsDesc.permissions = handle; 61 | nvtxRegionsDesc.regionCount = 1; 62 | nvtxRegionsDesc.regionElementSize = sizeof(nvtxPermDesc); 63 | nvtxRegionsDesc.regionElements = &nvtxPermDesc; 64 | 65 | nvtxMemPermissionsAssign(nvtxDomain, &nvtxRegionsDesc); 66 | } 67 | 68 | void PermissionsAssign(nvtxDomainHandle_t nvtxDomain, const void* ptr, uint32_t flags) 69 | { 70 | auto processPermHandle = nvtxMemCudaGetProcessWidePermissions(nvtxDomain); 71 | 72 | PermissionsAssign(nvtxDomain, ptr, flags, processPermHandle); 73 | } 74 | 75 | } // namespace NV 76 | -------------------------------------------------------------------------------- /NvtxPermissions/README.md: -------------------------------------------------------------------------------- 1 | # NVTX Suballocation sample 2 | 3 | Sample demonstrating how to use NVTX Permissions API (basic): 4 | * `nvtxMemPermissionsAssign` 5 | * `nvtxMemCudaGetProcessWidePermissions` 6 | 7 | ``` 8 | $ make --quiet 9 | $ compute-sanitizer --nvtx=yes 
--destroy-on-device-error=kernel --show-backtrace=no ./NvtxPermissions 10 | ========= COMPUTE-SANITIZER 11 | ========= Invalid __global__ write of size 4 bytes 12 | ========= at 0x90 in IncrementTwice(unsigned int*) 13 | ========= by thread (0,0,0) in block (0,0,0) 14 | ========= Address 0x7f6f67000000 is not writable 15 | ========= 16 | ========= Invalid __global__ read of size 4 bytes 17 | ========= at 0x20 in IncrementTwice(unsigned int*) 18 | ========= by thread (0,0,0) in block (0,0,0) 19 | ========= Address 0x7f6f67000000 is not readable 20 | ========= 21 | ========= Invalid __global__ read of size 4 bytes 22 | ========= at 0x20 in IncrementTwice(unsigned int*) 23 | ========= by thread (0,0,0) in block (0,0,0) 24 | ========= Address 0x7f6f67000000 is not readable 25 | ========= 26 | ========= Invalid __global__ atomic of size 4 bytes 27 | ========= at 0xa0 in IncrementTwice(unsigned int*) 28 | ========= by thread (0,0,0) in block (0,0,0) 29 | ========= Address 0x7f6f67000000 is not readable/writable with atomic operations 30 | ========= 31 | ========= ERROR SUMMARY: 4 errors 32 | ``` 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # compute-sanitizer-samples 2 | 3 | Samples demonstrating how to use Compute Sanitizer APIs: 4 | * [Compute Sanitizer Tools](https://docs.nvidia.com/cuda/sanitizer-docs/ComputeSanitizer/index.html) 5 | * [Compute Sanitizer Public API](https://docs.nvidia.com/cuda/sanitizer-docs/SanitizerApiGuide/index.html) 6 | * [NVTX API for Compute Sanitizer](https://docs.nvidia.com/cuda/sanitizer-docs/SanitizerNvtxGuide/index.html) 7 | -------------------------------------------------------------------------------- /Racecheck/Makefile: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 
Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in the 11 | # documentation and/or other materials provided with the distribution. 12 | # * Neither the name of NVIDIA CORPORATION nor the names of its 13 | # contributors may be used to endorse or promote products derived 14 | # from this software without specific prior written permission. 15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | # 28 | ################################################################################ 29 | 30 | # Location of the CUDA Toolkit 31 | CUDA_PATH ?= /usr/local/cuda 32 | SANITIZER_PATH ?= $(CUDA_PATH)/compute-sanitizer 33 | SANITIZER_BIN ?= $(SANITIZER_PATH)/compute-sanitizer 34 | 35 | HOST_COMPILER ?= g++ 36 | NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER) 37 | 38 | INCLUDE_FLAGS := -I$(CUDA_PATH)/include 39 | 40 | COMPILE_FLAGS := -MMD 41 | ifeq ($(dbg),1) 42 | COMPILE_FLAGS += -G -Xcompiler -rdynamic 43 | else 44 | COMPILE_FLAGS += -O2 -lineinfo 45 | endif 46 | 47 | # Pattern rules to build binary from .cu file 48 | %.o: %.cu 49 | $(NVCC) $(INCLUDE_FLAGS) $(COMPILE_FLAGS) -c $< 50 | %: %.o 51 | $(NVCC) $(LINK_FLAGS) -o $@ $< 52 | 53 | # Target rules 54 | TARGET_BINARIES := block_error warp_error 55 | 56 | all: build 57 | 58 | build: $(TARGET_BINARIES) 59 | 60 | clean: 61 | rm -f $(TARGET_BINARIES) block_error.o warp_error.o 62 | 63 | clobber: clean 64 | 65 | # Run sanitizer tools 66 | run_block_error: block_error 67 | $(SANITIZER_BIN) --tool racecheck --racecheck-report analysis ./block_error 68 | 69 | run_warp_error: warp_error 70 | $(SANITIZER_BIN) --tool racecheck --racecheck-report hazard ./warp_error 71 | -------------------------------------------------------------------------------- /Racecheck/block_error.cu: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. 2 | * 3 | * Redistribution and use in source and binary forms, with or without 4 | * modification, are permitted provided that the following conditions 5 | * are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 
#define checkCudaErrors(Code) assert((Code) == cudaSuccess)
#define checkCudaLaunch(...) checkCudaErrors((__VA_ARGS__, cudaPeekAtLastError()))

static constexpr int NumThreads = 128;

__shared__ int smem[NumThreads];

// Each thread stores data_in[t] + t into shared memory; thread 0 then adds up
// all NumThreads entries into *sum_out.
// Launched by main() as a single block of NumThreads threads.
__global__
void sumKernel(int *data_in, int *sum_out)
{
    const int tid = threadIdx.x;
    smem[tid] = data_in[tid] + tid;

    // NOTE(review): no barrier separates the shared-memory writes above from
    // the reads below; presumably this is the hazard the racecheck demo is
    // meant to expose, so none is added here.
    if (tid != 0)
    {
        return;
    }

    *sum_out = 0;

    // Avoid loop unrolling for the purpose of racecheck demo
    #pragma unroll 1
    for (int idx = 0; idx < NumThreads; ++idx)
    {
        *sum_out += smem[idx];
    }
}

// Allocates and zeroes the input, runs sumKernel once and releases the buffers.
int main()
{
    int *d_input = nullptr;
    int *d_sum = nullptr;
    const auto inputBytes = sizeof(int) * NumThreads;

    checkCudaErrors(cudaMalloc((void**)&d_input, inputBytes));
    checkCudaErrors(cudaMalloc((void**)&d_sum, sizeof(int)));
    checkCudaErrors(cudaMemset(d_input, 0, inputBytes));

    checkCudaLaunch(sumKernel<<<1, NumThreads>>>(d_input, d_sum));
    checkCudaErrors(cudaDeviceSynchronize());

    checkCudaErrors(cudaFree(d_input));
    checkCudaErrors(cudaFree(d_sum));
    return 0;
}
11 | * * Neither the name of NVIDIA CORPORATION nor the names of its 12 | * contributors may be used to endorse or promote products derived 13 | * from this software without specific prior written permission. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | */

#include <cassert>

// Evaluate a CUDA runtime expression exactly once and assert that it returned
// cudaSuccess. The expression is evaluated outside assert() so that the call
// still happens when the file is built with NDEBUG (assert() then expands to
// nothing, which would otherwise drop the CUDA call itself). The stringified
// expression is and-ed in so a failing assertion still names the call.
#define checkCudaErrors(Code)                                                 \
    do {                                                                      \
        const cudaError_t checkCudaErrors_status_ = (Code);                   \
        assert(checkCudaErrors_status_ == cudaSuccess && #Code);              \
        (void)checkCudaErrors_status_;                                        \
    } while (0)

// Run a kernel launch, then assert that no launch error is pending.
// cudaPeekAtLastError() reads the pending error without clearing it.
#define checkCudaLaunch(...) checkCudaErrors((__VA_ARGS__, cudaPeekAtLastError()))

// Launch shape: one block of Warps * WarpSize threads.
static constexpr int Warps = 2;
static constexpr int WarpSize = 32;
static constexpr int NumThreads = Warps * WarpSize;

// Stage 1: one slot per thread. Stage 2: one partial sum per warp.
__shared__ int smem_first[NumThreads];
__shared__ int smem_second[Warps];

// Two-stage sum of data_in[tx] + tx over one block:
//   1. every thread writes its value to smem_first;
//   2. the first lane of each warp adds up its warp's WarpSize slots into
//      smem_second[warp];
//   3. after a block barrier, thread 0 adds the per-warp partials into *sum_out.
//
// data_in must hold at least NumThreads ints; sum_out points to one int.
// Indexing uses threadIdx.x only, so a single 1-D block is expected.
//
// NOTE: between steps 1 and 2 there is no __syncwarp()/__syncthreads(), so the
// first lane reads slots written by the other lanes of its warp without any
// barrier. The comment in the loop marks this as a racecheck demo, so the
// missing warp-level synchronization looks intentional; confirm before changing.
__global__
void sumKernel(int *data_in, int *sum_out)
{
    int tx = threadIdx.x;
    smem_first[tx] = data_in[tx] + tx;

    if (tx % WarpSize == 0)
    {
        int wx = tx / WarpSize;

        smem_second[wx] = 0;

        // Avoid loop unrolling for the purpose of racecheck demo
        #pragma unroll 1
        for (int i = 0; i < WarpSize; ++i)
        {
            smem_second[wx] += smem_first[wx * WarpSize + i];
        }
    }

    // Block barrier: the per-warp partials are complete before thread 0 reads them.
    __syncthreads();

    if (tx == 0)
    {
        *sum_out = 0;
        for (int i = 0; i < Warps; ++i)
        {
            *sum_out += smem_second[i];
        }
    }
}

// Allocates and zeroes the input, runs sumKernel on one block, waits for it,
// and frees the buffers. Every runtime call is checked via checkCudaErrors.
int main()
{
    int *data_in = nullptr;
    int *sum_out = nullptr;

    checkCudaErrors(cudaMalloc((void**)&data_in, sizeof(int) * NumThreads));
    checkCudaErrors(cudaMalloc((void**)&sum_out, sizeof(int)));
    checkCudaErrors(cudaMemset(data_in, 0, sizeof(int) * NumThreads));

    checkCudaLaunch(sumKernel<<<1, NumThreads>>>(data_in, sum_out));
    checkCudaErrors(cudaDeviceSynchronize());

    checkCudaErrors(cudaFree(data_in));
    checkCudaErrors(cudaFree(sum_out));
    return 0;
}

-------------------------------------------------------------------------------- /Suppressions/Makefile: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in the 11 | # documentation and/or other materials provided with the distribution. 12 | # * Neither the name of NVIDIA CORPORATION nor the names of its 13 | # contributors may be used to endorse or promote products derived 14 | # from this software without specific prior written permission. 15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | # 28 | ################################################################################

# Location of the CUDA Toolkit
CUDA_PATH ?= /usr/local/cuda
SANITIZER_PATH ?= $(CUDA_PATH)/compute-sanitizer
SANITIZER_BIN ?= $(SANITIZER_PATH)/compute-sanitizer

HOST_COMPILER ?= g++
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)

INCLUDE_FLAGS := -I$(CUDA_PATH)/include

# -MMD asks the compiler to emit a dependency (.d) file alongside each object.
# Build with `make dbg=1` to add device debug info (-G) and -rdynamic for the
# host compiler.
COMPILE_FLAGS := -MMD
ifeq ($(dbg),1)
    COMPILE_FLAGS += -G -Xcompiler -rdynamic
endif

# Pattern rules to build binary from .cu file
%.o: %.cu
	$(NVCC) $(INCLUDE_FLAGS) $(COMPILE_FLAGS) -c $<
%: %.o
	$(NVCC) $(LINK_FLAGS) -o $@ $<

# Target rules
MEMCHECK_BINARY := suppressions_demo
INITCHECK_BINARY := suppressions_initcheck_demo
TARGET_BINARIES := $(MEMCHECK_BINARY) $(INITCHECK_BINARY)

# None of these targets name a file; declare them phony so a stray file called
# e.g. "clean" or "build" cannot shadow them.
.PHONY: all build clean clobber \
        gen_supp run_memcheck run_memcheck_with_supp \
        initcheck_gen_supp run_initcheck run_initcheck_with_supp

all: build

build: $(TARGET_BINARIES)

# Remove both binaries plus their object and dependency files.
# (The variable is TARGET_BINARIES; the previously used $(TARGET_BINARY) was
# never defined, so the binaries were left behind.)
clean:
	rm -f $(TARGET_BINARIES) $(addsuffix .o,$(TARGET_BINARIES)) $(addsuffix .d,$(TARGET_BINARIES))

clobber: clean

# Run sanitizer tools
# gen_supp and initcheck_gen_supp both write supp.xml, so each overwrites the
# other's output; the *_with_supp targets read whatever supp.xml is present and
# expect the matching gen target to have been run first.
gen_supp: $(MEMCHECK_BINARY)
	$(SANITIZER_BIN) --save supp.xml --xml $(MEMCHECK_BINARY)

run_memcheck: $(MEMCHECK_BINARY)
	$(SANITIZER_BIN) $(MEMCHECK_BINARY)

run_memcheck_with_supp: $(MEMCHECK_BINARY)
	$(SANITIZER_BIN) --suppressions supp.xml $(MEMCHECK_BINARY)

initcheck_gen_supp: $(INITCHECK_BINARY)
	$(SANITIZER_BIN) --tool initcheck --save supp.xml --xml $(INITCHECK_BINARY)

run_initcheck: $(INITCHECK_BINARY)
	$(SANITIZER_BIN) --tool initcheck $(INITCHECK_BINARY)

run_initcheck_with_supp: $(INITCHECK_BINARY)
	$(SANITIZER_BIN) --tool initcheck --suppressions supp.xml $(INITCHECK_BINARY)

-------------------------------------------------------------------------------- /Suppressions/suppressions_demo.cu: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
2 | * 3 | * Redistribution and use in source and binary forms, with or without 4 | * modification, are permitted provided that the following conditions 5 | * are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution. 11 | * * Neither the name of NVIDIA CORPORATION nor the names of its 12 | * contributors may be used to endorse or promote products derived 13 | * from this software without specific prior written permission. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | */

// Requests device allocations of 2^45 bytes, then 2^44, ... halving after each
// failed cudaMalloc until one succeeds.
// NOTE(review): the failed attempts are presumably the errors this demo wants
// the sanitizer to report and later suppress; confirm against the README.
// NOTE(review): the successful allocation is never freed; left as in the
// original since it may be part of the demo.
//
// Returns 0 once an allocation succeeds, 1 if even a 1-byte request fails.
int main() {
    int* ptr = nullptr;
    int n = 45;

    // Stop once n would go negative: shifting by a negative count is undefined
    // behaviour, and without this bound the loop never ends when every
    // cudaMalloc fails (for example when no usable device is present).
    while (n >= 0 && cudaMalloc(&ptr, 1ull << n) != cudaSuccess) {
        n--;
    }

    return (n >= 0) ? 0 : 1;
}

-------------------------------------------------------------------------------- /Suppressions/suppressions_initcheck_demo.cu: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 2 | * 3 | * Redistribution and use in source and binary forms, with or without 4 | * modification, are permitted provided that the following conditions 5 | * are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution. 11 | * * Neither the name of NVIDIA CORPORATION nor the names of its 12 | * contributors may be used to endorse or promote products derived 13 | * from this software without specific prior written permission. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | * PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | */

// Writes b[i] = 2 * a[i] for every thread index i below `size`.
// Only threadIdx.x is used for indexing, so one 1-D block is expected.
__global__ void mult(int* a, int* b, int size)
{
    const int idx = threadIdx.x;
    if (idx >= size)
        return;

    b[idx] = 2 * a[idx];
}

// Allocates two int buffers of `size` elements, clears all but the final byte
// of the input buffer, and runs mult on a single block of `size` threads.
// Return codes are not inspected and the buffers are not freed.
int main ()
{
    const int size = 123;
    const auto bytes = size * sizeof(int);
    int *a;
    int *b;

    cudaMalloc(&a, bytes);
    // The memset stops one byte short of the buffer, so the last byte of `a`
    // is still unwritten when the kernel reads it.
    // NOTE(review): presumably the uninitialized read this demo exists to
    // show; confirm before "fixing".
    cudaMemset(a, 0, bytes - 1);
    cudaMalloc(&b, bytes);

    mult<<<1, size>>>(a, b, size);
    cudaDeviceSynchronize();
}

-------------------------------------------------------------------------------- /Synccheck/Makefile: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in the 11 | # documentation and/or other materials provided with the distribution.
12 | # * Neither the name of NVIDIA CORPORATION nor the names of its 13 | # contributors may be used to endorse or promote products derived 14 | # from this software without specific prior written permission. 15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | # 28 | ################################################################################

# Location of the CUDA Toolkit
CUDA_PATH ?= /usr/local/cuda
SANITIZER_PATH ?= $(CUDA_PATH)/compute-sanitizer
SANITIZER_BIN ?= $(SANITIZER_PATH)/compute-sanitizer

HOST_COMPILER ?= g++
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)

INCLUDE_FLAGS := -I$(CUDA_PATH)/include

# -MMD asks the compiler to emit a dependency (.d) file alongside each object.
# Build with `make dbg=1` to add device debug info (-G) and -rdynamic for the
# host compiler.
COMPILE_FLAGS := -MMD
ifeq ($(dbg),1)
    COMPILE_FLAGS += -G -Xcompiler -rdynamic
endif

# Pattern rules to build binary from .cu file
%.o: %.cu
	$(NVCC) $(INCLUDE_FLAGS) $(COMPILE_FLAGS) -c $<
%: %.o
	$(NVCC) $(LINK_FLAGS) -o $@ $<

# Target rules
TARGET_BINARIES := divergent_threads illegal_syncwarp

# None of these targets name a file; declare them phony so a stray file called
# e.g. "clean" or "build" cannot shadow them.
.PHONY: all build clean clobber run_divergent_threads run_illegal_syncwarp

all: build

build: $(TARGET_BINARIES)

# Remove the binaries plus their object and dependency files.
clean:
	rm -f $(TARGET_BINARIES) $(addsuffix .o,$(TARGET_BINARIES)) $(addsuffix .d,$(TARGET_BINARIES))

clobber: clean

# Run sanitizer tools
run_divergent_threads: divergent_threads
	$(SANITIZER_BIN) --tool synccheck ./divergent_threads

run_illegal_syncwarp: illegal_syncwarp
	$(SANITIZER_BIN) --tool synccheck ./illegal_syncwarp

-------------------------------------------------------------------------------- /Synccheck/divergent_threads.cu: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. 2 | * 3 | * Redistribution and use in source and binary forms, with or without 4 | * modification, are permitted provided that the following conditions 5 | * are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer.
8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution. 11 | * * Neither the name of NVIDIA CORPORATION nor the names of its 12 | * contributors may be used to endorse or promote products derived 13 | * from this software without specific prior written permission. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | */

#include <cassert>

// Evaluate a CUDA runtime expression exactly once and assert that it returned
// cudaSuccess. The expression is evaluated outside assert() so that the call
// still happens when the file is built with NDEBUG (assert() then expands to
// nothing, which would otherwise drop the CUDA call itself). The stringified
// expression is and-ed in so a failing assertion still names the call.
#define checkCudaErrors(Code)                                                 \
    do {                                                                      \
        const cudaError_t checkCudaErrors_status_ = (Code);                   \
        assert(checkCudaErrors_status_ == cudaSuccess && #Code);              \
        (void)checkCudaErrors_status_;                                        \
    } while (0)

// Run a kernel launch, then assert that no launch error is pending.
// cudaPeekAtLastError() reads the pending error without clearing it.
#define checkCudaLaunch(...) checkCudaErrors((__VA_ARGS__, cudaPeekAtLastError()))

// One block of NumThreads threads walks the input in DataBlocks chunks of
// NumThreads elements. Size is 16 elements short of a whole number of chunks,
// so the final chunk is only partially populated.
static constexpr int NumThreads = 64;
static constexpr int DataBlocks = 16;
static constexpr int Size = (NumThreads * DataBlocks) - 16;

// Per-thread running sums, one slot per thread of the block.
__shared__ int smem[NumThreads];

// Accumulates data_in[0..Size) into per-thread partial sums in shared memory,
// then thread 0 adds the partials into *sum_out.
//
// data_in must hold at least Size ints; sum_out points to one int.
// Indexing uses threadIdx.x only, so a single 1-D block is expected.
//
// NOTE: the __syncthreads() inside the loop sits under `if (offset < Size)`.
// In the last chunk (b == DataBlocks - 1) only threads 0..47 satisfy the
// condition, so threads 48..63 skip a barrier the rest of the block executes.
// Given this file lives under Synccheck, that divergent barrier is presumably
// the defect being demonstrated; confirm before restructuring.
// NOTE(review): no barrier separates the last accumulation from thread 0's
// final read of smem for the threads that skipped it.
__global__
void myKernel(int *data_in, int *sum_out)
{
    int tx = threadIdx.x;

    smem[tx] = 0;

    __syncthreads();

    for (int b = 0; b < DataBlocks; ++b)
    {
        const int offset = NumThreads * b + tx;
        if (offset < Size)
        {
            smem[tx] += data_in[offset];
            __syncthreads();
        }
    }

    if (tx == 0)
    {
        *sum_out = 0;
        for (int i = 0; i < NumThreads; ++i)
        {
            *sum_out += smem[i];
        }
    }
}

// Allocates the buffers, launches myKernel on one block, waits, and frees.
// data_in is allocated but never written before the kernel reads it.
int main()
{
    int *data_in = nullptr;
    int *sum_out = nullptr;

    checkCudaErrors(cudaMalloc((void**)&data_in, Size * sizeof(int)));
    checkCudaErrors(cudaMalloc((void**)&sum_out, sizeof(int)));

    checkCudaLaunch(myKernel<<<1, NumThreads>>>(data_in, sum_out));

    // NOTE(review): the calls below are not checked, unlike the allocations
    // above; presumably deliberate so the demo exits normally even if the
    // kernel is reported as faulty. Confirm before adding checks.
    cudaDeviceSynchronize();

    cudaFree(data_in);
    cudaFree(sum_out);
    return 0;
}

-------------------------------------------------------------------------------- /Synccheck/illegal_syncwarp.cu: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. 2 | * 3 | * Redistribution and use in source and binary forms, with or without 4 | * modification, are permitted provided that the following conditions 5 | * are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution.
11 | * * Neither the name of NVIDIA CORPORATION nor the names of its 12 | * contributors may be used to endorse or promote products derived 13 | * from this software without specific prior written permission. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | */

#include <cassert>

// Evaluate a CUDA runtime expression exactly once and assert that it returned
// cudaSuccess. The expression is evaluated outside assert() so that the call
// still happens when the file is built with NDEBUG (assert() then expands to
// nothing, which would otherwise drop the CUDA call itself). The stringified
// expression is and-ed in so a failing assertion still names the call.
#define checkCudaErrors(Code)                                                 \
    do {                                                                      \
        const cudaError_t checkCudaErrors_status_ = (Code);                   \
        assert(checkCudaErrors_status_ == cudaSuccess && #Code);              \
        (void)checkCudaErrors_status_;                                        \
    } while (0)

// Run a kernel launch, then assert that no launch error is pending.
// cudaPeekAtLastError() reads the pending error without clearing it.
#define checkCudaLaunch(...) checkCudaErrors((__VA_ARGS__, cudaPeekAtLastError()))

// Threads per block; main() launches exactly one block of this size.
static constexpr int NumThreads = 32;

// One slot per thread of the block.
__shared__ int smem[NumThreads];

// The lower part of the block writes its thread index into shared memory,
// synchronizes with __syncwarp(mask), and thread 0 then sums the first
// NumThreads / 2 slots into *sum_out.
//
// sum_out points to one int. Indexing uses threadIdx.x only, so a single
// 1-D block is expected.
//
// NOTE: `mask` has a bit set for each thread with tx < NumThreads / 2
// (threads 0..15), but the branch admits tx <= NumThreads / 2 (threads 0..16).
// Thread 16 therefore calls __syncwarp(mask) without being named in the mask.
// Given the file name, that mismatch is presumably the illegal __syncwarp
// being demonstrated; confirm before aligning the two conditions.
__global__
void myKernel(int *sum_out)
{
    int tx = threadIdx.x;

    unsigned int mask = __ballot_sync(0xffffffff, tx < (NumThreads / 2));

    if (tx <= (NumThreads / 2))
    {
        smem[tx] = tx;

        __syncwarp(mask);

        if (tx == 0)
        {
            *sum_out = 0;
            for (int i = 0; i < (NumThreads / 2); ++i)
            {
                *sum_out += smem[i];
            }
        }
    }

    // Reached by every thread of the block, outside the conditional above.
    __syncthreads();
}

// Allocates the result slot, launches myKernel on one block, waits, and frees.
int main()
{
    int *sum_out = nullptr;

    checkCudaErrors(cudaMalloc((void**)&sum_out, sizeof(int)));

    checkCudaLaunch(myKernel<<<1, NumThreads>>>(sum_out));

    // NOTE(review): the calls below are not checked, unlike the allocation
    // above; presumably deliberate so the demo exits normally even if the
    // kernel is reported as faulty. Confirm before adding checks.
    cudaDeviceSynchronize();

    cudaFree(sum_out);
    return 0;
}

--------------------------------------------------------------------------------