├── .github └── workflows │ └── main.yml ├── .gitignore ├── Benchmarks.md ├── CMakeLists.txt ├── LICENSE ├── README.md ├── benchmarks ├── BenchGuardedVectorExample.cpp └── CMakeLists.txt ├── cmake └── BadAccessGuardsConfig.cmake.in ├── examples ├── Basic.cpp ├── FoundInProduction.png ├── GuardedVectorExample.cpp └── GuardedVectorExample.h └── src ├── BadAccessGuards.cpp └── BadAccessGuards.h /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | permissions: 3 | contents: read 4 | 5 | on: [push, pull_request, workflow_dispatch] 6 | 7 | jobs: 8 | build: 9 | runs-on: ${{ matrix.os }} 10 | strategy: 11 | matrix: 12 | os: [ubuntu-latest, windows-latest, macos-latest] 13 | # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) 14 | buildtype: [Debug, Release] 15 | env: 16 | BUILD_TYPE: ${{ matrix.buildtype }} 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | with: 21 | submodules: recursive 22 | 23 | - name: Configure CMake 24 | # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. 
25 | # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type 26 | run: cmake -B ${{runner.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} 27 | 28 | - name: Build 29 | # Build your program with the given configuration 30 | run: cmake --build ${{runner.workspace}}/build --config ${{env.BUILD_TYPE}} 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #IDEs 2 | 3 | .vs 4 | .idea 5 | CMakeSettings.json 6 | 7 | # CMake 8 | build*/ 9 | out/ 10 | install 11 | CMakeCache.txt 12 | CMakeFiles 13 | CMakeScripts 14 | Makefile 15 | cmake_install.cmake 16 | install_manifest.txt 17 | CTestTestfile.cmake 18 | Testing/ 19 | 20 | # Object files 21 | *.o 22 | *.ko 23 | *.obj 24 | *.elf 25 | 26 | # Precompiled Headers 27 | *.gch 28 | *.pch 29 | 30 | # Libraries 31 | *.lib 32 | *.a 33 | *.la 34 | *.lo 35 | 36 | # Shared objects (inc. Windows DLLs) 37 | *.dll 38 | *.so 39 | *.so.* 40 | *.dylib 41 | 42 | # Executables 43 | *.exe 44 | *.out 45 | *.app 46 | *.i*86 47 | *.x86_64 48 | *.hex 49 | 50 | # Debug files 51 | *.dSYM/ 52 | *.su 53 | *.aps 54 | -------------------------------------------------------------------------------- /Benchmarks.md: -------------------------------------------------------------------------------- 1 | # Benchmarks 2 | 3 | ## Setup 4 | 5 | - CPU: AMD Ryzen 7 7745HX. 6 | - RAM settings : 5200MT/s, timings 38-38-38-75, 2 channels 7 | - CPU was stabilized at a fixed frequency of 3.49GHz, frequency boost is disabled. 8 | 9 | Simple benchmarks were done using `std::vector<>::push_back` since it uses the most expensive guard (write access). 10 | Memory is reserved upfront, and vector emptied at each iteration, unless ` - noreserve` is mentioned, which means we freed memory after clearing. `complexityN` is the number of elements pushed per iteration. 
11 | See [./benchmarks/BenchGuardedVectorExample.cpp](./benchmarks/BenchGuardedVectorExample.cpp). 12 | 13 | As usual for microbenchmarks, take them with a grain of salt. 14 | 15 | ## Release Results 16 | 17 | ### MSVC 19.42.34436.0 `/EHsc /O2 /Ob1 /DNDEBUG -MD` (CMake Release) 18 | 19 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t 20 | |------------:|--------------------:|--------------------:|--------:|----------:|:------------------- 21 | | 1,000 | 1,564.15 | 639,323.91 | 0.3% | 1.21 | `std::vector.push_back` 22 | | 100,000 | 154,213.73 | 6,484.51 | 0.2% | 1.21 | `std::vector.push_back` 23 | | 10,000,000 | 15,335,971.43 | 65.21 | 0.4% | 1.22 | `std::vector.push_back` 24 | | 1,000 | 2,299.13 | 434,946.34 | 0.1% | 1.21 | `guardedvector.push_back` 25 | | 100,000 | 229,171.87 | 4,363.54 | 0.2% | 1.21 | `guardedvector.push_back` 26 | | 10,000,000 | 22,935,740.00 | 43.60 | 0.3% | 1.17 | `guardedvector.push_back` 27 | | 1,000 | 3,379.57 | 295,895.42 | 0.1% | 1.20 | `std::vector.push_back - noreserve` 28 | | 100,000 | 714,796.69 | 1,399.00 | 2.5% | 1.10 | `std::vector.push_back - noreserve` 29 | | 10,000,000 | 63,554,500.00 | 15.73 | 0.4% | 1.40 | `std::vector.push_back - noreserve` 30 | | 1,000 | 4,153.50 | 240,760.55 | 0.1% | 1.21 | `guardedvector.push_back - noreserve` 31 | | 100,000 | 613,052.66 | 1,631.18 | 4.0% | 1.07 | `guardedvector.push_back - noreserve` 32 | | 10,000,000 | 76,297,700.00 | 13.11 | 0.4% | 1.06 | `guardedvector.push_back - noreserve` 33 | 34 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t*2 35 | |------------:|--------------------:|--------------------:|--------:|----------:|:--------------------- 36 | | 1,000 | 6,551.51 | 152,636.61 | 0.2% | 1.21 | `std::vector.push_back` 37 | | 100,000 | 658,357.05 | 1,518.93 | 0.2% | 1.21 | `std::vector.push_back` 38 | | 10,000,000 | 66,165,800.00 | 15.11 | 0.4% | 1.27 | `std::vector.push_back` 39 | | 1,000 | 6,826.71 | 146,483.42 | 0.1% | 1.17 | `guardedvector.push_back` 
40 | | 100,000 | 681,602.38 | 1,467.13 | 0.2% | 1.16 | `guardedvector.push_back` 41 | | 10,000,000 | 68,201,950.00 | 14.66 | 0.2% | 1.18 | `guardedvector.push_back` 42 | 43 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t*4 44 | |------------:|--------------------:|--------------------:|--------:|----------:|:--------------------- 45 | | 1,000 | 7,158.25 | 139,698.96 | 0.2% | 1.16 | `std::vector.push_back` 46 | | 100,000 | 719,018.42 | 1,390.78 | 0.2% | 1.17 | `std::vector.push_back` 47 | | 10,000,000 | 72,046,100.00 | 13.88 | 0.1% | 1.04 | `std::vector.push_back` 48 | | 1,000 | 7,111.63 | 140,614.74 | 0.3% | 1.16 | `guardedvector.push_back` 49 | | 100,000 | 714,421.74 | 1,399.73 | 0.2% | 1.17 | `guardedvector.push_back` 50 | | 10,000,000 | 71,789,000.00 | 13.93 | 0.3% | 1.04 | `guardedvector.push_back` 51 | 52 | | complexityN | ns/op | op/s | err% | total | Vector of std::string 53 | |------------:|--------------------:|--------------------:|--------:|----------:|:---------------------- 54 | | 1,000 | 8,467.38 | 118,100.24 | 0.2% | 1.18 | `std::vector.push_back` 55 | | 100,000 | 853,885.93 | 1,171.12 | 0.1% | 1.18 | `std::vector.push_back` 56 | | 10,000,000 | 89,772,500.00 | 11.14 | 0.3% | 1.02 | `std::vector.push_back` 57 | | 1,000 | 9,313.27 | 107,373.67 | 0.1% | 1.19 | `guardedvector.push_back` 58 | | 100,000 | 931,452.34 | 1,073.59 | 0.3% | 1.18 | `guardedvector.push_back` 59 | | 10,000,000 | 97,508,400.00 | 10.26 | 0.4% | 1.12 | `guardedvector.push_back` 60 | 61 | ### Clang 14.0.6 WSL `-O3 -DNDEBUG` (CMake Release default) 62 | 63 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t 64 | |------------:|--------------------:|--------------------:|--------:|----------:|:------------------- 65 | | 1,000 | 1,255.43 | 796,539.90 | 0.3% | 1.21 | `std::vector.push_back` 66 | | 100,000 | 125,474.04 | 7,969.78 | 0.1% | 1.21 | `std::vector.push_back` 67 | | 10,000,000 | 12,602,706.56 | 79.35 | 0.3% | 1.21 | `std::vector.push_back` 68 | | 
1,000 | 5,177.10 | 193,158.21 | 0.0% | 1.21 | `guardedvector.push_back` 69 | | 100,000 | 521,834.49 | 1,916.32 | 0.5% | 1.22 | `guardedvector.push_back` 70 | | 10,000,000 | 52,530,619.00 | 19.04 | 0.4% | 1.07 | `guardedvector.push_back` 71 | | 1,000 | 1,793.98 | 557,420.87 | 0.1% | 1.21 | `std::vector.push_back - noreserve` 72 | | 100,000 | 789,574.04 | 1,266.51 | 0.2% | 1.18 | `std::vector.push_back - noreserve` 73 | | 10,000,000 | 41,802,090.00 | 23.92 | 0.7% | 1.26 | `std::vector.push_back - noreserve` 74 | | 1,000 | 5,749.73 | 173,921.34 | 0.1% | 1.21 | `guardedvector.push_back - noreserve` 75 | | 100,000 | 561,216.09 | 1,781.84 | 0.0% | 1.21 | `guardedvector.push_back - noreserve` 76 | | 10,000,000 | 81,652,733.00 | 12.25 | 0.7% | 0.92 | `guardedvector.push_back - noreserve` 77 | 78 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t*2 79 | |------------:|--------------------:|--------------------:|--------:|----------:|:--------------------- 80 | | 1,000 | 1,510.51 | 662,029.30 | 0.1% | 1.21 | `std::vector.push_back` 81 | | 100,000 | 151,351.00 | 6,607.16 | 0.1% | 1.21 | `std::vector.push_back` 82 | | 10,000,000 | 15,283,910.14 | 65.43 | 0.1% | 1.25 | `std::vector.push_back` 83 | | 1,000 | 3,470.75 | 288,122.19 | 0.1% | 1.21 | `guardedvector.push_back` 84 | | 100,000 | 344,128.11 | 2,905.89 | 0.1% | 1.22 | `guardedvector.push_back` 85 | | 10,000,000 | 34,999,904.00 | 28.57 | 0.2% | 1.05 | `guardedvector.push_back` 86 | 87 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t*4 88 | |------------:|--------------------:|--------------------:|--------:|----------:|:--------------------- 89 | | 1,000 | 1,782.07 | 561,144.07 | 0.1% | 1.21 | `std::vector.push_back` 90 | | 100,000 | 174,452.56 | 5,732.22 | 0.4% | 1.22 | `std::vector.push_back` 91 | | 10,000,000 | 19,031,163.40 | 52.55 | 0.5% | 0.84 | `std::vector.push_back` 92 | | 1,000 | 3,536.04 | 282,801.98 | 0.1% | 1.21 | `guardedvector.push_back` 93 | | 100,000 | 349,298.37 | 
2,862.88 | 0.6% | 1.21 | `guardedvector.push_back` 94 | | 10,000,000 | 35,701,763.00 | 28.01 | 0.1% | 0.99 | `guardedvector.push_back` 95 | 96 | | complexityN | ns/op | op/s | err% | total | Vector of std::string 97 | |------------:|--------------------:|--------------------:|--------:|----------:|:---------------------- 98 | | 1,000 | 8,251.14 | 121,195.32 | 0.1% | 1.18 | `std::vector.push_back` 99 | | 100,000 | 801,689.65 | 1,247.37 | 0.2% | 1.18 | `std::vector.push_back` 100 | | 10,000,000 | 85,695,325.00 | 11.67 | 0.3% | 1.00 | `std::vector.push_back` 101 | | 1,000 | 8,541.92 | 117,069.75 | 0.0% | 1.18 | `guardedvector.push_back` 102 | | 100,000 | 837,175.80 | 1,194.49 | 0.1% | 1.19 | `guardedvector.push_back` 103 | | 10,000,000 | 89,646,819.00 | 11.15 | 0.9% | 1.04 | `guardedvector.push_back` 104 | 105 | ### Clang 14.0.6 WSL `-O2 -DNDEBUG` (CMake RelWithDebInfo default) 106 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t 107 | |------------:|--------------------:|--------------------:|--------:|----------:|:------------------- 108 | | 1,000 | 1,268.95 | 788,051.74 | 0.1% | 1.21 | `std::vector.push_back` 109 | | 100,000 | 126,408.23 | 7,910.88 | 0.1% | 1.21 | `std::vector.push_back` 110 | | 10,000,000 | 12,687,909.22 | 78.82 | 0.1% | 1.19 | `std::vector.push_back` 111 | | 1,000 | 4,583.13 | 218,191.46 | 0.1% | 1.21 | `guardedvector.push_back` 112 | | 100,000 | 461,465.16 | 2,167.01 | 0.5% | 1.22 | `guardedvector.push_back` 113 | | 10,000,000 | 45,963,623.50 | 21.76 | 0.1% | 1.19 | `guardedvector.push_back` 114 | | 1,000 | 1,830.49 | 546,302.77 | 0.1% | 1.21 | `std::vector.push_back - noreserve` 115 | | 100,000 | 792,822.74 | 1,261.32 | 0.3% | 1.17 | `std::vector.push_back - noreserve` 116 | | 10,000,000 | 42,369,383.67 | 23.60 | 1.2% | 1.23 | `std::vector.push_back - noreserve` 117 | | 1,000 | 5,216.11 | 191,713.57 | 0.7% | 1.21 | `guardedvector.push_back - noreserve` 118 | | 100,000 | 501,444.59 | 1,994.24 | 0.1% | 1.18 | 
`guardedvector.push_back - noreserve` 119 | | 10,000,000 | 75,929,147.00 | 13.17 | 0.9% | 0.93 | `guardedvector.push_back - noreserve` 120 | 121 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t*2 122 | |------------:|--------------------:|--------------------:|--------:|----------:|:--------------------- 123 | | 1,000 | 6,950.17 | 143,881.46 | 0.2% | 1.18 | `std::vector.push_back` 124 | | 100,000 | 702,932.35 | 1,422.61 | 0.4% | 1.17 | `std::vector.push_back` 125 | | 10,000,000 | 70,359,581.00 | 14.21 | 0.3% | 1.02 | `std::vector.push_back` 126 | | 1,000 | 7,043.86 | 141,967.58 | 0.2% | 1.17 | `guardedvector.push_back` 127 | | 100,000 | 700,146.79 | 1,428.27 | 0.1% | 1.16 | `guardedvector.push_back` 128 | | 10,000,000 | 70,189,106.00 | 14.25 | 0.3% | 1.02 | `guardedvector.push_back` 129 | 130 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t*4 131 | |------------:|--------------------:|--------------------:|--------:|----------:|:--------------------- 132 | | 1,000 | 7,258.91 | 137,761.67 | 0.2% | 1.17 | `std::vector.push_back` 133 | | 100,000 | 725,305.81 | 1,378.73 | 0.1% | 1.16 | `std::vector.push_back` 134 | | 10,000,000 | 73,063,461.00 | 13.69 | 0.1% | 0.87 | `std::vector.push_back` 135 | | 1,000 | 8,186.97 | 122,145.24 | 0.0% | 1.18 | `guardedvector.push_back` 136 | | 100,000 | 751,092.72 | 1,331.39 | 0.1% | 1.19 | `guardedvector.push_back` 137 | | 10,000,000 | 83,006,817.00 | 12.05 | 0.4% | 0.97 | `guardedvector.push_back` 138 | 139 | | complexityN | ns/op | op/s | err% | total | Vector of std::string 140 | |------------:|--------------------:|--------------------:|--------:|----------:|:---------------------- 141 | | 1,000 | 9,304.15 | 107,478.92 | 0.4% | 1.19 | `std::vector.push_back` 142 | | 100,000 | 936,314.02 | 1,068.02 | 0.3% | 1.19 | `std::vector.push_back` 143 | | 10,000,000 | 95,620,062.00 | 10.46 | 0.2% | 1.11 | `std::vector.push_back` 144 | | 1,000 | 10,389.77 | 96,248.51 | 0.1% | 1.21 | 
`guardedvector.push_back` 145 | | 100,000 | 1,039,111.34 | 962.36 | 0.1% | 1.21 | `guardedvector.push_back` 146 | | 10,000,000 | 108,038,449.00 | 9.26 | 0.4% | 1.25 | `guardedvector.push_back` 147 | 148 | ### GCC 12.2.0 WSL `-O3 -DNDEBUG` (CMake Release default) 149 | 150 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t 151 | |------------:|--------------------:|--------------------:|--------:|----------:|:------------------- 152 | | 1,000 | 914.55 | 1,093,437.87 | 0.1% | 1.19 | `std::vector.push_back` 153 | | 100,000 | 91,657.94 | 10,910.13 | 0.3% | 1.19 | `std::vector.push_back` 154 | | 10,000,000 | 9,405,401.83 | 106.32 | 0.3% | 1.20 | `std::vector.push_back` 155 | | 1,000 | 2,298.95 | 434,980.49 | 0.1% | 1.21 | `guardedvector.push_back` 156 | | 100,000 | 229,764.35 | 4,352.29 | 0.1% | 1.21 | `guardedvector.push_back` 157 | | 10,000,000 | 23,035,121.25 | 43.41 | 0.1% | 1.17 | `guardedvector.push_back` 158 | | 1,000 | 1,491.71 | 670,371.51 | 0.0% | 1.21 | `std::vector.push_back - noreserve` 159 | | 100,000 | 749,281.29 | 1,334.61 | 0.6% | 1.18 | `std::vector.push_back - noreserve` 160 | | 10,000,000 | 37,198,530.67 | 26.88 | 1.6% | 1.18 | `std::vector.push_back - noreserve` 161 | | 1,000 | 2,929.68 | 341,334.35 | 0.3% | 1.22 | `guardedvector.push_back - noreserve` 162 | | 100,000 | 275,802.73 | 3,625.78 | 0.2% | 1.21 | `guardedvector.push_back - noreserve` 163 | | 10,000,000 | 51,219,745.50 | 19.52 | 1.6% | 1.08 | `guardedvector.push_back - noreserve` 164 | 165 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t*2 166 | |------------:|--------------------:|--------------------:|--------:|----------:|:--------------------- 167 | | 1,000 | 6,582.44 | 151,919.41 | 0.1% | 1.21 | `std::vector.push_back` 168 | | 100,000 | 660,532.34 | 1,513.93 | 0.2% | 1.21 | `std::vector.push_back` 169 | | 10,000,000 | 66,092,501.00 | 15.13 | 0.6% | 1.16 | `std::vector.push_back` 170 | | 1,000 | 6,867.22 | 145,619.32 | 0.4% | 1.16 | 
`guardedvector.push_back` 171 | | 100,000 | 689,774.62 | 1,449.75 | 0.1% | 1.17 | `guardedvector.push_back` 172 | | 10,000,000 | 69,074,072.50 | 14.48 | 0.2% | 1.07 | `guardedvector.push_back` 173 | 174 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t*4 175 | |------------:|--------------------:|--------------------:|--------:|----------:|:--------------------- 176 | | 1,000 | 8,986.54 | 111,277.49 | 0.2% | 1.18 | `std::vector.push_back` 177 | | 100,000 | 776,507.54 | 1,287.82 | 0.4% | 1.18 | `std::vector.push_back` 178 | | 10,000,000 | 90,180,989.00 | 11.09 | 0.2% | 1.05 | `std::vector.push_back` 179 | | 1,000 | 7,884.75 | 126,827.11 | 0.2% | 1.17 | `guardedvector.push_back` 180 | | 100,000 | 753,485.57 | 1,327.17 | 0.1% | 1.17 | `guardedvector.push_back` 181 | | 10,000,000 | 80,594,003.00 | 12.41 | 0.1% | 0.95 | `guardedvector.push_back` 182 | 183 | | complexityN | ns/op | op/s | err% | total | Vector of std::string 184 | |------------:|--------------------:|--------------------:|--------:|----------:|:---------------------- 185 | | 1,000 | 9,321.70 | 107,276.60 | 0.1% | 1.19 | `std::vector.push_back` 186 | | 100,000 | 776,655.69 | 1,287.57 | 0.3% | 1.18 | `std::vector.push_back` 187 | | 10,000,000 | 97,298,103.00 | 10.28 | 0.8% | 1.13 | `std::vector.push_back` 188 | | 1,000 | 9,013.14 | 110,949.10 | 0.0% | 1.18 | `guardedvector.push_back` 189 | | 100,000 | 875,499.57 | 1,142.21 | 0.4% | 1.18 | `guardedvector.push_back` 190 | | 10,000,000 | 92,305,430.00 | 10.83 | 0.7% | 1.08 | `guardedvector.push_back` 191 | 192 | 193 | ## Debug builds results 194 | 195 | ### MSVC 19.42.34436.0 `/Ob0 /Od /RTC1 -MDd` (CMake Debug default) 196 | 197 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t 198 | |------------:|--------------------:|--------------------:|--------:|----------:|:------------------- 199 | | 1,000 | 53,578.34 | 18,664.26 | 0.1% | 1.21 | `std::vector.push_back` 200 | | 1,000 | 54,655.18 | 18,296.53 | 0.2% | 1.21 | 
`guardedvector.push_back_noguard` 201 | | 1,000 | 78,635.86 | 12,716.84 | 0.4% | 1.17 | `guardedvector.push_back` 202 | | 1,000 | 61,116.90 | 16,362.09 | 0.1% | 1.20 | `std::vector.push_back - noreserve` 203 | | 1,000 | 62,312.97 | 16,048.02 | 0.1% | 1.21 | `guardedvector.push_back_noguard - noreserve` 204 | | 1,000 | 85,988.50 | 11,629.46 | 0.0% | 1.18 | `guardedvector.push_back - noreserve` 205 | 206 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t*2 207 | |------------:|--------------------:|--------------------:|--------:|----------:|:--------------------- 208 | | 1,000 | 63,358.73 | 15,783.14 | 0.0% | 1.21 | `std::vector.push_back` 209 | | 1,000 | 66,385.34 | 15,063.57 | 0.0% | 1.21 | `guardedvector.push_back_noguard` 210 | | 1,000 | 90,441.90 | 11,056.82 | 0.1% | 1.18 | `guardedvector.push_back` 211 | 212 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t*4 213 | |------------:|--------------------:|--------------------:|--------:|----------:|:--------------------- 214 | | 1,000 | 62,788.88 | 15,926.39 | 0.1% | 1.21 | `std::vector.push_back` 215 | | 1,000 | 65,782.26 | 15,201.67 | 0.0% | 1.21 | `guardedvector.push_back_noguard` 216 | | 1,000 | 89,670.20 | 11,151.98 | 0.1% | 1.18 | `guardedvector.push_back` 217 | 218 | | complexityN | ns/op | op/s | err% | total | Vector of std::string 219 | |------------:|--------------------:|--------------------:|--------:|----------:|:---------------------- 220 | | 1,000 | 892,186.67 | 1,120.84 | 0.0% | 1.18 | `std::vector.push_back` 221 | | 1,000 | 891,737.39 | 1,121.41 | 0.0% | 1.18 | `guardedvector.push_back_noguard` 222 | | 1,000 | 921,326.50 | 1,085.39 | 0.1% | 1.19 | `guardedvector.push_back` 223 | 224 | ### MSVC 19.42.34436.0 `/d2Obforceinline /RTC1` 225 | 226 | > Note: NOT `/Od` since it even disables `/d2Obforceinline`... You should probably be using at least `/O1` anyway. 
227 | 228 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t 229 | |------------:|--------------------:|--------------------:|--------:|----------:|:------------------- 230 | | 1,000 | 55,695.84 | 17,954.66 | 0.3% | 1.21 | `std::vector.push_back` 231 | | 1,000 | 55,816.60 | 17,915.82 | 0.2% | 1.22 | `guardedvector.push_back_noguard` 232 | | 1,000 | 71,371.23 | 14,011.25 | 0.3% | 1.16 | `guardedvector.push_back` 233 | | 1,000 | 63,074.18 | 15,854.35 | 0.2% | 1.21 | `std::vector.push_back - noreserve` 234 | | 1,000 | 63,063.28 | 15,857.09 | 0.1% | 1.21 | `guardedvector.push_back_noguard - noreserve` 235 | | 1,000 | 78,940.42 | 12,667.78 | 0.1% | 1.17 | `guardedvector.push_back - noreserve` 236 | 237 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t*2 238 | |------------:|--------------------:|--------------------:|--------:|----------:|:--------------------- 239 | | 1,000 | 63,507.71 | 15,746.12 | 0.1% | 1.21 | `std::vector.push_back` 240 | | 1,000 | 66,471.69 | 15,044.00 | 0.1% | 1.21 | `guardedvector.push_back_noguard` 241 | | 1,000 | 83,548.44 | 11,969.10 | 0.1% | 1.18 | `guardedvector.push_back` 242 | 243 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t*4 244 | |------------:|--------------------:|--------------------:|--------:|----------:|:--------------------- 245 | | 1,000 | 63,555.23 | 15,734.35 | 0.2% | 1.21 | `std::vector.push_back` 246 | | 1,000 | 66,519.09 | 15,033.28 | 0.1% | 1.21 | `guardedvector.push_back_noguard` 247 | | 1,000 | 82,997.72 | 12,048.52 | 0.1% | 1.18 | `guardedvector.push_back` 248 | 249 | | complexityN | ns/op | op/s | err% | total | Vector of std::string 250 | |------------:|--------------------:|--------------------:|--------:|----------:|:---------------------- 251 | | 1,000 | 881,261.21 | 1,134.74 | 0.0% | 1.18 | `std::vector.push_back` 252 | | 1,000 | 888,316.95 | 1,125.72 | 0.1% | 1.18 | `guardedvector.push_back_noguard` 253 | | 1,000 | 898,869.30 | 1,112.51 | 0.1% | 1.18 | 
`guardedvector.push_back` 254 | 255 | ### Clang 14.0.6 WSL `-g` (CMake Debug default) 256 | 257 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t 258 | |------------:|--------------------:|--------------------:|--------:|----------:|:------------------- 259 | | 1,000 | 11,448.58 | 87,347.04 | 0.1% | 1.21 | `std::vector.push_back` 260 | | 1,000 | 13,378.30 | 74,747.89 | 1.1% | 1.22 | `guardedvector.push_back_noguard` 261 | | 1,000 | 20,614.45 | 48,509.65 | 0.1% | 1.21 | `guardedvector.push_back` 262 | | 1,000 | 13,890.99 | 71,989.10 | 0.1% | 1.21 | `std::vector.push_back - noreserve` 263 | | 1,000 | 16,102.80 | 62,101.00 | 0.1% | 1.21 | `guardedvector.push_back_noguard - noreserve` 264 | | 1,000 | 23,119.37 | 43,253.76 | 0.1% | 1.21 | `guardedvector.push_back - noreserve` 265 | 266 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t*2 267 | |------------:|--------------------:|--------------------:|--------:|----------:|:--------------------- 268 | | 1,000 | 18,230.03 | 54,854.56 | 0.0% | 1.21 | `std::vector.push_back` 269 | | 1,000 | 21,258.31 | 47,040.43 | 0.0% | 1.21 | `guardedvector.push_back_noguard` 270 | | 1,000 | 27,903.21 | 35,838.17 | 0.0% | 1.21 | `guardedvector.push_back` 271 | 272 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t*4 273 | |------------:|--------------------:|--------------------:|--------:|----------:|:--------------------- 274 | | 1,000 | 19,238.49 | 51,979.13 | 0.0% | 1.21 | `std::vector.push_back` 275 | | 1,000 | 22,229.66 | 44,984.93 | 0.1% | 1.21 | `guardedvector.push_back_noguard` 276 | | 1,000 | 28,778.67 | 34,747.96 | 0.0% | 1.21 | `guardedvector.push_back` 277 | 278 | | complexityN | ns/op | op/s | err% | total | Vector of std::string 279 | |------------:|--------------------:|--------------------:|--------:|----------:|:---------------------- 280 | | 1,000 | 40,453.30 | 24,719.86 | 0.0% | 1.21 | `std::vector.push_back` 281 | | 1,000 | 43,442.36 | 23,019.01 | 0.1% | 1.22 | 
`guardedvector.push_back_noguard` 282 | | 1,000 | 49,608.16 | 20,157.97 | 0.2% | 1.22 | `guardedvector.push_back` 283 | 284 | ### Clang 14.0.6 WSL `-g -Og` 285 | 286 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t 287 | |------------:|--------------------:|--------------------:|--------:|----------:|:------------------- 288 | | 1,000 | 1,370.84 | 729,480.18 | 0.1% | 1.21 | `std::vector.push_back` 289 | | 1,000 | 1,493.93 | 669,375.85 | 0.0% | 1.21 | `guardedvector.push_back_noguard` 290 | | 1,000 | 4,596.86 | 217,539.95 | 0.0% | 1.21 | `guardedvector.push_back` 291 | | 1,000 | 1,871.19 | 534,419.24 | 0.1% | 1.23 | `std::vector.push_back - noreserve` 292 | | 1,000 | 2,010.68 | 497,343.61 | 0.0% | 1.22 | `guardedvector.push_back_noguard - noreserve` 293 | | 1,000 | 5,165.98 | 193,574.13 | 0.1% | 1.21 | `guardedvector.push_back - noreserve` 294 | 295 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t*2 296 | |------------:|--------------------:|--------------------:|--------:|----------:|:--------------------- 297 | | 1,000 | 6,907.74 | 144,765.06 | 0.0% | 1.16 | `std::vector.push_back` 298 | | 1,000 | 7,228.74 | 138,336.76 | 0.1% | 1.17 | `guardedvector.push_back_noguard` 299 | | 1,000 | 7,243.88 | 138,047.62 | 0.1% | 1.17 | `guardedvector.push_back` 300 | 301 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t*4 302 | |------------:|--------------------:|--------------------:|--------:|----------:|:--------------------- 303 | | 1,000 | 7,296.97 | 137,043.15 | 0.0% | 1.17 | `std::vector.push_back` 304 | | 1,000 | 7,242.02 | 138,082.96 | 0.1% | 1.17 | `guardedvector.push_back_noguard` 305 | | 1,000 | 7,359.22 | 135,883.91 | 0.0% | 1.17 | `guardedvector.push_back` 306 | 307 | | complexityN | ns/op | op/s | err% | total | Vector of std::string 308 | |------------:|--------------------:|--------------------:|--------:|----------:|:---------------------- 309 | | 1,000 | 9,203.12 | 108,658.80 | 0.0% | 1.19 | 
`std::vector.push_back` 310 | | 1,000 | 9,208.55 | 108,594.77 | 0.0% | 1.19 | `guardedvector.push_back_noguard` 311 | | 1,000 | 10,355.30 | 96,568.95 | 0.0% | 1.21 | `guardedvector.push_back` 312 | 313 | ## ThreadSanitizer 314 | 315 | ### Clang 14.0.6 WSL `-fsanitize=thread -O3 -DNDEBUG` (CMake Release default) 316 | 317 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t 318 | |------------:|--------------------:|--------------------:|--------:|----------:|:------------------- 319 | | 1,000 | 27,569.43 | 36,272.06 | 0.1% | 1.21 | `std::vector.push_back` 320 | | 100,000 | 2,755,273.56 | 362.94 | 0.1% | 1.21 | `std::vector.push_back` 321 | | 10,000,000 | 275,502,920.00 | 3.63 | 0.0% | 3.18 | `std::vector.push_back` 322 | | 1,000 | 34,961.04 | 28,603.27 | 0.0% | 1.21 | `std::vector.push_back - noreserve` 323 | | 100,000 | 6,557,961.12 | 152.49 | 0.1% | 1.22 | `std::vector.push_back - noreserve` 324 | | 10,000,000 | 873,837,301.00 | 1.14 | 3.2% | 9.30 | `std::vector.push_back - noreserve` 325 | 326 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t*2 327 | |------------:|--------------------:|--------------------:|--------:|----------:|:--------------------- 328 | | 1,000 | 33,820.31 | 29,568.03 | 0.1% | 1.22 | `std::vector.push_back` 329 | | 100,000 | 3,385,774.65 | 295.35 | 0.1% | 1.21 | `std::vector.push_back` 330 | | 10,000,000 | 338,732,175.00 | 2.95 | 0.1% | 4.15 | `std::vector.push_back` 331 | 332 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t*4 333 | |------------:|--------------------:|--------------------:|--------:|----------:|:--------------------- 334 | | 1,000 | 54,151.45 | 18,466.73 | 0.0% | 1.21 | `std::vector.push_back` 335 | | 100,000 | 5,437,534.41 | 183.91 | 0.2% | 1.22 | `std::vector.push_back` 336 | | 10,000,000 | 543,613,425.00 | 1.84 | 0.1% | 6.57 | `std::vector.push_back` 337 | 338 | | complexityN | ns/op | op/s | err% | total | Vector of std::string 339 | 
|------------:|--------------------:|--------------------:|--------:|----------:|:---------------------- 340 | | 1,000 | 48,015.64 | 20,826.55 | 0.1% | 1.21 | `std::vector.push_back` 341 | | 100,000 | 4,812,346.95 | 207.80 | 0.1% | 1.23 | `std::vector.push_back` 342 | | 10,000,000 | 483,697,656.00 | 2.07 | 0.1% | 5.90 | `std::vector.push_back` 343 | 344 | 345 | ### Clang 14.0.6 WSL `-fsanitize=thread -O2 -DNDEBUG` (CMake RelWithDebInfo default) 346 | 347 | 348 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t 349 | |------------:|--------------------:|--------------------:|--------:|----------:|:------------------- 350 | | 1,000 | 27,600.31 | 36,231.48 | 0.1% | 1.21 | `std::vector.push_back` 351 | | 100,000 | 2,756,755.84 | 362.75 | 0.1% | 1.21 | `std::vector.push_back` 352 | | 10,000,000 | 275,798,601.00 | 3.63 | 0.1% | 3.18 | `std::vector.push_back` 353 | | 1,000 | 35,054.95 | 28,526.64 | 0.0% | 1.21 | `std::vector.push_back - noreserve` 354 | | 100,000 | 7,808,870.40 | 128.06 | 0.2% | 1.18 | `std::vector.push_back - noreserve` 355 | | 10,000,000 | 888,449,152.00 | 1.13 | 1.1% | 9.83 | `std::vector.push_back - noreserve` 356 | 357 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t*2 358 | |------------:|--------------------:|--------------------:|--------:|----------:|:--------------------- 359 | | 1,000 | 65,370.03 | 15,297.53 | 0.1% | 1.16 | `std::vector.push_back` 360 | | 100,000 | 6,549,550.81 | 152.68 | 0.2% | 1.14 | `std::vector.push_back` 361 | | 10,000,000 | 653,770,052.00 | 1.53 | 0.0% | 7.47 | `std::vector.push_back` 362 | 363 | | complexityN | ns/op | op/s | err% | total | Vector of uint64_t*4 364 | |------------:|--------------------:|--------------------:|--------:|----------:|:--------------------- 365 | | 1,000 | 91,220.54 | 10,962.44 | 0.2% | 1.19 | `std::vector.push_back` 366 | | 100,000 | 9,141,664.85 | 109.39 | 0.1% | 1.23 | `std::vector.push_back` 367 | | 10,000,000 | 914,898,255.00 | 1.09 | 0.1% | 10.89 | 
`std::vector.push_back` 368 | 369 | | complexityN | ns/op | op/s | err% | total | Vector of std::string 370 | |------------:|--------------------:|--------------------:|--------:|----------:|:---------------------- 371 | | 1,000 | 91,597.87 | 10,917.28 | 0.0% | 1.19 | `std::vector.push_back` 372 | | 100,000 | 9,182,358.45 | 108.90 | 0.1% | 1.23 | `std::vector.push_back` 373 | | 10,000,000 | 920,280,188.00 | 1.09 | 0.1% | 10.69 | `std::vector.push_back` 374 | 375 | 376 | ## Summary 377 | 378 | - Release builds 379 | - `vector<uint64_t>` => **+50%** overhead 380 | - `vector<uint64_t>` - noreserve => **~10-25%** overhead, depends on the growth strategy 381 | - `vector<uint64_t*2>` => **5%** overhead 382 | - `vector<uint64_t*4>` => **3%** overhead 383 | - `vector<std::string>` => **5-7%** overhead 384 | - clang Release `-O3` (but not `-O2` nor GCC `-O3`) seems to be the exception and shows a much bigger overhead (between **+100%** and **+150%**) for types other than `std::string` 385 | - It seems the guards defeat some kind of optimization here 386 | - Did not investigate why yet 387 | - Debug builds 388 | - In CMake Debug default configuration (MSVC `/Od` / clang without `-Og`) 389 | - Overhead is between 1% (`std::string`) and **30%** (the rest) 390 | - The base overhead of the debug mode for `std::vector` is so bad anyway (especially for non-trivial types), that you might as well just go ahead and use the guards. 391 | - `push_back_noguard` was added to show the overhead of the implementation of the wrapper, which should not be here if you have your own vector implementation. 392 | - In debug optimized (clang `-Og`), results are globally the same as Release builds. 393 | 394 | This seems to be a totally acceptable overhead in most cases given the chances it has to detect issues. 395 | Any object containing the equivalent of two pointers will most likely see only a small decrease in performance for `push_back`. 396 | 397 | Compared to thread sanitizer, this is 5 to 30 times faster. 
Of course the detection level is way lower, but this is great as a smoketest since your code runs a lot during development. 398 | 399 | On games for which we tested the guards, less than 2% of regression in frame duration was observed. Which makes sense, since you do not (rather, should not) spend most of your time doing operations on containers. 400 | 401 | However, I would still recommend disabling the guards in production. 402 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.25) 2 | 3 | if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR) 4 | message(FATAL_ERROR "Do not build in-source. Please remove CMakeCache.txt and the CMakeFiles/ directory. Then build out-of-source.") 5 | endif() 6 | 7 | project(BadAccessGuards LANGUAGES C CXX VERSION 1.0.0) 8 | 9 | if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) 10 | set(${PROJECT_NAME}_IS_ROOT_PROJECT TRUE) 11 | endif() 12 | 13 | ############################ 14 | ## Modules and scripts ## 15 | ############################ 16 | 17 | include(GNUInstallDirs) # This will define the default values for installation directories (all platforms even if named GNU) 18 | #include(InstallRequiredSystemLibraries) # Tell CMake that the `install` target needs to install required system libraries (eg: Windows SDK) 19 | include(CMakePackageConfigHelpers) # Helper to create relocatable packages 20 | 21 | ############### 22 | ## OPTIONS ## 23 | ############### 24 | 25 | option(${PROJECT_NAME}_EXAMPLES "Build the examples" ${${PROJECT_NAME}_IS_ROOT_PROJECT}) 26 | option(${PROJECT_NAME}_BENCH "Build the benchmarks" ${${PROJECT_NAME}_IS_ROOT_PROJECT}) 27 | option(${PROJECT_NAME}_FORCE_ENABLE "Build with BAD_ACCESS_GUARDS_ENABLE=1 defined." ${${PROJECT_NAME}_IS_ROOT_PROJECT}) 28 | option(${PROJECT_NAME}_INSTALL "Should ${PROJECT_NAME} be added to the install list? 
Useful if included using add_subdirectory." ${${PROJECT_NAME}_IS_ROOT_PROJECT}) 29 | 30 | ############### 31 | ## PROJECT ## 32 | ############### 33 | 34 | add_library(BadAccessGuards 35 | src/BadAccessGuards.cpp 36 | src/BadAccessGuards.h 37 | ) 38 | target_include_directories(${PROJECT_NAME} 39 | PUBLIC $ # Due to the way installation work, we only want this path set when building, not once installed 40 | ) 41 | set_target_properties(${PROJECT_NAME} 42 | PROPERTIES 43 | PUBLIC_HEADER ${CMAKE_CURRENT_LIST_DIR}/src/BadAccessGuards.h 44 | DEBUG_POSTFIX d 45 | ) 46 | 47 | target_compile_features(BadAccessGuards PUBLIC cxx_std_11) 48 | add_library(${PROJECT_NAME}::BadAccessGuards ALIAS BadAccessGuards) 49 | 50 | if(${PROJECT_NAME}_FORCE_ENABLE) 51 | target_compile_definitions(BadAccessGuards PUBLIC BAD_ACCESS_GUARDS_ENABLE=1) 52 | endif() 53 | 54 | ############################# 55 | ## Examples and benchmarks ## 56 | ############################# 57 | 58 | if(${PROJECT_NAME}_EXAMPLES) 59 | add_executable(BasicExample examples/Basic.cpp) 60 | target_link_libraries(BasicExample PRIVATE BadAccessGuards) 61 | target_compile_features(BasicExample PUBLIC cxx_std_14) # chrono_literals 62 | set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT BasicExample) 63 | 64 | add_executable(GuardedVectorExample examples/GuardedVectorExample.cpp examples/GuardedVectorExample.h) 65 | target_link_libraries(GuardedVectorExample PRIVATE BadAccessGuards) 66 | target_compile_features(GuardedVectorExample PUBLIC cxx_std_14) # chrono_literals 67 | endif() 68 | 69 | if(${PROJECT_NAME}_BENCH) 70 | add_subdirectory(benchmarks) 71 | endif() 72 | 73 | ############# 74 | ## Install ## 75 | ############# 76 | 77 | if(${PROJECT_NAME}_INSTALL) 78 | set(${PROJECT_NAME}_INSTALL_CMAKEDIR "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" CACHE STRING "Path to ${PROJECT_NAME} cmake files") 79 | 80 | # Use version checking boilerplate 81 | write_basic_package_version_file( 82 | 
${PROJECT_NAME}ConfigVersion.cmake 83 | COMPATIBILITY SameMajorVersion 84 | ) 85 | 86 | configure_package_config_file( 87 | ${CMAKE_CURRENT_LIST_DIR}/cmake/${PROJECT_NAME}Config.cmake.in 88 | ${PROJECT_BINARY_DIR}/${PROJECT_NAME}Config.cmake 89 | INSTALL_DESTINATION ${${PROJECT_NAME}_INSTALL_CMAKEDIR} 90 | # Imported targets do not require the following macros 91 | NO_SET_AND_CHECK_MACRO 92 | NO_CHECK_REQUIRED_COMPONENTS_MACRO 93 | ) 94 | 95 | install( 96 | TARGETS BadAccessGuards 97 | EXPORT ${PROJECT_NAME}_Targets 98 | INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} 99 | PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} 100 | ) 101 | 102 | install( 103 | EXPORT ${PROJECT_NAME}_Targets 104 | NAMESPACE ${PROJECT_NAME}:: 105 | FILE ${PROJECT_NAME}Targets.cmake 106 | DESTINATION ${${PROJECT_NAME}_INSTALL_CMAKEDIR} 107 | ) 108 | 109 | install(FILES 110 | ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake 111 | ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake 112 | DESTINATION ${${PROJECT_NAME}_INSTALL_CMAKEDIR} 113 | ) 114 | endif() 115 | 116 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. 
We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BadAccessGuards 2 | 3 | This C++ library makes it possible to detect most race conditions involving (but not only) containers through their instrumentation, at a minimal runtime cost. 4 | 5 | Finding race conditions and bad access patterns usually involves sanitizers or special programs such as Valgrind. 6 | Those are however seldom run because they can have a heavy runtime performance cost. 7 | 8 | As a bonus, we also get detection of memory use-after-free and corruption for free. This also detects recursive operations, which are often dangerous in containers. 9 | 10 | # Who is this for? 
11 | 12 | - Container/library implementers 13 | - Teams working on real-time/legacy projects 14 | - Teams working with concurrency a lot 15 | - Especially with frequent onboarding of newcomers 16 | - Toolchain vendors that would like to propose a lighter version of ThreadSanitizer shadow algorithm 17 | - Someone suspecting specific code to be racy but has trouble reproducing the race 18 | 19 | # Goals/Features 20 | 21 | - Easy to integrate and modify for your project 22 | - There are only two files: `BadAccessGuards.h` and `BadAccessGuards.cpp` 23 | - Licensed under the [Unlicence](LICENSE), you can just copy/modify it without worrying about legal. 24 | - It does not include the C++ standard library, and can thus be used in your std-free libraries (or even for a standard library implementation!) 25 | - Small, there are only a few platform-specific functions to implement 26 | - Supports MSVC, GCC and Clang. 27 | - Detect race conditions with minimal performance impact 28 | - You want to be able to run this in your day-to-day development builds 29 | - Adds only a few load/store/masks depending on the operations 30 | - Fast path (read) on *Windows* is 2`mov`s + 1`test` + `je` 31 | - Debug builds were given love 32 | - See [Benchmarks](#Benchmarks) 33 | - No false positives that you wouldn't want to fix 34 | - Provide details as accurate as possible 35 | - We detect if the access was done from another thread, and for platforms that allow it (Windows), print its information. We also give what kind of operation it was executing. 36 | - Break as early as possible to hopefully be able to inspect the other threads in the debugger. 37 | - No dependencies other than your compiler* 38 | - *And your platform threading libraries (non-mandatory) 39 | - *Does include the C standard library for `uint64_t` and `uintptr_t`, and + for the default `BadAccessGuardReport` function. (easily removed) 40 | - Easy to enable/disable with a single macro: `BAD_ACCESS_GUARDS_ENABLE=0/1`. 
By default disabled if `defined(NDEBUG)`. 41 | - Battle-tested: Used on projects with 200+ people running the applications/games daily. 42 | 43 | # Non-goals/Will not implement 44 | - Detect every single type of race condition 45 | - This is not the objective of this library, you would use ThreadSanitizer, Valgrind or other tools for that. **This is not a substitute for proper design, test, and usage of sanitizers** but rather another tool in the box. 46 | - Some access patterns are not detected on purpose for performance. 47 | - Detect lock-free containers issues. 48 | - This method cannot (unless proven otherwise) work where read/writes need to be considered atomic 49 | - Support every platform in existence. 50 | - Rather than implementing them all, major ones are provided. Adding your own was made easy. 51 | - Providing hardened versions of the `std::` containers. 52 | - If you want this to be supported by `std::` containers, ask your implementer to add this technique to their implementation 53 | - Perhaps someone could make another repository with wrappers for the `std::` containers? 54 | 55 | # Usage 56 | 57 | 1. Add `BadAccessGuards.h` and `BadAccessGuards.cpp` to your project. (Via CMake, or copy the files) 58 | 2. Define `BAD_ACCESS_GUARDS_ENABLE=1` for your **in-house** builds (you probably want it off in production...) 59 | 3. Declare the shadow memory that will hold the state and pointer to stack with `BA_GUARD_DECL(varname)` 60 | 4. For all (relevant) read operations of the container / object, use the scope guard `BA_GUARD_READ(varname)`. 61 | 5. For all (relevant) write operations of the container / object, use the scope guard `BA_GUARD_WRITE(varname)`. 62 | - Do this only if it always writes! For example, don't use it on `operator[]` even though it returns a reference, use `BA_GUARD_READ` instead. 63 | 6. Add `BA_GUARD_DESTROY(varname)` at the beginning of the destructor. 64 | 7. Enjoy!
65 | 66 | # Examples 67 | 68 | Examples are available in [./examples](./examples). 69 | You may build and run them easily using CMake: 70 | 71 | ```sh 72 | cmake -B build # Generate the project 73 | # Open the project in your IDE (if supported by your generator) 74 | cmake --open build 75 | # Or build it 76 | cmake --build build 77 | ``` 78 | 79 | Here is how it looks in Visual Studio (error caught in production, we were appending decoded frames to a collection while reading it): 80 | 81 | ![Race condition in production](./examples/FoundInProduction.png) 82 | 83 | ## Sample output: 84 | 85 | > Run with `--break` to make this example break in the debugger. 86 | > 87 | > 88 | > Testing read during write on the same thread, output: 89 | > Recursion detected: This may lead to invalid operations 90 | > - Parent operation: Writing. 91 | > - This operation: Reading. 92 | > 93 | > 94 | > Testing read during write on different threads, output: 95 | > Race condition: Multiple threads are reading/writing to the data at the same time, potentially corrupting it! 96 | > - Other thread: Writing (Desc=ØUnsafe WriterØ Id=18084) 97 | > - This thread: Reading. 98 | 99 | # How does it work? 100 | 101 | The idea is based on the following observation: 102 | 103 | > You do not need to catch all occurrences of a race condition; catching it once should be enough when the code runs often and provides enough information to locate the issue. 104 | 105 | Indeed, (especially in big teams), your code will be run hundreds, if not thousands, of times by your developers. 106 | So even if you have a 10% chance to catch the issue with useful details, it is better than a 1% chance and not crashing or detecting it way later in the execution of the program. 107 | 108 | The point is that it is better than not being detected at all. And better than having people run sanitizers once every moon eclipse, or only on small unit tests, or worse, never.
109 | 110 | ## Detection 111 | 112 | So our objective is to detect race conditions, as fast as possible. And we do not need 100% detection as long as developers can afford to run their code with these tests. 113 | 114 | ### Access states 115 | 116 | We have 3 possible access states for a given object: 117 | 118 | ```mermaid 119 | stateDiagram-v2 120 | direction LR 121 | IdleRead: Idle/Read 122 | [*]-->IdleRead 123 | IdleRead-->Write 124 | Write-->IdleRead 125 | IdleRead-->Destroyed 126 | Destroyed-->[*] 127 | ``` 128 | 129 | - **Idle / Read**: We're either reading the object or not using it at all. These two states are merged because you do not want your reads to be costly! 130 | - **Write**: We're mutating the object. As soon as we are done, we go back to the **Idle / Read** state. 131 | - **Destroyed**: The object has been destroyed (or freed), and should not be used anymore. 132 | 133 | And there's an implicit one: **Corrupted**, if we see a value not in the list above. 134 | 135 | ### Access states transitions 136 | 137 | Now that we have the above states, let's consider the following operations: 138 | 139 | - A **Read** is only allowed if the previous state was **Idle / Read** too. Otherwise, the previous operation was not complete. 140 | - Do not change the state 141 | - Starting a **Write** operation is allowed only if the previous state is **Idle / Read**. 142 | - Change state to **Write** 143 | - After a **Write** operation, we can check that we were still in the **Write** state, then change it back to **Idle / Read**. 144 | - A **Destroy** is allowed only if the previous state is **Idle / Read**. 145 | 146 | That's it! Now all we need to do is to check those invariants. If the operations are executed on a single thread, those invariants cannot break. 147 | 148 | Which means that if the invariants do not hold, something bad happened! 149 | There are two options: 150 | - A race condition happened. 151 | - Some kind of recursion happened.
152 | 153 | And since we do not need a 100% error detection, these state changes do not need to be atomic. We just want to force the reads and writes of the state. 154 | **Remember:** the invariants cannot break on a single thread except in the case of a recursion! 155 | 156 | > **Note:** We do not care about potential reorders of the operations by the compiler. It must honor the invariants from a single-thread point of view. 157 | > The re-orders also cannot cause issues (except lowering the detection rate) in a multithread context. If it did, it would mean your synchronization is bad, which you want to detect anyway. 158 | > The only exception would be for lock-free containers, which are not the target of this library. 159 | 160 | ## Breaking with more details 161 | 162 | The ideal situation would be to have all threads suspended as soon as the race condition occurs. 163 | The next best option is the one we implement: send an interrupt for the debugger to break. 164 | 165 | - The sooner we break, the higher your chances of breaking with the other thread at the location responsible for the race condition. 166 | - This one is simple; the first thing we do when detecting a problem is to break! (See `BadAccessGuardConfig::breakASAP`) 167 | - The faster we can detect issues, the faster we can break. So we limit the cost of the detection. 168 | - If we break, we want to know what thread was involved in the bad access. 169 | - This is necessary to know if the issue is recursion or a race condition 170 | - We could have used the thread ID, but this is slow to get. (and again, we want this to be fast). Instead, simply store the stack pointer. This is enough to identify a thread! 171 | - And if you are using fibers, well, you actually get fiber identification for free too, assuming you keep them around a bit and can list them. 
172 | - Right now, looking up which thread's stack contains the pointer is not implemented on *Linux* (because the OS provides no way to do this, would have to hook `pthread` for example), nor on *MacOS* (though it is possible and comments on how to do it are in the source). We can however still determine if the issue is a recursion or race condition. 173 | 174 | ## Keeping it fast and lightweight 175 | 176 | We need to store two pieces of information: 177 | - A pointer in the stack of the current thread 178 | - The state. 179 | 180 | State will fit in 2 bits, which is lower than the alignment of the stack address and size on virtually every platform. 181 | 182 | So we only need to drop the lowest 2 bits of the pointer and store the state there! 183 | 184 | > On *Windows*, all stacks are page-aligned (4kB). So we can actually drop the 8 lower bits. This way, to get the state, the compiler only needs to load a byte instead of masking with `0b11`. 185 | 186 | As for how to obtain our pointer to the current stack... we use intrinsics (see `BA_GUARD_GET_PTR_IN_STACK`), but in theory we could use the address of any variable on the stack (but would then need to make sure the compiler does not optimize it away). This is a single `mov` instruction on all platforms. 187 | 188 | # Benchmarks 189 | 190 | ## Setup 191 | 192 | - CPU: AMD Ryzen 7 7745HX. 193 | - RAM settings: 5200MT/s, timings 38-38-38-75, 2 channels 194 | - CPU was stabilized at a fixed frequency of 3.49GHz, frequency boost is disabled. 195 | 196 | Simple benchmarks were done using `std::vector<>::push_back` since it uses the most expensive guard (write access). 197 | Memory is reserved upfront, and the vector is emptied at each iteration unless ` - noreserve` is mentioned, which means we freed memory after clearing. `complexityN` is the number of elements pushed per iteration. 198 | See [./benchmarks/BenchGuardedVectorExample.cpp](./benchmarks/BenchGuardedVectorExample.cpp).
199 | 200 | As usual for microbenchmarks, take them with a grain of salt. 201 | 202 | ## Results 203 | 204 | See [Benchmarks.md](Benchmarks.md) 205 | 206 | ## Quick recap 207 | 208 | - Release builds 209 | - `vector` => **+50%** overhead 210 | - `vector` - noreserve => **~10-25%** overhead, depends on the growth strategy 211 | - `vector` => **5%** overhead 212 | - `vector` => **3%** overhead 213 | - `vector` => **5-7%** overhead 214 | - clang Release `-O3` (but not `-O2` nor GCC `-O3`) seems to be the exception and shows a much bigger overhead (between **+100%** and **+150%**) for types other than `std::string` 215 | - It seems the guards defeat some kind of optimization here 216 | - Did not investigate why yet 217 | - Debug builds 218 | - In CMake Debug default configuration (MSVC `/Od` / clang without `-Og`) 219 | - Overhead is between 1% (`std::string`) and **30%** (the rest) 220 | - The base overhead of the debug mode for `std::vector` is so bad anyway (especially for non-trivial types), that you might as well just go ahead and use the guards. 221 | - `push_back_noguard` was added to show the overhead of the implementation of the wrapper, which should not be here if you have your own vector implementation. 222 | - In debug optimized (clang `-Og`), results are globally the same as Release builds. 223 | 224 | This seems to be a totally acceptable overhead in most cases given the chances it has to detect issues. 225 | Any object containing the equivalent of two pointers will most likely see only a small decrease in performance for `push_back`. 226 | 227 | Compared to thread sanitizer, this is 5 to 30 times faster. Of course the detection level is way lower, but this is great as a smoketest since your code runs a lot during development. 228 | 229 | On games for which we tested the guards, less than 2% of regression in frame duration was observed. Which makes sense, since you do not (rather, should not) spend most of your time doing operations on containers. 
230 | 231 | However, I would still recommend disabling the guards in production. 232 | 233 | 234 | # LICENSE 235 | 236 | This work is released under the [Unlicense](https://unlicense.org/). To sum up, this is in the public domain but you cannot hold contributors liable. 237 | -------------------------------------------------------------------------------- /benchmarks/BenchGuardedVectorExample.cpp: -------------------------------------------------------------------------------- 1 | #include "../examples/GuardedVectorExample.h" 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #if defined(__has_feature) // Clang 9 | # if __has_feature(thread_sanitizer) 10 | # define USING_THREAD_SANITIZER 1 11 | # else 12 | # define USING_THREAD_SANITIZER 0 13 | # endif 14 | #elif defined(__SANITIZE_THREAD__) // GCC 15 | # define USING_THREAD_SANITIZER 1 16 | #else 17 | # define USING_THREAD_SANITIZER 0 18 | #endif 19 | 20 | using namespace std::chrono_literals; 21 | const auto minEpoch = 100ms; 22 | 23 | template 24 | int BenchVector(ankerl::nanobench::Bench& bench, bool withNoReserve) 25 | { 26 | #ifdef NDEBUG 27 | const size_t nbPushBacksPerIteration[] = { 1'000, 100'000, 10'000'000 }; 28 | #else 29 | const size_t nbPushBacksPerIteration[] = { 1'000 }; 30 | #endif 31 | uint64_t x = 1; 32 | { 33 | std::vector vector; 34 | for (size_t size : nbPushBacksPerIteration) 35 | { 36 | vector.reserve(size); // Don't measure allocator 37 | bench.complexityN(size) 38 | .minEpochTime(minEpoch) 39 | .run("std::vector.push_back", [&] { 40 | for (int i = 0; i < size; i++) 41 | { 42 | vector.push_back({ x }); 43 | } 44 | ankerl::nanobench::doNotOptimizeAway(x += vector.size()); 45 | vector.clear(); 46 | }); 47 | } 48 | } 49 | 50 | #if !USING_THREAD_SANITIZER 51 | #ifndef NDEBUG // Only gives different perf in debug builds 52 | { 53 | ExampleGuardedVector guardedvector; 54 | for (size_t size : nbPushBacksPerIteration) 55 | { 56 | guardedvector.reserve(size); // Don't measure allocator 57 | 
bench.complexityN(size) 58 | .minEpochTime(minEpoch) 59 | .run("guardedvector.push_back_noguard", [&] { 60 | for (int i = 0; i < size; i++) 61 | { 62 | // This is benched to demonstrate that the guard is not much more expensive thant a simple forwarding call in debug... 63 | guardedvector.push_back_noguard({ x }); 64 | } 65 | ankerl::nanobench::doNotOptimizeAway(x += guardedvector.size()); 66 | guardedvector.clear(); 67 | }); 68 | } 69 | } 70 | #endif 71 | { 72 | ExampleGuardedVector guardedvector; 73 | for (size_t size : nbPushBacksPerIteration) 74 | { 75 | guardedvector.reserve(size); // Don't measure allocator 76 | bench.complexityN(size) 77 | .minEpochTime(minEpoch) 78 | .run("guardedvector.push_back", [&] { 79 | for (int i = 0; i < size; i++) 80 | { 81 | guardedvector.push_back({ x }); 82 | } 83 | ankerl::nanobench::doNotOptimizeAway(x += guardedvector.size()); 84 | guardedvector.clear(); 85 | }); 86 | } 87 | } 88 | #endif //!USING_THREAD_SANITIZER 89 | if (withNoReserve) 90 | { 91 | { 92 | std::vector vector; 93 | for (size_t size : nbPushBacksPerIteration) 94 | { 95 | bench.complexityN(size) 96 | .minEpochTime(minEpoch) 97 | .run("std::vector.push_back - noreserve", [&] { 98 | for (int i = 0; i < size; i++) 99 | { 100 | vector.push_back({ x }); 101 | } 102 | ankerl::nanobench::doNotOptimizeAway(x += vector.size()); 103 | vector.clear(); 104 | vector.shrink_to_fit(); 105 | }); 106 | } 107 | } 108 | 109 | #if !USING_THREAD_SANITIZER 110 | #ifndef NDEBUG // Only gives different perf in debug builds 111 | { 112 | ExampleGuardedVector guardedvector; 113 | for (size_t size : nbPushBacksPerIteration) 114 | { 115 | guardedvector.reserve(size); // Don't measure allocator 116 | bench.complexityN(size) 117 | .minEpochTime(minEpoch) 118 | .run("guardedvector.push_back_noguard - noreserve", [&] { 119 | for (int i = 0; i < size; i++) 120 | { 121 | // This is benched to demonstrate that the guard is not much more expensive thant a simple forwarding call in debug... 
122 | guardedvector.push_back_noguard({ x }); 123 | } 124 | ankerl::nanobench::doNotOptimizeAway(x += guardedvector.size()); 125 | guardedvector.clear(); 126 | guardedvector.shrink_to_fit(); 127 | }); 128 | } 129 | } 130 | #endif 131 | { 132 | ExampleGuardedVector guardedvector; 133 | for (size_t size : nbPushBacksPerIteration) 134 | { 135 | bench.complexityN(size) 136 | .minEpochTime(minEpoch) 137 | .run("guardedvector.push_back - noreserve", [&] { 138 | for (int i = 0; i < size; i++) 139 | { 140 | guardedvector.push_back({ x }); 141 | } 142 | ankerl::nanobench::doNotOptimizeAway(x += guardedvector.size()); 143 | guardedvector.clear(); 144 | guardedvector.shrink_to_fit(); 145 | }); 146 | } 147 | } 148 | #endif //!USING_THREAD_SANITIZER 149 | } 150 | return x; 151 | } 152 | 153 | int main() { 154 | 155 | int x = 0; 156 | 157 | x += BenchVector(ankerl::nanobench::Bench().title("Vector of uint64_t"), true); 158 | struct Payload16B { 159 | uint64_t storage[2]; 160 | }; 161 | x += BenchVector(ankerl::nanobench::Bench().title("Vector of uint64_t*2"), false); 162 | struct Payload32B { 163 | uint64_t storage[4]; 164 | }; 165 | x += BenchVector(ankerl::nanobench::Bench().title("Vector of uint64_t*4"), false); 166 | struct PayloadString { 167 | PayloadString(uint64_t v) { 168 | (void)v; 169 | storage += char(v); 170 | } 171 | std::string storage; 172 | }; 173 | x += BenchVector(ankerl::nanobench::Bench().title("Vector of std::string"), false); 174 | return x; 175 | } -------------------------------------------------------------------------------- /benchmarks/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include(FetchContent) 2 | 3 | FetchContent_Declare( 4 | nanobench 5 | GIT_REPOSITORY https://github.com/martinus/nanobench.git 6 | GIT_TAG e4327893194f06928012eb81cabc606c4e4791ac 7 | GIT_SHALLOW TRUE 8 | ) 9 | 10 | FetchContent_MakeAvailable(nanobench) 11 | 12 | add_executable(BenchGuardedVectorExample 
BenchGuardedVectorExample.cpp) 13 | target_link_libraries(BenchGuardedVectorExample 14 | PRIVATE 15 | BadAccessGuards 16 | nanobench 17 | ) 18 | target_compile_features(BenchGuardedVectorExample PUBLIC cxx_std_14) # chrono_literals 19 | 20 | -------------------------------------------------------------------------------- /cmake/BadAccessGuardsConfig.cmake.in: -------------------------------------------------------------------------------- 1 | @PACKAGE_INIT@ 2 | 3 | # Required so that on windows Release and RelWithDebInfo can be used instead of default fallback which is Debug 4 | # See https://gitlab.kitware.com/cmake/cmake/-/issues/20319 5 | set(CMAKE_MAP_IMPORTED_CONFIG_MINSIZEREL MinSizeRel RelWithDebInfo Release Debug) 6 | set(CMAKE_MAP_IMPORTED_CONFIG_RELWITHDEBINFO RelWithDebInfo Release MinSizeRel Debug) 7 | set(CMAKE_MAP_IMPORTED_CONFIG_RELEASE Release RelWithDebInfo MinSizeRel Debug) 8 | 9 | include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake") 10 | -------------------------------------------------------------------------------- /examples/Basic.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #ifdef _WIN32 8 | # include 9 | #endif 10 | #include 11 | 12 | int main(int argv, char** argc) 13 | { 14 | bool allowBreak = false; 15 | if (argv >= 2 && !strcmp(argc[1], "--break")) 16 | { 17 | allowBreak = true; 18 | } 19 | else 20 | { 21 | printf("Run with `--break` to make this example break in the debugger.\n\n"); 22 | } 23 | #if BAD_ACCESS_GUARDS_ENABLE 24 | // Don't break for this sample as the race condition is controlled and won't trigger a crash, so we'll just be printing! 25 | 26 | 27 | BadAccessGuardSetConfig({ 28 | allowBreak, // allowBreak 29 | false, // breakASAP 30 | }); 31 | #else 32 | # error "Can't really test the guards if we don't enable them can we ?" 
33 | #endif 34 | 35 | 36 | // To test memory corruption/use after free, we use placement to control the lifetime of the object, and its memory 37 | // In the real world, freeing memory won't always (and most often don't) release the memory pages, so it is still accessible 38 | // Here we keep the backing memory (shadowAlignedStorage) alive for the duration of the test, even though we destroy the actual object earlier. 39 | // This is actually not really needed for the test, but simulates what would happen with real objects. 40 | alignas(BadAccessGuardShadow) char shadowAlignedStorage[sizeof(BadAccessGuardShadow)]; 41 | 42 | // Create the shadow object in the aligned storage 43 | BadAccessGuardShadow* shadowPtr = new (&shadowAlignedStorage) BadAccessGuardShadow; 44 | 45 | // Reference to the shadow we'll be using. 46 | BadAccessGuardShadow& shadow = *shadowPtr; 47 | 48 | // Showcase we can detect we're on the same thread 49 | { 50 | printf("Testing read during write on the same thread, output:\n"); 51 | 52 | BadAccessGuardWrite writeg{ shadow }; 53 | BadAccessGuardRead readg{ shadow }; 54 | } 55 | 56 | // Now for the actual race from multiple threads 57 | using namespace std::chrono_literals; 58 | 59 | { 60 | printf("\n\nTesting read during write on different threads, output:\n"); 61 | 62 | std::thread otherthread([&] { 63 | #ifdef _WIN32 64 | SetThreadDescription(GetCurrentThread(), L"ØUnsafe WriterØ"); 65 | #endif 66 | BadAccessGuardWrite writeg{ shadow }; 67 | // Simulate a long write, so that the main thread can attempt to read during this time 68 | std::this_thread::sleep_for(3s); 69 | }); 70 | // Wait a bit so that the otherthread has enough time to spawn and go to sleep 71 | std::this_thread::sleep_for(1s); 72 | // Assuming the other thread has been scheduled. No sync on purpose since we want to see if our code could detect the race conditon. 
73 | BadAccessGuardRead readg{ shadow }; 74 | // Wait for the other thread to exit 75 | otherthread.join(); 76 | } 77 | 78 | 79 | { 80 | printf("\n\nTesting write during write on different threads, output:\n"); 81 | 82 | using namespace std::chrono_literals; 83 | std::thread otherthread([&] { 84 | #ifdef _WIN32 85 | SetThreadDescription(GetCurrentThread(), L"ØUnsafe WriterØ"); 86 | #endif 87 | BadAccessGuardWrite writeg{ shadow }; 88 | // Simulate a long write, so that the main thread can attempt to write during this time 89 | std::this_thread::sleep_for(3s); 90 | }); 91 | // Wait a bit so that the otherthread has enough time to spawn and go to sleep 92 | std::this_thread::sleep_for(1s); 93 | // Assuming the other thread has been scheduled. No sync on purpose since we want to see if our code could detect the race conditon. 94 | BadAccessGuardWrite readg{ shadow }; 95 | // Wait for the other thread to exit 96 | otherthread.join(); 97 | } 98 | 99 | // Destroy the shadow value 100 | { 101 | printf("\n\nDestroying shadow, keeping its storage memory.\n"); 102 | BadAccessGuardDestroy gd{ shadow }; // This would typically be in your object destructor 103 | shadowPtr->~BadAccessGuardShadow(); // Note we don't actually need to destroy it to simulate our tests 104 | } 105 | 106 | // Attempt use after free 107 | { 108 | printf("\nUse after free:\n"); 109 | BadAccessGuardRead readg{ shadow }; 110 | } 111 | 112 | // Corruption, set an invalid state 113 | { 114 | printf("\n\nUse after corruption:\n"); 115 | memset(&shadowAlignedStorage, 0xDD ,sizeof(shadowAlignedStorage)); 116 | BadAccessGuardRead readg{ shadow }; 117 | } 118 | 119 | return 0; 120 | } 121 | -------------------------------------------------------------------------------- /examples/FoundInProduction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lectem/BadAccessGuards/db8ce68d15a178798589ddbd0f6345696884bc04/examples/FoundInProduction.png 
-------------------------------------------------------------------------------- /examples/GuardedVectorExample.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #ifdef _WIN32 5 | # include 6 | #endif 7 | #include 8 | #include 9 | #include "GuardedVectorExample.h" 10 | #include 11 | #include 12 | 13 | // Simulate any kind of event system where one may register and trigger delegates 14 | static std::vector> gEventDelegates; 15 | static void TriggerEvent() 16 | { 17 | for (const auto& delegate : gEventDelegates) 18 | { 19 | delegate(); 20 | } 21 | } 22 | 23 | int main() 24 | { 25 | #if !BAD_ACCESS_GUARDS_ENABLE 26 | # error "Can't really test the guards if we don't enable them can we ?" 27 | #endif 28 | 29 | printf("This test should be ran under a debugger as it will trigger breakpoints and crash if no debugger is attached!\n"); 30 | 31 | int accum = 0; // Variable to avoid optimizations 32 | 33 | { 34 | printf("\nTesting write during write on the same thread, output:\n"); 35 | // A class that somehow triggers the event on construction 36 | struct RecursiveBehaviour 37 | { 38 | RecursiveBehaviour() 39 | { 40 | // You are actually inside the `ExampleGuardedVector::emplace_back()`! 41 | TriggerEvent(); 42 | } 43 | }; 44 | 45 | ExampleGuardedVector vector; 46 | // Somewhere you setup a delegate function clearing the vector 47 | gEventDelegates.push_back([&]() { 48 | vector.clear(); 49 | }); 50 | 51 | // Then you add some item that triggers the event delegates (could be a signal, event, etc...) 52 | // The issue is that while modifying the vector itself (you're building the item inside it!), you're also asking to clear it (due to the delegate). 
53 | vector.emplace_back(); 54 | } 55 | 56 | using namespace std::chrono_literals; 57 | { 58 | printf("\n\nShowcase we do not crash on reads from different threads, there should be no output:\n"); 59 | 60 | ExampleGuardedVector vec; 61 | for (int i = 0; i < 10'000'000; i++) 62 | { 63 | vec.push_back(i); 64 | } 65 | 66 | 67 | auto beginTime = std::chrono::steady_clock::now(); 68 | // Run both threads concurrently for 0.5s 69 | auto endTime = std::chrono::steady_clock::now() + 0.5s; 70 | 71 | std::thread otherthread([&] { 72 | while (std::chrono::steady_clock::now() < endTime) 73 | { 74 | for (int elem : vec) { accum += elem; } 75 | } 76 | }); 77 | // Assume other thread will be scheduled before the endTime 78 | while (std::chrono::steady_clock::now() < endTime) 79 | { 80 | for (int elem : vec) { accum += elem; } 81 | } 82 | otherthread.join(); 83 | } 84 | 85 | { 86 | printf("\n\nTesting read during write on different threads, output:\n"); 87 | 88 | auto beginTime = std::chrono::steady_clock::now(); 89 | // Run both threads concurrently for 0.5s 90 | auto endTime = std::chrono::steady_clock::now() + 0.5s; 91 | 92 | ExampleGuardedVector vec; 93 | std::thread otherthread([&] { 94 | #ifdef _WIN32 95 | SetThreadDescription(GetCurrentThread(), L"ØUnsafe WriterØ"); 96 | #endif 97 | while (std::chrono::steady_clock::now() < endTime) 98 | { 99 | for (int i = 0; i < 10'000'000; i++) 100 | { 101 | vec.push_back(i); 102 | } 103 | vec.clear(); 104 | vec.shrink_to_fit(); // Force this thread to reallocate a bit 105 | } 106 | std::this_thread::sleep_for(3s); 107 | }); 108 | 109 | // Assume other thread will be scheduled before the endTime 110 | while (std::chrono::steady_clock::now() < endTime) 111 | { 112 | // We may (or may not) crash while reading since the other thread may be reallocating 113 | for (int elem : vec) 114 | { 115 | accum += elem; 116 | } 117 | } 118 | 119 | otherthread.join(); 120 | } 121 | return accum; // Prevent optimization 122 | } 123 | 
-------------------------------------------------------------------------------- /examples/GuardedVectorExample.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | // Very small example of implementation for a vector type with a reduced number of methods. 5 | // This is NOT the inteded way to use the library, ideally you would add the guards to the implementation of the container itself! 6 | // Here we are paying the cost of passing things around, especially in Debug builds. 7 | template 8 | class ExampleGuardedVector : public std::vector 9 | { 10 | using super = std::vector; 11 | BA_GUARD_DECL(BAShadow); 12 | public: 13 | ExampleGuardedVector(){} 14 | ExampleGuardedVector(const ExampleGuardedVector& rhs) : super(rhs) { } 15 | ExampleGuardedVector(ExampleGuardedVector&& rhs) : super(std::move(rhs)) { } 16 | 17 | ~ExampleGuardedVector() 18 | { 19 | BA_GUARD_DESTROY(BAShadow); 20 | } 21 | 22 | ExampleGuardedVector& operator=(ExampleGuardedVector&& rhs) 23 | { 24 | BA_GUARD_WRITE(BAShadow); 25 | super::operator=(std::move(rhs)); 26 | return *this; 27 | } 28 | 29 | ExampleGuardedVector& operator=(const ExampleGuardedVector& rhs) 30 | { 31 | BA_GUARD_WRITE(BAShadow); 32 | super::operator=(rhs); 33 | return *this; 34 | } 35 | 36 | void push_back(const T& val) { 37 | 38 | BA_GUARD_WRITE(BAShadow); 39 | super::push_back(val); 40 | } 41 | 42 | 43 | void push_back_noguard(const T& val) { 44 | super::push_back(val); 45 | } 46 | 47 | void push_back(T&& val) 48 | { 49 | BA_GUARD_WRITE(BAShadow); 50 | super::push_back(std::move(val)); 51 | } 52 | 53 | void push_back_noguard(T&& val) 54 | { 55 | super::push_back(std::move(val)); 56 | } 57 | 58 | template 59 | decltype(auto) emplace_back(_Valty&&... 
_Val) 60 | { 61 | BA_GUARD_WRITE(BAShadow); 62 | return super::emplace_back(std::forward<_Valty>(_Val)...); 63 | } 64 | 65 | 66 | T* data() 67 | { 68 | BA_GUARD_READ(BAShadow); 69 | return super::data(); 70 | } 71 | const T* data() const 72 | { 73 | BA_GUARD_READ(BAShadow); 74 | return super::data(); 75 | } 76 | 77 | typename super::size_type size() const 78 | { 79 | BA_GUARD_READ(BAShadow); 80 | return super::size(); 81 | } 82 | 83 | typename super::size_type capacity() const 84 | { 85 | BA_GUARD_READ(BAShadow); 86 | return super::capacity(); 87 | } 88 | 89 | void resize(typename super::size_type newSize) 90 | { 91 | BA_GUARD_WRITE(BAShadow); 92 | super::resize(newSize); 93 | } 94 | 95 | void reserve(typename super::size_type newCapacity) 96 | { 97 | BA_GUARD_WRITE(BAShadow); 98 | super::reserve(newCapacity); 99 | } 100 | 101 | void shrink_to_fit() 102 | { 103 | BA_GUARD_WRITE(BAShadow); 104 | super::shrink_to_fit(); 105 | } 106 | 107 | void clear() noexcept 108 | { 109 | BA_GUARD_WRITE(BAShadow); 110 | super::clear(); 111 | } 112 | 113 | T& operator[](const typename super::size_type index) noexcept 114 | { 115 | // We can't know whether it is used as read only or wrote to, accept the limitation and err on the conservative size 116 | BA_GUARD_READ(BAShadow); 117 | return super::operator[](index); 118 | } 119 | 120 | const T& operator[](const typename super::size_type index) const noexcept 121 | { 122 | BA_GUARD_READ(BAShadow); 123 | return super::operator[](index); 124 | } 125 | 126 | // Note our iterators return pointers directly but could return objects. 127 | // While creating the iterators should be guarded, you probably don't want to use 128 | // the guards for each and every access (iterator dereference) for performance reasons. 
129 | T* begin() noexcept 130 | { 131 | // We can't know whether it is used as read only or wrote to, accept the limitation and err on the conservative size 132 | BA_GUARD_READ(BAShadow); 133 | return super::data(); 134 | } 135 | const T* begin() const noexcept 136 | { 137 | BA_GUARD_READ(BAShadow); 138 | return super::data(); 139 | } 140 | T* end() noexcept 141 | { 142 | // We can't know whether it is used as read only or wrote to, accept the limitation and err on the conservative size 143 | BA_GUARD_READ(BAShadow); 144 | return super::data() + super::size(); 145 | } 146 | const T* end() const noexcept 147 | { 148 | BA_GUARD_READ(BAShadow); 149 | return super::data() + super::size(); 150 | } 151 | }; 152 | -------------------------------------------------------------------------------- /src/BadAccessGuards.cpp: -------------------------------------------------------------------------------- 1 | // BadAccessGuards v1.0.0 https://github.com/Lectem/BadAccessGuards 2 | 3 | ////////////////////////////////////////////////////////////////// 4 | ////// DEFAULT IMPL FILE ////// 5 | ////// Feel free to modify it as needed for your platforms! ////// 6 | ////// Or if is is enough, just use BadAccessGuardSetConfig ////// 7 | ////////////////////////////////////////////////////////////////// 8 | 9 | #include "BadAccessGuards.h" 10 | 11 | #if BAD_ACCESS_GUARDS_ENABLE 12 | 13 | // Per OS thread information retrieval 14 | using ThreadDescBuffer = char[512]; 15 | 16 | #ifdef _WIN32 // Windows 17 | 18 | #include 19 | 20 | bool IsAddressInStack(NT_TIB* tib, void* ptr) 21 | { 22 | return tib->StackLimit <= ptr && ptr <= tib->StackBase; 23 | } 24 | 25 | bool IsAddressInStackSafe(NT_TIB* tib, void* ptr) 26 | { 27 | __try 28 | { 29 | // If you break here with the debugger, it means that you're unlucky and the thread just closed right after we saw it 30 | // This can be ignored, but means that if this was the offending thread we will not be able to know it. 
31 | return tib && tib->StackLimit <= ptr && ptr <= tib->StackBase; 32 | } 33 | __except (EXCEPTION_EXECUTE_HANDLER) 34 | { 35 | return false; 36 | } 37 | } 38 | 39 | bool IsAddressInCurrentStack(void* ptr) 40 | { 41 | NT_TIB* tib = (NT_TIB*)NtCurrentTeb(); // NT_TEB starts with NT_TIB for all usermode threads. See ntddk.h. 42 | return IsAddressInStack(tib, ptr); 43 | } 44 | 45 | #include 46 | // GetThreadDescription may not be in old versions of the SDK, 47 | // and if we want to be able to run on older versions of Windows, we need to dynamically load it anyway. 48 | typedef HRESULT(WINAPI* GetThreadDescriptionPtrType)(HANDLE hThread, PWSTR* threadDescription); 49 | 50 | uint64_t FindThreadWithPtrInStack(void* ptr, ThreadDescBuffer outDescription) 51 | { 52 | // Attempt to get GetThreadDescription on first use of this function 53 | static GetThreadDescriptionPtrType GetThreadDescriptionPtr = (GetThreadDescriptionPtrType)GetProcAddress(GetModuleHandleA("KernelBase.dll"), "GetThreadDescription"); 54 | 55 | // Clean description 56 | outDescription[0] = '\0'; 57 | DWORD idOfThreadWithAddrInStack = 0; // 0 is an invalid thread id 58 | 59 | // Pss* functions are available for applications/drivers since Windows 8.1. 60 | // CreateToolhelp32Snapshot is only available on desktop but was available since Windows XP. 61 | #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_SYSTEM) && (NTDDI_VERSION >= NTDDI_WINBLUE) 62 | // Take a snapshot of all running threads. 63 | // Weirdly it does not necessarily see all threads (even if they are still alive and shown in the debugger) 64 | // It seems that threads need to be alive for a certain amount of time before it can actually see them. 65 | // CreateToolhelp32Snapshot suffers from the same issue. 
66 | HPSS snapshotHandle; 67 | if (ERROR_SUCCESS != PssCaptureSnapshot(GetCurrentProcess(), PSS_CAPTURE_THREADS, 0, &snapshotHandle)) 68 | return 0; 69 | 70 | HPSSWALK walkMarker; 71 | if (ERROR_SUCCESS == PssWalkMarkerCreate(nullptr, &walkMarker)) 72 | { 73 | const DWORD dwOwnerPID = GetCurrentProcessId(); 74 | PSS_THREAD_ENTRY threadEntry; 75 | while (idOfThreadWithAddrInStack == 0 // Stop if we find the thread 76 | && ERROR_SUCCESS == PssWalkSnapshot(snapshotHandle, PSS_WALK_THREADS, walkMarker, &threadEntry, sizeof(threadEntry))) 77 | { 78 | if (threadEntry.ProcessId != dwOwnerPID // This should never happen, but be safe 79 | || (threadEntry.Flags & PSS_THREAD_FLAGS_TERMINATED) != 0 // Thread was terminated, we can't access its PEB 80 | || threadEntry.ExitStatus != STILL_ACTIVE // Thread has exited, we can't access its PEB 81 | ) 82 | { 83 | continue; // Just continue with the next threads 84 | } 85 | 86 | // The thread might die while we check its TEB, so we have a safe version that won't crash 87 | if (IsAddressInStackSafe((NT_TIB*)threadEntry.TebBaseAddress, ptr)) 88 | { 89 | idOfThreadWithAddrInStack = threadEntry.ThreadId; 90 | if (HANDLE threadHdl = OpenThread(THREAD_QUERY_INFORMATION | THREAD_QUERY_LIMITED_INFORMATION, FALSE, threadEntry.ThreadId)) 91 | { 92 | // PSS_WALK_THREAD_NAME sounds nice until you realize its not implemented. 93 | // It's actually not recognized and you'll get a ERROR_INVALID_PARAMETER (at least on my version on Windows). 94 | // That's probably why it's undocumented. Instead, we'll just use GetThreadDescription 95 | PWSTR desc; 96 | if (GetThreadDescriptionPtr && SUCCEEDED(GetThreadDescriptionPtr(threadHdl, &desc))) 97 | { 98 | const int nbChars = WideCharToMultiByte(GetConsoleCP(), 0, desc, -1, outDescription, sizeof(ThreadDescBuffer) - 1, NULL, NULL); 99 | outDescription[nbChars] = '\0'; // Make sure to have a null terminated string. 
nbChars is 0 on error => Empty string 100 | LocalFree(desc); 101 | } 102 | CloseHandle(threadHdl); 103 | } 104 | } 105 | } 106 | PssWalkMarkerFree(walkMarker); 107 | } 108 | 109 | PssFreeSnapshot(GetCurrentProcess(), snapshotHandle); 110 | #endif 111 | return idOfThreadWithAddrInStack; 112 | } 113 | 114 | #elif defined(_GNU_SOURCE) && (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) // Linux / POSIX 115 | 116 | #include 117 | bool IsAddressInCurrentStack(void* ptr) 118 | { 119 | // TODO: handle failure properly. For now return false on failure. (Assume this is a MT error) 120 | pthread_attr_t attributes; 121 | if (0 != pthread_getattr_np(pthread_self(), &attributes)) return false; 122 | 123 | void* stackAddr; 124 | size_t stackSize; 125 | if (0 != pthread_attr_getstack(&attributes, &stackAddr, &stackSize)) return false; 126 | 127 | // On POSIX, address is indeed the start address (what you would give if allocating yourself) 128 | return stackAddr <= ptr && uintptr_t(ptr) < (uintptr_t(stackAddr) + stackSize); 129 | } 130 | 131 | // There is no way to iterate threads and get their stack address + size. 132 | // Attempted to get information for linux but failed, same conclusion as https://unix.stackexchange.com/questions/758975/how-can-i-locate-the-stacks-of-child-tasks-threads-using-proc-pid-maps 133 | // So in the end we'd need to keep the threadid instead of just a pointer to the current thread stack, which would be too expensive. 134 | // Just let the user inspect the stacks in the debugger instead. 135 | // The alternative is for the user to keep track of their threads and implement this function themselves. 136 | // We could also setup hooks to intercept pthread functions (this is what TSan does) but this is getting too heavy for this mini library. 
137 | uint64_t FindThreadWithPtrInStack(void* ptr, ThreadDescBuffer outDescription) { outDescription[0] = '\0'; return 0; } 138 | 139 | #elif defined(__APPLE__) // MacOS / iOS 140 | // Apple, why do you make it so hard to look for your posix_*_np functions... Just give us docs or something instead of having us dive into the darwin-libpthread code! Didn't bother going further and try to compile/run this. 141 | 142 | #include 143 | bool IsAddressInCurrentStack(void* ptr) 144 | { 145 | pthread_t currentThread = pthread_self(); 146 | void* stackAddr = pthread_get_stackaddr_np(currentThread); 147 | size_t stackSize = pthread_get_stacksize_np(currentThread); 148 | // Stack grows downards, see https://github.com/apple/darwin-libpthread/blob/2b46cbcc56ba33791296cd9714b2c90dae185ec7/src/pthread.c#L979 149 | // The two functions above do NOT match the pthread_attr_tt attributes! https://github.com/apple/darwin-libpthread/blob/2b46cbcc56ba33791296cd9714b2c90dae185ec7/src/pthread.c#L476 150 | return (uintptr_t(stackAddr) - stackSize) <= uintptr_t(ptr) && ptr <= stackAddr; 151 | } 152 | // We could use https://developer.apple.com/documentation/kernel/1537751-task_threads + pthread_from_mach_thread_np + pthread_get_stackaddr/size_np 153 | // Pull Requests are welcome! 154 | uint64_t FindThreadWithPtrInStack(void* ptr, ThreadDescBuffer outDescription) { outDescription[0] = '\0'; return 0; } 155 | 156 | #else // Unknown platform, default to assuming race conditions. 157 | 158 | bool IsAddressInCurrentStack(void* ptr) { return false; } // Who knows ? 
159 | uint64_t FindThreadWithPtrInStack(void* ptr, ThreadDescBuffer outDescription) { outDescription[0] = '\0'; return 0; } 160 | 161 | #endif 162 | 163 | bool DefaultReportBadAccess(StateAndStackAddr previousOperation, BadAccessGuardState toState, bool assertionOrWarning, const char* message); 164 | 165 | BadAccessGuardConfig gBadAccessGuardConfig{ 166 | true, // allowBreak 167 | #ifdef WIN32 168 | (bool)IsDebuggerPresent(), // breakASAP 169 | #else 170 | // Note: Not implementing Linux as it seems there is no trival way to differentiate a tracer process that is a debugger from a profiler. 171 | // Not implementing MacOs as would need to test 172 | false, // breakASAP 173 | #endif 174 | DefaultReportBadAccess, // reportBadAccess 175 | }; 176 | 177 | BadAccessGuardConfig BadAccessGuardGetConfig() { return gBadAccessGuardConfig; } 178 | void BadAccessGuardSetConfig(BadAccessGuardConfig config) 179 | { 180 | if (!config.reportBadAccess) { 181 | config.reportBadAccess = DefaultReportBadAccess; 182 | } 183 | gBadAccessGuardConfig = config; 184 | } 185 | 186 | 187 | // Return true if you want to break (unless breakASAP is set) 188 | bool BadAccessGuardReport(bool assertionOrWarning, const char* fmt, ...); 189 | bool DefaultReportBadAccess(StateAndStackAddr previousOperation, BadAccessGuardState toState, bool assertionOrWarning, const char* message) 190 | { 191 | const BadAccessGuardState previousState = BadAccessGuardShadow::GetState(previousOperation); 192 | const bool fromSameThread = IsAddressInCurrentStack(BadAccessGuardShadow::GetInStackAddr(previousOperation)); 193 | if (message) 194 | { 195 | return BadAccessGuardReport(assertionOrWarning, message); 196 | } 197 | else if (previousState >= BAGuard_StatesCount) 198 | { 199 | return BadAccessGuardReport(assertionOrWarning, "Shadow value was corrupted! This could be due to use after-free, out of bounds writes, etc..."); 200 | } 201 | else 202 | { 203 | const char* stateToStr[] = { 204 | toState == BAGuard_Writing ? 
"Writing" : "Reading", // The only cases when we can see this state are when we try to read, or in the writing guard destructor which means another write ended before. So we know that it can only be due to a write (or corruption). 205 | "Writing", 206 | "Destroyed" 207 | }; 208 | static_assert(sizeof(stateToStr) / sizeof(stateToStr[0]) == BAGuard_StatesCount, "Mismatch, new state added ?"); 209 | 210 | if (fromSameThread) 211 | { 212 | 213 | return BadAccessGuardReport(assertionOrWarning, "Recursion detected: This may lead to invalid operations\n- Parent operation: %s.\n- This operation: %s.", stateToStr[previousState], stateToStr[toState]); 214 | } 215 | else 216 | { 217 | ThreadDescBuffer outDescription; 218 | uint64_t otherThreadId = FindThreadWithPtrInStack(BadAccessGuardShadow::GetInStackAddr(previousOperation), outDescription); 219 | return BadAccessGuardReport(assertionOrWarning, 220 | "Race condition: Multiple threads are reading/writing to the data at the same time, potentially corrupting it!\n- Other thread: %s (Desc=%s Id=%llu)\n- This thread: %s.", 221 | stateToStr[previousState], 222 | outDescription[0] != '\0' ? outDescription : "", 223 | otherThreadId, 224 | stateToStr[toState] 225 | ); 226 | 227 | } 228 | 229 | } 230 | } 231 | 232 | void BA_GUARD_NO_INLINE BAGuardHandleBadAccess(StateAndStackAddr previousOperation, BadAccessGuardState toState, bool assertionOrWarning, const char* message) 233 | { 234 | // If you break here it means that we detected some bad memory access pattern 235 | // It could be that you are mutating a container recursively or a multi-threading race condition 236 | // You can now: 237 | // - Step/Continue to get information about the error (potentially waking offending threads if caused by a race condition) 238 | // - Inspect other threads callstacks (If using Visual Studio: Debug => Windows => Parallel Stacks) 239 | // If the debugger broke and froze the other threads fast enough, you might be able to find the offending thread. 
240 | if (assertionOrWarning && gBadAccessGuardConfig.allowBreak && gBadAccessGuardConfig.breakASAP) BA_GUARD_DEBUGBREAK(); // Break asap in an attempt to catch the other thread in the act ! 241 | 242 | const bool breakAllowed = gBadAccessGuardConfig.reportBadAccess(previousOperation, toState, assertionOrWarning, message); 243 | 244 | if (assertionOrWarning && breakAllowed && gBadAccessGuardConfig.allowBreak && !gBadAccessGuardConfig.breakASAP) BA_GUARD_DEBUGBREAK(); 245 | } 246 | 247 | #include 248 | #include 249 | bool BadAccessGuardReport(bool assertionOrWarning, const char* fmt, ...) 250 | { 251 | va_list args; 252 | va_start(args, fmt); 253 | vfprintf(stderr, fmt, args); 254 | va_end(args); 255 | fputc('\n', stderr); 256 | // Let the caller break 257 | return true; 258 | } 259 | 260 | #endif 261 | -------------------------------------------------------------------------------- /src/BadAccessGuards.h: -------------------------------------------------------------------------------- 1 | // BadAccessGuards v1.0.0 https://github.com/Lectem/BadAccessGuards 2 | #pragma once 3 | 4 | // 1. Define `BAD_ACCESS_GUARDS_ENABLE=1` for your **in-house** builds (you probably want it off in production...) 5 | // 2. Declare the shadow memory that will hold the state and pointer to stack with `BA_GUARD_DECL(varname)` 6 | // 3. For all (relevant) read operations of the container / object, use the scope guard `BA_GUARD_READ(varname)`. 7 | // 4. For all (relevant) write operations of the container / object, use the scope guard `BA_GUARD_WRITE(varname)`. 8 | // Do this only if it always writes! For example, don't use it on `operator[]` even though it returns a reference, use `BA_GUARD_READ` instead. 9 | // 5. Add `BA_GUARD_DESTROY(varname)` at the beginning of the destructor. 10 | // 6. Enjoy! 11 | // 12 | // You may optionally configure it with `BadAccessGuardSetConfig`. 

// Guards are enabled by default unless NDEBUG is defined. Define BAD_ACCESS_GUARDS_ENABLE yourself to override.
#if !defined(BAD_ACCESS_GUARDS_ENABLE)
#   if defined(NDEBUG)
#       define BAD_ACCESS_GUARDS_ENABLE 0
#   else
#       define BAD_ACCESS_GUARDS_ENABLE 1
#   endif
#endif

// Disable the guards when running ThreadSanitizer, we are racy by design!
#if defined(__has_feature) // Clang
#   if __has_feature(thread_sanitizer) // Can't be checked in the same #if as `defined` or it will break old GCC versions
#       undef BAD_ACCESS_GUARDS_ENABLE
#       define BAD_ACCESS_GUARDS_ENABLE 0
#   endif
#elif defined(__SANITIZE_THREAD__) // GCC
#   undef BAD_ACCESS_GUARDS_ENABLE
#   define BAD_ACCESS_GUARDS_ENABLE 0
#endif

#if BAD_ACCESS_GUARDS_ENABLE

#include <cstdint>

// Why we use those macros:
// - BA_GUARD_NO_INLINE: This is to limit performance impact on the fast path.
// - BA_GUARD_GET_PTR_IN_STACK: No other portable way to do it. This must return a pointer to the current stack. Expected to be faster than getting the thread Id (and works with fibers).
// - BA_GUARD_FORCE_INLINE: We want to reduce the overhead in debug builds as much as possible.
// - BA_GUARD_ATOMIC_RELAXED_LOAD/STORE_UPTR: We really don't want to use std::atomic for debug build performance.
//   On top of this, this avoids including std headers for project that may restrict its usage.
// All function-like macros below expand to a single expression WITHOUT a trailing semicolon,
// so that they can be used both as statements and inside larger expressions.
#if defined(_MSC_VER) // MSVC
#   include <intrin.h> // Necessary for _AddressOfReturnAddress
#   define BA_GUARD_NO_INLINE __declspec(noinline)
#   define BA_GUARD_FORCE_INLINE __forceinline // Need to use /d2Obforceinline for MSVC 17.7+ debug builds, otherwise it doesnt work! Not compatible with /Od...
#   define BA_GUARD_GET_PTR_IN_STACK() _AddressOfReturnAddress()
#   ifdef _WIN64 // 64 bits
#       define BA_GUARD_ATOMIC_RELAXED_LOAD_UPTR(var) static_cast<uintptr_t>(__iso_volatile_load64(reinterpret_cast<const volatile __int64*>(&var)))
#       define BA_GUARD_ATOMIC_RELAXED_STORE_UPTR(var, value) __iso_volatile_store64(reinterpret_cast<volatile __int64*>(&var), value)
#   else // Assume 32 bits
#       define BA_GUARD_ATOMIC_RELAXED_LOAD_UPTR(var) static_cast<uintptr_t>(__iso_volatile_load32(reinterpret_cast<const volatile __int32*>(&var)))
#       define BA_GUARD_ATOMIC_RELAXED_STORE_UPTR(var, value) __iso_volatile_store32(reinterpret_cast<volatile __int32*>(&var), value)
#   endif
#   define BA_GUARD_DEBUGBREAK() __debugbreak()
#elif defined(__GNUC__) || defined(__GNUG__) // GCC / clang
#   define BA_GUARD_FORCE_INLINE __attribute__((always_inline))
#   define BA_GUARD_NO_INLINE __attribute__ ((noinline))
#   define BA_GUARD_GET_PTR_IN_STACK() __builtin_frame_address(0)
#   define BA_GUARD_ATOMIC_RELAXED_LOAD_UPTR(var) __atomic_load_n(&var, __ATOMIC_RELAXED)
#   define BA_GUARD_ATOMIC_RELAXED_STORE_UPTR(var, value) __atomic_store_n(&var, value, __ATOMIC_RELAXED)
#   if defined(__clang__)
#       define BA_GUARD_DEBUGBREAK() __builtin_debugtrap()
#   else
// FFS, it's high time GCC implemented __builtin_debugtrap... maybe they will thanks to C++26 adding std::breakpoint?
// Instead we have to write our own assembly.
#       if defined(__i386__) || defined(__x86_64__)
#           define BA_GUARD_DEBUGBREAK() __asm__ volatile("int $0x03")
#       elif defined(__thumb__)
#           define BA_GUARD_DEBUGBREAK() __asm__ volatile(".inst 0xde01")
#       elif defined(__arm__) && !defined(__thumb__)
#           define BA_GUARD_DEBUGBREAK() __asm__ volatile(".inst 0xe7f001f0")
#       else
#           include <cassert>
#           define BA_GUARD_DEBUGBREAK() assert(false)
#       endif
#   endif
#else // Are there really other compilers that don't implement one or the other nowadays? Maybe Intel/NVidia compilers... Well, up to you to implement it for those.
#   error "Unknown compiler"
#endif

// Does not seem to have much impact on performance in simple benchmarks, but might as well use it.
// __has_cpp_attribute(...) is checked in a nested #if: a preprocessor that does not know it
// would otherwise fail to parse the expression even though `defined` is false.
#if defined(__has_cpp_attribute)
#   if __has_cpp_attribute(unlikely) >= 201803L
#       define BA_GUARD_UNLIKELY [[unlikely]]
#   endif
#endif
#if !defined(BA_GUARD_UNLIKELY)
#   define BA_GUARD_UNLIKELY
#endif


// State stored in the low bits of the shadow value.
enum BadAccessGuardState : uintptr_t
{
    BAGuard_ReadingOrIdle = 0,
    BAGuard_Writing = 1,
    BAGuard_DestructorCalled = 2,
    BAGuard_StatesCount
};

// A BadAccessGuardState (low bits) packed together with an address taken inside the stack of the
// thread that last changed the state (remaining bits).
using StateAndStackAddr = uintptr_t;

// The shadow value to embed in each guarded object. Starts as BAGuard_ReadingOrIdle with no address.
struct BadAccessGuardShadow
{
#ifdef _WIN32
    // On Windows all stacks are aligned to page boundaries (both address and size), so we can store the state as a byte to avoid masking!
    static constexpr int BadAccessStateBits = 8;
    static_assert((1 << BadAccessStateBits) <= 4096, "The number of bits used for the state must be smaller than page alignment");
#else
    // Note: On 64bits platforms we could store the state in the upper byte instead, as in userspace the 12 upper bits are unused
    // Left as an exercise for the reader.
    static constexpr int BadAccessStateBits = 2;
    static_assert((1 << BadAccessStateBits) <= alignof(uint32_t), "Assume the stack base and size are at most aligned to your CPU native alignement");
#endif
    static_assert(BAGuard_StatesCount <= (1 << BadAccessStateBits), "BadAccessGuardState must fit in the lower bits");

    static constexpr StateAndStackAddr BadAccessStateMask = (1 << BadAccessStateBits) - 1;
    static constexpr StateAndStackAddr InStackAddrMask = StateAndStackAddr(-1) ^ BadAccessStateMask;

    StateAndStackAddr stateAndInStackAddr{ BAGuard_ReadingOrIdle };

    // Stores `newState` together with an address inside the caller's stack (its low bits are masked out to make room for the state).
    BA_GUARD_FORCE_INLINE void SetStateAtomicRelaxed(BadAccessGuardState newState)
    {
        // All in a single line for debug builds... sorry !
        BA_GUARD_ATOMIC_RELAXED_STORE_UPTR(stateAndInStackAddr, (StateAndStackAddr(BA_GUARD_GET_PTR_IN_STACK()) & InStackAddrMask) | StateAndStackAddr(newState));
    }
    // Those are static because we want to work on copies of the data and not pay for the atomic access
    static BA_GUARD_FORCE_INLINE BadAccessGuardState GetState(StateAndStackAddr packedValue) { return BadAccessGuardState(packedValue & BadAccessStateMask); }
    static BA_GUARD_FORCE_INLINE void* GetInStackAddr(StateAndStackAddr packedValue) { return (void*)StateAndStackAddr(packedValue & InStackAddrMask); }
};

// We have two versions to reduce code size at call site
void BA_GUARD_NO_INLINE BAGuardHandleBadAccess(StateAndStackAddr previousOperation, BadAccessGuardState toState, bool assertionOrWarning, const char* message);
// Both inline and no_inline! inline is necessary because we define it in a header, but still we don't actually want to inline it, hence no-inline.
inline void BA_GUARD_NO_INLINE BAGuardHandleBadAccess(StateAndStackAddr previousOperation, BadAccessGuardState toState) { BAGuardHandleBadAccess(previousOperation, toState, true, nullptr); }

// Scope guard for read operations: checks once, on construction, that the shadow is in the ReadingOrIdle state.
// It never modifies the shadow.
struct BadAccessGuardRead
{
    // We have two versions of the constructor purely for performance
    BA_GUARD_FORCE_INLINE BadAccessGuardRead(BadAccessGuardShadow& shadow)
    {
        const StateAndStackAddr lastSeenOp = BA_GUARD_ATOMIC_RELAXED_LOAD_UPTR(shadow.stateAndInStackAddr);
        if (BadAccessGuardShadow::GetState(lastSeenOp) != BAGuard_ReadingOrIdle) BA_GUARD_UNLIKELY // Early out on fast path
        {
            BAGuardHandleBadAccess(lastSeenOp, BAGuard_ReadingOrIdle);
        }
    }
    // `message` is only forwarded to the reporter, hence const: this also allows passing string literals.
    BA_GUARD_FORCE_INLINE BadAccessGuardRead(BadAccessGuardShadow& shadow, bool assertionOrWarning, const char* message)
    {
        const StateAndStackAddr lastSeenOp = BA_GUARD_ATOMIC_RELAXED_LOAD_UPTR(shadow.stateAndInStackAddr);
        if (BadAccessGuardShadow::GetState(lastSeenOp) != BAGuard_ReadingOrIdle) BA_GUARD_UNLIKELY // Early out on fast path
        {
            BAGuardHandleBadAccess(lastSeenOp, BAGuard_ReadingOrIdle, assertionOrWarning, message);
        }
    }
    // We do not check again after the read itself, it would add too much cost for little benefit. Most of the issues will be caught by the write ops.
};

// Scope guard for write operations.
// Construction: expects ReadingOrIdle, then marks the shadow as Writing.
// Destruction: expects the shadow to still be Writing, then puts it back to ReadingOrIdle.
struct BadAccessGuardWrite
{
    BadAccessGuardShadow& shadow;
    BA_GUARD_FORCE_INLINE BadAccessGuardWrite(BadAccessGuardShadow& shadow)
        : shadow(shadow)
    {
        const StateAndStackAddr lastSeenOp = BA_GUARD_ATOMIC_RELAXED_LOAD_UPTR(shadow.stateAndInStackAddr);
        if (BadAccessGuardShadow::GetState(lastSeenOp) != BAGuard_ReadingOrIdle) BA_GUARD_UNLIKELY
        {
            BAGuardHandleBadAccess(lastSeenOp, BAGuard_Writing);
        }
        shadow.SetStateAtomicRelaxed(BAGuard_Writing); // Always write, so that we may trigger in the other thread too
    }
    BA_GUARD_FORCE_INLINE ~BadAccessGuardWrite()
    {
        const StateAndStackAddr lastSeenOp = BA_GUARD_ATOMIC_RELAXED_LOAD_UPTR(shadow.stateAndInStackAddr);
        if (BadAccessGuardShadow::GetState(lastSeenOp) != BAGuard_Writing) BA_GUARD_UNLIKELY
        {
            BAGuardHandleBadAccess(lastSeenOp, BAGuard_Writing);
        }
        shadow.SetStateAtomicRelaxed(BAGuard_ReadingOrIdle);
    }
};

// Same as BadAccessGuardWrite, but with additional options
// (`assertionOrWarning` and `message` are forwarded to the reporter on both construction and destruction).
struct BadAccessGuardWriteEx
{
    BadAccessGuardShadow& shadow;
    const char* const message;
    const bool assertionOrWarning;
    // `message` is only stored and forwarded, hence const: this also allows passing string literals.
    BA_GUARD_FORCE_INLINE BadAccessGuardWriteEx(BadAccessGuardShadow& d, bool assertionOrWarning = false, const char* message = nullptr)
        : shadow(d)
        , message(message)
        , assertionOrWarning(assertionOrWarning)
    {
        const StateAndStackAddr lastSeenOp = BA_GUARD_ATOMIC_RELAXED_LOAD_UPTR(shadow.stateAndInStackAddr);
        if (BadAccessGuardShadow::GetState(lastSeenOp) != BAGuard_ReadingOrIdle) BA_GUARD_UNLIKELY
        {
            BAGuardHandleBadAccess(lastSeenOp, BAGuard_Writing, assertionOrWarning, message);
        }
        shadow.SetStateAtomicRelaxed(BAGuard_Writing); // Always write, may trigger on other thread too
    }
    BA_GUARD_FORCE_INLINE ~BadAccessGuardWriteEx()
    {
        const StateAndStackAddr lastSeenOp = BA_GUARD_ATOMIC_RELAXED_LOAD_UPTR(shadow.stateAndInStackAddr);
        if (BadAccessGuardShadow::GetState(lastSeenOp) != BAGuard_Writing) BA_GUARD_UNLIKELY
        {
            BAGuardHandleBadAccess(lastSeenOp, BAGuard_Writing, assertionOrWarning, message);
        }
        shadow.SetStateAtomicRelaxed(BAGuard_ReadingOrIdle);
    }
};

// Guard to declare at the beginning of the destructor of the guarded object.
// Expects ReadingOrIdle, then marks the shadow as DestructorCalled. The state is never restored.
struct BadAccessGuardDestroy
{
    BadAccessGuardShadow& shadow;
    BA_GUARD_FORCE_INLINE BadAccessGuardDestroy(BadAccessGuardShadow& shadow)
        : shadow(shadow)
    {
        const StateAndStackAddr lastSeenOp = BA_GUARD_ATOMIC_RELAXED_LOAD_UPTR(shadow.stateAndInStackAddr);
        if (BadAccessGuardShadow::GetState(lastSeenOp) != BAGuard_ReadingOrIdle) BA_GUARD_UNLIKELY
        {
            // NOTE(review): the destruction is reported as a write (BAGuard_Writing) rather than BAGuard_DestructorCalled - confirm this is intended.
            BAGuardHandleBadAccess(lastSeenOp, BAGuard_Writing);
        }
        shadow.SetStateAtomicRelaxed(BAGuard_DestructorCalled); // Always write
    }
};

struct BadAccessGuardConfig
{
    // Should we allow to break at all, or simply call `reportBadAccess`
    // Default: true.
    bool allowBreak;
    // Set this to true if you want to break early.
    // Usually you would want to set this to true when the debugger is connected.
    // If no debugger is connected, you most likely want this set to false to get the error in your logs.
    // Of course, if you save minidumps, logging is probably unnecessary.
    // Default: true on Windows if a debugger is detected during startup, false otherwise.
    bool breakASAP;

    // If non-null, used to report errors instead of the default function.
    // Breaking is still controlled by `allowBreak` and `breakASAP`.
    // Returning false can prevent triggering the breakpoint (except if `breakASAP` is true)
    using ReportBadAccessFunction = bool(StateAndStackAddr previousOperation, BadAccessGuardState toState, bool assertionOrWarning, const char* message);
    ReportBadAccessFunction* reportBadAccess;
};

// Check BAD_ACCESS_GUARDS_ENABLE if you want to use those
BadAccessGuardConfig BadAccessGuardGetConfig();
void BadAccessGuardSetConfig(BadAccessGuardConfig config);

#define BA_GUARD_DECL(SHADOWNAME) mutable BadAccessGuardShadow SHADOWNAME
#define BA_GUARD_READ(SHADOWNAME) BadAccessGuardRead BAGuardRead_##SHADOWNAME{SHADOWNAME}
#define BA_GUARD_READ_EX(SHADOWNAME,ASSERT_OR_WARN,MESSAGE) BadAccessGuardRead BAGuardRead_##SHADOWNAME{SHADOWNAME, (ASSERT_OR_WARN), (MESSAGE)}
#define BA_GUARD_WRITE(SHADOWNAME) BadAccessGuardWrite BAGuardWrite_##SHADOWNAME{SHADOWNAME}
#define BA_GUARD_WRITE_EX(SHADOWNAME,ASSERT_OR_WARN,MESSAGE) BadAccessGuardWriteEx BAGuardWriteEx_##SHADOWNAME{SHADOWNAME, (ASSERT_OR_WARN), (MESSAGE)}
#define BA_GUARD_DESTROY(SHADOWNAME) BadAccessGuardDestroy BAGuardDestroy_##SHADOWNAME{SHADOWNAME}

#else // BAD_ACCESS_GUARDS_ENABLE

// Guards disabled: the declaration disappears and every guard becomes an empty statement.
#define BA_GUARD_DECL(SHADOWNAME)
#define BA_GUARD_READ(SHADOWNAME) do {} while(false)
#define BA_GUARD_READ_EX(SHADOWNAME,ASSERT_OR_WARN,MESSAGE) do {} while(false)
#define BA_GUARD_WRITE(SHADOWNAME) do {} while(false)
#define BA_GUARD_WRITE_EX(SHADOWNAME,ASSERT_OR_WARN,MESSAGE) do {} while(false)
#define BA_GUARD_DESTROY(SHADOWNAME) do {} while(false)

#endif // BAD_ACCESS_GUARDS_ENABLE
// --------------------------------------------------------------------------------