├── README.md ├── css └── index.css ├── img └── fork_me_right_gray_6d6d6d.png ├── index.html └── js ├── calculator.coffee ├── calculator.js ├── calculator.js.map └── index.js /README.md: -------------------------------------------------------------------------------- 1 | # Online CUDA Occupancy Calculator 2 | 3 | https://xmartlabs.github.io/cuda-calculator/ 4 | 5 | Port of the [CUDA Occupancy Calculator spreadsheet](https://docs.nvidia.com/cuda/cuda-occupancy-calculator/CUDA_Occupancy_Calculator.xls). 6 | 7 | ## Changes from upstream 8 | 9 | * Extended support up to Compute Capability 8.6. 10 | * Improved the UI. 11 | 12 | ## Compilation 13 | 14 | ```shell 15 | coffee -c js/calculator.coffee 16 | ``` 17 | 18 | ## Credits 19 | 20 | * [Xmartlabs](https://xmartlabs.github.io/cuda-calculator/) 21 | * [Karthikeyan](https://github.com/lxkarthi/cuda-calculator/) - Improved the online version of cuda-calculator. 22 | * [Mihai Maruseac](https://github.com/mihaimaruseac/cuda-calculator/) - Thanks for the fork. 23 | * [Aliaksei](https://github.com/roadhump/cuda-calculator/) - Original author deleted it. 
:( 24 | -------------------------------------------------------------------------------- /css/index.css: -------------------------------------------------------------------------------- 1 | caption { 2 | caption-side: top; 3 | } 4 | 5 | .table td { 6 | text-align: right; 7 | } 8 | 9 | .c3-axis-x-label, .c3-axis-y-label, .c3-xgrid-line { 10 | font-size: 1rem; 11 | } 12 | 13 | #fork-me { 14 | position: absolute; 15 | top: 0; 16 | right: 0; 17 | border: 0; 18 | } 19 | -------------------------------------------------------------------------------- /img/fork_me_right_gray_6d6d6d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xmartlabs/cuda-calculator/d60116f321cda7cfd414c260d3133e48b4feceea/img/fork_me_right_gray_6d6d6d.png -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | CUDA Occupancy Calculator 7 | 8 | 9 | 11 | 14 | 15 | 16 | 17 | 18 | 19 | 20 |
21 |

CUDA Occupancy Calculator

22 | 23 |
24 |
25 | 26 | 44 |
45 |
46 | 47 | 51 |
52 |
53 | 54 | 55 |
56 |
57 | 58 | 59 |
60 | 61 |
62 | 64 | bytes 65 |
66 |
67 | 68 |
69 |
70 | 71 | 202 |
203 | 204 | 205 | Fork me on GitHub 206 | 207 | 208 | 211 | 212 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | -------------------------------------------------------------------------------- /js/calculator.coffee: -------------------------------------------------------------------------------- 1 | mainConfig = 2 | '2.0': 3 | version: '2.0' 4 | threadsPerWarp: 32 5 | warpsPerMultiprocessor: 48 6 | threadsPerMultiprocessor: 1536 7 | threadBlocksPerMultiprocessor: 8 8 | sharedMemoryPerMultiprocessor: 49152 9 | 10 | registerFileSize: 32768 11 | registerAllocationUnitSize: 64 12 | 13 | allocationGranularity: 'warp' 14 | maxRegistersPerThread: 63 15 | maxRegistersPerBlock: 32768 16 | sharedMemoryAllocationUnitSize: 128 17 | warpAllocationGranularity: 2 18 | maxThreadBlockSize: 1024 19 | 20 | '2.1': 21 | version: '2.1' 22 | threadsPerWarp: 32 23 | warpsPerMultiprocessor: 48 24 | threadsPerMultiprocessor: 1536 25 | threadBlocksPerMultiprocessor: 8 26 | sharedMemoryPerMultiprocessor: 49152 27 | 28 | registerFileSize: 32768 29 | registerAllocationUnitSize: 64 30 | 31 | allocationGranularity: 'warp' 32 | maxRegistersPerThread: 63 33 | maxRegistersPerBlock: 32768 34 | sharedMemoryAllocationUnitSize: 128 35 | warpAllocationGranularity: 2 36 | maxThreadBlockSize: 1024 37 | 38 | '3.0': 39 | version: '3.0' 40 | threadsPerWarp: 32 41 | warpsPerMultiprocessor: 64 42 | threadsPerMultiprocessor: 2048 43 | threadBlocksPerMultiprocessor: 16 44 | sharedMemoryPerMultiprocessor: 49152 45 | 46 | registerFileSize: 65536 47 | registerAllocationUnitSize: 256 48 | 49 | allocationGranularity: 'warp' 50 | maxRegistersPerThread: 63 51 | maxRegistersPerBlock: 65536 52 | sharedMemoryAllocationUnitSize: 256 53 | warpAllocationGranularity: 4 54 | maxThreadBlockSize: 1024 55 | 56 | '3.2': 57 | version: '3.2' 58 | threadsPerWarp: 32 59 | warpsPerMultiprocessor: 64 60 | threadsPerMultiprocessor: 2048 61 | threadBlocksPerMultiprocessor: 16 62 | sharedMemoryPerMultiprocessor: 49152 63 | 64 | 
registerFileSize: 65536 65 | registerAllocationUnitSize: 256 66 | 67 | allocationGranularity: 'warp' 68 | maxRegistersPerThread: 255 69 | maxRegistersPerBlock: 65536 70 | sharedMemoryAllocationUnitSize: 256 71 | warpAllocationGranularity: 4 72 | maxThreadBlockSize: 1024 73 | 74 | '3.5': 75 | version: '3.5' 76 | threadsPerWarp: 32 77 | warpsPerMultiprocessor: 64 78 | threadsPerMultiprocessor: 2048 79 | threadBlocksPerMultiprocessor: 16 80 | sharedMemoryPerMultiprocessor: 49152 81 | 82 | registerFileSize: 65536 83 | registerAllocationUnitSize: 256 84 | 85 | allocationGranularity: 'warp' 86 | maxRegistersPerThread: 255 87 | maxRegistersPerBlock: 65536 88 | sharedMemoryAllocationUnitSize: 256 89 | warpAllocationGranularity: 4 90 | maxThreadBlockSize: 1024 91 | 92 | '3.7': 93 | version: '3.7' 94 | threadsPerWarp: 32 95 | warpsPerMultiprocessor: 64 96 | threadsPerMultiprocessor: 2048 97 | threadBlocksPerMultiprocessor: 16 98 | sharedMemoryPerMultiprocessor: 114688 99 | 100 | registerFileSize: 131072 101 | registerAllocationUnitSize: 256 102 | 103 | allocationGranularity: 'warp' 104 | maxRegistersPerThread: 255 105 | maxRegistersPerBlock: 65536 106 | sharedMemoryAllocationUnitSize: 256 107 | warpAllocationGranularity: 4 108 | maxThreadBlockSize: 1024 109 | 110 | '5.0': 111 | version: '5.0' 112 | threadsPerWarp: 32 113 | warpsPerMultiprocessor: 64 114 | threadsPerMultiprocessor: 2048 115 | threadBlocksPerMultiprocessor: 32 116 | sharedMemoryPerMultiprocessor: 65536 117 | 118 | registerFileSize: 65536 119 | registerAllocationUnitSize: 256 120 | 121 | allocationGranularity: 'warp' 122 | maxRegistersPerThread: 255 123 | maxRegistersPerBlock: 65536 124 | sharedMemoryAllocationUnitSize: 256 125 | warpAllocationGranularity: 4 126 | maxThreadBlockSize: 1024 127 | 128 | '5.2': 129 | version: '5.2' 130 | threadsPerWarp: 32 131 | warpsPerMultiprocessor: 64 132 | threadsPerMultiprocessor: 2048 133 | threadBlocksPerMultiprocessor: 32 134 | sharedMemoryPerMultiprocessor: 98304 135 | 
136 | registerFileSize: 65536 137 | registerAllocationUnitSize: 256 138 | 139 | allocationGranularity: 'warp' 140 | maxRegistersPerThread: 255 141 | maxRegistersPerBlock: 32768 142 | sharedMemoryAllocationUnitSize: 256 143 | warpAllocationGranularity: 4 144 | maxThreadBlockSize: 1024 145 | 146 | '5.3': 147 | version: '5.3' 148 | threadsPerWarp: 32 149 | warpsPerMultiprocessor: 64 150 | threadsPerMultiprocessor: 2048 151 | threadBlocksPerMultiprocessor: 32 152 | sharedMemoryPerMultiprocessor: 65536 153 | 154 | registerFileSize: 65536 155 | registerAllocationUnitSize: 256 156 | 157 | allocationGranularity: 'warp' 158 | maxRegistersPerThread: 255 159 | maxRegistersPerBlock: 32768 160 | sharedMemoryAllocationUnitSize: 256 161 | warpAllocationGranularity: 4 162 | maxThreadBlockSize: 1024 163 | 164 | '6.0': 165 | version: '6.0' 166 | threadsPerWarp: 32 167 | warpsPerMultiprocessor: 64 168 | threadsPerMultiprocessor: 2048 169 | threadBlocksPerMultiprocessor: 32 170 | sharedMemoryPerMultiprocessor: 65536 171 | 172 | registerFileSize: 65536 173 | registerAllocationUnitSize: 256 174 | 175 | allocationGranularity: 'warp' 176 | maxRegistersPerThread: 255 177 | maxRegistersPerBlock: 65536 178 | sharedMemoryAllocationUnitSize: 256 179 | warpAllocationGranularity: 2 180 | maxThreadBlockSize: 1024 181 | 182 | '6.1': 183 | version: '6.1' 184 | threadsPerWarp: 32 185 | warpsPerMultiprocessor: 64 186 | threadsPerMultiprocessor: 2048 187 | threadBlocksPerMultiprocessor: 32 188 | sharedMemoryPerMultiprocessor: 98304 189 | 190 | registerFileSize: 65536 191 | registerAllocationUnitSize: 256 192 | 193 | allocationGranularity: 'warp' 194 | maxRegistersPerThread: 255 195 | maxRegistersPerBlock: 65536 196 | sharedMemoryAllocationUnitSize: 256 197 | warpAllocationGranularity: 4 198 | maxThreadBlockSize: 1024 199 | 200 | '6.2': 201 | version: '6.2' 202 | threadsPerWarp: 32 203 | warpsPerMultiprocessor: 64 204 | threadsPerMultiprocessor: 2048 205 | threadBlocksPerMultiprocessor: 32 206 | 
sharedMemoryPerMultiprocessor: 65536 207 | 208 | registerFileSize: 65536 209 | registerAllocationUnitSize: 256 210 | 211 | allocationGranularity: 'warp' 212 | maxRegistersPerThread: 255 213 | maxRegistersPerBlock: 65536 214 | sharedMemoryAllocationUnitSize: 256 215 | warpAllocationGranularity: 4 216 | maxThreadBlockSize: 1024 217 | 218 | '7.0': 219 | version: '7.0' 220 | threadsPerWarp: 32 221 | warpsPerMultiprocessor: 64 222 | threadsPerMultiprocessor: 2048 223 | threadBlocksPerMultiprocessor: 32 224 | sharedMemoryPerMultiprocessor: 98304 225 | 226 | registerFileSize: 65536 227 | registerAllocationUnitSize: 256 228 | 229 | allocationGranularity: 'warp' 230 | maxRegistersPerThread: 255 231 | maxRegistersPerBlock: 65536 232 | sharedMemoryAllocationUnitSize: 256 233 | warpAllocationGranularity: 4 234 | maxThreadBlockSize: 1024 235 | 236 | '7.5': 237 | version: '7.5' 238 | threadsPerWarp: 32 239 | warpsPerMultiprocessor: 32 240 | threadsPerMultiprocessor: 1024 241 | threadBlocksPerMultiprocessor: 16 242 | sharedMemoryPerMultiprocessor: 65536 243 | 244 | registerFileSize: 65536 245 | registerAllocationUnitSize: 256 246 | 247 | allocationGranularity: 'warp' 248 | maxRegistersPerThread: 255 249 | maxRegistersPerBlock: 65536 250 | sharedMemoryAllocationUnitSize: 256 251 | warpAllocationGranularity: 4 252 | maxThreadBlockSize: 1024 253 | 254 | '8.0': 255 | version: '8.0' 256 | threadsPerWarp: 32 257 | warpsPerMultiprocessor: 64 258 | threadsPerMultiprocessor: 2048 259 | threadBlocksPerMultiprocessor: 32 260 | sharedMemoryPerMultiprocessor: 167936 261 | 262 | registerFileSize: 65536 263 | registerAllocationUnitSize: 256 264 | 265 | allocationGranularity: 'warp' 266 | maxRegistersPerThread: 255 267 | maxRegistersPerBlock: 65536 268 | sharedMemoryAllocationUnitSize: 128 269 | warpAllocationGranularity: 4 270 | maxThreadBlockSize: 1024 271 | 272 | '8.6': 273 | version: '8.6' 274 | threadsPerWarp: 32 275 | warpsPerMultiprocessor: 48 276 | threadsPerMultiprocessor: 1536 277 | 
threadBlocksPerMultiprocessor: 16 278 | sharedMemoryPerMultiprocessor: 102400 279 | 280 | registerFileSize: 65536 281 | registerAllocationUnitSize: 256 282 | 283 | allocationGranularity: 'warp' 284 | maxRegistersPerThread: 255 285 | maxRegistersPerBlock: 65536 286 | sharedMemoryAllocationUnitSize: 128 287 | warpAllocationGranularity: 4 288 | maxThreadBlockSize: 1024 289 | 290 | # Starting with Compute Capability 8.x, CUDA runtime uses 1KB of Shared Memory 291 | cudaRuntimeUsedSharedMemory = 292 | '11.0': 1024 293 | '11.1': 1024 294 | 295 | ceil = (a, b) -> Math.ceil(a / b) * b 296 | 297 | floor = (a, b) -> Math.floor(a / b) * b 298 | 299 | 300 | window.calculateOccupancy = (input) -> 301 | config = mainConfig[input.version] 302 | 303 | # number of warps per block 304 | blockWarps = () -> 305 | Math.ceil(input.threadsPerBlock / config.threadsPerWarp) 306 | 307 | # number of registers per warp 308 | registersPerWarp = () -> 309 | ceil(input.registersPerThread * config.threadsPerWarp, config.registerAllocationUnitSize) 310 | 311 | # number of registers per block 312 | blockRegisters = () -> 313 | registersPerWarp() * blockWarps() 314 | 315 | # maximum warps per SM when limited by registers 316 | warpsPerMultiprocessorLimitedByRegisters = () -> 317 | floor(config.maxRegistersPerBlock / registersPerWarp(), config.warpAllocationGranularity) 318 | 319 | # starting with Compute Capability 8.x, the CUDA runtime consumes 1KB of shared memory 320 | # the amount might change depending on the CUDA runtime version in the future 321 | blockCudaRuntimeSharedMemory = () -> 322 | if Number.parseFloat(input.version) >= 8 323 | cudaRuntimeUsedSharedMemory[input.cudaVersion] 324 | else 325 | 0 326 | 327 | # shared memory per thread block 328 | blockSharedMemory = () -> 329 | ceil( 330 | Number.parseInt(input.sharedMemoryPerBlock) + blockCudaRuntimeSharedMemory(), 331 | config.sharedMemoryAllocationUnitSize 332 | ) 333 | 334 | 
threadBlocksPerMultiprocessorLimitedByWarpsOrBlocksPerMultiprocessor = () -> 335 | Math.min( 336 | config.threadBlocksPerMultiprocessor, Math.floor(config.warpsPerMultiprocessor / blockWarps()) 337 | ) 338 | 339 | threadBlocksPerMultiprocessorLimitedByRegistersPerMultiprocessor = () -> 340 | if input.registersPerThread > config.maxRegistersPerThread 341 | 0 342 | else if input.registersPerThread > 0 343 | Math.floor(warpsPerMultiprocessorLimitedByRegisters() / blockWarps()) * 344 | Math.floor(config.registerFileSize / config.maxRegistersPerBlock) 345 | else 346 | config.threadBlocksPerMultiprocessor 347 | 348 | threadBlocksPerMultiprocessorLimitedBySharedMemoryPerMultiprocessor = () -> 349 | if input.sharedMemoryPerBlock > 0 350 | Math.floor(config.sharedMemoryPerMultiprocessor / blockSharedMemory()) 351 | else 352 | config.threadBlocksPerMultiprocessor 353 | 354 | activeThreadsPerMultiprocessor = () -> 355 | input.threadsPerBlock * activeThreadBlocksPerMultiprocessor() 356 | 357 | activeWarpsPerMultiprocessor = () -> 358 | activeThreadBlocksPerMultiprocessor() * blockWarps() 359 | 360 | activeThreadBlocksPerMultiprocessor = () -> 361 | Math.min( 362 | threadBlocksPerMultiprocessorLimitedByWarpsOrBlocksPerMultiprocessor(), 363 | threadBlocksPerMultiprocessorLimitedByRegistersPerMultiprocessor(), 364 | threadBlocksPerMultiprocessorLimitedBySharedMemoryPerMultiprocessor() 365 | ) 366 | 367 | occupancyOfMultiprocessor = () -> 368 | activeWarpsPerMultiprocessor() / config.warpsPerMultiprocessor 369 | 370 | output = 371 | activeThreadsPerMultiprocessor: activeThreadsPerMultiprocessor() 372 | activeWarpsPerMultiprocessor: activeWarpsPerMultiprocessor() 373 | activeThreadBlocksPerMultiprocessor: activeThreadBlocksPerMultiprocessor() 374 | occupancyOfMultiprocessor: occupancyOfMultiprocessor() 375 | 376 | blockWarps: blockWarps() 377 | blockSharedMemory: blockSharedMemory() 378 | blockCudaRuntimeSharedMemory: blockCudaRuntimeSharedMemory() 379 | blockRegisters: 
blockRegisters() 380 | 381 | threadBlocksPerMultiprocessorLimitedByWarpsOrBlocksPerMultiprocessor: 382 | threadBlocksPerMultiprocessorLimitedByWarpsOrBlocksPerMultiprocessor() 383 | threadBlocksPerMultiprocessorLimitedByRegistersPerMultiprocessor: 384 | threadBlocksPerMultiprocessorLimitedByRegistersPerMultiprocessor() 385 | threadBlocksPerMultiprocessorLimitedBySharedMemoryPerMultiprocessor: 386 | threadBlocksPerMultiprocessorLimitedBySharedMemoryPerMultiprocessor() 387 | 388 | return Object.assign(output, config) 389 | 390 | window.computeGraphsValues = (input) -> 391 | config = mainConfig[input.version] 392 | 393 | graphWarpOccupancyOfThreadsPerBlock = () -> 394 | current = 395 | key: input.threadsPerBlock 396 | value: window.calculateOccupancy(input).activeWarpsPerMultiprocessor 397 | 398 | inp = Object.assign({}, input) # Shallow copy. 399 | r = [] 400 | for threadsPerBlock in [32..config.maxThreadBlockSize] by 32 401 | inp.threadsPerBlock = threadsPerBlock 402 | 403 | r.push({ 404 | key: threadsPerBlock 405 | value: window.calculateOccupancy(inp).activeWarpsPerMultiprocessor 406 | }) 407 | 408 | return { 409 | xLabel: "Threads per block" 410 | data: r 411 | current: current 412 | } 413 | 414 | graphWarpOccupancyOfRegistersPerThread = () -> 415 | current = 416 | key: input.registersPerThread 417 | value: window.calculateOccupancy(input).activeWarpsPerMultiprocessor 418 | 419 | inp = Object.assign({}, input) # Shallow copy. 
420 | r = [] 421 | for registersPerThread in [0..config.maxRegistersPerThread] 422 | inp.registersPerThread = registersPerThread 423 | 424 | r.push({ 425 | key: registersPerThread 426 | value: window.calculateOccupancy(inp).activeWarpsPerMultiprocessor 427 | }) 428 | 429 | return { 430 | xLabel: "Registers per thread" 431 | data: r 432 | current: current 433 | } 434 | 435 | graphWarpOccupancyOfSharedMemoryPerBlock = () -> 436 | current = 437 | key: input.sharedMemoryPerBlock 438 | value: window.calculateOccupancy(input).activeWarpsPerMultiprocessor 439 | 440 | inp = Object.assign({}, input) # Shallow copy. 441 | r = [] 442 | for sharedMemoryPerBlock in [0..config.sharedMemoryPerMultiprocessor] by 512 443 | inp.sharedMemoryPerBlock = sharedMemoryPerBlock 444 | 445 | r.push({ 446 | key: sharedMemoryPerBlock 447 | value: window.calculateOccupancy(inp).activeWarpsPerMultiprocessor 448 | }) 449 | 450 | return { 451 | xLabel: "Shared memory per block" 452 | data: r 453 | current: current 454 | } 455 | 456 | return { 457 | graphWarpOccupancyOfThreadsPerBlock: graphWarpOccupancyOfThreadsPerBlock() 458 | graphWarpOccupancyOfRegistersPerThread: graphWarpOccupancyOfRegistersPerThread() 459 | graphWarpOccupancyOfSharedMemoryPerBlock: graphWarpOccupancyOfSharedMemoryPerBlock() 460 | } 461 | -------------------------------------------------------------------------------- /js/calculator.js: -------------------------------------------------------------------------------- 1 | // Generated by CoffeeScript 1.12.7 2 | (function() { 3 | var ceil, cudaRuntimeUsedSharedMemory, floor, mainConfig; 4 | 5 | mainConfig = { 6 | '2.0': { 7 | version: '2.0', 8 | threadsPerWarp: 32, 9 | warpsPerMultiprocessor: 48, 10 | threadsPerMultiprocessor: 1536, 11 | threadBlocksPerMultiprocessor: 8, 12 | sharedMemoryPerMultiprocessor: 49152, 13 | registerFileSize: 32768, 14 | registerAllocationUnitSize: 64, 15 | allocationGranularity: 'warp', 16 | maxRegistersPerThread: 63, 17 | maxRegistersPerBlock: 32768, 
18 | sharedMemoryAllocationUnitSize: 128, 19 | warpAllocationGranularity: 2, 20 | maxThreadBlockSize: 1024 21 | }, 22 | '2.1': { 23 | version: '2.1', 24 | threadsPerWarp: 32, 25 | warpsPerMultiprocessor: 48, 26 | threadsPerMultiprocessor: 1536, 27 | threadBlocksPerMultiprocessor: 8, 28 | sharedMemoryPerMultiprocessor: 49152, 29 | registerFileSize: 32768, 30 | registerAllocationUnitSize: 64, 31 | allocationGranularity: 'warp', 32 | maxRegistersPerThread: 63, 33 | maxRegistersPerBlock: 32768, 34 | sharedMemoryAllocationUnitSize: 128, 35 | warpAllocationGranularity: 2, 36 | maxThreadBlockSize: 1024 37 | }, 38 | '3.0': { 39 | version: '3.0', 40 | threadsPerWarp: 32, 41 | warpsPerMultiprocessor: 64, 42 | threadsPerMultiprocessor: 2048, 43 | threadBlocksPerMultiprocessor: 16, 44 | sharedMemoryPerMultiprocessor: 49152, 45 | registerFileSize: 65536, 46 | registerAllocationUnitSize: 256, 47 | allocationGranularity: 'warp', 48 | maxRegistersPerThread: 63, 49 | maxRegistersPerBlock: 65536, 50 | sharedMemoryAllocationUnitSize: 256, 51 | warpAllocationGranularity: 4, 52 | maxThreadBlockSize: 1024 53 | }, 54 | '3.2': { 55 | version: '3.2', 56 | threadsPerWarp: 32, 57 | warpsPerMultiprocessor: 64, 58 | threadsPerMultiprocessor: 2048, 59 | threadBlocksPerMultiprocessor: 16, 60 | sharedMemoryPerMultiprocessor: 49152, 61 | registerFileSize: 65536, 62 | registerAllocationUnitSize: 256, 63 | allocationGranularity: 'warp', 64 | maxRegistersPerThread: 255, 65 | maxRegistersPerBlock: 65536, 66 | sharedMemoryAllocationUnitSize: 256, 67 | warpAllocationGranularity: 4, 68 | maxThreadBlockSize: 1024 69 | }, 70 | '3.5': { 71 | version: '3.5', 72 | threadsPerWarp: 32, 73 | warpsPerMultiprocessor: 64, 74 | threadsPerMultiprocessor: 2048, 75 | threadBlocksPerMultiprocessor: 16, 76 | sharedMemoryPerMultiprocessor: 49152, 77 | registerFileSize: 65536, 78 | registerAllocationUnitSize: 256, 79 | allocationGranularity: 'warp', 80 | maxRegistersPerThread: 255, 81 | maxRegistersPerBlock: 65536, 82 | 
sharedMemoryAllocationUnitSize: 256, 83 | warpAllocationGranularity: 4, 84 | maxThreadBlockSize: 1024 85 | }, 86 | '3.7': { 87 | version: '3.7', 88 | threadsPerWarp: 32, 89 | warpsPerMultiprocessor: 64, 90 | threadsPerMultiprocessor: 2048, 91 | threadBlocksPerMultiprocessor: 16, 92 | sharedMemoryPerMultiprocessor: 114688, 93 | registerFileSize: 131072, 94 | registerAllocationUnitSize: 256, 95 | allocationGranularity: 'warp', 96 | maxRegistersPerThread: 255, 97 | maxRegistersPerBlock: 65536, 98 | sharedMemoryAllocationUnitSize: 256, 99 | warpAllocationGranularity: 4, 100 | maxThreadBlockSize: 1024 101 | }, 102 | '5.0': { 103 | version: '5.0', 104 | threadsPerWarp: 32, 105 | warpsPerMultiprocessor: 64, 106 | threadsPerMultiprocessor: 2048, 107 | threadBlocksPerMultiprocessor: 32, 108 | sharedMemoryPerMultiprocessor: 65536, 109 | registerFileSize: 65536, 110 | registerAllocationUnitSize: 256, 111 | allocationGranularity: 'warp', 112 | maxRegistersPerThread: 255, 113 | maxRegistersPerBlock: 65536, 114 | sharedMemoryAllocationUnitSize: 256, 115 | warpAllocationGranularity: 4, 116 | maxThreadBlockSize: 1024 117 | }, 118 | '5.2': { 119 | version: '5.2', 120 | threadsPerWarp: 32, 121 | warpsPerMultiprocessor: 64, 122 | threadsPerMultiprocessor: 2048, 123 | threadBlocksPerMultiprocessor: 32, 124 | sharedMemoryPerMultiprocessor: 98304, 125 | registerFileSize: 65536, 126 | registerAllocationUnitSize: 256, 127 | allocationGranularity: 'warp', 128 | maxRegistersPerThread: 255, 129 | maxRegistersPerBlock: 32768, 130 | sharedMemoryAllocationUnitSize: 256, 131 | warpAllocationGranularity: 4, 132 | maxThreadBlockSize: 1024 133 | }, 134 | '5.3': { 135 | version: '5.3', 136 | threadsPerWarp: 32, 137 | warpsPerMultiprocessor: 64, 138 | threadsPerMultiprocessor: 2048, 139 | threadBlocksPerMultiprocessor: 32, 140 | sharedMemoryPerMultiprocessor: 65536, 141 | registerFileSize: 65536, 142 | registerAllocationUnitSize: 256, 143 | allocationGranularity: 'warp', 144 | maxRegistersPerThread: 
255, 145 | maxRegistersPerBlock: 32768, 146 | sharedMemoryAllocationUnitSize: 256, 147 | warpAllocationGranularity: 4, 148 | maxThreadBlockSize: 1024 149 | }, 150 | '6.0': { 151 | version: '6.0', 152 | threadsPerWarp: 32, 153 | warpsPerMultiprocessor: 64, 154 | threadsPerMultiprocessor: 2048, 155 | threadBlocksPerMultiprocessor: 32, 156 | sharedMemoryPerMultiprocessor: 65536, 157 | registerFileSize: 65536, 158 | registerAllocationUnitSize: 256, 159 | allocationGranularity: 'warp', 160 | maxRegistersPerThread: 255, 161 | maxRegistersPerBlock: 65536, 162 | sharedMemoryAllocationUnitSize: 256, 163 | warpAllocationGranularity: 2, 164 | maxThreadBlockSize: 1024 165 | }, 166 | '6.1': { 167 | version: '6.1', 168 | threadsPerWarp: 32, 169 | warpsPerMultiprocessor: 64, 170 | threadsPerMultiprocessor: 2048, 171 | threadBlocksPerMultiprocessor: 32, 172 | sharedMemoryPerMultiprocessor: 98304, 173 | registerFileSize: 65536, 174 | registerAllocationUnitSize: 256, 175 | allocationGranularity: 'warp', 176 | maxRegistersPerThread: 255, 177 | maxRegistersPerBlock: 65536, 178 | sharedMemoryAllocationUnitSize: 256, 179 | warpAllocationGranularity: 4, 180 | maxThreadBlockSize: 1024 181 | }, 182 | '6.2': { 183 | version: '6.2', 184 | threadsPerWarp: 32, 185 | warpsPerMultiprocessor: 64, 186 | threadsPerMultiprocessor: 2048, 187 | threadBlocksPerMultiprocessor: 32, 188 | sharedMemoryPerMultiprocessor: 65536, 189 | registerFileSize: 65536, 190 | registerAllocationUnitSize: 256, 191 | allocationGranularity: 'warp', 192 | maxRegistersPerThread: 255, 193 | maxRegistersPerBlock: 65536, 194 | sharedMemoryAllocationUnitSize: 256, 195 | warpAllocationGranularity: 4, 196 | maxThreadBlockSize: 1024 197 | }, 198 | '7.0': { 199 | version: '7.0', 200 | threadsPerWarp: 32, 201 | warpsPerMultiprocessor: 64, 202 | threadsPerMultiprocessor: 2048, 203 | threadBlocksPerMultiprocessor: 32, 204 | sharedMemoryPerMultiprocessor: 98304, 205 | registerFileSize: 65536, 206 | registerAllocationUnitSize: 256, 207 | 
allocationGranularity: 'warp', 208 | maxRegistersPerThread: 255, 209 | maxRegistersPerBlock: 65536, 210 | sharedMemoryAllocationUnitSize: 256, 211 | warpAllocationGranularity: 4, 212 | maxThreadBlockSize: 1024 213 | }, 214 | '7.5': { 215 | version: '7.5', 216 | threadsPerWarp: 32, 217 | warpsPerMultiprocessor: 32, 218 | threadsPerMultiprocessor: 1024, 219 | threadBlocksPerMultiprocessor: 16, 220 | sharedMemoryPerMultiprocessor: 65536, 221 | registerFileSize: 65536, 222 | registerAllocationUnitSize: 256, 223 | allocationGranularity: 'warp', 224 | maxRegistersPerThread: 255, 225 | maxRegistersPerBlock: 65536, 226 | sharedMemoryAllocationUnitSize: 256, 227 | warpAllocationGranularity: 4, 228 | maxThreadBlockSize: 1024 229 | }, 230 | '8.0': { 231 | version: '8.0', 232 | threadsPerWarp: 32, 233 | warpsPerMultiprocessor: 64, 234 | threadsPerMultiprocessor: 2048, 235 | threadBlocksPerMultiprocessor: 32, 236 | sharedMemoryPerMultiprocessor: 167936, 237 | registerFileSize: 65536, 238 | registerAllocationUnitSize: 256, 239 | allocationGranularity: 'warp', 240 | maxRegistersPerThread: 255, 241 | maxRegistersPerBlock: 65536, 242 | sharedMemoryAllocationUnitSize: 128, 243 | warpAllocationGranularity: 4, 244 | maxThreadBlockSize: 1024 245 | }, 246 | '8.6': { 247 | version: '8.6', 248 | threadsPerWarp: 32, 249 | warpsPerMultiprocessor: 48, 250 | threadsPerMultiprocessor: 1536, 251 | threadBlocksPerMultiprocessor: 16, 252 | sharedMemoryPerMultiprocessor: 102400, 253 | registerFileSize: 65536, 254 | registerAllocationUnitSize: 256, 255 | allocationGranularity: 'warp', 256 | maxRegistersPerThread: 255, 257 | maxRegistersPerBlock: 65536, 258 | sharedMemoryAllocationUnitSize: 128, 259 | warpAllocationGranularity: 4, 260 | maxThreadBlockSize: 1024 261 | } 262 | }; 263 | 264 | cudaRuntimeUsedSharedMemory = { 265 | '11.0': 1024, 266 | '11.1': 1024 267 | }; 268 | 269 | ceil = function(a, b) { 270 | return Math.ceil(a / b) * b; 271 | }; 272 | 273 | floor = function(a, b) { 274 | return 
Math.floor(a / b) * b; 275 | }; 276 | 277 | window.calculateOccupancy = function(input) { 278 | var activeThreadBlocksPerMultiprocessor, activeThreadsPerMultiprocessor, activeWarpsPerMultiprocessor, blockCudaRuntimeSharedMemory, blockRegisters, blockSharedMemory, blockWarps, config, occupancyOfMultiprocessor, output, registersPerWarp, threadBlocksPerMultiprocessorLimitedByRegistersPerMultiprocessor, threadBlocksPerMultiprocessorLimitedBySharedMemoryPerMultiprocessor, threadBlocksPerMultiprocessorLimitedByWarpsOrBlocksPerMultiprocessor, warpsPerMultiprocessorLimitedByRegisters; 279 | config = mainConfig[input.version]; 280 | blockWarps = function() { 281 | return Math.ceil(input.threadsPerBlock / config.threadsPerWarp); 282 | }; 283 | registersPerWarp = function() { 284 | return ceil(input.registersPerThread * config.threadsPerWarp, config.registerAllocationUnitSize); 285 | }; 286 | blockRegisters = function() { 287 | return registersPerWarp() * blockWarps(); 288 | }; 289 | warpsPerMultiprocessorLimitedByRegisters = function() { 290 | return floor(config.maxRegistersPerBlock / registersPerWarp(), config.warpAllocationGranularity); 291 | }; 292 | blockCudaRuntimeSharedMemory = function() { 293 | if (Number.parseFloat(input.version) >= 8) { 294 | return cudaRuntimeUsedSharedMemory[input.cudaVersion]; 295 | } else { 296 | return 0; 297 | } 298 | }; 299 | blockSharedMemory = function() { 300 | return ceil(Number.parseInt(input.sharedMemoryPerBlock) + blockCudaRuntimeSharedMemory(), config.sharedMemoryAllocationUnitSize); 301 | }; 302 | threadBlocksPerMultiprocessorLimitedByWarpsOrBlocksPerMultiprocessor = function() { 303 | return Math.min(config.threadBlocksPerMultiprocessor, Math.floor(config.warpsPerMultiprocessor / blockWarps())); 304 | }; 305 | threadBlocksPerMultiprocessorLimitedByRegistersPerMultiprocessor = function() { 306 | if (input.registersPerThread > config.maxRegistersPerThread) { 307 | return 0; 308 | } else if (input.registersPerThread > 0) { 309 | 
return Math.floor(warpsPerMultiprocessorLimitedByRegisters() / blockWarps()) * Math.floor(config.registerFileSize / config.maxRegistersPerBlock); 310 | } else { 311 | return config.threadBlocksPerMultiprocessor; 312 | } 313 | }; 314 | threadBlocksPerMultiprocessorLimitedBySharedMemoryPerMultiprocessor = function() { 315 | if (input.sharedMemoryPerBlock > 0) { 316 | return Math.floor(config.sharedMemoryPerMultiprocessor / blockSharedMemory()); 317 | } else { 318 | return config.threadBlocksPerMultiprocessor; 319 | } 320 | }; 321 | activeThreadsPerMultiprocessor = function() { 322 | return input.threadsPerBlock * activeThreadBlocksPerMultiprocessor(); 323 | }; 324 | activeWarpsPerMultiprocessor = function() { 325 | return activeThreadBlocksPerMultiprocessor() * blockWarps(); 326 | }; 327 | activeThreadBlocksPerMultiprocessor = function() { 328 | return Math.min(threadBlocksPerMultiprocessorLimitedByWarpsOrBlocksPerMultiprocessor(), threadBlocksPerMultiprocessorLimitedByRegistersPerMultiprocessor(), threadBlocksPerMultiprocessorLimitedBySharedMemoryPerMultiprocessor()); 329 | }; 330 | occupancyOfMultiprocessor = function() { 331 | return activeWarpsPerMultiprocessor() / config.warpsPerMultiprocessor; 332 | }; 333 | output = { 334 | activeThreadsPerMultiprocessor: activeThreadsPerMultiprocessor(), 335 | activeWarpsPerMultiprocessor: activeWarpsPerMultiprocessor(), 336 | activeThreadBlocksPerMultiprocessor: activeThreadBlocksPerMultiprocessor(), 337 | occupancyOfMultiprocessor: occupancyOfMultiprocessor(), 338 | blockWarps: blockWarps(), 339 | blockSharedMemory: blockSharedMemory(), 340 | blockCudaRuntimeSharedMemory: blockCudaRuntimeSharedMemory(), 341 | blockRegisters: blockRegisters(), 342 | threadBlocksPerMultiprocessorLimitedByWarpsOrBlocksPerMultiprocessor: threadBlocksPerMultiprocessorLimitedByWarpsOrBlocksPerMultiprocessor(), 343 | threadBlocksPerMultiprocessorLimitedByRegistersPerMultiprocessor: 
threadBlocksPerMultiprocessorLimitedByRegistersPerMultiprocessor(), 344 | threadBlocksPerMultiprocessorLimitedBySharedMemoryPerMultiprocessor: threadBlocksPerMultiprocessorLimitedBySharedMemoryPerMultiprocessor() 345 | }; 346 | return Object.assign(output, config); 347 | }; 348 | 349 | window.computeGraphsValues = function(input) { 350 | var config, graphWarpOccupancyOfRegistersPerThread, graphWarpOccupancyOfSharedMemoryPerBlock, graphWarpOccupancyOfThreadsPerBlock; 351 | config = mainConfig[input.version]; 352 | graphWarpOccupancyOfThreadsPerBlock = function() { 353 | var current, i, inp, r, ref, threadsPerBlock; 354 | current = { 355 | key: input.threadsPerBlock, 356 | value: window.calculateOccupancy(input).activeWarpsPerMultiprocessor 357 | }; 358 | inp = Object.assign({}, input); 359 | r = []; 360 | for (threadsPerBlock = i = 32, ref = config.maxThreadBlockSize; i <= ref; threadsPerBlock = i += 32) { 361 | inp.threadsPerBlock = threadsPerBlock; 362 | r.push({ 363 | key: threadsPerBlock, 364 | value: window.calculateOccupancy(inp).activeWarpsPerMultiprocessor 365 | }); 366 | } 367 | return { 368 | xLabel: "Threads per block", 369 | data: r, 370 | current: current 371 | }; 372 | }; 373 | graphWarpOccupancyOfRegistersPerThread = function() { 374 | var current, i, inp, r, ref, registersPerThread; 375 | current = { 376 | key: input.registersPerThread, 377 | value: window.calculateOccupancy(input).activeWarpsPerMultiprocessor 378 | }; 379 | inp = Object.assign({}, input); 380 | r = []; 381 | for (registersPerThread = i = 0, ref = config.maxRegistersPerThread; 0 <= ref ? i <= ref : i >= ref; registersPerThread = 0 <= ref ? 
++i : --i) { 382 | inp.registersPerThread = registersPerThread; 383 | r.push({ 384 | key: registersPerThread, 385 | value: window.calculateOccupancy(inp).activeWarpsPerMultiprocessor 386 | }); 387 | } 388 | return { 389 | xLabel: "Registers per thread", 390 | data: r, 391 | current: current 392 | }; 393 | }; 394 | graphWarpOccupancyOfSharedMemoryPerBlock = function() { 395 | var current, i, inp, r, ref, sharedMemoryPerBlock; 396 | current = { 397 | key: input.sharedMemoryPerBlock, 398 | value: window.calculateOccupancy(input).activeWarpsPerMultiprocessor 399 | }; 400 | inp = Object.assign({}, input); 401 | r = []; 402 | for (sharedMemoryPerBlock = i = 0, ref = config.sharedMemoryPerMultiprocessor; i <= ref; sharedMemoryPerBlock = i += 512) { 403 | inp.sharedMemoryPerBlock = sharedMemoryPerBlock; 404 | r.push({ 405 | key: sharedMemoryPerBlock, 406 | value: window.calculateOccupancy(inp).activeWarpsPerMultiprocessor 407 | }); 408 | } 409 | return { 410 | xLabel: "Shared memory per block", 411 | data: r, 412 | current: current 413 | }; 414 | }; 415 | return { 416 | graphWarpOccupancyOfThreadsPerBlock: graphWarpOccupancyOfThreadsPerBlock(), 417 | graphWarpOccupancyOfRegistersPerThread: graphWarpOccupancyOfRegistersPerThread(), 418 | graphWarpOccupancyOfSharedMemoryPerBlock: graphWarpOccupancyOfSharedMemoryPerBlock() 419 | }; 420 | }; 421 | 422 | }).call(this); 423 | -------------------------------------------------------------------------------- /js/calculator.js.map: -------------------------------------------------------------------------------- 1 | { 2 | "version": 3, 3 | "file": "calculator.js", 4 | "sourceRoot": "", 5 | "sources": [ 6 | "calculator.coffee" 7 | ], 8 | "names": [], 9 | "mappings": 
";AAAA;AAAA,MAAA;;EAAA,UAAA,GACE;IAAA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,GAH1B;MAIA,6BAAA,EAA+B,CAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,IAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,OAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,GAdpB;KADF;IAiBA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,GAH1B;MAIA,6BAAA,EAA+B,CAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,IAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,OAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,GAdpB;KAlBF;IAkCA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,CAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,OAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,GAdpB;KAnCF;IAmDA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,CAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,OAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,GAdpB;KApDF;IAoEA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,CAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,EAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,EAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KArEF;IAqFA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,CAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,EAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,EAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KAtFF;IAsGA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAA
A,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,EAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KAvGF;IAuHA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KAxHF;IAwIA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,MAL/B;MAOA,gBAAA,EAAkB,MAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KAzIF;IAyJA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KA1JF;IA0KA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KA3KF;IA2LA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KA5LF;IA4MA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAk
B,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KA7MF;IA6NA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KA9NF;IA8OA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KA/OF;IA+PA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KAhQF;IAgRA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KAjRF;IAiSA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,MAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KAlSF;IAkTA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,MAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZh
C;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KAnTF;;;EAoUF,IAAA,GAAO,SAAC,CAAD,EAAI,CAAJ;WAAU,IAAI,CAAC,IAAL,CAAU,CAAA,GAAI,CAAd,CAAA,GAAmB;EAA7B;;EAEP,KAAA,GAAQ,SAAC,CAAD,EAAI,CAAJ;WAAU,IAAI,CAAC,KAAL,CAAW,CAAA,GAAI,CAAf,CAAA,GAAoB;EAA9B;;EAGR,MAAM,CAAC,kBAAP,GAA4B,SAAC,KAAD;AAC1B,QAAA;IAAA,MAAA,GAAS,UAAW,CAAA,KAAK,CAAC,OAAN;IAEpB,UAAA,GAAa,SAAA;aACX,IAAI,CAAC,IAAL,CAAU,KAAK,CAAC,eAAN,GAAwB,MAAM,CAAC,cAAzC;IADW;IAGb,cAAA,GAAiB,SAAA;MACf,IAAG,MAAM,CAAC,qBAAP,KAAgC,OAAnC;eACE,IAAA,CAAK,IAAA,CAAK,UAAA,CAAA,CAAL,EAAmB,MAAM,CAAC,yBAA1B,CAAA,GAAuD,KAAK,CAAC,kBAA7D,GAAkF,MAAM,CAAC,cAA9F,EAA8G,MAAM,CAAC,0BAArH,EADF;OAAA,MAAA;eAIE,IAAA,CAAK,KAAK,CAAC,kBAAN,GAA2B,MAAM,CAAC,cAAvC,EAAuD,MAAM,CAAC,0BAA9D,CAAA,GAA4F,UAAA,CAAA,EAJ9F;;IADe;IAOjB,uBAAA,GAA0B,SAAA;MACxB,IAAG,MAAM,CAAC,qBAAP,KAAgC,OAAnC;eACE,MAAM,CAAC,iBADT;OAAA,MAAA;eAIE,KAAA,CAAM,MAAM,CAAC,gBAAP,GAA0B,IAAA,CAAK,KAAK,CAAC,kBAAN,GAA2B,MAAM,CAAC,cAAvC,EAC9B,MAAM,CAAC,0BADuB,CAAhC,EACsC,MAAM,CAAC,yBAD7C,CAAA,GAEE,IAAA,CAAK,KAAK,CAAC,kBAAN,GAA2B,MAAM,CAAC,cAAvC,EAAuD,MAAM,CAAC,0BAA9D,EANJ;;IADwB;IAS1B,iBAAA,GAAoB,SAAA;aAClB,IAAA,CAAK,KAAK,CAAC,oBAAX,EAAiC,MAAM,CAAC,8BAAxC;IADkB;IAGpB,oEAAA,GAAuE,SAAA;aACrE,IAAI,CAAC,GAAL,CAAS,MAAM,CAAC,6BAAhB,EAA+C,IAAI,CAAC,KAAL,CAAW,MAAM,CAAC,sBAAP,GAAgC,UAAA,CAAA,CAA3C,CAA/C;IADqE;IAGvE,gEAAA,GAAmE,SAAA;MACjE,IAAG,KAAK,CAAC,kBAAN,GAA2B,MAAM,CAAC,qBAArC;eACE,EADF;OAAA,MAEK,IAAG,KAAK,CAAC,kBAAN,GAA2B,CAA9B;eACH,IAAI,CAAC,KAAL,CAAW,uBAAA,CAAA,CAAA,GAA4B,cAAA,CAAA,CAAvC,EADG;OAAA,MAAA;eAGH,MAAM,CAAC,8BAHJ;;IAH4D;IAQnE,mEAAA,GAAsE,SAAA;MACpE,IAAG,KAAK,CAAC,oBAAN,GAA6B,CAAhC;eACE,IAAI,CAAC,KAAL,CAAW,MAAM,CAAC,6BAAP,GAAuC,iBAAA,CAAA,CAAlD,EADF;OAAA,MAAA;eAGE,MAAM,CAAC,8BAHT;;IADoE;IAMtE,8BAAA,GAAiC,SAAA;aAC/B,KAAK,CAAC,eAAN,GAAwB,mCAAA,CAAA;IADO;IAGjC,4BAAA,GAA+B,SAAA;aAC7B,mCAAA,CAAA,CAAA,GAAwC,UAAA,CAAA;IADX;IAG/B,mCAAA,GAAsC,SAAA;aACpC,IAAI,CAAC,GAAL,CACE,oEAAA,CAAA,CADF,EAEE,gEAAA,CAAA,CAFF,EAGE,mEAAA,CAAA,CAHF;IADoC;IAOtC,yBAAA,GAA4B,SAAA;aAC1B,4BAAA,CAAA,CAAA,GAAiC,MAAM,CAAC;IADd;I
AG5B,MAAA,GACE;MAAA,8BAAA,EAAgC,8BAAA,CAAA,CAAhC;MACA,4BAAA,EAA8B,4BAAA,CAAA,CAD9B;MAEA,mCAAA,EAAqC,mCAAA,CAAA,CAFrC;MAGA,yBAAA,EAA2B,yBAAA,CAAA,CAH3B;MAKA,UAAA,EAAY,UAAA,CAAA,CALZ;MAMA,iBAAA,EAAmB,iBAAA,CAAA,CANnB;MAOA,cAAA,EAAgB,cAAA,CAAA,CAPhB;MASA,oEAAA,EAAsE,oEAAA,CAAA,CATtE;MAUA,gEAAA,EAAkE,gEAAA,CAAA,CAVlE;MAWA,mEAAA,EAAqE,mEAAA,CAAA,CAXrE;;AAaF,WAAO,MAAM,CAAC,MAAP,CAAc,MAAd,EAAsB,MAAtB;EAxEmB;;EA0E5B,MAAM,CAAC,mBAAP,GAA6B,SAAC,KAAD;AAC3B,QAAA;IAAA,MAAA,GAAS,UAAW,CAAA,KAAK,CAAC,OAAN;IAEpB,mCAAA,GAAsC,SAAA;AACpC,UAAA;MAAA,OAAA,GACE;QAAA,GAAA,EAAK,KAAK,CAAC,eAAX;QACA,KAAA,EAAO,MAAM,CAAC,kBAAP,CAA0B,KAA1B,CAAgC,CAAC,4BADxC;;MAGF,GAAA,GAAM,MAAM,CAAC,MAAP,CAAc,EAAd,EAAkB,KAAlB;MACN,CAAA,GAAI;AACJ,WAAuB,8FAAvB;QACE,GAAG,CAAC,eAAJ,GAAsB;QAEtB,CAAC,CAAC,IAAF,CAAO;UACL,GAAA,EAAK,eADA;UAEL,KAAA,EAAO,MAAM,CAAC,kBAAP,CAA0B,GAA1B,CAA8B,CAAC,4BAFjC;SAAP;AAHF;AAQA,aAAO;QACL,MAAA,EAAQ,mBADH;QAEL,IAAA,EAAM,CAFD;QAGL,OAAA,EAAS,OAHJ;;IAf6B;IAqBtC,sCAAA,GAAyC,SAAA;AACvC,UAAA;MAAA,OAAA,GACE;QAAA,GAAA,EAAK,KAAK,CAAC,kBAAX;QACA,KAAA,EAAO,MAAM,CAAC,kBAAP,CAA0B,KAA1B,CAAgC,CAAC,4BADxC;;MAGF,GAAA,GAAM,MAAM,CAAC,MAAP,CAAc,EAAd,EAAkB,KAAlB;MACN,CAAA,GAAI;AACJ,WAA0B,yIAA1B;QACE,GAAG,CAAC,kBAAJ,GAAyB;QAEzB,CAAC,CAAC,IAAF,CAAO;UACL,GAAA,EAAK,kBADA;UAEL,KAAA,EAAO,MAAM,CAAC,kBAAP,CAA0B,GAA1B,CAA8B,CAAC,4BAFjC;SAAP;AAHF;AAQA,aAAO;QACL,MAAA,EAAQ,sBADH;QAEL,IAAA,EAAM,CAFD;QAGL,OAAA,EAAS,OAHJ;;IAfgC;IAqBzC,wCAAA,GAA2C,SAAA;AACzC,UAAA;MAAA,OAAA,GACE;QAAA,GAAA,EAAK,KAAK,CAAC,oBAAX;QACA,KAAA,EAAO,MAAM,CAAC,kBAAP,CAA0B,KAA1B,CAAgC,CAAC,4BADxC;;MAGF,GAAA,GAAM,MAAM,CAAC,MAAP,CAAc,EAAd,EAAkB,KAAlB;MACN,CAAA,GAAI;AACJ,WAA4B,mHAA5B;QACE,GAAG,CAAC,oBAAJ,GAA2B;QAE3B,CAAC,CAAC,IAAF,CAAO;UACL,GAAA,EAAK,oBADA;UAEL,KAAA,EAAO,MAAM,CAAC,kBAAP,CAA0B,GAA1B,CAA8B,CAAC,4BAFjC;SAAP;AAHF;AAQA,aAAO;QACL,MAAA,EAAQ,yBADH;QAEL,IAAA,EAAM,CAFD;QAGL,OAAA,EAAS,OAHJ;;IAfkC;AAqB3C,WAAO;MACL,mCAAA,EAAqC,mCAAA,CAAA,CADhC;MAEL,sCAAA,EAAwC,sCAAA,CAAA,CAFnC;MAGL,wCAAA,EAA0C,wCAAA,CAAA,CAHrC;;EAlEoB;AApZ7B" 10 | } 
// js/index.js: form handling and chart rendering for the occupancy calculator page.

/*!
 * Serialize all form data into an array of key/value pairs
 * (c) 2020 Chris Ferdinandi, MIT License, https://gomakethings.com
 * @param {Node} form The form to serialize
 * @return {Array} The serialized form data
 */
function serializeArray(form) {
  // Control types that never contribute an entry.
  const ignoredTypes = ['file', 'reset', 'submit', 'button'];
  // Control types that contribute an entry only while checked.
  const checkableTypes = ['checkbox', 'radio'];
  const pairs = [];

  for (const field of Array.from(form.elements)) {
    const isIgnored = !field.name || field.disabled || ignoredTypes.includes(field.type);
    if (isIgnored) {
      continue;
    }

    if (field.type === 'select-multiple') {
      // One entry per selected option, all under the select's own name.
      for (const option of Array.from(field.options)) {
        if (option.selected) {
          pairs.push({ name: field.name, value: option.value });
        }
      }
      continue;
    }

    if (checkableTypes.includes(field.type) && !field.checked) {
      continue;
    }

    pairs.push({ name: field.name, value: field.value });
  }

  return pairs;
}

/**
 * Run `fn` once the document has been parsed.
 *
 * If the document is already "complete" or "interactive", `fn` is scheduled
 * with a 1 ms timeout; otherwise it is registered as a DOMContentLoaded listener.
 *
 * @param {Function} fn Callback to run.
 */
function docReady(fn) {
  const stillLoading = document.readyState !== "complete" && document.readyState !== "interactive";
  if (stillLoading) {
    document.addEventListener("DOMContentLoaded", fn);
    return;
  }
  setTimeout(fn, 1); // Defer to the next available tick.
}

/**
 * Render one chart with c3: the swept series plus a separate single-point
 * series named "Current".
 *
 * @param {Array} x X values of the swept series.
 * @param {Array} y Y values of the swept series.
 * @param {*} currentX X value of the "Current" point.
 * @param {*} currentY Y value of the "Current" point.
 * @param {string} xLabel Label for the x axis.
 * @param {Element} targetElement Element the chart is bound to.
 */
function plot(x, y, currentX, currentY, xLabel, targetElement) {
  const series = {
    // Each data series is paired with its own x column.
    xs: {
      " ": "x",
      "Current": "x current",
    },
    columns: [
      ["x"].concat(x),
      [" "].concat(y),

      ["x current", currentX],
      ["Current", currentY],
    ],
  };

  const xAxis = {
    label: xLabel,
    min: 0,
    padding: { left: 0, right: 0 },
  };

  const yAxis = {
    label: { position: "outer-middle", text: "# warps" },
    min: 0,
    padding: { bottom: 0, top: 0 },
  };

  const chart = c3.generate({
    bindto: targetElement,
    data: series,
    axis: { x: xAxis, y: yAxis },
    grid: { y: { show: true } },
    legend: { show: false },
    padding: { right: 20 },
  });

  // The axes have no padding, so drop the clip path to keep edge points from being cut off.
  const chartRoot = d3.select(chart.element);
  chartRoot.select(`.${c3.chart.internal.fn.CLASS.chart}`).attr("clip-path", null);
}

/**
 * Add or remove the `hidden` attribute on an element.
 *
 * @param {Element} element Element to update.
 * @param {boolean} hidden True to set `hidden=""`, false to remove the attribute.
 */
function setHidden(element, hidden) {
  if (hidden) {
    element.setAttribute("hidden", "");
  } else {
    element.removeAttribute("hidden");
  }
}

/**
 * Form submit handler: computes the occupancy figures and the chart series
 * from the form fields and writes them into the page.
 *
 * Each entry of the calculateOccupancy result is written into the element
 * whose id equals the entry's key; each entry of the computeGraphsValues
 * result is plotted into the element whose id equals its key.
 *
 * @param {Event} e Submit event; its target is the form.
 */
function onSubmit(e) {
  e.preventDefault();

  // Field values are passed on exactly as serialized (no conversion is done here).
  const fields = serializeArray(e.target);
  const formData = Object.fromEntries(fields.map(({ name, value }) => [name, value]));

  const occupancy = calculateOccupancy(formData);
  const graphs = computeGraphsValues(formData);

  document.getElementById("output").removeAttribute("hidden");

  for (const [id, value] of Object.entries(occupancy)) {
    document.getElementById(id).innerText = value.toString();
  }

  for (const [id, graph] of Object.entries(graphs)) {
    const xs = graph.data.map((point) => point.key);
    const ys = graph.data.map((point) => point.value);
    plot(xs, ys, graph.current.key, graph.current.value, graph.xLabel, document.getElementById(id));
  }

  // The CUDA runtime shared-memory alert is shown only when the version parses to 8.0 or higher.
  const $alert = document.querySelector("#alertCudaRuntimeSharedMemory");
  setHidden($alert, !(Number.parseFloat(occupancy.version) >= 8.0));
}

/**
 * Wire up the page: attach the submit handler, keep the CUDA version selector
 * visible only for compute capability 8 and above, and preselect the last
 * compute capability option.
 */
function main() {
  document.getElementsByTagName("form")[0].onsubmit = onSubmit;

  const $ccSelect = document.querySelector("#ccVersion");

  $ccSelect.addEventListener('change', (event) => {
    const cc = Number.parseFloat(event.target.value);

    // The selector's nearest enclosing div is toggled, not the selector itself.
    const $cudaVersionBlock = document.querySelector("#cudaVersion").closest("div");
    setHidden($cudaVersionBlock, !(cc >= 8));
  });

  // Default to the last option, then fire 'change' so the listener above runs once.
  $ccSelect.selectedIndex = $ccSelect.options.length - 1;
  $ccSelect.dispatchEvent(new Event('change'));
}

docReady(main);