├── README.md
├── css
└── index.css
├── img
└── fork_me_right_gray_6d6d6d.png
├── index.html
└── js
├── calculator.coffee
├── calculator.js
├── calculator.js.map
└── index.js
/README.md:
--------------------------------------------------------------------------------
1 | # Online CUDA Occupancy Calculator
2 |
3 | https://xmartlabs.github.io/cuda-calculator/
4 |
5 | Port of the [CUDA Occupancy Calculator spreadsheet](https://docs.nvidia.com/cuda/cuda-occupancy-calculator/CUDA_Occupancy_Calculator.xls).
6 |
7 | ## Changes from upstream
8 |
9 | * Extended support to compute capabilities up to 8.6.
10 | * Improved the UI.
11 |
12 | ## Compilation
13 |
14 | ```shell
15 | coffee -c js/calculator.coffee
16 | ```
17 |
18 | ## Credits
19 |
20 | * [Xmartlabs](https://xmartlabs.github.io/cuda-calculator/)
21 | * [Karthikeyan](https://github.com/lxkarthi/cuda-calculator/) - Improved cuda-calculator online version.
22 | * [Mihai Maruseac](https://github.com/mihaimaruseac/cuda-calculator/) - Thanks for the fork.
23 | * [Aliaksei](https://github.com/roadhump/cuda-calculator/) - Original author; the original repository has since been deleted. :(
24 |
--------------------------------------------------------------------------------
/css/index.css:
--------------------------------------------------------------------------------
/* Render table captions above the table body. */
caption {
  caption-side: top;
}

/* Right-align the cells of elements styled with the .table class. */
.table td {
  text-align: right;
}

/* Use the root font size for the c3 axis labels and x-grid line labels. */
.c3-axis-x-label,
.c3-axis-y-label,
.c3-xgrid-line {
  font-size: 1rem;
}

/* Pin the #fork-me element to the top-right corner, without a border. */
#fork-me {
  border: 0;
  position: absolute;
  right: 0;
  top: 0;
}
19 |
--------------------------------------------------------------------------------
/img/fork_me_right_gray_6d6d6d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xmartlabs/cuda-calculator/d60116f321cda7cfd414c260d3133e48b4feceea/img/fork_me_right_gray_6d6d6d.png
--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | CUDA Occupancy Calculator
7 |
8 |
9 |
11 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
CUDA Occupancy Calculator
22 |
23 |
70 |
71 |
72 |
73 | GPU Occupancy Data is displayed here and in the graphs
74 |
75 | Active Threads per Multiprocessor |
76 | |
77 |
78 |
79 | Active Warps per Multiprocessor |
80 | |
81 |
82 |
83 | Active Thread Blocks per Multiprocessor |
84 | |
85 |
86 |
87 | Occupancy of each Multiprocessor |
88 | |
89 |
90 |
91 |
92 |
93 | Physical Limits for GPU Compute Capability
94 |
95 | Version |
96 | |
97 |
98 |
99 | Threads per Warp |
100 | |
101 |
102 |
103 | Warps per Multiprocessor |
104 | |
105 |
106 |
107 | Threads per Multiprocessor |
108 | |
109 |
110 |
111 | Thread Blocks per Multiprocessor |
112 | |
113 |
114 |
115 | Total # of 32-bit registers per Multiprocessor |
116 | |
117 |
118 |
119 | Register allocation unit size |
120 | |
121 |
122 |
123 | Register allocation granularity |
124 | |
125 |
126 |
127 | Max registers per Block |
128 | |
129 |
130 |
131 | Max registers per thread |
132 | |
133 |
134 |
135 | Shared Memory per Multiprocessor (bytes) |
136 | |
137 |
138 |
139 | Shared Memory Allocation unit size |
140 | |
141 |
142 |
143 | Warp allocation granularity (for register allocation) |
144 | |
145 |
146 |
147 | Max thread block size |
148 | |
149 |
150 |
151 |
152 |
153 | Allocation Per Thread Block
154 |
155 | Warps |
156 | |
157 |
158 |
159 | Registers |
160 | |
161 |
162 |
163 | Shared Memory |
164 | |
165 |
166 |
167 |
168 | Note: CUDA Runtime uses bytes of Shared Memory per Thread Block.
169 |
170 |
171 |
172 | Maximum Thread Blocks Per Multiprocessor
173 |
174 | Limited by Max Warps / Blocks per Multiprocessor |
175 | |
176 |
177 |
178 | Limited by Registers per Multiprocessor |
179 | |
180 |
181 |
182 | Limited by Shared Memory per Multiprocessor |
183 | |
184 |
185 |
186 |
187 |
188 | Impact of Varying Block Size
189 |
190 |
191 |
192 |
193 | Impact of Varying Register Count Per Thread
194 |
195 |
196 |
197 |
198 | Impact of Varying Shared Memory Usage Per Block
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
211 |
212 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
--------------------------------------------------------------------------------
/js/calculator.coffee:
--------------------------------------------------------------------------------
# Physical limits for each supported GPU compute capability, keyed by the
# version string (for example '8.6'). Every entry exposes the same properties,
# in the same order: `version` first, followed by the keys of `base`.
mainConfig = do ->
  # Values shared by most compute capabilities. Every property is listed here
  # so that the generated entries keep a single, stable key order.
  base =
    threadsPerWarp: 32
    warpsPerMultiprocessor: 64
    threadsPerMultiprocessor: 2048
    threadBlocksPerMultiprocessor: 32
    sharedMemoryPerMultiprocessor: 65536

    registerFileSize: 65536
    registerAllocationUnitSize: 256

    allocationGranularity: 'warp'
    maxRegistersPerThread: 255
    maxRegistersPerBlock: 65536
    sharedMemoryAllocationUnitSize: 256
    warpAllocationGranularity: 4
    maxThreadBlockSize: 1024

  # Limits that differ between compute capabilities 2.0 and 2.1 and `base`.
  fermi =
    warpsPerMultiprocessor: 48
    threadsPerMultiprocessor: 1536
    threadBlocksPerMultiprocessor: 8
    sharedMemoryPerMultiprocessor: 49152
    registerFileSize: 32768
    registerAllocationUnitSize: 64
    maxRegistersPerThread: 63
    maxRegistersPerBlock: 32768
    sharedMemoryAllocationUnitSize: 128
    warpAllocationGranularity: 2

  # Per-version differences from `base`; an empty object means "all defaults".
  differences =
    '2.0': fermi
    '2.1': fermi
    '3.0':
      threadBlocksPerMultiprocessor: 16
      sharedMemoryPerMultiprocessor: 49152
      maxRegistersPerThread: 63
    '3.2':
      threadBlocksPerMultiprocessor: 16
      sharedMemoryPerMultiprocessor: 49152
    '3.5':
      threadBlocksPerMultiprocessor: 16
      sharedMemoryPerMultiprocessor: 49152
    '3.7':
      threadBlocksPerMultiprocessor: 16
      sharedMemoryPerMultiprocessor: 114688
      registerFileSize: 131072
    '5.0': {}
    '5.2':
      sharedMemoryPerMultiprocessor: 98304
      maxRegistersPerBlock: 32768
    '5.3':
      maxRegistersPerBlock: 32768
    '6.0':
      warpAllocationGranularity: 2
    '6.1':
      sharedMemoryPerMultiprocessor: 98304
    '6.2': {}
    '7.0':
      sharedMemoryPerMultiprocessor: 98304
    '7.5':
      warpsPerMultiprocessor: 32
      threadsPerMultiprocessor: 1024
      threadBlocksPerMultiprocessor: 16
    '8.0':
      sharedMemoryPerMultiprocessor: 167936
      sharedMemoryAllocationUnitSize: 128
    '8.6':
      warpsPerMultiprocessor: 48
      threadsPerMultiprocessor: 1536
      threadBlocksPerMultiprocessor: 16
      sharedMemoryPerMultiprocessor: 102400
      sharedMemoryAllocationUnitSize: 128

  table = {}
  for own capability, difference of differences
    # A fresh object per version: `version` first, then the base keys, with the
    # per-version values overwriting the defaults in place.
    table[capability] = Object.assign({ version: capability }, base, difference)
  table
289 |
# Bytes of shared memory used by the CUDA runtime, keyed by CUDA version.
# calculateOccupancy only consults this table for compute capability 8.x and
# newer, where the runtime uses 1KB of shared memory.
cudaRuntimeUsedSharedMemory = { '11.0': 1024, '11.1': 1024 }
294 |
# Rounds `a` up to the nearest multiple of `b`.
ceil = (a, b) ->
  multiples = Math.ceil(a / b)
  multiples * b
296 |
# Rounds `a` down to the nearest multiple of `b`.
floor = (a, b) ->
  multiples = Math.floor(a / b)
  multiples * b
298 |
299 |
# Computes the theoretical occupancy of one multiprocessor for a kernel launch
# configuration.
#
# Fields read from `input`:
#   version              - key into mainConfig (compute capability, e.g. '8.6')
#   threadsPerBlock      - used to derive the number of warps per block
#   registersPerThread   - used to derive the registers allocated per warp
#   sharedMemoryPerBlock - parsed as a base-10 integer
#   cudaVersion          - key into cudaRuntimeUsedSharedMemory; only read when
#                          version >= 8
#
# Returns an object holding the computed metrics merged with the selected
# mainConfig entry (the config entry itself is not modified).
window.calculateOccupancy = (input) ->
  config = mainConfig[input.version]

  # Used when input.cudaVersion has no entry in cudaRuntimeUsedSharedMemory.
  # Without a fallback the lookup yields undefined, which turns the block's
  # shared memory, the shared-memory limit and every active-* result into NaN.
  defaultCudaRuntimeSharedMemory = 1024

  # number of warps per block
  blockWarps = Math.ceil(input.threadsPerBlock / config.threadsPerWarp)

  # number of registers per warp, rounded up to the register allocation unit
  registersPerWarp = ceil(
    input.registersPerThread * config.threadsPerWarp,
    config.registerAllocationUnitSize
  )

  # number of registers per block
  blockRegisters = registersPerWarp * blockWarps

  # starting with Compute Capability 8.x, the CUDA runtime consumes 1KB of shared memory
  # the amount might change depending on the CUDA runtime version in the future
  blockCudaRuntimeSharedMemory =
    if Number.parseFloat(input.version) >= 8
      cudaRuntimeUsedSharedMemory[input.cudaVersion] ? defaultCudaRuntimeSharedMemory
    else
      0

  # shared memory per thread block, rounded up to the shared memory allocation unit
  blockSharedMemory = ceil(
    Number.parseInt(input.sharedMemoryPerBlock, 10) + blockCudaRuntimeSharedMemory,
    config.sharedMemoryAllocationUnitSize
  )

  limitedByWarpsOrBlocks = Math.min(
    config.threadBlocksPerMultiprocessor,
    Math.floor(config.warpsPerMultiprocessor / blockWarps)
  )

  limitedByRegisters =
    if input.registersPerThread > config.maxRegistersPerThread
      # more registers per thread than the hardware allows: nothing can run
      0
    else if input.registersPerThread > 0
      # maximum warps per SM when limited by registers
      warpsLimitedByRegisters = floor(
        config.maxRegistersPerBlock / registersPerWarp,
        config.warpAllocationGranularity
      )
      Math.floor(warpsLimitedByRegisters / blockWarps) *
        Math.floor(config.registerFileSize / config.maxRegistersPerBlock)
    else
      config.threadBlocksPerMultiprocessor

  limitedBySharedMemory =
    if input.sharedMemoryPerBlock > 0
      Math.floor(config.sharedMemoryPerMultiprocessor / blockSharedMemory)
    else
      config.threadBlocksPerMultiprocessor

  # the tightest of the three limits decides how many blocks are resident
  activeThreadBlocks = Math.min(limitedByWarpsOrBlocks, limitedByRegisters, limitedBySharedMemory)
  activeWarps = activeThreadBlocks * blockWarps

  output =
    activeThreadsPerMultiprocessor: input.threadsPerBlock * activeThreadBlocks
    activeWarpsPerMultiprocessor: activeWarps
    activeThreadBlocksPerMultiprocessor: activeThreadBlocks
    occupancyOfMultiprocessor: activeWarps / config.warpsPerMultiprocessor

    blockWarps: blockWarps
    blockSharedMemory: blockSharedMemory
    blockCudaRuntimeSharedMemory: blockCudaRuntimeSharedMemory
    blockRegisters: blockRegisters

    threadBlocksPerMultiprocessorLimitedByWarpsOrBlocksPerMultiprocessor: limitedByWarpsOrBlocks
    threadBlocksPerMultiprocessorLimitedByRegistersPerMultiprocessor: limitedByRegisters
    threadBlocksPerMultiprocessorLimitedBySharedMemoryPerMultiprocessor: limitedBySharedMemory

  return Object.assign(output, config)
389 |
# Builds the data series for the three "impact of varying ..." graphs.
#
# Each series re-runs window.calculateOccupancy while one input field is varied
# on a shallow copy of `input`, recording activeWarpsPerMultiprocessor for every
# step. `current` is the point for the unmodified input.
window.computeGraphsValues = (input) ->
  config = mainConfig[input.version]

  activeWarpsFor = (candidate) ->
    window.calculateOccupancy(candidate).activeWarpsPerMultiprocessor

  # Sweeps `field` over `values` and packages the result for one graph.
  buildSeries = (field, xLabel, values) ->
    current =
      key: input[field]
      value: activeWarpsFor(input)

    probe = Object.assign({}, input) # Shallow copy.
    points = for step in values
      probe[field] = step
      { key: step, value: activeWarpsFor(probe) }

    return {
      xLabel: xLabel
      data: points
      current: current
    }

  blockSizes = (n for n in [32..config.maxThreadBlockSize] by 32)
  registerCounts = [0..config.maxRegistersPerThread]
  sharedMemorySizes = (n for n in [0..config.sharedMemoryPerMultiprocessor] by 512)

  byThreads = buildSeries('threadsPerBlock', "Threads per block", blockSizes)
  byRegisters = buildSeries('registersPerThread', "Registers per thread", registerCounts)
  bySharedMemory = buildSeries('sharedMemoryPerBlock', "Shared memory per block", sharedMemorySizes)

  return {
    graphWarpOccupancyOfThreadsPerBlock: byThreads
    graphWarpOccupancyOfRegistersPerThread: byRegisters
    graphWarpOccupancyOfSharedMemoryPerBlock: bySharedMemory
  }
461 |
--------------------------------------------------------------------------------
/js/calculator.js:
--------------------------------------------------------------------------------
1 | // Generated by CoffeeScript 1.12.7
2 | (function() {
3 | var ceil, cudaRuntimeUsedSharedMemory, floor, mainConfig;
4 |
// Physical limits for each supported GPU compute capability, keyed by the
// version string (for example '8.6'). Every entry exposes the same properties,
// in the same order: `version` first, followed by the keys of `base`.
mainConfig = (function() {
  // Values shared by most compute capabilities. Every property is listed here
  // so that the generated entries keep a single, stable key order.
  var base = {
    threadsPerWarp: 32,
    warpsPerMultiprocessor: 64,
    threadsPerMultiprocessor: 2048,
    threadBlocksPerMultiprocessor: 32,
    sharedMemoryPerMultiprocessor: 65536,
    registerFileSize: 65536,
    registerAllocationUnitSize: 256,
    allocationGranularity: 'warp',
    maxRegistersPerThread: 255,
    maxRegistersPerBlock: 65536,
    sharedMemoryAllocationUnitSize: 256,
    warpAllocationGranularity: 4,
    maxThreadBlockSize: 1024
  };

  // Limits that differ between compute capabilities 2.0 and 2.1 and `base`.
  var fermi = {
    warpsPerMultiprocessor: 48,
    threadsPerMultiprocessor: 1536,
    threadBlocksPerMultiprocessor: 8,
    sharedMemoryPerMultiprocessor: 49152,
    registerFileSize: 32768,
    registerAllocationUnitSize: 64,
    maxRegistersPerThread: 63,
    maxRegistersPerBlock: 32768,
    sharedMemoryAllocationUnitSize: 128,
    warpAllocationGranularity: 2
  };

  // Per-version differences from `base`; an empty object means "all defaults".
  var differences = {
    '2.0': fermi,
    '2.1': fermi,
    '3.0': {
      threadBlocksPerMultiprocessor: 16,
      sharedMemoryPerMultiprocessor: 49152,
      maxRegistersPerThread: 63
    },
    '3.2': {
      threadBlocksPerMultiprocessor: 16,
      sharedMemoryPerMultiprocessor: 49152
    },
    '3.5': {
      threadBlocksPerMultiprocessor: 16,
      sharedMemoryPerMultiprocessor: 49152
    },
    '3.7': {
      threadBlocksPerMultiprocessor: 16,
      sharedMemoryPerMultiprocessor: 114688,
      registerFileSize: 131072
    },
    '5.0': {},
    '5.2': {
      sharedMemoryPerMultiprocessor: 98304,
      maxRegistersPerBlock: 32768
    },
    '5.3': {
      maxRegistersPerBlock: 32768
    },
    '6.0': {
      warpAllocationGranularity: 2
    },
    '6.1': {
      sharedMemoryPerMultiprocessor: 98304
    },
    '6.2': {},
    '7.0': {
      sharedMemoryPerMultiprocessor: 98304
    },
    '7.5': {
      warpsPerMultiprocessor: 32,
      threadsPerMultiprocessor: 1024,
      threadBlocksPerMultiprocessor: 16
    },
    '8.0': {
      sharedMemoryPerMultiprocessor: 167936,
      sharedMemoryAllocationUnitSize: 128
    },
    '8.6': {
      warpsPerMultiprocessor: 48,
      threadsPerMultiprocessor: 1536,
      threadBlocksPerMultiprocessor: 16,
      sharedMemoryPerMultiprocessor: 102400,
      sharedMemoryAllocationUnitSize: 128
    }
  };

  var table = {};
  Object.keys(differences).forEach(function(capability) {
    // A fresh object per version: `version` first, then the base keys, with the
    // per-version values overwriting the defaults in place.
    table[capability] = Object.assign({ version: capability }, base, differences[capability]);
  });
  return table;
})();
263 |
// Bytes of shared memory used by the CUDA runtime, keyed by CUDA version.
// calculateOccupancy only consults this table for compute capability 8.x and newer.
cudaRuntimeUsedSharedMemory = { '11.0': 1024, '11.1': 1024 };
268 |
// Rounds `a` up to the nearest multiple of `b`.
ceil = function(a, b) {
  var multiples = Math.ceil(a / b);
  return multiples * b;
};
272 |
// Rounds `a` down to the nearest multiple of `b`.
floor = function(a, b) {
  var multiples = Math.floor(a / b);
  return multiples * b;
};
276 |
/**
 * Computes the theoretical occupancy of one multiprocessor for a kernel launch
 * configuration.
 *
 * @param {Object} input
 * @param {string} input.version Key into mainConfig (compute capability, e.g. '8.6').
 * @param {number|string} input.threadsPerBlock Used to derive the warps per block.
 * @param {number|string} input.registersPerThread Used to derive the registers per warp.
 * @param {number|string} input.sharedMemoryPerBlock Parsed as a base-10 integer.
 * @param {string} [input.cudaVersion] Key into cudaRuntimeUsedSharedMemory; only
 *     read when version >= 8. Unknown or missing versions fall back to 1024.
 * @returns {Object} The computed metrics merged with the selected mainConfig
 *     entry (the config entry itself is not modified).
 */
window.calculateOccupancy = function(input) {
  // Used when input.cudaVersion has no entry in cudaRuntimeUsedSharedMemory.
  // Without a fallback the lookup yields undefined, which turns the block's
  // shared memory, the shared-memory limit and every active-* result into NaN.
  var DEFAULT_CUDA_RUNTIME_SHARED_MEMORY = 1024;

  var config = mainConfig[input.version];

  // Warps per block, and registers per warp rounded up to the allocation unit.
  var blockWarps = Math.ceil(input.threadsPerBlock / config.threadsPerWarp);
  var registersPerWarp = ceil(input.registersPerThread * config.threadsPerWarp, config.registerAllocationUnitSize);
  var blockRegisters = registersPerWarp * blockWarps;

  // Starting with compute capability 8.x the CUDA runtime consumes shared
  // memory of its own; earlier versions are charged nothing.
  var blockCudaRuntimeSharedMemory = 0;
  if (Number.parseFloat(input.version) >= 8) {
    blockCudaRuntimeSharedMemory = cudaRuntimeUsedSharedMemory[input.cudaVersion];
    if (blockCudaRuntimeSharedMemory == null) {
      blockCudaRuntimeSharedMemory = DEFAULT_CUDA_RUNTIME_SHARED_MEMORY;
    }
  }

  // Shared memory per block, rounded up to the shared memory allocation unit.
  var blockSharedMemory = ceil(
    Number.parseInt(input.sharedMemoryPerBlock, 10) + blockCudaRuntimeSharedMemory,
    config.sharedMemoryAllocationUnitSize
  );

  var limitedByWarpsOrBlocks = Math.min(
    config.threadBlocksPerMultiprocessor,
    Math.floor(config.warpsPerMultiprocessor / blockWarps)
  );

  var limitedByRegisters;
  if (input.registersPerThread > config.maxRegistersPerThread) {
    // More registers per thread than the hardware allows: nothing can run.
    limitedByRegisters = 0;
  } else if (input.registersPerThread > 0) {
    // Maximum warps per multiprocessor when limited by registers.
    var warpsLimitedByRegisters = floor(config.maxRegistersPerBlock / registersPerWarp, config.warpAllocationGranularity);
    limitedByRegisters = Math.floor(warpsLimitedByRegisters / blockWarps) *
      Math.floor(config.registerFileSize / config.maxRegistersPerBlock);
  } else {
    limitedByRegisters = config.threadBlocksPerMultiprocessor;
  }

  var limitedBySharedMemory;
  if (input.sharedMemoryPerBlock > 0) {
    limitedBySharedMemory = Math.floor(config.sharedMemoryPerMultiprocessor / blockSharedMemory);
  } else {
    limitedBySharedMemory = config.threadBlocksPerMultiprocessor;
  }

  // The tightest of the three limits decides how many blocks are resident.
  var activeThreadBlocks = Math.min(limitedByWarpsOrBlocks, limitedByRegisters, limitedBySharedMemory);
  var activeWarps = activeThreadBlocks * blockWarps;

  var output = {
    activeThreadsPerMultiprocessor: input.threadsPerBlock * activeThreadBlocks,
    activeWarpsPerMultiprocessor: activeWarps,
    activeThreadBlocksPerMultiprocessor: activeThreadBlocks,
    occupancyOfMultiprocessor: activeWarps / config.warpsPerMultiprocessor,
    blockWarps: blockWarps,
    blockSharedMemory: blockSharedMemory,
    blockCudaRuntimeSharedMemory: blockCudaRuntimeSharedMemory,
    blockRegisters: blockRegisters,
    threadBlocksPerMultiprocessorLimitedByWarpsOrBlocksPerMultiprocessor: limitedByWarpsOrBlocks,
    threadBlocksPerMultiprocessorLimitedByRegistersPerMultiprocessor: limitedByRegisters,
    threadBlocksPerMultiprocessorLimitedBySharedMemoryPerMultiprocessor: limitedBySharedMemory
  };
  return Object.assign(output, config);
};
348 |
/**
 * Builds the data series for the three "active warps" graphs.
 *
 * Each graph varies exactly one input field over a range taken from the
 * configuration selected by `input.version`, while every other field keeps
 * the submitted value:
 *   - threadsPerBlock:      32 .. config.maxThreadBlockSize, step 32
 *   - registersPerThread:   0 .. config.maxRegistersPerThread, step 1
 *   - sharedMemoryPerBlock: 0 .. config.sharedMemoryPerMultiprocessor, step 512
 *
 * @param {Object} input - Calculator input; it is never mutated (a shallow
 *   copy is varied instead).
 * @returns {Object} One entry per graph, each of the form
 *   `{ xLabel, data: [{ key, value }], current: { key, value } }`, where
 *   `value` is `activeWarpsPerMultiprocessor` as reported by
 *   `window.calculateOccupancy`.
 */
window.computeGraphsValues = function(input) {
  const config = mainConfig[input.version];

  // Runs the calculator for one candidate input and keeps only the warp count.
  const activeWarpsFor = (candidate) =>
    window.calculateOccupancy(candidate).activeWarpsPerMultiprocessor;

  // Sweeps `field` from `start` to `config[limitKey]` (inclusive).
  // When `fixedStep` is omitted the sweep moves one unit at a time towards the
  // limit, counting down if the limit is negative.
  const sweep = (field, xLabel, start, limitKey, fixedStep) => {
    const current = {
      key: input[field],
      value: activeWarpsFor(input)
    };
    const candidate = Object.assign({}, input);
    const limit = config[limitKey];
    const step = fixedStep === undefined ? (0 <= limit ? 1 : -1) : fixedStep;
    const data = [];
    for (let x = start; step > 0 ? x <= limit : x >= limit; x += step) {
      candidate[field] = x;
      data.push({
        key: x,
        value: activeWarpsFor(candidate)
      });
    }
    return {
      xLabel: xLabel,
      data: data,
      current: current
    };
  };

  return {
    graphWarpOccupancyOfThreadsPerBlock:
      sweep("threadsPerBlock", "Threads per block", 32, "maxThreadBlockSize", 32),
    graphWarpOccupancyOfRegistersPerThread:
      sweep("registersPerThread", "Registers per thread", 0, "maxRegistersPerThread"),
    graphWarpOccupancyOfSharedMemoryPerBlock:
      sweep("sharedMemoryPerBlock", "Shared memory per block", 0, "sharedMemoryPerMultiprocessor", 512)
  };
};
421 |
422 | }).call(this);
423 |
--------------------------------------------------------------------------------
/js/calculator.js.map:
--------------------------------------------------------------------------------
1 | {
2 | "version": 3,
3 | "file": "calculator.js",
4 | "sourceRoot": "",
5 | "sources": [
6 | "calculator.coffee"
7 | ],
8 | "names": [],
9 | "mappings": ";AAAA;AAAA,MAAA;;EAAA,UAAA,GACE;IAAA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,GAH1B;MAIA,6BAAA,EAA+B,CAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,IAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,OAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,GAdpB;KADF;IAiBA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,GAH1B;MAIA,6BAAA,EAA+B,CAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,IAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,OAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,GAdpB;KAlBF;IAkCA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,CAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,OAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,GAdpB;KAnCF;IAmDA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,CAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,OAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,GAdpB;KApDF;IAoEA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,CAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,EAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,EAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KArEF;IAqFA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,CAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,EAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,EAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KAtFF;IAsGA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB
,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,EAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KAvGF;IAuHA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KAxHF;IAwIA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,MAL/B;MAOA,gBAAA,EAAkB,MAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KAzIF;IAyJA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KA1JF;IA0KA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KA3KF;IA2LA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KA5LF;IA4MA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,KAL/B
;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KA7MF;IA6NA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KA9NF;IA8OA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KA/OF;IA+PA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KAhQF;IAgRA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,KAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KAjRF;IAiSA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,MAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KAlSF;IAkTA,KAAA,EACE;MAAA,OAAA,EAAS,KAAT;MACA,cAAA,EAAgB,EADhB;MAEA,sBAAA,EAAwB,EAFxB;MAGA,wBAAA,EAA0B,IAH1B;MAIA,6BAAA,EAA+B,EAJ/B;MAKA,6BAAA,EAA+B,MAL/B;MAOA,gBAAA,EAAkB,KAPlB;MAQA,0BAAA,EAA4B,GAR5B;MAUA,qBAAA,EAAuB,MAVvB;MAWA,qBAAA,EAAuB,GAXvB;MAYA,
8BAAA,EAAgC,GAZhC;MAaA,yBAAA,EAA2B,CAb3B;MAcA,kBAAA,EAAoB,IAdpB;KAnTF;;;EAoUF,IAAA,GAAO,SAAC,CAAD,EAAI,CAAJ;WAAU,IAAI,CAAC,IAAL,CAAU,CAAA,GAAI,CAAd,CAAA,GAAmB;EAA7B;;EAEP,KAAA,GAAQ,SAAC,CAAD,EAAI,CAAJ;WAAU,IAAI,CAAC,KAAL,CAAW,CAAA,GAAI,CAAf,CAAA,GAAoB;EAA9B;;EAGR,MAAM,CAAC,kBAAP,GAA4B,SAAC,KAAD;AAC1B,QAAA;IAAA,MAAA,GAAS,UAAW,CAAA,KAAK,CAAC,OAAN;IAEpB,UAAA,GAAa,SAAA;aACX,IAAI,CAAC,IAAL,CAAU,KAAK,CAAC,eAAN,GAAwB,MAAM,CAAC,cAAzC;IADW;IAGb,cAAA,GAAiB,SAAA;MACf,IAAG,MAAM,CAAC,qBAAP,KAAgC,OAAnC;eACE,IAAA,CAAK,IAAA,CAAK,UAAA,CAAA,CAAL,EAAmB,MAAM,CAAC,yBAA1B,CAAA,GAAuD,KAAK,CAAC,kBAA7D,GAAkF,MAAM,CAAC,cAA9F,EAA8G,MAAM,CAAC,0BAArH,EADF;OAAA,MAAA;eAIE,IAAA,CAAK,KAAK,CAAC,kBAAN,GAA2B,MAAM,CAAC,cAAvC,EAAuD,MAAM,CAAC,0BAA9D,CAAA,GAA4F,UAAA,CAAA,EAJ9F;;IADe;IAOjB,uBAAA,GAA0B,SAAA;MACxB,IAAG,MAAM,CAAC,qBAAP,KAAgC,OAAnC;eACE,MAAM,CAAC,iBADT;OAAA,MAAA;eAIE,KAAA,CAAM,MAAM,CAAC,gBAAP,GAA0B,IAAA,CAAK,KAAK,CAAC,kBAAN,GAA2B,MAAM,CAAC,cAAvC,EAC9B,MAAM,CAAC,0BADuB,CAAhC,EACsC,MAAM,CAAC,yBAD7C,CAAA,GAEE,IAAA,CAAK,KAAK,CAAC,kBAAN,GAA2B,MAAM,CAAC,cAAvC,EAAuD,MAAM,CAAC,0BAA9D,EANJ;;IADwB;IAS1B,iBAAA,GAAoB,SAAA;aAClB,IAAA,CAAK,KAAK,CAAC,oBAAX,EAAiC,MAAM,CAAC,8BAAxC;IADkB;IAGpB,oEAAA,GAAuE,SAAA;aACrE,IAAI,CAAC,GAAL,CAAS,MAAM,CAAC,6BAAhB,EAA+C,IAAI,CAAC,KAAL,CAAW,MAAM,CAAC,sBAAP,GAAgC,UAAA,CAAA,CAA3C,CAA/C;IADqE;IAGvE,gEAAA,GAAmE,SAAA;MACjE,IAAG,KAAK,CAAC,kBAAN,GAA2B,MAAM,CAAC,qBAArC;eACE,EADF;OAAA,MAEK,IAAG,KAAK,CAAC,kBAAN,GAA2B,CAA9B;eACH,IAAI,CAAC,KAAL,CAAW,uBAAA,CAAA,CAAA,GAA4B,cAAA,CAAA,CAAvC,EADG;OAAA,MAAA;eAGH,MAAM,CAAC,8BAHJ;;IAH4D;IAQnE,mEAAA,GAAsE,SAAA;MACpE,IAAG,KAAK,CAAC,oBAAN,GAA6B,CAAhC;eACE,IAAI,CAAC,KAAL,CAAW,MAAM,CAAC,6BAAP,GAAuC,iBAAA,CAAA,CAAlD,EADF;OAAA,MAAA;eAGE,MAAM,CAAC,8BAHT;;IADoE;IAMtE,8BAAA,GAAiC,SAAA;aAC/B,KAAK,CAAC,eAAN,GAAwB,mCAAA,CAAA;IADO;IAGjC,4BAAA,GAA+B,SAAA;aAC7B,mCAAA,CAAA,CAAA,GAAwC,UAAA,CAAA;IADX;IAG/B,mCAAA,GAAsC,SAAA;aACpC,IAAI,CAAC,GAAL,CACE,oEAAA,CAAA,CADF,EAEE,gEAAA,CAAA,CAFF,EAGE,mEAAA,CAAA,CAHF;IADoC;IAOtC,yBAAA,GAA4B,SAAA;aAC1B,4BAAA,CAAA,CAAA,GAAiC,
MAAM,CAAC;IADd;IAG5B,MAAA,GACE;MAAA,8BAAA,EAAgC,8BAAA,CAAA,CAAhC;MACA,4BAAA,EAA8B,4BAAA,CAAA,CAD9B;MAEA,mCAAA,EAAqC,mCAAA,CAAA,CAFrC;MAGA,yBAAA,EAA2B,yBAAA,CAAA,CAH3B;MAKA,UAAA,EAAY,UAAA,CAAA,CALZ;MAMA,iBAAA,EAAmB,iBAAA,CAAA,CANnB;MAOA,cAAA,EAAgB,cAAA,CAAA,CAPhB;MASA,oEAAA,EAAsE,oEAAA,CAAA,CATtE;MAUA,gEAAA,EAAkE,gEAAA,CAAA,CAVlE;MAWA,mEAAA,EAAqE,mEAAA,CAAA,CAXrE;;AAaF,WAAO,MAAM,CAAC,MAAP,CAAc,MAAd,EAAsB,MAAtB;EAxEmB;;EA0E5B,MAAM,CAAC,mBAAP,GAA6B,SAAC,KAAD;AAC3B,QAAA;IAAA,MAAA,GAAS,UAAW,CAAA,KAAK,CAAC,OAAN;IAEpB,mCAAA,GAAsC,SAAA;AACpC,UAAA;MAAA,OAAA,GACE;QAAA,GAAA,EAAK,KAAK,CAAC,eAAX;QACA,KAAA,EAAO,MAAM,CAAC,kBAAP,CAA0B,KAA1B,CAAgC,CAAC,4BADxC;;MAGF,GAAA,GAAM,MAAM,CAAC,MAAP,CAAc,EAAd,EAAkB,KAAlB;MACN,CAAA,GAAI;AACJ,WAAuB,8FAAvB;QACE,GAAG,CAAC,eAAJ,GAAsB;QAEtB,CAAC,CAAC,IAAF,CAAO;UACL,GAAA,EAAK,eADA;UAEL,KAAA,EAAO,MAAM,CAAC,kBAAP,CAA0B,GAA1B,CAA8B,CAAC,4BAFjC;SAAP;AAHF;AAQA,aAAO;QACL,MAAA,EAAQ,mBADH;QAEL,IAAA,EAAM,CAFD;QAGL,OAAA,EAAS,OAHJ;;IAf6B;IAqBtC,sCAAA,GAAyC,SAAA;AACvC,UAAA;MAAA,OAAA,GACE;QAAA,GAAA,EAAK,KAAK,CAAC,kBAAX;QACA,KAAA,EAAO,MAAM,CAAC,kBAAP,CAA0B,KAA1B,CAAgC,CAAC,4BADxC;;MAGF,GAAA,GAAM,MAAM,CAAC,MAAP,CAAc,EAAd,EAAkB,KAAlB;MACN,CAAA,GAAI;AACJ,WAA0B,yIAA1B;QACE,GAAG,CAAC,kBAAJ,GAAyB;QAEzB,CAAC,CAAC,IAAF,CAAO;UACL,GAAA,EAAK,kBADA;UAEL,KAAA,EAAO,MAAM,CAAC,kBAAP,CAA0B,GAA1B,CAA8B,CAAC,4BAFjC;SAAP;AAHF;AAQA,aAAO;QACL,MAAA,EAAQ,sBADH;QAEL,IAAA,EAAM,CAFD;QAGL,OAAA,EAAS,OAHJ;;IAfgC;IAqBzC,wCAAA,GAA2C,SAAA;AACzC,UAAA;MAAA,OAAA,GACE;QAAA,GAAA,EAAK,KAAK,CAAC,oBAAX;QACA,KAAA,EAAO,MAAM,CAAC,kBAAP,CAA0B,KAA1B,CAAgC,CAAC,4BADxC;;MAGF,GAAA,GAAM,MAAM,CAAC,MAAP,CAAc,EAAd,EAAkB,KAAlB;MACN,CAAA,GAAI;AACJ,WAA4B,mHAA5B;QACE,GAAG,CAAC,oBAAJ,GAA2B;QAE3B,CAAC,CAAC,IAAF,CAAO;UACL,GAAA,EAAK,oBADA;UAEL,KAAA,EAAO,MAAM,CAAC,kBAAP,CAA0B,GAA1B,CAA8B,CAAC,4BAFjC;SAAP;AAHF;AAQA,aAAO;QACL,MAAA,EAAQ,yBADH;QAEL,IAAA,EAAM,CAFD;QAGL,OAAA,EAAS,OAHJ;;IAfkC;AAqB3C,WAAO;MACL,mCAAA,EAAqC,mCAAA,CAAA,CADhC;MAEL,sCAAA,EAAwC,sCAAA,CAAA,CAFnC;MAGL,wCAAA,EAA0C,wCAAA,CAAA,CAHrC;;EAlEoB;AApZ7B"
10 | }
--------------------------------------------------------------------------------
/js/index.js:
--------------------------------------------------------------------------------
1 | /*!
2 | * Serialize all form data into an array of key/value pairs
3 | * (c) 2020 Chris Ferdinandi, MIT License, https://gomakethings.com
4 | * @param {Node} form The form to serialize
5 | * @return {Array} The serialized form data
6 | */
function serializeArray(form) {
  // Control types that never contribute a value to the serialized form.
  const neverSerialized = ['file', 'reset', 'submit', 'button'];
  // Control types that only contribute a value while they are checked.
  const checkable = ['checkbox', 'radio'];

  // Maps one form control to the list of {name, value} pairs it contributes.
  const pairsFor = (field) => {
    if (!field.name || field.disabled || neverSerialized.includes(field.type)) {
      return [];
    }
    if (field.type === 'select-multiple') {
      // One pair per selected option, all sharing the control's name.
      return Array.prototype.slice.call(field.options)
        .filter((option) => option.selected)
        .map((option) => ({
          name: field.name,
          value: option.value
        }));
    }
    if (checkable.includes(field.type) && !field.checked) {
      return [];
    }
    return [{
      name: field.name,
      value: field.value
    }];
  };

  // `form.elements` is array-like, so copy it into a real array before mapping.
  return Array.prototype.slice.call(form.elements).flatMap((field) => pairsFor(field));
}
35 |
/**
 * Runs `fn` once the document has been parsed.
 *
 * @param {Function} fn - Callback to run. It is registered as a
 *   `DOMContentLoaded` listener while the document is still loading, and
 *   otherwise scheduled with a 1 ms timeout.
 */
function docReady(fn) {
  const stillLoading = document.readyState !== "complete" && document.readyState !== "interactive";
  if (stillLoading) {
    document.addEventListener("DOMContentLoaded", fn);
    return;
  }
  // Already parsed: defer to the next available tick instead of calling inline.
  setTimeout(fn, 1);
}
43 |
/**
 * Renders one occupancy graph with c3: the swept curve plus a single
 * highlighted point for the currently submitted value.
 *
 * @param {Array} x - X values of the curve.
 * @param {Array} y - Y values of the curve (number of warps).
 * @param {*} currentX - X value of the highlighted "Current" point.
 * @param {*} currentY - Y value of the highlighted "Current" point.
 * @param {string} xLabel - Label of the x axis.
 * @param {*} targetElement - Passed to c3 as `bindto`.
 */
function plot(x, y, currentX, currentY, xLabel, targetElement) {
  // Two series, each bound to its own column of x values: the curve
  // (named " ") and the one-point "Current" marker.
  const seriesData = {
    xs: {
      " ": "x",
      "Current": "x current",
    },
    columns: [
      ["x"].concat(x),
      [" "].concat(y),

      ["x current", currentX],
      ["Current", currentY],
    ],
  };

  const xAxis = {
    label: xLabel,
    min: 0,
    padding: {
      left: 0,
      right: 0,
    },
  };

  const yAxis = {
    label: {
      position: "outer-middle",
      text: "# warps",
    },
    min: 0,
    padding: {
      bottom: 0,
      top: 0,
    },
  };

  const chart = c3.generate({
    bindto: targetElement,
    data: seriesData,
    axis: {
      x: xAxis,
      y: yAxis,
    },
    grid: {
      y: {
        show: true,
      },
    },
    legend: {
      show: false,
    },
    padding: {
      right: 20,
    },
  });

  // To have no padding in axes but preventing the points from being dropped.
  const chartAreaSelector = "." + c3.chart.internal.fn.CLASS.chart;
  d3.select(chart.element).select(chartAreaSelector).attr("clip-path", null);
}
97 |
/**
 * Form submit handler: runs the occupancy calculation on the submitted
 * values and renders the results and graphs into the page.
 *
 * Every key of the calculation output is written into the element whose id
 * equals that key, and every graph is plotted into the element whose id
 * equals the graph's key.
 *
 * @param {Event} e - Submit event; `e.target` is the form to read.
 */
function onSubmit(e) {
  // Keep the browser from performing a regular form submission.
  e.preventDefault();

  const fields = serializeArray(e.target);
  const formData = Object.fromEntries(fields.map(({ name, value }) => [name, value]));

  // Compute everything before touching the page.
  const occupancyCalculationOutput = calculateOccupancy(formData);
  const graphsValues = computeGraphsValues(formData);

  document.getElementById("output").removeAttribute("hidden");

  for (const [id, result] of Object.entries(occupancyCalculationOutput)) {
    document.getElementById(id).innerText = result.toString();
  }

  for (const [id, graph] of Object.entries(graphsValues)) {
    const xs = graph.data.map((point) => point.key);
    const ys = graph.data.map((point) => point.value);
    plot(xs, ys, graph.current.key, graph.current.value, graph.xLabel,
      document.getElementById(id));
  }

  // show or hide the alert for Shared Memory used by Cuda Runtime
  const alertBox = document.querySelector("#alertCudaRuntimeSharedMemory");
  const hideAlert = !(Number.parseFloat(occupancyCalculationOutput.version) >= 8.0);
  if (hideAlert) {
    alertBox.setAttribute("hidden", "");
  } else {
    alertBox.removeAttribute("hidden");
  }
}
125 |
/**
 * Page bootstrap: wires the submit handler to the first form on the page,
 * keeps the CUDA runtime version selector in sync with the selected Compute
 * Capability, and preselects the last Compute Capability option.
 */
function main() {
  document.getElementsByTagName("form")[0].onsubmit = onSubmit;

  // Shows the CUDA runtime version selector for CC >= 8.x and hides it otherwise.
  const syncCudaVersionSelector = (event) => {
    // selected compute capability version
    const computeCapability = Number.parseFloat(event.target.value);

    // the div element wrapping the selector for the cuda version
    const cudaVersionBlock = document.querySelector("#cudaVersion").closest("div");

    if (computeCapability >= 8) {
      cudaVersionBlock.removeAttribute("hidden");
      return;
    }
    cudaVersionBlock.setAttribute("hidden", "");
  };

  const ccSelect = document.querySelector("#ccVersion");
  ccSelect.addEventListener('change', syncCudaVersionSelector);

  // set the default Compute Capability (latest version) and fire a change
  // event so the listener above runs for the initial selection
  ccSelect.selectedIndex = ccSelect.options.length - 1;
  ccSelect.dispatchEvent(new Event('change'));
}
150 |
151 | docReady(main);
152 |
--------------------------------------------------------------------------------