├── LICENSE ├── Makefile ├── README.md ├── cache_size.c ├── cache_size.html ├── cache_size.js └── cache_size_worker.js /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 Allan Wirth 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, copy, 7 | modify, merge, publish, distribute, sublicense, and/or sell copies 8 | of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 18 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: cache_size 2 | 3 | cache_size: cache_size.c 4 | gcc -o $@ $< -O2 -std=c99 -march=native -Wall -Wextra 5 | 6 | clean: 7 | rm -rf cache_size 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Javascript Cache Size Measurement 2 | 3 | In this repo is a simple script to measure the size of a CPU cache with javascript. 
// (README.md, continued)
// It requires WebWorkers and ArrayBuffers, although could probably be adapted to not require either.
//
// There really isn't that much special going on. Read cache_size_worker.js to get an idea of how the measurement is taking place - it is extensively commented. You can see a live demo here: https://fromwhenceitca.me/cache_size/cache_size.html
//
// Licensed under the MIT license (see LICENSE file).
// --------------------------------------------------------------------------------
// /cache_size.c:
// --------------------------------------------------------------------------------
// NOTE(review): the header names on the #include lines were lost in this listing;
// the set below is reconstructed from what the code actually uses.
#include <assert.h>
#include <math.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

// Number of array writes each measurement aims to perform.
#define NUM_ACCESSES (1 << 24)

// Array sizes are swept as MIN_ARRAY_SIZE * (2 << i) for i in [0, SIZE_MAGNITUDE).
// Note that (2 << i) is 2^(i+1), so the smallest size measured is 2 * MIN_ARRAY_SIZE
// and MAX_ARRAY_SIZE is twice the largest size actually swept.
#define MIN_ARRAY_SIZE (1024)
#define SIZE_MAGNITUDE 16
#define MAX_ARRAY_SIZE (MIN_ARRAY_SIZE * (2 << SIZE_MAGNITUDE))

// Strides are swept as MIN_STRIDE * (2 << j) for j in [0, STRIDES_MAGNITUDE).
#define MIN_STRIDE 16
#define STRIDES_MAGNITUDE 8

// volatile so the compiler cannot elide or merge the stores being timed.
static volatile char measurement_array[MAX_ARRAY_SIZE];

// Formats `size` (a byte count) into `buf` as a right-aligned number followed by
// a unit suffix (B, k, M, G), dividing by 1024 per unit. Returns what snprintf
// returns. The unit index is capped at the last suffix so it can never run off
// the end of the table.
static int format_size(char *buf, size_t cap, int size) {
    static const char *const SIZES[] = { "B", "k", "M", "G" };
    const size_t count = sizeof SIZES / sizeof *SIZES;
    size_t unit = 0;

    while (size >= 1024 && unit + 1 < count) {
        unit++;
        size /= 1024;
    }

    return snprintf(buf, cap, "% 6d%s", size, SIZES[unit]);
}

// Prints `size` to stdout in the compact form produced by format_size().
// https://stackoverflow.com/questions/3898840/converting-a-number-of-bytes-into-a-file-size-in-c
void printsize(int size) {
    char text[32];  // "% 6d" of any int plus a one-character suffix fits easily

    format_size(text, sizeof text, size);
    fputs(text, stdout);
}

// Returns the current wall-clock time in seconds (microsecond resolution).
// Exits the process if the clock cannot be read.
double microtime(void) {
    struct timeval t;
    if (gettimeofday(&t, NULL) != 0) {
        perror("Couldn't gettimeofday?");
        exit(1);
    }
    return (double)t.tv_sec + ((double)t.tv_usec / 1.0e6);
}

// Times strided writes over the first `array_size` bytes of measurement_array.
//
// Performs roughly NUM_ACCESSES writes in total (whole sweeps of the array, at
// least one) and returns the average time per write in seconds.
// Returns NAN when `array_size` or `stride` is not positive.
// `array_size` must not exceed MAX_ARRAY_SIZE.
double measure(int array_size, int stride) {
    assert(array_size <= MAX_ARRAY_SIZE);
    if (array_size <= 0 || stride <= 0) {
        return NAN;  // nothing to measure; also avoids dividing by zero below
    }

    // Writes per sweep: indices 0, stride, 2*stride, ... below array_size.
    const int per_round = array_size / stride + (array_size % stride != 0);
    int rounds = NUM_ACCESSES / per_round;
    if (rounds < 1) {
        rounds = 1;  // always time at least one full sweep
    }

    const double start = microtime();
    for (int i = 0; i < rounds; i++) {
        for (int j = 0; j < array_size; j += stride) {
            measurement_array[j] = 0;
        }
    }
    const double end = microtime();

    // Divide by the writes actually performed, not the nominal target.
    return (end - start) / ((double)rounds * (double)per_round);
}

// Define CACHE_SIZE_NO_MAIN to link the routines above into another program
// (e.g. a test harness) without this entry point.
#ifndef CACHE_SIZE_NO_MAIN
// Sweeps array sizes (rows) and strides (columns) and prints the average write
// time for each combination in nanoseconds; combinations where the stride is
// not smaller than the array are reported as NaN.
int main(void) {
    double *results = malloc(sizeof *results * SIZE_MAGNITUDE * STRIDES_MAGNITUDE);
    if (results == NULL) {
        perror("malloc");
        return 1;
    }

    // Touch every byte once before timing. Done with a plain loop because the
    // array is volatile and must not be accessed through a non-volatile lvalue.
    for (size_t k = 0; k < sizeof measurement_array; k++) {
        measurement_array[k] = 0;
    }

    printf("Showing avg time to access memory in nanoseconds. Rows are array size, columns are stride\n");
    printf("-------");
    for (int i = 0; i < STRIDES_MAGNITUDE; i++) {
        printsize(MIN_STRIDE * (2 << i));
    }
    putchar('\n');

    for (int i = 0; i < SIZE_MAGNITUDE; i++) {
        int as = MIN_ARRAY_SIZE * (2 << i);
        printsize(as);
        for (int j = 0; j < STRIDES_MAGNITUDE; j++) {
            int stride = MIN_STRIDE * (2 << j);
            double cache_time = (stride < as) ? measure(as, stride) : NAN;
            results[i * STRIDES_MAGNITUDE + j] = cache_time;
            printf(" %6.3f", cache_time * 1.0e9);
            fflush(stdout);  // show each cell as soon as it is measured
        }
        putchar('\n');
    }

    free(results);
    return 0;
}
#endif /* CACHE_SIZE_NO_MAIN */
// --------------------------------------------------------------------------------
// /cache_size.html:
// --------------------------------------------------------------------------------
// Cache Size Measurement
//
// A web worker is currently measuring your CPU cache size.
9 | The test iterates through arrays of different sizes, and accesses elements with a given stride in a pseudo-random order (to defeat prefetching).
10 | Showing 32-bit integer access time (plus loop overhead) in nanoseconds, with the relative change from the previous size in parentheses.
11 | Rows are array size, columns are stride. Source is available on GitHub. 12 | 13 | 14 | 
// --------------------------------------------------------------------------------
// /cache_size.js:
// --------------------------------------------------------------------------------
// Page-side script: starts the measurement worker and renders its messages as a
// table (one row per array size, one column per stride).
//
// NOTE(review): the element names inside the $("<...>") literals were stripped
// from this listing. <table>/<tr>/<th>/<td> follow from how the nodes are used;
// <span> and <p> are best guesses -- confirm against the original file.
var myWorker = new Worker("cache_size_worker.js");

var table = $("<table>").appendTo("#target");

var current_row = null;    // row currently being filled with measurements
var last_for_stride = {};  // stride -> time reported for the previous array size

// Formats a byte count as an integer with a k/M/G suffix (powers of 1024).
// The suffix index is capped so very large values never index past the table.
function formatSize(s) {
  var sizes = ["", "k", "M", "G"];
  var i = 0;
  while (s >= 1024 && i < sizes.length - 1) {
    s = Math.floor(s / 1024);
    i++;
  }
  return s + sizes[i];
}

// Sort key for a cell: its percentage change, or -Infinity when there is no
// usable value (NaN/Infinity), so such cells sort first and are never highlighted
// ahead of real values.
function pctSortKey(cell) {
  var v = cell.data("pct_change");
  return isFinite(v) ? v : -Infinity;
}

// Total-order comparator on pctSortKey (never returns NaN).
function byPctChange(a, b) {
  var ka = pctSortKey(a), kb = pctSortKey(b);
  return ka < kb ? -1 : (ka > kb ? 1 : 0);
}

// Handle the messages posted by the web worker by appending to the table.
//   "head"        -> header row with one column per stride
//   "start_row"   -> new row labelled with the array size
//   "measurement" -> one cell: time plus change relative to the previous size
//   "done"        -> highlight the largest relative jumps per stride
myWorker.onmessage = function(m) {
  var data = m.data;

  if (data.type === "head") {
    var row = $("<tr>").appendTo(table);
    row.append($("<th>").text("size \\ stride"));
    data.headers.forEach(function(e) {
      $("<th>").text(formatSize(e)).appendTo(row);
    });
  } else if (data.type === "start_row") {
    current_row = $("<tr>").appendTo(table);
    current_row.append($("<th>").text(formatSize(data.size)));
  } else if (data.type === "measurement") {
    // First value seen for a stride is compared against itself (0% change).
    var last = last_for_stride.hasOwnProperty(data.stride) ? last_for_stride[data.stride] : data.time;
    var diff = data.time - last;
    var pct_change = diff / last * 100;

    var new_el = $("<td>")
      .addClass("stride_" + data.stride)
      .data("time", data.time)
      .data("diff", diff)
      .data("pct_change", pct_change);

    if (!isNaN(data.time)) {
      new_el.text(data.time.toFixed(4));
    }
    // Skip the annotation when there is nothing meaningful to show: no previous
    // value (NaN), a previous value of 0 (Infinity), or no change at all.
    if (isFinite(pct_change) && pct_change !== 0) {
      var diff_s = ((diff > 0) ? "+" : "") + pct_change.toFixed(2);
      new_el.append($("<span>").text(" (" + diff_s + "%)"));
    }

    last_for_stride[data.stride] = data.time;
    current_row.append(new_el);
  } else if (data.type === "done") {
    Object.keys(last_for_stride).forEach(function(stride) {
      var els = $(".stride_" + stride).get()
        .map(function(el) { return $(el); })
        .filter(function(cell) { return cell.text(); })  // only cells with a measurement
        .sort(byPctChange);

      els.forEach(function(x, i, a) {
        // Highlight only the four largest relative changes; the thresholds are
        // empirical and were tuned by eye.
        if (i < a.length - 4) { return; }
        var color = Math.floor(255 * (1 - Math.pow(i / a.length, 3)));
        x.css("border-top", "4px solid rgb(255, " + color + "," + color + ")");
        x.css("font-weight", "bold");
      });
    });
    $("<p>").text("Large relative changes (indicated in red) indicate likely cache size boundaries").insertAfter(table);
  } else {
    console.log(data);
  }
};
// --------------------------------------------------------------------------------
// /cache_size_worker.js:
// --------------------------------------------------------------------------------
(function() {
  "use strict";

  // Target number of array reads per timed run. Chosen empirically; higher
  // values reduce noise at the cost of run time.
  var NUM_ACCESSES = (1 << 19);
  // Minimum size of array to use in measurement, in bytes.
  // This should be smaller than the smallest L1 cache we expect to see.
  var MIN_ARRAY_SIZE = 1024;
  // How many different sizes to try (sizes are MIN_ARRAY_SIZE * 2^i for
  // i in [0, SIZE_MAGNITUDE), i.e. 1k .. 32M). Going bigger seems pointless.
  var SIZE_MAGNITUDE = 16;

  // Minimum byte offset to stride by. Remember: cache lines are 64 bytes.
  var MIN_STRIDE = 32;
  // How many different strides to try.
  var STRIDES_MAGNITUDE = 8;

  // Number of timed passes per (size, stride); the minimum is reported.
  var MEASURE_PASSES = 4;

  // Returns the current time in milliseconds.
  // Higher resolution timing here would reduce noise.
  // One option is to polyfill using another worker and SharedArrayBuffer,
  // but this isn't supported on most browsers yet.
  // Other options are PNaCl or similar other multithreaded polyfills.
  // performance.now() works reasonably well even though its resolution is
  // deliberately limited.
  // See https://bugs.chromium.org/p/chromium/issues/detail?id=506723
  var microtime;
  if (typeof performance !== "undefined" && typeof performance.now !== "undefined") {
    microtime = performance.now.bind(performance);
  } else {
    // On systems that don't have it (e.g. Safari on Mavericks), fall back to
    // Date.now. Old versions of IE don't have Date.now, but they also don't
    // have Int32Array, so they cannot run this at all.
    microtime = Date.now.bind(Date);
    NUM_ACCESSES *= 8; // coarser clock, so do more work per timed run
  }

  // Times one run of pointer-chasing through the permutation stored in `array`
  // by setup_array() and returns the average time per access in milliseconds.
  //
  // Precondition (checked in measure): the permutation length,
  // array_size / stride, is a multiple of 4.
  function measure_inner(array, array_size, stride) {
    // Divide the number of accesses that we want to do by the number of
    // hits we do per loop. Keeping the number of accesses roughly constant
    // makes the time per round visually comparable in the demo.
    // For larger array sizes the error from rounding up grows.
    var rounds = Math.ceil(NUM_ACCESSES / (array_size / stride));

    var start = microtime();

    // The walk can only return to index 0 on a multiple of 4 accesses, so the
    // loop is unrolled four times. This removes some loop overhead, which
    // tightens results in the L1/L2 range where that overhead matters.
    var i = 0, p = 0;
    while (true) {
      p = array[p];
      p = array[p];
      p = array[p];
      p = array[p];
      if (p === 0) { // back at the start: one full pass over the permutation
        i += 1;
        if (i === rounds) {
          break;
        }
      }
    }
    // This loop terminates because the entries in array form a permutation
    // of length (array_size / stride), and that length is 0 mod 4.
    // Note: originally this was two nested for loops. One loop is slightly
    // slower but much more consistent, presumably due to branch prediction.
    // Every load depends on the previous one, so nothing here pipelines.

    var end = microtime();

    var total_time = end - start;

    // Return the average time per access.
    return total_time / (rounds * (array_size / stride));
  }

  // Fills `array` (an Int32Array) so that following array[p] repeatedly from
  // p = 0 visits every stride-th byte offset of the first `array_size` bytes in
  // a scattered order and then returns to 0.
  function setup_array(array, array_size, stride) {
    // Choose an odd multiple of stride that is about the golden ratio times
    // the number of items we want to access.
    // This is a generator of {0, stride, 2*stride, ...} mod array_size,
    // i.e. it can generate a permutation of all the offsets we want to access.
    var step = (((array_size / stride * 0.61803398875) | 1) * stride) | 0;

    var i, current, next;

    // Zero the portion of the array we care about.
    // Not required, but nice to have.
    // Note: Safari doesn't support array.fill, so use a loop.
    for (i = 0; i < array_size / 4; i += 1) {
      array[i] = 0;
    }

    // Generate the permutation: each visited element stores the index of the
    // element to visit next.
    current = 0;
    for (i = 0; i < array_size; i += stride) {
      next = (current + step) % array_size;
      array[current / 4] = next / 4; // byte offset -> Int32 index
      current = next;
    }

    // If the walk does not close, measure_inner would never terminate, so fail
    // loudly instead of only logging.
    if (current !== 0) {
      throw new Error("Didn't generate?");
    }

    // Touch every element to try to clean up our cache a bit.
    for (i = 0; i < array_size / 4; i += 1) {
      array[i] |= 0;
    }
  }

  // Runs measure_inner several times and returns the minimum, which is
  // presumably close to the best time the loop can achieve.
  // Throws RangeError when the arguments violate measure_inner's preconditions.
  function measure(measurement_array, array_size, stride) {
    var i, best = Infinity, m;

    if (array_size > measurement_array.length * 4) {
      throw new RangeError("Weird size");
    }
    // A violated precondition would make measure_inner spin forever.
    if ((array_size / stride) % 4 !== 0) {
      throw new RangeError("Didn't meet unroll precondition");
    }

    setup_array(measurement_array, array_size, stride);

    // Keep the minimum, not the average: we want the optimal run time.
    // Starting from Infinity means a genuine 0 ms sample is kept as the minimum.
    for (i = 0; i < MEASURE_PASSES; i += 1) {
      m = measure_inner(measurement_array, array_size, stride);
      if (m < best) {
        best = m;
      }
    }
    return best;
  }

  // Posts the list of all strides we are going to test to the page.
  function post_headers() {
    var headers = [], i;
    for (i = 0; i < STRIDES_MAGNITUDE; i += 1) {
      headers.push(MIN_STRIDE * (1 << i));
    }
    self.postMessage({"type": "head", "headers": headers});
  }

  // Runs the full sweep, posting "start_row"/"measurement" messages as it goes
  // and a final "done" message with all results, then closes the worker.
  function measure_all() {
    // Int32 vs. Uint32 doesn't seem to make a big difference here; Int32 is
    // probably a little friendlier to the engine.
    var measurement_array = new Int32Array((MIN_ARRAY_SIZE * (1 << SIZE_MAGNITUDE)) / 4);

    var results = new Float64Array(SIZE_MAGNITUDE * STRIDES_MAGNITUDE);
    var cache_time, stride, as, i, j;

    post_headers();

    // Try to warm up the JIT. It seems to help a little on Firefox for the
    // first few measurements.
    for (i = 0; i < SIZE_MAGNITUDE / 2; i += 1) {
      measure(measurement_array, MIN_ARRAY_SIZE * (1 << i), 8);
    }

    for (i = 0; i < SIZE_MAGNITUDE; i += 1) {
      as = MIN_ARRAY_SIZE * (1 << i);
      self.postMessage({"type": "start_row", "size": as});
      for (j = 0; j < STRIDES_MAGNITUDE; j += 1) {
        stride = MIN_STRIDE * (1 << j);
        // measure_inner's unrolled loop needs a permutation of at least
        // length 4, so the stride must be less than half of the array size.
        if (stride * 2 < as) {
          cache_time = measure(measurement_array, as, stride);
        } else {
          cache_time = NaN;
        }
        // measure() returns milliseconds per access; report nanoseconds.
        self.postMessage({"type": "measurement", "time": cache_time * 1e6, "stride": stride});
        results[i * STRIDES_MAGNITUDE + j] = cache_time;
      }
    }

    measurement_array = null;

    self.postMessage({"type": "done", "results": results});

    self.close();
  }

  measure_all();
}());
// --------------------------------------------------------------------------------