├── LICENSE
├── Makefile
├── README.md
├── cache_size.c
├── cache_size.html
├── cache_size.js
└── cache_size_worker.js


/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2016 Allan Wirth
 2 | 
 3 | Permission is hereby granted, free of charge, to any person 
 4 | obtaining a copy of this software and associated documentation 
 5 | files (the "Software"), to deal in the Software without 
 6 | restriction, including without limitation the rights to use, copy, 
 7 | modify, merge, publish, distribute, sublicense, and/or sell copies 
 8 | of the Software, and to permit persons to whom the Software is 
 9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be 
12 | included in all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
18 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 
19 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 
20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
# Compiler and flags are variables so they can be overridden from the command
# line, e.g. `make CC=clang`.
CC = gcc
CFLAGS = -O2 -std=c99 -march=native -Wall -Wextra

# `all` and `clean` do not name files; mark them phony so a stray file with
# either name cannot make the target look up to date.
.PHONY: all clean

all: cache_size

cache_size: cache_size.c
	$(CC) $(CFLAGS) -o $@ $<

clean:
	rm -rf cache_size
8 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Javascript Cache Size Measurement
2 | 
In this repo is a simple script to measure the size of a CPU cache with JavaScript. It requires WebWorkers and ArrayBuffers, although it could probably be adapted to not require either.
4 | 
5 | There really isn't that much special going on. Read cache_size_worker.js to get an idea of how the measurement is taking place - it is extensively commented. You can see a live demo here: https://fromwhenceitca.me/cache_size/cache_size.html
6 | 
7 | Licensed under the MIT license (see LICENSE file).
8 | 


--------------------------------------------------------------------------------
/cache_size.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <time.h>
 3 | #include <unistd.h>
 4 | #include <sys/time.h>
 5 | #include <stdlib.h>
 6 | #include <assert.h>
 7 | #include <math.h>
 8 | #include <string.h>
 9 | 
// Target number of array stores performed by each call to measure().
#define NUM_ACCESSES (1 << 24)

// Array sizes tested by main() are MIN_ARRAY_SIZE * (2 << i) bytes for
// i in [0, SIZE_MAGNITUDE).
#define MIN_ARRAY_SIZE (1024)
#define SIZE_MAGNITUDE 16
// Largest array size actually tested, i.e. the i == SIZE_MAGNITUDE - 1 case.
// (Shifting by SIZE_MAGNITUDE itself would reserve twice the memory needed.)
#define MAX_ARRAY_SIZE (MIN_ARRAY_SIZE * (2 << (SIZE_MAGNITUDE - 1)))

// Strides tested by main() are MIN_STRIDE * (2 << j) bytes for
// j in [0, STRIDES_MAGNITUDE).
#define MIN_STRIDE 16
#define STRIDES_MAGNITUDE 8

// Buffer written to by measure(). volatile so the compiler cannot drop the
// stores that are being timed.
static volatile char measurement_array[MAX_ARRAY_SIZE];
20 | 
// https://stackoverflow.com/questions/3898840/converting-a-number-of-bytes-into-a-file-size-in-c
//
// Prints `size` (a byte count) to stdout, scaled down by powers of 1024 and
// followed by a unit suffix ("B", "k", "M" or "G"). The number is
// right-aligned in a 6 character field and truncated towards zero.
void printsize(int size) {
    static const char *SIZES[] = { "B", "k", "M", "G" };
    // Index of the largest unit; scaling must stop here so that SIZES[div]
    // can never read past the end of the table.
    const unsigned int last = (sizeof SIZES / sizeof *SIZES) - 1;
    unsigned int div = 0;

    while (size >= 1024 && div < last) {
        div++;
        size /= 1024;
    }

    printf("% 6d%s", size, SIZES[div]);
}
33 | 
// Returns the current time reported by gettimeofday() as seconds, with the
// microsecond part folded in as a fraction. If gettimeofday() fails, the
// error is reported with perror() and the process exits with status 1.
double microtime() {
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  if (rc == 0) {
    const double whole_seconds = (double)now.tv_sec;
    const double fraction = (double)now.tv_usec / 1.0e6;
    return whole_seconds + fraction;
  }

  perror("Couldn't gettimeofday?");
  exit(1);
}
42 | 
43 | double measure(int array_size, int stride) {
44 |   assert(array_size <= MAX_ARRAY_SIZE);
45 |   int rounds = NUM_ACCESSES / (array_size / stride);
46 |   double start = microtime();
47 |   for (int i = 0; i < rounds; i++) {
48 |     for (int j = 0; j < array_size; j += stride) {
49 |       measurement_array[j] = 0;
50 |     }
51 |   }
52 |   double end = microtime();
53 |   double total_time = end - start;
54 |   return total_time / NUM_ACCESSES;
55 | }
56 | 
57 | int main() {
58 |   double * results = malloc(sizeof(double) * SIZE_MAGNITUDE * STRIDES_MAGNITUDE);
59 | 
60 |   memset((char *)measurement_array, 0, sizeof(measurement_array));
61 | 
62 |   printf("Showing avg time to access memory in nanoseconds. Rows are array size, columns are stride\n");
63 |   printf("-------");
64 |   for (int i = 0; i < STRIDES_MAGNITUDE; i++) {
65 |     printsize(MIN_STRIDE * (2 << i));
66 |   }
67 |   putchar('\n');
68 | 
69 |   for (int i = 0; i < SIZE_MAGNITUDE; i++) {
70 |     int as = MIN_ARRAY_SIZE * (2 << i);
71 |     printsize(as);
72 |     for (int j = 0; j < STRIDES_MAGNITUDE; j++) {
73 |       int stride = MIN_STRIDE * (2 << j);
74 |       double cache_time;
75 |       if (stride < as) {
76 |         cache_time = measure(as, stride);
77 |       } else {
78 |         cache_time = NAN;
79 |       }
80 |       results[i * STRIDES_MAGNITUDE + j] = cache_time;
81 |       printf(" %6.3f", cache_time * 1.0e9);
82 |       fflush(stdout);
83 |     }
84 |     putchar('\n');
85 |   }
86 | 
87 |   return 0;
88 | }
89 | 


--------------------------------------------------------------------------------
/cache_size.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 | 	<title>Cache Size Measurement</title>
 5 | </head>
 6 | <body>
 7 | 
 8 | A web worker is currently measuring your CPU cache size<noscript style="color: red"> or at least it would be if you didn't have javascript disabled</noscript>.<br/>
The test iterates through arrays of different sizes, and accesses elements with a given stride in a pseudo-random order (to defeat prefetching).<br/>
10 | Showing 32-bit integer access time (plus loop overhead) in nanoseconds, with relative from previous size in parens.<br/>
11 | Rows are array size, columns are stride. Source is available <a href="https://github.com/allanlw/cache_size">on github</a>.
12 | 
13 | 
14 | <div id="target"></div>
15 | 
16 | <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.0/jquery.min.js"></script>
17 | <script src="cache_size.js"></script>
18 | 	
19 | </body>
20 | </html>
21 | 


--------------------------------------------------------------------------------
/cache_size.js:
--------------------------------------------------------------------------------
// Start the measurement worker; its progress messages are handled by
// myWorker.onmessage below.
var myWorker = new Worker("cache_size_worker.js");

// Results table, appended to the #target element of the page.
var table = $("<table/>").appendTo("#target");

// The table row currently being filled in (set on each "start_row" message).
var current_row = null;
// Most recent time reported for each stride, keyed by stride. Used to compute
// the change relative to the previous array size.
var last_for_stride = {};
 7 | 
 8 | function formatSize(s) {
 9 |   var sizes = ["", "k", "M"];
10 |   var i = 0;
11 |   while (s >= 1024) {
12 |     s = Math.floor(s / 1024);
13 |     i++;
14 |   }
15 |   return s + sizes[i];
16 | }
17 | 
// Handle the messages posted by the web worker by building up the results
// table with jQuery. Message types:
//   "head"        - m.data.headers lists the strides; draws the header row.
//   "start_row"   - m.data.size is the array size; starts a new table row.
//   "measurement" - m.data.time / m.data.stride; appends one cell showing the
//                   time and its percentage change from the previous time
//                   recorded for the same stride.
//   "done"        - highlights, per stride, the cells with the largest
//                   relative increase.
// Anything else is logged to the console.
myWorker.onmessage = function(m) {
  if (m.data.type == "head") {
    var row = $("<tr/>").appendTo(table);
    row.append($("<th/>").text("size \\ stride"));
    m.data.headers.forEach(function(e) {
      $("<th/>").text(formatSize(e)).appendTo(row);
    });
  } else if (m.data.type == "start_row") {
    current_row = $("<tr/>").appendTo(table);
    current_row.append($("<td/>").text(formatSize(m.data.size)));
  } else if (m.data.type == "measurement") {
    // The first measurement for a stride is compared against itself (0% change).
    var last = last_for_stride.hasOwnProperty(m.data.stride) ? last_for_stride[m.data.stride] : m.data.time;
    var diff = m.data.time - last;
    var pct_change = diff / last * 100;
    var diff_s = ((diff > 0) ? ("+") : "") + pct_change.toFixed(2);
    var new_el = $("<td/>")
        .addClass("stride_" + m.data.stride)
        .data("time", m.data.time)
        .data("diff", diff)
        .data("pct_change", pct_change);
    // Cells that were not measured arrive as NaN and are left empty.
    if (!isNaN(m.data.time)) {
        new_el.text(m.data.time.toFixed(4));
    }
    if (!isNaN(pct_change) && pct_change != 0) {
        new_el.append($("<small/>").text(" (" + diff_s + "%)"));
    }
    last_for_stride[m.data.stride] = m.data.time;
    current_row.append(new_el);
  } else if (m.data.type == "done") {
    var strides = Object.keys(last_for_stride), i;
    for (i = 0; i < strides.length; i++) {
      var els = $(".stride_" + strides[i]).get();
      // Wrap explicitly: passing $ straight to map() would also hand it the
      // element index as a second argument.
      els = els.map(function(el) {
        return $(el);
      }).filter(function(a) {
        // Keep only measured cells with a comparable change. The first
        // measured cell after unmeasured ones has a NaN change, and a NaN
        // in the comparator would make the sort order unpredictable.
        var pct = a.data("pct_change");
        return a.text() && !isNaN(pct);
      }).sort(function(a, b) {
        var pa = a.data("pct_change"), pb = b.data("pct_change");
        return (pa < pb) ? -1 : ((pa > pb) ? 1 : 0);
      });
      els.forEach(function(x, idx, a) {
        // Highlight only the four largest changes. The colour curve was
        // chosen empirically; it seemed to work well on the test machines.
        if (idx < a.length - 4) { return; }
        var color = Math.floor(255 * (1 - Math.pow(idx/a.length, 3)));
        x.css("border-top", "4px solid rgb(255, " + color + "," + color + ")");
        x.css("font-weight", "bold");
      });
    }
    $("<p>").text("Large relative changes (indicated in red) indicate likely cache size boundaries").insertAfter(table);
  } else {
    console.log(m.data);
  }
};
72 | 


--------------------------------------------------------------------------------
/cache_size_worker.js:
--------------------------------------------------------------------------------
  1 | (function() {
  2 |   "use strict";
  3 | 
  // Target number of array accesses per timed pass. 2^19 was chosen
  // empirically, like the other constants here. Higher numbers give a
  // reduction in noise. (Multiplied by 8 below when only Date.now is available.)
  var NUM_ACCESSES = (1 << 19);
  // Minimum size of array to use in measurement, in bytes
  // This should be smaller than what we think the smallest L1 cache size is.
  // Also it's cool to see how consistent the smaller array sizes are.
  var MIN_ARRAY_SIZE = 1024;
  // How many different sizes do we want to try? Each size is double the
  // previous one. Going bigger than 32MB seems pointless
  var SIZE_MAGNITUDE = 16;

  // Minimum byte offset to stride by. Remember: cache lines are 64 bytes
  var MIN_STRIDE = 32;
  // How many different strides do we want to try? Each stride is double the
  // previous one.
  var STRIDES_MAGNITUDE = 8;
 18 | 
 19 |   // Returns the current time as a float in seconds.
 20 |   // Higher resolution timing here would reduce noise.
 21 |   // One option is to polyfill using another worker and SharedArrayBuffer
 22 |   // But this isn't supported on most browsers yet.
 23 |   // Other options are PNaCl or similar other multithreaded polyfills.
 24 |   // performance.now() works reasonably fine though, even though it's crippled
 25 |   // and has a maximum resolution.
 26 |   // See https://bugs.chromium.org/p/chromium/issues/detail?id=506723
 27 |   var microtime;
 28 |   if (typeof performance !== "undefined" && typeof performance.now !== "undefined") {
 29 |     microtime = performance.now.bind(performance);
 30 |   // on systems that don't have it, fall back to Date.now
 31 |   // I'm looking at you Safari on Mavericks
 32 |   // Old versions of IE don't have Date.now, but they also don't have
 33 |   // Int32Array so they're screwed regardless
 34 |   } else {
 35 |     microtime = Date.now.bind(Date);
 36 |     NUM_ACCESSES *= 8; // sorry old browsers, but we need that extra precision
 37 |   }
 38 | 
  // Times one pass of pointer-chasing through `array` and returns the average
  // time per access, in the units of microtime().
  //
  // array:      typed array previously prepared by setup_array, where each
  //             visited element holds the index of the next element to visit
  //             and the chain starting at index 0 eventually returns to 0.
  // array_size: size in bytes of the region being measured.
  // stride:     byte distance between the elements that are visited.
  function measure_inner(array, array_size, stride) {
    // Divide the number of accesses that we want to do by the number of
    // hits we do per loop. Keeping the number of accesses ~constant
    // is nice for the demo because it looks cool and allows visual comparison
    // of the time it takes to do each round.
    // Note: for larger array sizes the rounding error from Math.ceil gets
    // bigger; the return value divides by the real access count to compensate.
    var rounds = Math.ceil(NUM_ACCESSES / (array_size / stride));

    var start = microtime();

    // We only ever hit the end of the permutation on a multiple of 4 accesses,
    // so the loop is unrolled four times.
    // We check that the precondition is true in measure().
    // This cuts some of the loop overhead out, which gets a bit tighter
    // results for the L1+L2 areas where the loop overhead matters.
    var i = 0, p = 0;
    while (true) {
      p = array[p];
      p = array[p];
      p = array[p];
      p = array[p];
      if (p === 0) { // when we hit p === 0 here, we have gone through the full permutation
        i += 1;
        if (i === rounds) {
          break;
        }
      }
    }
    // This loop terminates because we set up the entries in array as a permutation
    // of length (array_size / stride), and the length of the permutation is 0 mod 4
    // Note: Originally this was two nested for loops. Doing one loop is slightly slower
    // but much more consistent, presumably due to branch prediction.
    // Every load depends on the result of the previous one, which is
    // presumably terrible for pipelining.

    var end = microtime();

    var total_time = end - start;

    // Return the average loop iteration time.
    return total_time / (rounds * (array_size / stride));
  }
 82 | 
 83 |   function setup_array(array, array_size, stride) {
 84 |     // Chose an odd multiple of stride that the golden ratio times the number
 85 |     // of items that we want to access.
 86 |     // Note that this is a generator of {0,stride,2*stride,...} mod array_size
 87 |     // AKA this can generate a permutation of all the bytes we want to access
 88 |     var step = (((array_size / stride * 0.61803398875) | 1)  * stride) | 0;
 89 | 
 90 |     var i, current, next;
 91 | 
 92 |     // zero the entire array (or, the portion we care about)
 93 |     // not required at all, but  but is a nice to have
 94 |     // note: safari doesn't support array.fill, so use a loop
 95 |     for (i = 0; i < array_size/4; i += 1) {
 96 |       array[i] = 0;
 97 |     }
 98 | 
 99 |     // Generate our permutation and insert the index of the next element into
100 |     // each element in the array.
101 |     current = 0;
102 |     for (i = 0; i < array_size; i += stride) {
103 |       next = (current + step) % array_size; 
104 |       array[current/4] = next/4; // divide by 4 for integer alignment
105 |       current = next;
106 |     }
107 | 
108 |     console.assert(current === 0, "Didn't generate?");
109 | 
110 |     // Touch every element to try to clean up our cache a bit
111 |     for (i = 0; i < array_size/4; i+=1) {
112 |       array[i] |= 0;
113 |     }
114 |   }
115 | 
116 |   // This calls measure_inner multiple times and returns the minimum, which is presumably
117 |   // close to the optimal time that the loop in measure_inner could run in.
118 |   function measure(measurement_array, array_size, stride) {
119 |     var i = 0, best = 0, m;
120 | 
121 |     console.assert(array_size <= measurement_array.length * 4, "Weird size");
122 |     console.assert((array_size / stride) % 4 === 0, "Didn't meet unroll precondition");
123 | 
124 |     setup_array(measurement_array, array_size, stride);
125 | 
126 |     // Why 4 passes? I pulled it out of my ass. Lowest number that seemed to get
127 |     // reasonably consistent results. Lower numbers have more noise, presumably
128 |     // Return minimum time, not average, because we want the optimal run time
129 |     for (i = 0; i < 4; i += 1) {
130 |       m = measure_inner(measurement_array, array_size, stride);
131 |       if (best === 0 || m < best) {
132 |         best = m;
133 |       }
134 |     }
135 |     return best;
136 |   }
137 | 
138 |   // This just posts a list of all possible strides we're going to test to the page
139 |   function post_headers() {
140 |     var headers = [], i;
141 |     for (i = 0; i < STRIDES_MAGNITUDE; i += 1) {
142 |       headers.push(MIN_STRIDE * (1 << i));
143 |     }
144 |     self.postMessage({"type": "head", "headers": headers});
145 |   }
146 | 
147 | 
148 |   function measure_all() {
149 |     // Int32 vs. Uint32 doesn't seem to have a big difference here
150 |     // Lets use Int32 because it's a bit more friendly to the engine, probably?
151 |     // (For uint32 extraction don't they have to do a bunch of wankery with signs?)
152 |     var measurement_array = new Int32Array((MIN_ARRAY_SIZE * (1 << SIZE_MAGNITUDE)) / 4);
153 | 
154 |     var results = new Float64Array(SIZE_MAGNITUDE * STRIDES_MAGNITUDE);
155 |     var cache_time, stride, as, i, j;
156 | 
157 |     post_headers();
158 | 
159 |     // try to warm up the JIT? Does this even work?
160 |     // It seems to help a little on FireFox for the first few
161 |     for (i = 0; i < SIZE_MAGNITUDE/2; i += 1) {
162 |       measure(measurement_array, MIN_ARRAY_SIZE * (1 << i), 8);
163 |     }
164 | 
165 |     for (i = 0; i < SIZE_MAGNITUDE; i += 1) {
166 |       as = MIN_ARRAY_SIZE * (1 << i);
167 |       self.postMessage({"type": "start_row", "size": as});
168 |       for (j = 0; j < STRIDES_MAGNITUDE; j += 1) {
169 |         stride = MIN_STRIDE * (1 << j);
170 |         // measure_inner has a funrolled loop that assumes that stride is less than half
171 |         // of the array_size (because it assumes the permutation will be at least length 4)
172 |         if (stride * 2 < as) {
173 |           cache_time = measure(measurement_array, as, stride);
174 |         } else {
175 |           cache_time = NaN;
176 |         }
177 |         self.postMessage({"type":"measurement", "time": cache_time*1e6, "stride": stride });
178 |         results[i * STRIDES_MAGNITUDE + j] = cache_time;
179 |       }
180 |     }
181 | 
182 |     measurement_array = null;
183 | 
184 |     self.postMessage({"type":"done", "results": results});
185 | 
186 |     close();
187 |   }
188 | 
189 |   measure_all();
190 | }());
191 | 


--------------------------------------------------------------------------------