├── LICENSE.txt
├── README.md
├── extended-api.md
├── run_dev_server.sh
├── src
    ├── benchmarks
    │   ├── aobench.js
    │   ├── averageFloat32x4.js
    │   ├── averageFloat32x4LoadFromInt8Array.js
    │   ├── averageFloat32x4LoadX.js
    │   ├── averageFloat32x4LoadXY.js
    │   ├── averageFloat32x4LoadXYZ.js
    │   ├── averageInt32x4Load.js
    │   ├── base.js
    │   ├── index.html
    │   ├── inverse4x4.js
    │   ├── kernel-template.js
    │   ├── mandelbrot.js
    │   ├── matrix-multiplication.js
    │   ├── memcpy.js
    │   ├── memset.js
    │   ├── run.js
    │   ├── run_browser.js
    │   ├── shiftrows.js
    │   ├── sinx4.js
    │   ├── transform.js
    │   └── transpose4x4.js
    ├── ecmascript_simd.js
    ├── ecmascript_simd_tests.js
    ├── external
    │   ├── qunit.css
    │   └── qunit.js
    ├── index.html
    ├── shell_test_runner.js
    └── test.js
└── tc39
    ├── SIMD-128 TC-39.pdf
    └── spec.html


/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | /*
 2 |   Copyright (C) 2013
 3 | 
 4 |   This software is provided 'as-is', without any express or implied
 5 |   warranty.  In no event will the authors be held liable for any damages
 6 |   arising from the use of this software.
 7 | 
 8 |   Permission is granted to anyone to use this software for any purpose,
 9 |   including commercial applications, and to alter it and redistribute it
10 |   freely, subject to the following restrictions:
11 | 
12 |   1. The origin of this software must not be misrepresented; you must not
13 |      claim that you wrote the original software. If you use this software
14 |      in a product, an acknowledgment in the product documentation would be
15 |      appreciated but is not required.
16 |   2. Altered source versions must be plainly marked as such, and must not be
17 |      misrepresented as being the original software.
18 |   3. This notice may not be removed or altered from any source distribution.
19 | */
20 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | SIMD.js
 2 | ===============
 3 | 
 4 | SIMD.js has been taken out of active development in TC39 and removed
 5 | from Stage 3, and is not being pursued by web browsers for
 6 | implementation. SIMD operations exposed to the web are under active
 7 | development within WebAssembly, with operations based on the SIMD.js
 8 | operations. With WebAssembly in advanced development or shipping in
 9 | multiple browsers, it seems like an adequate vehicle to subsume asm.js
10 | use cases, which are judged to be the broader cases. Although some
11 | developers have expressed interest in using SIMD.js outside of asm.js,
12 | implementers have found that implementing and optimizing for this case
13 | reliably creates a lot of complexity, and have made the decision to
14 | focus instead on delivering WebAssembly and SIMD instructions in WASM.
15 | 
16 | See https://github.com/WebAssembly/simd for current development.
17 | 
18 | This repository retains a historical snapshot of the SIMD.js specification work:
19 | *  The authoritative API reference documentation is generated from tc39/spec.html. You can view a rendered copy at http://tc39.github.io/ecmascript_simd/ .
20 | *  A polyfill at src/ecmascript_simd.js, which can't implement value semantics, but includes a correct implementation of all functions
21 | *  Extensive tests at src/ecmascript_simd_tests.js, which can be run using other files in src/. Benchmarks and example code live in the same directory.
22 | *  A presentation explaining the motivation and outlining the approach at [tc39/SIMD-128 TC-39.pdf](https://github.com/tc39/ecmascript_simd/blob/master/tc39/SIMD-128%20TC-39.pdf)
23 | 


--------------------------------------------------------------------------------
/extended-api.md:
--------------------------------------------------------------------------------
 1 | SIMD.js Extended API Proposal
 2 | =============================
 3 | 
 4 | This document proposes an extended API for SIMD.js which is meant to provide access
 5 | to platform-specific optimizations. It will sit on top of and complement the
 6 | base API.
 7 | 
 8 | The expectation is that most users will use the base API most of the time. While
 9 | some compromises are being made to serve portability, most of the base API will
10 | still be fast, and it will deliver the most consistent results. The extension API
11 | will offer opportunities for performance tuning, will support specialized code
12 | sequences, and will aid in porting of code from other platforms.
13 | 
14 | This proposal splits the problem space into two parts:
15 |  - operations which are portable, but with semantic differences
16 |  - operations which are only available on some platforms
17 | 
18 | Operations which are portable, but with semantic differences
19 | ------------------------------------------------------------
20 | 
21 | Primarily, this will use a new `SIMD.Relaxed` namespace:
22 | 
23 | ```
24 | SIMD.Relaxed.Int32x4.fromFloat32x4     // relaxed on NaN or overflow
25 | SIMD.Relaxed.Float32x4.max             // relaxed on NaN, 0 and -0 fungible
26 | SIMD.Relaxed.Int32x4.shiftLeftByScalar // relaxed on shift count overflow
27 | ...
28 | ```
29 | 
30 | Functions in `SIMD.Relaxed` mimic functions in the base API with corresponding names,
31 | and provide weaker portability with greater potential for performance, for example by
32 | having unspecified results if a NaN appears in any part of the (implied) computation, by
33 | treating negative zero as interchangeable with zero, or by having unspecified
34 | results if an overflow occurs.
35 | 
36 | Note that an implementation in which these are all identical to their corresponding
37 | functions in the base namespace will be fully conforming.
38 | 
39 | Accompanying this is a new `SIMD.Checked` namespace to help developers find errors:
40 | 
41 | ```
42 | SIMD.Checked.Int32x4.fromFloat32x4
43 | SIMD.Checked.Float32x4.max
44 | SIMD.Checked.Int32x4.shiftLeftByScalar
45 | ...
46 | ```
47 | 
48 | Functions in `SIMD.Checked` all correspond to functions in `SIMD.Relaxed` and
49 | throw on any value which would produce unspecified results. They may also
50 | canonicalize negative zero to positive zero. We'll publish a standard polyfill for
51 | these functions which implementations or users can use if they wish.
52 | 
53 | Operations which are only available on some platforms
54 | -----------------------------------------------------
55 | 
56 | Operations from all platforms are collected together in a single `SIMD.Universe` namespace:
57 | 
58 | ```
59 | SIMD.Universe.Float32x4.fma
60 | SIMD.Universe.Int32x4.rotateLeft
61 | SIMD.Universe.Int32x4.rotateRight
62 | SIMD.Universe.Int32x4.signMask        // movmskps on x86
63 | SIMD.Universe.Int32x4.bitInsertIfTrue // vbit on ARM
64 | ...
65 | ```
66 | 
67 | Unlike in the `SIMD.Relaxed` namespace, these operations all have fairly strict
68 | semantics.
69 | 
70 | We'll publish a standard polyfill that will fill in all functions in the
71 | `SIMD.Universe` namespace that the JIT doesn't predefine. This will ensure that
72 | programs continue to at least execute across platforms, though of course the
73 | performance may vary widely.
74 | 
75 | Some indication of the performance will be made:
76 | 
77 | ```
78 | SIMD.isFast
79 | ```
80 | 
81 | This function takes a single argument, a function in the `SIMD.Universe` API,
82 | and returns a bool indicating whether the given function is "fast" -- roughly
83 | meaning a single operation in the underlying platform.
84 | 


--------------------------------------------------------------------------------
/run_dev_server.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Serve the current directory over HTTP (both modules default to port 8000) so
# the tests and benchmarks under src/ can be opened in a browser.
# SimpleHTTPServer only exists on Python 2; prefer Python 3's http.server and
# fall back to the legacy module when python3 is not installed.
if command -v python3 >/dev/null 2>&1; then
  exec python3 -m http.server
else
  exec python -m SimpleHTTPServer
fi
3 | 


--------------------------------------------------------------------------------
/src/benchmarks/aobench.js:
--------------------------------------------------------------------------------
  1 | // AOBench
  2 | // ambient occlusion renderer
  3 | // See full demo at https://github.com/wahbahdoo/aobench
  4 | 
  5 | (function () {
  6 | 
  // Kernel configuration: display name plus the init, cleanup and kernel
  // callbacks handed to the Benchmark constructor below.
  var kernelConfig = {
    kernelName: "AOBench",
    kernelInit: initAobench,
    kernelCleanup: cleanupAobench,
    kernelSimd: simdAobench,
    kernelNonSimd: nonSimdAobench
  };

  // Register this kernel with the benchmark harness.
  benchmarks.add(new Benchmark(kernelConfig));

  // State shared by the functions in this file.
  // NAO_SAMPLES is the sample count per axis; the rest is filled in by init_scene().
  var NAO_SAMPLES = 8;
  var spheres, plane, rands1, rands2, isect0;
 26 | 
 27 |   // Initialization and verification
 28 |   function initAobench () {
 29 |     init_scene();
 30 |     var A = ambient_occlusion(isect0);
 31 |     var B = ambient_occlusion_simd(isect0);
 32 |     return ((A.x == B.x) && (A.y == B.y) && (A.z == B.z));
 33 |   }
 34 | 
 35 |   function cleanupAobench() {
 36 |     return initAobench();
 37 |   }
 38 | 
 39 |   // Non SIMD version of the kernel
 40 |   function nonSimdAobench (n) {
 41 |     for (var i = 0; i < n; i++) {
 42 |       ambient_occlusion(isect0);
 43 |     }
 44 |   }
 45 | 
 46 |   // SIMD version of the kernel
 47 |   function simdAobench (n) {
 48 |     for (var i = 0; i < n; i++) {
 49 |       ambient_occlusion_simd(isect0);
 50 |     }
 51 |   }
 52 | 
  // AOBench scene setup. Fills the shared variables spheres, plane, rands1,
  // rands2 and isect0 with fixed literals; the "random" numbers are hard-coded
  // so that every benchmark run does the same work.
  function init_scene() {
    // Three spheres of radius 0.5.
    spheres = new Array();
    spheres[0] = {
      center: {
        x: -2.0,
        y: 0.0,
        z: -3.5
      },
      radius: 0.5
    };
    spheres[1] = {
      center: {
        x: -0.5,
        y: 0.0,
        z: -3.0
      },
      radius: 0.5
    };
    spheres[2] = {
      center: {
        x: 1.0,
        y: 0.0,
        z: -2.2
      },
      radius: 0.5
    };
    // Plane given by a point p on it and its normal n.
    plane = {
      p: {
        x: 0.0,
        y: -0.5,
        z: 0.0
      },
      n: {
        x: 0.0,
        y: 1.0,
        z: 0.0
      }
    };
    // 64 fixed values (NAO_SAMPLES * NAO_SAMPLES); the AO kernels take the
    // square root of each to obtain theta.
    rands1 = new Array(0.1352356830611825,  0.288015044759959,   0.7678821850568056,  0.2686317905317992,
                       0.3331136927008629,  0.8684257145505399,  0.781927386065945,   0.5896540696267039,
                       0.44623699225485325, 0.9686877066269517,  0.07219804194755852, 0.32867410429753363,
                       0.25455036014318466, 0.6900878311134875,  0.32115139183588326, 0.8623794671148062,
                       0.41069260938093066, 0.999176808167249,   0.31144002149812877, 0.21190544497221708,
                       0.589751492254436,   0.618399447761476,   0.7838233797810972,  0.22662024036981165,
                       0.5274769144598395,  0.8913978524506092,  0.2461202829144895,  0.575232774252072,
                       0.20723191439174116, 0.15211533522233367, 0.5140219402965158,  0.695398824987933,
                       0.7201623972505331,  0.1737971710972488,  0.3138047114480287,  0.09142904286272824,
                       0.15824169223196805, 0.11588017432950437, 0.4076798539608717,  0.06385629274882376,
                       0.9907234299462289,  0.1742915315553546,  0.9236432255711406,  0.8344372694846243,
                       0.05793144227936864, 0.35464465571567416, 0.3937969475518912,  0.8209003841038793,
                       0.6443945677019656,  0.15443599177524447, 0.8957053178455681,  0.4145913925021887,
                       0.4667414356954396,  0.42764953384175897, 0.03486692951992154, 0.13391495239920914,
                       0.6122364429756999,  0.7934473238419741,  0.13505113637074828, 0.7279673060402274,
                       0.3638722419273108,  0.30750402715057135, 0.8705337035935372,  0.3060465627349913);

    // 64 fixed values; the AO kernels scale each by 2 * PI to obtain phi.
    rands2 = new Array(0.6100146626122296,  0.8141843967605382,  0.7538463387172669,  0.538857217412442,
                       0.7884696905966848,  0.2656198723707348,  0.3280213042162359,  0.25133296218700707,
                       0.18718935316428542, 0.7374026740435511,  0.8333564973436296,  0.22081619454547763,
                       0.08140448946505785, 0.7737920694053173,  0.9531879865098745,  0.385226191021502,
                       0.8437968089710921,  0.45293551217764616, 0.11351405014283955, 0.6402874339837581,
                       0.9657228307332844,  0.5241556512191892,  0.9501411342062056,  0.7991736396215856,
                       0.7572617880068719,  0.6777111298870295,  0.19950113398954272, 0.09956562682054937,
                       0.03746219468303025, 0.18719390942715108, 0.1519025124143809,  0.8241845818702132,
                       0.9609565436840057,  0.7231316142715514,  0.26712060417048633, 0.7414182834327221,
                       0.4706993775907904,  0.9619642498437315,  0.14598079677671194, 0.1517641346435994,
                       0.5583144023548812,  0.7664180144201964,  0.8109071112703532,  0.4008640209212899,
                       0.10891564912162721, 0.8558103002142161,  0.03816548571921885, 0.4263107746373862,
                       0.280488790711388,   0.915016517508775,   0.8379701666999608,  0.5821647725533694,
                       0.3671900019980967,  0.6120628621429205,  0.5861144624650478,  0.5639409353025258,
                       0.4884668991435319,  0.9718172331340611,  0.4438377188052982,  0.9853541473858058,
                       0.021908782655373216,0.6144221667200327,  0.11301262397319078, 0.17565111187286675);
    // Reference intersection record that both kernels are evaluated on:
    // p is the hit position, n the normal at it (p.y sits on the plane's y).
    isect0 =  {
      t: 0.7907924036719444,
      hit: 1,
      p: {
        x: 0.3484251968503937,
        y: -0.49999999999999994,
        z: -0.5039370078740157
      },
      n: {
        x: 0,
        y: 1,
        z: 0
      }
    };
  }
140 | 
141 |   // Sequential AO calculation functions ----------------------------------------------
142 | 
143 |   function ambient_occlusion(isect) {
144 |     var col = {};
145 | 
146 |     var ntheta = NAO_SAMPLES;
147 |     var nphi = NAO_SAMPLES;
148 |     var eps = 0.0001;
149 | 
150 |     var p = {
151 |       x: isect.p.x + eps * isect.n.x,
152 |       y: isect.p.y + eps * isect.n.y,
153 |       z: isect.p.z + eps * isect.n.z
154 |     };
155 | 
156 |     var basis = new Array({}, {}, {});
157 |     orthoBasis(basis, isect.n);
158 | 
159 |     var occlusion = 0;
160 | 
161 |     for (var j = 0; j < ntheta; j++) {
162 |       for (var i = 0; i < nphi; i++) {
163 |         var theta = Math.sqrt(rands1[j * ntheta + i]);
164 |         var phi = 2 * Math.PI * rands2[j * ntheta + i];
165 | 
166 |         var x = Math.cos(phi) * theta;
167 |         var y = Math.sin(phi) * theta;
168 |         var z = Math.sqrt(1 - theta * theta);
169 | 
170 |         var rx = x * basis[0].x + y * basis[1].x + z * basis[2].x;
171 |         var ry = x * basis[0].y + y * basis[1].y + z * basis[2].y;
172 |         var rz = x * basis[0].z + y * basis[1].z + z * basis[2].z;
173 | 
174 |         var ray = {
175 |           org: p,
176 |           dir: {
177 |             x: rx,
178 |             y: ry,
179 |             z: rz
180 |           }
181 |         };
182 | 
183 |         var occIsectA = {
184 |           t: 1e17,
185 |           hit: 0
186 |         }
187 |         var occIsectB = {
188 |           p: { x:0, y:0, z:0 },
189 |           n: { x:0, y:0, z:0 }
190 |         };
191 | 
192 |         ray_sphere_intersect(occIsectA, occIsectB, ray, spheres[0]);
193 |         ray_sphere_intersect(occIsectA, occIsectB, ray, spheres[1]);
194 |         ray_sphere_intersect(occIsectA, occIsectB, ray, spheres[2]);
195 |         ray_plane_intersect(occIsectA, occIsectB, ray, plane);
196 | 
197 |         if (occIsectA.hit) occlusion += 1.0;
198 | 
199 |       }
200 |     }
201 | 
202 |     occlusion = (ntheta * nphi - occlusion) / (ntheta * nphi);
203 | 
204 |     col.x = occlusion;
205 |     col.y = occlusion;
206 |     col.z = occlusion;
207 | 
208 |     return col;
209 |   }
210 | 
211 |   function ray_sphere_intersect(isectA, isectB, ray, sphere) {
212 |     var rs = {
213 |       x: ray.org.x - sphere.center.x,
214 |       y: ray.org.y - sphere.center.y,
215 |       z: ray.org.z - sphere.center.z
216 |     };
217 | 
218 |     var B = vdot(rs, ray.dir);
219 |     var C = vdot(rs, rs) - sphere.radius * sphere.radius;
220 |     var D = B * B - C;
221 | 
222 |     if (D > 0) {
223 |       var t = -B - Math.sqrt(D);
224 |       if ((t > 0) && (t < isectA.t)) {
225 | 
226 |         isectA.t = t;
227 |         isectA.hit = 1;
228 | 
229 |         isectB.p.x = ray.org.x + ray.dir.x * t;
230 |         isectB.p.y = ray.org.y + ray.dir.y * t;
231 |         isectB.p.z = ray.org.z + ray.dir.z * t;
232 | 
233 |         isectB.n.x = isectB.p.x - sphere.center.x;
234 |         isectB.n.y = isectB.p.y - sphere.center.y;
235 |         isectB.n.z = isectB.p.z - sphere.center.z;
236 | 
237 |         vnormalize(isectB.n);
238 |       }
239 |     }
240 | 
241 |   }
242 | 
243 |   function ray_plane_intersect(isectA, isectB, ray, plane) {
244 |     var d = -vdot(plane.p, plane.n);
245 |     var v = vdot(ray.dir, plane.n);
246 | 
247 |     if (Math.abs(v) < 1e-17) return;
248 | 
249 |     var t = -(vdot(ray.org, plane.n) + d) / v;
250 | 
251 |     if ((t > 0) && (t < isectA.t)) {
252 |       isectA.t = t;
253 |       isectA.hit = 1;
254 |       isectB.p.x = ray.org.x + ray.dir.x * t;
255 |       isectB.p.y = ray.org.y + ray.dir.y * t;
256 |       isectB.p.z = ray.org.z + ray.dir.z * t;
257 |       isectB.n = plane.n;
258 |     }
259 |   }
260 | 
261 |   // SIMD AO calculation functions ----------------------------------------------------
262 | 
263 |   function ambient_occlusion_simd(isect) {
264 |     var col = {};
265 | 
266 |     var i, j;
267 |     var ntheta = NAO_SAMPLES;
268 |     var nphi = NAO_SAMPLES;
269 |     var eps = 0.0001;
270 | 
271 |     var p = {
272 |       x: isect.p.x + eps * isect.n.x,
273 |       y: isect.p.y + eps * isect.n.y,
274 |       z: isect.p.z + eps * isect.n.z
275 |     };
276 | 
277 |     var basis = new Array({}, {}, {});
278 |     orthoBasis(basis, isect.n);
279 | 
280 |     var occlusion = 0;
281 |     var occlusionx4 = SIMD.Float32x4.splat(0.0);
282 | 
283 |     for (j = 0; j < ntheta; j++) {
284 |       for (i = 0; i < nphi; i += 4) {
285 |         var theta = SIMD.Float32x4.sqrt(SIMD.Float32x4(rands1[j * ntheta + i], rands1[j * ntheta + i + 1], rands1[j * ntheta + i + 2], rands1[j * ntheta + i + 3]));
286 |         var phi0 = 2 * Math.PI * rands2[j * ntheta + i];
287 |         var phi1 = 2 * Math.PI * rands2[j * ntheta + i + 1];
288 |         var phi2 = 2 * Math.PI * rands2[j * ntheta + i + 2];
289 |         var phi3 = 2 * Math.PI * rands2[j * ntheta + i + 3];
290 |         var sinphi = SIMD.Float32x4(Math.sin(phi0), Math.sin(phi1), Math.sin(phi2), Math.sin(phi3));
291 |         var cosphi = SIMD.Float32x4(Math.cos(phi0), Math.cos(phi1), Math.cos(phi2), Math.cos(phi3));
292 | 
293 |         var x = SIMD.Float32x4.mul(cosphi, theta);
294 |         var y = SIMD.Float32x4.mul(sinphi, theta);
295 |         var z = SIMD.Float32x4.sqrt(SIMD.Float32x4.sub(SIMD.Float32x4.splat(1.0), SIMD.Float32x4.mul(theta, theta)));
296 | 
297 |         var dirx = SIMD.Float32x4.add(SIMD.Float32x4.mul(x, SIMD.Float32x4.splat(basis[0].x)),
298 |                             SIMD.Float32x4.add(SIMD.Float32x4.mul(y, SIMD.Float32x4.splat(basis[1].x)),
299 |                                      SIMD.Float32x4.mul(z, SIMD.Float32x4.splat(basis[2].x))));
300 |         var diry = SIMD.Float32x4.add(SIMD.Float32x4.mul(x, SIMD.Float32x4.splat(basis[0].y)),
301 |                             SIMD.Float32x4.add(SIMD.Float32x4.mul(y, SIMD.Float32x4.splat(basis[1].y)),
302 |                                      SIMD.Float32x4.mul(z, SIMD.Float32x4.splat(basis[2].y))));
303 |         var dirz = SIMD.Float32x4.add(SIMD.Float32x4.mul(x, SIMD.Float32x4.splat(basis[0].z)),
304 |                             SIMD.Float32x4.add(SIMD.Float32x4.mul(y, SIMD.Float32x4.splat(basis[1].z)),
305 |                                      SIMD.Float32x4.mul(z, SIMD.Float32x4.splat(basis[2].z))));
306 | 
307 |         var orgx = SIMD.Float32x4.splat(p.x);
308 |         var orgy = SIMD.Float32x4.splat(p.y);
309 |         var orgz = SIMD.Float32x4.splat(p.z);
310 | 
311 |         var occIsectA = {
312 |           t: SIMD.Float32x4.splat(1e17),
313 |           hit: SIMD.Bool32x4.splat(false)
314 |         };
315 |         var occIsectB = {
316 |           p: {
317 |             x: SIMD.Float32x4.splat(0.0),
318 |             y: SIMD.Float32x4.splat(0.0),
319 |             z: SIMD.Float32x4.splat(0.0)
320 |           },
321 |           n: {
322 |             x: SIMD.Float32x4.splat(0.0),
323 |             y: SIMD.Float32x4.splat(0.0),
324 |             z: SIMD.Float32x4.splat(0.0)
325 |           }
326 |         };
327 | 
328 |         ray_sphere_intersect_simd(occIsectA, occIsectB, dirx, diry, dirz, orgx, orgy, orgz, spheres[0]);
329 |         ray_sphere_intersect_simd(occIsectA, occIsectB, dirx, diry, dirz, orgx, orgy, orgz, spheres[1]);
330 |         ray_sphere_intersect_simd(occIsectA, occIsectB, dirx, diry, dirz, orgx, orgy, orgz, spheres[2]);
331 |         ray_plane_intersect_simd (occIsectA, occIsectB, dirx, diry, dirz, orgx, orgy, orgz, plane);
332 | 
333 |         occlusionx4 = SIMD.Float32x4.add(
334 |                         occlusionx4,
335 |                         SIMD.Float32x4.select(occIsectA.hit, SIMD.Float32x4.splat(1.0),
336 |                                                              SIMD.Float32x4.splat(0.0)));
337 | 
338 |       }
339 |     }
340 | 
341 |     occlusion = SIMD.Float32x4.extractLane(occlusionx4, 0) +
342 |         SIMD.Float32x4.extractLane(occlusionx4, 1) +
343 |         SIMD.Float32x4.extractLane(occlusionx4, 2) +
344 |         SIMD.Float32x4.extractLane(occlusionx4, 3);
345 | 
346 |     occlusion = (ntheta * nphi - occlusion) / (ntheta * nphi);
347 | 
348 |     col.x = occlusion;
349 |     col.y = occlusion;
350 |     col.z = occlusion;
351 | 
352 |     return col;
353 |   }
354 | 
355 |   function ray_sphere_intersect_simd(isectA, isectB, dirx, diry, dirz, orgx, orgy, orgz, sphere) {
356 | 
357 |     var rsx = SIMD.Float32x4.sub(orgx, SIMD.Float32x4.splat(sphere.center.x));
358 |     var rsy = SIMD.Float32x4.sub(orgy, SIMD.Float32x4.splat(sphere.center.y));
359 |     var rsz = SIMD.Float32x4.sub(orgz, SIMD.Float32x4.splat(sphere.center.z));
360 | 
361 |     var B = SIMD.Float32x4.add(SIMD.Float32x4.mul(rsx, dirx),
362 |                      SIMD.Float32x4.add(SIMD.Float32x4.mul(rsy, diry), SIMD.Float32x4.mul(rsz, dirz)));
363 |     var C = SIMD.Float32x4.sub(SIMD.Float32x4.add(SIMD.Float32x4.mul(rsx, rsx),
364 |                               SIMD.Float32x4.add(SIMD.Float32x4.mul(rsy, rsy), SIMD.Float32x4.mul(rsz, rsz))),
365 |                      SIMD.Float32x4.splat(sphere.radius * sphere.radius));
366 |     var D = SIMD.Float32x4.sub(SIMD.Float32x4.mul(B, B), C);
367 | 
368 |     var cond1 = SIMD.Float32x4.greaterThan(D, SIMD.Float32x4.splat(0.0));
369 |     if (SIMD.Bool32x4.anyTrue(cond1)) {
370 |       var t2 = SIMD.Float32x4.select(cond1, SIMD.Float32x4.sub(SIMD.Float32x4.neg(B), SIMD.Float32x4.sqrt(D)), SIMD.Float32x4.splat(0.0));
371 |       var cond2 = SIMD.Bool32x4.and(SIMD.Float32x4.greaterThan(t2, SIMD.Float32x4.splat(0.0)),
372 |                                     SIMD.Float32x4.lessThan(t2, isectA.t));
373 |       if (SIMD.Bool32x4.anyTrue(cond2)) {
374 |         isectA.t = SIMD.Float32x4.select(cond2, t2, isectA.t);
375 |         isectA.hit = SIMD.Bool32x4.or(cond2, isectA.hit);
376 | 
377 |         isectB.p.x = SIMD.Float32x4.select(cond2, SIMD.Float32x4.add(orgx, SIMD.Float32x4.mul(dirx, isectA.t)), isectB.p.x);
378 |         isectB.p.y = SIMD.Float32x4.select(cond2, SIMD.Float32x4.add(orgx, SIMD.Float32x4.mul(diry, isectA.t)), isectB.p.y);
379 |         isectB.p.z = SIMD.Float32x4.select(cond2, SIMD.Float32x4.add(orgx, SIMD.Float32x4.mul(dirz, isectA.t)), isectB.p.z);
380 | 
381 |         isectB.n.x = SIMD.Float32x4.select(cond2, SIMD.Float32x4.sub(isectB.p.x, SIMD.Float32x4.splat(sphere.center.x)), isectB.n.x);
382 |         isectB.n.y = SIMD.Float32x4.select(cond2, SIMD.Float32x4.sub(isectB.p.y, SIMD.Float32x4.splat(sphere.center.y)), isectB.n.y);
383 |         isectB.n.z = SIMD.Float32x4.select(cond2, SIMD.Float32x4.sub(isectB.p.z, SIMD.Float32x4.splat(sphere.center.z)), isectB.n.z);
384 | 
385 |         var lengths = SIMD.Float32x4.sqrt(SIMD.Float32x4.add(SIMD.Float32x4.mul(isectB.n.x, isectB.n.x),
386 |                                           SIMD.Float32x4.add(SIMD.Float32x4.mul(isectB.n.y, isectB.n.y),
387 |                                                              SIMD.Float32x4.mul(isectB.n.z, isectB.n.z))));
388 |         var cond3 = SIMD.Float32x4.greaterThan(SIMD.Float32x4.abs(lengths), SIMD.Float32x4.splat(1e-17));
389 |         isectB.n.x = SIMD.Float32x4.select(cond3, SIMD.Float32x4.div(isectB.n.x, lengths), isectB.n.x);
390 |         isectB.n.y = SIMD.Float32x4.select(cond3, SIMD.Float32x4.div(isectB.n.y, lengths), isectB.n.y);
391 |         isectB.n.z = SIMD.Float32x4.select(cond3, SIMD.Float32x4.div(isectB.n.z, lengths), isectB.n.z);
392 |       }
393 |     }
394 |   }
395 | 
396 |   function ray_plane_intersect_simd(isectA, isectB, dirx, diry, dirz, orgx, orgy, orgz, plane) {
397 |     var d = SIMD.Float32x4.neg(SIMD.Float32x4.add(SIMD.Float32x4.mul(SIMD.Float32x4.splat(plane.p.x), SIMD.Float32x4.splat(plane.n.x)),
398 |                                SIMD.Float32x4.add(SIMD.Float32x4.mul(SIMD.Float32x4.splat(plane.p.y), SIMD.Float32x4.splat(plane.n.y)),
399 |                                         SIMD.Float32x4.mul(SIMD.Float32x4.splat(plane.p.z), SIMD.Float32x4.splat(plane.n.z)))));
400 |     var v = SIMD.Float32x4.add(SIMD.Float32x4.mul(dirx, SIMD.Float32x4.splat(plane.n.x)),
401 |                      SIMD.Float32x4.add(SIMD.Float32x4.mul(diry, SIMD.Float32x4.splat(plane.n.y)),
402 |                               SIMD.Float32x4.mul(dirz, SIMD.Float32x4.splat(plane.n.z))));
403 | 
404 |     var cond1 = SIMD.Float32x4.greaterThan(SIMD.Float32x4.abs(v), SIMD.Float32x4.splat(1e-17));
405 |     var dp = SIMD.Float32x4.add(SIMD.Float32x4.mul(orgx, SIMD.Float32x4.splat(plane.n.x)),
406 |                       SIMD.Float32x4.add(SIMD.Float32x4.mul(orgy, SIMD.Float32x4.splat(plane.n.y)),
407 |                                SIMD.Float32x4.mul(orgz, SIMD.Float32x4.splat(plane.n.z))));
408 |     var t2 = SIMD.Float32x4.select(cond1, SIMD.Float32x4.div(SIMD.Float32x4.neg(SIMD.Float32x4.add(dp, d)), v), SIMD.Float32x4.splat(0.0));
409 |     var cond2 = SIMD.Bool32x4.and(SIMD.Float32x4.greaterThan(t2, SIMD.Float32x4.splat(0.0)), SIMD.Float32x4.lessThan(t2, isectA.t));
410 |     if (SIMD.Bool32x4.anyTrue(cond2)) {
411 |       isectA.t = SIMD.Float32x4.select(cond2, t2, isectA.t);
412 |       isectA.hit = SIMD.Bool32x4.or(cond2, isectA.hit);
413 | 
414 |       isectB.p.x = SIMD.Float32x4.select(cond2, SIMD.Float32x4.add(orgx, SIMD.Float32x4.mul(dirx, isectA.t)), isectB.p.x);
415 |       isectB.p.y = SIMD.Float32x4.select(cond2, SIMD.Float32x4.add(orgy, SIMD.Float32x4.mul(diry, isectA.t)), isectB.p.y);
416 |       isectB.p.z = SIMD.Float32x4.select(cond2, SIMD.Float32x4.add(orgz, SIMD.Float32x4.mul(dirz, isectA.t)), isectB.p.z);
417 | 
418 |       isectB.n.x = SIMD.Float32x4.select(cond2, Float32x4.splat(plane.n.x), isectB.n.x);
419 |       isectB.n.y = SIMD.Float32x4.select(cond2, Float32x4.splat(plane.n.y), isectB.n.y);
420 |       isectB.n.z = SIMD.Float32x4.select(cond2, Float32x4.splat(plane.n.z), isectB.n.z);
421 |     }
422 |   }
423 | 
424 |   // Utility calculation functions ----------------------------------------------------
425 | 
426 |   function vdot(v0, v1) {
427 |     return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z;
428 |   }
429 | 
430 |   function vcross(v0, v1) {
431 |     return {
432 |       x: v0.y * v1.z - v0.z * v1.y,
433 |       y: v0.z * v1.x - v0.x * v1.z,
434 |       z: v0.x * v1.y - v0.y * v1.x
435 |     };
436 |   }
437 | 
438 |   function vnormalize(c) {
439 |     var length = Math.sqrt(vdot(c, c));
440 |     if (Math.abs(length) > 1e-17) {
441 |       c.x /= length;
442 |       c.y /= length;
443 |       c.z /= length;
444 |     }
445 |   }
446 | 
447 |   function orthoBasis(basis, n) {
448 |     basis[2] = n;
449 |     basis[1] = { x: 0, y: 0, z: 0 };
450 | 
451 |     if ((n.x < 0.6) && (n.x > -0.6)) {
452 |       basis[1].x = 1.0;
453 |     }
454 |     else if ((n.y < 0.6) && (n.y > -0.6)) {
455 |       basis[1].y = 1.0;
456 |     }
457 |     else if ((n.z < 0.6) && (n.z > -0.6)) {
458 |       basis[1].z = 1.0;
459 |     }
460 |     else {
461 |       basis[1].x = 1.0;
462 |     }
463 | 
464 |     basis[0] = vcross(basis[1], basis[2]);
465 |     vnormalize(basis[0]);
466 | 
467 |     basis[1] = vcross(basis[2], basis[0]);
468 |     vnormalize(basis[1]);
469 |   }
470 | 
471 | } ());
472 | 


--------------------------------------------------------------------------------
/src/benchmarks/averageFloat32x4.js:
--------------------------------------------------------------------------------
 1 | // Simple performance test of SIMD.add operation.  Use SIMD.add to average up elements
 2 | // in a Float32Array. Compare to scalar implementation of same function.
 3 | // Author: Peter Jensen
 4 | 
 5 | (function () {
 6 | 
 7 |   // Kernel configuration
 8 |   var kernelConfig = {
 9 |     kernelName:       "AverageFloat32x4",
10 |     kernelInit:       initArray,
11 |     kernelCleanup:    cleanup,
12 |     kernelSimd:       simdAverage,
13 |     kernelNonSimd:    average,
14 |     kernelIterations: 1000
15 |   };
16 | 
17 |   // Hook up to the harness
18 |   benchmarks.add(new Benchmark(kernelConfig));
19 | 
20 |   // Benchmark data, initialization and kernel functions
21 |   var a   = new Float32Array(10000);
22 | 
23 |   function sanityCheck() {
24 |      return Math.abs(average(1) - simdAverage(1)) < 0.0001;
25 |   }
26 | 
27 |   function initArray() {
28 |     var j = 0;
29 |     for (var i = 0, l = a.length; i < l; ++i) {
30 |       a[i] = 0.1;
31 |     }
32 |     // Check that the two kernel functions yields the same result, roughly
33 |     // Account for the fact that the simdAverage() is computed using float32
34 |     // precision and the average() is using double precision
35 |     return sanityCheck();
36 |   }
37 | 
38 |   function cleanup() {
39 |     return sanityCheck();
40 |   }
41 | 
42 |   function average(n) {
43 |     for (var i = 0; i < n; ++i) {
44 |       var sum = 0.0;
45 |       for (var j = 0, l = a.length; j < l; ++j) {
46 |         sum += a[j];
47 |       }
48 |     }
49 |     return sum/a.length;
50 |   }
51 | 
52 |   function simdAverage(n) {
53 |     var a_length = a.length;
54 |     for (var i = 0; i < n; ++i) {
55 |       var sum4 = SIMD.Float32x4.splat(0.0);
56 |       for (var j = 0; j < a_length; j += 4) {
57 |         sum4 = SIMD.Float32x4.add(sum4, SIMD.Float32x4.load(a, j));
58 |       }
59 |     }
60 |     return (SIMD.Float32x4.extractLane(sum4, 0) +
61 |         SIMD.Float32x4.extractLane(sum4, 1) +
62 |         SIMD.Float32x4.extractLane(sum4, 2) +
63 |         SIMD.Float32x4.extractLane(sum4, 3)) / a.length;
64 |   }
65 | 
66 | } ());
67 | 


--------------------------------------------------------------------------------
/src/benchmarks/averageFloat32x4LoadFromInt8Array.js:
--------------------------------------------------------------------------------
 1 | // Simple performance test of SIMD.add operation.  Use SIMD.add to average up elements
 2 | // in a Float32Array. Compare to scalar implementation of same function.
 3 | // Author: Peter Jensen
 4 | 
 5 | (function () {
 6 | 
 7 |   // Kernel configuration
 8 |   var kernelConfig = {
 9 |     kernelName:       "AverageFloat32x4LoadFromInt8Array",
10 |     kernelInit:       initArray,
11 |     kernelCleanup:    cleanup,
12 |     kernelSimd:       simdAverage,
13 |     kernelNonSimd:    average,
14 |     kernelIterations: 1000
15 |   };
16 | 
17 |   // Hook up to the harness
18 |   benchmarks.add(new Benchmark(kernelConfig));
19 | 
20 |   // Benchmark data, initialization and kernel functions
21 |   var a   = new Float32Array(10000);
22 |   var b   = new Int8Array(a.buffer);
23 | 
24 |   function sanityCheck() {
25 |      return Math.abs(average(1) - simdAverage(1)) < 0.0001;
26 |   }
27 | 
28 |   function initArray() {
29 |     var j = 0;
30 |     for (var i = 0, l = a.length; i < l; ++i) {
31 |       a[i] = 0.1;
32 |     }
33 |     // Check that the two kernel functions yields the same result, roughly
34 |     // Account for the fact that the simdAverage() is computed using float32
35 |     // precision and the average() is using double precision
36 |     return sanityCheck();
37 |   }
38 | 
39 |   function cleanup() {
40 |     return sanityCheck();
41 |   }
42 | 
43 |   function average(n) {
44 |     for (var i = 0; i < n; ++i) {
45 |       var sum = 0.0;
46 |       for (var j = 0, l = a.length; j < l; ++j) {
47 |         sum += a[j];
48 |       }
49 |     }
50 |     return sum/a.length;
51 |   }
52 | 
53 |   function simdAverage(n) {
54 |     for (var i = 0; i < n; ++i) {
55 |       var sum4 = SIMD.Float32x4.splat(0.0);
56 |       for (var j = 0; j < a.length / 4; ++j) {
57 |         sum4 = SIMD.Float32x4.add(sum4, SIMD.Float32x4.load(b, j << 4));
58 |       }
59 |     }
60 |     return (SIMD.Float32x4.extractLane(sum4, 0) +
61 |         SIMD.Float32x4.extractLane(sum4, 1) +
62 |         SIMD.Float32x4.extractLane(sum4, 2) +
63 |         SIMD.Float32x4.extractLane(sum4, 3)) / a.length;
64 |   }
65 | 
66 | } ());
67 | 


--------------------------------------------------------------------------------
/src/benchmarks/averageFloat32x4LoadX.js:
--------------------------------------------------------------------------------
 1 | // Simple performance test of SIMD.add operation.  Use SIMD.add to average up elements
 2 | // in a Float32Array. Compare to scalar implementation of same function.
 3 | // Author: Peter Jensen
 4 | 
 5 | (function () {
 6 | 
 7 |   // Kernel configuration
 8 |   var kernelConfig = {
 9 |     kernelName:       "AverageFloat32x4LoadX",
10 |     kernelInit:       initArray,
11 |     kernelCleanup:    cleanup,
12 |     kernelSimd:       simdAverageLoad,
13 |     kernelNonSimd:    average,
14 |     kernelIterations: 1000
15 |   };
16 | 
17 |   // Hook up to the harness
18 |   benchmarks.add(new Benchmark(kernelConfig));
19 | 
20 |   // Benchmark data, initialization and kernel functions
21 |   var a   = new Float32Array(10000);
22 |   var a1  = new Float32Array(10000);
23 |   var b = new Int8Array(a.buffer);
24 | 
25 |   function sanityCheck() {
26 |     return true;
27 |      return Math.abs(average(1) - simdAverageLoad(1)) < 0.0001;
28 |   }
29 | 
30 |   function initArray() {
31 |     var j = 0;
32 |     for (var i = 0, l = a.length; i < l; ++i) {
33 |       a[i] = 0.1;
34 |     }
35 |     // Check that the two kernel functions yields the same result, roughly
36 |     // Account for the fact that the simdAverage() is computed using float32
37 |     // precision and the average() is using double precision
38 |     return sanityCheck();
39 |   }
40 | 
41 |   function cleanup() {
42 |     return sanityCheck();
43 |   }
44 | 
45 |   function average(n) {
46 |     for (var i = 0; i < n; ++i) {
47 |       var sum = 0.0;
48 |       for (var j = 0, l = a.length; j < l; ++j) {
49 |         sum += a[j];
50 |       }
51 |     }
52 |     return sum/a.length;
53 |   }
54 | 
55 |   function simdAverageLoad(n) {
56 |     var a_length = a.length;
57 |     for (var i = 0; i < n; ++i) {
58 |       var sum4 = SIMD.Float32x4.splat(0.0);
59 |       for (var j = 0; j < a_length; ++j) {
60 |         sum4 = SIMD.Float32x4.add(sum4, SIMD.Float32x4.load1(a, j));
61 |       }
62 |     }
63 |     return (SIMD.Float32x4.extractLane(sum4, 0) +
64 |         SIMD.Float32x4.extractLane(sum4, 1) +
65 |         SIMD.Float32x4.extractLane(sum4, 2) +
66 |         SIMD.Float32x4.extractLane(sum4, 3)) / a.length;
67 |   }
68 | 
69 | } ());
70 | 


--------------------------------------------------------------------------------
/src/benchmarks/averageFloat32x4LoadXY.js:
--------------------------------------------------------------------------------
 1 | // Simple performance test of SIMD.add operation.  Use SIMD.add to average up elements
 2 | // in a Float32Array. Compare to scalar implementation of same function.
 3 | // Author: Peter Jensen
 4 | 
 5 | (function () {
 6 | 
 7 |   // Kernel configuration
 8 |   var kernelConfig = {
 9 |     kernelName:       "AverageFloat32x4LoadXY",
10 |     kernelInit:       initArray,
11 |     kernelCleanup:    cleanup,
12 |     kernelSimd:       simdAverageLoad,
13 |     kernelNonSimd:    average,
14 |     kernelIterations: 1000
15 |   };
16 | 
17 |   // Hook up to the harness
18 |   benchmarks.add(new Benchmark(kernelConfig));
19 | 
20 |   // Benchmark data, initialization and kernel functions
21 |   var a   = new Float32Array(10000);
22 |   var a1  = new Float32Array(10000);
23 |   var b = new Int8Array(a.buffer);
24 | 
25 |   function sanityCheck() {
26 |     return true;
27 |      return Math.abs(average(1) - simdAverageLoad(1)) < 0.0001;
28 |   }
29 | 
30 |   function initArray() {
31 |     var j = 0;
32 |     for (var i = 0, l = a.length; i < l; ++i) {
33 |       a[i] = 0.1;
34 |     }
35 |     // Check that the two kernel functions yields the same result, roughly
36 |     // Account for the fact that the simdAverage() is computed using float32
37 |     // precision and the average() is using double precision
38 |     return sanityCheck();
39 |   }
40 | 
41 |   function cleanup() {
42 |     return sanityCheck();
43 |   }
44 | 
45 |   function average(n) {
46 |     for (var i = 0; i < n; ++i) {
47 |       var sum = 0.0;
48 |       for (var j = 0, l = a.length; j < l; ++j) {
49 |         sum += a[j];
50 |       }
51 |     }
52 |     return sum/a.length;
53 |   }
54 | 
55 |   function simdAverageLoad(n) {
56 |     var a_length = a.length;
57 |     for (var i = 0; i < n; ++i) {
58 |       var sum4 = SIMD.Float32x4.splat(0.0);
59 |       for (var j = 0; j < a_length / 2; ++j) {
60 |         sum4 = SIMD.Float32x4.add(sum4, SIMD.Float32x4.load2(a, j << 1));
61 |         //SIMD.Float32x4.store(a1, j << 2, sum4);
62 |       }
63 |     }
64 |     return (SIMD.Float32x4.extractLane(sum4, 0) +
65 |         SIMD.Float32x4.extractLane(sum4, 1) +
66 |         SIMD.Float32x4.extractLane(sum4, 2) +
67 |         SIMD.Float32x4.extractLane(sum4, 3)) / a.length;
68 |   }
69 | 
70 | } ());
71 | 


--------------------------------------------------------------------------------
/src/benchmarks/averageFloat32x4LoadXYZ.js:
--------------------------------------------------------------------------------
 1 | // Simple performance test of SIMD.add operation.  Use SIMD.add to average up elements
 2 | // in a Float32Array. Compare to scalar implementation of same function.
 3 | // Author: Peter Jensen
 4 | 
 5 | (function () {
 6 | 
 7 |   // Kernel configuration
 8 |   var kernelConfig = {
 9 |     kernelName:       "AverageFloat32x4LoadXYZ",
10 |     kernelInit:       initArray,
11 |     kernelCleanup:    cleanup,
12 |     kernelSimd:       simdAverageLoad,
13 |     kernelNonSimd:    average,
14 |     kernelIterations: 1000
15 |   };
16 | 
17 |   // Hook up to the harness
18 |   benchmarks.add(new Benchmark(kernelConfig));
19 | 
20 |   // Benchmark data, initialization and kernel functions
21 |   var a   = new Float32Array(9999);
22 |   var a1  = new Float32Array(9999);
23 |   var b = new Int8Array(a.buffer);
24 | 
25 |   function sanityCheck() {
26 |     return true;
27 |      return Math.abs(average(1) - simdAverageLoad(1)) < 0.0001;
28 |   }
29 | 
30 |   function initArray() {
31 |     var j = 0;
32 |     for (var i = 0, l = a.length; i < l; ++i) {
33 |       a[i] = 0.1;
34 |     }
35 |     // Check that the two kernel functions yields the same result, roughly
36 |     // Account for the fact that the simdAverage() is computed using float32
37 |     // precision and the average() is using double precision
38 |     return sanityCheck();
39 |   }
40 | 
41 |   function cleanup() {
42 |     return sanityCheck();
43 |   }
44 | 
45 |   function average(n) {
46 |     for (var i = 0; i < n; ++i) {
47 |       var sum = 0.0;
48 |       for (var j = 0, l = a.length; j < l; ++j) {
49 |         sum += a[j];
50 |       }
51 |     }
52 |     return sum/a.length;
53 |   }
54 | 
55 |   function simdAverageLoad(n) {
56 |     var a_length = a.length;
57 |     for (var i = 0; i < n; ++i) {
58 |       var sum4 = SIMD.Float32x4.splat(0.0);
59 |       for (var j = 0; j < a_length / 3 ; ++j) {
60 |         sum4 = SIMD.Float32x4.add(sum4, SIMD.Float32x4.load3(a, j * 3));
61 |         //SIMD.Float32x4.store(a1, j << 2, sum4);
62 |       }
63 |     }
64 |     return (SIMD.Float32x4.extractLane(sum4, 0) +
65 |         SIMD.Float32x4.extractLane(sum4, 1) +
66 |         SIMD.Float32x4.extractLane(sum4, 2) +
67 |         SIMD.Float32x4.extractLane(sum4, 3)) / a.length;
68 |   }
69 | 
70 | } ());
71 | 


--------------------------------------------------------------------------------
/src/benchmarks/averageInt32x4Load.js:
--------------------------------------------------------------------------------
 1 | // Simple performance test of SIMD.add operation.  Use SIMD.add to average up elements
 2 | // in a Int32Array. Compare to scalar implementation of same function.
 3 | // Author: Peter Jensen
 4 | 
 5 | (function () {
 6 | 
 7 |   // Kernel configuration
 8 |   var kernelConfig = {
 9 |     kernelName:       "AverageInt32x4Load",
10 |     kernelInit:       initArray,
11 |     kernelCleanup:    cleanup,
12 |     kernelSimd:       simdAverageLoad,
13 |     kernelNonSimd:    average,
14 |     kernelIterations: 1000
15 |   };
16 | 
17 |   // Hook up to the harness
18 |   benchmarks.add(new Benchmark(kernelConfig));
19 | 
20 |   // Benchmark data, initialization and kernel functions
21 |   var a   = new Int32Array(10000);
22 |   var a1  = new Int32Array(10000);
23 |   var b = new Int8Array(a.buffer);
24 | 
25 |   function sanityCheck() {
26 |     return true;
27 |      return Math.abs(average(1) - simdAverageLoad(1)) < 0.0001;
28 |   }
29 | 
30 |   function initArray() {
31 |     var j = 0;
32 |     for (var i = 0, l = a.length; i < l; ++i) {
33 |       a[i] = 1;
34 |     }
35 |     // Check that the two kernel functions yields the same result, roughly
36 |     // Account for the fact that the simdAverage() is computed using float32
37 |     // precision and the average() is using double precision
38 |     return sanityCheck();
39 |   }
40 | 
41 |   function cleanup() {
42 |     return sanityCheck();
43 |   }
44 | 
45 |   function average(n) {
46 |     for (var i = 0; i < n; ++i) {
47 |       var sum = 0.0;
48 |       for (var j = 0, l = a.length; j < l; ++j) {
49 |         sum += a[j];
50 |       }
51 |     }
52 |     return sum/a.length;
53 |   }
54 | 
55 |   function simdAverageLoad(n) {
56 |     var a_length = a.length;
57 |     for (var i = 0; i < n; ++i) {
58 |       var sum4 = SIMD.Int32x4.splat(0);
59 |       for (var j = 0; j < a_length / 4; ++j) {
60 |         sum4 = SIMD.Int32x4.add(sum4, SIMD.Int32x4.load(a, j << 2));
61 |       }
62 |     }
63 |     return (SIMD.Int32x4.extractLane(sum4, 0) +
64 |         SIMD.Int32x4.extractLane(sum4, 1) +
65 |         SIMD.Int32x4.extractLane(sum4, 2) +
66 |         SIMD.Int32x4.extractLane(sum4, 3)) / a.length;
67 |   }
68 | 
69 | } ());
70 | 


--------------------------------------------------------------------------------
/src/benchmarks/base.js:
--------------------------------------------------------------------------------
  1 | // SIMD Kernel Benchmark Harness
  2 | // Author: Peter Jensen
  3 | 
  4 | function Benchmark (config) {
  5 |   this.config            = config;
  6 |   this.initOk            = true;    // Initialize all properties used on a Benchmark object
  7 |   this.cleanupOk         = true;
  8 |   this.useAutoIterations = true;
  9 |   this.autoIterations    = 0;
 10 |   this.actualIterations  = 0;
 11 |   this.simdTime          = 0;
 12 |   this.nonSimdTime       = 0;
 13 | }
 14 | 
 15 | function Benchmarks () {
 16 |   this.benchmarks = [];
 17 | }
 18 | 
 19 | Benchmarks.prototype.add = function (benchmark) {
 20 |   this.benchmarks.push (benchmark);
 21 |   return this.benchmarks.length - 1;
 22 | }
 23 | 
 24 | Benchmarks.prototype.runOne = function (benchmark) {
 25 | 
 26 |   function timeKernel(kernel, iterations) {
 27 |     var start, stop;
 28 |     start = Date.now();
 29 |     kernel(iterations);
 30 |     stop = Date.now();
 31 |     return stop - start;
 32 |   }
 33 | 
 34 |   function computeIterations() {
 35 |     var desiredRuntime = 1000;  // milliseconds for longest running kernel
 36 |     var testIterations = 10;    // iterations used to determine time for desiredRuntime
 37 | 
 38 |     // Make the slowest kernel run for at least 500ms
 39 |     var simdTime = timeKernel(benchmark.config.kernelSimd, testIterations);
 40 |     var nonSimdTime = timeKernel(benchmark.config.kernelNonSimd, testIterations);
 41 |     var maxTime = simdTime > nonSimdTime ? simdTime : nonSimdTime;
 42 |     while (maxTime < 500) {
 43 |       testIterations *= 2;
 44 |       simdTime = timeKernel(benchmark.config.kernelSimd, testIterations);
 45 |       nonSimdTime = timeKernel(benchmark.config.kernelNonSimd, testIterations);
 46 |       maxTime = simdTime > nonSimdTime ? simdTime : nonSimdTime;
 47 |     }
 48 |     maxTime = simdTime > nonSimdTime ? simdTime : nonSimdTime;
 49 | 
 50 |     // Compute iteration count for 1 second run of slowest kernel
 51 |     var iterations = Math.ceil(desiredRuntime * testIterations / maxTime);
 52 |     return iterations;
 53 |   }
 54 | 
 55 |   // Initialize the kernels and check the correctness status
 56 |   if (!benchmark.config.kernelInit()) {
 57 |     benchmark.initOk = false;
 58 |     return false;
 59 |   }
 60 | 
 61 |   // Determine how many iterations to use.
 62 |   if (benchmark.useAutoIterations) {
 63 |     benchmark.autoIterations = computeIterations();
 64 |     benchmark.actualIterations = benchmark.autoIterations;
 65 |   }
 66 |   else {
 67 |     benchmark.actualIterations = benchmark.config.kernelIterations;
 68 |   }
 69 | 
 70 |   // Run the SIMD kernel
 71 |   benchmark.simdTime = timeKernel(benchmark.config.kernelSimd, benchmark.actualIterations);
 72 | 
 73 |   // Run the non-SIMD kernel
 74 |   benchmark.nonSimdTime = timeKernel(benchmark.config.kernelNonSimd, benchmark.actualIterations);
 75 | 
 76 |   // Do the final sanity check
 77 |   if (!benchmark.config.kernelCleanup()) {
 78 |     benchmark.cleanupOk = false;
 79 |     return false;
 80 |   }
 81 | 
 82 |   return true;
 83 | }
 84 | 
 85 | Benchmarks.prototype.report = function (benchmark, outputFunctions) {
 86 | 
 87 |   function fillRight(str, width) {
 88 |     str += ""; // make sure it's a string
 89 |     while (str.length < width) {
 90 |       str += " ";
 91 |     }
 92 |     return str;
 93 |   }
 94 | 
 95 |   function fillLeft(str, width) {
 96 |     str += ""; // make sure it's a string
 97 |     while (str.length < width) {
 98 |       str = " " + str;
 99 |     }
100 |     return str;
101 |   }
102 | 
103 |   if (!benchmark.initOk) {
104 |     outputFunctions.notifyError(fillRight(benchmark.config.kernelName + ": ", 23) + "FAILED INIT");
105 |     return;
106 |   }
107 |   if (!benchmark.cleanupOk) {
108 |     outputFunctions.notifyError(fillRight(benchmark.config.kernelName + ": ", 23) + "FAILED CLEANUP");
109 |     return;
110 |   }
111 | 
112 |   var ratio = benchmark.nonSimdTime / benchmark.simdTime;
113 |   outputFunctions.notifyResult(
114 |     fillRight(benchmark.config.kernelName + ": ", 23) +
115 |     "Iterations(" + fillLeft(benchmark.actualIterations, 10) + ")" +
116 |     ", SIMD(" + fillLeft(benchmark.simdTime + "ms)", 8) +
117 |     ", Non-SIMD(" + fillLeft(benchmark.nonSimdTime + "ms)", 8) +
118 |     ", Speedup(" + ratio.toFixed(3) + ")");
119 | }
120 | 
// Runs and reports every registered benchmark, in registration order.
// `useAutoIterations` is copied onto each benchmark before it runs; when the
// argument is omitted it defaults to false, so each kernel's configured
// kernelIterations is used.
Benchmarks.prototype.runAll = function (outputFunctions, useAutoIterations) {
  var auto = (typeof useAutoIterations === "undefined") ? false : useAutoIterations;

  // The count is captured once, before the first benchmark runs.
  var count = this.benchmarks.length;
  var index = 0;
  while (index < count) {
    var current = this.benchmarks[index];
    current.useAutoIterations = auto;
    this.runOne(current);
    this.report(current, outputFunctions);
    index += 1;
  }
};
132 | 
// Shared registry instance; each benchmark file registers its kernel with
// benchmarks.add(new Benchmark(kernelConfig)).
var benchmarks = new Benchmarks ();
134 | 


--------------------------------------------------------------------------------
/src/benchmarks/index.html:
--------------------------------------------------------------------------------
<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <title>EcmaScript SIMD benchmarks</title>
</head>
<body>
  <div id="logs">Running benchmarks...<br><br></div>
  <!-- SIMD polyfill; loaded before the harness and the kernels -->
  <script src="../ecmascript_simd.js"></script>
  <script src="base.js"></script>
  <script src="kernel-template.js"></script>
  <!-- Benchmark kernels. Keep this list in sync with run.js -->
  <script src='averageFloat32x4.js'></script>
  <script src='averageFloat32x4LoadFromInt8Array.js'></script>
  <script src='averageFloat32x4LoadX.js'></script>
  <script src='averageFloat32x4LoadXY.js'></script>
  <script src='averageFloat32x4LoadXYZ.js'></script>
  <script src='averageInt32x4Load.js'></script>
  <script src='mandelbrot.js'></script>
  <script src='matrix-multiplication.js'></script>
  <script src='transform.js'></script>
  <script src='shiftrows.js'></script>
  <script src='aobench.js'></script>
  <script src='transpose4x4.js'></script>
  <script src='inverse4x4.js'></script>
  <script src='sinx4.js'></script>
  <script src='memset.js'></script>
  <script src='memcpy.js'></script>
  <!-- Execute; loaded last, after all kernel scripts above -->
  <script src="run_browser.js"></script>
</body>
</html>
34 | 


--------------------------------------------------------------------------------
/src/benchmarks/inverse4x4.js:
--------------------------------------------------------------------------------
  1 | // Kernel for doing a 4x4 Matrix Inverse operation
  2 | // Based on Cramer's rule.
  3 | // See: ftp://download.intel.com/design/PentiumIII/sml/24504301.pdf
  4 | // Author: Peter Jensen
  5 | (function () {
  6 | 
  // Kernel configuration: the functions the harness calls for this
  // benchmark.  kernelIterations is the iteration count the harness uses
  // when automatic iteration selection is turned off.
  var kernelConfig = {
    kernelName:       "Matrix4x4Inverse",
    kernelInit:       init,
    kernelCleanup:    cleanup,
    kernelSimd:       simdMatrixInverseN,
    kernelNonSimd:    nonSimdMatrixInverseN,
    kernelIterations: 1000
  };

  // Hook up to the harness
  benchmarks.add (new Benchmark (kernelConfig));

  // Data shared by the functions in this file (scoped to the enclosing
  // function, not true globals).  Matrices are stored as 16 consecutive
  // float32 values, indexed as row*4 + column.
  var src    = new Float32Array(16);            // Source matrix
  var dst    = new Float32Array(16);            // Result matrix
  var tsrc   = new Float32Array(16);            // Transposed version of 'src'; also used as scratch by checkMatrix()
  var tmp   = new Float32Array(12);             // Temporary array of multiply results
  // Identity matrix that checkMatrix() compares against
  var ident = new Float32Array(
                    [1,0,0,0,
                     0,1,0,0,
                     0,0,1,0,
                     0,0,0,1]);
 30 | 
 31 |   function printMatrix(matrix) {
 32 |     for (var r = 0; r < 4; ++r) {
 33 |       var str = "";
 34 |       var ri = r*4;
 35 |       for (var c = 0; c < 4; ++c) {
 36 |         var value = matrix[ri + c];
 37 |         str += " " + value.toFixed(2);
 38 |       }
 39 |       print(str);
 40 |     }
 41 |   }
 42 | 
 43 |   function initMatrix(matrix) {
 44 |     // These values were chosen somewhat randomly, but they will at least yield a solution.
 45 |     matrix [0]  =  0;  matrix[1] =  1; matrix[2]  =  2; matrix[3]  =  3;
 46 |     matrix [4]  = -1; matrix[5]  = -2; matrix[6]  = -3; matrix[7]  = -4;
 47 |     matrix [8]  =  0;  matrix[9] =  0; matrix[10] =  2; matrix[11] =  3;
 48 |     matrix [12] = -1; matrix[13] = -2; matrix[14] =  0; matrix[15] = -4;
 49 |   }
 50 | 
 51 |   function mulMatrix(dst, op1, op2) {
 52 |     for (var r = 0; r < 4; ++r) {
 53 |       for (var c = 0; c < 4; ++c) {
 54 |         var ri = 4*r;
 55 |         dst[ri + c] = op1[ri]*op2[c] + op1[ri+1]*op2[c+4] + op1[ri+2]*op2[c+8] + op1[ri+3]*op2[c+12]
 56 |       }
 57 |     }
 58 |   }
 59 | 
 60 |   function checkMatrix(matrix) {
 61 |     // when multiplied with the src matrix it should yield the identity matrix
 62 |     mulMatrix(tsrc, src, matrix);
 63 |     for (var i = 0; i < 16; ++i) {
 64 |       if (Math.abs (tsrc[i] - ident[i]) > 0.00001) {
 65 |         return false;
 66 |       }
 67 |     }
 68 |     // printMatrix (tsrc);
 69 |     return true;
 70 |   }
 71 | 
 72 |   // Kernel Initializer
 73 |   function init() {
 74 |     initMatrix(src);
 75 |     // printMatrix(src);
 76 |     nonSimdMatrixInverseN(1);
 77 |     // printMatrix(dst);
 78 |     if (!checkMatrix(dst)) {
 79 |       return false;
 80 |     }
 81 | 
 82 |     initMatrix(src);
 83 |     simdMatrixInverseN(1);
 84 |     // printMatrix(dst);
 85 |     if (!checkMatrix(dst)) {
 86 |       return false;
 87 |     }
 88 | 
 89 |     return true;
 90 |   }
 91 | 
  // Kernel cleanup hook: repeats the full init() verification (both kernels
  // are re-run once and their results checked) and returns its verdict.
  function cleanup() {
    return init();
  }
 95 | 
 96 |   function simdMatrixInverse() {
 97 |     var src0, src1, src2, src3;
 98 |     var row0, row1, row2, row3;
 99 |     var tmp1;
100 |     var minor0, minor1, minor2, minor3;
101 |     var det;
102 | 
103 |     // Load the 4 rows
104 |     var src0 = SIMD.Float32x4.load(src, 0);
105 |     var src1 = SIMD.Float32x4.load(src, 4);
106 |     var src2 = SIMD.Float32x4.load(src, 8);
107 |     var src3 = SIMD.Float32x4.load(src, 16);
108 | 
109 |     // Transpose the source matrix.  Sort of.  Not a true transpose operation
110 | 
111 |     tmp1 = SIMD.Float32x4.shuffle(src0, src1, 0, 1, 4, 5);
112 |     row1 = SIMD.Float32x4.shuffle(src2, src3, 0, 1, 4, 5);
113 |     row0 = SIMD.Float32x4.shuffle(tmp1, row1, 0, 2, 4, 6);
114 |     row1 = SIMD.Float32x4.shuffle(row1, tmp1, 1, 3, 5, 7);
115 | 
116 |     tmp1 = SIMD.Float32x4.shuffle(src0, src1, 2, 3, 6, 7);
117 |     row3 = SIMD.Float32x4.shuffle(src2, src3, 2, 3, 6, 7);
118 |     row2 = SIMD.Float32x4.shuffle(tmp1, row3, 0, 2, 4, 6);
119 |     row3 = SIMD.Float32x4.shuffle(row3, tmp1, 1, 3, 5, 7);
120 | 
121 |     // This is a true transposition, but it will lead to an incorrect result
122 | 
123 |     //tmp1 = SIMD.Float32x4.shuffle(src0, src1, 0, 1, 4, 5);
124 |     //tmp2 = SIMD.Float32x4.shuffle(src2, src3, 0, 1, 4, 5);
125 |     //row0  = SIMD.Float32x4.shuffle(tmp1, tmp2, 0, 2, 4, 6);
126 |     //row1  = SIMD.Float32x4.shuffle(tmp1, tmp2, 1, 3, 5, 7);
127 | 
128 |     //tmp1 = SIMD.Float32x4.shuffle(src0, src1, 2, 3, 6, 7);
129 |     //tmp2 = SIMD.Float32x4.shuffle(src2, src3, 2, 3, 6, 7);
130 |     //row2  = SIMD.Float32x4.shuffle(tmp1, tmp2, 0, 2, 4, 6);
131 |     //row3  = SIMD.Float32x4.shuffle(tmp1, tmp2, 1, 3, 5, 7);
132 | 
133 |     // ----
134 |     tmp1   = SIMD.Float32x4.mul(row2, row3);
135 |     tmp1   = SIMD.Float32x4.swizzle(tmp1, 1, 0, 3, 2); // 0xB1 = 10110001
136 |     minor0 = SIMD.Float32x4.mul(row1, tmp1);
137 |     minor1 = SIMD.Float32x4.mul(row0, tmp1);
138 |     tmp1   = SIMD.Float32x4.swizzle(tmp1, 2, 3, 0, 1); // 0x4E = 01001110
139 |     minor0 = SIMD.Float32x4.sub(SIMD.Float32x4.mul(row1, tmp1), minor0);
140 |     minor1 = SIMD.Float32x4.sub(SIMD.Float32x4.mul(row0, tmp1), minor1);
141 |     minor1 = SIMD.Float32x4.swizzle(minor1, 2, 3, 0, 1); // 0x4E = 01001110
142 | 
143 |     // ----
144 |     tmp1   = SIMD.Float32x4.mul(row1, row2);
145 |     tmp1   = SIMD.Float32x4.swizzle(tmp1, 1, 0, 3, 2); // 0xB1 = 10110001
146 |     minor0 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row3, tmp1), minor0);
147 |     minor3 = SIMD.Float32x4.mul(row0, tmp1);
148 |     tmp1   = SIMD.Float32x4.swizzle(tmp1, 2, 3, 0, 1); // 0x4E = 01001110
149 |     minor0 = SIMD.Float32x4.sub(minor0, SIMD.Float32x4.mul(row3, tmp1));
150 |     minor3 = SIMD.Float32x4.sub(SIMD.Float32x4.mul(row0, tmp1), minor3);
151 |     minor3 = SIMD.Float32x4.swizzle(minor3, 2, 3, 0, 1); // 0x4E = 01001110
152 | 
153 |     // ----
154 |     tmp1   = SIMD.Float32x4.mul(SIMD.Float32x4.swizzle(row1, 2, 3, 0, 1), row3); // 0x4E = 01001110
155 |     tmp1   = SIMD.Float32x4.swizzle(tmp1, 1, 0, 3, 2); // 0xB1 = 10110001
156 |     row2   = SIMD.Float32x4.swizzle(row2, 2, 3, 0, 1);  // 0x4E = 01001110
157 |     minor0 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row2, tmp1), minor0);
158 |     minor2 = SIMD.Float32x4.mul(row0, tmp1);
159 |     tmp1   = SIMD.Float32x4.swizzle(tmp1, 2, 3, 0, 1); // 0x4E = 01001110
160 |     minor0 = SIMD.Float32x4.sub(minor0, SIMD.Float32x4.mul(row2, tmp1));
161 |     minor2 = SIMD.Float32x4.sub(SIMD.Float32x4.mul(row0, tmp1), minor2);
162 |     minor2 = SIMD.Float32x4.swizzle(minor2, 2, 3, 0, 1); // 0x4E = 01001110
163 | 
164 |     // ----
165 |     tmp1   = SIMD.Float32x4.mul(row0, row1);
166 |     tmp1   = SIMD.Float32x4.swizzle(tmp1, 1, 0, 3, 2); // 0xB1 = 10110001
167 |     minor2 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row3, tmp1), minor2);
168 |     minor3 = SIMD.Float32x4.sub(SIMD.Float32x4.mul(row2, tmp1), minor3);
169 |     tmp1   = SIMD.Float32x4.swizzle(tmp1, 2, 3, 0, 1); // 0x4E = 01001110
170 |     minor2 = SIMD.Float32x4.sub(SIMD.Float32x4.mul(row3, tmp1), minor2);
171 |     minor3 = SIMD.Float32x4.sub(minor3, SIMD.Float32x4.mul(row2, tmp1));
172 | 
173 |     // ----
174 |     tmp1   = SIMD.Float32x4.mul(row0, row3);
175 |     tmp1   = SIMD.Float32x4.swizzle(tmp1, 1, 0, 3, 2); // 0xB1 = 10110001
176 |     minor1 = SIMD.Float32x4.sub(minor1, SIMD.Float32x4.mul(row2, tmp1));
177 |     minor2 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row1, tmp1), minor2);
178 |     tmp1   = SIMD.Float32x4.swizzle(tmp1, 2, 3, 0, 1); // 0x4E = 01001110
179 |     minor1 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row2, tmp1), minor1);
180 |     minor2 = SIMD.Float32x4.sub(minor2, SIMD.Float32x4.mul(row1, tmp1));
181 | 
182 |     // ----
183 |     tmp1   = SIMD.Float32x4.mul(row0, row2);
184 |     tmp1   = SIMD.Float32x4.swizzle(tmp1, 1, 0, 3, 2); // 0xB1 = 10110001
185 |     minor1 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row3, tmp1), minor1);
186 |     minor3 = SIMD.Float32x4.sub(minor3, SIMD.Float32x4.mul(row1, tmp1));
187 |     tmp1   = SIMD.Float32x4.swizzle(tmp1, 2, 3, 0, 1); // 0x4E = 01001110
188 |     minor1 = SIMD.Float32x4.sub(minor1, SIMD.Float32x4.mul(row3, tmp1));
189 |     minor3 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row1, tmp1), minor3);
190 | 
191 |     // Compute determinant
192 |     det   = SIMD.Float32x4.mul(row0, minor0);
193 |     det   = SIMD.Float32x4.add(SIMD.Float32x4.swizzle(det, 2, 3, 0, 1), det); // 0x4E = 01001110
194 |     det   = SIMD.Float32x4.add(SIMD.Float32x4.swizzle(det, 1, 0, 3, 2), det); // 0xB1 = 10110001
195 |     tmp1  = SIMD.Float32x4.reciprocalApproximation(det);
196 |     det   = SIMD.Float32x4.sub(SIMD.Float32x4.add(tmp1, tmp1), SIMD.Float32x4.mul(det, SIMD.Float32x4.mul(tmp1, tmp1)));
197 |     det   = SIMD.Float32x4.swizzle(det, 0, 0, 0, 0);
198 | 
199 |     // These shuffles aren't necessary if the faulty transposition is done
200 |     // up at the top of this function.
201 |     //minor0 = SIMD.Float32x4.swizzle(minor0, 2, 1, 0, 3);
202 |     //minor1 = SIMD.Float32x4.swizzle(minor1, 2, 1, 0, 3);
203 |     //minor2 = SIMD.Float32x4.swizzle(minor2, 2, 1, 0, 3);
204 |     //minor3 = SIMD.Float32x4.swizzle(minor3, 2, 1, 0, 3);
205 | 
206 |     // Compute final values by multiplying with 1/det
207 |     minor0 = SIMD.Float32x4.mul(det, minor0);
208 |     minor1 = SIMD.Float32x4.mul(det, minor1);
209 |     minor2 = SIMD.Float32x4.mul(det, minor2);
210 |     minor3 = SIMD.Float32x4.mul(det, minor3);
211 | 
212 |     SIMD.Float32x4.store(dst, 0,  minor0);
213 |     SIMD.Float32x4.store(dst, 4,  minor1);
214 |     SIMD.Float32x4.store(dst, 8,  minor2);
215 |     SIMD.Float32x4.store(dst, 12, minor3);
216 |   }
217 | 
  // Scalar inverse: reads the 4x4 matrix in `src` and stores its inverse in
  // `dst`, using Cramer's rule (cofactors divided by the determinant).
  // Takes no arguments and returns nothing.  Overwrites `tsrc` (transposed
  // copy of src) and `tmp` (products shared between cofactors).  All of these
  // are Float32Arrays, so every stored intermediate is rounded to float32.
  function nonSimdMatrixInverse() {

    // Transpose the source matrix
    for (var i = 0; i < 4; i++) {
      tsrc[i]      = src[i*4];
      tsrc[i + 4]  = src[i*4 + 1];
      tsrc[i + 8]  = src[i*4 + 2];
      tsrc[i + 12] = src[i*4 + 3];
    }

    // Calculate pairs for first 8 elements (cofactors)
    // Each tmp entry is a product of two elements from the last two rows of
    // tsrc; each product is reused by several cofactors below.
    tmp[0] = tsrc[10] * tsrc[15];
    tmp[1] = tsrc[11] * tsrc[14];
    tmp[2] = tsrc[9]  * tsrc[15];
    tmp[3] = tsrc[11] * tsrc[13];
    tmp[4] = tsrc[9]  * tsrc[14];
    tmp[5] = tsrc[10] * tsrc[13];
    tmp[6] = tsrc[8]  * tsrc[15];
    tmp[7] = tsrc[11] * tsrc[12];
    tmp[8] = tsrc[8]  * tsrc[14];
    tmp[9] = tsrc[10] * tsrc[12];
    tmp[10] = tsrc[8] * tsrc[13];
    tmp[11] = tsrc[9] * tsrc[12];

    // calculate first 8 elements (cofactors)
    // Each element is written as positive terms first, then the negative
    // terms are subtracted in a second statement.
    dst[0]  = tmp[0]*tsrc[5] + tmp[3]*tsrc[6] + tmp[4]*tsrc[7];
    dst[0] -= tmp[1]*tsrc[5] + tmp[2]*tsrc[6] + tmp[5]*tsrc[7];
    dst[1]  = tmp[1]*tsrc[4] + tmp[6]*tsrc[6] + tmp[9]*tsrc[7];
    dst[1] -= tmp[0]*tsrc[4] + tmp[7]*tsrc[6] + tmp[8]*tsrc[7];
    dst[2]  = tmp[2]*tsrc[4] + tmp[7]*tsrc[5] + tmp[10]*tsrc[7];
    dst[2] -= tmp[3]*tsrc[4] + tmp[6]*tsrc[5] + tmp[11]*tsrc[7];
    dst[3]  = tmp[5]*tsrc[4] + tmp[8]*tsrc[5] + tmp[11]*tsrc[6];
    dst[3] -= tmp[4]*tsrc[4] + tmp[9]*tsrc[5] + tmp[10]*tsrc[6];
    dst[4]  = tmp[1]*tsrc[1] + tmp[2]*tsrc[2] + tmp[5]*tsrc[3];
    dst[4] -= tmp[0]*tsrc[1] + tmp[3]*tsrc[2] + tmp[4]*tsrc[3];
    dst[5]  = tmp[0]*tsrc[0] + tmp[7]*tsrc[2] + tmp[8]*tsrc[3];
    dst[5] -= tmp[1]*tsrc[0] + tmp[6]*tsrc[2] + tmp[9]*tsrc[3];
    dst[6]  = tmp[3]*tsrc[0] + tmp[6]*tsrc[1] + tmp[11]*tsrc[3];
    dst[6] -= tmp[2]*tsrc[0] + tmp[7]*tsrc[1] + tmp[10]*tsrc[3];
    dst[7]  = tmp[4]*tsrc[0] + tmp[9]*tsrc[1] + tmp[10]*tsrc[2];
    dst[7] -= tmp[5]*tsrc[0] + tmp[8]*tsrc[1] + tmp[11]*tsrc[2];

    // calculate pairs for second 8 elements (cofactors)
    // Same scheme as above, now with products from the first two rows of
    // tsrc; tmp is reused.
    tmp[0]  = tsrc[2]*tsrc[7];
    tmp[1]  = tsrc[3]*tsrc[6];
    tmp[2]  = tsrc[1]*tsrc[7];
    tmp[3]  = tsrc[3]*tsrc[5];
    tmp[4]  = tsrc[1]*tsrc[6];
    tmp[5]  = tsrc[2]*tsrc[5];
    tmp[6]  = tsrc[0]*tsrc[7];
    tmp[7]  = tsrc[3]*tsrc[4];
    tmp[8]  = tsrc[0]*tsrc[6];
    tmp[9]  = tsrc[2]*tsrc[4];
    tmp[10] = tsrc[0]*tsrc[5];
    tmp[11] = tsrc[1]*tsrc[4];

    // calculate second 8 elements (cofactors)
    dst[8]  = tmp[0]*tsrc[13]  + tmp[3]*tsrc[14]  + tmp[4]*tsrc[15];
    dst[8] -= tmp[1]*tsrc[13]  + tmp[2]*tsrc[14]  + tmp[5]*tsrc[15];
    dst[9]  = tmp[1]*tsrc[12]  + tmp[6]*tsrc[14]  + tmp[9]*tsrc[15];
    dst[9] -= tmp[0]*tsrc[12]  + tmp[7]*tsrc[14]  + tmp[8]*tsrc[15];
    dst[10] = tmp[2]*tsrc[12]  + tmp[7]*tsrc[13]  + tmp[10]*tsrc[15];
    dst[10]-= tmp[3]*tsrc[12]  + tmp[6]*tsrc[13]  + tmp[11]*tsrc[15];
    dst[11] = tmp[5]*tsrc[12]  + tmp[8]*tsrc[13]  + tmp[11]*tsrc[14];
    dst[11]-= tmp[4]*tsrc[12]  + tmp[9]*tsrc[13]  + tmp[10]*tsrc[14];
    dst[12] = tmp[2]*tsrc[10]  + tmp[5]*tsrc[11]  + tmp[1]*tsrc[9];
    dst[12]-= tmp[4]*tsrc[11]  + tmp[0]*tsrc[9]   + tmp[3]*tsrc[10];
    dst[13] = tmp[8]*tsrc[11]  + tmp[0]*tsrc[8]   + tmp[7]*tsrc[10];
    dst[13]-= tmp[6]*tsrc[10]  + tmp[9]*tsrc[11]  + tmp[1]*tsrc[8];
    dst[14] = tmp[6]*tsrc[9]   + tmp[11]*tsrc[11] + tmp[3]*tsrc[8];
    dst[14]-= tmp[10]*tsrc[11] + tmp[2]*tsrc[8]   + tmp[7]*tsrc[9];
    dst[15] = tmp[10]*tsrc[10] + tmp[4]*tsrc[8]   + tmp[9]*tsrc[9];
    dst[15]-= tmp[8]*tsrc[9]   + tmp[11]*tsrc[10] + tmp[5]*tsrc[8];

    // calculate determinant
    // Expansion along the first row of tsrc using the cofactors in dst[0..3].
    var det = tsrc[0]*dst[0] + tsrc[1]*dst[1] + tsrc[2]*dst[2] + tsrc[3]*dst[3];

    // calculate matrix inverse
    // No guard for a zero determinant: a singular src yields non-finite
    // values in dst.
    det = 1/det;
    for (var j = 0; j < 16; j++) {
      dst[j] *= det;
    }

  }
302 | 
303 |   // SIMD version of the kernel
304 |   function simdMatrixInverseN(n) {
305 |     for (var iterations = 0; iterations < n; ++iterations) {
306 |       var src0, src1, src2, src3;
307 |       var row0, row1, row2, row3;
308 |       var tmp1;
309 |       var minor0, minor1, minor2, minor3;
310 |       var det;
311 | 
312 |       // Load the 4 rows
313 |       var src0 = SIMD.Float32x4.load(src, 0);
314 |       var src1 = SIMD.Float32x4.load(src, 4);
315 |       var src2 = SIMD.Float32x4.load(src, 8);
316 |       var src3 = SIMD.Float32x4.load(src, 12);
317 | 
318 |       // Transpose the source matrix.  Sort of.  Not a true transpose operation
319 | 
320 |       tmp1 = SIMD.Float32x4.shuffle(src0, src1, 0, 1, 4, 5);
321 |       row1 = SIMD.Float32x4.shuffle(src2, src3, 0, 1, 4, 5);
322 |       row0 = SIMD.Float32x4.shuffle(tmp1, row1, 0, 2, 4, 6);
323 |       row1 = SIMD.Float32x4.shuffle(row1, tmp1, 1, 3, 5, 7);
324 | 
325 |       tmp1 = SIMD.Float32x4.shuffle(src0, src1, 2, 3, 6, 7);
326 |       row3 = SIMD.Float32x4.shuffle(src2, src3, 2, 3, 6, 7);
327 |       row2 = SIMD.Float32x4.shuffle(tmp1, row3, 0, 2, 4, 6);
328 |       row3 = SIMD.Float32x4.shuffle(row3, tmp1, 1, 3, 5, 7);
329 | 
330 |       // This is a true transposition, but it will lead to an incorrect result
331 | 
332 |       //tmp1 = SIMD.Float32x4.shuffle(src0, src1, 0, 1, 4, 5);
333 |       //tmp2 = SIMD.Float32x4.shuffle(src2, src3, 0, 1, 4, 5);
334 |       //row0  = SIMD.Float32x4.shuffle(tmp1, tmp2, 0, 2, 4, 6);
335 |       //row1  = SIMD.Float32x4.shuffle(tmp1, tmp2, 1, 3, 5, 7);
336 | 
337 |       //tmp1 = SIMD.Float32x4.shuffle(src0, src1, 2, 3, 6, 7);
338 |       //tmp2 = SIMD.Float32x4.shuffle(src2, src3, 2, 3, 6, 7);
339 |       //row2  = SIMD.Float32x4.shuffle(tmp1, tmp2, 0, 2, 4, 6);
340 |       //row3  = SIMD.Float32x4.shuffle(tmp1, tmp2, 1, 3, 5, 7);
341 | 
342 |       // ----
343 |       tmp1 = SIMD.Float32x4.mul(row2, row3);
344 |       tmp1 = SIMD.Float32x4.swizzle(tmp1, 1, 0, 3, 2); // 0xB1 = 10110001
345 |       minor0 = SIMD.Float32x4.mul(row1, tmp1);
346 |       minor1 = SIMD.Float32x4.mul(row0, tmp1);
347 |       tmp1 = SIMD.Float32x4.swizzle(tmp1, 2, 3, 0, 1); // 0x4E = 01001110
348 |       minor0 = SIMD.Float32x4.sub(SIMD.Float32x4.mul(row1, tmp1), minor0);
349 |       minor1 = SIMD.Float32x4.sub(SIMD.Float32x4.mul(row0, tmp1), minor1);
350 |       minor1 = SIMD.Float32x4.swizzle(minor1, 2, 3, 0, 1); // 0x4E = 01001110
351 | 
352 |       // ----
353 |       tmp1 = SIMD.Float32x4.mul(row1, row2);
354 |       tmp1 = SIMD.Float32x4.swizzle(tmp1, 1, 0, 3, 2); // 0xB1 = 10110001
355 |       minor0 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row3, tmp1), minor0);
356 |       minor3 = SIMD.Float32x4.mul(row0, tmp1);
357 |       tmp1 = SIMD.Float32x4.swizzle(tmp1, 2, 3, 0, 1); // 0x4E = 01001110
358 |       minor0 = SIMD.Float32x4.sub(minor0, SIMD.Float32x4.mul(row3, tmp1));
359 |       minor3 = SIMD.Float32x4.sub(SIMD.Float32x4.mul(row0, tmp1), minor3);
360 |       minor3 = SIMD.Float32x4.swizzle(minor3, 2, 3, 0, 1); // 0x4E = 01001110
361 | 
362 |       // ----
363 |       tmp1 = SIMD.Float32x4.mul(SIMD.Float32x4.swizzle(row1, 2, 3, 0, 1), row3); // 0x4E = 01001110
364 |       tmp1 = SIMD.Float32x4.swizzle(tmp1, 1, 0, 3, 2); // 0xB1 = 10110001
365 |       row2 = SIMD.Float32x4.swizzle(row2, 2, 3, 0, 1);  // 0x4E = 01001110
366 |       minor0 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row2, tmp1), minor0);
367 |       minor2 = SIMD.Float32x4.mul(row0, tmp1);
368 |       tmp1 = SIMD.Float32x4.swizzle(tmp1, 2, 3, 0, 1); // 0x4E = 01001110
369 |       minor0 = SIMD.Float32x4.sub(minor0, SIMD.Float32x4.mul(row2, tmp1));
370 |       minor2 = SIMD.Float32x4.sub(SIMD.Float32x4.mul(row0, tmp1), minor2);
371 |       minor2 = SIMD.Float32x4.swizzle(minor2, 2, 3, 0, 1); // 0x4E = 01001110
372 | 
373 |       // ----
374 |       tmp1 = SIMD.Float32x4.mul(row0, row1);
375 |       tmp1 = SIMD.Float32x4.swizzle(tmp1, 1, 0, 3, 2); // 0xB1 = 10110001
376 |       minor2 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row3, tmp1), minor2);
377 |       minor3 = SIMD.Float32x4.sub(SIMD.Float32x4.mul(row2, tmp1), minor3);
378 |       tmp1 = SIMD.Float32x4.swizzle(tmp1, 2, 3, 0, 1); // 0x4E = 01001110
379 |       minor2 = SIMD.Float32x4.sub(SIMD.Float32x4.mul(row3, tmp1), minor2);
380 |       minor3 = SIMD.Float32x4.sub(minor3, SIMD.Float32x4.mul(row2, tmp1));
381 | 
382 |       // ----
383 |       tmp1 = SIMD.Float32x4.mul(row0, row3);
384 |       tmp1 = SIMD.Float32x4.swizzle(tmp1, 1, 0, 3, 2); // 0xB1 = 10110001
385 |       minor1 = SIMD.Float32x4.sub(minor1, SIMD.Float32x4.mul(row2, tmp1));
386 |       minor2 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row1, tmp1), minor2);
387 |       tmp1 = SIMD.Float32x4.swizzle(tmp1, 2, 3, 0, 1); // 0x4E = 01001110
388 |       minor1 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row2, tmp1), minor1);
389 |       minor2 = SIMD.Float32x4.sub(minor2, SIMD.Float32x4.mul(row1, tmp1));
390 | 
391 |       // ----
392 |       tmp1 = SIMD.Float32x4.mul(row0, row2);
393 |       tmp1 = SIMD.Float32x4.swizzle(tmp1, 1, 0, 3, 2); // 0xB1 = 10110001
394 |       minor1 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row3, tmp1), minor1);
395 |       minor3 = SIMD.Float32x4.sub(minor3, SIMD.Float32x4.mul(row1, tmp1));
396 |       tmp1 = SIMD.Float32x4.swizzle(tmp1, 2, 3, 0, 1); // 0x4E = 01001110
397 |       minor1 = SIMD.Float32x4.sub(minor1, SIMD.Float32x4.mul(row3, tmp1));
398 |       minor3 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row1, tmp1), minor3);
399 | 
400 |       // Compute determinant
401 |       det = SIMD.Float32x4.mul(row0, minor0);
402 |       det = SIMD.Float32x4.add(SIMD.Float32x4.swizzle(det, 2, 3, 0, 1), det); // 0x4E = 01001110
403 |       det = SIMD.Float32x4.add(SIMD.Float32x4.swizzle(det, 1, 0, 3, 2), det); // 0xB1 = 10110001
404 |       tmp1 = SIMD.Float32x4.reciprocalApproximation(det);
405 |       det = SIMD.Float32x4.sub(SIMD.Float32x4.add(tmp1, tmp1), SIMD.Float32x4.mul(det, SIMD.Float32x4.mul(tmp1, tmp1)));
406 |       det = SIMD.Float32x4.swizzle(det, 0, 0, 0, 0);
407 | 
408 |       // These shuffles aren't necessary if the faulty transposition is done
409 |       // up at the top of this function.
410 |       //minor0 = SIMD.Float32x4.swizzle(minor0, 2, 1, 0, 3);
411 |       //minor1 = SIMD.Float32x4.swizzle(minor1, 2, 1, 0, 3);
412 |       //minor2 = SIMD.Float32x4.swizzle(minor2, 2, 1, 0, 3);
413 |       //minor3 = SIMD.Float32x4.swizzle(minor3, 2, 1, 0, 3);
414 | 
415 |       // Compute final values by multiplying with 1/det
416 |       minor0 = SIMD.Float32x4.mul(det, minor0);
417 |       minor1 = SIMD.Float32x4.mul(det, minor1);
418 |       minor2 = SIMD.Float32x4.mul(det, minor2);
419 |       minor3 = SIMD.Float32x4.mul(det, minor3);
420 | 
421 |       SIMD.Float32x4.store(dst, 0,  minor0);
422 |       SIMD.Float32x4.store(dst, 4,  minor1);
423 |       SIMD.Float32x4.store(dst, 8,  minor2);
424 |       SIMD.Float32x4.store(dst, 12, minor3);
425 |     }
426 |   }
427 | 
428 |   // Non SIMD version of the kernel
429 |   function nonSimdMatrixInverseN(n) {
430 |     for (var iterations = 0; iterations < n; ++iterations) {
431 |       // Transpose the source matrix
432 |       for (var i = 0; i < 4; i++) {
433 |         tsrc[i] = src[i * 4];
434 |         tsrc[i + 4] = src[i * 4 + 1];
435 |         tsrc[i + 8] = src[i * 4 + 2];
436 |         tsrc[i + 12] = src[i * 4 + 3];
437 |       }
438 | 
439 |       // Calculate pairs for first 8 elements (cofactors)
440 |       tmp[0] = tsrc[10] * tsrc[15];
441 |       tmp[1] = tsrc[11] * tsrc[14];
442 |       tmp[2] = tsrc[9] * tsrc[15];
443 |       tmp[3] = tsrc[11] * tsrc[13];
444 |       tmp[4] = tsrc[9] * tsrc[14];
445 |       tmp[5] = tsrc[10] * tsrc[13];
446 |       tmp[6] = tsrc[8] * tsrc[15];
447 |       tmp[7] = tsrc[11] * tsrc[12];
448 |       tmp[8] = tsrc[8] * tsrc[14];
449 |       tmp[9] = tsrc[10] * tsrc[12];
450 |       tmp[10] = tsrc[8] * tsrc[13];
451 |       tmp[11] = tsrc[9] * tsrc[12];
452 | 
453 |       // calculate first 8 elements (cofactors)
454 |       dst[0] = tmp[0] * tsrc[5] + tmp[3] * tsrc[6] + tmp[4] * tsrc[7];
455 |       dst[0] -= tmp[1] * tsrc[5] + tmp[2] * tsrc[6] + tmp[5] * tsrc[7];
456 |       dst[1] = tmp[1] * tsrc[4] + tmp[6] * tsrc[6] + tmp[9] * tsrc[7];
457 |       dst[1] -= tmp[0] * tsrc[4] + tmp[7] * tsrc[6] + tmp[8] * tsrc[7];
458 |       dst[2] = tmp[2] * tsrc[4] + tmp[7] * tsrc[5] + tmp[10] * tsrc[7];
459 |       dst[2] -= tmp[3] * tsrc[4] + tmp[6] * tsrc[5] + tmp[11] * tsrc[7];
460 |       dst[3] = tmp[5] * tsrc[4] + tmp[8] * tsrc[5] + tmp[11] * tsrc[6];
461 |       dst[3] -= tmp[4] * tsrc[4] + tmp[9] * tsrc[5] + tmp[10] * tsrc[6];
462 |       dst[4] = tmp[1] * tsrc[1] + tmp[2] * tsrc[2] + tmp[5] * tsrc[3];
463 |       dst[4] -= tmp[0] * tsrc[1] + tmp[3] * tsrc[2] + tmp[4] * tsrc[3];
464 |       dst[5] = tmp[0] * tsrc[0] + tmp[7] * tsrc[2] + tmp[8] * tsrc[3];
465 |       dst[5] -= tmp[1] * tsrc[0] + tmp[6] * tsrc[2] + tmp[9] * tsrc[3];
466 |       dst[6] = tmp[3] * tsrc[0] + tmp[6] * tsrc[1] + tmp[11] * tsrc[3];
467 |       dst[6] -= tmp[2] * tsrc[0] + tmp[7] * tsrc[1] + tmp[10] * tsrc[3];
468 |       dst[7] = tmp[4] * tsrc[0] + tmp[9] * tsrc[1] + tmp[10] * tsrc[2];
469 |       dst[7] -= tmp[5] * tsrc[0] + tmp[8] * tsrc[1] + tmp[11] * tsrc[2];
470 | 
471 |       // calculate pairs for second 8 elements (cofactors)
472 |       tmp[0] = tsrc[2] * tsrc[7];
473 |       tmp[1] = tsrc[3] * tsrc[6];
474 |       tmp[2] = tsrc[1] * tsrc[7];
475 |       tmp[3] = tsrc[3] * tsrc[5];
476 |       tmp[4] = tsrc[1] * tsrc[6];
477 |       tmp[5] = tsrc[2] * tsrc[5];
478 |       tmp[6] = tsrc[0] * tsrc[7];
479 |       tmp[7] = tsrc[3] * tsrc[4];
480 |       tmp[8] = tsrc[0] * tsrc[6];
481 |       tmp[9] = tsrc[2] * tsrc[4];
482 |       tmp[10] = tsrc[0] * tsrc[5];
483 |       tmp[11] = tsrc[1] * tsrc[4];
484 | 
485 |       // calculate second 8 elements (cofactors)
486 |       dst[8] = tmp[0] * tsrc[13] + tmp[3] * tsrc[14] + tmp[4] * tsrc[15];
487 |       dst[8] -= tmp[1] * tsrc[13] + tmp[2] * tsrc[14] + tmp[5] * tsrc[15];
488 |       dst[9] = tmp[1] * tsrc[12] + tmp[6] * tsrc[14] + tmp[9] * tsrc[15];
489 |       dst[9] -= tmp[0] * tsrc[12] + tmp[7] * tsrc[14] + tmp[8] * tsrc[15];
490 |       dst[10] = tmp[2] * tsrc[12] + tmp[7] * tsrc[13] + tmp[10] * tsrc[15];
491 |       dst[10] -= tmp[3] * tsrc[12] + tmp[6] * tsrc[13] + tmp[11] * tsrc[15];
492 |       dst[11] = tmp[5] * tsrc[12] + tmp[8] * tsrc[13] + tmp[11] * tsrc[14];
493 |       dst[11] -= tmp[4] * tsrc[12] + tmp[9] * tsrc[13] + tmp[10] * tsrc[14];
494 |       dst[12] = tmp[2] * tsrc[10] + tmp[5] * tsrc[11] + tmp[1] * tsrc[9];
495 |       dst[12] -= tmp[4] * tsrc[11] + tmp[0] * tsrc[9] + tmp[3] * tsrc[10];
496 |       dst[13] = tmp[8] * tsrc[11] + tmp[0] * tsrc[8] + tmp[7] * tsrc[10];
497 |       dst[13] -= tmp[6] * tsrc[10] + tmp[9] * tsrc[11] + tmp[1] * tsrc[8];
498 |       dst[14] = tmp[6] * tsrc[9] + tmp[11] * tsrc[11] + tmp[3] * tsrc[8];
499 |       dst[14] -= tmp[10] * tsrc[11] + tmp[2] * tsrc[8] + tmp[7] * tsrc[9];
500 |       dst[15] = tmp[10] * tsrc[10] + tmp[4] * tsrc[8] + tmp[9] * tsrc[9];
501 |       dst[15] -= tmp[8] * tsrc[9] + tmp[11] * tsrc[10] + tmp[5] * tsrc[8];
502 | 
503 |       // calculate determinant
504 |       var det = tsrc[0] * dst[0] + tsrc[1] * dst[1] + tsrc[2] * dst[2] + tsrc[3] * dst[3];
505 | 
506 |       // calculate matrix inverse
507 |       det = 1 / det;
508 |       for (var j = 0; j < 16; j++) {
509 |         dst[j] *= det;
510 |       }
511 |     }
512 |   }
513 | 
514 | } ());
515 | 


--------------------------------------------------------------------------------
/src/benchmarks/kernel-template.js:
--------------------------------------------------------------------------------
 1 | // Kernel template
 2 | // Author: Peter Jensen
 3 | (function () {
 4 | 
 5 |   // Kernel configuration
 6 |   var kernelConfig = {
 7 |     kernelName:       "Test",
 8 |     kernelInit:       init,
 9 |     kernelCleanup:    cleanup,
10 |     kernelSimd:       simd,
11 |     kernelNonSimd:    nonSimd,
12 |     kernelIterations: 100000000
13 |   };
14 | 
15 |   // Hook up to the harness
16 |   benchmarks.add (new Benchmark (kernelConfig));
17 | 
18 |   // Kernel Initializer
19 |   function init () {
20 |     // Do initial sanity check and initialize data for the kernels.
21 |     // The sanity check should verify that the simd and nonSimd results
22 |     // are the same.
23 |     // It is recommended to do minimal object creation in the kernels
24 |     // themselves.  If global data needs to be initialized, here would
25 |     // be the place to do it.
26 |     // If the sanity checks fails the kernels will not be executed
27 |     // Returns:
28 |     //   true:  First run (unoptimized) of the kernels passed
29 |     //   false: First run (unoptimized) of the kernels failed
30 |     return simd (1) === nonSimd (1);
31 |   }
32 | 
33 |   // Kernel Cleanup
34 |   function cleanup () {
35 |     // Do final sanity check and perform cleanup.
36 |     // This function is called when all the kernel iterations have been
37 |     // executed, so they should be in their final optimized version.  The 
38 |     // sanity check done during initialization will probably be of the 
39 |     // initial unoptimized version.
40 |     // Returns:
41 |     //   true:  Last run (optimized) of the kernels passed
42 |     //   false: last run (optimized) of the kernels failed
43 |     return simd (1) === nonSimd (1);
44 |   }
45 | 
46 |   // SIMD version of the kernel
47 |   function simd (n) {
48 |     var s = 0;
49 |     for (var i = 0; i < n; ++i) {
50 |       s += i;
51 |     }
52 |     return s;
53 |   }
54 | 
55 |   // Non SIMD version of the kernel
56 |   function nonSimd (n) {
57 |     var s = 0;
58 |     for (var i = 0; i < n; ++i) {
59 |       s += i;
60 |     }
61 |     return s;
62 |   }
63 | 
64 | } ());
65 | 


--------------------------------------------------------------------------------
/src/benchmarks/mandelbrot.js:
--------------------------------------------------------------------------------
  1 | // Mandelbrot Benchmark
  2 | // Author: Peter Jensen
  3 | (function () {
  4 | 
  // Kernel configuration: names the kernel and wires up the callbacks defined
  // further down in this file.
  // NOTE(review): kernelIterations is presumably the pass count the harness
  // hands to the kernels -- confirm against the Benchmark implementation.
  var kernelConfig = {
    kernelName: "Mandelbrot",
    kernelInit: initMandelbrot,
    kernelCleanup: cleanupMandelbrot,
    kernelSimd: simdMandelbrot,
    kernelNonSimd: nonSimdMandelbrot,
    kernelIterations: 10000
  };

  // Register this kernel with the harness
  benchmarks.add(new Benchmark(kernelConfig));
 17 | 
 18 |   function Float32x4ToString (f4) {
 19 |     return "[" + SIMD.Float32x4.extractLane(f4, 0) + "," +
 20 |         SIMD.Float32x4.extractLane(f4, 1) + "," +
 21 |         SIMD.Float32x4.extractLane(f4, 2) + "," +
 22 |         SIMD.Float32x4.extractLane(f4, 3) + "]";
 23 |   }
 24 | 
 25 |   function Int32x4ToString (i4) {
 26 |     return "[" + SIMD.Int32x4.extractLane(i4, 0) + "," +
 27 |         SIMD.Int32x4.extractLane(i4, 1) + "," +
 28 |         SIMD.Int32x4.extractLane(i4, 2) + "," +
 29 |         SIMD.Int32x4.extractLane(i4, 3) + "]";
 30 |   }
 31 | 
 32 |   function mandelx1(c_re, c_im, max_iterations) {
 33 |     var z_re = c_re,
 34 |         z_im = c_im,
 35 |         i;
 36 |     for (i = 0; i < max_iterations; i++) {
 37 |       var z_re2 = z_re*z_re;
 38 |       var z_im2 = z_im*z_im;
 39 |       if (z_re2 + z_im2 > 4.0)
 40 |         break;
 41 | 
 42 |       var new_re = z_re2 - z_im2;
 43 |       var new_im = 2.0 * z_re * z_im;
 44 |       z_re = c_re + new_re;
 45 |       z_im = c_im + new_im;
 46 |     }
 47 |     return i;
 48 |   }
 49 | 
 50 |   function mandelx4(c_re4, c_im4, max_iterations) {
 51 |     var z_re4  = c_re4;
 52 |     var z_im4  = c_im4;
 53 |     var four4  = SIMD.Float32x4.splat (4.0);
 54 |     var two4   = SIMD.Float32x4.splat (2.0);
 55 |     var count4 = SIMD.Int32x4.splat (0);
 56 |     var zero4  = SIMD.Int32x4.splat (0);
 57 |     var one4   = SIMD.Int32x4.splat (1);
 58 | 
 59 |     for (var i = 0; i < max_iterations; ++i) {
 60 |       var z_re24 = SIMD.Float32x4.mul (z_re4, z_re4);
 61 |       var z_im24 = SIMD.Float32x4.mul (z_im4, z_im4);
 62 | 
 63 |       var mb4    = SIMD.Float32x4.lessThanOrEqual (SIMD.Float32x4.add (z_re24, z_im24), four4);
 64 |       // if all 4 values are greater than 4.0, there's no reason to continue
 65 |       if (!SIMD.Bool32x4.allTrue(mb4)) {
 66 |         break;
 67 |       }
 68 | 
 69 |       var new_re4 = SIMD.Float32x4.sub(z_re24, z_im24);
 70 |       var new_im4 = SIMD.Float32x4.mul(SIMD.Float32x4.mul (two4, z_re4), z_im4);
 71 |       z_re4       = SIMD.Float32x4.add(c_re4, new_re4);
 72 |       z_im4       = SIMD.Float32x4.add(c_im4, new_im4);
 73 |       count4      = SIMD.Int32x4.add(count4, SIMD.Int32x4.select(mb4, one4, zero4));
 74 |     }
 75 |     return count4;
 76 |   }
 77 | 
 78 |   function sanityCheck() {
 79 |     var simd    = simdMandelbrot(1);
 80 |     var nonSimd = nonSimdMandelbrot(1);
 81 |     if (simd.length !== nonSimd.length) {
 82 |       return false;
 83 |     }
 84 |     for (var i = 0, n = simd.length; i < n; ++i) {
 85 |       if (simd[i] !== nonSimd[i]) {
 86 |         return false;
 87 |       }
 88 |     }
 89 |     return true;
 90 |   }
 91 | 
 92 |   function initMandelbrot() {
 93 |     return sanityCheck();
 94 |   }
 95 | 
 96 |   function cleanupMandelbrot() {
 97 |     return sanityCheck();
 98 |   }
 99 | 
100 |   // Non SIMD version of the kernel
101 |   function nonSimdMandelbrot (n) {
102 |     var result = new Array (4);
103 |     for (var i = 0; i < n; ++i) {
104 |       result [0] = mandelx1 (0.01, 0.01, 100);
105 |       result [1] = mandelx1 (0.01, 0.01, 100);
106 |       result [2] = mandelx1 (0.01, 0.01, 100);
107 |       result [3] = mandelx1 (0.01, 0.01, 100);
108 |     }
109 |     return result;
110 |   }
111 | 
112 |   // SIMD version of the kernel
113 |   function simdMandelbrot (n) {
114 |     var result = new Array (4);
115 |     var vec0  = SIMD.Float32x4.splat (0.01);
116 |     for (var i = 0; i < n; ++i) {
117 |       var r = mandelx4 (vec0, vec0, 100);
118 |       result [0] = SIMD.Int32x4.extractLane(r, 0);
119 |       result [1] = SIMD.Int32x4.extractLane(r, 1);
120 |       result [2] = SIMD.Int32x4.extractLane(r, 2);
121 |       result [3] = SIMD.Int32x4.extractLane(r, 3);
122 |     }
123 |     return result;
124 |   }
125 | 
126 | } ());
127 | 


--------------------------------------------------------------------------------
/src/benchmarks/matrix-multiplication.js:
--------------------------------------------------------------------------------
  1 | // 4x4 matrix multiplication
  2 | // Author: John McCutchan
  3 | 
  4 | (function () {
  5 | 
  // Kernel configuration: names the kernel and wires up the callbacks defined
  // further down in this file.
  // NOTE(review): kernelIterations is presumably the pass count the harness
  // hands to the kernels -- confirm against the Benchmark implementation.
  var kernelConfig = {
    kernelName: "MatrixMultiplication",
    kernelInit: init,
    kernelCleanup: cleanup,
    kernelSimd: simdMultiply,
    kernelNonSimd: multiply,
    kernelIterations: 1000
  };

  // Register this kernel with the harness
  benchmarks.add(new Benchmark(kernelConfig));

  // Benchmark data: 16-element (4x4) operand and result arrays.
  // T1, T2 and Out belong to the scalar kernel.
  var T1 = new Float32Array(16);
  var T2 = new Float32Array(16);
  var Out = new Float32Array(16);
  // T1x, T2x and Outx are the SIMD kernel's copies.
  var T1x = new Float32Array(16);
  var T2x = new Float32Array(16);
  var Outx = new Float32Array(16);
 26 | 
 27 |   function equals(A, b) {
 28 |     return (A[0] == SIMD.Float32x4.extractLane(SIMD.Float32x4.load(b, 0), 0)) &&
 29 |            (A[1] == SIMD.Float32x4.extractLane(SIMD.Float32x4.load(b, 0), 1)) &&
 30 |            (A[2] == SIMD.Float32x4.extractLane(SIMD.Float32x4.load(b, 0), 2)) &&
 31 |            (A[3] == SIMD.Float32x4.extractLane(SIMD.Float32x4.load(b, 0), 3)) &&
 32 |            (A[4] == SIMD.Float32x4.extractLane(SIMD.Float32x4.load(b, 4), 0)) &&
 33 |            (A[5] == SIMD.Float32x4.extractLane(SIMD.Float32x4.load(b, 4), 1)) &&
 34 |            (A[6] == SIMD.Float32x4.extractLane(SIMD.Float32x4.load(b, 4), 2)) &&
 35 |            (A[7] == SIMD.Float32x4.extractLane(SIMD.Float32x4.load(b, 4), 3)) &&
 36 |            (A[8] == SIMD.Float32x4.extractLane(SIMD.Float32x4.load(b, 8), 0)) &&
 37 |            (A[9] == SIMD.Float32x4.extractLane(SIMD.Float32x4.load(b, 8), 1)) &&
 38 |            (A[10] == SIMD.Float32x4.extractLane(SIMD.Float32x4.load(b, 8), 2)) &&
 39 |            (A[11] == SIMD.Float32x4.extractLane(SIMD.Float32x4.load(b, 8), 3)) &&
 40 |            (A[12] == SIMD.Float32x4.extractLane(SIMD.Float32x4.load(b, 12), 0)) &&
 41 |            (A[13] == SIMD.Float32x4.extractLane(SIMD.Float32x4.load(b, 12), 1)) &&
 42 |            (A[14] == SIMD.Float32x4.extractLane(SIMD.Float32x4.load(b, 12), 2)) &&
 43 |            (A[15] == SIMD.Float32x4.extractLane(SIMD.Float32x4.load(b, 12), 3));
 44 |   }
 45 | 
 46 |   function init() {
 47 |     T1[0] = 1.0;
 48 |     T1[5] = 1.0;
 49 |     T1[10] = 1.0;
 50 |     T1[15] = 1.0;
 51 | 
 52 |     T2[0] = 2.0;
 53 |     T2[5] = 2.0;
 54 |     T2[10] = 2.0;
 55 |     T2[15] = 2.0;
 56 | 
 57 |     SIMD.Float32x4.store(T1x, 0,  SIMD.Float32x4(1.0, 0.0, 0.0, 0.0));
 58 |     SIMD.Float32x4.store(T1x, 4,  SIMD.Float32x4(0.0, 1.0, 0.0, 0.0));
 59 |     SIMD.Float32x4.store(T1x, 8,  SIMD.Float32x4(0.0, 0.0, 1.0, 0.0));
 60 |     SIMD.Float32x4.store(T1x, 12, SIMD.Float32x4(0.0, 0.0, 0.0, 1.0));
 61 | 
 62 |     SIMD.Float32x4.store(T2x, 0,  SIMD.Float32x4(2.0, 0.0, 0.0, 0.0));
 63 |     SIMD.Float32x4.store(T2x, 4,  SIMD.Float32x4(0.0, 2.0, 0.0, 0.0));
 64 |     SIMD.Float32x4.store(T2x, 8,  SIMD.Float32x4(0.0, 0.0, 2.0, 0.0));
 65 |     SIMD.Float32x4.store(T2x, 12, SIMD.Float32x4(0.0, 0.0, 0.0, 2.0));
 66 | 
 67 |     multiply(1);
 68 |     simdMultiply(1);
 69 |     return equals(T1, T1x) && equals(T2, T2x) && equals(Out, Outx);
 70 |   }
 71 | 
 72 |   function cleanup() {
 73 |     return init(); // Sanity checking before and after are the same
 74 |   }
 75 | 
 76 |   function multiply(n) {
 77 |     for (var i = 0; i < n; i++) {
 78 |       var a00 = T1[0];
 79 |       var a01 = T1[1];
 80 |       var a02 = T1[2];
 81 |       var a03 = T1[3];
 82 |       var a10 = T1[4];
 83 |       var a11 = T1[5];
 84 |       var a12 = T1[6];
 85 |       var a13 = T1[7];
 86 |       var a20 = T1[8];
 87 |       var a21 = T1[9];
 88 |       var a22 = T1[10];
 89 |       var a23 = T1[11];
 90 |       var a30 = T1[12];
 91 |       var a31 = T1[13];
 92 |       var a32 = T1[14];
 93 |       var a33 = T1[15];
 94 | 
 95 |       var b0 = T2[0];
 96 |       var b1 = T2[1];
 97 |       var b2 = T2[2];
 98 |       var b3 = T2[3];
 99 |       Out[0] = b0*a00 + b1*a10 + b2*a20 + b3*a30;
100 |       Out[1] = b0*a01 + b1*a11 + b2*a21 + b3*a31;
101 |       Out[2] = b0*a02 + b1*a12 + b2*a22 + b3*a32;
102 |       Out[3] = b0*a03 + b1*a13 + b2*a23 + b3*a33;
103 | 
104 |       b0 = T2[4];
105 |       b1 = T2[5];
106 |       b2 = T2[6];
107 |       b3 = T2[7];
108 |       Out[4] = b0*a00 + b1*a10 + b2*a20 + b3*a30;
109 |       Out[5] = b0*a01 + b1*a11 + b2*a21 + b3*a31;
110 |       Out[6] = b0*a02 + b1*a12 + b2*a22 + b3*a32;
111 |       Out[7] = b0*a03 + b1*a13 + b2*a23 + b3*a33;
112 | 
113 |       b0 = T2[8];
114 |       b1 = T2[9];
115 |       b2 = T2[10];
116 |       b3 = T2[11];
117 |       Out[8] = b0*a00 + b1*a10 + b2*a20 + b3*a30;
118 |       Out[9] = b0*a01 + b1*a11 + b2*a21 + b3*a31;
119 |       Out[10] = b0*a02 + b1*a12 + b2*a22 + b3*a32;
120 |       Out[11] = b0*a03 + b1*a13 + b2*a23 + b3*a33;
121 | 
122 |       b0 = T2[12];
123 |       b1 = T2[13];
124 |       b2 = T2[14];
125 |       b3 = T2[15];
126 |       Out[12] = b0*a00 + b1*a10 + b2*a20 + b3*a30;
127 |       Out[13] = b0*a01 + b1*a11 + b2*a21 + b3*a31;
128 |       Out[14] = b0*a02 + b1*a12 + b2*a22 + b3*a32;
129 |       Out[15] = b0*a03 + b1*a13 + b2*a23 + b3*a33;
130 |     }
131 |   }
132 | 
133 |   function simdMultiply(n) {
134 |     for (var i = 0; i < n; i++) {
135 |       var a0 = SIMD.Float32x4.load(T1x, 0);
136 |       var a1 = SIMD.Float32x4.load(T1x, 4);
137 |       var a2 = SIMD.Float32x4.load(T1x, 8);
138 |       var a3 = SIMD.Float32x4.load(T1x, 12);
139 |       var b0 = SIMD.Float32x4.load(T2x, 0);
140 |       SIMD.Float32x4.store(Outx, 0,
141 |                   SIMD.Float32x4.add(
142 |                       SIMD.Float32x4.mul(SIMD.Float32x4.swizzle(b0, 0, 0, 0, 0), a0),
143 |                     SIMD.Float32x4.add(
144 |                       SIMD.Float32x4.mul(SIMD.Float32x4.swizzle(b0, 1, 1, 1, 1), a1),
145 |                     SIMD.Float32x4.add(
146 |                       SIMD.Float32x4.mul(SIMD.Float32x4.swizzle(b0, 2, 2, 2, 2), a2),
147 |                       SIMD.Float32x4.mul(SIMD.Float32x4.swizzle(b0, 3, 3, 3, 3), a3)))));
148 |       var b1 = SIMD.Float32x4.load(T2x, 4);
149 |       SIMD.Float32x4.store(Outx, 4,
150 |                   SIMD.Float32x4.add(
151 |                       SIMD.Float32x4.mul(SIMD.Float32x4.swizzle(b1, 0, 0, 0, 0), a0),
152 |                     SIMD.Float32x4.add(
153 |                       SIMD.Float32x4.mul(SIMD.Float32x4.swizzle(b1, 1, 1, 1, 1), a1),
154 |                     SIMD.Float32x4.add(
155 |                       SIMD.Float32x4.mul(SIMD.Float32x4.swizzle(b1, 2, 2, 2, 2), a2),
156 |                       SIMD.Float32x4.mul(SIMD.Float32x4.swizzle(b1, 3, 3, 3, 3), a3)))));
157 |       var b2 = SIMD.Float32x4.load(T2x, 8);
158 |       SIMD.Float32x4.store(Outx, 8,
159 |                   SIMD.Float32x4.add(
160 |                       SIMD.Float32x4.mul(SIMD.Float32x4.swizzle(b2, 0, 0, 0, 0), a0),
161 |                     SIMD.Float32x4.add(
162 |                       SIMD.Float32x4.mul(SIMD.Float32x4.swizzle(b2, 1, 1, 1, 1), a1),
163 |                     SIMD.Float32x4.add(
164 |                       SIMD.Float32x4.mul(SIMD.Float32x4.swizzle(b2, 2, 2, 2, 2), a2),
165 |                       SIMD.Float32x4.mul(SIMD.Float32x4.swizzle(b2, 3, 3, 3, 3), a3)))));
166 |       var b3 = SIMD.Float32x4.load(T2x, 12);
167 |       SIMD.Float32x4.store(Outx, 12,
168 |                   SIMD.Float32x4.add(
169 |                       SIMD.Float32x4.mul(SIMD.Float32x4.swizzle(b3, 0, 0, 0, 0), a0),
170 |                     SIMD.Float32x4.add(
171 |                       SIMD.Float32x4.mul(SIMD.Float32x4.swizzle(b3, 1, 1, 1, 1), a1),
172 |                     SIMD.Float32x4.add(
173 |                       SIMD.Float32x4.mul(SIMD.Float32x4.swizzle(b3, 2, 2, 2, 2), a2),
174 |                       SIMD.Float32x4.mul(SIMD.Float32x4.swizzle(b3, 3, 3, 3, 3), a3)))));
175 |     }
176 |   }
177 | 
178 | } ());
179 | 


--------------------------------------------------------------------------------
/src/benchmarks/memcpy.js:
--------------------------------------------------------------------------------
  1 | // Simple performance test memcpy using SIMD.
  2 | // Author: Moh Haghighat 
  3 | // January 20, 2015
  4 | 
  5 | (function () {
  6 | 
  // Kernel configuration: names the kernel and wires up the callbacks defined
  // further down in this file.
  // NOTE(review): kernelIterations is presumably the pass count the harness
  // hands to the kernels -- confirm against the Benchmark implementation.
  var kernelConfig = {
    kernelName: "Memcpy",
    kernelInit: initArray,
    kernelCleanup: cleanup,
    kernelSimd: simdMemcpy,
    kernelNonSimd: memcpy,
    kernelIterations: 1000
  };

  // Register this kernel with the harness
  benchmarks.add(new Benchmark(kernelConfig));

  // Benchmark data: one 128 KiB buffer viewed as signed bytes, 32-bit ints
  // and unsigned bytes.
  var TOTAL_MEMORY = 4096 * 32;
  var buffer = new ArrayBuffer(TOTAL_MEMORY);
  var HEAP8 = new Int8Array(buffer);
  var HEAP32 = new Int32Array(buffer);
  var HEAPU8 = new Uint8Array(buffer);

  // Three regions of LEN (= 4096) bytes, spaced 2 * LEN bytes apart: ptr1 is
  // the copy source, ptr2 the scalar kernel's destination and ptr3 the SIMD
  // kernel's destination.
  var LEN = TOTAL_MEMORY / 32;
  var ptr1 = 0;
  var ptr2 = ptr1 + 2 * LEN;
  var ptr3 = ptr2 + 2 * LEN;
  // Base fill value; it is stored through an Int8Array, so it wraps.
  var VAL = 200;
 32 | 
 33 |   function sanityCheck() {
 34 |     for (var j = 0; j < LEN; ++j) {
 35 |       if (HEAP8[ptr2+j] != HEAP8[ptr3+j]) {
 36 |         return false; 
 37 |       } 
 38 |     }
 39 |     return true; 
 40 |   }
 41 | 
 42 |   function initArray() {
 43 |     for (var j = 0; j < LEN; ++j) {
 44 |       HEAP8[ptr1+j] = (VAL+1*j)|0;
 45 |       HEAP8[ptr2+j] = (VAL+2*j)|0;
 46 |       HEAP8[ptr3+j] = (VAL+3*j)|0;
 47 |     }
 48 |     return true;
 49 |   }
 50 | 
 51 |   function cleanup() {
 52 |     return sanityCheck();
 53 |   }
 54 | 
 55 |   function _emscripten_memcpy_big(dest, src, num) {
 56 |     dest = dest; src = src; num = num; 
 57 |     HEAPU8.set(HEAPU8.subarray(src, src+num), dest);
 58 |     return dest;
 59 |   }
 60 | 
 61 |   function NonSimdAsmjsModule (global, imp, buffer) {
 62 |     "use asm"
 63 | 
 64 |     var HEAP8 = new global.Int8Array(buffer);
 65 |     var HEAP32 = new global.Int32Array(buffer);
 66 |     var _emscripten_memcpy_big = imp._emscripten_memcpy_big;
 67 | 
 68 |     function _memcpy(dest, src, num) {
 69 |       dest = dest|0; src = src|0; num = num|0;
 70 |       var ret = 0;
 71 |       if ((num|0) >= 4096) return _emscripten_memcpy_big(dest|0, src|0, num|0)|0;
 72 |       ret = dest|0;
 73 |       if ((dest&3) == (src&3)) {
 74 |         while (dest & 3) {
 75 |           if ((num|0) == 0) return ret|0;
 76 |           HEAP8[((dest)>>0)]=((HEAP8[((src)>>0)])|0);
 77 |           dest = (dest+1)|0;
 78 |           src = (src+1)|0;
 79 |           num = (num-1)|0;
 80 |         }
 81 |         while ((num|0) >= 4) {
 82 |           HEAP32[((dest)>>2)]=((HEAP32[((src)>>2)])|0);
 83 |           dest = (dest+4)|0;
 84 |           src = (src+4)|0;
 85 |           num = (num-4)|0;
 86 |         }
 87 |       }
 88 |       while ((num|0) > 0) {
 89 |         HEAP8[((dest)>>0)]=((HEAP8[((src)>>0)])|0);
 90 |         dest = (dest+1)|0;
 91 |         src = (src+1)|0;
 92 |         num = (num-1)|0;
 93 |       }
 94 |       return ret|0;
 95 |     }
 96 | 
 97 |     return _memcpy;
 98 |   }
 99 | 
  // asm.js module exposing a memcpy whose bulk copy uses 16-byte
  // SIMD.Int32x4 loads and stores.
  //   global: object supplying Int8Array, Int32Array, Uint8Array and SIMD
  //   imp:    imports; must provide _emscripten_memcpy_big(dest, src, num)
  //   buffer: ArrayBuffer that all heap views below alias
  // Returns the _memcpy function.
  function SimdAsmjsModule (global, imp, buffer) {
    "use asm"

    var HEAP8 = new global.Int8Array(buffer);
    var HEAP32 = new global.Int32Array(buffer);
    var HEAPU8 = new global.Uint8Array(buffer);
    var _emscripten_memcpy_big = imp._emscripten_memcpy_big;
    var i4 = global.SIMD.Int32x4;
    var i4load  = i4.load;
    var i4store = i4.store;

    // Copies num bytes from byte offset src to byte offset dest inside buffer.
    // Returns the original dest, or whatever _emscripten_memcpy_big returns
    // when num >= 4096.
    function _memcpy(dest, src, num) {
      dest = dest|0; src = src|0; num = num|0;
      var ret = 0;
      // Large copies are delegated to the imported helper.
      if ((num|0) >= 4096) return _emscripten_memcpy_big(dest|0, src|0, num|0)|0;
      ret = dest|0;

      if ((num|0) >= 16) { 
        // Copy single bytes until dest is a multiple of 16. This takes at most
        // 15 iterations and num starts at >= 16, so num stays positive here.
        // Only dest is aligned; src keeps whatever alignment it had.
        while (dest & 15) {
          if ((num|0) == 0) return ret|0;
          HEAP8[((dest)>>0)]=((HEAP8[((src)>>0)])|0);
          dest = (dest+1)|0;
          src = (src+1)|0;
          num = (num-1)|0;
        }
        // Bulk copy, 16 bytes per iteration, addressed as byte offsets via HEAPU8.
        while ((num|0) >= 16) {
          i4store(HEAPU8, ((dest)>>0), i4load(HEAPU8, ((src)>>0))); 
          dest = (dest+16)|0;
          src = (src+16)|0;
          num = (num-16)|0;
        }
        if ((num|0) == 0) return ret|0;
      } 

      // Word copies are only used when dest and src share the same offset
      // modulo 4, so that both become 4-byte aligned together.
      if ((dest&3) == (src&3)) {
        while (dest & 3) {
          if ((num|0) == 0) return ret|0;
          HEAP8[((dest)>>0)]=((HEAP8[((src)>>0)])|0);
          dest = (dest+1)|0;
          src = (src+1)|0;
          num = (num-1)|0;
        }
        while ((num|0) >= 4) {
          HEAP32[((dest)>>2)]=((HEAP32[((src)>>2)])|0);
          dest = (dest+4)|0;
          src = (src+4)|0;
          num = (num-4)|0;
        }
      } 

      // Remaining tail (or the whole copy when the alignments differ), one
      // byte at a time.
      while ((num|0) > 0) {
        HEAP8[((dest)>>0)]=((HEAP8[((src)>>0)])|0);
        dest = (dest+1)|0;
        src = (src+1)|0;
        num = (num-1)|0;
      }

      return ret|0;
    }

    return _memcpy;
  }
162 | 
163 |   function memcpy(n) {
164 |     var func = NonSimdAsmjsModule(this, {"_emscripten_memcpy_big": _emscripten_memcpy_big}, buffer);
165 |     for (var i = 0; i < n; ++i) {
166 |       // try memcpy of variable lengths, from 0 to LEN
167 |       for (var j = 0; j < LEN; ++j) {
168 |         // try different (alignment mod 16) from 0 to 15
169 |         for (var k = 0; k < 16; k++){
170 |           func (ptr2+k, ptr1, j);
171 |         }
172 |       }
173 |     }
174 |     return true;
175 |   }
176 | 
177 |   function simdMemcpy(n) {
178 |     var func = SimdAsmjsModule(this, {"_emscripten_memcpy_big": _emscripten_memcpy_big}, buffer);
179 |     for (var i = 0; i < n; ++i) {
180 |       // try memcpy of variable lengths, from 0 to LEN
181 |       for (var j = 0; j < LEN; ++j) {
182 |         // try different (alignment mod 16) from 0 to 15
183 |         for (var k = 0; k < 16; k++){
184 |           func (ptr3+k, ptr1, j);
185 |         }
186 |       }
187 |     }
188 |     return true;
189 |   }
190 | 
191 | } ());
192 | 


--------------------------------------------------------------------------------
/src/benchmarks/memset.js:
--------------------------------------------------------------------------------
// Simple performance test of memset using SIMD.
  2 | // Author: Moh Haghighat 
  3 | // December 10, 2014
  4 | 
  5 | (function () {
  6 | 
  // Kernel configuration: names the benchmark and wires the init/cleanup
  // hooks and the two kernels (SIMD and scalar) defined below.
  var kernelConfig = {
    kernelName:       "Memset",
    kernelInit:       initArray,
    kernelCleanup:    cleanup,
    kernelSimd:       simdMemset,
    kernelNonSimd:    memset,
    kernelIterations: 1000
  };

  // Hook up to the harness
  benchmarks.add(new Benchmark(kernelConfig));

  // Benchmark data, initialization and kernel functions.
  // All three typed-array views alias the same ArrayBuffer.
  var TOTAL_MEMORY = 4096*32;
  var buffer = new ArrayBuffer(TOTAL_MEMORY);
  var HEAP8 = new Int8Array(buffer);
  var HEAP32 = new Int32Array(buffer);
  var HEAPU8 = new Uint8Array(buffer);

  // Two adjacent regions of LEN bytes each: memset() fills the one at ptr1,
  // simdMemset() the one at ptr2; sanityCheck() compares them byte by byte.
  var LEN  = TOTAL_MEMORY/16;
  var ptr1 = 0;
  var ptr2 = ptr1 + LEN; 
  var VAL  = 200; // fill value passed to both kernels
 31 | 
 32 |   function sanityCheck() {
 33 |     for (var j = 0; j < LEN; ++j) {
 34 |       if (HEAP8[ptr1+j] != HEAP8[ptr2+j]) {
 35 |         return false; 
 36 |       } 
 37 |     }
 38 |     return true; 
 39 |   }
 40 | 
  // Kernel init hook. Nothing to prepare for this benchmark, so it always
  // reports success.
  function initArray() {
    return true;
  }
 44 | 
  // Kernel cleanup hook: passes only if the scalar and SIMD kernels left
  // identical bytes in their respective regions (see sanityCheck).
  function cleanup() {
    return sanityCheck();
  }
 48 | 
  // asm.js module exposing a scalar memset.
  //   global: object supplying Int8Array and Int32Array
  //   imp:    imports (not used by this module)
  //   buffer: ArrayBuffer that both heap views alias
  // Returns the _memset function.
  function NonSimdAsmjsModule (global, imp, buffer) {
    "use asm"

    var HEAP8 = new global.Int8Array(buffer);
    var HEAP32 = new global.Int32Array(buffer);

    // Writes the low byte of value to the num bytes starting at byte offset
    // ptr. The loops stop once ptr reaches stop (ptr + num), so the returned
    // ptr - num is the starting offset.
    function _memset(ptr, value, num) {
      ptr = ptr|0;
      value = value|0;
      num = num|0;
      var stop = 0, value4 = 0, stop4 = 0, unaligned = 0;
      stop = (ptr + num)|0;
      if ((num|0) >= 20) {
        // This is unaligned, but quite large, so work hard to get to aligned settings
        value = value & 0xff;
        unaligned = ptr & 3;
        // Replicate the byte into all four bytes of a 32-bit word.
        value4 = value | (value << 8) | (value << 16) | (value << 24);
        // Last 4-byte boundary at or before stop.
        stop4 = stop & ~3;
        if (unaligned) {
          // Reuse 'unaligned' as the offset of the next 4-byte boundary.
          unaligned = (ptr + 4 - unaligned)|0;
          while ((ptr|0) < (unaligned|0)) { // no need to check for stop, since we have large num
            HEAP8[((ptr)>>0)]=value;
            ptr = (ptr+1)|0;
          }
        }
        // Aligned middle part, one 32-bit word per iteration.
        while ((ptr|0) < (stop4|0)) {
          HEAP32[((ptr)>>2)]=value4;
          ptr = (ptr+4)|0;
        }
      }
      // Trailing bytes, or the whole range when num < 20.
      while ((ptr|0) < (stop|0)) {
        HEAP8[((ptr)>>0)]=value;
        ptr = (ptr+1)|0;
      }
      return (ptr-num)|0;
    }

    return _memset;
  }
 88 | 
  // asm.js module exposing a memset whose bulk fill uses 16-byte
  // SIMD.Int32x4 stores.
  //   global: object supplying Int8Array, Int32Array, Uint8Array and SIMD
  //   imp:    imports (not used by this module)
  //   buffer: ArrayBuffer that all heap views alias
  // Returns the _simdMemset function.
  function SimdAsmjsModule (global, imp, buffer) {
    "use asm"

    var HEAP8 = new global.Int8Array(buffer);
    var HEAP32 = new global.Int32Array(buffer);
    var HEAPU8 = new global.Uint8Array(buffer);
    var i4 = global.SIMD.Int32x4;
    var i4splat = i4.splat;
    var i4store = i4.store;

    // Writes the low byte of value to the num bytes starting at byte offset
    // ptr. The loops stop once ptr reaches stop (ptr + num), so the returned
    // ptr - num is the starting offset.
    function _simdMemset(ptr, value, num) {
      ptr = ptr|0;
      value = value|0;
      num = num|0;

      var value2 = 0, value4 = 0, value16 = i4(0, 0, 0, 0), stop = 0, stop4 = 0, stop16 = 0, unaligned = 0;

      stop = (ptr + num)|0;
      if ((num|0) >= 16) {
        // This is unaligned, but quite large, so work hard to get to aligned settings
        value = value & 0xff;

        unaligned = ptr & 0xf;
        if (unaligned) {
          // Initialize the 16-byte unaligned leading part 
          // ('unaligned' is reused as the offset of the next 16-byte boundary;
          // at most 15 bytes are written here and num >= 16.)
          unaligned = (ptr + 16 - unaligned)|0;
          while ((ptr|0) < (unaligned|0)) { // no need to check for stop, since we have large num
            HEAP8[((ptr)>>0)]=value;
            ptr = (ptr+1)|0;
          }
        }

        // Replicate the byte to 2 bytes, then 4 bytes, then all four lanes.
        value2 = (value  | (value  << 8))|0;  
        value4 = (value2 | (value2 << 16))|0;
        value16 =i4splat(value4);
        // Last 16-byte boundary at or before stop.
        stop16 = stop & ~15;


        // Aligned bulk, 16 bytes per store, addressed as byte offsets via HEAPU8.
        while ((ptr|0) < (stop16|0)) {
          i4store(HEAPU8, ((ptr)>>0), value16);
          ptr = (ptr+16)|0;
        }

        // Whatever whole 32-bit words remain after the 16-byte stores.
        stop4 = stop & ~3;
        while ((ptr|0) < (stop4|0)) {
          HEAP32[((ptr)>>2)]=value4;
          ptr = (ptr+4)|0;
        }
      }
      // Trailing bytes, or the whole range when num < 16.
      while ((ptr|0) < (stop|0)) {
        HEAP8[((ptr)>>0)]=value;
        ptr = (ptr+1)|0;
      }
      return (ptr-num)|0;
    }

    return _simdMemset;
  }
147 | 
148 |   function memset(n) {
149 |     var func = NonSimdAsmjsModule(this, {}, buffer);
150 |     for (var i = 0; i < n; ++i) {
151 |       func (ptr1, VAL, LEN);
152 |     }
153 |     return true;
154 |   }
155 | 
156 |   function simdMemset(n) {
157 |     var func = SimdAsmjsModule(this, {}, buffer);
158 |     for (var i = 0; i < n; ++i) {
159 |       func (ptr2, VAL, LEN);
160 |     }
161 |     return true;
162 |   }
163 | 
164 | } ());
165 | 


--------------------------------------------------------------------------------
/src/benchmarks/run.js:
--------------------------------------------------------------------------------
 1 | "use strict"
 2 | 
 3 | load ('../ecmascript_simd.js');
 4 | load ('base.js');
 5 | 
 6 | // load individual benchmarks
 7 | 
 8 | load ('kernel-template.js');
 9 | load ('averageFloat32x4.js');
10 | load ('averageFloat32x4LoadFromInt8Array.js');
11 | load ('averageFloat32x4LoadX.js');
12 | load ('averageFloat32x4LoadXY.js');
13 | load ('averageFloat32x4LoadXYZ.js');
14 | load ('averageInt32x4Load.js');
15 | load ('mandelbrot.js');
16 | load ('matrix-multiplication.js');
17 | load ('transform.js');
18 | load ('shiftrows.js');
19 | load ('aobench.js');
20 | load ('transform.js');
21 | load ('transpose4x4.js');
22 | load ('inverse4x4.js');
23 | load ('sinx4.js');
24 | load ('memset.js');
25 | load ('memcpy.js');
26 | 
// Harness callback for result lines (passed to runAll as notifyResult).
// Forwards the text to print(), which is expected to be supplied by the host
// shell.
function printResult (str) {
  print (str);
}
30 | 
// Harness callback for error lines (passed to runAll as notifyError).
// Errors go to the same print() output as results.
function printError (str) {
  print (str);
}
34 | 
// Harness callback for score lines (passed to runAll as notifyScore).
// Forwards the text to print().
function printScore (str) {
  print (str);
}
38 | 
// Run every registered benchmark, routing harness output through the three
// printers above.
// NOTE(review): the meaning of the trailing `true` flag is defined by
// runAll in base.js and is not visible here -- confirm before changing it.
benchmarks.runAll ({notifyResult: printResult,
                    notifyError:  printError,
                    notifyScore:  printScore},
                   true);
43 | 


--------------------------------------------------------------------------------
/src/benchmarks/run_browser.js:
--------------------------------------------------------------------------------
// Page element that receives a copy of everything the benchmarks print.
// It is looked up once, when this script is evaluated.
var logs = document.getElementById("logs");
 2 | 
 3 | function printResult(str) {
 4 |   console.log(str);
 5 |   logs.innerHTML += str + '<br>';
 6 | }
 7 | 
 8 | function printError(str) {
 9 |   console.log(str);
10 |   logs.innerHTML += str + '<br>';
11 | }
12 | 
// Harness callback for score lines: echoes the text to the console and
// appends it, followed by a line break, to the on-page log.
function printScore(str) {
  console.log(str);
  var entry = str + '<br>';
  logs.innerHTML += entry;
}
17 | 
// Once the page has finished loading, run every registered benchmark with
// output routed through the printers above, then log a completion line.
// NOTE(review): the meaning of the trailing `true` passed to runAll is
// defined in base.js and is not visible here.
window.onload = function() {
  console.log('Running benchmarks.');
  benchmarks.runAll({notifyResult: printResult,
                     notifyError:  printError,
                     notifyScore:  printScore}, true);
  printResult('Benchmarks completed.');
}


--------------------------------------------------------------------------------
/src/benchmarks/shiftrows.js:
--------------------------------------------------------------------------------
  1 | // ShiftRows is a hot function in the implementation of the Rijndael cipher
  2 | // For documentation see: http://asmaes.sourceforge.net/rijndael/rijndaelImplementation.pdf
  3 | // Author: Peter Jensen
  4 | (function() {
  5 | 
  // Kernel configuration: names the benchmark and wires the init/cleanup
  // hooks and the two kernels (SIMD and scalar) defined below.
  var kernelConfig = {
    kernelName:       "ShiftRows",
    kernelInit:       init,
    kernelCleanup:    cleanup,
    kernelSimd:       simdShiftRowsN,
    kernelNonSimd:    shiftRowsN,
    kernelIterations: 1000
  };

  // Hook up to the harness
  benchmarks.add(new Benchmark(kernelConfig));

  // Allocate the working arrays once, up front, so that the measured kernel
  // time is not overshadowed by object creation.

  var state   = new Int32Array(16);    // 4x4 state matrix, stored row by row
  var temp    = new Int32Array (1000); // Scratch row for shiftRows; big enough for 1000 columns
 24 | 
 25 |   function printState() {
 26 |     for (var r = 0; r < 4; ++r) {
 27 |       var str = "";
 28 |       var ri = r*4;
 29 |       for (var c = 0; c < 4; ++c) {
 30 |         var value = state[ri + c];
 31 |         if (value < 10) {
 32 |           str += " ";
 33 |         }
 34 |         str += " " + state[ri + c];
 35 |       }
 36 |       print(str);
 37 |     }
 38 |   }
 39 | 
 40 |   // initialize the 4x4 state matrix
 41 |   function initState() {
 42 |     for (var i = 0; i < 16; ++i) {
 43 |       state[i] = i;
 44 |     }
 45 |   }
 46 | 
 47 |   // Verify the result of calling shiftRows(state, 4)
 48 |   function checkState() {
 49 |     var expected = new Uint32Array(
 50 |       [ 0,  1,  2,  3,
 51 |         5,  6,  7,  4,
 52 |        10, 11,  8,  9,
 53 |        15, 12, 13, 14]);
 54 |     for (var i = 0; i < 16; ++i) {
 55 |       if (state[i] !== expected[i]) {
 56 |         return false;
 57 |       }
 58 |     }
 59 |     return true;
 60 |   }
 61 | 
 62 |   function init() {
 63 |     // Check that shiftRows yields the right result
 64 |     initState();
 65 |     shiftRowsN(1);
 66 |     if (!checkState()) {
 67 |       return false;
 68 |     }
 69 | 
 70 |     // Check that simdShiftRows yields the right result
 71 |     initState();
 72 |     simdShiftRowsN(1);
 73 |     if (!checkState()) {
 74 |       return false;
 75 |     }
 76 |     return true;
 77 |   }
 78 | 
  // Kernel cleanup hook: repeats the init-time verification of both kernels.
  function cleanup() {
    return init(); // Sanity checking before and after are the same
  }
 82 | 
 83 |   // This is the typical implementation of the shiftRows function
 84 |   function shiftRows(state, Nc) {
 85 |     for (var r = 1; r < 4; ++r) {
 86 |       var ri = r*Nc; // get the starting index of row 'r'
 87 |       var c;
 88 |       for (c = 0; c < Nc; ++c) {
 89 |         temp[c] = state[ri + ((c + r) % Nc)];
 90 |       }
 91 |       for (c = 0; c < Nc; ++c) {
 92 |         state[ri + c] = temp[c];
 93 |       }
 94 |     }
 95 |   }
 96 | 
 97 |   // The SIMD optimized version of the shiftRows function
 98 |   // The function is special cased for a 4 column setting (Nc == 4).
 99 |   // This is the value used for AES blocks (see documentation for details)
100 |   function simdShiftRows(state, Nc) {
101 |     if (Nc !== 4) {
102 |       shiftRows(state, Nc);
103 |     }
104 |     for (var r = 1; r < 4; ++r) {
105 |       var rx4 = SIMD.Int32x4.load(state, r << 2);
106 |       if (r == 1) {
107 |         SIMD.Int32x4.store(state, 4, SIMD.Int32x4.swizzle(rx4, 1, 2, 3, 0));
108 |       }
109 |       else if (r == 2) {
110 |         SIMD.Int32x4.store(state, 8, SIMD.Int32x4.swizzle(rx4, 2, 3, 0, 1));
111 |       }
112 |       else { // r == 3
113 |         SIMD.Int32x4.store(state, 12, SIMD.Int32x4.swizzle(rx4, 3, 0, 1, 2));
114 |       }
115 |     }
116 |   }
117 | 
118 |   function shiftRowsN(iterations) {
119 |     for (var i = 0; i < iterations; ++i) {
120 |       shiftRows(state, 4);
121 |     }
122 |   }
123 | 
124 |   function simdShiftRowsN(iterations) {
125 |     for (var i = 0; i < iterations; ++i) {
126 |       simdShiftRows(state, 4);
127 |     }
128 |   }
129 | } ());
130 | 


--------------------------------------------------------------------------------
/src/benchmarks/sinx4.js:
--------------------------------------------------------------------------------
  1 | // Compute sin() in 4 lanes:
// Algorithm adapted from: http://gruntthepeon.free.fr/ssemath/
  3 | // Author: Peter Jensen
  4 | (function () {
  5 | 
  // Kernel configuration: names the benchmark and wires the init/cleanup
  // hooks and the two kernels (SIMD and scalar) defined below.
  var kernelConfig = {
    kernelName:       "Sine",
    kernelInit:       init,
    kernelCleanup:    cleanup,
    kernelSimd:       simd,
    kernelNonSimd:    nonSimd,
    kernelIterations: 100000000
  };

  // Hook up to the harness
  benchmarks.add (new Benchmark (kernelConfig));
 18 | 
 19 |   // Kernel Initializer
 20 |   function init () {
 21 |     // Do initial sanity check and initialize data for the kernels.
 22 |     // The sanity check should verify that the simd and nonSimd results
 23 |     // are the same.
 24 |     // It is recommended to do minimal object creation in the kernels
 25 |     // themselves.  If global data needs to be initialized, here would
 26 |     // be the place to do it.
 27 |     // If the sanity checks fails the kernels will not be executed
 28 |     // Returns:
 29 |     //   true:  First run (unoptimized) of the kernels passed
 30 |     //   false: First run (unoptimized) of the kernels failed
 31 |     var simdResult    = simd(1);
 32 |     var nonSimdResult = nonSimd(1);
 33 |     return almostEqual (simdResult, nonSimdResult);
 34 |   }
 35 | 
 36 |   // Kernel Cleanup
 37 |   function cleanup () {
 38 |     // Do final sanity check and perform cleanup.
 39 |     // This function is called when all the kernel iterations have been
 40 |     // executed, so they should be in their final optimized version.  The
 41 |     // sanity check done during initialization will probably be of the
 42 |     // initial unoptimized version.
 43 |     // Returns:
 44 |     //   true:  Last run (optimized) of the kernels passed
 45 |     //   false: last run (optimized) of the kernels failed
 46 |     var simdResult    = simd(1);
 47 |     var nonSimdResult = nonSimd(1);
 48 |     return almostEqual (simdResult, nonSimdResult);
 49 |   }
 50 | 
 51 |   function almostEqual(a, b) {
 52 |     for (var i = 0; i < 4; ++i) {
 53 |       if (Math.abs (a - b) > 0.00001) {
 54 |         return false;
 55 |       }
 56 |     }
 57 |     return true;
 58 |   }
 59 | 
 60 |   function printFloat32x4(msg, v) {
 61 |     print (msg, SIMD.Float32x4.extractLane(v, 0).toFixed(6),
 62 |                 SIMD.Float32x4.extractLane(v, 1).toFixed(6),
 63 |                 SIMD.Float32x4.extractLane(v, 2).toFixed(6),
 64 |                 SIMD.Float32x4.extractLane(v, 3).toFixed(6));
 65 |   }
 66 | 
 67 |   function printInt32x4(msg, v) {
 68 |     print (msg, SIMD.Float32x4.extractLane(v, 0),
 69 |                 SIMD.Float32x4.extractLane(v, 1),
 70 |                 SIMD.Float32x4.extractLane(v, 2),
 71 |                 SIMD.Float32x4.extractLane(v, 3));
 72 |   }
 73 | 
 74 |   function sinx4Test() {
 75 |     var x = SIMD.Float32x4(1.0, 2.0, 3.0, 4.0);
 76 |     var sinx4 = simdSin(x);
 77 |     print (SIMD.Float32x4.extractLane(sinx4, 0),
 78 |            SIMD.Float32x4.extractLane(sinx4, 1),
 79 |            SIMD.Float32x4.extractLane(sinx4, 2),
 80 |            SIMD.Float32x4.extractLane(sinx4, 3));
 81 |     print (Math.sin(SIMD.Float32x4.extractLane(x, 0)),
 82 |            Math.sin(SIMD.Float32x4.extractLane(x, 1)),
 83 |            Math.sin(SIMD.Float32x4.extractLane(x, 2)),
 84 |            Math.sin(SIMD.Float32x4.extractLane(x, 3)));
 85 |   }
 86 | 
  // Constants for sinx4, splatted once so the kernel allocates nothing.

  // Sign-bit mask (bit 31 of every lane) and its complement.
  var _ps_sign_mask        = SIMD.Int32x4.splat(0x80000000);
  var _ps_inv_sign_mask    = SIMD.Int32x4.not(_ps_sign_mask);
  // 4/pi: scales the argument so that one unit corresponds to pi/4.
  var _ps_cephes_FOPI      = SIMD.Float32x4.splat(1.27323954473516);
  // Small integer constants used for the octant bookkeeping.
  var _pi32_1              = SIMD.Int32x4.splat(1);
  var _pi32_inv1           = SIMD.Int32x4.not(_pi32_1);
  var _pi32_4              = SIMD.Int32x4.splat(4);
  var _pi32_2              = SIMD.Int32x4.splat(2);
  // -pi/4 split into three parts of decreasing magnitude (they sum to
  // approximately -0.785398), applied one after the other in sinx4.
  var _ps_minus_cephes_DP1 = SIMD.Float32x4.splat(-0.78515625);
  var _ps_minus_cephes_DP2 = SIMD.Float32x4.splat(-2.4187564849853515625E-4);
  var _ps_minus_cephes_DP3 = SIMD.Float32x4.splat(-3.77489497744594108E-8);
  // Coefficients of the first polynomial evaluated in sinx4 (the one combined
  // with 1 - z/2).
  var _ps_coscof_p0        = SIMD.Float32x4.splat(2.443315711809948E-5);
  var _ps_coscof_p1        = SIMD.Float32x4.splat(-1.388731625493765E-3);
  var _ps_coscof_p2        = SIMD.Float32x4.splat(4.166664568298827E-2);
  var _ps_0p5              = SIMD.Float32x4.splat(0.5);
  var _ps_1                = SIMD.Float32x4.splat(1.0);
  // Coefficients of the second polynomial evaluated in sinx4 (the one
  // multiplied by x and added to x).
  var _ps_sincof_p0        = SIMD.Float32x4.splat(-1.9515295891E-4);
  var _ps_sincof_p1        = SIMD.Float32x4.splat(8.3321608736E-3);
  var _ps_sincof_p2        = SIMD.Float32x4.splat(-1.6666654611E-1);
105 | 
  // Computes an approximation of sin() for each of the four lanes of x.
  //   x: SIMD.Float32x4
  // Returns a SIMD.Float32x4 with the per-lane results.
  // Outline: strip the sign, scale by 4/pi and round to an even integer j,
  // subtract j*pi/4 from |x|, evaluate two polynomials on the reduced value,
  // pick one per lane from bit 1 of j, and finally restore/flip the sign.
  function sinx4 (x) {
    var xmm1;
    var xmm2;
    var xmm3;
    var sign_bit;
    var swap_sign_bit;
    var poly_mask;
    var y;
    var y2;
    var z;
    var tmp;

    var emm0;
    var emm2;
    var emm2mask;

    // Split the input into |x| and its sign bit.
    sign_bit = x;
    x        = SIMD.Float32x4.fromInt32x4Bits(SIMD.Int32x4.and(SIMD.Int32x4.fromFloat32x4Bits(x), _ps_inv_sign_mask));
    sign_bit = SIMD.Float32x4.fromInt32x4Bits(SIMD.Int32x4.and(SIMD.Int32x4.fromFloat32x4Bits(sign_bit), _ps_sign_mask));
    // y = |x| * 4/pi
    y        = SIMD.Float32x4.mul(x, _ps_cephes_FOPI);
    //printFloat32x4 ("Probe 6", y);
    // j = (int(y) + 1) & ~1, i.e. the integer part rounded up to an even value
    emm2     = SIMD.Int32x4.fromFloat32x4(y);
    emm2     = SIMD.Int32x4.add(emm2, _pi32_1);
    emm2     = SIMD.Int32x4.and(emm2, _pi32_inv1);
    //printInt32x4 ("Probe 8", emm2);
    y        = SIMD.Float32x4.fromInt32x4(emm2);
    //printFloat32x4 ("Probe 7", y);
    // Bit 2 of j, moved up to bit 31: lanes where it is set get their sign flipped.
    emm0     = SIMD.Int32x4.and(emm2, _pi32_4);
    emm0     = SIMD.Int32x4.shiftLeftByScalar(emm0, 29);

    // Polynomial selector: all ones in lanes where bit 1 of j is clear, else zero.
    emm2     = SIMD.Int32x4.and(emm2, _pi32_2);
    emm2mask = SIMD.Int32x4.equal(emm2, SIMD.Int32x4.splat(0));
    emm2     = SIMD.Int32x4.select(emm2mask, SIMD.Int32x4.splat(-1), SIMD.Int32x4.splat(0));

    swap_sign_bit = SIMD.Float32x4.fromInt32x4Bits(emm0);
    poly_mask     = SIMD.Float32x4.fromInt32x4Bits(emm2);
    // Final sign = input sign XOR the flip derived from j.
    sign_bit      = SIMD.Float32x4.fromInt32x4Bits(SIMD.Int32x4.xor(SIMD.Int32x4.fromFloat32x4Bits(sign_bit), SIMD.Int32x4.fromFloat32x4Bits(swap_sign_bit)));
    //printFloat32x4 ("Probe 1", sign_bit);

    //printFloat32x4 ("Probe 4", y);
    //printFloat32x4 ("Probe 5", _ps_minus_cephes_DP1);
    // Argument reduction: x = |x| - j*pi/4, with pi/4 applied in three parts.
    xmm1 = SIMD.Float32x4.mul(y, _ps_minus_cephes_DP1);
    //printFloat32x4 ("Probe 3", xmm1);
    xmm2 = SIMD.Float32x4.mul(y, _ps_minus_cephes_DP2);
    xmm3 = SIMD.Float32x4.mul(y, _ps_minus_cephes_DP3);
    x    = SIMD.Float32x4.add(x, xmm1);
    x    = SIMD.Float32x4.add(x, xmm2);
    x    = SIMD.Float32x4.add(x, xmm3);
    //printFloat32x4 ("Probe 2", x);

    // First polynomial, with z = x*x:
    //   y = ((p0*z + p1)*z + p2)*z*z - z/2 + 1
    y    = _ps_coscof_p0;
    z    = SIMD.Float32x4.mul(x, x);
    y    = SIMD.Float32x4.mul(y, z);
    y    = SIMD.Float32x4.add(y, _ps_coscof_p1);
    y    = SIMD.Float32x4.mul(y, z);
    y    = SIMD.Float32x4.add(y, _ps_coscof_p2);
    y    = SIMD.Float32x4.mul(y, z);
    y    = SIMD.Float32x4.mul(y, z);
    tmp  = SIMD.Float32x4.mul(z, _ps_0p5);
    y    = SIMD.Float32x4.sub(y, tmp);
    y    = SIMD.Float32x4.add(y, _ps_1);

    // Second polynomial:
    //   y2 = ((s0*z + s1)*z + s2)*z*x + x
    y2   = _ps_sincof_p0;
    //printFloat32x4 ("Probe 11", y2);
    //printFloat32x4 ("Probe 12", z);
    y2   = SIMD.Float32x4.mul(y2, z);
    y2   = SIMD.Float32x4.add(y2, _ps_sincof_p1);
    //printFloat32x4 ("Probe 13", y2);
    y2   = SIMD.Float32x4.mul(y2, z);
    y2   = SIMD.Float32x4.add(y2, _ps_sincof_p2);
    y2   = SIMD.Float32x4.mul(y2, z);
    y2   = SIMD.Float32x4.mul(y2, x);
    y2   = SIMD.Float32x4.add(y2, x);

    // Per-lane blend: keep y2 where poly_mask is all ones and y elsewhere.
    // Each operand is masked to zero in the lanes it does not supply, then
    // the two are added.
    xmm3 = poly_mask;
    y2   = SIMD.Float32x4.fromInt32x4Bits(SIMD.Int32x4.and(SIMD.Int32x4.fromFloat32x4Bits(xmm3), SIMD.Int32x4.fromFloat32x4Bits(y2)));
    //printFloat32x4 ("Probe 10", y2);
    y    = SIMD.Float32x4.fromInt32x4Bits(SIMD.Int32x4.and(SIMD.Int32x4.not(SIMD.Int32x4.fromFloat32x4Bits(xmm3)), SIMD.Int32x4.fromFloat32x4Bits(y)));
    y    = SIMD.Float32x4.add(y, y2);

    //printFloat32x4 ("Probe 9", y);
    // Apply the sign computed above by XOR-ing it into the result bits.
    y    = SIMD.Float32x4.fromInt32x4Bits(SIMD.Int32x4.xor(SIMD.Int32x4.fromFloat32x4Bits(y), SIMD.Int32x4.fromFloat32x4Bits(sign_bit)));
    return y;
  }
190 | 
  // The same four input angles for both kernels: packed into one Float32x4 for
  // the SIMD kernel, and as a plain array for the scalar kernel.
  var simdInput    = SIMD.Float32x4 (1.0, 2.0, 3.0, 4.0);
  var nonSimdInput = [1.0, 2.0, 3.0, 4.0];
193 | 
194 |   // SIMD version of the kernel
195 |   function simd (n) {
196 |     var result ;
197 |     for (var i = 0; i < n; ++i) {
198 |       result = sinx4 (simdInput);
199 |     }
200 |     return [SIMD.Float32x4.extractLane(result, 0),
201 |         SIMD.Float32x4.extractLane(result, 1),
202 |         SIMD.Float32x4.extractLane(result, 2),
203 |         SIMD.Float32x4.extractLane(result, 3)];
204 |   }
205 | 
206 |   // Non SIMD version of the kernel
207 |   function nonSimd (n) {
208 |     var s = 0;
209 |     var x = nonSimdInput[0];
210 |     var y = nonSimdInput[1];
211 |     var z = nonSimdInput[2];
212 |     var w = nonSimdInput[3];
213 |     var rx, ry, rz, rw;
214 |     for (var i = 0; i < n; ++i) {
215 |       rx = Math.sin(x);
216 |       ry = Math.sin(y);
217 |       rz = Math.sin(z);
218 |       rw = Math.sin(w);
219 |     }
220 |     return [rx, ry, rz, rw];
221 |   }
222 | 
223 | } ());
224 | 


--------------------------------------------------------------------------------
/src/benchmarks/transform.js:
--------------------------------------------------------------------------------
 1 | // Transform vertex by 4x4 transformation matrix.
 2 | // Author: John McCutchan
 3 | 
 4 | (function () {
 5 | 
  // Kernel configuration: names the benchmark and wires the init/cleanup
  // hooks and the two kernels (SIMD and scalar) defined below.
  var kernelConfig = {
    kernelName:       "VertexTransform",
    kernelInit:       init,
    kernelCleanup:    cleanup,
    kernelSimd:       simdVertexTransform,
    kernelNonSimd:    vertexTransform,
    kernelIterations: 1000
  };

  // Hook up to the harness
  benchmarks.add(new Benchmark(kernelConfig));

  // Benchmark data, initialization and kernel functions.
  // T/V/Out are the matrix, vertex and result used by the scalar kernel;
  // Tx/Vx/Outx are the separate copies used by the SIMD kernel.
  var T = new Float32Array(16);
  var V = new Float32Array(4);
  var Out = new Float32Array(4);
  var Tx = new Float32Array(16);
  var Vx = new Float32Array(4);
  var Outx = new Float32Array(4);
26 | 
27 |   function init() {
28 |     T[0] = 1.0;
29 |     T[5] = 1.0;
30 |     T[10] = 1.0;
31 |     T[15] = 1.0;
32 |     V[0] = 1.0;
33 |     V[1] = 2.0;
34 |     V[2] = 3.0;
35 |     V[3] = 1.0;
36 |     SIMD.Float32x4.store(Tx, 0,  SIMD.Float32x4(1.0, 0.0, 0.0, 0.0));
37 |     SIMD.Float32x4.store(Tx, 4,  SIMD.Float32x4(0.0, 1.0, 0.0, 0.0));
38 |     SIMD.Float32x4.store(Tx, 8,  SIMD.Float32x4(0.0, 0.0, 1.0, 0.0));
39 |     SIMD.Float32x4.store(Tx, 12, SIMD.Float32x4(0.0, 0.0, 0.0, 1.0));
40 |     SIMD.Float32x4.store(Vx, 0, SIMD.Float32x4(1.0, 2.0, 3.0, 1.0));
41 |     simdVertexTransform(1);
42 |     vertexTransform(1);
43 |     return (SIMD.Float32x4.extractLane(SIMD.Float32x4.load(Outx, 0), 0) == Out[0]) &&
44 |         (SIMD.Float32x4.extractLane(SIMD.Float32x4.load(Outx, 0), 1) == Out[1]) &&
45 |         (SIMD.Float32x4.extractLane(SIMD.Float32x4.load(Outx, 0), 2) == Out[2]) &&
46 |         (SIMD.Float32x4.extractLane(SIMD.Float32x4.load(Outx, 0), 3) == Out[3]);
47 |   }
48 | 
  // Kernel cleanup hook: repeats the init-time setup and comparison of both
  // kernels.
  function cleanup() {
    return init(); // Sanity checking before and after are the same
  }
52 | 
53 |   function vertexTransform(n) {
54 |     for (var i = 0; i < n; i++) {
55 |       var x = V[0];
56 |       var y = V[1];
57 |       var z = V[2];
58 |       var w = V[3];
59 |       var m0 = T[0];
60 |       var m4 = T[4];
61 |       var m8 = T[8];
62 |       var m12 = T[12];
63 |       Out[0] = (m0 * x + m4 * y + m8 * z + m12 * w);
64 |       var m1 = T[1];
65 |       var m5 = T[5];
66 |       var m9 = T[9];
67 |       var m13 = T[13];
68 |       Out[1] = (m1 * x + m5 * y + m9 * z + m13 * w);
69 |       var m2 = T[2];
70 |       var m6 = T[6];
71 |       var m10 = T[10];
72 |       var m14 = T[14];
73 |       Out[2] = (m2 * x + m6 * y + m10 * z + m14 * w);
74 |       var m3 = T[3];
75 |       var m7 = T[7];
76 |       var m11 = T[11];
77 |       var m15 = T[15];
78 |       Out[3] = (m3 * x + m7 * y + m11 * z + m15 * w);
79 |     }
80 |   }
81 | 
  // SIMD kernel: n times, computes
  //   Outx = x*Tx[0..3] + y*Tx[4..7] + z*Tx[8..11] + w*Tx[12..15]
  // where (x, y, z, w) is the vertex stored in Vx. Each component is
  // broadcast to all four lanes with a swizzle, multiplied by the matching
  // group of four Tx entries and accumulated in 'z' (which here is the
  // running sum, not the vertex's z component).
  function simdVertexTransform(n) {
    for (var i = 0; i < n; i++) {
      var xxxx = SIMD.Float32x4.swizzle(SIMD.Float32x4.load(Vx, 0), 0, 0, 0, 0);
      var z = SIMD.Float32x4.splat(0.0); // accumulator, starts at zero
      z = SIMD.Float32x4.add(z, SIMD.Float32x4.mul(xxxx, SIMD.Float32x4.load(Tx, 0)));
      var yyyy = SIMD.Float32x4.swizzle(SIMD.Float32x4.load(Vx, 0), 1, 1, 1, 1);
      z = SIMD.Float32x4.add(z, SIMD.Float32x4.mul(yyyy, SIMD.Float32x4.load(Tx, 4)));
      var zzzz = SIMD.Float32x4.swizzle(SIMD.Float32x4.load(Vx, 0), 2, 2, 2, 2);
      z = SIMD.Float32x4.add(z, SIMD.Float32x4.mul(zzzz, SIMD.Float32x4.load(Tx, 8)));
      var wwww = SIMD.Float32x4.swizzle(SIMD.Float32x4.load(Vx, 0), 3, 3, 3, 3);
      z = SIMD.Float32x4.add(z, SIMD.Float32x4.mul(wwww, SIMD.Float32x4.load(Tx, 12)));
      SIMD.Float32x4.store(Outx, 0, z);
    }
  }
96 | 
97 | } ());
98 | 


--------------------------------------------------------------------------------
/src/benchmarks/transpose4x4.js:
--------------------------------------------------------------------------------
  1 | // Transpose a 4x4 matrix
  2 | // Author: Peter Jensen
  3 | (function () {
  4 | 
  // Kernel configuration: names the benchmark and wires the init/cleanup
  // hooks and the two kernels (SIMD and scalar).
  var kernelConfig = {
    kernelName:       "Transpose4x4",
    kernelInit:       init,
    kernelCleanup:    cleanup,
    kernelSimd:       simdTransposeN,
    kernelNonSimd:    transposeN,
    kernelIterations: 100000000
  };

  // Hook up to the harness
  benchmarks.add (new Benchmark (kernelConfig));

  // Global object allocations

  var src    = new Float32Array(16); // input matrix, stored row by row
  var dst    = new Float32Array(16); // output written by the kernels
  var tsrc   = new Float32Array(16); // expected transpose of src (filled by initMatrix)

  // Lane selector for SIMD.Float32x4.select: true in lanes 0 and 1, false in
  // lanes 2 and 3.
  var sel_ttff = SIMD.Bool32x4(true, true, false, false);
 25 | 
 26 |   function initMatrix(matrix, matrixTransposed) {
 27 |     for (var r = 0; r < 4; ++r) {
 28 |       var r4 = 4*r;
 29 |       for (var c = 0; c < 4; ++c) {
 30 |         matrix[r4 + c]            = r4 + c;
 31 |         matrixTransposed[r + c*4] = r4 + c;
 32 |       }
 33 |     }
 34 |   }
 35 | 
 36 |   function printMatrix(matrix) {
 37 |     for (var r = 0; r < 4; ++r) {
 38 |       var str = "";
 39 |       var ri = r*4;
 40 |       for (var c = 0; c < 4; ++c) {
 41 |         var value = matrix[ri + c];
 42 |         str += " " + value.toFixed(2);
 43 |       }
 44 |       print(str);
 45 |     }
 46 |   }
 47 | 
 48 |   function compareEqualMatrix(m1, m2) {
 49 |     for (var i = 0; i < 16; ++i) {
 50 |       if (m1[i] !== m2[i]) {
 51 |         return false;
 52 |       }
 53 |     }
 54 |     return true;
 55 |   }
 56 | 
 57 |   // Kernel Initializer
 58 |   function init () {
 59 |     initMatrix(src, tsrc);
 60 |     transposeN(1);
 61 |     if (!compareEqualMatrix (tsrc, dst)) {
 62 |       return false;
 63 |     }
 64 | 
 65 |     simdTransposeN(1);
 66 | //    printMatrix(dst);
 67 |     if (!compareEqualMatrix (tsrc, dst)) {
 68 |       return false;
 69 |     }
 70 | 
 71 |     return true;
 72 |   }
 73 | 
  // Kernel Cleanup
  // Repeats the init-time verification of both kernels after the timed runs.
  function cleanup () {
    return init();
  }
 78 | 
  // SIMD version of the kernel with SIMD.Float32x4.shuffle operation.
  // Loads the four rows of src, interleaves them with two rounds of
  // two-operand shuffles and stores the four rows of the transpose to dst.
  // In a shuffle, indices 0..3 pick lanes of the first operand and 4..7 lanes
  // of the second.
  function simdTransposeMix() {
    var src0     = SIMD.Float32x4.load(src, 0);
    var src1     = SIMD.Float32x4.load(src, 4);
    var src2     = SIMD.Float32x4.load(src, 8);
    var src3     = SIMD.Float32x4.load(src, 12);
    var dst0;
    var dst1;
    var dst2;
    var dst3;
    var tmp01;
    var tmp23;

    // Columns 0 and 1: gather the first two lanes of each row pair, then
    // split the even lanes (column 0) from the odd lanes (column 1).
    tmp01 = SIMD.Float32x4.shuffle(src0, src1, 0, 1, 4, 5);
    tmp23 = SIMD.Float32x4.shuffle(src2, src3, 0, 1, 4, 5);
    dst0  = SIMD.Float32x4.shuffle(tmp01, tmp23, 0, 2, 4, 6);
    dst1  = SIMD.Float32x4.shuffle(tmp01, tmp23, 1, 3, 5, 7);

    // Columns 2 and 3: same scheme on the last two lanes of each row.
    tmp01 = SIMD.Float32x4.shuffle(src0, src1, 2, 3, 6, 7);
    tmp23 = SIMD.Float32x4.shuffle(src2, src3, 2, 3, 6, 7);
    dst2  = SIMD.Float32x4.shuffle(tmp01, tmp23, 0, 2, 4, 6);
    dst3  = SIMD.Float32x4.shuffle(tmp01, tmp23, 1, 3, 5, 7);

    SIMD.Float32x4.store(dst, 0,  dst0);
    SIMD.Float32x4.store(dst, 4,  dst1);
    SIMD.Float32x4.store(dst, 8,  dst2);
    SIMD.Float32x4.store(dst, 12, dst3);
  }
107 | 
  // SIMD version of the kernel built from swizzle + select.
  // Each select takes lanes 0-1 from its first vector and lanes 2-3 from its
  // second (see sel_ttff); the swizzles move the wanted source lanes into
  // those positions first. Swizzle indices in positions that the select then
  // discards are don't-cares.
  function simdTranspose() {
    var src0     = SIMD.Float32x4.load(src, 0);
    var src1     = SIMD.Float32x4.load(src, 4);
    var src2     = SIMD.Float32x4.load(src, 8);
    var src3     = SIMD.Float32x4.load(src, 12);
    var dst0;
    var dst1;
    var dst2;
    var dst3;
    var tmp01;
    var tmp23;

    // tmp01 = (src0[0], src0[1], src1[0], src1[1]); tmp23 likewise for rows 2, 3.
    tmp01 = SIMD.Float32x4.select(sel_ttff, src0, SIMD.Float32x4.swizzle(src1, 0, 0, 0, 1));
    tmp23 = SIMD.Float32x4.select(sel_ttff, src2, SIMD.Float32x4.swizzle(src3, 0, 0, 0, 1));
    // dst0 = column 0 of src, dst1 = column 1.
    dst0  = SIMD.Float32x4.select(sel_ttff, SIMD.Float32x4.swizzle(tmp01, 0, 2, 0, 0), SIMD.Float32x4.swizzle(tmp23, 0, 0, 0, 2));
    dst1  = SIMD.Float32x4.select(sel_ttff, SIMD.Float32x4.swizzle(tmp01, 1, 3, 0, 0), SIMD.Float32x4.swizzle(tmp23, 0, 0, 1, 3));

    // tmp01 = (src0[2], src0[3], src1[2], src1[3]); tmp23 likewise for rows 2, 3.
    tmp01 = SIMD.Float32x4.select(sel_ttff, SIMD.Float32x4.swizzle(src0, 2, 3, 0, 0), src1);
    tmp23 = SIMD.Float32x4.select(sel_ttff, SIMD.Float32x4.swizzle(src2, 2, 3, 0, 0), src3);
    // dst2 = column 2 of src, dst3 = column 3.
    dst2  = SIMD.Float32x4.select(sel_ttff, SIMD.Float32x4.swizzle(tmp01, 0, 2, 0, 0), SIMD.Float32x4.swizzle(tmp23, 0, 0, 0, 2));
    dst3  = SIMD.Float32x4.select(sel_ttff, SIMD.Float32x4.swizzle(tmp01, 1, 3, 0, 0), SIMD.Float32x4.swizzle(tmp23, 0, 0, 1, 3));

    SIMD.Float32x4.store(dst, 0,  dst0);
    SIMD.Float32x4.store(dst, 4,  dst1);
    SIMD.Float32x4.store(dst, 8,  dst2);
    SIMD.Float32x4.store(dst, 12, dst3);
  }
136 | 
137 |   // Non SIMD version of the kernel
138 |   function transpose() {
139 |     dst[0]  = src[0];
140 |     dst[1]  = src[4];
141 |     dst[2]  = src[8];
142 |     dst[3]  = src[12];
143 |     dst[4]  = src[1];
144 |     dst[5]  = src[5];
145 |     dst[6]  = src[9];
146 |     dst[7]  = src[13];
147 |     dst[8]  = src[2];
148 |     dst[9]  = src[6];
149 |     dst[10] = src[10];
150 |     dst[11] = src[14];
151 |     dst[12] = src[3];
152 |     dst[13] = src[7];
153 |     dst[14] = src[11];
154 |     dst[15] = src[15];
155 |   }
156 | 
157 |   function simdTransposeN(n) {
158 |     for (var i = 0; i < n; ++i) {
159 |       var src0 = SIMD.Float32x4.load(src, 0);
160 |       var src1 = SIMD.Float32x4.load(src, 4);
161 |       var src2 = SIMD.Float32x4.load(src, 8);
162 |       var src3 = SIMD.Float32x4.load(src, 12);
163 |       var dst0;
164 |       var dst1;
165 |       var dst2;
166 |       var dst3;
167 |       var tmp01;
168 |       var tmp23;
169 | 
170 |       tmp01 = SIMD.Float32x4.shuffle(src0, src1, 0, 1, 4, 5);
171 |       tmp23 = SIMD.Float32x4.shuffle(src2, src3, 0, 1, 4, 5);
172 |       dst0 = SIMD.Float32x4.shuffle(tmp01, tmp23, 0, 2, 4, 6);
173 |       dst1 = SIMD.Float32x4.shuffle(tmp01, tmp23, 1, 3, 5, 7);
174 | 
175 |       tmp01 = SIMD.Float32x4.shuffle(src0, src1, 2, 3, 6, 7);
176 |       tmp23 = SIMD.Float32x4.shuffle(src2, src3, 2, 3, 6, 7);
177 |       dst2 = SIMD.Float32x4.shuffle(tmp01, tmp23, 0, 2, 4, 6);
178 |       dst3 = SIMD.Float32x4.shuffle(tmp01, tmp23, 1, 3, 5, 7);
179 | 
180 |       SIMD.Float32x4.store(dst, 0,  dst0);
181 |       SIMD.Float32x4.store(dst, 4,  dst1);
182 |       SIMD.Float32x4.store(dst, 8,  dst2);
183 |       SIMD.Float32x4.store(dst, 12, dst3);
184 |     }
185 |   }
186 | 
187 |   function transposeN(n) {
188 |     for (var i = 0; i < n; ++i) {
189 |       dst[0] = src[0];
190 |       dst[1] = src[4];
191 |       dst[2] = src[8];
192 |       dst[3] = src[12];
193 |       dst[4] = src[1];
194 |       dst[5] = src[5];
195 |       dst[6] = src[9];
196 |       dst[7] = src[13];
197 |       dst[8] = src[2];
198 |       dst[9] = src[6];
199 |       dst[10] = src[10];
200 |       dst[11] = src[14];
201 |       dst[12] = src[3];
202 |       dst[13] = src[7];
203 |       dst[14] = src[11];
204 |       dst[15] = src[15];
205 |     }
206 |   }
207 | 
208 | } ());
209 | 


--------------------------------------------------------------------------------
/src/ecmascript_simd.js:
--------------------------------------------------------------------------------
   1 | /*
   2 |   vim: set ts=8 sts=2 et sw=2 tw=79:
   3 |   Copyright (C) 2013
   4 | 
   5 |   This software is provided 'as-is', without any express or implied
   6 |   warranty.  In no event will the authors be held liable for any damages
   7 |   arising from the use of this software.
   8 | 
   9 |   Permission is granted to anyone to use this software for any purpose,
  10 |   including commercial applications, and to alter it and redistribute it
  11 |   freely, subject to the following restrictions:
  12 | 
  13 |   1. The origin of this software must not be misrepresented; you must not
  14 |      claim that you wrote the original software. If you use this software
  15 |      in a product, an acknowledgment in the product documentation would be
  16 |      appreciated but is not required.
  17 |   2. Altered source versions must be plainly marked as such, and must not be
  18 |      misrepresented as being the original software.
  19 |   3. This notice may not be removed or altered from any source distribution.
  20 | */
  21 | 
  22 | // A conforming SIMD.js implementation may contain the following deviations to
  23 | // normal JS numeric behavior:
  24 | //  - Subnormal numbers may or may not be flushed to zero on input or output of
  25 | //    any SIMD operation.
  26 | 
  27 | // Many of the operations in SIMD.js have semantics which correspond to scalar
  28 | // operations in JS, however there are a few differences:
  29 | //  - Vector shifts don't mask the shift count.
  30 | //  - Conversions from float to int32 throw on error.
  31 | //  - Load and store operations throw when out of bounds.
  32 | 
  33 | (function(global) {
  34 | 
  35 | if (typeof global.SIMD === "undefined") {
  36 |   // SIMD module.
  37 |   global.SIMD = {};
  38 | }
  39 | 
  40 | if (typeof module !== "undefined") {
  41 |   // For CommonJS modules
  42 |   module.exports = global.SIMD;
  43 | }
  44 | 
  45 | var SIMD = global.SIMD;
  46 | 
  47 | // Buffers for bit casting and coercing lane values to those representable in
  48 | // the underlying lane type.
  49 | var _f32x4 = new Float32Array(4);
  50 | var _f64x2 = new Float64Array(_f32x4.buffer);
  51 | var _i32x4 = new Int32Array(_f32x4.buffer);
  52 | var _i16x8 = new Int16Array(_f32x4.buffer);
  53 | var _i8x16 = new Int8Array(_f32x4.buffer);
  54 | var _ui32x4 = new Uint32Array(_f32x4.buffer);
  55 | var _ui16x8 = new Uint16Array(_f32x4.buffer);
  56 | var _ui8x16 = new Uint8Array(_f32x4.buffer);
  57 | 
  58 | function convertValue(buffer, value) {
  59 |   buffer[0] = value;
  60 |   return buffer[0];
  61 | }
  62 | 
  63 | function convertArray(buffer, array) {
  64 |   for (var i = 0; i < array.length; i++)
  65 |     array[i] = convertValue(buffer, array[i]);
  66 |   return array;
  67 | }
  68 | 
  69 | // Utility functions.
  70 | 
  71 | function isInt32(o) {
  72 |   return (o | 0) === o;
  73 | }
  74 | 
  75 | function isTypedArray(o) {
  76 |   return (o instanceof Int8Array) ||
  77 |          (o instanceof Uint8Array) ||
  78 |          (o instanceof Uint8ClampedArray) ||
  79 |          (o instanceof Int16Array) ||
  80 |          (o instanceof Uint16Array) ||
  81 |          (o instanceof Int32Array) ||
  82 |          (o instanceof Uint32Array) ||
  83 |          (o instanceof Float32Array) ||
  84 |          (o instanceof Float64Array);
  85 | }
  86 | 
  87 | function minNum(x, y) {
  88 |   return x != x ? y :
  89 |          y != y ? x :
  90 |          Math.min(x, y);
  91 | }
  92 | 
  93 | function maxNum(x, y) {
  94 |   return x != x ? y :
  95 |          y != y ? x :
  96 |          Math.max(x, y);
  97 | }
  98 | 
  99 | function clamp(a, min, max) {
 100 |   if (a < min)
 101 |     return min;
 102 |   if (a > max)
 103 |     return max;
 104 |   return a;
 105 | }
 106 | 
 107 | // SIMD implementation functions
 108 | 
 109 | function simdCoerceIndex(index) {
 110 |     index = +index;
 111 |     if (index != Math.floor(index))
 112 |         throw new RangeError("SIMD index must be an integer");
 113 |     return index;
 114 | }
 115 | 
 116 | function simdCheckLaneIndex(index, lanes) {
 117 |   if (!isInt32(index))
 118 |     throw new TypeError('Lane index must be an int32');
 119 |   if (index < 0 || index >= lanes)
 120 |     throw new RangeError('Lane index must be in bounds');
 121 | }
 122 | 
 123 | // Global lanes array for constructing SIMD values.
 124 | var lanes = [];
 125 | 
 126 | function simdCreate(type) {
 127 |   return type.fn.apply(type.fn, lanes);
 128 | }
 129 | 
 130 | function simdToString(type, a) {
 131 |   a = type.fn.check(a);
 132 |   var str = "SIMD." + type.name + "(";
 133 |   str += type.fn.extractLane(a, 0);
 134 |   for (var i = 1; i < type.lanes; i++) {
 135 |     str += ", " + type.fn.extractLane(a, i);
 136 |   }
 137 |   return str + ")";
 138 | }
 139 | 
 140 | function simdToLocaleString(type, a) {
 141 |   a = type.fn.check(a);
 142 |   var str = "SIMD." + type.name + "(";
 143 |   str += type.fn.extractLane(a, 0).toLocaleString();
 144 |   for (var i = 1; i < type.lanes; i++) {
 145 |     str += ", " + type.fn.extractLane(a, i).toLocaleString();
 146 |   }
 147 |   return str + ")";
 148 | }
 149 | 
 150 | function simdSplat(type, s) {
 151 |   for (var i = 0; i < type.lanes; i++)
 152 |     lanes[i] = s;
 153 |   return simdCreate(type);
 154 | }
 155 | 
 156 | function simdReplaceLane(type, a, i, s) {
 157 |   a = type.fn.check(a);
 158 |   simdCheckLaneIndex(i, type.lanes);
 159 |   for (var j = 0; j < type.lanes; j++)
 160 |     lanes[j] = type.fn.extractLane(a, j);
 161 |   lanes[i] = s;
 162 |   return simdCreate(type);
 163 | }
 164 | 
 165 | function simdFrom(toType, fromType, a) {
 166 |   a = fromType.fn.check(a);
 167 |   for (var i = 0; i < fromType.lanes; i++) {
 168 |     var v = Math.trunc(fromType.fn.extractLane(a, i));
 169 |     if (toType.minVal !== undefined &&
 170 |         !(toType.minVal <= v && v <= toType.maxVal)) {
 171 |       throw new RangeError("Can't convert value");
 172 |     }
 173 |     lanes[i] = v;
 174 |   }
 175 |   return simdCreate(toType);
 176 | }
 177 | 
 178 | function simdFromBits(toType, fromType, a) {
 179 |   a = fromType.fn.check(a);
 180 |   var newValue = new toType.fn();
 181 |   newValue.s_ = new toType.view(a.s_.buffer);
 182 |   return newValue;
 183 | }
 184 | 
 185 | function simdSelect(type, selector, a, b) {
 186 |   selector = type.boolType.fn.check(selector);
 187 |   a = type.fn.check(a);
 188 |   b = type.fn.check(b);
 189 |   for (var i = 0; i < type.lanes; i++) {
 190 |     lanes[i] = type.boolType.fn.extractLane(selector, i) ?
 191 |                type.fn.extractLane(a, i) : type.fn.extractLane(b, i);
 192 |   }
 193 |   return simdCreate(type);
 194 | }
 195 | 
 196 | function simdSwizzle(type, a, indices) {
 197 |   a = type.fn.check(a);
 198 |   for (var i = 0; i < indices.length; i++) {
 199 |     simdCheckLaneIndex(indices[i], type.lanes);
 200 |     lanes[i] = type.fn.extractLane(a, indices[i]);
 201 |   }
 202 |   return simdCreate(type);
 203 | }
 204 | 
 205 | function simdShuffle(type, a, b, indices) {
 206 |   a = type.fn.check(a);
 207 |   b = type.fn.check(b);
 208 |   for (var i = 0; i < indices.length; i++) {
 209 |     simdCheckLaneIndex(indices[i], 2 * type.lanes);
 210 |     lanes[i] = indices[i] < type.lanes ?
 211 |                type.fn.extractLane(a, indices[i]) :
 212 |                type.fn.extractLane(b, indices[i] - type.lanes);
 213 |   }
 214 |   return simdCreate(type);
 215 | }
 216 | 
 217 | function unaryNeg(a) { return -a; }
 218 | function unaryBitwiseNot(a) { return ~a; }
 219 | function unaryLogicalNot(a) { return !a; }
 220 | 
 221 | function simdUnaryOp(type, op, a) {
 222 |   a = type.fn.check(a);
 223 |   for (var i = 0; i < type.lanes; i++)
 224 |     lanes[i] = op(type.fn.extractLane(a, i));
 225 |   return simdCreate(type);
 226 | }
 227 | 
 228 | function binaryAnd(a, b) { return a & b; }
 229 | function binaryOr(a, b) { return a | b; }
 230 | function binaryXor(a, b) { return a ^ b; }
 231 | function binaryAdd(a, b) { return a + b; }
 232 | function binarySub(a, b) { return a - b; }
 233 | function binaryMul(a, b) { return a * b; }
 234 | function binaryDiv(a, b) { return a / b; }
 235 | 
 236 | var binaryImul;
 237 | if (typeof Math.imul !== 'undefined') {
 238 |   binaryImul = Math.imul;
 239 | } else {
 240 |   binaryImul = function(a, b) {
 241 |     var ah = (a >>> 16) & 0xffff;
 242 |     var al = a & 0xffff;
 243 |     var bh = (b >>> 16) & 0xffff;
 244 |     var bl = b & 0xffff;
 245 |     // the shift by 0 fixes the sign on the high part
 246 |     // the final |0 converts the unsigned value into a signed value
 247 |     return ((al * bl) + (((ah * bl + al * bh) << 16) >>> 0)|0);
 248 |   };
 249 | }
 250 | 
 251 | function simdBinaryOp(type, op, a, b) {
 252 |   a = type.fn.check(a);
 253 |   b = type.fn.check(b);
 254 |   for (var i = 0; i < type.lanes; i++)
 255 |     lanes[i] = op(type.fn.extractLane(a, i), type.fn.extractLane(b, i));
 256 |   return simdCreate(type);
 257 | }
 258 | 
 259 | function binaryEqual(a, b) { return a == b; }
 260 | function binaryNotEqual(a, b) { return a != b; }
 261 | function binaryLess(a, b) { return a < b; }
 262 | function binaryLessEqual(a, b) { return a <= b; }
 263 | function binaryGreater(a, b) { return a > b; }
 264 | function binaryGreaterEqual(a, b) { return a >= b; }
 265 | 
 266 | function simdRelationalOp(type, op, a, b) {
 267 |   a = type.fn.check(a);
 268 |   b = type.fn.check(b);
 269 |   for (var i = 0; i < type.lanes; i++)
 270 |     lanes[i] = op(type.fn.extractLane(a, i), type.fn.extractLane(b, i));
 271 |   return simdCreate(type.boolType);
 272 | }
 273 | 
 274 | function simdAnyTrue(type, a) {
 275 |   a = type.fn.check(a);
 276 |   for (var i = 0; i < type.lanes; i++)
 277 |     if (type.fn.extractLane(a, i)) return true;
 278 |   return false;
 279 | }
 280 | 
 281 | function simdAllTrue(type, a) {
 282 |   a = type.fn.check(a);
 283 |   for (var i = 0; i < type.lanes; i++)
 284 |     if (!type.fn.extractLane(a, i)) return false;
 285 |   return true;
 286 | }
 287 | 
 288 | function binaryShiftLeft(a, bits) { return a << bits; }
 289 | function binaryShiftRightArithmetic(a, bits) { return a >> bits; }
 290 | function binaryShiftRightLogical(a, bits) { return a >>> bits; }
 291 | 
 292 | function simdShiftOp(type, op, a, bits) {
 293 |   a = type.fn.check(a);
 294 |   for (var i = 0; i < type.lanes; i++)
 295 |     lanes[i] = op(type.fn.extractLane(a, i), bits);
 296 |   return simdCreate(type);
 297 | }
 298 | 
 299 | function simdLoad(type, tarray, index, count) {
 300 |   if (!isTypedArray(tarray))
 301 |     throw new TypeError("The 1st argument must be a typed array.");
 302 |   index = simdCoerceIndex(index);
 303 |   var bpe = tarray.BYTES_PER_ELEMENT;
 304 |   var bytes = count * type.laneSize;
 305 |   if (index < 0 || (index * bpe + bytes) > tarray.byteLength)
 306 |     throw new RangeError("The value of index is invalid.");
 307 | 
 308 |   var newValue = type.fn();
 309 |   var dst = new Uint8Array(newValue.s_.buffer);
 310 |   var src = new Uint8Array(tarray.buffer, tarray.byteOffset + index * bpe, bytes);
 311 | 
 312 |   for (var i = 0; i < bytes; i++) {
 313 |     dst[i] = src[i];
 314 |   }
 315 |   var typeBytes = type.lanes * type.laneSize;
 316 |   for (var i = bytes; i < typeBytes; i++) {
 317 |     dst[i] = 0;
 318 |   }
 319 |   return newValue;
 320 | }
 321 | 
 322 | function simdStore(type, tarray, index, a, count) {
 323 |   if (!isTypedArray(tarray))
 324 |     throw new TypeError("The 1st argument must be a typed array.");
 325 |   index = simdCoerceIndex(index);
 326 |   var bpe = tarray.BYTES_PER_ELEMENT;
 327 |   var bytes = count * type.laneSize;
 328 |   if (index < 0 || (index * bpe + bytes) > tarray.byteLength)
 329 |     throw new RangeError("The value of index is invalid.");
 330 | 
 331 |   a = type.fn.check(a);
 332 | 
 333 |   // The underlying buffers are copied byte by byte, to avoid float
 334 |   // canonicalization.
 335 |   var src = new Uint8Array(a.s_.buffer);
 336 |   var dst = new Uint8Array(tarray.buffer, tarray.byteOffset + index * bpe, bytes);
 337 |   for (var i = 0; i < bytes; i++) {
 338 |     dst[i] = src[i];
 339 |   }
 340 |   return a;
 341 | }
 342 | 
 343 | // Constructors and extractLane functions are closely related and must be
 344 | // polyfilled together.
 345 | 
 346 | // Float32x4
 347 | if (typeof SIMD.Float32x4 === "undefined" ||
 348 |     typeof SIMD.Float32x4.extractLane === "undefined") {
 349 |   SIMD.Float32x4 = function(s0, s1, s2, s3) {
 350 |     if (!(this instanceof SIMD.Float32x4)) {
 351 |       return new SIMD.Float32x4(s0, s1, s2, s3);
 352 |     }
 353 |     this.s_ = convertArray(_f32x4, new Float32Array([s0, s1, s2, s3]));
 354 |   }
 355 | 
 356 |   SIMD.Float32x4.extractLane = function(v, i) {
 357 |     v = SIMD.Float32x4.check(v);
 358 |     simdCheckLaneIndex(i, 4);
 359 |     return v.s_[i];
 360 |   }
 361 | }
 362 | 
 363 | // Miscellaneous functions that aren't easily parameterized on type.
 364 | 
 365 | if (typeof SIMD.Float32x4.swizzle === "undefined") {
 366 |   SIMD.Float32x4.swizzle = function(a, s0, s1, s2, s3) {
 367 |     return simdSwizzle(float32x4, a, [s0, s1, s2, s3]);
 368 |   }
 369 | }
 370 | 
 371 | if (typeof SIMD.Float32x4.shuffle === "undefined") {
 372 |   SIMD.Float32x4.shuffle = function(a, b, s0, s1, s2, s3) {
 373 |     return simdShuffle(float32x4, a, b, [s0, s1, s2, s3]);
 374 |   }
 375 | }
 376 | 
 377 | // Int32x4
 378 | if (typeof SIMD.Int32x4 === "undefined" ||
 379 |     typeof SIMD.Int32x4.extractLane === "undefined") {
 380 |   SIMD.Int32x4 = function(s0, s1, s2, s3) {
 381 |     if (!(this instanceof SIMD.Int32x4)) {
 382 |       return new SIMD.Int32x4(s0, s1, s2, s3);
 383 |     }
 384 |     this.s_ = convertArray(_i32x4, new Int32Array([s0, s1, s2, s3]));
 385 |   }
 386 | 
 387 |   SIMD.Int32x4.extractLane = function(v, i) {
 388 |     v = SIMD.Int32x4.check(v);
 389 |     simdCheckLaneIndex(i, 4);
 390 |     return v.s_[i];
 391 |   }
 392 | }
 393 | 
 394 | if (typeof SIMD.Int32x4.swizzle === "undefined") {
 395 |   SIMD.Int32x4.swizzle = function(a, s0, s1, s2, s3) {
 396 |     return simdSwizzle(int32x4, a, [s0, s1, s2, s3]);
 397 |   }
 398 | }
 399 | 
 400 | if (typeof SIMD.Int32x4.shuffle === "undefined") {
 401 |   SIMD.Int32x4.shuffle = function(a, b, s0, s1, s2, s3) {
 402 |     return simdShuffle(int32x4, a, b, [s0, s1, s2, s3]);
 403 |   }
 404 | }
 405 | 
 406 | // Int16x8
 407 | if (typeof SIMD.Int16x8 === "undefined" ||
 408 |     typeof SIMD.Int16x8.extractLane === "undefined") {
 409 |   SIMD.Int16x8 = function(s0, s1, s2, s3, s4, s5, s6, s7) {
 410 |     if (!(this instanceof SIMD.Int16x8)) {
 411 |       return new SIMD.Int16x8(s0, s1, s2, s3, s4, s5, s6, s7);
 412 |     }
 413 |     this.s_ = convertArray(_i16x8, new Int16Array([s0, s1, s2, s3, s4, s5, s6, s7]));
 414 |   }
 415 | 
 416 |   SIMD.Int16x8.extractLane = function(v, i) {
 417 |     v = SIMD.Int16x8.check(v);
 418 |     simdCheckLaneIndex(i, 8);
 419 |     return v.s_[i];
 420 |   }
 421 | }
 422 | 
 423 | if (typeof SIMD.Int16x8.swizzle === "undefined") {
 424 |   SIMD.Int16x8.swizzle = function(a, s0, s1, s2, s3, s4, s5, s6, s7) {
 425 |     return simdSwizzle(int16x8, a, [s0, s1, s2, s3, s4, s5, s6, s7]);
 426 |   }
 427 | }
 428 | 
 429 | if (typeof SIMD.Int16x8.shuffle === "undefined") {
 430 |   SIMD.Int16x8.shuffle = function(a, b, s0, s1, s2, s3, s4, s5, s6, s7) {
 431 |     return simdShuffle(int16x8, a, b, [s0, s1, s2, s3, s4, s5, s6, s7]);
 432 |   }
 433 | }
 434 | 
 435 | // Int8x16
 436 | if (typeof SIMD.Int8x16 === "undefined" ||
 437 |     typeof SIMD.Int8x16.extractLane === "undefined") {
 438 |   SIMD.Int8x16 = function(s0, s1, s2, s3, s4, s5, s6, s7,
 439 |                           s8, s9, s10, s11, s12, s13, s14, s15) {
 440 |     if (!(this instanceof SIMD.Int8x16)) {
 441 |       return new SIMD.Int8x16(s0, s1, s2, s3, s4, s5, s6, s7,
 442 |                               s8, s9, s10, s11, s12, s13, s14, s15);
 443 |     }
 444 |     this.s_ = convertArray(_i8x16, new Int8Array([s0, s1, s2, s3, s4, s5, s6, s7,
 445 |                                     s8, s9, s10, s11, s12, s13, s14, s15]));
 446 |   }
 447 | 
 448 |   SIMD.Int8x16.extractLane = function(v, i) {
 449 |     v = SIMD.Int8x16.check(v);
 450 |     simdCheckLaneIndex(i, 16);
 451 |     return v.s_[i];
 452 |   }
 453 | }
 454 | 
 455 | if (typeof SIMD.Int8x16.swizzle === "undefined") {
 456 |   SIMD.Int8x16.swizzle = function(a, s0, s1, s2, s3, s4, s5, s6, s7,
 457 |                                      s8, s9, s10, s11, s12, s13, s14, s15) {
 458 |     return simdSwizzle(int8x16, a, [s0, s1, s2, s3, s4, s5, s6, s7,
 459 |                                     s8, s9, s10, s11, s12, s13, s14, s15]);
 460 |   }
 461 | }
 462 | 
 463 | if (typeof SIMD.Int8x16.shuffle === "undefined") {
 464 |   SIMD.Int8x16.shuffle = function(a, b, s0, s1, s2, s3, s4, s5, s6, s7,
 465 |                                         s8, s9, s10, s11, s12, s13, s14, s15) {
 466 |     return simdShuffle(int8x16, a, b, [s0, s1, s2, s3, s4, s5, s6, s7,
 467 |                                        s8, s9, s10, s11, s12, s13, s14, s15]);
 468 |   }
 469 | }
 470 | 
 471 | // Uint32x4
 472 | if (typeof SIMD.Uint32x4 === "undefined" ||
 473 |     typeof SIMD.Uint32x4.extractLane === "undefined") {
 474 |   SIMD.Uint32x4 = function(s0, s1, s2, s3) {
 475 |     if (!(this instanceof SIMD.Uint32x4)) {
 476 |       return new SIMD.Uint32x4(s0, s1, s2, s3);
 477 |     }
 478 |     this.s_ = convertArray(_ui32x4, new Uint32Array([s0, s1, s2, s3]));
 479 |   }
 480 | 
 481 |   SIMD.Uint32x4.extractLane = function(v, i) {
 482 |     v = SIMD.Uint32x4.check(v);
 483 |     simdCheckLaneIndex(i, 4);
 484 |     return v.s_[i];
 485 |   }
 486 | }
 487 | 
 488 | if (typeof SIMD.Uint32x4.swizzle === "undefined") {
 489 |   SIMD.Uint32x4.swizzle = function(a, s0, s1, s2, s3) {
 490 |     return simdSwizzle(uint32x4, a, [s0, s1, s2, s3]);
 491 |   }
 492 | }
 493 | 
 494 | if (typeof SIMD.Uint32x4.shuffle === "undefined") {
 495 |   SIMD.Uint32x4.shuffle = function(a, b, s0, s1, s2, s3) {
 496 |     return simdShuffle(uint32x4, a, b, [s0, s1, s2, s3]);
 497 |   }
 498 | }
 499 | 
 500 | // Uint16x8
 501 | if (typeof SIMD.Uint16x8 === "undefined" ||
 502 |     typeof SIMD.Uint16x8.extractLane === "undefined") {
 503 |   SIMD.Uint16x8 = function(s0, s1, s2, s3, s4, s5, s6, s7) {
 504 |     if (!(this instanceof SIMD.Uint16x8)) {
 505 |       return new SIMD.Uint16x8(s0, s1, s2, s3, s4, s5, s6, s7);
 506 |     }
 507 |     this.s_ = convertArray(_ui16x8, new Uint16Array([s0, s1, s2, s3, s4, s5, s6, s7]));
 508 |   }
 509 | 
 510 |   SIMD.Uint16x8.extractLane = function(v, i) {
 511 |     v = SIMD.Uint16x8.check(v);
 512 |     simdCheckLaneIndex(i, 8);
 513 |     return v.s_[i];
 514 |   }
 515 | }
 516 | 
 517 | if (typeof SIMD.Uint16x8.swizzle === "undefined") {
 518 |   SIMD.Uint16x8.swizzle = function(a, s0, s1, s2, s3, s4, s5, s6, s7) {
 519 |     return simdSwizzle(uint16x8, a, [s0, s1, s2, s3, s4, s5, s6, s7]);
 520 |   }
 521 | }
 522 | 
 523 | if (typeof SIMD.Uint16x8.shuffle === "undefined") {
 524 |   SIMD.Uint16x8.shuffle = function(a, b, s0, s1, s2, s3, s4, s5, s6, s7) {
 525 |     return simdShuffle(uint16x8, a, b, [s0, s1, s2, s3, s4, s5, s6, s7]);
 526 |   }
 527 | }
 528 | 
 529 | // Uint8x16
 530 | if (typeof SIMD.Uint8x16 === "undefined" ||
 531 |     typeof SIMD.Uint8x16.extractLane === "undefined") {
 532 |   SIMD.Uint8x16 = function(s0, s1, s2, s3, s4, s5, s6, s7,
 533 |                            s8, s9, s10, s11, s12, s13, s14, s15) {
 534 |     if (!(this instanceof SIMD.Uint8x16)) {
 535 |       return new SIMD.Uint8x16(s0, s1, s2, s3, s4, s5, s6, s7,
 536 |                                s8, s9, s10, s11, s12, s13, s14, s15);
 537 |     }
 538 |     this.s_ = convertArray(_ui8x16, new Uint8Array([s0, s1, s2, s3, s4, s5, s6, s7,
 539 |                                      s8, s9, s10, s11, s12, s13, s14, s15]));
 540 |   }
 541 | 
 542 |   SIMD.Uint8x16.extractLane = function(v, i) {
 543 |     v = SIMD.Uint8x16.check(v);
 544 |     simdCheckLaneIndex(i, 16);
 545 |     return v.s_[i];
 546 |   }
 547 | }
 548 | 
 549 | if (typeof SIMD.Uint8x16.swizzle === "undefined") {
 550 |   SIMD.Uint8x16.swizzle = function(a, s0, s1, s2, s3, s4, s5, s6, s7,
 551 |                                       s8, s9, s10, s11, s12, s13, s14, s15) {
 552 |     return simdSwizzle(uint8x16, a, [s0, s1, s2, s3, s4, s5, s6, s7,
 553 |                                      s8, s9, s10, s11, s12, s13, s14, s15]);
 554 |   }
 555 | }
 556 | 
 557 | if (typeof SIMD.Uint8x16.shuffle === "undefined") {
 558 |   SIMD.Uint8x16.shuffle = function(a, b, s0, s1, s2, s3, s4, s5, s6, s7,
 559 |                                          s8, s9, s10, s11, s12, s13, s14, s15) {
 560 |     return simdShuffle(uint8x16, a, b, [s0, s1, s2, s3, s4, s5, s6, s7,
 561 |                                         s8, s9, s10, s11, s12, s13, s14, s15]);
 562 |   }
 563 | }
 564 | 
 565 | // Bool32x4
 566 | if (typeof SIMD.Bool32x4 === "undefined" ||
 567 |     typeof SIMD.Bool32x4.extractLane === "undefined") {
 568 |   SIMD.Bool32x4 = function(s0, s1, s2, s3) {
 569 |     if (!(this instanceof SIMD.Bool32x4)) {
 570 |       return new SIMD.Bool32x4(s0, s1, s2, s3);
 571 |     }
 572 |     this.s_ = [!!s0, !!s1, !!s2, !!s3];
 573 |   }
 574 | 
 575 |   SIMD.Bool32x4.extractLane = function(v, i) {
 576 |     v = SIMD.Bool32x4.check(v);
 577 |     simdCheckLaneIndex(i, 4);
 578 |     return v.s_[i];
 579 |   }
 580 | }
 581 | 
 582 | // Bool16x8
 583 | if (typeof SIMD.Bool16x8 === "undefined" ||
 584 |     typeof SIMD.Bool16x8.extractLane === "undefined") {
 585 |   SIMD.Bool16x8 = function(s0, s1, s2, s3, s4, s5, s6, s7) {
 586 |     if (!(this instanceof SIMD.Bool16x8)) {
 587 |       return new SIMD.Bool16x8(s0, s1, s2, s3, s4, s5, s6, s7);
 588 |     }
 589 |     this.s_ = [!!s0, !!s1, !!s2, !!s3, !!s4, !!s5, !!s6, !!s7];
 590 |   }
 591 | 
 592 |   SIMD.Bool16x8.extractLane = function(v, i) {
 593 |     v = SIMD.Bool16x8.check(v);
 594 |     simdCheckLaneIndex(i, 8);
 595 |     return v.s_[i];
 596 |   }
 597 | }
 598 | 
 599 | // Bool8x16
 600 | if (typeof SIMD.Bool8x16 === "undefined" ||
 601 |     typeof SIMD.Bool8x16.extractLane === "undefined") {
 602 |   SIMD.Bool8x16 = function(s0, s1, s2, s3, s4, s5, s6, s7,
 603 |                            s8, s9, s10, s11, s12, s13, s14, s15) {
 604 |     if (!(this instanceof SIMD.Bool8x16)) {
 605 |       return new SIMD.Bool8x16(s0, s1, s2, s3, s4, s5, s6, s7,
 606 |                                s8, s9, s10, s11, s12, s13, s14, s15);
 607 |     }
 608 |     this.s_ = [!!s0, !!s1, !!s2, !!s3, !!s4, !!s5, !!s6, !!s7,
 609 |                !!s8, !!s9, !!s10, !!s11, !!s12, !!s13, !!s14, !!s15];
 610 |   }
 611 | 
 612 |   SIMD.Bool8x16.extractLane = function(v, i) {
 613 |     v = SIMD.Bool8x16.check(v);
 614 |     simdCheckLaneIndex(i, 16);
 615 |     return v.s_[i];
 616 |   }
 617 | }
 618 | 
 619 | // Type data to generate the remaining functions.
 620 | 
 621 | var float32x4 = {
 622 |   name: "Float32x4",
 623 |   fn: SIMD.Float32x4,
 624 |   lanes: 4,
 625 |   laneSize: 4,
 626 |   buffer: _f32x4,
 627 |   view: Float32Array,
 628 |   mulFn: binaryMul,
 629 |   fns: ["check", "splat", "replaceLane", "select",
 630 |         "equal", "notEqual", "lessThan", "lessThanOrEqual", "greaterThan", "greaterThanOrEqual",
 631 |         "add", "sub", "mul", "div", "neg", "abs", "min", "max", "minNum", "maxNum",
 632 |         "reciprocalApproximation", "reciprocalSqrtApproximation", "sqrt",
 633 |         "load", "load1", "load2", "load3", "store", "store1", "store2", "store3"],
 634 | }
 635 | 
 636 | var int32x4 = {
 637 |   name: "Int32x4",
 638 |   fn: SIMD.Int32x4,
 639 |   lanes: 4,
 640 |   laneSize: 4,
 641 |   minVal: -0x80000000,
 642 |   maxVal: 0x7FFFFFFF,
 643 |   buffer: _i32x4,
 644 |   notFn: unaryBitwiseNot,
 645 |   view: Int32Array,
 646 |   mulFn: binaryImul,
 647 |   fns: ["check", "splat", "replaceLane", "select",
 648 |         "equal", "notEqual", "lessThan", "lessThanOrEqual", "greaterThan", "greaterThanOrEqual",
 649 |         "and", "or", "xor", "not",
 650 |         "add", "sub", "mul", "neg",
 651 |         "shiftLeftByScalar", "shiftRightByScalar",
 652 |         "load", "load1", "load2", "load3", "store", "store1", "store2", "store3"],
 653 | }
 654 | 
 655 | var int16x8 = {
 656 |   name: "Int16x8",
 657 |   fn: SIMD.Int16x8,
 658 |   lanes: 8,
 659 |   laneSize: 2,
 660 |   minVal: -0x8000,
 661 |   maxVal: 0x7FFF,
 662 |   buffer: _i16x8,
 663 |   notFn: unaryBitwiseNot,
 664 |   view: Int16Array,
 665 |   mulFn: binaryMul,
 666 |   fns: ["check", "splat", "replaceLane", "select",
 667 |         "equal", "notEqual", "lessThan", "lessThanOrEqual", "greaterThan", "greaterThanOrEqual",
 668 |         "and", "or", "xor", "not",
 669 |         "add", "sub", "mul", "neg",
 670 |         "shiftLeftByScalar", "shiftRightByScalar",
 671 |         "addSaturate", "subSaturate",
 672 |         "load", "store"],
 673 | }
 674 | 
 675 | var int8x16 = {
 676 |   name: "Int8x16",
 677 |   fn: SIMD.Int8x16,
 678 |   lanes: 16,
 679 |   laneSize: 1,
 680 |   minVal: -0x80,
 681 |   maxVal: 0x7F,
 682 |   buffer: _i8x16,
 683 |   notFn: unaryBitwiseNot,
 684 |   view: Int8Array,
 685 |   mulFn: binaryMul,
 686 |   fns: ["check", "splat", "replaceLane", "select",
 687 |         "equal", "notEqual", "lessThan", "lessThanOrEqual", "greaterThan", "greaterThanOrEqual",
 688 |         "and", "or", "xor", "not",
 689 |         "add", "sub", "mul", "neg",
 690 |         "shiftLeftByScalar", "shiftRightByScalar",
 691 |         "addSaturate", "subSaturate",
 692 |         "load", "store"],
 693 | }
 694 | 
 695 | var uint32x4 = {
 696 |   name: "Uint32x4",
 697 |   fn: SIMD.Uint32x4,
 698 |   lanes: 4,
 699 |   laneSize: 4,
 700 |   minVal: 0,
 701 |   maxVal: 0xFFFFFFFF,
 702 |   unsigned: true,
 703 |   buffer: _ui32x4,
 704 |   notFn: unaryBitwiseNot,
 705 |   view: Uint32Array,
 706 |   mulFn: binaryImul,
 707 |   fns: ["check", "splat", "replaceLane", "select",
 708 |         "equal", "notEqual", "lessThan", "lessThanOrEqual", "greaterThan", "greaterThanOrEqual",
 709 |         "and", "or", "xor", "not",
 710 |         "add", "sub", "mul",
 711 |         "shiftLeftByScalar", "shiftRightByScalar",
 712 |         "load", "load1", "load2", "load3", "store", "store1", "store2", "store3"],
 713 | }
 714 | 
 715 | var uint16x8 = {
 716 |   name: "Uint16x8",
 717 |   fn: SIMD.Uint16x8,
 718 |   lanes: 8,
 719 |   laneSize: 2,
 720 |   unsigned: true,
 721 |   minVal: 0,
 722 |   maxVal: 0xFFFF,
 723 |   buffer: _ui16x8,
 724 |   notFn: unaryBitwiseNot,
 725 |   view: Uint16Array,
 726 |   mulFn: binaryMul,
 727 |   fns: ["check", "splat", "replaceLane", "select",
 728 |         "equal", "notEqual", "lessThan", "lessThanOrEqual", "greaterThan", "greaterThanOrEqual",
 729 |         "and", "or", "xor", "not",
 730 |         "add", "sub", "mul",
 731 |         "shiftLeftByScalar", "shiftRightByScalar",
 732 |         "addSaturate", "subSaturate",
 733 |         "load", "store"],
 734 | }
 735 | 
 736 | var uint8x16 = {
 737 |   name: "Uint8x16",
 738 |   fn: SIMD.Uint8x16,
 739 |   lanes: 16,
 740 |   laneSize: 1,
 741 |   unsigned: true,
 742 |   minVal: 0,
 743 |   maxVal: 0xFF,
 744 |   buffer: _ui8x16,
 745 |   notFn: unaryBitwiseNot,
 746 |   view: Uint8Array,
 747 |   mulFn: binaryMul,
 748 |   fns: ["check", "splat", "replaceLane", "select",
 749 |         "equal", "notEqual", "lessThan", "lessThanOrEqual", "greaterThan", "greaterThanOrEqual",
 750 |         "and", "or", "xor", "not",
 751 |         "add", "sub", "mul",
 752 |         "shiftLeftByScalar", "shiftRightByScalar",
 753 |         "addSaturate", "subSaturate",
 754 |         "load", "store"],
 755 | }
 756 | 
 757 | var bool32x4 = {
 758 |   name: "Bool32x4",
 759 |   fn: SIMD.Bool32x4,
 760 |   lanes: 4,
 761 |   laneSize: 4,
 762 |   notFn: unaryLogicalNot,
 763 |   fns: ["check", "splat", "replaceLane",
 764 |         "allTrue", "anyTrue", "and", "or", "xor", "not"],
 765 | }
 766 | 
 767 | var bool16x8 = {
 768 |   name: "Bool16x8",
 769 |   fn: SIMD.Bool16x8,
 770 |   lanes: 8,
 771 |   laneSize: 2,
 772 |   notFn: unaryLogicalNot,
 773 |   fns: ["check", "splat", "replaceLane",
 774 |         "allTrue", "anyTrue", "and", "or", "xor", "not"],
 775 | }
 776 | 
 777 | var bool8x16 = {
 778 |   name: "Bool8x16",
 779 |   fn: SIMD.Bool8x16,
 780 |   lanes: 16,
 781 |   laneSize: 1,
 782 |   notFn: unaryLogicalNot,
 783 |   fns: ["check", "splat", "replaceLane",
 784 |         "allTrue", "anyTrue", "and", "or", "xor", "not"],
 785 | }
 786 | 
 787 | // Each SIMD type has a corresponding Boolean SIMD type, which is returned by
 788 | // relational ops.
 789 | float32x4.boolType = int32x4.boolType = uint32x4.boolType = bool32x4;
 790 | int16x8.boolType = uint16x8.boolType = bool16x8;
 791 | int8x16.boolType = uint8x16.boolType = bool8x16;
 792 | 
 793 | // SIMD from<type> types.
 794 | float32x4.from = [int32x4, uint32x4];
 795 | int32x4.from = [float32x4, uint32x4];
 796 | int16x8.from = [uint16x8];
 797 | int8x16.from = [uint8x16];
 798 | uint32x4.from = [float32x4, int32x4];
 799 | uint16x8.from = [int16x8];
 800 | uint8x16.from = [int8x16];
 801 | 
 802 | // SIMD from<type>Bits types.
 803 | float32x4.fromBits = [int32x4, int16x8, int8x16, uint32x4, uint16x8, uint8x16];
 804 | int32x4.fromBits = [float32x4, int16x8, int8x16, uint32x4, uint16x8, uint8x16];
 805 | int16x8.fromBits = [float32x4, int32x4, int8x16, uint32x4, uint16x8, uint8x16];
 806 | int8x16.fromBits = [float32x4, int32x4, int16x8, uint32x4, uint16x8, uint8x16];
 807 | uint32x4.fromBits = [float32x4, int32x4, int16x8, int8x16, uint16x8, uint8x16];
 808 | uint16x8.fromBits = [float32x4, int32x4, int16x8, int8x16, uint32x4, uint8x16];
 809 | uint8x16.fromBits = [float32x4, int32x4, int16x8, int8x16, uint32x4, uint16x8];
 810 | 
 811 | var simdTypes = [float32x4,
 812 |                  int32x4, int16x8, int8x16,
 813 |                  uint32x4, uint16x8, uint8x16,
 814 |                  bool32x4, bool16x8, bool8x16];
 815 | 
 816 | // SIMD Phase2 types.
 817 | 
 818 | if (typeof simdPhase2 !== 'undefined') {
 819 |   // Float64x2
 820 |   if (typeof SIMD.Float64x2 === "undefined" ||
 821 |       typeof SIMD.Float64x2.extractLane === "undefined") {
 822 |     SIMD.Float64x2 = function(s0, s1) {
 823 |       if (!(this instanceof SIMD.Float64x2)) {
 824 |         return new SIMD.Float64x2(s0, s1);
 825 |       }
 826 |       this.s_ = convertArray(_f64x2, new Float64Array([s0, s1]));
 827 |     }
 828 | 
 829 |     SIMD.Float64x2.extractLane = function(v, i) {
 830 |       v = SIMD.Float64x2.check(v);
 831 |       simdCheckLaneIndex(i, 2);
 832 |       return v.s_[i];
 833 |     }
 834 |   }
 835 | 
 836 |   if (typeof SIMD.Float64x2.swizzle === "undefined") {
 837 |     SIMD.Float64x2.swizzle = function(a, s0, s1) {
 838 |       return simdSwizzle(float64x2, a, [s0, s1]);
 839 |     }
 840 |   }
 841 | 
 842 |   if (typeof SIMD.Float64x2.shuffle === "undefined") {
 843 |     SIMD.Float64x2.shuffle = function(a, b, s0, s1) {
 844 |       return simdShuffle(float64x2, a, b, [s0, s1]);
 845 |     }
 846 |   }
 847 | 
 848 |   // Bool64x2
 849 |   if (typeof SIMD.Bool64x2 === "undefined" ||
 850 |       typeof SIMD.Bool64x2.extractLane === "undefined") {
 851 |     SIMD.Bool64x2 = function(s0, s1) {
 852 |       if (!(this instanceof SIMD.Bool64x2)) {
 853 |         return new SIMD.Bool64x2(s0, s1);
 854 |       }
 855 |       this.s_ = [!!s0, !!s1];
 856 |     }
 857 | 
 858 |     SIMD.Bool64x2.extractLane = function(v, i) {
 859 |       v = SIMD.Bool64x2.check(v);
 860 |       simdCheckLaneIndex(i, 2);
 861 |       return v.s_[i];
 862 |     }
 863 |   }
 864 | 
 865 |   var float64x2 = {
 866 |     name: "Float64x2",
 867 |     fn: SIMD.Float64x2,
 868 |     lanes: 2,
 869 |     laneSize: 8,
 870 |     buffer: _f64x2,
 871 |     view: Float64Array,
 872 |     mulFn: binaryMul,
 873 |     fns: ["check", "splat", "replaceLane", "select",
 874 |           "equal", "notEqual", "lessThan", "lessThanOrEqual", "greaterThan", "greaterThanOrEqual",
 875 |           "add", "sub", "mul", "div", "neg", "abs", "min", "max", "minNum", "maxNum",
 876 |           "reciprocalApproximation", "reciprocalSqrtApproximation", "sqrt",
 877 |           "load", "store"],
 878 |   }
 879 | 
 880 |   var bool64x2 = {
 881 |     name: "Bool64x2",
 882 |     fn: SIMD.Bool64x2,
 883 |     lanes: 2,
 884 |     laneSize: 8,
 885 |     notFn: unaryLogicalNot,
 886 |     fns: ["check", "splat", "replaceLane",
 887 |           "allTrue", "anyTrue", "and", "or", "xor", "not"],
 888 |   }
 889 | 
 890 |   float64x2.boolType = bool64x2;
 891 | 
 892 |   float32x4.fromBits.push(float64x2);
 893 |   int32x4.fromBits.push(float64x2);
 894 |   int16x8.fromBits.push(float64x2);
 895 |   int8x16.fromBits.push(float64x2);
 896 |   uint32x4.fromBits.push(float64x2);
 897 |   uint16x8.fromBits.push(float64x2);
 898 |   uint8x16.fromBits.push(float64x2);
 899 | 
 900 |   float64x2.fromBits = [float32x4, int32x4, int16x8, int8x16,
 901 |                         uint32x4, uint16x8, uint8x16];
 902 | 
 903 |   simdTypes.push(float64x2);
 904 |   simdTypes.push(bool64x2);
 905 | }
 906 | 
 907 | // SIMD prototype functions.
 908 | var prototypeFns = {
 909 |   valueOf:
 910 |     function(type) {
 911 |       return function() {
 912 |         throw new TypeError(type.name + " cannot be converted to a number");
 913 |       }
 914 |     },
 915 | 
 916 |   toString:
 917 |     function(type) {
 918 |       return function() {
 919 |         return simdToString(type, this);
 920 |       }
 921 |     },
 922 | 
 923 |   toLocaleString:
 924 |     function(type) {
 925 |       return function() {
 926 |         return simdToLocaleString(type, this);
 927 |       }
 928 |     },
 929 | };
 930 | 
 931 | // SIMD constructor functions.
 932 | 
 933 | var simdFns = {
 934 |   check:
 935 |     function(type) {
 936 |       return function(a) {
 937 |         if (!(a instanceof type.fn)) {
 938 |           throw new TypeError("Argument is not a " + type.name + ".");
 939 |         }
 940 |         return a;
 941 |       }
 942 |     },
 943 | 
 944 |   splat:
 945 |     function(type) {
 946 |       return function(s) { return simdSplat(type, s); }
 947 |     },
 948 | 
 949 |   replaceLane:
 950 |     function(type) {
 951 |       return function(a, i, s) { return simdReplaceLane(type, a, i, s); }
 952 |     },
 953 | 
 954 |   allTrue:
 955 |     function(type) {
 956 |       return function(a) { return simdAllTrue(type, a); }
 957 |     },
 958 | 
 959 |   anyTrue:
 960 |     function(type) {
 961 |       return function(a) { return simdAnyTrue(type, a); }
 962 |     },
 963 | 
 964 |   and:
 965 |     function(type) {
 966 |       return function(a, b) {
 967 |         return simdBinaryOp(type, binaryAnd, a, b);
 968 |       }
 969 |     },
 970 | 
 971 |   or:
 972 |     function(type) {
 973 |       return function(a, b) {
 974 |         return simdBinaryOp(type, binaryOr, a, b);
 975 |       }
 976 |     },
 977 | 
 978 |   xor:
 979 |     function(type) {
 980 |       return function(a, b) {
 981 |         return simdBinaryOp(type, binaryXor, a, b);
 982 |       }
 983 |     },
 984 | 
 985 |   not:
 986 |     function(type) {
 987 |       return function(a) {
 988 |         return simdUnaryOp(type, type.notFn, a);
 989 |       }
 990 |     },
 991 | 
 992 |   equal:
 993 |     function(type) {
 994 |       return function(a, b) {
 995 |         return simdRelationalOp(type, binaryEqual, a, b);
 996 |       }
 997 |     },
 998 | 
 999 |   notEqual:
1000 |     function(type) {
1001 |       return function(a, b) {
1002 |         return simdRelationalOp(type, binaryNotEqual, a, b);
1003 |       }
1004 |     },
1005 | 
1006 |   lessThan:
1007 |     function(type) {
1008 |       return function(a, b) {
1009 |         return simdRelationalOp(type, binaryLess, a, b);
1010 |       }
1011 |     },
1012 | 
1013 |   lessThanOrEqual:
1014 |     function(type) {
1015 |       return function(a, b) {
1016 |         return simdRelationalOp(type, binaryLessEqual, a, b);
1017 |       }
1018 |     },
1019 | 
1020 |   greaterThan:
1021 |     function(type) {
1022 |       return function(a, b) {
1023 |         return simdRelationalOp(type, binaryGreater, a, b);
1024 |       }
1025 |     },
1026 | 
1027 |   greaterThanOrEqual:
1028 |     function(type) {
1029 |       return function(a, b) {
1030 |         return simdRelationalOp(type, binaryGreaterEqual, a, b);
1031 |       }
1032 |     },
1033 | 
1034 |   add:
1035 |     function(type) {
1036 |       return function(a, b) {
1037 |         return simdBinaryOp(type, binaryAdd, a, b);
1038 |       }
1039 |     },
1040 | 
1041 |   sub:
1042 |     function(type) {
1043 |       return function(a, b) {
1044 |         return simdBinaryOp(type, binarySub, a, b);
1045 |       }
1046 |     },
1047 | 
1048 |   mul:
1049 |     function(type) {
1050 |       return function(a, b) {
1051 |         return simdBinaryOp(type, type.mulFn, a, b);
1052 |       }
1053 |     },
1054 | 
1055 |   div:
1056 |     function(type) {
1057 |       return function(a, b) {
1058 |         return simdBinaryOp(type, binaryDiv, a, b);
1059 |       }
1060 |     },
1061 | 
1062 |   neg:
1063 |     function(type) {
1064 |       return function(a) {
1065 |         return simdUnaryOp(type, unaryNeg, a);
1066 |       }
1067 |     },
1068 | 
1069 |   abs:
1070 |     function(type) {
1071 |       return function(a) {
1072 |         return simdUnaryOp(type, Math.abs, a);
1073 |       }
1074 |     },
1075 | 
1076 |   min:
1077 |     function(type) {
1078 |       return function(a, b) {
1079 |         return simdBinaryOp(type, Math.min, a, b);
1080 |       }
1081 |     },
1082 | 
1083 |   max:
1084 |     function(type) {
1085 |       return function(a, b) {
1086 |         return simdBinaryOp(type, Math.max, a, b);
1087 |       }
1088 |     },
1089 | 
1090 |   minNum:
1091 |     function(type) {
1092 |       return function(a, b) {
1093 |         return simdBinaryOp(type, minNum, a, b);
1094 |       }
1095 |     },
1096 | 
1097 |   maxNum:
1098 |     function(type) {
1099 |       return function(a, b) {
1100 |         return simdBinaryOp(type, maxNum, a, b);
1101 |       }
1102 |     },
1103 | 
1104 |   load:
1105 |     function(type) {
1106 |       return function(tarray, index) {
1107 |         return simdLoad(type, tarray, index, type.lanes);
1108 |       }
1109 |     },
1110 | 
1111 |   load1:
1112 |     function(type) {
1113 |       return function(tarray, index) {
1114 |         return simdLoad(type, tarray, index, 1);
1115 |       }
1116 |     },
1117 | 
1118 |   load2:
1119 |     function(type) {
1120 |       return function(tarray, index) {
1121 |         return simdLoad(type, tarray, index, 2);
1122 |       }
1123 |     },
1124 | 
1125 |   load3:
1126 |     function(type) {
1127 |       return function(tarray, index) {
1128 |         return simdLoad(type, tarray, index, 3);
1129 |       }
1130 |     },
1131 | 
1132 |   store:
1133 |     function(type) {
1134 |       return function(tarray, index, a) {
1135 |         return simdStore(type, tarray, index, a, type.lanes);
1136 |       }
1137 |     },
1138 | 
1139 |   store1:
1140 |     function(type) {
1141 |       return function(tarray, index, a) {
1142 |         return simdStore(type, tarray, index, a, 1);
1143 |       }
1144 |     },
1145 | 
1146 |   store2:
1147 |     function(type) {
1148 |       return function(tarray, index, a) {
1149 |         return simdStore(type, tarray, index, a, 2);
1150 |       }
1151 |     },
1152 | 
1153 |   store3:
1154 |     function(type) {
1155 |       return function(tarray, index, a) {
1156 |         return simdStore(type, tarray, index, a, 3);
1157 |       }
1158 |     },
1159 | 
1160 |   select:
1161 |     function(type) {
1162 |       return function(selector, a, b) {
1163 |         return simdSelect(type, selector, a, b);
1164 |       }
1165 |     },
1166 | 
1167 | 
1168 |   reciprocalApproximation:
1169 |     function(type) {
1170 |       return function(a) {
1171 |         a = type.fn.check(a);
1172 |         return type.fn.div(type.fn.splat(1.0), a);
1173 |       }
1174 |     },
1175 | 
1176 |   reciprocalSqrtApproximation:
1177 |     function(type) {
1178 |       return function(a) {
1179 |         a = type.fn.check(a);
1180 |         return type.fn.reciprocalApproximation(type.fn.sqrt(a));
1181 |       }
1182 |     },
1183 | 
1184 |   sqrt:
1185 |     function(type) {
1186 |       return function(a) {
1187 |         return simdUnaryOp(type, Math.sqrt, a);
1188 |       }
1189 |     },
1190 | 
1191 |   shiftLeftByScalar:
1192 |     function(type) {
1193 |       return function(a, bits) {
1194 |         bits &= type.laneSize * 8 - 1;
1195 |         return simdShiftOp(type, binaryShiftLeft, a, bits);
1196 |       }
1197 |     },
1198 | 
1199 |   shiftRightByScalar:
1200 |     function(type) {
1201 |       if (type.unsigned) {
1202 |         return function(a, bits) {
1203 |           bits &= type.laneSize * 8 - 1;
1204 |           return simdShiftOp(type, binaryShiftRightLogical, a, bits);
1205 |         }
1206 |       } else {
1207 |         return function(a, bits) {
1208 |           bits &= type.laneSize * 8 - 1;
1209 |           return simdShiftOp(type, binaryShiftRightArithmetic, a, bits);
1210 |         }
1211 |       }
1212 |     },
1213 | 
1214 |   addSaturate:
1215 |     function(type) {
1216 |       function addSaturate(a, b) {
1217 |         return clamp(a + b, type.minVal, type.maxVal);
1218 |       }
1219 |       return function(a, b) { return simdBinaryOp(type, addSaturate, a, b); }
1220 |     },
1221 | 
1222 |   subSaturate:
1223 |     function(type) {
1224 |       function subSaturate(a, b) {
1225 |         return clamp(a - b, type.minVal, type.maxVal);
1226 |       }
1227 |       return function(a, b) { return simdBinaryOp(type, subSaturate, a, b); }
1228 |     },
1229 | }
1230 | 
// Install functions.
//
// For every descriptor in simdTypes, fill in whatever the environment does not
// already provide: prototype methods, the constructor functions named in
// type.fns, and the from<Type> / from<Type>Bits conversions.

simdTypes.forEach(function(type) {
  var ctor = type.fn;
  var proto = ctor.prototype;

  // Prototype methods: only added when the prototype lacks its own version.
  for (var protoName in prototypeFns) {
    if (proto.hasOwnProperty(protoName))
      continue;
    proto[protoName] = prototypeFns[protoName](type);
  }

  // Constructor functions listed by the descriptor.
  type.fns.forEach(function(fnName) {
    if (typeof ctor[fnName] !== "undefined")
      return;
    ctor[fnName] = simdFns[fnName](type);
  });

  // Installs "from" + <source name> + suffix for each source descriptor,
  // unless the constructor already has a function of that name.
  function installConversions(sources, suffix, convert) {
    if (!sources)
      return;
    sources.forEach(function(fromType) {
      var conversionName = "from" + fromType.name + suffix;
      if (typeof ctor[conversionName] === "undefined") {
        ctor[conversionName] = function(a) { return convert(fromType, a); };
      }
    });
  }

  // Value conversions (from<Type>).
  installConversions(type.from, "", function(fromType, a) {
    return simdFrom(type, fromType, a);
  });

  // Bit reinterpretations (from<Type>Bits).
  installConversions(type.fromBits, "Bits", function(fromType, a) {
    return simdFromBits(type, fromType, a);
  });
});
1269 | 
1270 | // If we're in a browser, the global namespace is named 'window'. If we're
1271 | // in node, it's named 'global'. If we're in a web worker, it's named
1272 | // 'self'. If we're in a shell, 'this' might work.
1273 | })(typeof window !== "undefined"
1274 |    ? window
1275 |    : (typeof process === 'object' &&
1276 |       typeof require === 'function' &&
1277 |       typeof global === 'object')
1278 |      ? global
1279 |      : typeof self === 'object'
1280 |        ? self
1281 |        : this);
1282 | 


--------------------------------------------------------------------------------
/src/ecmascript_simd_tests.js:
--------------------------------------------------------------------------------
   1 | /*
   2 |   Copyright (C) 2013
   3 | 
   4 |   This software is provided 'as-is', without any express or implied
   5 |   warranty.  In no event will the authors be held liable for any damages
   6 |   arising from the use of this software.
   7 | 
   8 |   Permission is granted to anyone to use this software for any purpose,
   9 |   including commercial applications, and to alter it and redistribute it
  10 |   freely, subject to the following restrictions:
  11 | 
  12 |   1. The origin of this software must not be misrepresented; you must not
  13 |      claim that you wrote the original software. If you use this software
  14 |      in a product, an acknowledgment in the product documentation would be
  15 |      appreciated but is not required.
  16 |   2. Altered source versions must be plainly marked as such, and must not be
  17 |      misrepresented as being the original software.
  18 |   3. This notice may not be removed or altered from any source distribution.
  19 | */
  20 | 
  21 | function minNum(x, y) {
  22 |   return x != x ? y :
  23 |          y != y ? x :
  24 |          Math.min(x, y);
  25 | }
  26 | 
  27 | function maxNum(x, y) {
  28 |   return x != x ? y :
  29 |          y != y ? x :
  30 |          Math.max(x, y);
  31 | }
  32 | 
  33 | function sameValue(x, y) {
  34 |   if (x == y)
  35 |     return x != 0 || y != 0 || (1/x == 1/y);
  36 | 
  37 |   return x != x && y != y;
  38 | }
  39 | 
  40 | function sameValueZero(x, y) {
  41 |   if (x == y) return true;
  42 |   return x != x & y != y;
  43 | }
  44 | 
  45 | function binaryMul(a, b) { return a * b; }
  46 | var binaryImul;
  47 | if (typeof Math.imul !== 'undefined') {
  48 |   binaryImul = Math.imul;
  49 | } else {
  50 |   binaryImul = function(a, b) {
  51 |     var ah = (a >>> 16) & 0xffff;
  52 |     var al = a & 0xffff;
  53 |     var bh = (b >>> 16) & 0xffff;
  54 |     var bl = b & 0xffff;
  55 |     // the shift by 0 fixes the sign on the high part
  56 |     // the final |0 converts the unsigned value into a signed value
  57 |     return ((al * bl) + (((ah * bl + al * bh) << 16) >>> 0)|0);
  58 |   };
  59 | }
  60 | 
// Scratch typed-array views referenced by the type descriptors' `buffer`
// fields. _f32x4 allocates the storage (4 x float32); every other view is
// constructed over that same ArrayBuffer, so all of them alias the same bytes.
var _f32x4 = new Float32Array(4);
var _f64x2 = new Float64Array(_f32x4.buffer);
var _i32x4 = new Int32Array(_f32x4.buffer);
var _i16x8 = new Int16Array(_f32x4.buffer);
var _i8x16 = new Int8Array(_f32x4.buffer);
var _ui32x4 = new Uint32Array(_f32x4.buffer);
var _ui16x8 = new Uint16Array(_f32x4.buffer);
var _ui8x16 = new Uint8Array(_f32x4.buffer);
  69 | 
  70 | var float32x4 = {
  71 |   name: "Float32x4",
  72 |   fn: SIMD.Float32x4,
  73 |   floatLane: true,
  74 |   signed: true,
  75 |   numerical: true,
  76 |   lanes: 4,
  77 |   laneSize: 4,
  78 |   interestingValues: [0, -0, 1, -1, 0.9, -0.9, 1.414, 0x7F, -0x80, -0x8000, -0x80000000, 0x7FFF, 0x7FFFFFFF, Infinity, -Infinity, NaN],
  79 |   view: Float32Array,
  80 |   buffer: _f32x4,
  81 |   mulFn: binaryMul,
  82 | }
  83 | 
  84 | var int32x4 = {
  85 |   name: "Int32x4",
  86 |   fn: SIMD.Int32x4,
  87 |   intLane: true,
  88 |   signed: true,
  89 |   numerical: true,
  90 |   logical: true,
  91 |   lanes: 4,
  92 |   laneSize: 4,
  93 |   minVal: -0x80000000,
  94 |   maxVal: 0x7FFFFFFF,
  95 |   interestingValues: [0, 1, -1, 0x40000000, 0x7FFFFFFF, -0x80000000],
  96 |   view: Int32Array,
  97 |   buffer: _i32x4,
  98 |   mulFn: binaryImul,
  99 | }
 100 | 
 101 | var int16x8 = {
 102 |   name: "Int16x8",
 103 |   fn: SIMD.Int16x8,
 104 |   intLane: true,
 105 |   signed: true,
 106 |   numerical: true,
 107 |   logical: true,
 108 |   lanes: 8,
 109 |   laneSize: 2,
 110 |   laneMask: 0xFFFF,
 111 |   minVal: -0x8000,
 112 |   maxVal: 0x7FFF,
 113 |   interestingValues: [0, 1, -1, 0x4000, 0x7FFF, -0x8000],
 114 |   view: Int16Array,
 115 |   buffer: _i16x8,
 116 |   mulFn: binaryMul,
 117 | }
 118 | 
 119 | var int8x16 = {
 120 |   name: "Int8x16",
 121 |   fn: SIMD.Int8x16,
 122 |   intLane: true,
 123 |   signed: true,
 124 |   numerical: true,
 125 |   logical: true,
 126 |   lanes: 16,
 127 |   laneSize: 1,
 128 |   laneMask: 0xFF,
 129 |   minVal: -0x80,
 130 |   maxVal: 0x7F,
 131 |   interestingValues: [0, 1, -1, 0x40, 0x7F, -0x80],
 132 |   view: Int8Array,
 133 |   buffer: _i8x16,
 134 |   mulFn: binaryMul,
 135 | }
 136 | 
 137 | var uint32x4 = {
 138 |   name: "Uint32x4",
 139 |   fn: SIMD.Uint32x4,
 140 |   intLane: true,
 141 |   unsigned: true,
 142 |   numerical: true,
 143 |   logical: true,
 144 |   lanes: 4,
 145 |   laneSize: 4,
 146 |   minVal: 0,
 147 |   maxVal: 0xFFFFFFFF,
 148 |   interestingValues: [0, 1, 0x40000000, 0x7FFFFFFF, 0xFFFFFFFF],
 149 |   view: Uint32Array,
 150 |   buffer: _ui32x4,
 151 |   mulFn: binaryImul,
 152 | }
 153 | 
 154 | var uint16x8 = {
 155 |   name: "Uint16x8",
 156 |   fn: SIMD.Uint16x8,
 157 |   intLane: true,
 158 |   unsigned: true,
 159 |   numerical: true,
 160 |   logical: true,
 161 |   lanes: 8,
 162 |   laneSize: 2,
 163 |   laneMask: 0xFFFF,
 164 |   minVal: 0,
 165 |   maxVal: 0xFFFF,
 166 |   interestingValues: [0, 1, 0x4000, 0x7FFF, 0xFFFF],
 167 |   view: Uint16Array,
 168 |   buffer: _ui16x8,
 169 |   mulFn: binaryMul,
 170 | }
 171 | 
 172 | var uint8x16 = {
 173 |   name: "Uint8x16",
 174 |   fn: SIMD.Uint8x16,
 175 |   intLane: true,
 176 |   unsigned: true,
 177 |   numerical: true,
 178 |   logical: true,
 179 |   lanes: 16,
 180 |   laneSize: 1,
 181 |   laneMask: 0xFF,
 182 |   minVal: 0,
 183 |   maxVal: 0xFF,
 184 |   interestingValues: [0, 1, 0x40, 0x7F, 0xFF],
 185 |   view: Int8Array,
 186 |   buffer: _ui8x16,
 187 |   mulFn: binaryMul,
 188 | }
 189 | 
 190 | var bool32x4 = {
 191 |   name: "Bool32x4",
 192 |   fn: SIMD.Bool32x4,
 193 |   boolLane: true,
 194 |   logical: true,
 195 |   lanes: 4,
 196 |   laneSize: 4,
 197 |   interestingValues: [true, false],
 198 | }
 199 | 
 200 | var bool16x8 = {
 201 |   name: "Bool16x8",
 202 |   fn: SIMD.Bool16x8,
 203 |   boolLane: true,
 204 |   logical: true,
 205 |   lanes: 8,
 206 |   laneSize: 2,
 207 |   interestingValues: [true, false],
 208 | }
 209 | 
 210 | var bool8x16 = {
 211 |   name: "Bool8x16",
 212 |   fn: SIMD.Bool8x16,
 213 |   boolLane: true,
 214 |   logical: true,
 215 |   lanes: 16,
 216 |   laneSize: 1,
 217 |   interestingValues: [true, false],
 218 | }
 219 | 
 220 | // Filter functions.
 221 | function isFloatType(type) { return type.floatLane; }
 222 | function isIntType(type) { return type.intLane; }
 223 | function isBoolType(type) { return type.boolLane; }
 224 | function isNumerical(type) { return type.numerical; }
 225 | function isLogical(type) { return type.logical; }
 226 | function isSigned(type) { return type.signed; }
 227 | function isSignedIntType(type) { return type.intLane && type.signed; }
 228 | function isUnsignedIntType(type) { return type.intLane && type.unsigned; }
 229 | function isSmallIntType(type) { return type.intLane && type.lanes >= 8; }
 230 | function isSmallUnsignedIntType(type) { return type.intLane && type.unsigned && type.lanes >= 8; }
 231 | function hasLoadStore123(type) { return !type.boolLane && type.lanes == 4; }
 232 | 
 233 | // Each SIMD type has a corresponding Boolean SIMD type, which is returned by
 234 | // relational ops.
 235 | float32x4.boolType = int32x4.boolType = uint32x4.boolType = bool32x4;
 236 | int16x8.boolType = uint16x8.boolType = bool16x8;
 237 | int8x16.boolType = uint8x16.boolType = bool8x16;
 238 | 
 239 | // SIMD fromTIMD types.
 240 | float32x4.from = [int32x4, uint32x4];
 241 | int32x4.from = [float32x4, uint32x4];
 242 | int16x8.from = [uint16x8];
 243 | int8x16.from = [uint8x16];
 244 | uint32x4.from = [float32x4, int32x4];
 245 | uint16x8.from = [int16x8];
 246 | uint8x16.from = [int8x16];
 247 | 
 248 | // SIMD fromBits types.
 249 | float32x4.fromBits = [int32x4, int16x8, int8x16, uint32x4, uint16x8, uint8x16];
 250 | int32x4.fromBits = [float32x4, int16x8, int8x16, uint32x4, uint16x8, uint8x16];
 251 | int16x8.fromBits = [float32x4, int32x4, int8x16, uint32x4, uint16x8, uint8x16];
 252 | int8x16.fromBits = [float32x4, int32x4, int16x8, uint32x4, uint16x8, uint8x16];
 253 | uint32x4.fromBits = [float32x4, int32x4, int16x8, int8x16, uint16x8, uint8x16];
 254 | uint16x8.fromBits = [float32x4, int32x4, int16x8, int8x16, uint32x4, uint8x16];
 255 | uint8x16.fromBits = [float32x4, int32x4, int16x8, int8x16, uint32x4, uint16x8];
 256 | 
 257 | var simdTypes = [float32x4,
 258 |                  int32x4, int16x8, int8x16,
 259 |                  uint32x4, uint16x8, uint8x16,
 260 |                  bool32x4, bool16x8, bool8x16];
 261 | 
 262 | if (typeof simdPhase2 !== 'undefined') {
 263 |   var float64x2 = {
 264 |     name: "Float64x2",
 265 |     fn: SIMD.Float64x2,
 266 |     floatLane: true,
 267 |     signed: true,
 268 |     numerical: true,
 269 |     lanes: 2,
 270 |     laneSize: 8,
 271 |     interestingValues: [0, -0, 1, -1, 1.414, 0x7F, -0x80, -0x8000, -0x80000000, 0x7FFF, 0x7FFFFFFF, Infinity, -Infinity, NaN],
 272 |     view: Float64Array,
 273 |     buffer: _f64x2,
 274 |     mulFn: binaryMul,
 275 |   }
 276 | 
 277 |   var bool64x2 = {
 278 |     name: "Bool64x2",
 279 |     fn: SIMD.Bool64x2,
 280 |     boolLane: true,
 281 |     lanes: 2,
 282 |     laneSize: 8,
 283 |     interestingValues: [true, false],
 284 |   }
 285 | 
 286 |   float64x2.boolType = bool64x2;
 287 | 
 288 |   float32x4.fromBits.push(float64x2);
 289 |   int32x4.fromBits.push(float64x2);
 290 |   int16x8.fromBits.push(float64x2);
 291 |   int8x16.fromBits.push(float64x2);
 292 |   uint32x4.fromBits.push(float64x2);
 293 |   uint16x8.fromBits.push(float64x2);
 294 |   uint8x16.fromBits.push(float64x2);
 295 | 
 296 |   float64x2.fromBits = [float32x4, int32x4, int16x8, int8x16,
 297 |                         uint32x4, uint16x8, uint8x16];
 298 | 
 299 |   int32x4.fromBits = [float32x4, int16x8, int8x16, uint32x4, uint16x8, uint8x16];
 300 |   int16x8.fromBits = [float32x4, int32x4, int8x16, uint32x4, uint16x8, uint8x16];
 301 |   int8x16.fromBits = [float32x4, int32x4, int16x8, uint32x4, uint16x8, uint8x16];
 302 |   uint32x4.fromBits = [float32x4, int32x4, int16x8, int8x16, uint16x8, uint8x16];
 303 |   uint16x8.fromBits = [float32x4, int32x4, int16x8, int8x16, uint32x4, uint8x16];
 304 |   uint8x16.fromBits = [float32x4, int32x4, int16x8, int8x16, uint32x4, uint16x8];
 305 | 
 306 |   simdTypes.push(float64x2);
 307 |   simdTypes.push(bool64x2);
 308 | }
 309 | 
 310 | // SIMD reference functions.
 311 | 
 312 | function simdConvert(type, value) {
 313 |   if (type.buffer === undefined) return !!value;  // bool types
 314 |   type.buffer[0] = value;
 315 |   return type.buffer[0];
 316 | }
 317 | 
 318 | // Reference implementation of toString.
 319 | function simdToString(type, value) {
 320 |   value = type.fn.check(value);
 321 |   var str = "SIMD." + type.name + "(";
 322 |   str += type.fn.extractLane(value, 0);
 323 |   for (var i = 1; i < type.lanes; i++) {
 324 |     str += ", " + type.fn.extractLane(value, i);
 325 |   }
 326 |   return str + ")";
 327 | }
 328 | 
 329 | // Reference implementation of toLocaleString.
 330 | function simdToLocaleString(type, value) {
 331 |   value = type.fn.check(value);
 332 |   var str = "SIMD." + type.name + "(";
 333 |   str += type.fn.extractLane(value, 0).toLocaleString();
 334 |   for (var i = 1; i < type.lanes; i++) {
 335 |     str += ", " + type.fn.extractLane(value, i).toLocaleString();
 336 |   }
 337 |   return str + ")";
 338 | }
 339 | 
 340 | // Utility functions.
 341 | 
 342 | // Create a value for testing, with vanilla lane values, i.e. [0, 1, 2, ..]
 343 | // for numeric types, [false, true, true, ..] for boolean types. These test
 344 | // values shouldn't contain NaNs or other "interesting" values.
 345 | function createTestValue(type) {
 346 |   var lanes = [];
 347 |   for (var i = 0; i < type.lanes; i++)
 348 |     lanes.push(i);
 349 |   return type.fn.apply(type.fn, lanes);
 350 | }
 351 | 
 352 | function createSplatValue(type, v) {
 353 |   var lanes = [];
 354 |   for (var i = 0; i < type.lanes; i++)
 355 |     lanes.push(v);
 356 |   return type.fn.apply(type.fn, lanes);
 357 | }
 358 | 
 359 | function checkValue(type, a, expect) {
 360 |   var ok = true;
 361 |   for (var i = 0; i < type.lanes; i++) {
 362 |     var v = type.fn.extractLane(a, i);
 363 |     var ev = simdConvert(type, expect(i));
 364 |     if (!sameValue(ev, v) && Math.abs(ev - v) >= 0.00001)
 365 |       ok = false;
 366 |   }
 367 |   if (!ok) {
 368 |     var lanes = [];
 369 |     for (var i = 0; i < type.lanes; i++)
 370 |       lanes.push(simdConvert(type, expect(i)));
 371 |     fail('expected SIMD.' + type.name + '(' + lanes + ') but found ' + a.toString());
 372 |   }
 373 | }
 374 | 
 375 | // Test methods for the different kinds of operations.
 376 | 
 377 | // Test the constructor and splat with the given lane values.
 378 | function testConstructor(type) {
 379 |   equal('function', typeof type.fn);
 380 |   equal('function', typeof type.fn.splat);
 381 |   for (var v of type.interestingValues) {
 382 |     var expected = simdConvert(type, v);
 383 |     var result = createSplatValue(type, v);
 384 |     checkValue(type, result, function(index) { return expected; });
 385 |     // splat.
 386 |     result = type.fn.splat(v);
 387 |     checkValue(type, result, function(index) { return expected; });
 388 |   }
 389 | }
 390 | 
 391 | function testCheck(type) {
 392 |   equal('function', typeof type.fn.check);
 393 |   // Other SIMD types shouldn't check for this type.
 394 |   var a = type.fn();
 395 |   for (var otherType of simdTypes) {
 396 |     if (otherType === type) {
 397 |       var result = type.fn.check(a);
 398 |       checkValue(type, result, function(index) { return type.fn.extractLane(a, index); });
 399 |     } else {
 400 |       throws(function() { otherType.check(a); });
 401 |     }
 402 |   }
 403 |   // Neither should other types.
 404 |   for (var x of [ {}, "", 0, 1, true, false, undefined, null, NaN, Infinity]) {
 405 |     throws(function() { type.fn.check(x); });
 406 |   }
 407 | }
 408 | 
 409 | function testReplaceLane(type) {
 410 |   equal('function', typeof type.fn.replaceLane);
 411 |   var a = createTestValue(type);
 412 |   for (var v of type.interestingValues) {
 413 |     var expected = simdConvert(type, v);
 414 |     for (var i = 0; i < type.lanes; i++) {
 415 |       var result = type.fn.replaceLane(a, i, v);
 416 |       checkValue(type, result,
 417 |                  function(index) {
 418 |                    return index == i ? expected : type.fn.extractLane(a, index);
 419 |                  });
 420 |     }
 421 |   }
 422 | 
 423 |   function testIndexCheck(index) {
 424 |     throws(function() { type.fn.replaceLane(a, index, 0); });
 425 |   }
 426 |   testIndexCheck(type.lanes);
 427 |   testIndexCheck(13.37);
 428 |   testIndexCheck(null);
 429 |   testIndexCheck(undefined);
 430 |   testIndexCheck({});
 431 |   testIndexCheck(true);
 432 |   testIndexCheck('yo');
 433 |   testIndexCheck(-1);
 434 |   testIndexCheck(128);
 435 | }
 436 | 
 437 | // Compare unary op's behavior to ref op at each lane.
 438 | function testUnaryOp(type, op, refOp) {
 439 |   equal('function', typeof type.fn[op]);
 440 |   for (var v of type.interestingValues) {
 441 |     var expected = simdConvert(type, refOp(v));
 442 |     var a = type.fn.splat(v);
 443 |     var result = type.fn[op](a);
 444 |     checkValue(type, result, function(index) { return expected; });
 445 |   }
 446 | }
 447 | 
 448 | // Compare binary op's behavior to ref op at each lane with the Cartesian
 449 | // product of the given values.
 450 | function testBinaryOp(type, op, refOp) {
 451 |   equal('function', typeof type.fn[op]);
 452 |   var zero = type.fn();
 453 |   for (var av of type.interestingValues) {
 454 |     for (var bv of type.interestingValues) {
 455 |       var expected = simdConvert(type, refOp(simdConvert(type, av), simdConvert(type, bv)));
 456 |       var a = type.fn.splat(av);
 457 |       var b = type.fn.splat(bv);
 458 |       var result = type.fn[op](a, b);
 459 |       checkValue(type, result, function(index) { return expected; });
 460 |     }
 461 |   }
 462 | }
 463 | 
 464 | // Compare relational op's behavior to ref op at each lane with the Cartesian
 465 | // product of the given values.
 466 | function testRelationalOp(type, op, refOp) {
 467 |   equal('function', typeof type.fn[op]);
 468 |   var zero = type.fn();
 469 |   for (var av of type.interestingValues) {
 470 |     for (var bv of type.interestingValues) {
 471 |       var expected = refOp(simdConvert(type, av), simdConvert(type, bv));
 472 |       var a = type.fn.splat(av);
 473 |       var b = type.fn.splat(bv);
 474 |       var result = type.fn[op](a, b);
 475 |       checkValue(type.boolType, result, function(index) { return expected; });
 476 |     }
 477 |   }
 478 | }
 479 | 
 480 | // Compare shift op's behavior to ref op at each lane.
 481 | function testShiftOp(type, op, refOp) {
 482 |   equal('function', typeof type.fn[op]);
 483 |   var zero = type.fn();
 484 |   for (var v of type.interestingValues) {
 485 |     var s = type.laneSize * 8;
 486 |     for (var bits of [-1, 0, 1, 2, s - 1, s, s + 1]) {
 487 |       var expected = simdConvert(type, refOp(simdConvert(type, v), bits));
 488 |       var a = type.fn.splat(v);
 489 |       var result = type.fn[op](a, bits);
 490 |       checkValue(type, result, function(index) { return expected; });
 491 |     }
 492 |   }
 493 | }
 494 | 
 495 | function testFrom(toType, fromType, name) {
 496 |   equal('function', typeof toType.fn[name]);
 497 |   for (var v of fromType.interestingValues) {
 498 |     var fromValue = createSplatValue(fromType, v);
 499 |     v = Math.trunc(simdConvert(fromType, v));
 500 |     if (toType.minVal !== undefined &&
 501 |         !(toType.minVal <= v && v <= toType.maxVal)) {
 502 |       throws(function() { toType.fn[name](fromValue) });
 503 |     } else {
 504 |       v = simdConvert(toType, v);
 505 |       var result = toType.fn[name](fromValue);
 506 |       checkValue(toType, result, function(index) { return v; });
 507 |     }
 508 |   }
 509 | }
 510 | 
 511 | function testFromBits(toType, fromType, name) {
 512 |   equal('function', typeof toType.fn[name]);
 513 |   for (var v of fromType.interestingValues) {
 514 |     var fromValue = createSplatValue(fromType, v);
 515 |     var result = toType.fn[name](fromValue);
 516 |     for (var i = 0; i < fromType.lanes; i++)
 517 |       fromType.buffer[i] = fromType.fn.extractLane(fromValue, i);
 518 |     checkValue(toType, result, function(index) { return toType.buffer[index]; });
 519 |   }
 520 | }
 521 | 
 522 | function testAnyTrue(type) {
 523 |   equal('function', typeof type.fn.anyTrue);
 524 |   // All lanes 'false'.
 525 |   var a = type.fn.splat(false);
 526 |   ok(!type.fn.anyTrue(a));
 527 |   // One lane 'true'.
 528 |   for (var i = 0; i < type.lanes; i++) {
 529 |     a = type.fn.replaceLane(a, i, true);
 530 |     ok(type.fn.anyTrue(a));
 531 |   }
 532 |   // All lanes 'true'.
 533 |   a = type.fn.splat(true);
 534 |   ok(type.fn.anyTrue(a));
 535 | }
 536 | 
 537 | function testAllTrue(type) {
 538 |   equal('function', typeof type.fn.allTrue);
 539 |   // All lanes 'true'.
 540 |   var a = type.fn.splat(true);
 541 |   ok(type.fn.allTrue(a));
 542 |   // One lane 'false'.
 543 |   for (var i = 0; i < type.lanes; i++) {
 544 |     a = type.fn.replaceLane(a, i, false);
 545 |     ok(!type.fn.allTrue(a));
 546 |   }
 547 |   // All lanes 'false'.
 548 |   a = type.fn.splat(false);
 549 |   ok(!type.fn.allTrue(a));
 550 | }
 551 | 
 552 | function testSelect(type) {
 553 |   equal('function', typeof type.fn.select);
 554 |   // set a and b to values that are different for all numerical types.
 555 |   var av = 1;
 556 |   var bv = 2;
 557 |   var a = type.fn.splat(av);
 558 |   var b = type.fn.splat(bv);
 559 |   // test all selectors with a single 'true' lane.
 560 |   for (var i = 0; i < type.lanes; i++) {
 561 |     var selector = type.boolType.fn();
 562 |     selector = type.boolType.fn.replaceLane(selector, i, true);
 563 |     var result = type.fn.select(selector, a, b);
 564 |     checkValue(type, result, function(index) { return index == i ? av : bv; });
 565 |   }
 566 | }
 567 | 
 568 | function testSwizzle(type) {
 569 |   equal('function', typeof type.fn.swizzle);
 570 |   var a = createTestValue(type);  // 0, 1, 2, 3, 4, 5, 6, ...
 571 |   var indices = [];
 572 |   // Identity swizzle.
 573 |   for (var i = 0; i < type.lanes; i++) indices.push(i);
 574 |   var result = type.fn.swizzle.apply(type.fn, [a].concat(indices));
 575 |   checkValue(type, result, function(index) { return type.fn.extractLane(a, index); });
 576 |   // Reverse swizzle.
 577 |   indices.reverse();
 578 |   var result = type.fn.swizzle.apply(type.fn, [a].concat(indices));
 579 |   checkValue(type, result, function(index) { return type.fn.extractLane(a, type.lanes - index - 1); });
 580 | 
 581 |   function testIndexCheck(index) {
 582 |     for (var i = 0; i < type.lanes; i++) {
 583 |       var args = [a].concat(indices);
 584 |       args[i + 1] = index;
 585 |       throws(function() { type.fn.swizzle.apply(type.fn, args); });
 586 |     }
 587 |   }
 588 |   testIndexCheck(type.lanes);
 589 |   testIndexCheck(13.37);
 590 |   testIndexCheck(null);
 591 |   testIndexCheck(undefined);
 592 |   testIndexCheck({});
 593 |   testIndexCheck(true);
 594 |   testIndexCheck('yo');
 595 |   testIndexCheck(-1);
 596 |   testIndexCheck(128);
 597 | }
 598 | 
 599 | function testShuffle(type) {
 600 |   equal('function', typeof type.fn.shuffle);
 601 |   var indices = [];
 602 |   for (var i = 0; i < type.lanes; i++) indices.push(i);
 603 | 
 604 |   var a = type.fn.apply(type.fn, indices);            // 0, 1, 2, 3, 4 ...
 605 |   var b = type.fn.add(a, type.fn.splat(type.lanes));  // lanes, lanes+1 ...
 606 |   // All lanes from a.
 607 |   var result = type.fn.shuffle.apply(type.fn, [a, b].concat(indices));
 608 |   checkValue(type, result, function(index) { return type.fn.extractLane(a, index); });
 609 |   // One lane from b.
 610 |   for (var i = 0; i < type.lanes; i++) {
 611 |     var args = [a, b].concat(indices);
 612 |     args[2 + i] += type.lanes;
 613 |     var result = type.fn.shuffle.apply(type.fn, args);
 614 |     checkValue(type, result, function(index) {
 615 |       var val = index == i ? b : a;
 616 |       return type.fn.extractLane(val, index);
 617 |     });
 618 |   }
 619 |   // All lanes from b.
 620 |   for (var i = 0; i < type.lanes; i++) indices[i] += type.lanes;
 621 |   var result = type.fn.shuffle.apply(type.fn, [a, b].concat(indices));
 622 |   checkValue(type, result, function(index) { return type.fn.extractLane(b, index); });
 623 | 
 624 |   function testIndexCheck(index) {
 625 |     for (var i = 0; i < type.lanes; i++) {
 626 |       var args = [a, b].concat(indices);
 627 |       args[i + 2] = index;
 628 |       throws(function() { type.fn.shuffle.apply(type.fn, args); });
 629 |     }
 630 |   }
 631 |   testIndexCheck(2 * type.lanes);
 632 |   testIndexCheck(13.37);
 633 |   testIndexCheck(null);
 634 |   testIndexCheck(undefined);
 635 |   testIndexCheck({});
 636 |   testIndexCheck(true);
 637 |   testIndexCheck('yo');
 638 |   testIndexCheck(-1);
 639 |   testIndexCheck(128);
 640 | }
 641 | 
 642 | function testLoad(type, name, count) {
 643 |   var loadFn = type.fn[name];
 644 |   equal('function', typeof loadFn);
 645 |   var bufLanes = 2 * type.lanes;  // Test all alignments.
 646 |   var bufSize = bufLanes * type.laneSize + 8;  // Extra for over-alignment test.
 647 |   var ab = new ArrayBuffer(bufSize);
 648 |   var buf = new type.view(ab);
 649 |   for (var i = 0; i < bufLanes; i++) buf[i] = i; // Number buffer sequentially.
 650 |   // Test aligned loads.
 651 |   for (var i = 0; i < type.lanes; i++) {
 652 |     var a = loadFn(buf, i);
 653 |     checkValue(type, a, function(index) { return index < count ? i + index : 0; });
 654 |   }
 655 | 
 656 |   // Test index coercions.
 657 |   // Unlike typedArray[index], non-canonical strings are allowed here.
 658 |   checkValue(type, loadFn(buf, "0"),      function(index) { return index < count ? index : 0; });
 659 |   checkValue(type, loadFn(buf, " -0.0 "), function(index) { return index < count ? index : 0; });
 660 |   checkValue(type, loadFn(buf, "00"),     function(index) { return index < count ? index : 0; });
 661 |   checkValue(type, loadFn(buf, false),    function(index) { return index < count ? index : 0; });
 662 |   checkValue(type, loadFn(buf, null),     function(index) { return index < count ? index : 0; });
 663 |   checkValue(type, loadFn(buf, "01"),     function(index) { return index < count ? 1 + index : 0; });
 664 |   checkValue(type, loadFn(buf, " +1e0"),  function(index) { return index < count ? 1 + index : 0; });
 665 |   checkValue(type, loadFn(buf, true),     function(index) { return index < count ? 1 + index : 0; });
 666 | 
 667 |   // Test the 2 possible over-alignments.
 668 |   var f64 = new Float64Array(ab);
 669 |   var stride = 8 / type.laneSize;
 670 |   for (var i = 0; i < 1; i++) {
 671 |     var a = loadFn(f64, i);
 672 |     checkValue(type, a, function(index) { return index < count ? stride * i + index : 0; });
 673 |   }
 674 |   // Test the 7 possible mis-alignments.
 675 |   var i8 = new Int8Array(ab);
 676 |   for (var misalignment = 1; misalignment < 8; misalignment++) {
 677 |     // Shift the buffer up by 1 byte.
 678 |     for (var i = i8.length - 1; i > 0; i--)
 679 |       i8[i] = i8[i - 1];
 680 |     var a = loadFn(i8, misalignment);
 681 |     checkValue(type, a, function(index) { return index < count ? i + index : 0; });
 682 |   }
 683 | 
 684 |   function testIndexCheck(buf, index) {
 685 |     throws(function () { loadFn(buf, index); });
 686 |   }
 687 |   testIndexCheck(buf, -1);
 688 |   testIndexCheck(buf, 0.7);
 689 |   testIndexCheck(buf, -0.1);
 690 |   testIndexCheck(buf, NaN);
 691 |   testIndexCheck(buf, bufSize / type.laneSize - count + 1);
 692 |   testIndexCheck(buf.buffer, 1);
 693 |   testIndexCheck(buf, "a");
 694 | }
 695 | 
 696 | function testStore(type, name, count) {
 697 |   var storeFn = type.fn[name];
 698 |   equal('function', typeof storeFn);
 699 |   var bufLanes = 2 * type.lanes;  // Test all alignments.
 700 |   var bufSize = bufLanes * type.laneSize + 8;  // Extra for over-alignment test.
 701 |   var ab = new ArrayBuffer(bufSize);
 702 |   var buf = new type.view(ab);
 703 |   var a = createTestValue(type); // Value containing 0, 1, 2, 3 ...
 704 |   function checkBuffer(offset) {
 705 |     for (var i = 0; i < count; i++)
 706 |       if (buf[offset + i] != i) return false;
 707 |     return true;
 708 |   }
 709 |   // Test aligned stores.
 710 |   for (var i = 0; i < type.lanes; i++) {
 711 |     storeFn(buf, i, a);
 712 |     ok(checkBuffer(i));
 713 |   }
 714 | 
 715 |   // Test index coercions.
 716 |   storeFn(buf, "0", a);      ok(checkBuffer(0));
 717 |   storeFn(buf, "01", a);     ok(checkBuffer(1));
 718 |   storeFn(buf, " -0.0 ", a); ok(checkBuffer(0));
 719 |   storeFn(buf, " +1e0", a);  ok(checkBuffer(1));
 720 |   storeFn(buf, false, a);    ok(checkBuffer(0));
 721 |   storeFn(buf, true, a);     ok(checkBuffer(1));
 722 |   storeFn(buf, null, a);     ok(checkBuffer(0));
 723 | 
 724 |   // Test the 2 over-alignments.
 725 |   var f64 = new Float64Array(ab);
 726 |   var stride = 8 / type.laneSize;
 727 |   for (var i = 0; i < 1; i++) {
 728 |     storeFn(f64, i, a);
 729 |     ok(checkBuffer(stride * i));
 730 |   }
 731 |   // Test the 7 mis-alignments.
 732 |   var i8 = new Int8Array(ab);
 733 |   for (var misalignment = 1; misalignment < 8; misalignment++) {
 734 |     storeFn(i8, misalignment, a);
 735 |     // Shift the buffer down by misalignment.
 736 |     for (var i = 0; i < i8.length - misalignment; i++)
 737 |       i8[i] = i8[i + misalignment];
 738 |     ok(checkBuffer(0));
 739 |   }
 740 | 
 741 |   function testIndexCheck(buf, index) {
 742 |     throws(function () { storeFn(buf, index, type.fn()); });
 743 |   }
 744 |   testIndexCheck(buf, -1);
 745 |   testIndexCheck(buf, bufSize / type.laneSize - count + 1);
 746 |   testIndexCheck(buf.buffer, 1);
 747 |   testIndexCheck(buf, "a");
 748 | }
 749 | 
 750 | function testOperators(type) {
 751 |   var inst = createTestValue(type);
 752 |   throws(function() { Number(inst) });
 753 |   throws(function() { +inst });
 754 |   throws(function() { -inst });
 755 |   throws(function() { ~inst });
 756 |   throws(function() { Math.fround(inst) });
 757 |   throws(function() { inst|0} );
 758 |   throws(function() { inst&0 });
 759 |   throws(function() { inst^0 });
 760 |   throws(function() { inst>>>0 });
 761 |   throws(function() { inst>>0 });
 762 |   throws(function() { inst<<0 });
 763 |   throws(function() { (inst + inst) });
 764 |   throws(function() { inst - inst });
 765 |   throws(function() { inst * inst });
 766 |   throws(function() { inst / inst });
 767 |   throws(function() { inst % inst });
 768 |   throws(function() { inst < inst });
 769 |   throws(function() { inst > inst });
 770 |   throws(function() { inst <= inst });
 771 |   throws(function() { inst >= inst });
 772 |   throws(function() { inst(); });
 773 | 
 774 |   equal(inst[0], undefined);
 775 |   equal(inst.a, undefined);
 776 |   equal(!inst, false);
 777 |   equal(!inst, false);
 778 |   equal(inst ? 1 : 2, 1);
 779 |   equal(inst ? 1 : 2, 1);
 780 | 
 781 |   equal('function', typeof inst.toString);
 782 |   equal(inst.toString(), simdToString(type, inst));
 783 |   equal('function', typeof inst.toLocaleString);
 784 |   equal(inst.toLocaleString(), simdToLocaleString(type, inst));
 785 |   // TODO: test valueOf?
 786 | }
 787 | 
 788 | // Tests value semantics for a given type.
 789 | // TODO: more complete tests for Object wrappers, sameValue, sameValueZero, etc.
 790 | function testValueSemantics(type) {
 791 |   // Create a vanilla test value.
 792 |   var x = createTestValue(type);
 793 | 
 794 |   // Check against non-SIMD types.
 795 |   var otherTypeValues = [0, 1.275, NaN, Infinity, "string", null, undefined,
 796 |                          {}, function() {}];
 797 |   for (var other of simdTypes) {
 798 |     if (type !== other)
 799 |       otherTypeValues.push(createTestValue(other));
 800 |   }
 801 |   otherTypeValues.forEach(function(y) {
 802 |     equal(y == x, false);
 803 |     equal(x == y, false);
 804 |     equal(y != x, true);
 805 |     equal(x != y, true);
 806 |     equal(y === x, false);
 807 |     equal(x === y, false);
 808 |     equal(y !== x, true);
 809 |     equal(x !== y, true);
 810 |   });
 811 | 
 812 |   // Test that f(a, b) is the same as f(SIMD(a), SIMD(b)) for equality and
 813 |   // strict equality, at every lane.
 814 |   function test(a, b) {
 815 |     for (var i = 0; i < type.lanes; i++) {
 816 |       var aval = type.fn.replaceLane(x, i, a);
 817 |       var bval = type.fn.replaceLane(x, i, b);
 818 |       equal(a == b, aval == bval);
 819 |       equal(a === b, aval === bval);
 820 |     }
 821 |   }
 822 |   for (var a of type.interestingValues) {
 823 |     for (var b of type.interestingValues) {
 824 |       test(a, b);
 825 |     }
 826 |   }
 827 | }
 828 | 
 829 | 
 830 | simdTypes.forEach(function(type) {
 831 |   test(type.name + ' constructor', function() {
 832 |     testConstructor(type);
 833 |   });
 834 |   test(type.name + ' check', function() {
 835 |     testCheck(type);
 836 |   });
 837 |   test(type.name + ' operators', function() {
 838 |     testOperators(type);
 839 |   });
 840 |   // Note: This fails in the polyfill due to the lack of value semantics.
 841 |   test(type.name + ' value semantics', function() {
 842 |     testValueSemantics(type);
 843 |   });
 844 |   test(type.name + ' replaceLane', function() {
 845 |     testReplaceLane(type);
 846 |   });
 847 | });
 848 | 
 849 | simdTypes.filter(isNumerical).forEach(function(type) {
 850 |   test(type.name + ' equal', function() {
 851 |     testRelationalOp(type, 'equal', function(a, b) { return a == b; });
 852 |   });
 853 |   test(type.name + ' notEqual', function() {
 854 |     testRelationalOp(type, 'notEqual', function(a, b) { return a != b; });
 855 |   });
 856 |   test(type.name + ' lessThan', function() {
 857 |     testRelationalOp(type, 'lessThan', function(a, b) { return a < b; });
 858 |   });
 859 |   test(type.name + ' lessThanOrEqual', function() {
 860 |     testRelationalOp(type, 'lessThanOrEqual', function(a, b) { return a <= b; });
 861 |   });
 862 |   test(type.name + ' greaterThan', function() {
 863 |     testRelationalOp(type, 'greaterThan', function(a, b) { return a > b; });
 864 |   });
 865 |   test(type.name + ' greaterThanOrEqual', function() {
 866 |     testRelationalOp(type, 'greaterThanOrEqual', function(a, b) { return a >= b; });
 867 |   });
 868 |   test(type.name + ' add', function() {
 869 |     testBinaryOp(type, 'add', function(a, b) { return a + b; });
 870 |   });
 871 |   test(type.name + ' sub', function() {
 872 |     testBinaryOp(type, 'sub', function(a, b) { return a - b; });
 873 |   });
 874 |   test(type.name + ' mul', function() {
 875 |     testBinaryOp(type, 'mul', type.mulFn);
 876 |   });
 877 |   test(type.name + ' select', function() {
 878 |     testSelect(type);
 879 |   });
 880 |   test(type.name + ' swizzle', function() {
 881 |     testSwizzle(type);
 882 |   });
 883 |   test(type.name + ' shuffle', function() {
 884 |     testShuffle(type);
 885 |   });
 886 |   test(type.name + ' load', function() {
 887 |     testLoad(type, 'load', type.lanes);
 888 |   });
 889 |   test(type.name + ' store', function() {
 890 |     testStore(type, 'store', type.lanes);
 891 |   });
 892 | });
 893 | 
 894 | simdTypes.filter(hasLoadStore123).forEach(function(type) {
 895 |   test(type.name + ' load1', function() {
 896 |     testLoad(type, 'load1', 1);
 897 |   });
 898 |   test(type.name + ' load2', function() {
 899 |     testLoad(type, 'load2', 2);
 900 |   });
 901 |   test(type.name + ' load3', function() {
 902 |     testLoad(type, 'load3', 3);
 903 |   });
 904 |   test(type.name + ' store1', function() {
 905 |     testStore(type, 'store1', 1);
 906 |   });
 907 |   test(type.name + ' store1', function() {
 908 |     testStore(type, 'store2', 2);
 909 |   });
 910 |   test(type.name + ' store3', function() {
 911 |     testStore(type, 'store3', 3);
 912 |   });
 913 | });
 914 | 
 915 | simdTypes.filter(isLogical).forEach(function(type) {
 916 |   test(type.name + ' and', function() {
 917 |     testBinaryOp(type, 'and', function(a, b) { return a & b; });
 918 |   });
 919 |   test(type.name + ' or', function() {
 920 |     testBinaryOp(type, 'or', function(a, b) { return a | b; });
 921 |   });
 922 |   test(type.name + ' xor', function() {
 923 |     testBinaryOp(type, 'xor', function(a, b) { return a ^ b; });
 924 |   });
 925 | });
 926 | 
 927 | simdTypes.filter(isSigned).forEach(function(type) {
 928 |   test(type.name + ' neg', function() {
 929 |     testUnaryOp(type, 'neg', function(a) { return -a; });
 930 |   });
 931 | });
 932 | 
 933 | simdTypes.filter(isFloatType).forEach(function(type) {
 934 |   test(type.name + ' div', function() {
 935 |     testBinaryOp(type, 'div', function(a, b) { return a / b; });
 936 |   });
 937 |   test(type.name + ' abs', function() {
 938 |     testUnaryOp(type, 'abs', Math.abs);
 939 |   });
 940 |   test(type.name + ' min', function() {
 941 |     testBinaryOp(type, 'min', Math.min);
 942 |   });
 943 |   test(type.name + ' max', function() {
 944 |     testBinaryOp(type, 'max', Math.max);
 945 |   });
 946 |   test(type.name + ' minNum', function() {
 947 |     testBinaryOp(type, 'minNum', minNum);
 948 |   });
 949 |   test(type.name + ' maxNum', function() {
 950 |     testBinaryOp(type, 'maxNum', maxNum);
 951 |   });
 952 |   test(type.name + ' sqrt', function() {
 953 |     testUnaryOp(type, 'sqrt', function(a) { return Math.sqrt(a); });
 954 |   });
 955 |   test(type.name + ' reciprocalApproximation', function() {
 956 |     testUnaryOp(type, 'reciprocalApproximation', function(a) { return 1 / a; });
 957 |   });
 958 |   test(type.name + ' reciprocalSqrtApproximation', function() {
 959 |     testUnaryOp(type, 'reciprocalSqrtApproximation', function(a) { return 1 / Math.sqrt(a); });
 960 |   });
 961 | })
 962 | 
 963 | simdTypes.filter(isIntType).forEach(function(type) {
 964 |   test(type.name + ' not', function() {
 965 |     testUnaryOp(type, 'not', function(a) { return ~a; });
 966 |   });
 967 |   test(type.name + ' shiftLeftByScalar', function() {
 968 |     function shift(a, bits) {
 969 |       bits &= type.laneSize * 8 - 1;
 970 |       return a << bits;
 971 |     }
 972 |     testShiftOp(type, 'shiftLeftByScalar', shift);
 973 |   });
 974 | });
 975 | 
 976 | simdTypes.filter(isSignedIntType).forEach(function(type) {
 977 |   test(type.name + ' shiftRightByScalar', function() {
 978 |     function shift(a, bits) {
 979 |       bits &= type.laneSize * 8 - 1;
 980 |       return a >> bits;
 981 |     }
 982 |     testShiftOp(type, 'shiftRightByScalar', shift);
 983 |   });
 984 | });
 985 | 
 986 | simdTypes.filter(isUnsignedIntType).forEach(function(type) {
 987 |   test(type.name + ' shiftRightByScalar', function() {
 988 |     function shift(a, bits) {
 989 |       bits &= type.laneSize * 8 - 1;
 990 |       if (type.laneMask)
 991 |         a &= type.laneMask;
 992 |       return a >>> bits;
 993 |     }
 994 |     testShiftOp(type, 'shiftRightByScalar', shift);
 995 |   });
 996 | });
 997 | 
 998 | simdTypes.filter(isSmallIntType).forEach(function(type) {
 999 |   function saturate(type, a) {
1000 |     if (a < type.minVal) return type.minVal;
1001 |     if (a > type.maxVal) return type.maxVal;
1002 |     return a;
1003 |   }
1004 |   test(type.name + ' addSaturate', function() {
1005 |     testBinaryOp(type, 'addSaturate', function(a, b) { return saturate(type, a + b); });
1006 |   });
1007 |   test(type.name + ' subSaturate', function() {
1008 |     testBinaryOp(type, 'subSaturate', function(a, b) { return saturate(type, a - b); });
1009 |   });
1010 | });
1011 | 
// Boolean-only operations: logical not, anyTrue and allTrue.
simdTypes.filter(isBoolType).forEach(function(type) {
  test(type.name + ' not', function() {
    testUnaryOp(type, 'not', function(a) { return !a; });
  });
  // testAnyTrue and testAllTrue take the type only.
  test(type.name + ' anyTrue', function() {
    testAnyTrue(type);
  });
  test(type.name + ' allTrue', function() {
    testAllTrue(type);
  });
});
1023 | 
// From<type> functions.
simdTypes.forEach(function(toType) {
  if (!toType.from) return;
  // Register through a helper so that each test callback closes over its own
  // fromType and fn. With `var` declared in the loop body all callbacks would
  // share one binding, and if the test framework runs the callbacks after
  // registration has finished they would all see the last loop iteration.
  function registerFromTest(fromType) {
    var fn = 'from' + fromType.name;
    test(toType.name + ' ' + fn, function() {
      testFrom(toType, fromType, fn);
    });
  }
  for (var fromType of toType.from) {
    registerFromTest(fromType);
  }
});
1034 | 
// From<type>Bits functions.
simdTypes.forEach(function(toType) {
  if (!toType.fromBits) return;
  // Register through a helper so that each test callback closes over its own
  // fromType and fn. With `var` declared in the loop body all callbacks would
  // share one binding, and if the test framework runs the callbacks after
  // registration has finished they would all see the last loop iteration.
  function registerFromBitsTest(fromType) {
    var fn = 'from' + fromType.name + 'Bits';
    test(toType.name + ' ' + fn, function() {
      testFromBits(toType, fromType, fn);
    });
  }
  for (var fromType of toType.fromBits) {
    registerFromBitsTest(fromType);
  }
});
1045 | 
1046 | // Miscellaneous test methods.
1047 | 
// Reinterprets bits between Int32x4 and Float32x4 in both directions and
// flips float signs by xor-ing the sign bit of the integer view.
test('Float32x4 Int32x4 bit conversion', function() {
  var Float32x4 = SIMD.Float32x4;
  var Int32x4 = SIMD.Int32x4;

  // Compares every lane of `value` with the matching entry of `expected`.
  function expectLanes(simdType, value, expected) {
    for (var lane = 0; lane < expected.length; lane++) {
      equal(expected[lane], simdType.extractLane(value, lane));
    }
  }

  // Integer bit patterns of 1.0 .. 4.0, viewed as floats.
  var ints = Int32x4(0x3F800000, 0x40000000, 0x40400000, 0x40800000);
  var floats = Float32x4.fromInt32x4Bits(ints);
  expectLanes(Float32x4, floats, [1.0, 2.0, 3.0, 4.0]);

  // Floats 5.0 .. 8.0, viewed as their integer bit patterns.
  floats = Float32x4(5.0, 6.0, 7.0, 8.0);
  ints = Int32x4.fromFloat32x4Bits(floats);
  expectLanes(Int32x4, ints, [0x40A00000, 0x40C00000, 0x40E00000, 0x41000000]);

  // Flip sign using bit-wise operators.
  floats = Float32x4(9.0, 10.0, 11.0, 12.0);
  var signBits = Int32x4(0x80000000, 0x80000000, 0x80000000, 0x80000000);
  var bits = Int32x4.fromFloat32x4Bits(floats);
  bits = Int32x4.xor(bits, signBits); // flip sign.
  floats = Float32x4.fromInt32x4Bits(bits);
  expectLanes(Float32x4, floats, [-9.0, -10.0, -11.0, -12.0]);

  // Flipping once more restores the original values.
  bits = Int32x4.fromFloat32x4Bits(floats);
  bits = Int32x4.xor(bits, signBits); // flip sign.
  floats = Float32x4.fromInt32x4Bits(bits);
  expectLanes(Float32x4, floats, [9.0, 10.0, 11.0, 12.0]);
});
1079 | 
// Asserts that two Int32x4 values are equal in each of their four lanes.
function equalInt32x4(a, b) {
  for (var lane = 0; lane < 4; lane++) {
    equal(SIMD.Int32x4.extractLane(a, lane), SIMD.Int32x4.extractLane(b, lane));
  }
}
1086 | 
// Converts integer bit patterns to Float32x4 and back and expects the bits to
// be unchanged.
test('Float32x4 Int32x4 round trip', function() {
  // NaNs should stay unmodified across bit conversions
  var original = SIMD.Int32x4(0xFFFFFFFF, 0xFFFF0000, 0x80000000, 0x0);
  var asFloats = SIMD.Float32x4.fromInt32x4Bits(original);
  var roundTripped = SIMD.Int32x4.fromFloat32x4Bits(asFloats);
  // NaNs may be canonicalized, so these tests may fail in some implementations.
  equalInt32x4(original, roundTripped);
});
1094 | 
// Stores NaN bit patterns and loads them back through Float32Array and
// Int32Array storage, expecting the bits to be preserved.
test('Float32x4 Int32x4 load/store bit preservation', function() {
  var floatStorage = new Float32Array(4);
  var intStorage = new Int32Array(4);
  var nanBits = SIMD.Int32x4(0x7fc00000,0x7fe00000,0x7ff00000,0x7ff80000);

  // NaNs should stay unmodified when storing and loading to Float32Array
  SIMD.Int32x4.store(floatStorage, 0, nanBits);
  var reloaded = SIMD.Int32x4.load(floatStorage, 0);
  equalInt32x4(nanBits, reloaded);

  // NaNs should stay unmodified when loading as Float32x4 and storing as Int32x4
  SIMD.Int32x4.store(floatStorage, 0, nanBits);
  var asFloats = SIMD.Float32x4.load(floatStorage, 0);
  SIMD.Float32x4.store(intStorage, 0, asFloats);
  reloaded = SIMD.Int32x4.load(intStorage, 0);
  equalInt32x4(nanBits, reloaded);
});
1112 | 


--------------------------------------------------------------------------------
/src/external/qunit.css:
--------------------------------------------------------------------------------
  1 | /*!
  2 |  * QUnit 1.18.0
  3 |  * http://qunitjs.com/
  4 |  *
  5 |  * Copyright jQuery Foundation and other contributors
  6 |  * Released under the MIT license
  7 |  * http://jquery.org/license
  8 |  *
  9 |  * Date: 2015-04-03T10:23Z
 10 |  */
 11 | 
/** Font Family and Sizes */

/* One shared font stack for every QUnit UI region listed in the selector. */
#qunit-tests, #qunit-header, #qunit-banner, #qunit-testrunner-toolbar, #qunit-userAgent, #qunit-testresult {
	font-family: "Helvetica Neue Light", "HelveticaNeue-Light", "Helvetica Neue", Calibri, Helvetica, Arial, sans-serif;
}

/* Toolbar, user-agent bar, result summary and test items use `small`; the test list itself uses `smaller`. */
#qunit-testrunner-toolbar, #qunit-userAgent, #qunit-testresult, #qunit-tests li { font-size: small; }
#qunit-tests { font-size: smaller; }
 20 | 
 21 | 
/** Resets */

/* Remove margin and padding from the main QUnit regions; the sections below set their own padding. */
#qunit-tests, #qunit-header, #qunit-banner, #qunit-userAgent, #qunit-testresult, #qunit-modulefilter {
	margin: 0;
	padding: 0;
}
 28 | 
 29 | 
/** Header */

/* Title bar: light text on a dark blue background, rounded at the top corners only. */
#qunit-header {
	padding: 0.5em 0 0.5em 1em;

	color: #8699A4;
	background-color: #0D3349;

	font-size: 1.5em;
	line-height: 1em;
	font-weight: 400;

	border-radius: 5px 5px 0 0;
}

/* Links inside the title bar are not underlined and turn white on hover/focus. */
#qunit-header a {
	text-decoration: none;
	color: #C2CCD1;
}

#qunit-header a:hover,
#qunit-header a:focus {
	color: #FFF;
}

/* Labels in the toolbar sit inline with a little horizontal padding. */
#qunit-testrunner-toolbar label {
	display: inline-block;
	padding: 0 0.5em 0 0.1em;
}

/* Thin banner strip; its color is set by the .qunit-pass / .qunit-fail rules further down. */
#qunit-banner {
	height: 5px;
}

/* Toolbar row; overflow is hidden, and the filter elements below are floated right. */
#qunit-testrunner-toolbar {
	padding: 0.5em 1em 0.5em 1em;
	color: #5E740B;
	background-color: #EEE;
	overflow: hidden;
}

/* User-agent row: white text with a drop shadow on a blue background. */
#qunit-userAgent {
	padding: 0.5em 1em 0.5em 1em;
	background-color: #2B81AF;
	color: #FFF;
	text-shadow: rgba(0, 0, 0, 0.5) 2px 2px 1px;
}

/* The module filter container and the filter element are floated to the right. */
#qunit-modulefilter-container {
	float: right;
	padding: 0.2em;
}

.qunit-url-config {
	display: inline-block;
	padding: 0.1em;
}

.qunit-filter {
	display: block;
	float: right;
	margin-left: 1em;
}
 93 | 
/** Tests: Pass/Fail */

#qunit-tests {
	list-style-position: inside;
}

/* Every list item in the test list gets padding and a white bottom border. */
#qunit-tests li {
	padding: 0.4em 1em 0.4em 1em;
	border-bottom: 1px solid #FFF;
	list-style-position: inside;
}

/* Top-level items are hidden by default ... */
#qunit-tests > li {
	display: none;
}

/* ... and shown once they carry one of the state classes. */
#qunit-tests li.running,
#qunit-tests li.pass,
#qunit-tests li.fail,
#qunit-tests li.skipped {
	display: list-item;
}

/* With .hidepass on the list, running and passed items are made invisible and collapsed to zero size, padding, border and margin. */
#qunit-tests.hidepass li.running,
#qunit-tests.hidepass li.pass {
	visibility: hidden;
	position: absolute;
	width:   0px;
	height:  0px;
	padding: 0;
	border:  0;
	margin:  0;
}

/* The <strong> element of an item shows a pointer cursor, except on skipped items. */
#qunit-tests li strong {
	cursor: pointer;
}

#qunit-tests li.skipped strong {
	cursor: default;
}

/* Links inside test items; links inside paragraphs use a darker color and less padding. */
#qunit-tests li a {
	padding: 0.5em;
	color: #C2CCD1;
	text-decoration: none;
}

#qunit-tests li p a {
	padding: 0.25em;
	color: #6B6464;
}
#qunit-tests li a:hover,
#qunit-tests li a:focus {
	color: #000;
}

/* The runtime element is floated to the right of the item. */
#qunit-tests li .runtime {
	float: right;
	font-size: smaller;
}

/* Assertion list of a test: white rounded box. */
.qunit-assert-list {
	margin-top: 0.5em;
	padding: 0.5em;

	background-color: #FFF;

	border-radius: 5px;
}

/* Elements with this class are not displayed. */
.qunit-collapsed {
	display: none;
}

/* Tables inside test items: collapsed borders, right-aligned header cells, cells aligned to the top. */
#qunit-tests table {
	border-collapse: collapse;
	margin-top: 0.2em;
}

#qunit-tests th {
	text-align: right;
	vertical-align: top;
	padding: 0 0.5em 0 0;
}

#qunit-tests td {
	vertical-align: top;
}

/* Preformatted text wraps instead of overflowing. */
#qunit-tests pre {
	margin: 0;
	white-space: pre-wrap;
	word-wrap: break-word;
}

/* <del> and <ins> are marked with background colors instead of text decoration. */
#qunit-tests del {
	background-color: #E0F2BE;
	color: #374E0C;
	text-decoration: none;
}

#qunit-tests ins {
	background-color: #FFCACA;
	color: #500;
	text-decoration: none;
}
201 | 
/*** Test Counts */

/* Colors of the counts, passed and failed <b> elements in a test item. */
#qunit-tests b.counts                       { color: #000; }
#qunit-tests b.passed                       { color: #5E740B; }
#qunit-tests b.failed                       { color: #710909; }

/* Nested items (list items inside a test item): white background, no bottom border. */
#qunit-tests li li {
	padding: 5px;
	background-color: #FFF;
	border-bottom: none;
	list-style-position: inside;
}
214 | 
/*** Passing Styles */

/* Passed nested items: white background with a green bar on the left edge. */
#qunit-tests li li.pass {
	color: #3C510C;
	background-color: #FFF;
	border-left: 10px solid #C6E746;
}

/* Elements with class .pass inside the test list: blue text on a light blue background. */
#qunit-tests .pass                          { color: #528CE0; background-color: #D2E0E6; }
#qunit-tests .pass .test-name               { color: #366097; }

/* Actual/expected values are greyed out inside passing entries. */
#qunit-tests .pass .test-actual,
#qunit-tests .pass .test-expected           { color: #999; }

/* Banner color when the banner carries the .qunit-pass class. */
#qunit-banner.qunit-pass                    { background-color: #C6E746; }
230 | 
231 | /*** Failing Styles */
232 | 
233 | #qunit-tests li li.fail {
234 | 	color: #710909;
235 | 	background-color: #FFF;
236 | 	border-left: 10px solid #EE5757;
237 | 	white-space: pre;
238 | }
239 | 
240 | #qunit-tests > li:last-child {
241 | 	border-radius: 0 0 5px 5px;
242 | }
243 | 
244 | #qunit-tests .fail                          { color: #000; background-color: #EE5757; }
245 | #qunit-tests .fail .test-name,
246 | #qunit-tests .fail .module-name             { color: #000; }
247 | 
248 | #qunit-tests .fail .test-actual             { color: #EE5757; }
249 | #qunit-tests .fail .test-expected           { color: #008000; }
250 | 
251 | #qunit-banner.qunit-fail                    { background-color: #EE5757; }
252 | 
253 | /*** Skipped tests */
254 | 
255 | #qunit-tests .skipped {
256 | 	background-color: #EBECE9;
257 | }
258 | 
259 | #qunit-tests .qunit-skipped-label {
260 | 	background-color: #F4FF77;
261 | 	display: inline-block;
262 | 	font-style: normal;
263 | 	color: #366097;
264 | 	line-height: 1.8em;
265 | 	padding: 0 0.5em;
266 | 	margin: -0.4em 0.4em -0.4em 0;
267 | }
268 | 
269 | /** Result */
270 | 
271 | #qunit-testresult {
272 | 	padding: 0.5em 1em 0.5em 1em;
273 | 
274 | 	color: #2B81AF;
275 | 	background-color: #D2E0E6;
276 | 
277 | 	border-bottom: 1px solid #FFF;
278 | }
279 | #qunit-testresult .module-name {
280 | 	font-weight: 700;
281 | }
282 | 
283 | /** Fixture */
284 | 
285 | #qunit-fixture {
286 | 	position: absolute;
287 | 	top: -10000px;
288 | 	left: -10000px;
289 | 	width: 1000px;
290 | 	height: 1000px;
291 | }
292 | 


--------------------------------------------------------------------------------
/src/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 |   <meta charset="utf-8">
 5 |   <title>EcmaScript SIMD numeric type tests</title>
 6 |   <link rel="stylesheet" href="external/qunit.css">
 7 | </head>
 8 | <body>
 9 |   <div id="qunit"></div>
10 |   <div id="qunit-fixture"></div>
11 |   <script src="external/qunit.js"></script>
12 |   <script src="ecmascript_simd.js"></script>
13 |   <script src="ecmascript_simd_tests.js"></script>
14 | </body>
15 | </html>
16 | 


--------------------------------------------------------------------------------
/src/shell_test_runner.js:
--------------------------------------------------------------------------------
 1 | // This is a simple script for running the tests from a standalone JS shell.
 2 | 
 3 | load("ecmascript_simd.js");
 4 | 
 5 | // clearer marking
 6 | var currentName = '<global>';
 7 | var numFails = 0;
 8 | 
 9 | if (typeof skipValueTests === 'undefined')
10 |   skipValueTests = false;
11 | if (typeof skipFromBitsTests === 'undefined')
12 |   skipValueTests = true;
13 | 
// Logs `str` with every line prefixed by two spaces.
function printIndented(str) {
  var lines = str.split('\n');
  for (var i = 0; i < lines.length; i++) {
    lines[i] = '  ' + lines[i];
  }
  console.log(lines.join('\n'));
}
17 | 
// Records one failure: logs the message as an Error, prints the stack
// captured at this point indented beneath it, then bumps the failure count.
function fail(str) {
  var err = new Error(str);
  console.log(String(err));
  printIndented(err.stack);
  numFails += 1;
}
24 | 
// Runs one named test case.
//   name: label stored in currentName and quoted in failure messages.
//   func: the test body, called with no arguments.
// When skipValueTests is set, tests whose name contains 'value semantics'
// are not run. An exception escaping `func` is logged (with its stack, if
// it has one) and counted in numFails instead of propagating.
function test(name, func) {
  currentName = name;

  var skippingValueTests =
      typeof skipValueTests !== 'undefined' && skipValueTests;
  if (skippingValueTests && name.indexOf('value semantics') !== -1) {
    return;
  }

  try {
    func();
  } catch (err) {
    console.log('exception thrown from ' + currentName + ': ' + err.toString());
    if (err.stack) {
      printIndented(err.stack);
    }
    numFails++;
  }
}
38 | 
// Asserts that `a` and `b` are loosely equal (==); otherwise records a
// failure naming both values and the current test.
function equal(a, b) {
  if (a == b) {
    return;
  }
  fail('equal(' + a + ', ' + b + ') failed in ' + currentName);
}
43 | 
// Asserts that `a` and `b` are not loosely equal (!=); otherwise records a
// failure naming both values and the current test.
function notEqual(a, b) {
  if (a != b) {
    return;
  }
  fail('notEqual(' + a + ', ' + b + ') failed in ' + currentName);
}
48 | 
// Asserts that calling `func` throws. Any thrown value counts as a pass; a
// normal return records a failure against the current test.
function throws(func) {
  try {
    func();
  } catch (e) {
    return;
  }
  fail('throws failed in ' + currentName);
}
59 | 
// Asserts that `x` is truthy; otherwise records a failure against the
// current test.
function ok(x) {
  if (x) {
    return;
  }
  fail('not ok in ' + currentName);
}
64 | 
65 | load("ecmascript_simd_tests.js");
66 | 
// Once the test file has been loaded and run, report the total and make the
// shell exit with status 1 if anything failed or threw. numFails starts at 0
// and is only ever incremented, so "non-zero" means "at least one failure".
if (numFails !== 0) {
  print('total number of fails and exceptions: ' + numFails);
  quit(1);
}
71 | 


--------------------------------------------------------------------------------
/src/test.js:
--------------------------------------------------------------------------------
// To specifically test the p(r)olyfill.
//
// Clear any SIMD global that already exists before the runner loads
// ecmascript_simd.js -- presumably so the script's own implementation is the
// one under test rather than a native one (confirm against that file).

if (typeof SIMD !== 'undefined') {
  SIMD = undefined;
}

load('./shell_test_runner.js');
7 | 


--------------------------------------------------------------------------------
/tc39/SIMD-128 TC-39.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tc39/ecmascript_simd/a5529db02b6144256b7458bc96e2e7e117b6e5e9/tc39/SIMD-128 TC-39.pdf


--------------------------------------------------------------------------------