├── LICENSE.txt
├── README.md
├── extended-api.md
├── run_dev_server.sh
├── src
├── benchmarks
│ ├── aobench.js
│ ├── averageFloat32x4.js
│ ├── averageFloat32x4LoadFromInt8Array.js
│ ├── averageFloat32x4LoadX.js
│ ├── averageFloat32x4LoadXY.js
│ ├── averageFloat32x4LoadXYZ.js
│ ├── averageInt32x4Load.js
│ ├── base.js
│ ├── index.html
│ ├── inverse4x4.js
│ ├── kernel-template.js
│ ├── mandelbrot.js
│ ├── matrix-multiplication.js
│ ├── memcpy.js
│ ├── memset.js
│ ├── run.js
│ ├── run_browser.js
│ ├── shiftrows.js
│ ├── sinx4.js
│ ├── transform.js
│ └── transpose4x4.js
├── ecmascript_simd.js
├── ecmascript_simd_tests.js
├── external
│ ├── qunit.css
│ └── qunit.js
├── index.html
├── shell_test_runner.js
└── test.js
└── tc39
├── SIMD-128 TC-39.pdf
└── spec.html
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2013
3 |
4 | This software is provided 'as-is', without any express or implied
5 | warranty. In no event will the authors be held liable for any damages
6 | arising from the use of this software.
7 |
8 | Permission is granted to anyone to use this software for any purpose,
9 | including commercial applications, and to alter it and redistribute it
10 | freely, subject to the following restrictions:
11 |
12 | 1. The origin of this software must not be misrepresented; you must not
13 | claim that you wrote the original software. If you use this software
14 | in a product, an acknowledgment in the product documentation would be
15 | appreciated but is not required.
16 | 2. Altered source versions must be plainly marked as such, and must not be
17 | misrepresented as being the original software.
18 | 3. This notice may not be removed or altered from any source distribution.
19 | */
20 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | SIMD.js
2 | ===============
3 |
4 | SIMD.js has been taken out of active development in TC39 and removed
5 | from Stage 3, and is not being pursued by web browsers for
6 | implementation. SIMD operations exposed to the web are under active
7 | development within WebAssembly, with operations based on the SIMD.js
8 | operations. With WebAssembly in advanced development or shipping in
9 | multiple browsers, it seems like an adequate vehicle to subsume asm.js
10 | use cases, which are judged to be the broader cases. Although some
11 | developers have expressed interest in using SIMD.js outside of asm.js,
12 | implementers have found that implementing and optimizing for this case
13 | reliably creates a lot of complexity, and have made the decision to
14 | focus instead on delivering WebAssembly and SIMD instructions in WASM.
15 |
16 | See https://github.com/WebAssembly/simd for current development.
17 |
18 | This repository retains a historical snapshot of the SIMD.js specification work:
19 | * The authoritative API reference documentation is generated from tc39/spec.html. You can view a rendered copy at http://tc39.github.io/ecmascript_simd/ .
20 | * A polyfill at src/ecmascript_simd.js, which can't implement value semantics, but includes a correct implementation of all functions
21 | * Extensive tests at src/ecmascript_simd_tests.js, which can be run using other files in src/. Benchmarks and example code live in the same directory.
22 | * A presentation explaining the motivation and outlining the approach at [tc39/SIMD-128 TC-39.pdf](https://github.com/tc39/ecmascript_simd/blob/master/tc39/SIMD-128%20TC-39.pdf)
23 |
--------------------------------------------------------------------------------
/extended-api.md:
--------------------------------------------------------------------------------
1 | SIMD.js Extended API Proposal
2 | =============================
3 |
4 | This document proposes an extended API for SIMD.js which is meant to provide access
5 | to platform-specific optimizations. It will sit on top of and complement the
6 | base API.
7 |
8 | The expectation is that most users will use the base API most of the time. While
9 | some compromises are being made to serve portability, most of the base API will
10 | still be fast, and it will deliver the most consistent results. The extension API
11 | will offer opportunities for performance tuning, will support specialized code
12 | sequences, and will aid in porting of code from other platforms.
13 |
14 | This proposal splits the problem space into two parts:
15 | - operations which are portable, but with semantic differences
16 | - operations which are only available on some platforms
17 |
18 | Operations which are portable, but with semantic differences
19 | ------------------------------------------------------------
20 |
21 | Primarily, this will use a new `SIMD.Relaxed` namespace:
22 |
23 | ```
24 | SIMD.Relaxed.Int32x4.fromFloat32x4 // relaxed on NaN or overflow
25 | SIMD.Relaxed.Float32x4.max // relaxed on NaN, 0 and -0 fungible
26 | SIMD.Relaxed.Int32x4.shiftLeftByScalar // relaxed on shift count overflow
27 | ...
28 | ```
29 |
30 | Functions in `SIMD.Relaxed` mimic functions in the base API with corresponding names,
31 | and provide weaker portability with greater potential for performance, for example by
32 | having unspecified results if a NaN appears in any part of the (implied) computation, by
33 | treating negative zero as interchangeable with zero, or by having unspecified
34 | results if an overflow occurs.
35 |
36 | Note that an implementation in which these are all identical to their corresponding
37 | functions in the base namespace will be fully conforming.
38 |
39 | Accompanying this is a new `SIMD.Checked` namespace to help developers find errors:
40 |
41 | ```
42 | SIMD.Checked.Int32x4.fromFloat32x4
43 | SIMD.Checked.Float32x4.max
44 | SIMD.Checked.Int32x4.shiftLeftByScalar
45 | ...
46 | ```
47 |
48 | Functions in `SIMD.Checked` all correspond to functions in `SIMD.Relaxed` and
49 | throw on any value which would produce unspecified results. They may also
50 | canonicalize negative zero to positive zero. We'll publish a standard polyfill for
51 | these functions which implementations or users can use if they wish.
52 |
53 | Operations which are only available on some platforms
54 | -----------------------------------------------------
55 |
56 | Operations from all platforms are collected together in a single `SIMD.Universe` namespace:
57 |
58 | ```
59 | SIMD.Universe.Float32x4.fma
60 | SIMD.Universe.Int32x4.rotateLeft
61 | SIMD.Universe.Int32x4.rotateRight
62 | SIMD.Universe.Int32x4.signMask // movmskps on x86
63 | SIMD.Universe.Int32x4.bitInsertIfTrue // vbit on ARM
64 | ...
65 | ```
66 |
67 | Unlike in the `SIMD.Relaxed` namespace, these operations all have fairly strict
68 | semantics.
69 |
70 | We'll publish a standard polyfill that will fill in all functions in the
71 | `SIMD.Universe` namespace that the JIT doesn't predefine. This will ensure that
72 | programs continue to at least execute across platforms, though of course the
73 | performance may vary widely.
74 |
75 | Some indication of the performance will be made:
76 |
77 | ```
78 | SIMD.isFast
79 | ```
80 |
81 | This function takes a single argument, a function in the `SIMD.Universe` API,
82 | and returns a bool indicating whether the given function is "fast" -- roughly
83 | meaning a single operation in the underlying platform.
84 |
--------------------------------------------------------------------------------
/run_dev_server.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | python -m SimpleHTTPServer
3 |
--------------------------------------------------------------------------------
/src/benchmarks/aobench.js:
--------------------------------------------------------------------------------
1 | // AOBench
2 | // ambient occlusion renderer
3 | // See full demo at https://github.com/wahbahdoo/aobench
4 |
5 | (function () {
6 |
7 | // Kernel configuration
8 | var kernelConfig = {
9 | kernelName: "AOBench",
10 | kernelInit: initAobench,
11 | kernelCleanup: cleanupAobench,
12 | kernelSimd: simdAobench,
13 | kernelNonSimd: nonSimdAobench
14 | };
15 |
16 | // Hook up to the harness
17 | benchmarks.add (new Benchmark (kernelConfig));
18 |
19 | // Global variables
20 | var NAO_SAMPLES = 8;
21 | var spheres;
22 | var plane;
23 | var rands1;
24 | var rands2;
25 | var isect0;
26 |
27 | // Initialization and verification
28 | function initAobench () {
29 | init_scene();
30 | var A = ambient_occlusion(isect0);
31 | var B = ambient_occlusion_simd(isect0);
32 | return ((A.x == B.x) && (A.y == B.y) && (A.z == B.z));
33 | }
34 |
35 | function cleanupAobench() { // cleanup hook: re-runs initAobench and returns its verification result
36 | return initAobench();
37 | }
38 |
39 | // Non SIMD version of the kernel
40 | function nonSimdAobench (n) {
41 | for (var i = 0; i < n; i++) {
42 | ambient_occlusion(isect0);
43 | }
44 | }
45 |
46 | // SIMD version of the kernel
47 | function simdAobench (n) {
48 | for (var i = 0; i < n; i++) {
49 | ambient_occlusion_simd(isect0);
50 | }
51 | }
52 |
53 | // AOBench initialization of objects and pseudorand numbers (for benchmark predictability)
54 | function init_scene() {
55 | spheres = new Array();
56 | spheres[0] = {
57 | center: {
58 | x: -2.0,
59 | y: 0.0,
60 | z: -3.5
61 | },
62 | radius: 0.5
63 | };
64 | spheres[1] = {
65 | center: {
66 | x: -0.5,
67 | y: 0.0,
68 | z: -3.0
69 | },
70 | radius: 0.5
71 | };
72 | spheres[2] = {
73 | center: {
74 | x: 1.0,
75 | y: 0.0,
76 | z: -2.2
77 | },
78 | radius: 0.5
79 | };
80 | plane = {
81 | p: {
82 | x: 0.0,
83 | y: -0.5,
84 | z: 0.0
85 | },
86 | n: {
87 | x: 0.0,
88 | y: 1.0,
89 | z: 0.0
90 | }
91 | };
92 | rands1 = new Array(0.1352356830611825, 0.288015044759959, 0.7678821850568056, 0.2686317905317992,
93 | 0.3331136927008629, 0.8684257145505399, 0.781927386065945, 0.5896540696267039,
94 | 0.44623699225485325, 0.9686877066269517, 0.07219804194755852, 0.32867410429753363,
95 | 0.25455036014318466, 0.6900878311134875, 0.32115139183588326, 0.8623794671148062,
96 | 0.41069260938093066, 0.999176808167249, 0.31144002149812877, 0.21190544497221708,
97 | 0.589751492254436, 0.618399447761476, 0.7838233797810972, 0.22662024036981165,
98 | 0.5274769144598395, 0.8913978524506092, 0.2461202829144895, 0.575232774252072,
99 | 0.20723191439174116, 0.15211533522233367, 0.5140219402965158, 0.695398824987933,
100 | 0.7201623972505331, 0.1737971710972488, 0.3138047114480287, 0.09142904286272824,
101 | 0.15824169223196805, 0.11588017432950437, 0.4076798539608717, 0.06385629274882376,
102 | 0.9907234299462289, 0.1742915315553546, 0.9236432255711406, 0.8344372694846243,
103 | 0.05793144227936864, 0.35464465571567416, 0.3937969475518912, 0.8209003841038793,
104 | 0.6443945677019656, 0.15443599177524447, 0.8957053178455681, 0.4145913925021887,
105 | 0.4667414356954396, 0.42764953384175897, 0.03486692951992154, 0.13391495239920914,
106 | 0.6122364429756999, 0.7934473238419741, 0.13505113637074828, 0.7279673060402274,
107 | 0.3638722419273108, 0.30750402715057135, 0.8705337035935372, 0.3060465627349913);
108 |
109 | rands2 = new Array(0.6100146626122296, 0.8141843967605382, 0.7538463387172669, 0.538857217412442,
110 | 0.7884696905966848, 0.2656198723707348, 0.3280213042162359, 0.25133296218700707,
111 | 0.18718935316428542, 0.7374026740435511, 0.8333564973436296, 0.22081619454547763,
112 | 0.08140448946505785, 0.7737920694053173, 0.9531879865098745, 0.385226191021502,
113 | 0.8437968089710921, 0.45293551217764616, 0.11351405014283955, 0.6402874339837581,
114 | 0.9657228307332844, 0.5241556512191892, 0.9501411342062056, 0.7991736396215856,
115 | 0.7572617880068719, 0.6777111298870295, 0.19950113398954272, 0.09956562682054937,
116 | 0.03746219468303025, 0.18719390942715108, 0.1519025124143809, 0.8241845818702132,
117 | 0.9609565436840057, 0.7231316142715514, 0.26712060417048633, 0.7414182834327221,
118 | 0.4706993775907904, 0.9619642498437315, 0.14598079677671194, 0.1517641346435994,
119 | 0.5583144023548812, 0.7664180144201964, 0.8109071112703532, 0.4008640209212899,
120 | 0.10891564912162721, 0.8558103002142161, 0.03816548571921885, 0.4263107746373862,
121 | 0.280488790711388, 0.915016517508775, 0.8379701666999608, 0.5821647725533694,
122 | 0.3671900019980967, 0.6120628621429205, 0.5861144624650478, 0.5639409353025258,
123 | 0.4884668991435319, 0.9718172331340611, 0.4438377188052982, 0.9853541473858058,
124 | 0.021908782655373216,0.6144221667200327, 0.11301262397319078, 0.17565111187286675);
125 | isect0 = {
126 | t: 0.7907924036719444,
127 | hit: 1,
128 | p: {
129 | x: 0.3484251968503937,
130 | y: -0.49999999999999994,
131 | z: -0.5039370078740157
132 | },
133 | n: {
134 | x: 0,
135 | y: 1,
136 | z: 0
137 | }
138 | };
139 | }
140 |
141 | // Sequential AO calculation functions ----------------------------------------------
142 |
143 | function ambient_occlusion(isect) { // scalar kernel: returns {x, y, z}, each set to the fraction of sample rays that hit nothing
144 | var col = {};
145 |
146 | var ntheta = NAO_SAMPLES;
147 | var nphi = NAO_SAMPLES;
148 | var eps = 0.0001;
149 | // Ray origin: the hit point moved by eps along the surface normal.
150 | var p = {
151 | x: isect.p.x + eps * isect.n.x,
152 | y: isect.p.y + eps * isect.n.y,
153 | z: isect.p.z + eps * isect.n.z
154 | };
155 | // orthoBasis sets basis[2] to the normal and fills basis[0] and basis[1].
156 | var basis = new Array({}, {}, {});
157 | orthoBasis(basis, isect.n);
158 |
159 | var occlusion = 0;
160 |
161 | for (var j = 0; j < ntheta; j++) {
162 | for (var i = 0; i < nphi; i++) {
163 | var theta = Math.sqrt(rands1[j * ntheta + i]);
164 | var phi = 2 * Math.PI * rands2[j * ntheta + i];
165 | // Sample direction in basis coordinates, driven by the precomputed rands1/rands2 tables.
166 | var x = Math.cos(phi) * theta;
167 | var y = Math.sin(phi) * theta;
168 | var z = Math.sqrt(1 - theta * theta);
169 | // The same direction as a combination of the three basis vectors.
170 | var rx = x * basis[0].x + y * basis[1].x + z * basis[2].x;
171 | var ry = x * basis[0].y + y * basis[1].y + z * basis[2].y;
172 | var rz = x * basis[0].z + y * basis[1].z + z * basis[2].z;
173 |
174 | var ray = {
175 | org: p,
176 | dir: {
177 | x: rx,
178 | y: ry,
179 | z: rz
180 | }
181 | };
182 | // occIsectA tracks the nearest hit distance and hit flag; occIsectB receives the hit position and normal.
183 | var occIsectA = {
184 | t: 1e17,
185 | hit: 0
186 | }
187 | var occIsectB = {
188 | p: { x:0, y:0, z:0 },
189 | n: { x:0, y:0, z:0 }
190 | };
191 |
192 | ray_sphere_intersect(occIsectA, occIsectB, ray, spheres[0]);
193 | ray_sphere_intersect(occIsectA, occIsectB, ray, spheres[1]);
194 | ray_sphere_intersect(occIsectA, occIsectB, ray, spheres[2]);
195 | ray_plane_intersect(occIsectA, occIsectB, ray, plane);
196 | // The sample counts as occluded if any of the four intersection tests set the hit flag.
197 | if (occIsectA.hit) occlusion += 1.0;
198 |
199 | }
200 | }
201 | // Turn the hit count into the fraction of rays that were not occluded.
202 | occlusion = (ntheta * nphi - occlusion) / (ntheta * nphi);
203 |
204 | col.x = occlusion;
205 | col.y = occlusion;
206 | col.z = occlusion;
207 |
208 | return col;
209 | }
210 |
211 | function ray_sphere_intersect(isectA, isectB, ray, sphere) { // updates isectA/isectB in place when the ray hits the sphere nearer than isectA.t
212 | var rs = {
213 | x: ray.org.x - sphere.center.x,
214 | y: ray.org.y - sphere.center.y,
215 | z: ray.org.z - sphere.center.z
216 | };
217 | // rs is the vector from the sphere centre to the ray origin; D = B*B - C is tested against 0 below.
218 | var B = vdot(rs, ray.dir);
219 | var C = vdot(rs, rs) - sphere.radius * sphere.radius;
220 | var D = B * B - C;
221 |
222 | if (D > 0) {
223 | var t = -B - Math.sqrt(D);
224 | if ((t > 0) && (t < isectA.t)) {
225 | // Positive hit closer than the current best: record distance, position and normalized normal.
226 | isectA.t = t;
227 | isectA.hit = 1;
228 |
229 | isectB.p.x = ray.org.x + ray.dir.x * t;
230 | isectB.p.y = ray.org.y + ray.dir.y * t;
231 | isectB.p.z = ray.org.z + ray.dir.z * t;
232 |
233 | isectB.n.x = isectB.p.x - sphere.center.x;
234 | isectB.n.y = isectB.p.y - sphere.center.y;
235 | isectB.n.z = isectB.p.z - sphere.center.z;
236 |
237 | vnormalize(isectB.n);
238 | }
239 | }
240 |
241 | }
242 |
243 | function ray_plane_intersect(isectA, isectB, ray, plane) { // updates isectA/isectB in place when the ray hits the plane nearer than isectA.t
244 | var d = -vdot(plane.p, plane.n);
245 | var v = vdot(ray.dir, plane.n);
246 | // v is the divisor below; bail out when it is (nearly) zero.
247 | if (Math.abs(v) < 1e-17) return;
248 |
249 | var t = -(vdot(ray.org, plane.n) + d) / v;
250 |
251 | if ((t > 0) && (t < isectA.t)) {
252 | isectA.t = t;
253 | isectA.hit = 1;
254 | isectB.p.x = ray.org.x + ray.dir.x * t;
255 | isectB.p.y = ray.org.y + ray.dir.y * t;
256 | isectB.p.z = ray.org.z + ray.dir.z * t;
257 | isectB.n = plane.n; // stores a reference to the plane's normal object, not a copy
258 | }
259 | }
260 |
261 | // SIMD AO calculation functions ----------------------------------------------------
262 |
263 | function ambient_occlusion_simd(isect) { // SIMD kernel: same return shape as ambient_occlusion, four sample rays per inner iteration
264 | var col = {};
265 |
266 | var i, j;
267 | var ntheta = NAO_SAMPLES;
268 | var nphi = NAO_SAMPLES;
269 | var eps = 0.0001;
270 | // Ray origin: the hit point moved by eps along the surface normal.
271 | var p = {
272 | x: isect.p.x + eps * isect.n.x,
273 | y: isect.p.y + eps * isect.n.y,
274 | z: isect.p.z + eps * isect.n.z
275 | };
276 |
277 | var basis = new Array({}, {}, {});
278 | orthoBasis(basis, isect.n);
279 |
280 | var occlusion = 0;
281 | var occlusionx4 = SIMD.Float32x4.splat(0.0);
282 | // The inner loop advances i by 4: each iteration packs four consecutive samples into the lanes of a Float32x4.
283 | for (j = 0; j < ntheta; j++) {
284 | for (i = 0; i < nphi; i += 4) {
285 | var theta = SIMD.Float32x4.sqrt(SIMD.Float32x4(rands1[j * ntheta + i], rands1[j * ntheta + i + 1], rands1[j * ntheta + i + 2], rands1[j * ntheta + i + 3]));
286 | var phi0 = 2 * Math.PI * rands2[j * ntheta + i];
287 | var phi1 = 2 * Math.PI * rands2[j * ntheta + i + 1];
288 | var phi2 = 2 * Math.PI * rands2[j * ntheta + i + 2];
289 | var phi3 = 2 * Math.PI * rands2[j * ntheta + i + 3];
290 | var sinphi = SIMD.Float32x4(Math.sin(phi0), Math.sin(phi1), Math.sin(phi2), Math.sin(phi3));
291 | var cosphi = SIMD.Float32x4(Math.cos(phi0), Math.cos(phi1), Math.cos(phi2), Math.cos(phi3));
292 |
293 | var x = SIMD.Float32x4.mul(cosphi, theta);
294 | var y = SIMD.Float32x4.mul(sinphi, theta);
295 | var z = SIMD.Float32x4.sqrt(SIMD.Float32x4.sub(SIMD.Float32x4.splat(1.0), SIMD.Float32x4.mul(theta, theta)));
296 | // Ray directions for the four samples, one Float32x4 per coordinate.
297 | var dirx = SIMD.Float32x4.add(SIMD.Float32x4.mul(x, SIMD.Float32x4.splat(basis[0].x)),
298 | SIMD.Float32x4.add(SIMD.Float32x4.mul(y, SIMD.Float32x4.splat(basis[1].x)),
299 | SIMD.Float32x4.mul(z, SIMD.Float32x4.splat(basis[2].x))));
300 | var diry = SIMD.Float32x4.add(SIMD.Float32x4.mul(x, SIMD.Float32x4.splat(basis[0].y)),
301 | SIMD.Float32x4.add(SIMD.Float32x4.mul(y, SIMD.Float32x4.splat(basis[1].y)),
302 | SIMD.Float32x4.mul(z, SIMD.Float32x4.splat(basis[2].y))));
303 | var dirz = SIMD.Float32x4.add(SIMD.Float32x4.mul(x, SIMD.Float32x4.splat(basis[0].z)),
304 | SIMD.Float32x4.add(SIMD.Float32x4.mul(y, SIMD.Float32x4.splat(basis[1].z)),
305 | SIMD.Float32x4.mul(z, SIMD.Float32x4.splat(basis[2].z))));
306 | // All four rays share the same origin, so each coordinate is splatted across the lanes.
307 | var orgx = SIMD.Float32x4.splat(p.x);
308 | var orgy = SIMD.Float32x4.splat(p.y);
309 | var orgz = SIMD.Float32x4.splat(p.z);
310 | // Per-lane intersection state: nearest distance and hit mask in occIsectA, position and normal in occIsectB.
311 | var occIsectA = {
312 | t: SIMD.Float32x4.splat(1e17),
313 | hit: SIMD.Bool32x4.splat(false)
314 | };
315 | var occIsectB = {
316 | p: {
317 | x: SIMD.Float32x4.splat(0.0),
318 | y: SIMD.Float32x4.splat(0.0),
319 | z: SIMD.Float32x4.splat(0.0)
320 | },
321 | n: {
322 | x: SIMD.Float32x4.splat(0.0),
323 | y: SIMD.Float32x4.splat(0.0),
324 | z: SIMD.Float32x4.splat(0.0)
325 | }
326 | };
327 |
328 | ray_sphere_intersect_simd(occIsectA, occIsectB, dirx, diry, dirz, orgx, orgy, orgz, spheres[0]);
329 | ray_sphere_intersect_simd(occIsectA, occIsectB, dirx, diry, dirz, orgx, orgy, orgz, spheres[1]);
330 | ray_sphere_intersect_simd(occIsectA, occIsectB, dirx, diry, dirz, orgx, orgy, orgz, spheres[2]);
331 | ray_plane_intersect_simd (occIsectA, occIsectB, dirx, diry, dirz, orgx, orgy, orgz, plane);
332 | // Add 1.0 to the counter of every lane whose hit mask is set, 0.0 to the others.
333 | occlusionx4 = SIMD.Float32x4.add(
334 | occlusionx4,
335 | SIMD.Float32x4.select(occIsectA.hit, SIMD.Float32x4.splat(1.0),
336 | SIMD.Float32x4.splat(0.0)));
337 |
338 | }
339 | }
340 | // Sum the four per-lane counters into a single hit count.
341 | occlusion = SIMD.Float32x4.extractLane(occlusionx4, 0) +
342 | SIMD.Float32x4.extractLane(occlusionx4, 1) +
343 | SIMD.Float32x4.extractLane(occlusionx4, 2) +
344 | SIMD.Float32x4.extractLane(occlusionx4, 3);
345 |
346 | occlusion = (ntheta * nphi - occlusion) / (ntheta * nphi);
347 |
348 | col.x = occlusion;
349 | col.y = occlusion;
350 | col.z = occlusion;
351 |
352 | return col;
353 | }
354 |
355 | function ray_sphere_intersect_simd(isectA, isectB, dirx, diry, dirz, orgx, orgy, orgz, sphere) {
356 |
357 | var rsx = SIMD.Float32x4.sub(orgx, SIMD.Float32x4.splat(sphere.center.x));
358 | var rsy = SIMD.Float32x4.sub(orgy, SIMD.Float32x4.splat(sphere.center.y));
359 | var rsz = SIMD.Float32x4.sub(orgz, SIMD.Float32x4.splat(sphere.center.z));
360 |
361 | var B = SIMD.Float32x4.add(SIMD.Float32x4.mul(rsx, dirx),
362 | SIMD.Float32x4.add(SIMD.Float32x4.mul(rsy, diry), SIMD.Float32x4.mul(rsz, dirz)));
363 | var C = SIMD.Float32x4.sub(SIMD.Float32x4.add(SIMD.Float32x4.mul(rsx, rsx),
364 | SIMD.Float32x4.add(SIMD.Float32x4.mul(rsy, rsy), SIMD.Float32x4.mul(rsz, rsz))),
365 | SIMD.Float32x4.splat(sphere.radius * sphere.radius));
366 | var D = SIMD.Float32x4.sub(SIMD.Float32x4.mul(B, B), C);
367 |
368 | var cond1 = SIMD.Float32x4.greaterThan(D, SIMD.Float32x4.splat(0.0));
369 | if (SIMD.Bool32x4.anyTrue(cond1)) {
370 | var t2 = SIMD.Float32x4.select(cond1, SIMD.Float32x4.sub(SIMD.Float32x4.neg(B), SIMD.Float32x4.sqrt(D)), SIMD.Float32x4.splat(0.0));
371 | var cond2 = SIMD.Bool32x4.and(SIMD.Float32x4.greaterThan(t2, SIMD.Float32x4.splat(0.0)),
372 | SIMD.Float32x4.lessThan(t2, isectA.t));
373 | if (SIMD.Bool32x4.anyTrue(cond2)) {
374 | isectA.t = SIMD.Float32x4.select(cond2, t2, isectA.t);
375 | isectA.hit = SIMD.Bool32x4.or(cond2, isectA.hit);
376 |
377 | isectB.p.x = SIMD.Float32x4.select(cond2, SIMD.Float32x4.add(orgx, SIMD.Float32x4.mul(dirx, isectA.t)), isectB.p.x);
378 | isectB.p.y = SIMD.Float32x4.select(cond2, SIMD.Float32x4.add(orgx, SIMD.Float32x4.mul(diry, isectA.t)), isectB.p.y);
379 | isectB.p.z = SIMD.Float32x4.select(cond2, SIMD.Float32x4.add(orgx, SIMD.Float32x4.mul(dirz, isectA.t)), isectB.p.z);
380 |
381 | isectB.n.x = SIMD.Float32x4.select(cond2, SIMD.Float32x4.sub(isectB.p.x, SIMD.Float32x4.splat(sphere.center.x)), isectB.n.x);
382 | isectB.n.y = SIMD.Float32x4.select(cond2, SIMD.Float32x4.sub(isectB.p.y, SIMD.Float32x4.splat(sphere.center.y)), isectB.n.y);
383 | isectB.n.z = SIMD.Float32x4.select(cond2, SIMD.Float32x4.sub(isectB.p.z, SIMD.Float32x4.splat(sphere.center.z)), isectB.n.z);
384 |
385 | var lengths = SIMD.Float32x4.sqrt(SIMD.Float32x4.add(SIMD.Float32x4.mul(isectB.n.x, isectB.n.x),
386 | SIMD.Float32x4.add(SIMD.Float32x4.mul(isectB.n.y, isectB.n.y),
387 | SIMD.Float32x4.mul(isectB.n.z, isectB.n.z))));
388 | var cond3 = SIMD.Float32x4.greaterThan(SIMD.Float32x4.abs(lengths), SIMD.Float32x4.splat(1e-17));
389 | isectB.n.x = SIMD.Float32x4.select(cond3, SIMD.Float32x4.div(isectB.n.x, lengths), isectB.n.x);
390 | isectB.n.y = SIMD.Float32x4.select(cond3, SIMD.Float32x4.div(isectB.n.y, lengths), isectB.n.y);
391 | isectB.n.z = SIMD.Float32x4.select(cond3, SIMD.Float32x4.div(isectB.n.z, lengths), isectB.n.z);
392 | }
393 | }
394 | }
395 |
396 | function ray_plane_intersect_simd(isectA, isectB, dirx, diry, dirz, orgx, orgy, orgz, plane) {
397 | var d = SIMD.Float32x4.neg(SIMD.Float32x4.add(SIMD.Float32x4.mul(SIMD.Float32x4.splat(plane.p.x), SIMD.Float32x4.splat(plane.n.x)),
398 | SIMD.Float32x4.add(SIMD.Float32x4.mul(SIMD.Float32x4.splat(plane.p.y), SIMD.Float32x4.splat(plane.n.y)),
399 | SIMD.Float32x4.mul(SIMD.Float32x4.splat(plane.p.z), SIMD.Float32x4.splat(plane.n.z)))));
400 | var v = SIMD.Float32x4.add(SIMD.Float32x4.mul(dirx, SIMD.Float32x4.splat(plane.n.x)),
401 | SIMD.Float32x4.add(SIMD.Float32x4.mul(diry, SIMD.Float32x4.splat(plane.n.y)),
402 | SIMD.Float32x4.mul(dirz, SIMD.Float32x4.splat(plane.n.z))));
403 |
404 | var cond1 = SIMD.Float32x4.greaterThan(SIMD.Float32x4.abs(v), SIMD.Float32x4.splat(1e-17));
405 | var dp = SIMD.Float32x4.add(SIMD.Float32x4.mul(orgx, SIMD.Float32x4.splat(plane.n.x)),
406 | SIMD.Float32x4.add(SIMD.Float32x4.mul(orgy, SIMD.Float32x4.splat(plane.n.y)),
407 | SIMD.Float32x4.mul(orgz, SIMD.Float32x4.splat(plane.n.z))));
408 | var t2 = SIMD.Float32x4.select(cond1, SIMD.Float32x4.div(SIMD.Float32x4.neg(SIMD.Float32x4.add(dp, d)), v), SIMD.Float32x4.splat(0.0));
409 | var cond2 = SIMD.Bool32x4.and(SIMD.Float32x4.greaterThan(t2, SIMD.Float32x4.splat(0.0)), SIMD.Float32x4.lessThan(t2, isectA.t));
410 | if (SIMD.Bool32x4.anyTrue(cond2)) {
411 | isectA.t = SIMD.Float32x4.select(cond2, t2, isectA.t);
412 | isectA.hit = SIMD.Bool32x4.or(cond2, isectA.hit);
413 |
414 | isectB.p.x = SIMD.Float32x4.select(cond2, SIMD.Float32x4.add(orgx, SIMD.Float32x4.mul(dirx, isectA.t)), isectB.p.x);
415 | isectB.p.y = SIMD.Float32x4.select(cond2, SIMD.Float32x4.add(orgy, SIMD.Float32x4.mul(diry, isectA.t)), isectB.p.y);
416 | isectB.p.z = SIMD.Float32x4.select(cond2, SIMD.Float32x4.add(orgz, SIMD.Float32x4.mul(dirz, isectA.t)), isectB.p.z);
417 |
418 | isectB.n.x = SIMD.Float32x4.select(cond2, Float32x4.splat(plane.n.x), isectB.n.x);
419 | isectB.n.y = SIMD.Float32x4.select(cond2, Float32x4.splat(plane.n.y), isectB.n.y);
420 | isectB.n.z = SIMD.Float32x4.select(cond2, Float32x4.splat(plane.n.z), isectB.n.z);
421 | }
422 | }
423 |
424 | // Utility calculation functions ----------------------------------------------------
425 |
426 | function vdot(v0, v1) { // dot product of two {x, y, z} vectors
427 | return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z;
428 | }
429 |
430 | function vcross(v0, v1) { // cross product v0 x v1, returned as a new {x, y, z} object
431 | return {
432 | x: v0.y * v1.z - v0.z * v1.y,
433 | y: v0.z * v1.x - v0.x * v1.z,
434 | z: v0.x * v1.y - v0.y * v1.x
435 | };
436 | }
437 |
438 | function vnormalize(c) { // scales c to unit length in place; returns nothing
439 | var length = Math.sqrt(vdot(c, c));
440 | if (Math.abs(length) > 1e-17) { // vectors with length at or below 1e-17 are left unchanged
441 | c.x /= length;
442 | c.y /= length;
443 | c.z /= length;
444 | }
445 | }
446 |
447 | function orthoBasis(basis, n) { // fills basis[0..2] in place; basis[2] is the object n itself, not a copy
448 | basis[2] = n;
449 | basis[1] = { x: 0, y: 0, z: 0 };
450 | // Seed basis[1] with the first coordinate axis along which n's component lies strictly between -0.6 and 0.6 (x axis as fallback).
451 | if ((n.x < 0.6) && (n.x > -0.6)) {
452 | basis[1].x = 1.0;
453 | }
454 | else if ((n.y < 0.6) && (n.y > -0.6)) {
455 | basis[1].y = 1.0;
456 | }
457 | else if ((n.z < 0.6) && (n.z > -0.6)) {
458 | basis[1].z = 1.0;
459 | }
460 | else {
461 | basis[1].x = 1.0;
462 | }
463 | // basis[0] is perpendicular to the seed axis and n; basis[1] is then rebuilt from n and basis[0].
464 | basis[0] = vcross(basis[1], basis[2]);
465 | vnormalize(basis[0]);
466 |
467 | basis[1] = vcross(basis[2], basis[0]);
468 | vnormalize(basis[1]);
469 | }
470 |
471 | } ());
472 |
--------------------------------------------------------------------------------
/src/benchmarks/averageFloat32x4.js:
--------------------------------------------------------------------------------
1 | // Simple performance test of SIMD.add operation. Use SIMD.add to average up elements
2 | // in a Float32Array. Compare to scalar implementation of same function.
3 | // Author: Peter Jensen
4 |
5 | (function () {
6 |
7 | // Kernel configuration
8 | var kernelConfig = {
9 | kernelName: "AverageFloat32x4",
10 | kernelInit: initArray,
11 | kernelCleanup: cleanup,
12 | kernelSimd: simdAverage,
13 | kernelNonSimd: average,
14 | kernelIterations: 1000
15 | };
16 |
17 | // Hook up to the harness
18 | benchmarks.add(new Benchmark(kernelConfig));
19 |
20 | // Benchmark data, initialization and kernel functions
21 | var a = new Float32Array(10000);
22 |
23 | function sanityCheck() { // true when one pass of the scalar and SIMD kernels agree to within 0.0001
24 | return Math.abs(average(1) - simdAverage(1)) < 0.0001;
25 | }
26 |
27 | function initArray() {
28 | var j = 0;
29 | for (var i = 0, l = a.length; i < l; ++i) {
30 | a[i] = 0.1;
31 | }
32 | // Check that the two kernel functions yields the same result, roughly
33 | // Account for the fact that the simdAverage() is computed using float32
34 | // precision and the average() is using double precision
35 | return sanityCheck();
36 | }
37 |
38 | function cleanup() { // cleanup hook: re-runs the scalar/SIMD comparison and returns its result
39 | return sanityCheck();
40 | }
41 |
42 | function average(n) { // scalar kernel: sums every element of `a`, n times over, and returns the mean from the last pass
43 | for (var i = 0; i < n; ++i) {
44 | var sum = 0.0; // reset each pass; `var` is function-scoped, so it is still visible at the return
45 | for (var j = 0, l = a.length; j < l; ++j) {
46 | sum += a[j];
47 | }
48 | }
49 | return sum/a.length; // sum is undefined here if the outer loop never ran
50 | }
51 |
52 | function simdAverage(n) { // SIMD kernel: n passes over `a`, four elements per step; returns the mean from the last pass
53 | var a_length = a.length;
54 | for (var i = 0; i < n; ++i) {
55 | var sum4 = SIMD.Float32x4.splat(0.0); // four running partial sums, reset each pass
56 | for (var j = 0; j < a_length; j += 4) { // assumes a.length is a multiple of 4 (it is 10000 here)
57 | sum4 = SIMD.Float32x4.add(sum4, SIMD.Float32x4.load(a, j));
58 | }
59 | }
60 | return (SIMD.Float32x4.extractLane(sum4, 0) +
61 | SIMD.Float32x4.extractLane(sum4, 1) +
62 | SIMD.Float32x4.extractLane(sum4, 2) +
63 | SIMD.Float32x4.extractLane(sum4, 3)) / a.length;
64 | }
65 |
66 | } ());
67 |
--------------------------------------------------------------------------------
/src/benchmarks/averageFloat32x4LoadFromInt8Array.js:
--------------------------------------------------------------------------------
1 | // Simple performance test of SIMD.add operation. Use SIMD.add to average up elements
2 | // in a Float32Array, loaded through an Int8Array view of the same buffer. Compare to scalar implementation of same function.
3 | // Author: Peter Jensen
4 |
5 | (function () {
6 |
7 | // Kernel configuration
8 | var kernelConfig = {
9 | kernelName: "AverageFloat32x4LoadFromInt8Array",
10 | kernelInit: initArray,
11 | kernelCleanup: cleanup,
12 | kernelSimd: simdAverage,
13 | kernelNonSimd: average,
14 | kernelIterations: 1000
15 | };
16 |
17 | // Hook up to the harness
18 | benchmarks.add(new Benchmark(kernelConfig));
19 |
20 | // Benchmark data, initialization and kernel functions
21 | var a = new Float32Array(10000);
22 | var b = new Int8Array(a.buffer);
23 |
24 | function sanityCheck() { // true when one pass of the scalar and SIMD kernels agree to within 0.0001
25 | return Math.abs(average(1) - simdAverage(1)) < 0.0001;
26 | }
27 |
28 | function initArray() {
29 | var j = 0;
30 | for (var i = 0, l = a.length; i < l; ++i) {
31 | a[i] = 0.1;
32 | }
33 | // Check that the two kernel functions yields the same result, roughly
34 | // Account for the fact that the simdAverage() is computed using float32
35 | // precision and the average() is using double precision
36 | return sanityCheck();
37 | }
38 |
39 | function cleanup() { // cleanup hook: re-runs the scalar/SIMD comparison and returns its result
40 | return sanityCheck();
41 | }
42 |
43 | function average(n) { // scalar kernel: sums every element of `a`, n times over, and returns the mean from the last pass
44 | for (var i = 0; i < n; ++i) {
45 | var sum = 0.0; // reset each pass; `var` is function-scoped, so it is still visible at the return
46 | for (var j = 0, l = a.length; j < l; ++j) {
47 | sum += a[j];
48 | }
49 | }
50 | return sum/a.length; // sum is undefined here if the outer loop never ran
51 | }
52 |
53 | function simdAverage(n) { // SIMD kernel: n passes, reading the floats through the Int8Array view `b`; returns the mean from the last pass
54 | for (var i = 0; i < n; ++i) {
55 | var sum4 = SIMD.Float32x4.splat(0.0); // four running partial sums, reset each pass
56 | for (var j = 0; j < a.length / 4; ++j) { // one iteration per group of four floats
57 | sum4 = SIMD.Float32x4.add(sum4, SIMD.Float32x4.load(b, j << 4)); // b views a's buffer as bytes; j << 4 steps 16 elements of b (four floats) at a time
58 | }
59 | }
60 | return (SIMD.Float32x4.extractLane(sum4, 0) +
61 | SIMD.Float32x4.extractLane(sum4, 1) +
62 | SIMD.Float32x4.extractLane(sum4, 2) +
63 | SIMD.Float32x4.extractLane(sum4, 3)) / a.length;
64 | }
65 |
66 | } ());
67 |
--------------------------------------------------------------------------------
/src/benchmarks/averageFloat32x4LoadX.js:
--------------------------------------------------------------------------------
1 | // Simple performance test of SIMD.add operation. Use SIMD.add to average up elements
2 | // in a Float32Array. Compare to scalar implementation of same function.
3 | // Author: Peter Jensen
4 |
5 | (function () {
6 |
7 | // Kernel configuration
8 | var kernelConfig = {
9 | kernelName: "AverageFloat32x4LoadX",
10 | kernelInit: initArray,
11 | kernelCleanup: cleanup,
12 | kernelSimd: simdAverageLoad,
13 | kernelNonSimd: average,
14 | kernelIterations: 1000
15 | };
16 |
17 | // Hook up to the harness
18 | benchmarks.add(new Benchmark(kernelConfig));
19 |
20 | // Benchmark data, initialization and kernel functions
21 | var a = new Float32Array(10000);
22 | var a1 = new Float32Array(10000);
23 | var b = new Int8Array(a.buffer);
24 |
25 | function sanityCheck() {
26 | return true; // always reports success; the comparison on the next line is never reached
27 | return Math.abs(average(1) - simdAverageLoad(1)) < 0.0001;
28 | }
29 |
30 | function initArray() {
31 | var j = 0;
32 | for (var i = 0, l = a.length; i < l; ++i) {
33 | a[i] = 0.1;
34 | }
35 | // Check that the two kernel functions yields the same result, roughly
36 | // Account for the fact that the simdAverage() is computed using float32
37 | // precision and the average() is using double precision
38 | return sanityCheck();
39 | }
40 |
41 | function cleanup() { // cleanup hook: returns the result of sanityCheck()
42 | return sanityCheck();
43 | }
44 |
45 | function average(n) { // scalar kernel: sums every element of `a`, n times over, and returns the mean from the last pass
46 | for (var i = 0; i < n; ++i) {
47 | var sum = 0.0; // reset each pass; `var` is function-scoped, so it is still visible at the return
48 | for (var j = 0, l = a.length; j < l; ++j) {
49 | sum += a[j];
50 | }
51 | }
52 | return sum/a.length; // sum is undefined here if the outer loop never ran
53 | }
54 |
55 | function simdAverageLoad(n) { // SIMD kernel: n passes over `a`, one load1 per element; returns the lane total divided by a.length
56 | var a_length = a.length;
57 | for (var i = 0; i < n; ++i) {
58 | var sum4 = SIMD.Float32x4.splat(0.0); // accumulator, reset each pass
59 | for (var j = 0; j < a_length; ++j) { // steps one element at a time, unlike the four-wide load variant
60 | sum4 = SIMD.Float32x4.add(sum4, SIMD.Float32x4.load1(a, j)); // NOTE(review): presumably load1 fills a single lane and zeroes the rest — confirm against the SIMD.js spec
61 | }
62 | }
63 | return (SIMD.Float32x4.extractLane(sum4, 0) +
64 | SIMD.Float32x4.extractLane(sum4, 1) +
65 | SIMD.Float32x4.extractLane(sum4, 2) +
66 | SIMD.Float32x4.extractLane(sum4, 3)) / a.length;
67 | }
68 |
69 | } ());
70 |
--------------------------------------------------------------------------------
/src/benchmarks/averageFloat32x4LoadXY.js:
--------------------------------------------------------------------------------
1 | // Simple performance test of SIMD.add operation. Use SIMD.add to average up elements
2 | // in a Float32Array. Compare to scalar implementation of same function.
3 | // Author: Peter Jensen
4 |
5 | (function () {
6 |
// Kernel configuration: names the benchmark and hands the harness the
// init/cleanup hooks plus the SIMD and scalar kernels to time.
// kernelIterations is the iteration count used when the harness is not
// asked to pick one automatically.
var kernelConfig = {
  kernelName: "AverageFloat32x4LoadXY",
  kernelInit: initArray,
  kernelCleanup: cleanup,
  kernelSimd: simdAverageLoad,
  kernelNonSimd: average,
  kernelIterations: 1000
};

// Hook up to the harness
benchmarks.add(new Benchmark(kernelConfig));

// Benchmark data shared by initArray() and both kernels.
// (The former 'a1' and 'b' arrays were allocated but never read by live
// code, so they have been dropped.)
var a = new Float32Array(10000);
24 |
// Returns true when the scalar kernel and the SIMD kernel agree on the
// average of 'a' to within 1e-4. The tolerance absorbs the difference
// between float32 lane accumulation and double-precision scalar summation.
// An unconditional 'return true' used to precede this comparison, which made
// the check unreachable and let init/cleanup succeed regardless of the result.
function sanityCheck() {
  return Math.abs(average(1) - simdAverageLoad(1)) < 0.0001;
}
29 |
// Harness init hook: fills every element of 'a' with 0.1 and then reports
// whether the scalar and SIMD kernels agree (see sanityCheck()).
// Returns the result of sanityCheck().
function initArray() {
  for (var i = 0, l = a.length; i < l; ++i) {
    a[i] = 0.1;
  }
  // The SIMD kernel accumulates in float32 lanes while average() sums in
  // double precision, so the comparison is only approximate.
  return sanityCheck();
}
40 |
// Harness cleanup hook: passes back the verdict of sanityCheck().
function cleanup() {
  var kernelsAgree = sanityCheck();
  return kernelsAgree;
}
44 |
// Scalar kernel: sums all elements of 'a' n times over (restarting from zero
// on each pass) and returns the last pass's sum divided by a.length.
// With n === 0 no pass runs, the sum stays undefined and the result is NaN.
function average(n) {
  var running;
  for (var round = 0; round !== n && round < n; round++) {
    running = 0.0;
    var limit = a.length;
    var pos = 0;
    while (pos < limit) {
      running += a[pos];
      pos++;
    }
  }
  return running / a.length;
}
54 |
// SIMD kernel: n times over, walks 'a' two elements at a time, adding each
// load2() result into a Float32x4 accumulator, then returns the sum of the
// four accumulator lanes divided by a.length.
function simdAverageLoad(n) {
  var f32x4 = SIMD.Float32x4;
  var pairCount = a.length / 2;
  var acc;
  for (var pass = 0; pass < n; pass++) {
    acc = f32x4.splat(0.0);
    for (var pair = 0; pair < pairCount; pair++) {
      // pair << 1 is the index of the first element of this pair
      acc = f32x4.add(acc, f32x4.load2(a, pair << 1));
    }
  }
  var laneTotal = f32x4.extractLane(acc, 0) +
                  f32x4.extractLane(acc, 1) +
                  f32x4.extractLane(acc, 2) +
                  f32x4.extractLane(acc, 3);
  return laneTotal / a.length;
}
69 |
70 | } ());
71 |
--------------------------------------------------------------------------------
/src/benchmarks/averageFloat32x4LoadXYZ.js:
--------------------------------------------------------------------------------
1 | // Simple performance test of SIMD.add operation. Use SIMD.add to average up elements
2 | // in a Float32Array. Compare to scalar implementation of same function.
3 | // Author: Peter Jensen
4 |
5 | (function () {
6 |
// Kernel configuration: names the benchmark and hands the harness the
// init/cleanup hooks plus the SIMD and scalar kernels to time.
// kernelIterations is the iteration count used when the harness is not
// asked to pick one automatically.
var kernelConfig = {
  kernelName: "AverageFloat32x4LoadXYZ",
  kernelInit: initArray,
  kernelCleanup: cleanup,
  kernelSimd: simdAverageLoad,
  kernelNonSimd: average,
  kernelIterations: 1000
};

// Hook up to the harness
benchmarks.add(new Benchmark(kernelConfig));

// Benchmark data shared by initArray() and both kernels. The length is a
// multiple of 3 because the SIMD kernel consumes three elements per load.
// (The former 'a1' and 'b' arrays were allocated but never read by live
// code, so they have been dropped.)
var a = new Float32Array(9999);
24 |
// Returns true when the scalar kernel and the SIMD kernel agree on the
// average of 'a' to within 1e-4. The tolerance absorbs the difference
// between float32 lane accumulation and double-precision scalar summation.
// An unconditional 'return true' used to precede this comparison, which made
// the check unreachable and let init/cleanup succeed regardless of the result.
function sanityCheck() {
  return Math.abs(average(1) - simdAverageLoad(1)) < 0.0001;
}
29 |
// Harness init hook: fills every element of 'a' with 0.1 and then reports
// whether the scalar and SIMD kernels agree (see sanityCheck()).
// Returns the result of sanityCheck().
function initArray() {
  for (var i = 0, l = a.length; i < l; ++i) {
    a[i] = 0.1;
  }
  // The SIMD kernel accumulates in float32 lanes while average() sums in
  // double precision, so the comparison is only approximate.
  return sanityCheck();
}
40 |
// Harness cleanup hook: passes back the verdict of sanityCheck().
function cleanup() {
  var kernelsAgree = sanityCheck();
  return kernelsAgree;
}
44 |
// Scalar kernel: sums all elements of 'a' n times over (restarting from zero
// on each pass) and returns the last pass's sum divided by a.length.
// With n === 0 no pass runs, the sum stays undefined and the result is NaN.
function average(n) {
  var total;
  var pass = 0;
  while (pass < n) {
    total = 0.0;
    var count = a.length;
    for (var k = 0; k < count; k++) {
      total += a[k];
    }
    pass++;
  }
  return total / a.length;
}
54 |
// SIMD kernel: n times over, walks 'a' three elements at a time, adding each
// load3() result into a Float32x4 accumulator, then returns the sum of the
// four accumulator lanes divided by a.length.
function simdAverageLoad(n) {
  var f32x4 = SIMD.Float32x4;
  var tripleCount = a.length / 3;
  var acc;
  for (var pass = 0; pass < n; pass++) {
    acc = f32x4.splat(0.0);
    for (var triple = 0; triple < tripleCount; triple++) {
      // triple * 3 is the index of the first element of this triple
      acc = f32x4.add(acc, f32x4.load3(a, triple * 3));
    }
  }
  var laneTotal = f32x4.extractLane(acc, 0) +
                  f32x4.extractLane(acc, 1) +
                  f32x4.extractLane(acc, 2) +
                  f32x4.extractLane(acc, 3);
  return laneTotal / a.length;
}
69 |
70 | } ());
71 |
--------------------------------------------------------------------------------
/src/benchmarks/averageInt32x4Load.js:
--------------------------------------------------------------------------------
1 | // Simple performance test of SIMD.add operation. Use SIMD.add to average up elements
2 | // in a Int32Array. Compare to scalar implementation of same function.
3 | // Author: Peter Jensen
4 |
5 | (function () {
6 |
// Kernel configuration: names the benchmark and hands the harness the
// init/cleanup hooks plus the SIMD and scalar kernels to time.
// kernelIterations is the iteration count used when the harness is not
// asked to pick one automatically.
var kernelConfig = {
  kernelName: "AverageInt32x4Load",
  kernelInit: initArray,
  kernelCleanup: cleanup,
  kernelSimd: simdAverageLoad,
  kernelNonSimd: average,
  kernelIterations: 1000
};

// Hook up to the harness
benchmarks.add(new Benchmark(kernelConfig));

// Benchmark data shared by initArray() and both kernels.
// (The former 'a1' and 'b' arrays were allocated but never read, so they
// have been dropped.)
var a = new Int32Array(10000);
24 |
// Returns true when the scalar kernel and the SIMD kernel agree on the
// average of 'a' to within 1e-4.
// An unconditional 'return true' used to precede this comparison, which made
// the check unreachable and let init/cleanup succeed regardless of the result.
function sanityCheck() {
  return Math.abs(average(1) - simdAverageLoad(1)) < 0.0001;
}
29 |
// Harness init hook: fills every element of 'a' with 1 and then reports
// whether the scalar and SIMD kernels agree (see sanityCheck()).
// Returns the result of sanityCheck().
function initArray() {
  for (var i = 0, l = a.length; i < l; ++i) {
    a[i] = 1;
  }
  // Check that the two kernel functions yield the same result.
  return sanityCheck();
}
40 |
// Harness cleanup hook: passes back the verdict of sanityCheck().
function cleanup() {
  var kernelsAgree = sanityCheck();
  return kernelsAgree;
}
44 |
// Scalar kernel: sums all elements of 'a' n times over (restarting from zero
// on each pass) and returns the last pass's sum divided by a.length.
// With n === 0 no pass runs, the sum stays undefined and the result is NaN.
function average(n) {
  var running;
  for (var round = 0; round < n; round++) {
    running = 0.0;
    var limit = a.length;
    var pos = 0;
    while (pos < limit) {
      running += a[pos];
      pos++;
    }
  }
  return running / a.length;
}
54 |
// SIMD kernel: n times over, walks 'a' four elements at a time, adding each
// Int32x4 load() result into an accumulator, then returns the sum of the
// four accumulator lanes divided by a.length.
function simdAverageLoad(n) {
  var i32x4 = SIMD.Int32x4;
  var quadCount = a.length / 4;
  var acc;
  for (var pass = 0; pass < n; pass++) {
    acc = i32x4.splat(0);
    for (var quad = 0; quad < quadCount; quad++) {
      // quad << 2 is the index of the first element of this group of four
      acc = i32x4.add(acc, i32x4.load(a, quad << 2));
    }
  }
  var laneTotal = i32x4.extractLane(acc, 0) +
                  i32x4.extractLane(acc, 1) +
                  i32x4.extractLane(acc, 2) +
                  i32x4.extractLane(acc, 3);
  return laneTotal / a.length;
}
68 |
69 | } ());
70 |
--------------------------------------------------------------------------------
/src/benchmarks/base.js:
--------------------------------------------------------------------------------
1 | // SIMD Kernel Benchmark Harness
2 | // Author: Peter Jensen
3 |
// One benchmark entry: the kernel configuration supplied by a kernel file
// plus the bookkeeping fields that the harness fills in while running it.
// Every field is created here so that all Benchmark objects start out with
// the same set of properties.
function Benchmark (config) {
  // Kernel description (name, init/cleanup hooks, kernels, iteration count)
  this.config            = config;

  // Outcome of the init and cleanup hooks
  this.initOk            = true;
  this.cleanupOk         = true;

  // Iteration bookkeeping
  this.useAutoIterations = true;
  this.autoIterations    = 0;
  this.actualIterations  = 0;

  // Measured run times in milliseconds
  this.simdTime          = 0;
  this.nonSimdTime       = 0;
}
14 |
// Registry of benchmarks; starts out with an empty list.
function Benchmarks () {
  var registered = [];
  this.benchmarks = registered;
}
18 |
// Appends a benchmark to the registry and returns the index it was stored at.
Benchmarks.prototype.add = function (benchmark) {
  var newLength = this.benchmarks.push(benchmark);
  return newLength - 1;
};
23 |
// Runs one benchmark: init hook, iteration-count selection, timed SIMD run,
// timed non-SIMD run, cleanup hook. Results are written onto 'benchmark'
// (initOk, cleanupOk, autoIterations, actualIterations, simdTime,
// nonSimdTime). Returns false when the init or cleanup hook reports failure,
// true otherwise.
Benchmarks.prototype.runOne = function (benchmark) {
  var config = benchmark.config;

  // Wall-clock milliseconds taken by a single kernel(iterations) call.
  function timeKernel(kernel, iterations) {
    var start = Date.now();
    kernel(iterations);
    return Date.now() - start;
  }

  // Times both kernels (SIMD first) and returns the slower measurement.
  function slowestKernelTime(iterations) {
    var simdTime = timeKernel(config.kernelSimd, iterations);
    var nonSimdTime = timeKernel(config.kernelNonSimd, iterations);
    return Math.max(simdTime, nonSimdTime);
  }

  // Picks an iteration count that makes the slower kernel run for roughly
  // 'desiredRuntime' milliseconds.
  function computeIterations() {
    var desiredRuntime = 1000; // milliseconds for longest running kernel
    var minProbeRuntime = 500; // keep doubling until a probe run takes this long
    var testIterations = 10;   // iterations used for the first probe run

    var maxTime = slowestKernelTime(testIterations);
    while (maxTime < minProbeRuntime) {
      testIterations *= 2;
      maxTime = slowestKernelTime(testIterations);
    }

    // Scale the probe's iteration count up to the desired runtime
    return Math.ceil(desiredRuntime * testIterations / maxTime);
  }

  // Start from a clean slate so that a failure recorded by an earlier run of
  // this benchmark is not reported again after a successful re-run.
  benchmark.initOk = true;
  benchmark.cleanupOk = true;

  // Initialize the kernels and check the correctness status
  if (!config.kernelInit()) {
    benchmark.initOk = false;
    return false;
  }

  // Determine how many iterations to use.
  if (benchmark.useAutoIterations) {
    benchmark.autoIterations = computeIterations();
    benchmark.actualIterations = benchmark.autoIterations;
  }
  else {
    benchmark.actualIterations = config.kernelIterations;
  }

  // Run the SIMD kernel
  benchmark.simdTime = timeKernel(config.kernelSimd, benchmark.actualIterations);

  // Run the non-SIMD kernel
  benchmark.nonSimdTime = timeKernel(config.kernelNonSimd, benchmark.actualIterations);

  // Do the final sanity check
  if (!config.kernelCleanup()) {
    benchmark.cleanupOk = false;
    return false;
  }

  return true;
};
84 |
// Formats the outcome of one benchmark as a single line and hands it to
// outputFunctions.notifyError() (init or cleanup failed) or
// outputFunctions.notifyResult() (iterations, both timings and the
// non-SIMD/SIMD speedup ratio).
Benchmarks.prototype.report = function (benchmark, outputFunctions) {

  // Blanks needed to bring 'text' up to 'width' characters ("" if none).
  function blanks(text, width) {
    var missing = width - text.length;
    return missing > 0 ? new Array(missing + 1).join(" ") : "";
  }

  // Left-aligns: pads on the right with blanks
  function fillRight(str, width) {
    var text = str + ""; // make sure it's a string
    return text + blanks(text, width);
  }

  // Right-aligns: pads on the left with blanks
  function fillLeft(str, width) {
    var text = str + ""; // make sure it's a string
    return blanks(text, width) + text;
  }

  // Every output line starts with the kernel name in a 23-character column
  var label = fillRight(benchmark.config.kernelName + ": ", 23);

  if (!benchmark.initOk) {
    outputFunctions.notifyError(label + "FAILED INIT");
    return;
  }
  if (!benchmark.cleanupOk) {
    outputFunctions.notifyError(label + "FAILED CLEANUP");
    return;
  }

  var ratio = benchmark.nonSimdTime / benchmark.simdTime;
  var iterationsText = "Iterations(" + fillLeft(benchmark.actualIterations, 10) + ")";
  var simdText = ", SIMD(" + fillLeft(benchmark.simdTime + "ms)", 8);
  var nonSimdText = ", Non-SIMD(" + fillLeft(benchmark.nonSimdTime + "ms)", 8);
  var speedupText = ", Speedup(" + ratio.toFixed(3) + ")";

  outputFunctions.notifyResult(label + iterationsText + simdText + nonSimdText + speedupText);
};
120 |
// Runs and reports every registered benchmark in registration order.
// useAutoIterations is optional and is treated as false when omitted; its
// value is copied onto each benchmark before that benchmark is run.
Benchmarks.prototype.runAll = function (outputFunctions, useAutoIterations) {
  var auto = (useAutoIterations === void 0) ? false : useAutoIterations;
  var total = this.benchmarks.length;
  var index = 0;
  while (index < total) {
    var current = this.benchmarks[index];
    current.useAutoIterations = auto;
    this.runOne(current);
    this.report(current, outputFunctions);
    index++;
  }
};
132 |
// Shared registry instance; kernel files register themselves on it through benchmarks.add().
var benchmarks = new Benchmarks();
134 |
--------------------------------------------------------------------------------
/src/benchmarks/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | EcmaScript SIMD benchmarks
6 |
7 |
8 | Running benchmarks...
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/src/benchmarks/inverse4x4.js:
--------------------------------------------------------------------------------
1 | // Kernel for doing a 4x4 Matrix Inverse operation
2 | // Based on Cramer's rule.
3 | // See: ftp://download.intel.com/design/PentiumIII/sml/24504301.pdf
4 | // Author: Peter Jensen
5 | (function () {
6 |
// Kernel configuration: names the benchmark and hands the harness the
// init/cleanup hooks plus the SIMD and scalar kernels to time.
var kernelConfig = {
  kernelName:       "Matrix4x4Inverse",
  kernelInit:       init,
  kernelCleanup:    cleanup,
  kernelSimd:       simdMatrixInverseN,
  kernelNonSimd:    nonSimdMatrixInverseN,
  kernelIterations: 1000
};

// Register this kernel with the harness
benchmarks.add(new Benchmark(kernelConfig));

// File-level working storage shared by the kernels and the checks.
// All matrices are 4x4, stored as 16 consecutive floats.
var src  = new Float32Array(16); // Source matrix
var dst  = new Float32Array(16); // Result matrix
var tsrc = new Float32Array(16); // Transposed version of 'src'
var tmp  = new Float32Array(12); // Temporary array of multiply results

// Identity matrix: zero-filled by construction, ones on the diagonal
var ident = new Float32Array(16);
ident[0] = ident[5] = ident[10] = ident[15] = 1;
30 |
// Debug helper: prints a 16-element matrix as four lines of four values,
// each value formatted with two decimals and preceded by a blank.
// Output goes through the host-provided print() function.
function printMatrix(matrix) {
  for (var row = 0; row < 4; row++) {
    var cells = [];
    for (var col = 0; col < 4; col++) {
      cells.push(" " + matrix[row * 4 + col].toFixed(2));
    }
    print(cells.join(""));
  }
}
42 |
// Overwrites the first 16 elements of 'matrix' with the fixed test matrix.
function initMatrix(matrix) {
  // These values were chosen somewhat randomly, but they will at least yield a solution.
  var values = [
     0,  1,  2,  3,
    -1, -2, -3, -4,
     0,  0,  2,  3,
    -1, -2,  0, -4
  ];
  for (var i = 0; i < values.length; i++) {
    matrix[i] = values[i];
  }
}
50 |
// Writes the 4x4 matrix product op1 * op2 into dst. All three are flat
// 16-element arrays indexed as [4 * row + column].
function mulMatrix(dst, op1, op2) {
  for (var row = 0; row < 4; row++) {
    var base = row * 4; // index of the first element of this row
    for (var col = 0; col < 4; col++) {
      dst[base + col] =
        op1[base]     * op2[col] +
        op1[base + 1] * op2[col + 4] +
        op1[base + 2] * op2[col + 8] +
        op1[base + 3] * op2[col + 12];
    }
  }
}
59 |
// Verifies that 'matrix' is the inverse of the file-level 'src' matrix:
// src * matrix (computed into the 'tsrc' scratch array) must match the
// identity matrix to within 1e-5 in every element.
// Returns true on a match, false otherwise.
function checkMatrix(matrix) {
  // when multiplied with the src matrix it should yield the identity matrix
  mulMatrix(tsrc, src, matrix);
  for (var i = 0; i < 16; ++i) {
    // Written as a negated '<=' so that a NaN element fails the check;
    // 'NaN > tolerance' is false and would let a NaN-filled result pass.
    if (!(Math.abs(tsrc[i] - ident[i]) <= 0.00001)) {
      return false;
    }
  }
  return true;
}
71 |
// Kernel initializer: resets 'src' to the test matrix and runs each kernel
// once (scalar first, then SIMD), verifying after each that 'dst' holds the
// inverse. Returns false as soon as one kernel fails the check, true when
// both pass.
function init() {
  var kernels = [nonSimdMatrixInverseN, simdMatrixInverseN];
  for (var k = 0; k < kernels.length; k++) {
    var kernel = kernels[k];
    initMatrix(src);
    kernel(1);
    if (!checkMatrix(dst)) {
      return false;
    }
  }
  return true;
}
91 |
// Harness cleanup hook: re-runs the full init() verification and passes its
// result back.
function cleanup() {
  var stillCorrect = init();
  return stillCorrect;
}
95 |
// Single-shot SIMD inverse of the 4x4 matrix in the file-level 'src' array;
// the result is stored in the file-level 'dst' array. Takes no arguments and
// returns nothing. The statements are the same as one loop iteration of
// simdMatrixInverseN().
// The trailing hex values in the comments are kept from the original
// source (0xB1 for lane order 1,0,3,2 and 0x4E for lane order 2,3,0,1).
function simdMatrixInverse() {
  var src0, src1, src2, src3;
  var row0, row1, row2, row3;
  var tmp1;
  var minor0, minor1, minor2, minor3;
  var det;

  // Load the 4 rows. 'src' holds 16 floats, so the last row starts at
  // index 12; an offset of 16 would point past the end of the array.
  src0 = SIMD.Float32x4.load(src, 0);
  src1 = SIMD.Float32x4.load(src, 4);
  src2 = SIMD.Float32x4.load(src, 8);
  src3 = SIMD.Float32x4.load(src, 12);

  // Transpose the source matrix. Sort of. Not a true transpose operation:
  // row1 and row3 come out with their two lane pairs swapped, and the
  // minor computations below are written for exactly that layout.
  tmp1 = SIMD.Float32x4.shuffle(src0, src1, 0, 1, 4, 5);
  row1 = SIMD.Float32x4.shuffle(src2, src3, 0, 1, 4, 5);
  row0 = SIMD.Float32x4.shuffle(tmp1, row1, 0, 2, 4, 6);
  row1 = SIMD.Float32x4.shuffle(row1, tmp1, 1, 3, 5, 7);

  tmp1 = SIMD.Float32x4.shuffle(src0, src1, 2, 3, 6, 7);
  row3 = SIMD.Float32x4.shuffle(src2, src3, 2, 3, 6, 7);
  row2 = SIMD.Float32x4.shuffle(tmp1, row3, 0, 2, 4, 6);
  row3 = SIMD.Float32x4.shuffle(row3, tmp1, 1, 3, 5, 7);

  // ----
  tmp1   = SIMD.Float32x4.mul(row2, row3);
  tmp1   = SIMD.Float32x4.swizzle(tmp1, 1, 0, 3, 2); // 0xB1 = 10110001
  minor0 = SIMD.Float32x4.mul(row1, tmp1);
  minor1 = SIMD.Float32x4.mul(row0, tmp1);
  tmp1   = SIMD.Float32x4.swizzle(tmp1, 2, 3, 0, 1); // 0x4E = 01001110
  minor0 = SIMD.Float32x4.sub(SIMD.Float32x4.mul(row1, tmp1), minor0);
  minor1 = SIMD.Float32x4.sub(SIMD.Float32x4.mul(row0, tmp1), minor1);
  minor1 = SIMD.Float32x4.swizzle(minor1, 2, 3, 0, 1); // 0x4E = 01001110

  // ----
  tmp1   = SIMD.Float32x4.mul(row1, row2);
  tmp1   = SIMD.Float32x4.swizzle(tmp1, 1, 0, 3, 2); // 0xB1 = 10110001
  minor0 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row3, tmp1), minor0);
  minor3 = SIMD.Float32x4.mul(row0, tmp1);
  tmp1   = SIMD.Float32x4.swizzle(tmp1, 2, 3, 0, 1); // 0x4E = 01001110
  minor0 = SIMD.Float32x4.sub(minor0, SIMD.Float32x4.mul(row3, tmp1));
  minor3 = SIMD.Float32x4.sub(SIMD.Float32x4.mul(row0, tmp1), minor3);
  minor3 = SIMD.Float32x4.swizzle(minor3, 2, 3, 0, 1); // 0x4E = 01001110

  // ----
  tmp1   = SIMD.Float32x4.mul(SIMD.Float32x4.swizzle(row1, 2, 3, 0, 1), row3); // 0x4E = 01001110
  tmp1   = SIMD.Float32x4.swizzle(tmp1, 1, 0, 3, 2); // 0xB1 = 10110001
  row2   = SIMD.Float32x4.swizzle(row2, 2, 3, 0, 1); // 0x4E = 01001110
  minor0 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row2, tmp1), minor0);
  minor2 = SIMD.Float32x4.mul(row0, tmp1);
  tmp1   = SIMD.Float32x4.swizzle(tmp1, 2, 3, 0, 1); // 0x4E = 01001110
  minor0 = SIMD.Float32x4.sub(minor0, SIMD.Float32x4.mul(row2, tmp1));
  minor2 = SIMD.Float32x4.sub(SIMD.Float32x4.mul(row0, tmp1), minor2);
  minor2 = SIMD.Float32x4.swizzle(minor2, 2, 3, 0, 1); // 0x4E = 01001110

  // ----
  tmp1   = SIMD.Float32x4.mul(row0, row1);
  tmp1   = SIMD.Float32x4.swizzle(tmp1, 1, 0, 3, 2); // 0xB1 = 10110001
  minor2 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row3, tmp1), minor2);
  minor3 = SIMD.Float32x4.sub(SIMD.Float32x4.mul(row2, tmp1), minor3);
  tmp1   = SIMD.Float32x4.swizzle(tmp1, 2, 3, 0, 1); // 0x4E = 01001110
  minor2 = SIMD.Float32x4.sub(SIMD.Float32x4.mul(row3, tmp1), minor2);
  minor3 = SIMD.Float32x4.sub(minor3, SIMD.Float32x4.mul(row2, tmp1));

  // ----
  tmp1   = SIMD.Float32x4.mul(row0, row3);
  tmp1   = SIMD.Float32x4.swizzle(tmp1, 1, 0, 3, 2); // 0xB1 = 10110001
  minor1 = SIMD.Float32x4.sub(minor1, SIMD.Float32x4.mul(row2, tmp1));
  minor2 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row1, tmp1), minor2);
  tmp1   = SIMD.Float32x4.swizzle(tmp1, 2, 3, 0, 1); // 0x4E = 01001110
  minor1 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row2, tmp1), minor1);
  minor2 = SIMD.Float32x4.sub(minor2, SIMD.Float32x4.mul(row1, tmp1));

  // ----
  tmp1   = SIMD.Float32x4.mul(row0, row2);
  tmp1   = SIMD.Float32x4.swizzle(tmp1, 1, 0, 3, 2); // 0xB1 = 10110001
  minor1 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row3, tmp1), minor1);
  minor3 = SIMD.Float32x4.sub(minor3, SIMD.Float32x4.mul(row1, tmp1));
  tmp1   = SIMD.Float32x4.swizzle(tmp1, 2, 3, 0, 1); // 0x4E = 01001110
  minor1 = SIMD.Float32x4.sub(minor1, SIMD.Float32x4.mul(row3, tmp1));
  minor3 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row1, tmp1), minor3);

  // Compute determinant: lane-wise product followed by two swizzle+add
  // steps that leave the full sum in every lane
  det    = SIMD.Float32x4.mul(row0, minor0);
  det    = SIMD.Float32x4.add(SIMD.Float32x4.swizzle(det, 2, 3, 0, 1), det); // 0x4E = 01001110
  det    = SIMD.Float32x4.add(SIMD.Float32x4.swizzle(det, 1, 0, 3, 2), det); // 0xB1 = 10110001
  // Reciprocal estimate refined with one Newton-Raphson step: 2*x - det*x*x
  tmp1   = SIMD.Float32x4.reciprocalApproximation(det);
  det    = SIMD.Float32x4.sub(SIMD.Float32x4.add(tmp1, tmp1), SIMD.Float32x4.mul(det, SIMD.Float32x4.mul(tmp1, tmp1)));
  det    = SIMD.Float32x4.swizzle(det, 0, 0, 0, 0);

  // Compute final values by multiplying with 1/det
  minor0 = SIMD.Float32x4.mul(det, minor0);
  minor1 = SIMD.Float32x4.mul(det, minor1);
  minor2 = SIMD.Float32x4.mul(det, minor2);
  minor3 = SIMD.Float32x4.mul(det, minor3);

  SIMD.Float32x4.store(dst, 0,  minor0);
  SIMD.Float32x4.store(dst, 4,  minor1);
  SIMD.Float32x4.store(dst, 8,  minor2);
  SIMD.Float32x4.store(dst, 12, minor3);
}
217 |
// Single-shot scalar inverse of the 4x4 matrix in the file-level 'src'
// array, using cofactors (Cramer's rule). Uses the file-level 'tsrc' and
// 'tmp' arrays as scratch space and leaves the result in 'dst'.
// Takes no arguments and returns nothing.
function nonSimdMatrixInverse() {

  // Transpose the source matrix into tsrc
  for (var row = 0; row < 4; row++) {
    for (var col = 0; col < 4; col++) {
      tsrc[col * 4 + row] = src[row * 4 + col];
    }
  }

  // Pairwise products needed by the first 8 cofactors
  tmp[0]  = tsrc[10] * tsrc[15];
  tmp[1]  = tsrc[11] * tsrc[14];
  tmp[2]  = tsrc[9]  * tsrc[15];
  tmp[3]  = tsrc[11] * tsrc[13];
  tmp[4]  = tsrc[9]  * tsrc[14];
  tmp[5]  = tsrc[10] * tsrc[13];
  tmp[6]  = tsrc[8]  * tsrc[15];
  tmp[7]  = tsrc[11] * tsrc[12];
  tmp[8]  = tsrc[8]  * tsrc[14];
  tmp[9]  = tsrc[10] * tsrc[12];
  tmp[10] = tsrc[8]  * tsrc[13];
  tmp[11] = tsrc[9]  * tsrc[12];

  // First 8 cofactors: each is a positive sum minus a negative sum
  dst[0]  = tmp[0] * tsrc[5] + tmp[3] * tsrc[6] + tmp[4]  * tsrc[7];
  dst[0] -= tmp[1] * tsrc[5] + tmp[2] * tsrc[6] + tmp[5]  * tsrc[7];
  dst[1]  = tmp[1] * tsrc[4] + tmp[6] * tsrc[6] + tmp[9]  * tsrc[7];
  dst[1] -= tmp[0] * tsrc[4] + tmp[7] * tsrc[6] + tmp[8]  * tsrc[7];
  dst[2]  = tmp[2] * tsrc[4] + tmp[7] * tsrc[5] + tmp[10] * tsrc[7];
  dst[2] -= tmp[3] * tsrc[4] + tmp[6] * tsrc[5] + tmp[11] * tsrc[7];
  dst[3]  = tmp[5] * tsrc[4] + tmp[8] * tsrc[5] + tmp[11] * tsrc[6];
  dst[3] -= tmp[4] * tsrc[4] + tmp[9] * tsrc[5] + tmp[10] * tsrc[6];
  dst[4]  = tmp[1] * tsrc[1] + tmp[2] * tsrc[2] + tmp[5]  * tsrc[3];
  dst[4] -= tmp[0] * tsrc[1] + tmp[3] * tsrc[2] + tmp[4]  * tsrc[3];
  dst[5]  = tmp[0] * tsrc[0] + tmp[7] * tsrc[2] + tmp[8]  * tsrc[3];
  dst[5] -= tmp[1] * tsrc[0] + tmp[6] * tsrc[2] + tmp[9]  * tsrc[3];
  dst[6]  = tmp[3] * tsrc[0] + tmp[6] * tsrc[1] + tmp[11] * tsrc[3];
  dst[6] -= tmp[2] * tsrc[0] + tmp[7] * tsrc[1] + tmp[10] * tsrc[3];
  dst[7]  = tmp[4] * tsrc[0] + tmp[9] * tsrc[1] + tmp[10] * tsrc[2];
  dst[7] -= tmp[5] * tsrc[0] + tmp[8] * tsrc[1] + tmp[11] * tsrc[2];

  // Pairwise products needed by the second 8 cofactors
  tmp[0]  = tsrc[2] * tsrc[7];
  tmp[1]  = tsrc[3] * tsrc[6];
  tmp[2]  = tsrc[1] * tsrc[7];
  tmp[3]  = tsrc[3] * tsrc[5];
  tmp[4]  = tsrc[1] * tsrc[6];
  tmp[5]  = tsrc[2] * tsrc[5];
  tmp[6]  = tsrc[0] * tsrc[7];
  tmp[7]  = tsrc[3] * tsrc[4];
  tmp[8]  = tsrc[0] * tsrc[6];
  tmp[9]  = tsrc[2] * tsrc[4];
  tmp[10] = tsrc[0] * tsrc[5];
  tmp[11] = tsrc[1] * tsrc[4];

  // Second 8 cofactors
  dst[8]   = tmp[0]  * tsrc[13] + tmp[3]  * tsrc[14] + tmp[4]  * tsrc[15];
  dst[8]  -= tmp[1]  * tsrc[13] + tmp[2]  * tsrc[14] + tmp[5]  * tsrc[15];
  dst[9]   = tmp[1]  * tsrc[12] + tmp[6]  * tsrc[14] + tmp[9]  * tsrc[15];
  dst[9]  -= tmp[0]  * tsrc[12] + tmp[7]  * tsrc[14] + tmp[8]  * tsrc[15];
  dst[10]  = tmp[2]  * tsrc[12] + tmp[7]  * tsrc[13] + tmp[10] * tsrc[15];
  dst[10] -= tmp[3]  * tsrc[12] + tmp[6]  * tsrc[13] + tmp[11] * tsrc[15];
  dst[11]  = tmp[5]  * tsrc[12] + tmp[8]  * tsrc[13] + tmp[11] * tsrc[14];
  dst[11] -= tmp[4]  * tsrc[12] + tmp[9]  * tsrc[13] + tmp[10] * tsrc[14];
  dst[12]  = tmp[2]  * tsrc[10] + tmp[5]  * tsrc[11] + tmp[1]  * tsrc[9];
  dst[12] -= tmp[4]  * tsrc[11] + tmp[0]  * tsrc[9]  + tmp[3]  * tsrc[10];
  dst[13]  = tmp[8]  * tsrc[11] + tmp[0]  * tsrc[8]  + tmp[7]  * tsrc[10];
  dst[13] -= tmp[6]  * tsrc[10] + tmp[9]  * tsrc[11] + tmp[1]  * tsrc[8];
  dst[14]  = tmp[6]  * tsrc[9]  + tmp[11] * tsrc[11] + tmp[3]  * tsrc[8];
  dst[14] -= tmp[10] * tsrc[11] + tmp[2]  * tsrc[8]  + tmp[7]  * tsrc[9];
  dst[15]  = tmp[10] * tsrc[10] + tmp[4]  * tsrc[8]  + tmp[9]  * tsrc[9];
  dst[15] -= tmp[8]  * tsrc[9]  + tmp[11] * tsrc[10] + tmp[5]  * tsrc[8];

  // Determinant from the first row of tsrc and the first four cofactors
  var det = tsrc[0] * dst[0] + tsrc[1] * dst[1] + tsrc[2] * dst[2] + tsrc[3] * dst[3];

  // Scale every cofactor by 1/det to obtain the inverse
  var invDet = 1 / det;
  for (var k = 0; k < 16; k++) {
    dst[k] *= invDet;
  }

}
302 |
// SIMD version of the kernel: inverts the 4x4 matrix in the file-level 'src'
// array n times, storing the result in the file-level 'dst' array each time.
// Lane orders (1,0,3,2) and (2,3,0,1) below are the ones the original source
// annotates as 0xB1 and 0x4E.
function simdMatrixInverseN(n) {
  var in0, in1, in2, in3;  // the four source rows as loaded
  var r0, r1, r2, r3;      // rearranged rows used by the minor computations
  var t;                   // scratch vector
  var m0, m1, m2, m3;      // accumulated minors, later the result rows
  var d;                   // determinant, later its reciprocal

  for (var pass = 0; pass < n; pass++) {
    var f4 = SIMD.Float32x4;

    // Load the 4 rows
    in0 = f4.load(src, 0);
    in1 = f4.load(src, 4);
    in2 = f4.load(src, 8);
    in3 = f4.load(src, 12);

    // Transpose-like rearrangement (not a true transpose): r1 and r3 end up
    // with their two lane pairs swapped.
    t  = f4.shuffle(in0, in1, 0, 1, 4, 5);
    r1 = f4.shuffle(in2, in3, 0, 1, 4, 5);
    r0 = f4.shuffle(t, r1, 0, 2, 4, 6);
    r1 = f4.shuffle(r1, t, 1, 3, 5, 7);

    t  = f4.shuffle(in0, in1, 2, 3, 6, 7);
    r3 = f4.shuffle(in2, in3, 2, 3, 6, 7);
    r2 = f4.shuffle(t, r3, 0, 2, 4, 6);
    r3 = f4.shuffle(r3, t, 1, 3, 5, 7);

    // ---- products of r2 and r3
    t  = f4.mul(r2, r3);
    t  = f4.swizzle(t, 1, 0, 3, 2);
    m0 = f4.mul(r1, t);
    m1 = f4.mul(r0, t);
    t  = f4.swizzle(t, 2, 3, 0, 1);
    m0 = f4.sub(f4.mul(r1, t), m0);
    m1 = f4.sub(f4.mul(r0, t), m1);
    m1 = f4.swizzle(m1, 2, 3, 0, 1);

    // ---- products of r1 and r2
    t  = f4.mul(r1, r2);
    t  = f4.swizzle(t, 1, 0, 3, 2);
    m0 = f4.add(f4.mul(r3, t), m0);
    m3 = f4.mul(r0, t);
    t  = f4.swizzle(t, 2, 3, 0, 1);
    m0 = f4.sub(m0, f4.mul(r3, t));
    m3 = f4.sub(f4.mul(r0, t), m3);
    m3 = f4.swizzle(m3, 2, 3, 0, 1);

    // ---- products of (lane-swapped) r1 and r3
    t  = f4.mul(f4.swizzle(r1, 2, 3, 0, 1), r3);
    t  = f4.swizzle(t, 1, 0, 3, 2);
    r2 = f4.swizzle(r2, 2, 3, 0, 1);
    m0 = f4.add(f4.mul(r2, t), m0);
    m2 = f4.mul(r0, t);
    t  = f4.swizzle(t, 2, 3, 0, 1);
    m0 = f4.sub(m0, f4.mul(r2, t));
    m2 = f4.sub(f4.mul(r0, t), m2);
    m2 = f4.swizzle(m2, 2, 3, 0, 1);

    // ---- products of r0 and r1
    t  = f4.mul(r0, r1);
    t  = f4.swizzle(t, 1, 0, 3, 2);
    m2 = f4.add(f4.mul(r3, t), m2);
    m3 = f4.sub(f4.mul(r2, t), m3);
    t  = f4.swizzle(t, 2, 3, 0, 1);
    m2 = f4.sub(f4.mul(r3, t), m2);
    m3 = f4.sub(m3, f4.mul(r2, t));

    // ---- products of r0 and r3
    t  = f4.mul(r0, r3);
    t  = f4.swizzle(t, 1, 0, 3, 2);
    m1 = f4.sub(m1, f4.mul(r2, t));
    m2 = f4.add(f4.mul(r1, t), m2);
    t  = f4.swizzle(t, 2, 3, 0, 1);
    m1 = f4.add(f4.mul(r2, t), m1);
    m2 = f4.sub(m2, f4.mul(r1, t));

    // ---- products of r0 and r2
    t  = f4.mul(r0, r2);
    t  = f4.swizzle(t, 1, 0, 3, 2);
    m1 = f4.add(f4.mul(r3, t), m1);
    m3 = f4.sub(m3, f4.mul(r1, t));
    t  = f4.swizzle(t, 2, 3, 0, 1);
    m1 = f4.sub(m1, f4.mul(r3, t));
    m3 = f4.add(f4.mul(r1, t), m3);

    // Determinant: lane-wise product, then two swizzle+add steps that leave
    // the full sum in every lane
    d = f4.mul(r0, m0);
    d = f4.add(f4.swizzle(d, 2, 3, 0, 1), d);
    d = f4.add(f4.swizzle(d, 1, 0, 3, 2), d);

    // Reciprocal estimate refined with one Newton-Raphson step: 2*t - d*t*t
    t = f4.reciprocalApproximation(d);
    d = f4.sub(f4.add(t, t), f4.mul(d, f4.mul(t, t)));
    d = f4.swizzle(d, 0, 0, 0, 0);

    // Scale the minors by 1/det to obtain the rows of the inverse
    m0 = f4.mul(d, m0);
    m1 = f4.mul(d, m1);
    m2 = f4.mul(d, m2);
    m3 = f4.mul(d, m3);

    f4.store(dst, 0, m0);
    f4.store(dst, 4, m1);
    f4.store(dst, 8, m2);
    f4.store(dst, 12, m3);
  }
}
427 |
428 | // Non SIMD version of the kernel
// Scalar reference kernel: inverts the 4x4 matrix held in `src` and writes the
// inverse into `dst`, repeating the whole computation `n` times.
//
// `src`, `tsrc`, `tmp` and `dst` are all declared outside this function
// (presumably preallocated at file scope -- confirm at the top of the file).
// `tsrc` receives the transposed source and `tmp` holds 12 pair products.
//
// Method: cofactor expansion. The 16 cofactors are accumulated in `dst`, the
// determinant is formed from tsrc[0..3] and dst[0..3], and every cofactor is
// then scaled by 1/det. A zero determinant is not special-cased, so 1/det
// becomes Infinity and the output contains Infinity/NaN entries.
//
// Parameters:
//   n - number of times the inversion is repeated.
function nonSimdMatrixInverseN(n) {
  for (var iterations = 0; iterations < n; ++iterations) {
    // Transpose the source matrix
    for (var i = 0; i < 4; i++) {
      tsrc[i] = src[i * 4];
      tsrc[i + 4] = src[i * 4 + 1];
      tsrc[i + 8] = src[i * 4 + 2];
      tsrc[i + 12] = src[i * 4 + 3];
    }

    // Calculate pairs for first 8 elements (cofactors)
    // Each tmp entry is a product of two elements taken from tsrc[8..15].
    tmp[0] = tsrc[10] * tsrc[15];
    tmp[1] = tsrc[11] * tsrc[14];
    tmp[2] = tsrc[9] * tsrc[15];
    tmp[3] = tsrc[11] * tsrc[13];
    tmp[4] = tsrc[9] * tsrc[14];
    tmp[5] = tsrc[10] * tsrc[13];
    tmp[6] = tsrc[8] * tsrc[15];
    tmp[7] = tsrc[11] * tsrc[12];
    tmp[8] = tsrc[8] * tsrc[14];
    tmp[9] = tsrc[10] * tsrc[12];
    tmp[10] = tsrc[8] * tsrc[13];
    tmp[11] = tsrc[9] * tsrc[12];

    // calculate first 8 elements (cofactors)
    // Each element is written as "positive terms" followed by "-= negative terms".
    dst[0] = tmp[0] * tsrc[5] + tmp[3] * tsrc[6] + tmp[4] * tsrc[7];
    dst[0] -= tmp[1] * tsrc[5] + tmp[2] * tsrc[6] + tmp[5] * tsrc[7];
    dst[1] = tmp[1] * tsrc[4] + tmp[6] * tsrc[6] + tmp[9] * tsrc[7];
    dst[1] -= tmp[0] * tsrc[4] + tmp[7] * tsrc[6] + tmp[8] * tsrc[7];
    dst[2] = tmp[2] * tsrc[4] + tmp[7] * tsrc[5] + tmp[10] * tsrc[7];
    dst[2] -= tmp[3] * tsrc[4] + tmp[6] * tsrc[5] + tmp[11] * tsrc[7];
    dst[3] = tmp[5] * tsrc[4] + tmp[8] * tsrc[5] + tmp[11] * tsrc[6];
    dst[3] -= tmp[4] * tsrc[4] + tmp[9] * tsrc[5] + tmp[10] * tsrc[6];
    dst[4] = tmp[1] * tsrc[1] + tmp[2] * tsrc[2] + tmp[5] * tsrc[3];
    dst[4] -= tmp[0] * tsrc[1] + tmp[3] * tsrc[2] + tmp[4] * tsrc[3];
    dst[5] = tmp[0] * tsrc[0] + tmp[7] * tsrc[2] + tmp[8] * tsrc[3];
    dst[5] -= tmp[1] * tsrc[0] + tmp[6] * tsrc[2] + tmp[9] * tsrc[3];
    dst[6] = tmp[3] * tsrc[0] + tmp[6] * tsrc[1] + tmp[11] * tsrc[3];
    dst[6] -= tmp[2] * tsrc[0] + tmp[7] * tsrc[1] + tmp[10] * tsrc[3];
    dst[7] = tmp[4] * tsrc[0] + tmp[9] * tsrc[1] + tmp[10] * tsrc[2];
    dst[7] -= tmp[5] * tsrc[0] + tmp[8] * tsrc[1] + tmp[11] * tsrc[2];

    // calculate pairs for second 8 elements (cofactors)
    // tmp is reused; these products come from tsrc[0..7].
    tmp[0] = tsrc[2] * tsrc[7];
    tmp[1] = tsrc[3] * tsrc[6];
    tmp[2] = tsrc[1] * tsrc[7];
    tmp[3] = tsrc[3] * tsrc[5];
    tmp[4] = tsrc[1] * tsrc[6];
    tmp[5] = tsrc[2] * tsrc[5];
    tmp[6] = tsrc[0] * tsrc[7];
    tmp[7] = tsrc[3] * tsrc[4];
    tmp[8] = tsrc[0] * tsrc[6];
    tmp[9] = tsrc[2] * tsrc[4];
    tmp[10] = tsrc[0] * tsrc[5];
    tmp[11] = tsrc[1] * tsrc[4];

    // calculate second 8 elements (cofactors)
    dst[8] = tmp[0] * tsrc[13] + tmp[3] * tsrc[14] + tmp[4] * tsrc[15];
    dst[8] -= tmp[1] * tsrc[13] + tmp[2] * tsrc[14] + tmp[5] * tsrc[15];
    dst[9] = tmp[1] * tsrc[12] + tmp[6] * tsrc[14] + tmp[9] * tsrc[15];
    dst[9] -= tmp[0] * tsrc[12] + tmp[7] * tsrc[14] + tmp[8] * tsrc[15];
    dst[10] = tmp[2] * tsrc[12] + tmp[7] * tsrc[13] + tmp[10] * tsrc[15];
    dst[10] -= tmp[3] * tsrc[12] + tmp[6] * tsrc[13] + tmp[11] * tsrc[15];
    dst[11] = tmp[5] * tsrc[12] + tmp[8] * tsrc[13] + tmp[11] * tsrc[14];
    dst[11] -= tmp[4] * tsrc[12] + tmp[9] * tsrc[13] + tmp[10] * tsrc[14];
    dst[12] = tmp[2] * tsrc[10] + tmp[5] * tsrc[11] + tmp[1] * tsrc[9];
    dst[12] -= tmp[4] * tsrc[11] + tmp[0] * tsrc[9] + tmp[3] * tsrc[10];
    dst[13] = tmp[8] * tsrc[11] + tmp[0] * tsrc[8] + tmp[7] * tsrc[10];
    dst[13] -= tmp[6] * tsrc[10] + tmp[9] * tsrc[11] + tmp[1] * tsrc[8];
    dst[14] = tmp[6] * tsrc[9] + tmp[11] * tsrc[11] + tmp[3] * tsrc[8];
    dst[14] -= tmp[10] * tsrc[11] + tmp[2] * tsrc[8] + tmp[7] * tsrc[9];
    dst[15] = tmp[10] * tsrc[10] + tmp[4] * tsrc[8] + tmp[9] * tsrc[9];
    dst[15] -= tmp[8] * tsrc[9] + tmp[11] * tsrc[10] + tmp[5] * tsrc[8];

    // calculate determinant
    // Expansion along the first row of tsrc using the cofactors in dst[0..3].
    var det = tsrc[0] * dst[0] + tsrc[1] * dst[1] + tsrc[2] * dst[2] + tsrc[3] * dst[3];

    // calculate matrix inverse
    // Scale every cofactor by the reciprocal of the determinant.
    det = 1 / det;
    for (var j = 0; j < 16; j++) {
      dst[j] *= det;
    }
  }
}
513 |
514 | } ());
515 |
--------------------------------------------------------------------------------
/src/benchmarks/kernel-template.js:
--------------------------------------------------------------------------------
1 | // Kernel template
2 | // Author: Peter Jensen
3 | (function () {
4 |
// Kernel configuration
// Describes this kernel to the benchmark harness: a display name, the
// init/cleanup sanity-check callbacks, the SIMD and non-SIMD kernel
// functions, and an iteration count. The functions referenced here are
// declared further down in this file and are available thanks to hoisting.
var kernelConfig = {
  kernelName: "Test",
  kernelInit: init,
  kernelCleanup: cleanup,
  kernelSimd: simd,
  kernelNonSimd: nonSimd,
  kernelIterations: 100000000
};

// Hook up to the harness
// `benchmarks` and `Benchmark` are not defined in this file; they are
// presumably provided by base.js -- confirm.
benchmarks.add (new Benchmark (kernelConfig));
17 |
18 | // Kernel Initializer
// Kernel initializer: runs each kernel once and reports whether the SIMD
// and non-SIMD versions agree.
// This is also the place to set up any shared data the kernels need, so
// that the kernels themselves allocate as little as possible.
// Returns:
//   true:  the first (unoptimized) run of both kernels produced the same value
//   false: the results differ
function init () {
  var simdOutcome = simd (1);
  var scalarOutcome = nonSimd (1);
  return simdOutcome === scalarOutcome;
}
32 |
33 | // Kernel Cleanup
// Kernel cleanup: final sanity check, called once all kernel iterations
// have been executed. By then the kernels should be running in their
// optimized form, whereas the check in init() most likely exercised the
// unoptimized code.
// Returns:
//   true:  the last (optimized) run of both kernels produced the same value
//   false: the results differ
function cleanup () {
  var simdOutcome = simd (1);
  var scalarOutcome = nonSimd (1);
  return simdOutcome === scalarOutcome;
}
45 |
46 | // SIMD version of the kernel
// Placeholder "SIMD" kernel of the template: returns 0 + 1 + ... + (n - 1).
function simd (n) {
  var total = 0;
  var k = 0;
  while (k < n) {
    total += k;
    ++k;
  }
  return total;
}
54 |
55 | // Non SIMD version of the kernel
// Placeholder scalar kernel of the template: returns 0 + 1 + ... + (n - 1).
function nonSimd (n) {
  var total = 0;
  var k = 0;
  while (k < n) {
    total += k;
    ++k;
  }
  return total;
}
63 |
64 | } ());
65 |
--------------------------------------------------------------------------------
/src/benchmarks/mandelbrot.js:
--------------------------------------------------------------------------------
1 | // Mandelbrot Benchmark
2 | // Author: Peter Jensen
3 | (function () {
4 |
// Kernel configuration
// Registers the Mandelbrot kernel: sanity-check callbacks for init/cleanup,
// the SIMD and scalar kernel entry points, and an iteration count. The
// referenced functions are declared further down in this file (hoisted).
var kernelConfig = {
  kernelName: "Mandelbrot",
  kernelInit: initMandelbrot,
  kernelCleanup: cleanupMandelbrot,
  kernelSimd: simdMandelbrot,
  kernelNonSimd: nonSimdMandelbrot,
  kernelIterations: 10000
};

// Hook up to the harness
// `benchmarks` and `Benchmark` are not defined in this file; they are
// presumably provided by base.js -- confirm.
benchmarks.add (new Benchmark (kernelConfig));
17 |
// Debug helper: formats the four lanes of a Float32x4 as "[x,y,z,w]".
function Float32x4ToString (f4) {
  var text = "[";
  for (var lane = 0; lane < 4; ++lane) {
    if (lane > 0) {
      text += ",";
    }
    text += SIMD.Float32x4.extractLane(f4, lane);
  }
  return text + "]";
}
24 |
// Debug helper: formats the four lanes of an Int32x4 as "[x,y,z,w]".
function Int32x4ToString (i4) {
  var text = "[";
  for (var lane = 0; lane < 4; ++lane) {
    if (lane > 0) {
      text += ",";
    }
    text += SIMD.Int32x4.extractLane(i4, lane);
  }
  return text + "]";
}
31 |
// Scalar Mandelbrot iteration for the single point c = c_re + i*c_im.
// Iterates z <- z*z + c starting from z = c and returns the number of
// iterations completed before |z|^2 exceeded 4, capped at max_iterations.
function mandelx1(c_re, c_im, max_iterations) {
  var re = c_re;
  var im = c_im;
  var count = 0;
  while (count < max_iterations) {
    var reSq = re * re;
    var imSq = im * im;
    if (reSq + imSq > 4.0) {
      return count;
    }
    // Both parts of z*z must be derived from the old z before it is updated.
    var crossTerm = 2.0 * re * im;
    re = c_re + (reSq - imSq);
    im = c_im + crossTerm;
    count++;
  }
  return count;
}
49 |
// SIMD Mandelbrot iteration for four points at once.
//
// Parameters:
//   c_re4, c_im4   - Float32x4 holding the real / imaginary parts of the
//                    four points c.
//   max_iterations - upper bound on the number of iterations.
// Returns:
//   Int32x4 whose lanes hold, for each point, the number of iterations
//   during which |z|^2 was still <= 4.
//
// Each lane only has its counter incremented while its own mask bit is set,
// so lanes that have already escaped stop counting while the others go on.
function mandelx4(c_re4, c_im4, max_iterations) {
  var z_re4 = c_re4;
  var z_im4 = c_im4;
  var four4 = SIMD.Float32x4.splat (4.0);
  var two4 = SIMD.Float32x4.splat (2.0);
  var count4 = SIMD.Int32x4.splat (0);
  var zero4 = SIMD.Int32x4.splat (0);
  var one4 = SIMD.Int32x4.splat (1);

  for (var i = 0; i < max_iterations; ++i) {
    var z_re24 = SIMD.Float32x4.mul (z_re4, z_re4);
    var z_im24 = SIMD.Float32x4.mul (z_im4, z_im4);

    // Lane is true while that point is still inside the radius-2 disc.
    var mb4 = SIMD.Float32x4.lessThanOrEqual (SIMD.Float32x4.add (z_re24, z_im24), four4);
    // Stop only when all 4 values are greater than 4.0, i.e. when no lane
    // is still inside. Stopping as soon as a single lane escapes would
    // truncate the counts of the remaining lanes.
    if (!SIMD.Bool32x4.anyTrue(mb4)) {
      break;
    }

    var new_re4 = SIMD.Float32x4.sub(z_re24, z_im24);
    var new_im4 = SIMD.Float32x4.mul(SIMD.Float32x4.mul (two4, z_re4), z_im4);
    z_re4 = SIMD.Float32x4.add(c_re4, new_re4);
    z_im4 = SIMD.Float32x4.add(c_im4, new_im4);
    // Only lanes that are still inside get their counter bumped.
    count4 = SIMD.Int32x4.add(count4, SIMD.Int32x4.select(mb4, one4, zero4));
  }
  return count4;
}
77 |
// Runs both kernels once and checks that they produce the same iteration
// counts, element by element.
function sanityCheck() {
  var vectorCounts = simdMandelbrot(1);
  var scalarCounts = nonSimdMandelbrot(1);
  var total = vectorCounts.length;
  if (total !== scalarCounts.length) {
    return false;
  }
  var k = 0;
  while (k < total) {
    if (vectorCounts[k] !== scalarCounts[k]) {
      return false;
    }
    ++k;
  }
  return true;
}
91 |
// Harness init hook: passes through the result of the sanity check.
function initMandelbrot() {
  var passed = sanityCheck();
  return passed;
}
95 |
// Harness cleanup hook: repeats the sanity check and passes its result through.
function cleanupMandelbrot() {
  var passed = sanityCheck();
  return passed;
}
99 |
100 | // Non SIMD version of the kernel
// Scalar kernel: n times over, computes the iteration count for the point
// (0.01, 0.01) four times with mandelx1 (one call per result slot).
// Returns the four-element array of counts from the last pass; with n <= 0
// the array is returned unfilled.
function nonSimdMandelbrot (n) {
  var counts = new Array (4);
  for (var pass = 0; pass < n; ++pass) {
    for (var slot = 0; slot < 4; ++slot) {
      counts [slot] = mandelx1 (0.01, 0.01, 100);
    }
  }
  return counts;
}
111 |
112 | // SIMD version of the kernel
// SIMD kernel: n times over, computes the iteration counts for four copies
// of the point (0.01, 0.01) in one mandelx4 call and unpacks the lanes.
// Returns the four-element array of counts from the last pass; with n <= 0
// the array is returned unfilled.
function simdMandelbrot (n) {
  var counts = new Array (4);
  var point4 = SIMD.Float32x4.splat (0.01);
  for (var pass = 0; pass < n; ++pass) {
    var packed = mandelx4 (point4, point4, 100);
    for (var lane = 0; lane < 4; ++lane) {
      counts [lane] = SIMD.Int32x4.extractLane(packed, lane);
    }
  }
  return counts;
}
125 |
126 | } ());
127 |
--------------------------------------------------------------------------------
/src/benchmarks/matrix-multiplication.js:
--------------------------------------------------------------------------------
1 | // 4x4 matrix multiplication
2 | // Author: John McCutchan
3 |
4 | (function () {
5 |
// Kernel configuration
// Registers the matrix multiplication kernel: init/cleanup sanity checks,
// the SIMD and scalar kernel entry points, and an iteration count. The
// referenced functions are declared further down in this file (hoisted).
var kernelConfig = {
  kernelName: "MatrixMultiplication",
  kernelInit: init,
  kernelCleanup: cleanup,
  kernelSimd: simdMultiply,
  kernelNonSimd: multiply,
  kernelIterations: 1000
};

// Hook up to the harness
benchmarks.add(new Benchmark(kernelConfig));

// Benchmark data, initialization and kernel functions
// T1/T2/Out are the operands and result of the scalar kernel; T1x/T2x/Outx
// are the separate copies used by the SIMD kernel. Each holds a 4x4 matrix
// as 16 consecutive floats.
var T1 = new Float32Array(16);
var T2 = new Float32Array(16);
var Out = new Float32Array(16);
var T1x = new Float32Array(16);
var T2x = new Float32Array(16);
var Outx = new Float32Array(16);
26 |
// Compares the first 16 elements of A with the 16 floats read from b via
// SIMD loads at offsets 0, 4, 8 and 12. Uses loose equality (==) and stops
// at the first mismatch.
// Returns true when all 16 elements match, false otherwise.
function equals(A, b) {
  for (var base = 0; base < 16; base += 4) {
    for (var lane = 0; lane < 4; lane++) {
      if (A[base + lane] != SIMD.Float32x4.extractLane(SIMD.Float32x4.load(b, base), lane)) {
        return false;
      }
    }
  }
  return true;
}
45 |
// Sets up the operands and runs both kernels once as a sanity check.
// T1/T1x are filled with ones on the diagonal and T2/T2x with twos on the
// diagonal (the off-diagonal elements of T1/T2 are left as they are; the
// rows of T1x/T2x are stored in full). Then both kernels run once.
// Returns true when the scalar and SIMD operands and outputs all match.
function init() {
  var f4 = SIMD.Float32x4;
  for (var d = 0; d < 4; d++) {
    // Element (d, d) of a row-major 4x4 matrix lives at index 5 * d.
    T1[d * 5] = 1.0;
    T2[d * 5] = 2.0;
  }

  for (var row = 0; row < 4; row++) {
    var offset = row * 4;
    f4.store(T1x, offset, f4(row === 0 ? 1.0 : 0.0,
                             row === 1 ? 1.0 : 0.0,
                             row === 2 ? 1.0 : 0.0,
                             row === 3 ? 1.0 : 0.0));
    f4.store(T2x, offset, f4(row === 0 ? 2.0 : 0.0,
                             row === 1 ? 2.0 : 0.0,
                             row === 2 ? 2.0 : 0.0,
                             row === 3 ? 2.0 : 0.0));
  }

  multiply(1);
  simdMultiply(1);

  var operandsMatch = equals(T1, T1x) && equals(T2, T2x);
  return operandsMatch && equals(Out, Outx);
}
71 |
// Harness cleanup hook. The sanity check performed after the run is the
// same as the one performed before it, so simply re-run init().
function cleanup() {
  var passed = init();
  return passed;
}
75 |
// Scalar kernel: computes Out = T2 * T1 for row-major 4x4 matrices,
// n times over. Each output element is
//   Out[r][c] = T2[r][0]*T1[0][c] + T2[r][1]*T1[1][c]
//             + T2[r][2]*T1[2][c] + T2[r][3]*T1[3][c]
// with the four products added strictly left to right.
function multiply(n) {
  for (var iter = 0; iter < n; iter++) {
    for (var rowBase = 0; rowBase < 16; rowBase += 4) {
      // The four coefficients of the current T2 row.
      var b0 = T2[rowBase];
      var b1 = T2[rowBase + 1];
      var b2 = T2[rowBase + 2];
      var b3 = T2[rowBase + 3];
      for (var col = 0; col < 4; col++) {
        Out[rowBase + col] = b0*T1[col] + b1*T1[4 + col] + b2*T1[8 + col] + b3*T1[12 + col];
      }
    }
  }
}
132 |
// SIMD kernel: computes Outx = T2x * T1x for row-major 4x4 matrices,
// n times over. Every output row is a linear combination of the four rows
// of T1x, weighted by the four elements of the matching T2x row:
//   row = b.x*a0 + (b.y*a1 + (b.z*a2 + b.w*a3))
function simdMultiply(n) {
  var f4 = SIMD.Float32x4;
  for (var iter = 0; iter < n; iter++) {
    var a0 = f4.load(T1x, 0);
    var a1 = f4.load(T1x, 4);
    var a2 = f4.load(T1x, 8);
    var a3 = f4.load(T1x, 12);
    for (var rowBase = 0; rowBase < 16; rowBase += 4) {
      var b = f4.load(T2x, rowBase);
      // Broadcast each element of the T2x row and scale one T1x row by it.
      var term0 = f4.mul(f4.swizzle(b, 0, 0, 0, 0), a0);
      var term1 = f4.mul(f4.swizzle(b, 1, 1, 1, 1), a1);
      var term2 = f4.mul(f4.swizzle(b, 2, 2, 2, 2), a2);
      var term3 = f4.mul(f4.swizzle(b, 3, 3, 3, 3), a3);
      // The additions are nested right to left.
      f4.store(Outx, rowBase, f4.add(term0, f4.add(term1, f4.add(term2, term3))));
    }
  }
}
177 |
178 | } ());
179 |
--------------------------------------------------------------------------------
/src/benchmarks/memcpy.js:
--------------------------------------------------------------------------------
1 | // Simple performance test memcpy using SIMD.
2 | // Author: Moh Haghighat
3 | // January 20, 2015
4 |
5 | (function () {
6 |
// Kernel configuration
// Registers the memcpy kernel: init/cleanup callbacks, the SIMD and scalar
// kernel entry points, and an iteration count. The referenced functions are
// declared further down in this file (hoisted).
var kernelConfig = {
  kernelName: "Memcpy",
  kernelInit: initArray,
  kernelCleanup: cleanup,
  kernelSimd: simdMemcpy,
  kernelNonSimd: memcpy,
  kernelIterations: 1000
};

// Hook up to the harness
benchmarks.add(new Benchmark(kernelConfig));

// Benchmark data, initialization and kernel functions
// A single ArrayBuffer acts as the heap; the typed arrays below are
// different views onto the same bytes.
var TOTAL_MEMORY = 4096*32;
var buffer = new ArrayBuffer(TOTAL_MEMORY);
var HEAP8 = new Int8Array(buffer);
var HEAP32 = new Int32Array(buffer);
var HEAPU8 = new Uint8Array(buffer);

// Three regions spaced 2*LEN bytes apart: ptr1 is the copy source, ptr2 is
// the destination of the scalar kernel and ptr3 the destination of the
// SIMD kernel.
var LEN = TOTAL_MEMORY/32;
var ptr1 = 0;
var ptr2 = ptr1 + 2 * LEN;
var ptr3 = ptr2 + 2 * LEN;
// Base value of the fill pattern written by initArray().
var VAL = 200;
32 |
// Checks that the first LEN bytes of the two destination regions (ptr2 and
// ptr3) are identical. Returns false at the first differing byte.
function sanityCheck() {
  var offset = 0;
  while (offset < LEN) {
    if (HEAP8[ptr2 + offset] != HEAP8[ptr3 + offset]) {
      return false;
    }
    ++offset;
  }
  return true;
}
41 |
// Fills the three regions with distinct patterns: byte j of the source
// region gets VAL + j, byte j of the ptr2 region gets VAL + 2*j and byte j
// of the ptr3 region gets VAL + 3*j (each truncated by the Int8Array store).
// Always returns true.
function initArray() {
  var offset = 0;
  while (offset < LEN) {
    HEAP8[ptr1 + offset] = (VAL + 1 * offset) | 0;
    HEAP8[ptr2 + offset] = (VAL + 2 * offset) | 0;
    HEAP8[ptr3 + offset] = (VAL + 3 * offset) | 0;
    ++offset;
  }
  return true;
}
50 |
// Harness cleanup hook: verifies that both kernels left identical data in
// their destination regions.
function cleanup() {
  var passed = sanityCheck();
  return passed;
}
54 |
// Bulk copy used by the asm.js modules for large transfers: copies num
// bytes from offset src to offset dest inside HEAPU8 with a single
// TypedArray.set call. Returns dest.
function _emscripten_memcpy_big(dest, src, num) {
  var chunk = HEAPU8.subarray(src, src + num);
  HEAPU8.set(chunk, dest);
  return dest;
}
60 |
// asm.js module providing the scalar memcpy.
// Parameters:
//   global - object supplying the typed array constructors (Int8Array,
//            Int32Array).
//   imp    - import object; must provide _emscripten_memcpy_big.
//   buffer - ArrayBuffer used as the heap.
// Returns the _memcpy(dest, src, num) function.
function NonSimdAsmjsModule (global, imp, buffer) {
  "use asm"

  var HEAP8 = new global.Int8Array(buffer);
  var HEAP32 = new global.Int32Array(buffer);
  var _emscripten_memcpy_big = imp._emscripten_memcpy_big;

  // Copies num bytes from byte offset src to byte offset dest within the
  // heap and returns the original dest.
  function _memcpy(dest, src, num) {
    dest = dest|0; src = src|0; num = num|0;
    var ret = 0;
    // Large copies are delegated to the imported bulk routine.
    if ((num|0) >= 4096) return _emscripten_memcpy_big(dest|0, src|0, num|0)|0;
    ret = dest|0;
    // Word-sized copying is only possible when both pointers have the same
    // alignment modulo 4.
    if ((dest&3) == (src&3)) {
      // Copy single bytes until dest is 4-byte aligned.
      while (dest & 3) {
        if ((num|0) == 0) return ret|0;
        HEAP8[((dest)>>0)]=((HEAP8[((src)>>0)])|0);
        dest = (dest+1)|0;
        src = (src+1)|0;
        num = (num-1)|0;
      }
      // Copy 4 bytes at a time.
      while ((num|0) >= 4) {
        HEAP32[((dest)>>2)]=((HEAP32[((src)>>2)])|0);
        dest = (dest+4)|0;
        src = (src+4)|0;
        num = (num-4)|0;
      }
    }
    // Copy whatever is left byte by byte (the whole range when the
    // alignments differ).
    while ((num|0) > 0) {
      HEAP8[((dest)>>0)]=((HEAP8[((src)>>0)])|0);
      dest = (dest+1)|0;
      src = (src+1)|0;
      num = (num-1)|0;
    }
    return ret|0;
  }

  return _memcpy;
}
99 |
// asm.js module providing the SIMD memcpy.
// Parameters:
//   global - object supplying the typed array constructors and SIMD.Int32x4.
//   imp    - import object; must provide _emscripten_memcpy_big.
//   buffer - ArrayBuffer used as the heap.
// Returns the _memcpy(dest, src, num) function.
function SimdAsmjsModule (global, imp, buffer) {
  "use asm"

  var HEAP8 = new global.Int8Array(buffer);
  var HEAP32 = new global.Int32Array(buffer);
  var HEAPU8 = new global.Uint8Array(buffer);
  var _emscripten_memcpy_big = imp._emscripten_memcpy_big;
  var i4 = global.SIMD.Int32x4;
  var i4load = i4.load;
  var i4store = i4.store;

  // Copies num bytes from byte offset src to byte offset dest within the
  // heap and returns the original dest.
  function _memcpy(dest, src, num) {
    dest = dest|0; src = src|0; num = num|0;
    var ret = 0;
    // Large copies are delegated to the imported bulk routine.
    if ((num|0) >= 4096) return _emscripten_memcpy_big(dest|0, src|0, num|0)|0;
    ret = dest|0;

    if ((num|0) >= 16) {
      // Copy single bytes until dest is 16-byte aligned.
      while (dest & 15) {
        if ((num|0) == 0) return ret|0;
        HEAP8[((dest)>>0)]=((HEAP8[((src)>>0)])|0);
        dest = (dest+1)|0;
        src = (src+1)|0;
        num = (num-1)|0;
      }
      // Copy 16 bytes at a time through an Int32x4 load/store on the byte
      // view; only dest has been aligned, src may still be unaligned.
      while ((num|0) >= 16) {
        i4store(HEAPU8, ((dest)>>0), i4load(HEAPU8, ((src)>>0)));
        dest = (dest+16)|0;
        src = (src+16)|0;
        num = (num-16)|0;
      }
      if ((num|0) == 0) return ret|0;
    }

    // Remainder (or short copies): same strategy as the scalar version.
    // Word-sized copying requires equal alignment modulo 4.
    if ((dest&3) == (src&3)) {
      while (dest & 3) {
        if ((num|0) == 0) return ret|0;
        HEAP8[((dest)>>0)]=((HEAP8[((src)>>0)])|0);
        dest = (dest+1)|0;
        src = (src+1)|0;
        num = (num-1)|0;
      }
      while ((num|0) >= 4) {
        HEAP32[((dest)>>2)]=((HEAP32[((src)>>2)])|0);
        dest = (dest+4)|0;
        src = (src+4)|0;
        num = (num-4)|0;
      }
    }

    // Copy whatever is left byte by byte.
    while ((num|0) > 0) {
      HEAP8[((dest)>>0)]=((HEAP8[((src)>>0)])|0);
      dest = (dest+1)|0;
      src = (src+1)|0;
      num = (num-1)|0;
    }

    return ret|0;
  }

  return _memcpy;
}
162 |
// Scalar kernel: instantiates the non-SIMD asm.js module and, n times over,
// copies every length from 0 to LEN - 1 from ptr1 to each of the 16
// destinations ptr2 + 0 .. ptr2 + 15 (covering every alignment mod 16).
// Always returns true.
function memcpy(n) {
  var imports = {"_emscripten_memcpy_big": _emscripten_memcpy_big};
  var copyBytes = NonSimdAsmjsModule(this, imports, buffer);
  for (var pass = 0; pass < n; ++pass) {
    for (var length = 0; length < LEN; ++length) {
      for (var shift = 0; shift < 16; shift++) {
        copyBytes (ptr2 + shift, ptr1, length);
      }
    }
  }
  return true;
}
176 |
// SIMD kernel: instantiates the SIMD asm.js module and, n times over,
// copies every length from 0 to LEN - 1 from ptr1 to each of the 16
// destinations ptr3 + 0 .. ptr3 + 15 (covering every alignment mod 16).
// Always returns true.
function simdMemcpy(n) {
  var imports = {"_emscripten_memcpy_big": _emscripten_memcpy_big};
  var copyBytes = SimdAsmjsModule(this, imports, buffer);
  for (var pass = 0; pass < n; ++pass) {
    for (var length = 0; length < LEN; ++length) {
      for (var shift = 0; shift < 16; shift++) {
        copyBytes (ptr3 + shift, ptr1, length);
      }
    }
  }
  return true;
}
190 |
191 | } ());
192 |
--------------------------------------------------------------------------------
/src/benchmarks/memset.js:
--------------------------------------------------------------------------------
1 | // Simple performance test memset using SIMD.
2 | // Author: Moh Haghighat
3 | // December 10, 2014
4 |
5 | (function () {
6 |
// Kernel configuration
// Registers the memset kernel: init/cleanup callbacks, the SIMD and scalar
// kernel entry points, and an iteration count. The referenced functions are
// declared further down in this file (hoisted).
var kernelConfig = {
  kernelName: "Memset",
  kernelInit: initArray,
  kernelCleanup: cleanup,
  kernelSimd: simdMemset,
  kernelNonSimd: memset,
  kernelIterations: 1000
};

// Hook up to the harness
benchmarks.add(new Benchmark(kernelConfig));

// Benchmark data, initialization and kernel functions
// A single ArrayBuffer acts as the heap; the typed arrays below are
// different views onto the same bytes.
var TOTAL_MEMORY = 4096*32;
var buffer = new ArrayBuffer(TOTAL_MEMORY);
var HEAP8 = new Int8Array(buffer);
var HEAP32 = new Int32Array(buffer);
var HEAPU8 = new Uint8Array(buffer);

// Two adjacent regions of LEN bytes: ptr1 is filled by the scalar kernel,
// ptr2 by the SIMD kernel. VAL is the fill value passed to both.
var LEN = TOTAL_MEMORY/16;
var ptr1 = 0;
var ptr2 = ptr1 + LEN;
var VAL = 200;
31 |
// Checks that the first LEN bytes of the two filled regions (ptr1 and ptr2)
// are identical. Returns false at the first differing byte.
function sanityCheck() {
  var offset = 0;
  while (offset < LEN) {
    if (HEAP8[ptr1 + offset] != HEAP8[ptr2 + offset]) {
      return false;
    }
    ++offset;
  }
  return true;
}
40 |
// Harness init hook. Nothing needs to be prepared for this kernel and no
// up-front sanity check is performed, so it unconditionally reports success.
function initArray() {
  return true;
}
44 |
// Harness cleanup hook: verifies that both kernels left identical data in
// their regions.
function cleanup() {
  var passed = sanityCheck();
  return passed;
}
48 |
// asm.js module providing the scalar memset.
// Parameters:
//   global - object supplying the typed array constructors (Int8Array,
//            Int32Array).
//   imp    - import object (not used by this module).
//   buffer - ArrayBuffer used as the heap.
// Returns the _memset(ptr, value, num) function.
function NonSimdAsmjsModule (global, imp, buffer) {
  "use asm"

  var HEAP8 = new global.Int8Array(buffer);
  var HEAP32 = new global.Int32Array(buffer);

  // Sets num bytes starting at byte offset ptr to the low 8 bits of value.
  // Returns the final write position minus num, i.e. the original ptr.
  function _memset(ptr, value, num) {
    ptr = ptr|0;
    value = value|0;
    num = num|0;
    var stop = 0, value4 = 0, stop4 = 0, unaligned = 0;
    stop = (ptr + num)|0;
    if ((num|0) >= 20) {
      // This is unaligned, but quite large, so work hard to get to aligned settings
      value = value & 0xff;
      unaligned = ptr & 3;
      // Replicate the byte into all four bytes of a 32-bit word.
      value4 = value | (value << 8) | (value << 16) | (value << 24);
      // Last 4-byte aligned address not beyond the end of the range.
      stop4 = stop & ~3;
      if (unaligned) {
        // From here on `unaligned` holds the next 4-byte aligned address.
        unaligned = (ptr + 4 - unaligned)|0;
        while ((ptr|0) < (unaligned|0)) { // no need to check for stop, since we have large num
          HEAP8[((ptr)>>0)]=value;
          ptr = (ptr+1)|0;
        }
      }
      // Fill 4 bytes at a time.
      while ((ptr|0) < (stop4|0)) {
        HEAP32[((ptr)>>2)]=value4;
        ptr = (ptr+4)|0;
      }
    }
    // Fill the remaining tail (or the whole range when num < 20) bytewise.
    while ((ptr|0) < (stop|0)) {
      HEAP8[((ptr)>>0)]=value;
      ptr = (ptr+1)|0;
    }
    return (ptr-num)|0;
  }

  return _memset;
}
88 |
// asm.js module providing the SIMD memset.
// Parameters:
//   global - object supplying the typed array constructors and SIMD.Int32x4.
//   imp    - import object (not used by this module).
//   buffer - ArrayBuffer used as the heap.
// Returns the _simdMemset(ptr, value, num) function.
function SimdAsmjsModule (global, imp, buffer) {
  "use asm"

  var HEAP8 = new global.Int8Array(buffer);
  var HEAP32 = new global.Int32Array(buffer);
  var HEAPU8 = new global.Uint8Array(buffer);
  var i4 = global.SIMD.Int32x4;
  var i4splat = i4.splat;
  var i4store = i4.store;

  // Sets num bytes starting at byte offset ptr to the low 8 bits of value.
  // Returns the final write position minus num, i.e. the original ptr.
  function _simdMemset(ptr, value, num) {
    ptr = ptr|0;
    value = value|0;
    num = num|0;

    var value2 = 0, value4 = 0, value16 = i4(0, 0, 0, 0), stop = 0, stop4 = 0, stop16 = 0, unaligned = 0;

    stop = (ptr + num)|0;
    if ((num|0) >= 16) {
      // This is unaligned, but quite large, so work hard to get to aligned settings
      value = value & 0xff;

      unaligned = ptr & 0xf;
      if (unaligned) {
        // Initialize the 16-byte unaligned leading part
        // From here on `unaligned` holds the next 16-byte aligned address.
        unaligned = (ptr + 16 - unaligned)|0;
        while ((ptr|0) < (unaligned|0)) { // no need to check for stop, since we have large num
          HEAP8[((ptr)>>0)]=value;
          ptr = (ptr+1)|0;
        }
      }

      // Replicate the byte to 2, then 4, then 16 bytes.
      value2 = (value | (value << 8))|0;
      value4 = (value2 | (value2 << 16))|0;
      value16 =i4splat(value4);
      // Last 16-byte aligned address not beyond the end of the range.
      stop16 = stop & ~15;


      // Fill 16 bytes at a time with Int32x4 stores.
      while ((ptr|0) < (stop16|0)) {
        i4store(HEAPU8, ((ptr)>>0), value16);
        ptr = (ptr+16)|0;
      }

      // Fill the remaining whole words 4 bytes at a time.
      stop4 = stop & ~3;
      while ((ptr|0) < (stop4|0)) {
        HEAP32[((ptr)>>2)]=value4;
        ptr = (ptr+4)|0;
      }
    }
    // Fill the remaining tail (or the whole range when num < 16) bytewise.
    while ((ptr|0) < (stop|0)) {
      HEAP8[((ptr)>>0)]=value;
      ptr = (ptr+1)|0;
    }
    return (ptr-num)|0;
  }

  return _simdMemset;
}
147 |
// Scalar kernel: instantiates the non-SIMD asm.js module and fills the LEN
// bytes at ptr1 with VAL, n times over. Always returns true.
function memset(n) {
  var fill = NonSimdAsmjsModule(this, {}, buffer);
  var remaining = n;
  for (var pass = 0; pass < remaining; ++pass) {
    fill (ptr1, VAL, LEN);
  }
  return true;
}
155 |
// SIMD kernel: instantiates the SIMD asm.js module and fills the LEN bytes
// at ptr2 with VAL, n times over. Always returns true.
function simdMemset(n) {
  var fill = SimdAsmjsModule(this, {}, buffer);
  var remaining = n;
  for (var pass = 0; pass < remaining; ++pass) {
    fill (ptr2, VAL, LEN);
  }
  return true;
}
163 |
164 | } ());
165 |
--------------------------------------------------------------------------------
/src/benchmarks/run.js:
--------------------------------------------------------------------------------
"use strict"

// Shell driver for the benchmark suite: loads the SIMD polyfill, the
// harness and every benchmark file, then runs all registered benchmarks
// and prints their output with the shell's print().
// Relies on the `load` and `print` built-ins of a JavaScript shell.

load ('../ecmascript_simd.js');
load ('base.js');

// load individual benchmarks
// Every benchmark file registers itself with the harness when it is
// loaded, so each file must be listed exactly once.

load ('kernel-template.js');
load ('averageFloat32x4.js');
load ('averageFloat32x4LoadFromInt8Array.js');
load ('averageFloat32x4LoadX.js');
load ('averageFloat32x4LoadXY.js');
load ('averageFloat32x4LoadXYZ.js');
load ('averageInt32x4Load.js');
load ('mandelbrot.js');
load ('matrix-multiplication.js');
load ('transform.js');
load ('shiftrows.js');
load ('aobench.js');
load ('transpose4x4.js');
load ('inverse4x4.js');
load ('sinx4.js');
load ('memset.js');
load ('memcpy.js');

// Output callback for per-benchmark results.
function printResult (str) {
  print (str);
}

// Output callback for benchmark errors.
function printError (str) {
  print (str);
}

// Output callback for scores.
function printScore (str) {
  print (str);
}

benchmarks.runAll ({notifyResult: printResult,
                    notifyError:  printError,
                    notifyScore:  printScore},
                   true);
43 |
--------------------------------------------------------------------------------
/src/benchmarks/run_browser.js:
--------------------------------------------------------------------------------
// Browser driver for the benchmark suite: runs all registered benchmarks
// once the page has loaded and mirrors every message to the console and to
// the element with id "logs".
var logs = document.getElementById("logs");

// Output callback for per-benchmark results.
function printResult(str) {
  console.log(str);
  // Each message goes on its own line in the log element.
  logs.innerHTML += str + '<br>';
}

// Output callback for benchmark errors.
function printError(str) {
  console.log(str);
  logs.innerHTML += str + '<br>';
}

// Output callback for scores.
function printScore(str) {
  console.log(str);
  logs.innerHTML += str + '<br>';
}

window.onload = function() {
  console.log('Running benchmarks.');
  benchmarks.runAll({notifyResult: printResult,
                     notifyError:  printError,
                     notifyScore:  printScore}, true);
  printResult('Benchmarks completed.');
};
--------------------------------------------------------------------------------
/src/benchmarks/shiftrows.js:
--------------------------------------------------------------------------------
1 | // ShiftRows is a hot function in the implementation of the Rijndael cipher
2 | // For documentation see: http://asmaes.sourceforge.net/rijndael/rijndaelImplementation.pdf
3 | // Author: Peter Jensen
4 | (function() {
5 |
// Kernel configuration
// Registers the ShiftRows kernel: init/cleanup sanity checks, the SIMD and
// scalar kernel entry points, and an iteration count. The referenced
// functions are declared further down in this file (hoisted).
var kernelConfig = {
  kernelName: "ShiftRows",
  kernelInit: init,
  kernelCleanup: cleanup,
  kernelSimd: simdShiftRowsN,
  kernelNonSimd: shiftRowsN,
  kernelIterations: 1000
};

// Hook up to the harness
benchmarks.add(new Benchmark(kernelConfig));

// Do the object allocations globally, so the performance of the kernel
// functions aren't overshadowed by object creations

var state = new Int32Array(16); // 4x4 state matrix
// Scratch row used by shiftRows() while rotating a row.
var temp = new Int32Array (1000); // Big enough for 1000 columns
24 |
// Debug helper: prints the 4x4 state matrix, one row per print() call.
// Each value is preceded by a space, plus one more space of padding when
// the value is below 10.
function printState() {
  for (var row = 0; row < 4; ++row) {
    var line = "";
    var rowBase = row * 4;
    for (var col = 0; col < 4; ++col) {
      var cell = state[rowBase + col];
      var padding = cell < 10 ? " " : "";
      line += padding + " " + cell;
    }
    print(line);
  }
}
39 |
40 | // initialize the 4x4 state matrix
// Resets the 4x4 state matrix so that element i holds the value i.
function initState() {
  var index = 0;
  while (index < 16) {
    state[index] = index;
    ++index;
  }
}
46 |
47 | // Verify the result of calling shiftRows(state, 4)
// Verifies that `state` holds the result of one shiftRows(state, 4) applied
// to the matrix produced by initState(): row r rotated left by r positions.
// Returns true on an exact match, false otherwise.
function checkState() {
  var wanted = [
     0,  1,  2,  3,
     5,  6,  7,  4,
    10, 11,  8,  9,
    15, 12, 13, 14
  ];
  var index = 0;
  while (index < 16) {
    if (state[index] !== wanted[index]) {
      return false;
    }
    ++index;
  }
  return true;
}
61 |
// Sanity check: resets the state and runs the scalar kernel once, then
// resets it again and runs the SIMD kernel once, verifying the resulting
// state each time. Returns true only when both kernels produce the
// expected matrix; the SIMD kernel is not run if the scalar one fails.
function init() {
  // Scalar implementation first.
  initState();
  shiftRowsN(1);
  var scalarOk = checkState();
  if (!scalarOk) {
    return false;
  }

  // Then the SIMD implementation, starting from a fresh state.
  initState();
  simdShiftRowsN(1);
  var simdOk = checkState();
  return simdOk ? true : false;
}
78 |
// Harness cleanup hook. The sanity check performed after the run is the
// same as the one performed before it, so simply re-run init().
function cleanup() {
  var passed = init();
  return passed;
}
82 |
83 | // This is the typical implementation of the shiftRows function
// Generic ShiftRows: rotates row r (r = 1, 2, 3) of the state matrix left
// by r positions; row 0 is left untouched.
// Parameters:
//   state - flat array holding 4 rows of Nc columns each.
//   Nc    - number of columns per row.
// The rotated row is staged in the shared `temp` buffer before being
// written back, so a row is never overwritten while it is still being read.
function shiftRows(state, Nc) {
  var row = 1;
  while (row < 4) {
    var rowBase = row * Nc;   // index of the first element of this row
    var col;
    for (col = 0; col < Nc; ++col) {
      var sourceCol = (col + row) % Nc;
      temp[col] = state[rowBase + sourceCol];
    }
    for (col = 0; col < Nc; ++col) {
      state[rowBase + col] = temp[col];
    }
    ++row;
  }
}
96 |
97 | // The SIMD optimized version of the shiftRows function
98 | // The function is special cased for a 4 column setting (Nc == 4).
99 | // This is the value used for AES blocks (see documentation for details)
// SIMD ShiftRows, specialised for 4 columns: rotates row r (r = 1, 2, 3) of
// the 4x4 state matrix left by r positions using one load/swizzle/store per
// row; row 0 is left untouched.
// Parameters:
//   state - flat integer typed array holding the state matrix.
//   Nc    - number of columns per row. For any value other than 4 the work
//           is delegated to the generic shiftRows() and nothing else is done.
function simdShiftRows(state, Nc) {
  if (Nc !== 4) {
    shiftRows(state, Nc);
    // The generic path has already done the complete shift; falling through
    // to the 4-column code would shift the data a second time.
    return;
  }
  var i4 = SIMD.Int32x4;
  // Row 1: rotate left by one.
  i4.store(state, 4, i4.swizzle(i4.load(state, 4), 1, 2, 3, 0));
  // Row 2: rotate left by two.
  i4.store(state, 8, i4.swizzle(i4.load(state, 8), 2, 3, 0, 1));
  // Row 3: rotate left by three.
  i4.store(state, 12, i4.swizzle(i4.load(state, 12), 3, 0, 1, 2));
}
117 |
// Scalar kernel: applies shiftRows to the shared 4-column state matrix
// `iterations` times.
function shiftRowsN(iterations) {
  var done = 0;
  while (done < iterations) {
    shiftRows(state, 4);
    ++done;
  }
}
123 |
// SIMD kernel: applies simdShiftRows to the shared 4-column state matrix
// `iterations` times.
function simdShiftRowsN(iterations) {
  var done = 0;
  while (done < iterations) {
    simdShiftRows(state, 4);
    ++done;
  }
}
129 | } ());
130 |
--------------------------------------------------------------------------------
/src/benchmarks/sinx4.js:
--------------------------------------------------------------------------------
1 | // Compute sin() in 4 lanes:
2 | // Algorithm adopted from: http://gruntthepeon.free.fr/ssemath/
3 | // Author: Peter Jensen
4 | (function () {
5 |
// Kernel configuration
// Registers the sine kernel: init/cleanup sanity checks, the SIMD and
// scalar kernel entry points, and an iteration count. The referenced
// functions are declared further down in this file (hoisted).
var kernelConfig = {
  kernelName: "Sine",
  kernelInit: init,
  kernelCleanup: cleanup,
  kernelSimd: simd,
  kernelNonSimd: nonSimd,
  kernelIterations: 100000000
};

// Hook up to the harness
// `benchmarks` and `Benchmark` are not defined in this file; they are
// presumably provided by base.js -- confirm.
benchmarks.add (new Benchmark (kernelConfig));
18 |
19 | // Kernel Initializer
// Kernel initializer: runs each kernel once and reports whether the SIMD
// and non-SIMD results agree within the tolerance applied by almostEqual().
// This is also the place to set up any shared data the kernels need, so
// that the kernels themselves allocate as little as possible.
// Returns:
//   true:  the first (unoptimized) run of both kernels matched
//   false: the results differ
function init () {
  return almostEqual (simd(1), nonSimd(1));
}
35 |
// Kernel Cleanup
function cleanup () {
  // Final sanity check. This runs after all the kernel iterations have been
  // executed, so the kernels should be in their final optimized form, whereas
  // the check done during initialization probably saw the unoptimized one.
  // Returns:
  //   true:  Last run (optimized) of the kernels passed
  //   false: Last run (optimized) of the kernels failed
  return almostEqual (simd (1), nonSimd (1));
}
50 |
// Compares two 4-element result arrays lane by lane.
//   a, b: indexable collections with (at least) 4 numeric entries.
// Returns true only when every pair of lanes differs by at most 0.00001.
// A NaN in any compared lane makes the result false.
function almostEqual(a, b) {
  for (var i = 0; i < 4; ++i) {
    // Compare the individual lanes (subtracting the arrays themselves yields
    // NaN, which would make every comparison pass). The test is written as a
    // negated "<=" so that a NaN difference counts as a mismatch.
    if (!(Math.abs (a[i] - b[i]) <= 0.00001)) {
      return false;
    }
  }
  return true;
}
59 |
// Debug helper: prints msg followed by the four lanes of the Float32x4 v,
// each formatted with six decimals.
function printFloat32x4(msg, v) {
  var lane0 = SIMD.Float32x4.extractLane(v, 0).toFixed(6);
  var lane1 = SIMD.Float32x4.extractLane(v, 1).toFixed(6);
  var lane2 = SIMD.Float32x4.extractLane(v, 2).toFixed(6);
  var lane3 = SIMD.Float32x4.extractLane(v, 3).toFixed(6);
  print (msg, lane0, lane1, lane2, lane3);
}
66 |
// Debug helper: prints msg followed by the four lanes of the Int32x4 v.
// The lanes are read with the Int32x4 accessor; the Float32x4 accessor
// type-checks its argument as a Float32x4 and is the wrong one for an
// integer vector.
function printInt32x4(msg, v) {
  print (msg, SIMD.Int32x4.extractLane(v, 0),
              SIMD.Int32x4.extractLane(v, 1),
              SIMD.Int32x4.extractLane(v, 2),
              SIMD.Int32x4.extractLane(v, 3));
}
73 |
// Manual test helper: prints the four lanes computed by sinx4() for the
// inputs (1, 2, 3, 4), followed by Math.sin() of the same inputs, so the two
// lines can be compared by eye.
function sinx4Test() {
  var x = SIMD.Float32x4(1.0, 2.0, 3.0, 4.0);
  // Call the kernel defined in this file. The result must not be stored in a
  // local named sinx4: that declaration would be hoisted and hide the
  // function being called.
  var result = sinx4(x);
  print (SIMD.Float32x4.extractLane(result, 0),
         SIMD.Float32x4.extractLane(result, 1),
         SIMD.Float32x4.extractLane(result, 2),
         SIMD.Float32x4.extractLane(result, 3));
  print (Math.sin(SIMD.Float32x4.extractLane(x, 0)),
         Math.sin(SIMD.Float32x4.extractLane(x, 1)),
         Math.sin(SIMD.Float32x4.extractLane(x, 2)),
         Math.sin(SIMD.Float32x4.extractLane(x, 3)));
}
86 |
// Constants used by sinx4(), each splatted across all four lanes.

// Bit masks selecting / clearing the IEEE sign bit of a 32-bit lane.
var _ps_sign_mask = SIMD.Int32x4.splat(0x80000000);
var _ps_inv_sign_mask = SIMD.Int32x4.not(_ps_sign_mask);
// 4/pi: scales the argument so that one unit corresponds to pi/4.
var _ps_cephes_FOPI = SIMD.Float32x4.splat(1.27323954473516);
// Small integer constants and the complement of 1 (used to clear bit 0).
var _pi32_1 = SIMD.Int32x4.splat(1);
var _pi32_inv1 = SIMD.Int32x4.not(_pi32_1);
var _pi32_4 = SIMD.Int32x4.splat(4);
var _pi32_2 = SIMD.Int32x4.splat(2);
// -pi/4 split into three parts of decreasing magnitude; sinx4() multiplies
// each by the octant count and adds the products to x one after another.
var _ps_minus_cephes_DP1 = SIMD.Float32x4.splat(-0.78515625);
var _ps_minus_cephes_DP2 = SIMD.Float32x4.splat(-2.4187564849853515625E-4);
var _ps_minus_cephes_DP3 = SIMD.Float32x4.splat(-3.77489497744594108E-8);
// Coefficients of the polynomial sinx4() evaluates into y (the one completed
// with "- 0.5*z + 1").
var _ps_coscof_p0 = SIMD.Float32x4.splat(2.443315711809948E-5);
var _ps_coscof_p1 = SIMD.Float32x4.splat(-1.388731625493765E-3);
var _ps_coscof_p2 = SIMD.Float32x4.splat(4.166664568298827E-2);
var _ps_0p5 = SIMD.Float32x4.splat(0.5);
var _ps_1 = SIMD.Float32x4.splat(1.0);
// Coefficients of the polynomial sinx4() evaluates into y2 (the one completed
// with "* x + x").
var _ps_sincof_p0 = SIMD.Float32x4.splat(-1.9515295891E-4);
var _ps_sincof_p1 = SIMD.Float32x4.splat(8.3321608736E-3);
var _ps_sincof_p2 = SIMD.Float32x4.splat(-1.6666654611E-1);
105 |
// Computes an approximation of sin() for each of the four lanes of x.
//   x: Float32x4 of input angles.
// Returns a Float32x4 with the per-lane results.
// Outline: strip the sign, scale by 4/pi and round to an even octant count,
// subtract that many multiples of pi/4 from |x|, evaluate two polynomials on
// the reduced argument, pick one of them per lane, and finally apply the sign.
function sinx4 (x) {
  var xmm1;
  var xmm2;
  var xmm3;
  var sign_bit;
  var swap_sign_bit;
  var poly_mask;
  var y;
  var y2;
  var z;
  var tmp;

  var emm0;
  var emm2;
  var emm2mask;

  // Split the input into magnitude (x) and sign bit (sign_bit).
  sign_bit = x;
  x = SIMD.Float32x4.fromInt32x4Bits(SIMD.Int32x4.and(SIMD.Int32x4.fromFloat32x4Bits(x), _ps_inv_sign_mask));
  sign_bit = SIMD.Float32x4.fromInt32x4Bits(SIMD.Int32x4.and(SIMD.Int32x4.fromFloat32x4Bits(sign_bit), _ps_sign_mask));
  // Scale by 4/pi.
  y = SIMD.Float32x4.mul(x, _ps_cephes_FOPI);
  //printFloat32x4 ("Probe 6", y);
  // Convert to integer, then (j + 1) & ~1 rounds up to the next even value.
  emm2 = SIMD.Int32x4.fromFloat32x4(y);
  emm2 = SIMD.Int32x4.add(emm2, _pi32_1);
  emm2 = SIMD.Int32x4.and(emm2, _pi32_inv1);
  //printInt32x4 ("Probe 8", emm2);
  y = SIMD.Float32x4.fromInt32x4(emm2);
  //printFloat32x4 ("Probe 7", y);
  // Bit 2 of j, shifted up into the sign-bit position (bit 31).
  emm0 = SIMD.Int32x4.and(emm2, _pi32_4);
  emm0 = SIMD.Int32x4.shiftLeftByScalar(emm0, 29);

  // Per-lane mask: all ones where bit 1 of j is clear, zero otherwise.
  emm2 = SIMD.Int32x4.and(emm2, _pi32_2);
  emm2mask = SIMD.Int32x4.equal(emm2, SIMD.Int32x4.splat(0));
  emm2 = SIMD.Int32x4.select(emm2mask, SIMD.Int32x4.splat(-1), SIMD.Int32x4.splat(0));

  swap_sign_bit = SIMD.Float32x4.fromInt32x4Bits(emm0);
  poly_mask = SIMD.Float32x4.fromInt32x4Bits(emm2);
  // Combine the input's sign with the sign flip derived from j.
  sign_bit = SIMD.Float32x4.fromInt32x4Bits(SIMD.Int32x4.xor(SIMD.Int32x4.fromFloat32x4Bits(sign_bit), SIMD.Int32x4.fromFloat32x4Bits(swap_sign_bit)));
  //printFloat32x4 ("Probe 1", sign_bit);

  // Range reduction: x = x + y*DP1 + y*DP2 + y*DP3, where the DP constants
  // are negative and are added in three separate steps.
  //printFloat32x4 ("Probe 4", y);
  //printFloat32x4 ("Probe 5", _ps_minus_cephes_DP1);
  xmm1 = SIMD.Float32x4.mul(y, _ps_minus_cephes_DP1);
  //printFloat32x4 ("Probe 3", xmm1);
  xmm2 = SIMD.Float32x4.mul(y, _ps_minus_cephes_DP2);
  xmm3 = SIMD.Float32x4.mul(y, _ps_minus_cephes_DP3);
  x = SIMD.Float32x4.add(x, xmm1);
  x = SIMD.Float32x4.add(x, xmm2);
  x = SIMD.Float32x4.add(x, xmm3);
  //printFloat32x4 ("Probe 2", x);

  // First polynomial, with z = x*x:
  //   y = ((p0*z + p1)*z + p2)*z*z - 0.5*z + 1
  y = _ps_coscof_p0;
  z = SIMD.Float32x4.mul(x, x);
  y = SIMD.Float32x4.mul(y, z);
  y = SIMD.Float32x4.add(y, _ps_coscof_p1);
  y = SIMD.Float32x4.mul(y, z);
  y = SIMD.Float32x4.add(y, _ps_coscof_p2);
  y = SIMD.Float32x4.mul(y, z);
  y = SIMD.Float32x4.mul(y, z);
  tmp = SIMD.Float32x4.mul(z, _ps_0p5);
  y = SIMD.Float32x4.sub(y, tmp);
  y = SIMD.Float32x4.add(y, _ps_1);

  // Second polynomial:
  //   y2 = ((p0*z + p1)*z + p2)*z*x + x
  y2 = _ps_sincof_p0;
  //printFloat32x4 ("Probe 11", y2);
  //printFloat32x4 ("Probe 12", z);
  y2 = SIMD.Float32x4.mul(y2, z);
  y2 = SIMD.Float32x4.add(y2, _ps_sincof_p1);
  //printFloat32x4 ("Probe 13", y2);
  y2 = SIMD.Float32x4.mul(y2, z);
  y2 = SIMD.Float32x4.add(y2, _ps_sincof_p2);
  y2 = SIMD.Float32x4.mul(y2, z);
  y2 = SIMD.Float32x4.mul(y2, x);
  y2 = SIMD.Float32x4.add(y2, x);

  // Per-lane choice via bit masking: keep y2 where poly_mask is all ones and
  // y where it is zero; the other operand of the add is then all-zero bits.
  xmm3 = poly_mask;
  y2 = SIMD.Float32x4.fromInt32x4Bits(SIMD.Int32x4.and(SIMD.Int32x4.fromFloat32x4Bits(xmm3), SIMD.Int32x4.fromFloat32x4Bits(y2)));
  //printFloat32x4 ("Probe 10", y2);
  y = SIMD.Float32x4.fromInt32x4Bits(SIMD.Int32x4.and(SIMD.Int32x4.not(SIMD.Int32x4.fromFloat32x4Bits(xmm3)), SIMD.Int32x4.fromFloat32x4Bits(y)));
  y = SIMD.Float32x4.add(y, y2);

  // Apply the sign by XOR-ing it into the result's bit pattern.
  //printFloat32x4 ("Probe 9", y);
  y = SIMD.Float32x4.fromInt32x4Bits(SIMD.Int32x4.xor(SIMD.Int32x4.fromFloat32x4Bits(y), SIMD.Int32x4.fromFloat32x4Bits(sign_bit)));
  return y;
}
190 |
// Fixed kernel inputs: the same four values, held as one Float32x4 for the
// SIMD kernel and as a plain array for the scalar kernel.
var simdInput = SIMD.Float32x4 (1.0, 2.0, 3.0, 4.0);
var nonSimdInput = [1.0, 2.0, 3.0, 4.0];
193 |
// SIMD version of the kernel
// Evaluates sinx4() on simdInput n times and returns the lanes of the last
// result as a 4-element array.
function simd (n) {
  var last;
  for (var iter = 0; iter < n; ++iter) {
    last = sinx4 (simdInput);
  }
  var lane0 = SIMD.Float32x4.extractLane(last, 0);
  var lane1 = SIMD.Float32x4.extractLane(last, 1);
  var lane2 = SIMD.Float32x4.extractLane(last, 2);
  var lane3 = SIMD.Float32x4.extractLane(last, 3);
  return [lane0, lane1, lane2, lane3];
}
205 |
// Non SIMD version of the kernel
// Evaluates Math.sin() on each of the four nonSimdInput values n times and
// returns the last results as a 4-element array. With n <= 0 the loop never
// runs and all four entries are undefined.
function nonSimd (n) {
  var x = nonSimdInput[0];
  var y = nonSimdInput[1];
  var z = nonSimdInput[2];
  var w = nonSimdInput[3];
  var rx, ry, rz, rw;
  // The work is intentionally repeated inside the loop: this is the timed
  // scalar counterpart of the SIMD kernel.
  for (var i = 0; i < n; ++i) {
    rx = Math.sin(x);
    ry = Math.sin(y);
    rz = Math.sin(z);
    rw = Math.sin(w);
  }
  return [rx, ry, rz, rw];
}
222 |
223 | } ());
224 |
--------------------------------------------------------------------------------
/src/benchmarks/transform.js:
--------------------------------------------------------------------------------
1 | // Transform vertex by 4x4 transformation matrix.
2 | // Author: John McCutchan
3 |
4 | (function () {
5 |
// Kernel configuration
// Names the benchmark and wires up its callbacks: the sanity checks (init and
// cleanup) and the SIMD and scalar kernels. kernelIterations is the iteration
// count handed to the harness.
var kernelConfig = {
  kernelName: "VertexTransform",
  kernelInit: init,
  kernelCleanup: cleanup,
  kernelSimd: simdVertexTransform,
  kernelNonSimd: vertexTransform,
  kernelIterations: 1000
};

// Hook up to the harness
benchmarks.add(new Benchmark(kernelConfig));
18 |
// Benchmark data, initialization and kernel functions
// T/V/Out are the matrix (16 floats), input vertex and output vertex used by
// the scalar kernel; Tx/Vx/Outx are the separate copies used by the SIMD
// kernel.
var T = new Float32Array(16);
var V = new Float32Array(4);
var Out = new Float32Array(4);
var Tx = new Float32Array(16);
var Vx = new Float32Array(4);
var Outx = new Float32Array(4);
26 |
// Fills both data sets with an identity matrix and the vertex (1, 2, 3, 1),
// runs each kernel once, and returns true when all four output lanes of the
// SIMD kernel equal the scalar kernel's outputs.
function init() {
  // Scalar data: only the diagonal is written; the other entries of T are
  // left as they are.
  T[0] = 1.0;
  T[5] = 1.0;
  T[10] = 1.0;
  T[15] = 1.0;
  V[0] = 1.0;
  V[1] = 2.0;
  V[2] = 3.0;
  V[3] = 1.0;
  // SIMD data: the same matrix and vertex, stored four lanes at a time.
  SIMD.Float32x4.store(Tx, 0,  SIMD.Float32x4(1.0, 0.0, 0.0, 0.0));
  SIMD.Float32x4.store(Tx, 4,  SIMD.Float32x4(0.0, 1.0, 0.0, 0.0));
  SIMD.Float32x4.store(Tx, 8,  SIMD.Float32x4(0.0, 0.0, 1.0, 0.0));
  SIMD.Float32x4.store(Tx, 12, SIMD.Float32x4(0.0, 0.0, 0.0, 1.0));
  SIMD.Float32x4.store(Vx, 0,  SIMD.Float32x4(1.0, 2.0, 3.0, 1.0));
  simdVertexTransform(1);
  vertexTransform(1);
  // Compare lane by lane, stopping at the first mismatch.
  for (var lane = 0; lane < 4; ++lane) {
    var simdLane = SIMD.Float32x4.extractLane(SIMD.Float32x4.load(Outx, 0), lane);
    if (simdLane !== Out[lane]) {
      return false;
    }
  }
  return true;
}
48 |
// Final sanity check: identical to the one performed by init().
function cleanup() {
  var passed = init();
  return passed;
}
52 |
// Scalar kernel: n times, multiplies the vertex V by the 4x4 matrix T and
// writes the result to Out. Output component k is
//   T[k]*V[0] + T[k+4]*V[1] + T[k+8]*V[2] + T[k+12]*V[3].
function vertexTransform(n) {
  for (var iter = 0; iter < n; iter++) {
    var vx = V[0];
    var vy = V[1];
    var vz = V[2];
    var vw = V[3];

    var a0 = T[0];
    var a1 = T[4];
    var a2 = T[8];
    var a3 = T[12];
    Out[0] = (a0 * vx + a1 * vy + a2 * vz + a3 * vw);

    var b0 = T[1];
    var b1 = T[5];
    var b2 = T[9];
    var b3 = T[13];
    Out[1] = (b0 * vx + b1 * vy + b2 * vz + b3 * vw);

    var c0 = T[2];
    var c1 = T[6];
    var c2 = T[10];
    var c3 = T[14];
    Out[2] = (c0 * vx + c1 * vy + c2 * vz + c3 * vw);

    var d0 = T[3];
    var d1 = T[7];
    var d2 = T[11];
    var d3 = T[15];
    Out[3] = (d0 * vx + d1 * vy + d2 * vz + d3 * vw);
  }
}
81 |
// SIMD kernel: n times, accumulates the four 4-lane groups of Tx, each scaled
// by one broadcast component of Vx, and stores the sum to Outx.
function simdVertexTransform(n) {
  for (var iter = 0; iter < n; iter++) {
    var xs = SIMD.Float32x4.swizzle(SIMD.Float32x4.load(Vx, 0), 0, 0, 0, 0);
    var acc = SIMD.Float32x4.splat(0.0);
    var group0 = SIMD.Float32x4.mul(xs, SIMD.Float32x4.load(Tx, 0));
    acc = SIMD.Float32x4.add(acc, group0);

    var ys = SIMD.Float32x4.swizzle(SIMD.Float32x4.load(Vx, 0), 1, 1, 1, 1);
    var group1 = SIMD.Float32x4.mul(ys, SIMD.Float32x4.load(Tx, 4));
    acc = SIMD.Float32x4.add(acc, group1);

    var zs = SIMD.Float32x4.swizzle(SIMD.Float32x4.load(Vx, 0), 2, 2, 2, 2);
    var group2 = SIMD.Float32x4.mul(zs, SIMD.Float32x4.load(Tx, 8));
    acc = SIMD.Float32x4.add(acc, group2);

    var ws = SIMD.Float32x4.swizzle(SIMD.Float32x4.load(Vx, 0), 3, 3, 3, 3);
    var group3 = SIMD.Float32x4.mul(ws, SIMD.Float32x4.load(Tx, 12));
    acc = SIMD.Float32x4.add(acc, group3);

    SIMD.Float32x4.store(Outx, 0, acc);
  }
}
96 |
97 | } ());
98 |
--------------------------------------------------------------------------------
/src/benchmarks/transpose4x4.js:
--------------------------------------------------------------------------------
1 | // Transpose a 4x4 matrix
2 | // Author: Peter Jensen
3 | (function () {
4 |
// Kernel configuration
// Names the benchmark and wires up its callbacks: the sanity checks (init and
// cleanup) and the SIMD and scalar kernels. kernelIterations is the iteration
// count handed to the harness.
var kernelConfig = {
  kernelName: "Transpose4x4",
  kernelInit: init,
  kernelCleanup: cleanup,
  kernelSimd: simdTransposeN,
  kernelNonSimd: transposeN,
  kernelIterations: 100000000
};

// Hook up to the harness
benchmarks.add (new Benchmark (kernelConfig));
17 |
// Global object allocations

// src: input matrix, dst: output written by the kernels, tsrc: the expected
// transpose that initMatrix() fills in for the sanity checks. Each holds a
// 4x4 matrix as 16 floats.
var src = new Float32Array(16);
var dst = new Float32Array(16);
var tsrc = new Float32Array(16);

// Selector used by simdTranspose(): takes lanes 0-1 from the first select
// operand and lanes 2-3 from the second.
var sel_ttff = SIMD.Bool32x4(true, true, false, false);
25 |
// Fills matrix with the values 0..15 in index order and matrixTransposed
// with the same values at the transposed positions of a 4x4 layout.
function initMatrix(matrix, matrixTransposed) {
  for (var index = 0; index < 16; ++index) {
    var row = index >> 2;
    var col = index & 3;
    matrix[index] = index;
    matrixTransposed[row + col*4] = index;
  }
}
35 |
// Debug helper: prints the 16-element matrix as four lines of four values,
// each value preceded by a space and formatted with two decimals.
function printMatrix(matrix) {
  for (var rowStart = 0; rowStart < 16; rowStart += 4) {
    var line = "";
    for (var k = rowStart; k < rowStart + 4; ++k) {
      line += " " + matrix[k].toFixed(2);
    }
    print(line);
  }
}
47 |
// Returns true when the first 16 entries of m1 and m2 are strictly equal
// pairwise, false at the first pair that differs.
function compareEqualMatrix(m1, m2) {
  var index = 0;
  while (index < 16) {
    if (m1[index] !== m2[index]) {
      return false;
    }
    ++index;
  }
  return true;
}
56 |
// Kernel Initializer
// Runs the scalar kernel and then the SIMD kernel once each, comparing the
// output of each against the expected transpose built by initMatrix().
// Returns:
//   true:  both kernels produced the expected transpose
//   false: at least one kernel produced something else
function init () {
  initMatrix(src, tsrc);
  transposeN(1);
  if (!compareEqualMatrix (tsrc, dst)) {
    return false;
  }

  // Both kernels write to the same dst buffer. Poison it before the SIMD run
  // so that a SIMD kernel which writes nothing (or only part of dst) cannot
  // pass on the scalar kernel's leftover output. NaN never compares equal.
  for (var i = 0; i < dst.length; ++i) {
    dst[i] = NaN;
  }

  simdTransposeN(1);
  // printMatrix(dst);
  if (!compareEqualMatrix (tsrc, dst)) {
    return false;
  }

  return true;
}
73 |
// Kernel Cleanup
// Repeats the sanity check performed by init() and returns its result.
function cleanup () {
  var passed = init();
  return passed;
}
78 |
// SIMD version of the kernel with SIMD.Float32x4.shuffle operation
// Transposes src into dst once. Each pair of output rows is built in two
// steps: first gather two lanes from each of two input rows, then interleave
// the gathered halves.
function simdTransposeMix() {
  var row0 = SIMD.Float32x4.load(src, 0);
  var row1 = SIMD.Float32x4.load(src, 4);
  var row2 = SIMD.Float32x4.load(src, 8);
  var row3 = SIMD.Float32x4.load(src, 12);

  // Lanes 0-1 of every input row -> output rows 0 and 1.
  var low01 = SIMD.Float32x4.shuffle(row0, row1, 0, 1, 4, 5);
  var low23 = SIMD.Float32x4.shuffle(row2, row3, 0, 1, 4, 5);
  var out0 = SIMD.Float32x4.shuffle(low01, low23, 0, 2, 4, 6);
  var out1 = SIMD.Float32x4.shuffle(low01, low23, 1, 3, 5, 7);

  // Lanes 2-3 of every input row -> output rows 2 and 3.
  var high01 = SIMD.Float32x4.shuffle(row0, row1, 2, 3, 6, 7);
  var high23 = SIMD.Float32x4.shuffle(row2, row3, 2, 3, 6, 7);
  var out2 = SIMD.Float32x4.shuffle(high01, high23, 0, 2, 4, 6);
  var out3 = SIMD.Float32x4.shuffle(high01, high23, 1, 3, 5, 7);

  SIMD.Float32x4.store(dst, 0,  out0);
  SIMD.Float32x4.store(dst, 4,  out1);
  SIMD.Float32x4.store(dst, 8,  out2);
  SIMD.Float32x4.store(dst, 12, out3);
}
107 |
// SIMD version of the kernel
// Transposes src into dst once using swizzle + select: sel_ttff takes lanes
// 0-1 from the first select operand and lanes 2-3 from the second, so each
// swizzle only has to place the lanes that will survive the select.
function simdTranspose() {
  var row0 = SIMD.Float32x4.load(src, 0);
  var row1 = SIMD.Float32x4.load(src, 4);
  var row2 = SIMD.Float32x4.load(src, 8);
  var row3 = SIMD.Float32x4.load(src, 12);

  // low01 = (row0[0], row0[1], row1[0], row1[1]); low23 likewise for rows 2, 3.
  var low01 = SIMD.Float32x4.select(sel_ttff, row0, SIMD.Float32x4.swizzle(row1, 0, 0, 0, 1));
  var low23 = SIMD.Float32x4.select(sel_ttff, row2, SIMD.Float32x4.swizzle(row3, 0, 0, 0, 1));
  var out0 = SIMD.Float32x4.select(sel_ttff,
      SIMD.Float32x4.swizzle(low01, 0, 2, 0, 0),
      SIMD.Float32x4.swizzle(low23, 0, 0, 0, 2));
  var out1 = SIMD.Float32x4.select(sel_ttff,
      SIMD.Float32x4.swizzle(low01, 1, 3, 0, 0),
      SIMD.Float32x4.swizzle(low23, 0, 0, 1, 3));

  // high01 = (row0[2], row0[3], row1[2], row1[3]); high23 likewise.
  var high01 = SIMD.Float32x4.select(sel_ttff, SIMD.Float32x4.swizzle(row0, 2, 3, 0, 0), row1);
  var high23 = SIMD.Float32x4.select(sel_ttff, SIMD.Float32x4.swizzle(row2, 2, 3, 0, 0), row3);
  var out2 = SIMD.Float32x4.select(sel_ttff,
      SIMD.Float32x4.swizzle(high01, 0, 2, 0, 0),
      SIMD.Float32x4.swizzle(high23, 0, 0, 0, 2));
  var out3 = SIMD.Float32x4.select(sel_ttff,
      SIMD.Float32x4.swizzle(high01, 1, 3, 0, 0),
      SIMD.Float32x4.swizzle(high23, 0, 0, 1, 3));

  SIMD.Float32x4.store(dst, 0,  out0);
  SIMD.Float32x4.store(dst, 4,  out1);
  SIMD.Float32x4.store(dst, 8,  out2);
  SIMD.Float32x4.store(dst, 12, out3);
}
136 |
// Non SIMD version of the kernel
// Transposes src into dst once: dst[4*r + c] receives src[4*c + r].
function transpose() {
  for (var r = 0; r < 4; ++r) {
    for (var c = 0; c < 4; ++c) {
      dst[4*r + c] = src[4*c + r];
    }
  }
}
156 |
// SIMD kernel driver: transposes src into dst n times using the shuffle-based
// sequence, written out inline inside the loop.
function simdTransposeN(n) {
  for (var iter = 0; iter < n; ++iter) {
    var row0 = SIMD.Float32x4.load(src, 0);
    var row1 = SIMD.Float32x4.load(src, 4);
    var row2 = SIMD.Float32x4.load(src, 8);
    var row3 = SIMD.Float32x4.load(src, 12);

    // Lanes 0-1 of every input row -> output rows 0 and 1.
    var low01 = SIMD.Float32x4.shuffle(row0, row1, 0, 1, 4, 5);
    var low23 = SIMD.Float32x4.shuffle(row2, row3, 0, 1, 4, 5);
    var out0 = SIMD.Float32x4.shuffle(low01, low23, 0, 2, 4, 6);
    var out1 = SIMD.Float32x4.shuffle(low01, low23, 1, 3, 5, 7);

    // Lanes 2-3 of every input row -> output rows 2 and 3.
    var high01 = SIMD.Float32x4.shuffle(row0, row1, 2, 3, 6, 7);
    var high23 = SIMD.Float32x4.shuffle(row2, row3, 2, 3, 6, 7);
    var out2 = SIMD.Float32x4.shuffle(high01, high23, 0, 2, 4, 6);
    var out3 = SIMD.Float32x4.shuffle(high01, high23, 1, 3, 5, 7);

    SIMD.Float32x4.store(dst, 0,  out0);
    SIMD.Float32x4.store(dst, 4,  out1);
    SIMD.Float32x4.store(dst, 8,  out2);
    SIMD.Float32x4.store(dst, 12, out3);
  }
}
186 |
// Scalar kernel driver: transposes src into dst n times. The sixteen element
// copies are written out one by one inside the loop; dst[4*r + c] receives
// src[4*c + r].
function transposeN(n) {
  for (var i = 0; i < n; ++i) {
    dst[0] = src[0];
    dst[1] = src[4];
    dst[2] = src[8];
    dst[3] = src[12];
    dst[4] = src[1];
    dst[5] = src[5];
    dst[6] = src[9];
    dst[7] = src[13];
    dst[8] = src[2];
    dst[9] = src[6];
    dst[10] = src[10];
    dst[11] = src[14];
    dst[12] = src[3];
    dst[13] = src[7];
    dst[14] = src[11];
    dst[15] = src[15];
  }
}
207 |
208 | } ());
209 |
--------------------------------------------------------------------------------
/src/ecmascript_simd.js:
--------------------------------------------------------------------------------
1 | /*
2 | vim: set ts=8 sts=2 et sw=2 tw=79:
3 | Copyright (C) 2013
4 |
5 | This software is provided 'as-is', without any express or implied
6 | warranty. In no event will the authors be held liable for any damages
7 | arising from the use of this software.
8 |
9 | Permission is granted to anyone to use this software for any purpose,
10 | including commercial applications, and to alter it and redistribute it
11 | freely, subject to the following restrictions:
12 |
13 | 1. The origin of this software must not be misrepresented; you must not
14 | claim that you wrote the original software. If you use this software
15 | in a product, an acknowledgment in the product documentation would be
16 | appreciated but is not required.
17 | 2. Altered source versions must be plainly marked as such, and must not be
18 | misrepresented as being the original software.
19 | 3. This notice may not be removed or altered from any source distribution.
20 | */
21 |
22 | // A conforming SIMD.js implementation may contain the following deviations to
23 | // normal JS numeric behavior:
24 | // - Subnormal numbers may or may not be flushed to zero on input or output of
25 | // any SIMD operation.
26 |
27 | // Many of the operations in SIMD.js have semantics which correspond to scalar
28 | // operations in JS, however there are a few differences:
29 | // - Vector shifts don't mask the shift count.
30 | // - Conversions from float to int32 throw on error.
31 | // - Load and store operations throw when out of bounds.
32 |
33 | (function(global) {
34 |
// Create the SIMD namespace object only when the host does not already
// provide one; an existing object is kept and extended by the code below.
if (typeof global.SIMD === "undefined") {
  // SIMD module.
  global.SIMD = {};
}

if (typeof module !== "undefined") {
  // For CommonJS modules
  module.exports = global.SIMD;
}

// Local alias for the namespace object.
var SIMD = global.SIMD;
46 |
// Buffers for bit casting and coercing lane values to those representable in
// the underlying lane type.
// All eight views are backed by the single 16-byte ArrayBuffer allocated for
// _f32x4, so a value written through one view can be read back through any
// other.
var _f32x4 = new Float32Array(4);
var _f64x2 = new Float64Array(_f32x4.buffer);
var _i32x4 = new Int32Array(_f32x4.buffer);
var _i16x8 = new Int16Array(_f32x4.buffer);
var _i8x16 = new Int8Array(_f32x4.buffer);
var _ui32x4 = new Uint32Array(_f32x4.buffer);
var _ui16x8 = new Uint16Array(_f32x4.buffer);
var _ui8x16 = new Uint8Array(_f32x4.buffer);
57 |
// Coerces value to the element type of buffer by writing it to slot 0 and
// reading it back. Slot 0 of buffer is overwritten.
function convertValue(buffer, value) {
  var slot = 0;
  buffer[slot] = value;
  var coerced = buffer[slot];
  return coerced;
}
62 |
// Coerces every element of array, in place, to the element type of buffer by
// round-tripping it through slot 0 of buffer. Returns the same array object.
function convertArray(buffer, array) {
  for (var k = 0; k < array.length; k++) {
    buffer[0] = array[k];
    array[k] = buffer[0];
  }
  return array;
}
68 |
69 | // Utility functions.
70 |
// Returns true when o is a number that survives conversion to a 32-bit
// signed integer unchanged (strict comparison, so non-numbers yield false).
function isInt32(o) {
  var asInt32 = o | 0;
  return o === asInt32;
}
74 |
// Returns true when o is an instance of one of the nine typed array
// constructors listed below, false otherwise.
function isTypedArray(o) {
  var constructors = [
    Int8Array, Uint8Array, Uint8ClampedArray,
    Int16Array, Uint16Array,
    Int32Array, Uint32Array,
    Float32Array, Float64Array
  ];
  for (var k = 0; k < constructors.length; k++) {
    if (o instanceof constructors[k]) {
      return true;
    }
  }
  return false;
}
86 |
// Minimum of x and y that ignores a NaN operand: if one argument is NaN the
// other is returned (which yields NaN when both are).
function minNum(x, y) {
  if (x !== x) {
    return y;
  }
  if (y !== y) {
    return x;
  }
  return Math.min(x, y);
}
92 |
// Maximum of x and y that ignores a NaN operand: if one argument is NaN the
// other is returned (which yields NaN when both are).
function maxNum(x, y) {
  if (x !== x) {
    return y;
  }
  if (y !== y) {
    return x;
  }
  return Math.max(x, y);
}
98 |
// Limits a to the range [min, max]. The lower bound is tested first; a value
// that compares neither below min nor above max (NaN included) is returned
// unchanged.
function clamp(a, min, max) {
  return a < min ? min :
         a > max ? max :
         a;
}
106 |
107 | // SIMD implementation functions
108 |
// Converts index to a number and returns it.
// Throws RangeError when the converted value differs from its own floor,
// which is the case for fractional values and for NaN.
function simdCoerceIndex(index) {
  var numeric = +index;
  var floored = Math.floor(numeric);
  if (floored != numeric)
    throw new RangeError("SIMD index must be an integer");
  return numeric;
}
115 |
// Validates a lane index against a lane count.
// Throws TypeError when index is not an int32 number, and RangeError when it
// lies outside [0, lanes). Returns nothing.
function simdCheckLaneIndex(index, lanes) {
  var isInt32Index = (index | 0) === index;
  if (!isInt32Index)
    throw new TypeError('Lane index must be an int32');
  var inBounds = !(index < 0 || index >= lanes);
  if (!inBounds)
    throw new RangeError('Lane index must be in bounds');
}
122 |
// Global lanes array for constructing SIMD values.
// Callers fill in the lane slots they need and then call simdCreate().
var lanes = [];

// Builds a value of the given type by calling its constructor function with
// the current contents of the lanes array as arguments.
function simdCreate(type) {
  var construct = type.fn;
  return construct.apply(construct, lanes);
}
129 |
// Formats a SIMD value as "SIMD.<type name>(<lane 0>, <lane 1>, ...)".
// The argument is first passed through the type's check function.
function simdToString(type, a) {
  var value = type.fn.check(a);
  var text = "SIMD." + type.name + "(" + type.fn.extractLane(value, 0);
  var lane = 1;
  while (lane < type.lanes) {
    text += ", " + type.fn.extractLane(value, lane);
    lane++;
  }
  return text + ")";
}
139 |
// Like simdToString(), but each lane is formatted with its own
// toLocaleString() method.
function simdToLocaleString(type, a) {
  var value = type.fn.check(a);
  var text = "SIMD." + type.name + "(" +
             type.fn.extractLane(value, 0).toLocaleString();
  var lane = 1;
  while (lane < type.lanes) {
    text += ", " + type.fn.extractLane(value, lane).toLocaleString();
    lane++;
  }
  return text + ")";
}
149 |
// Creates a value of the given type with every lane set to s.
function simdSplat(type, s) {
  var lane = 0;
  while (lane < type.lanes) {
    lanes[lane] = s;
    lane++;
  }
  return simdCreate(type);
}
155 |
// Returns a new value of the given type equal to a except that lane i holds
// s. The lane index is validated before any lane is read.
function simdReplaceLane(type, a, i, s) {
  var source = type.fn.check(a);
  simdCheckLaneIndex(i, type.lanes);
  var lane = 0;
  while (lane < type.lanes) {
    lanes[lane] = type.fn.extractLane(source, lane);
    lane++;
  }
  lanes[i] = s;
  return simdCreate(type);
}
164 |
// Converts a value of fromType to toType lane by lane, truncating each lane
// toward zero.
// Throws RangeError when toType defines minVal and a truncated lane is not
// within [minVal, maxVal]; a NaN lane fails that test as well.
function simdFrom(toType, fromType, a) {
  var source = fromType.fn.check(a);
  for (var lane = 0; lane < fromType.lanes; lane++) {
    var truncated = Math.trunc(fromType.fn.extractLane(source, lane));
    if (toType.minVal !== undefined) {
      // Both tests are written in negated form so that NaN is rejected.
      if (!(toType.minVal <= truncated) || !(truncated <= toType.maxVal)) {
        throw new RangeError("Can't convert value");
      }
    }
    lanes[lane] = truncated;
  }
  return simdCreate(toType);
}
177 |
// Reinterprets the bits of a (a value of fromType) as a value of toType.
// The result's storage is a toType.view created over the same underlying
// buffer as a's storage; no bytes are copied.
function simdFromBits(toType, fromType, a) {
  var source = fromType.fn.check(a);
  var reinterpreted = new toType.fn();
  var sharedBuffer = source.s_.buffer;
  reinterpreted.s_ = new toType.view(sharedBuffer);
  return reinterpreted;
}
184 |
// Per-lane select: lane i of the result comes from a where lane i of the
// boolean selector is truthy, and from b otherwise.
function simdSelect(type, selector, a, b) {
  var mask = type.boolType.fn.check(selector);
  var whenTrue = type.fn.check(a);
  var whenFalse = type.fn.check(b);
  for (var lane = 0; lane < type.lanes; lane++) {
    if (type.boolType.fn.extractLane(mask, lane)) {
      lanes[lane] = type.fn.extractLane(whenTrue, lane);
    } else {
      lanes[lane] = type.fn.extractLane(whenFalse, lane);
    }
  }
  return simdCreate(type);
}
195 |
// Builds a value whose lane i is lane indices[i] of a. Every index is
// validated against the type's lane count before it is used.
function simdSwizzle(type, a, indices) {
  var source = type.fn.check(a);
  for (var k = 0; k < indices.length; k++) {
    var from = indices[k];
    simdCheckLaneIndex(from, type.lanes);
    lanes[k] = type.fn.extractLane(source, from);
  }
  return simdCreate(type);
}
204 |
// Builds a value whose lane i is picked from the concatenation of a and b:
// an index below the lane count selects from a, a higher one selects from b
// (after subtracting the lane count). Indices must lie in [0, 2 * lanes).
function simdShuffle(type, a, b, indices) {
  var first = type.fn.check(a);
  var second = type.fn.check(b);
  for (var k = 0; k < indices.length; k++) {
    var from = indices[k];
    simdCheckLaneIndex(from, 2 * type.lanes);
    if (from < type.lanes) {
      lanes[k] = type.fn.extractLane(first, from);
    } else {
      lanes[k] = type.fn.extractLane(second, from - type.lanes);
    }
  }
  return simdCreate(type);
}
216 |
// Scalar unary operators, applied per lane by simdUnaryOp().

// Arithmetic negation.
function unaryNeg(a) {
  return -a;
}

// Bitwise complement.
function unaryBitwiseNot(a) {
  return ~a;
}

// Logical negation.
function unaryLogicalNot(a) {
  return !a;
}
220 |
// Applies the scalar function op to every lane of a and returns the results
// as a new value of the same type.
function simdUnaryOp(type, op, a) {
  var operand = type.fn.check(a);
  var lane = 0;
  while (lane < type.lanes) {
    lanes[lane] = op(type.fn.extractLane(operand, lane));
    lane++;
  }
  return simdCreate(type);
}
227 |
// Scalar binary operators, applied per lane by simdBinaryOp().

// Bitwise operators.
function binaryAnd(a, b) {
  return a & b;
}
function binaryOr(a, b) {
  return a | b;
}
function binaryXor(a, b) {
  return a ^ b;
}

// Arithmetic operators.
function binaryAdd(a, b) {
  return a + b;
}
function binarySub(a, b) {
  return a - b;
}
function binaryMul(a, b) {
  return a * b;
}
function binaryDiv(a, b) {
  return a / b;
}
235 |
// 32-bit integer multiply: the host's Math.imul when it exists, otherwise a
// fallback that multiplies 16-bit halves.
var binaryImul = (typeof Math.imul !== 'undefined') ?
    Math.imul :
    function(a, b) {
      var aHigh = (a >>> 16) & 0xffff;
      var aLow = a & 0xffff;
      var bHigh = (b >>> 16) & 0xffff;
      var bLow = b & 0xffff;
      // the shift by 0 fixes the sign on the high part
      // the final |0 converts the unsigned value into a signed value
      return ((aLow * bLow) + (((aHigh * bLow + aLow * bHigh) << 16) >>> 0)|0);
    };
250 |
// Applies the scalar function op to each pair of corresponding lanes of a
// and b and returns the results as a new value of the same type.
function simdBinaryOp(type, op, a, b) {
  var left = type.fn.check(a);
  var right = type.fn.check(b);
  var lane = 0;
  while (lane < type.lanes) {
    lanes[lane] = op(type.fn.extractLane(left, lane),
                     type.fn.extractLane(right, lane));
    lane++;
  }
  return simdCreate(type);
}
258 |
// Scalar comparison operators, applied per lane by simdRelationalOp().
// Equality and inequality use the loose operators.

function binaryEqual(a, b) {
  return a == b;
}
function binaryNotEqual(a, b) {
  return a != b;
}
function binaryLess(a, b) {
  return a < b;
}
function binaryLessEqual(a, b) {
  return a <= b;
}
function binaryGreater(a, b) {
  return a > b;
}
function binaryGreaterEqual(a, b) {
  return a >= b;
}
265 |
// Applies the scalar comparison op to each pair of corresponding lanes of a
// and b. The results are returned as a value of the type's boolean type.
function simdRelationalOp(type, op, a, b) {
  var left = type.fn.check(a);
  var right = type.fn.check(b);
  var lane = 0;
  while (lane < type.lanes) {
    lanes[lane] = op(type.fn.extractLane(left, lane),
                     type.fn.extractLane(right, lane));
    lane++;
  }
  return simdCreate(type.boolType);
}
273 |
// Returns true as soon as a truthy lane is found in a, false when there is
// none.
function simdAnyTrue(type, a) {
  var value = type.fn.check(a);
  var lane = 0;
  while (lane < type.lanes) {
    if (type.fn.extractLane(value, lane)) {
      return true;
    }
    lane++;
  }
  return false;
}
280 |
// Returns false as soon as a falsy lane is found in a, true when every lane
// is truthy.
function simdAllTrue(type, a) {
  var value = type.fn.check(a);
  var lane = 0;
  while (lane < type.lanes) {
    if (!type.fn.extractLane(value, lane)) {
      return false;
    }
    lane++;
  }
  return true;
}
287 |
// Scalar shift operators, applied per lane by simdShiftOp().

// Left shift.
function binaryShiftLeft(a, bits) {
  return a << bits;
}

// Sign-propagating right shift.
function binaryShiftRightArithmetic(a, bits) {
  return a >> bits;
}

// Zero-filling right shift.
function binaryShiftRightLogical(a, bits) {
  return a >>> bits;
}
291 |
// Applies the scalar shift function op, with the same shift count bits, to
// every lane of a and returns the results as a new value of the same type.
function simdShiftOp(type, op, a, bits) {
  var operand = type.fn.check(a);
  var lane = 0;
  while (lane < type.lanes) {
    lanes[lane] = op(type.fn.extractLane(operand, lane), bits);
    lane++;
  }
  return simdCreate(type);
}
298 |
// Loads count lanes of the given type from tarray, starting at element
// index, and returns them as a new value; lanes beyond count are zeroed.
//   Throws TypeError when tarray is not a typed array, and RangeError when
//   index is not an integer, is negative, or the bytes to read would extend
//   past the end of tarray.
function simdLoad(type, tarray, index, count) {
  if (!isTypedArray(tarray))
    throw new TypeError("The 1st argument must be a typed array.");
  index = simdCoerceIndex(index);
  var elementSize = tarray.BYTES_PER_ELEMENT;
  var byteCount = count * type.laneSize;
  var outOfRange = index < 0 ||
                   (index * elementSize + byteCount) > tarray.byteLength;
  if (outOfRange)
    throw new RangeError("The value of index is invalid.");

  // Copy byte by byte into the storage of a freshly constructed value.
  var loaded = type.fn();
  var target = new Uint8Array(loaded.s_.buffer);
  var source = new Uint8Array(tarray.buffer,
                              tarray.byteOffset + index * elementSize,
                              byteCount);
  for (var k = 0; k < byteCount; k++) {
    target[k] = source[k];
  }
  // Clear whatever the constructor left in the lanes that were not loaded.
  var totalBytes = type.lanes * type.laneSize;
  for (var z = byteCount; z < totalBytes; z++) {
    target[z] = 0;
  }
  return loaded;
}
321 |
// Stores the first count lanes of a into tarray, starting at element index,
// and returns a (after passing it through the type's check function).
//   Throws TypeError when tarray is not a typed array, and RangeError when
//   index is not an integer, is negative, or the bytes to write would extend
//   past the end of tarray. These checks run before a itself is checked.
function simdStore(type, tarray, index, a, count) {
  if (!isTypedArray(tarray))
    throw new TypeError("The 1st argument must be a typed array.");
  index = simdCoerceIndex(index);
  var elementSize = tarray.BYTES_PER_ELEMENT;
  var byteCount = count * type.laneSize;
  var outOfRange = index < 0 ||
                   (index * elementSize + byteCount) > tarray.byteLength;
  if (outOfRange)
    throw new RangeError("The value of index is invalid.");

  a = type.fn.check(a);

  // The underlying buffers are copied byte by byte, to avoid float
  // canonicalization.
  var source = new Uint8Array(a.s_.buffer);
  var target = new Uint8Array(tarray.buffer,
                              tarray.byteOffset + index * elementSize,
                              byteCount);
  var k = 0;
  while (k < byteCount) {
    target[k] = source[k];
    k++;
  }
  return a;
}
342 |
343 | // Constructors and extractLane functions are closely related and must be
344 | // polyfilled together.
345 |
// Float32x4
// The constructor and extractLane are installed as a pair: when either one
// is missing, both are defined here.
if (typeof SIMD.Float32x4 === "undefined" ||
    typeof SIMD.Float32x4.extractLane === "undefined") {
  SIMD.Float32x4 = function(s0, s1, s2, s3) {
    // Allow the constructor to be called without `new`.
    if (!(this instanceof SIMD.Float32x4)) {
      return new SIMD.Float32x4(s0, s1, s2, s3);
    }
    var laneStorage = new Float32Array([s0, s1, s2, s3]);
    this.s_ = convertArray(_f32x4, laneStorage);
  };

  SIMD.Float32x4.extractLane = function(v, i) {
    var checked = SIMD.Float32x4.check(v);
    simdCheckLaneIndex(i, 4);
    return checked.s_[i];
  };
}
362 |
// Miscellaneous functions that aren't easily parameterized on type.

// Float32x4 swizzle and shuffle: each collects its four lane indices into an
// array and defers to the generic helper.
if (typeof SIMD.Float32x4.swizzle === "undefined") {
  SIMD.Float32x4.swizzle = function(a, s0, s1, s2, s3) {
    var indices = [s0, s1, s2, s3];
    return simdSwizzle(float32x4, a, indices);
  };
}

if (typeof SIMD.Float32x4.shuffle === "undefined") {
  SIMD.Float32x4.shuffle = function(a, b, s0, s1, s2, s3) {
    var indices = [s0, s1, s2, s3];
    return simdShuffle(float32x4, a, b, indices);
  };
}
376 |
// Int32x4
// The constructor and extractLane are installed as a pair: when either one
// is missing, both are defined here.
if (typeof SIMD.Int32x4 === "undefined" ||
    typeof SIMD.Int32x4.extractLane === "undefined") {
  SIMD.Int32x4 = function(s0, s1, s2, s3) {
    // Allow the constructor to be called without `new`.
    if (!(this instanceof SIMD.Int32x4)) {
      return new SIMD.Int32x4(s0, s1, s2, s3);
    }
    var laneStorage = new Int32Array([s0, s1, s2, s3]);
    this.s_ = convertArray(_i32x4, laneStorage);
  };

  SIMD.Int32x4.extractLane = function(v, i) {
    var checked = SIMD.Int32x4.check(v);
    simdCheckLaneIndex(i, 4);
    return checked.s_[i];
  };
}
393 |
// Int32x4 swizzle and shuffle: each collects its four lane indices into an
// array and defers to the generic helper.
if (typeof SIMD.Int32x4.swizzle === "undefined") {
  SIMD.Int32x4.swizzle = function(a, s0, s1, s2, s3) {
    var indices = [s0, s1, s2, s3];
    return simdSwizzle(int32x4, a, indices);
  };
}

if (typeof SIMD.Int32x4.shuffle === "undefined") {
  SIMD.Int32x4.shuffle = function(a, b, s0, s1, s2, s3) {
    var indices = [s0, s1, s2, s3];
    return simdShuffle(int32x4, a, b, indices);
  };
}
405 |
// Int16x8
// The constructor and extractLane are installed as a pair: when either one
// is missing, both are defined here.
if (typeof SIMD.Int16x8 === "undefined" ||
    typeof SIMD.Int16x8.extractLane === "undefined") {
  SIMD.Int16x8 = function(s0, s1, s2, s3, s4, s5, s6, s7) {
    // Allow the constructor to be called without `new`.
    if (!(this instanceof SIMD.Int16x8)) {
      return new SIMD.Int16x8(s0, s1, s2, s3, s4, s5, s6, s7);
    }
    var laneStorage = new Int16Array([s0, s1, s2, s3, s4, s5, s6, s7]);
    this.s_ = convertArray(_i16x8, laneStorage);
  };

  SIMD.Int16x8.extractLane = function(v, i) {
    var checked = SIMD.Int16x8.check(v);
    simdCheckLaneIndex(i, 8);
    return checked.s_[i];
  };
}
422 |
// Int16x8 swizzle and shuffle: each collects its eight lane indices into an
// array and defers to the generic helper.
if (typeof SIMD.Int16x8.swizzle === "undefined") {
  SIMD.Int16x8.swizzle = function(a, s0, s1, s2, s3, s4, s5, s6, s7) {
    var indices = [s0, s1, s2, s3, s4, s5, s6, s7];
    return simdSwizzle(int16x8, a, indices);
  };
}

if (typeof SIMD.Int16x8.shuffle === "undefined") {
  SIMD.Int16x8.shuffle = function(a, b, s0, s1, s2, s3, s4, s5, s6, s7) {
    var indices = [s0, s1, s2, s3, s4, s5, s6, s7];
    return simdShuffle(int16x8, a, b, indices);
  };
}
434 |
435 | // Int8x16
// Constructor and extractLane are (re)defined together whenever either one is
// missing.
if (typeof SIMD.Int8x16 === "undefined" ||
    typeof SIMD.Int8x16.extractLane === "undefined") {
  SIMD.Int8x16 = function(s0, s1, s2, s3, s4, s5, s6, s7,
                          s8, s9, s10, s11, s12, s13, s14, s15) {
    if (this instanceof SIMD.Int8x16) {
      // Lane values go through an Int8Array and convertArray before storage.
      var lanes = new Int8Array([s0, s1, s2, s3, s4, s5, s6, s7,
                                 s8, s9, s10, s11, s12, s13, s14, s15]);
      this.s_ = convertArray(_i8x16, lanes);
    } else {
      // Called without 'new': construct a real instance instead.
      return new SIMD.Int8x16(s0, s1, s2, s3, s4, s5, s6, s7,
                              s8, s9, s10, s11, s12, s13, s14, s15);
    }
  };

  SIMD.Int8x16.extractLane = function(v, i) {
    // Type check first, then the lane index check.
    var checked = SIMD.Int8x16.check(v);
    simdCheckLaneIndex(i, 16);
    return checked.s_[i];
  };
}

// Provide Int8x16.swizzle only when nothing has defined it yet.
if (typeof SIMD.Int8x16.swizzle === "undefined") {
  SIMD.Int8x16.swizzle = function(a, s0, s1, s2, s3, s4, s5, s6, s7,
                                  s8, s9, s10, s11, s12, s13, s14, s15) {
    var laneSelectors = [s0, s1, s2, s3, s4, s5, s6, s7,
                         s8, s9, s10, s11, s12, s13, s14, s15];
    return simdSwizzle(int8x16, a, laneSelectors);
  };
}

// Provide Int8x16.shuffle only when nothing has defined it yet.
if (typeof SIMD.Int8x16.shuffle === "undefined") {
  SIMD.Int8x16.shuffle = function(a, b, s0, s1, s2, s3, s4, s5, s6, s7,
                                  s8, s9, s10, s11, s12, s13, s14, s15) {
    var laneSelectors = [s0, s1, s2, s3, s4, s5, s6, s7,
                         s8, s9, s10, s11, s12, s13, s14, s15];
    return simdShuffle(int8x16, a, b, laneSelectors);
  };
}
470 |
471 | // Uint32x4
// Constructor and extractLane are (re)defined together whenever either one is
// missing.
if (typeof SIMD.Uint32x4 === "undefined" ||
    typeof SIMD.Uint32x4.extractLane === "undefined") {
  SIMD.Uint32x4 = function(s0, s1, s2, s3) {
    if (this instanceof SIMD.Uint32x4) {
      // Lane values go through a Uint32Array and convertArray before storage.
      this.s_ = convertArray(_ui32x4, new Uint32Array([s0, s1, s2, s3]));
    } else {
      // Called without 'new': construct a real instance instead.
      return new SIMD.Uint32x4(s0, s1, s2, s3);
    }
  };

  SIMD.Uint32x4.extractLane = function(v, i) {
    // Type check first, then the lane index check.
    var checked = SIMD.Uint32x4.check(v);
    simdCheckLaneIndex(i, 4);
    return checked.s_[i];
  };
}

// Provide Uint32x4.swizzle only when nothing has defined it yet.
if (typeof SIMD.Uint32x4.swizzle === "undefined") {
  SIMD.Uint32x4.swizzle = function(a, s0, s1, s2, s3) {
    var laneSelectors = [s0, s1, s2, s3];
    return simdSwizzle(uint32x4, a, laneSelectors);
  };
}

// Provide Uint32x4.shuffle only when nothing has defined it yet.
if (typeof SIMD.Uint32x4.shuffle === "undefined") {
  SIMD.Uint32x4.shuffle = function(a, b, s0, s1, s2, s3) {
    var laneSelectors = [s0, s1, s2, s3];
    return simdShuffle(uint32x4, a, b, laneSelectors);
  };
}
499 |
500 | // Uint16x8
// Constructor and extractLane are (re)defined together whenever either one is
// missing.
if (typeof SIMD.Uint16x8 === "undefined" ||
    typeof SIMD.Uint16x8.extractLane === "undefined") {
  SIMD.Uint16x8 = function(s0, s1, s2, s3, s4, s5, s6, s7) {
    if (this instanceof SIMD.Uint16x8) {
      // Lane values go through a Uint16Array and convertArray before storage.
      var lanes = new Uint16Array([s0, s1, s2, s3, s4, s5, s6, s7]);
      this.s_ = convertArray(_ui16x8, lanes);
    } else {
      // Called without 'new': construct a real instance instead.
      return new SIMD.Uint16x8(s0, s1, s2, s3, s4, s5, s6, s7);
    }
  };

  SIMD.Uint16x8.extractLane = function(v, i) {
    // Type check first, then the lane index check.
    var checked = SIMD.Uint16x8.check(v);
    simdCheckLaneIndex(i, 8);
    return checked.s_[i];
  };
}

// Provide Uint16x8.swizzle only when nothing has defined it yet.
if (typeof SIMD.Uint16x8.swizzle === "undefined") {
  SIMD.Uint16x8.swizzle = function(a, s0, s1, s2, s3, s4, s5, s6, s7) {
    var laneSelectors = [s0, s1, s2, s3, s4, s5, s6, s7];
    return simdSwizzle(uint16x8, a, laneSelectors);
  };
}

// Provide Uint16x8.shuffle only when nothing has defined it yet.
if (typeof SIMD.Uint16x8.shuffle === "undefined") {
  SIMD.Uint16x8.shuffle = function(a, b, s0, s1, s2, s3, s4, s5, s6, s7) {
    var laneSelectors = [s0, s1, s2, s3, s4, s5, s6, s7];
    return simdShuffle(uint16x8, a, b, laneSelectors);
  };
}
528 |
529 | // Uint8x16
// Constructor and extractLane are (re)defined together whenever either one is
// missing.
if (typeof SIMD.Uint8x16 === "undefined" ||
    typeof SIMD.Uint8x16.extractLane === "undefined") {
  SIMD.Uint8x16 = function(s0, s1, s2, s3, s4, s5, s6, s7,
                           s8, s9, s10, s11, s12, s13, s14, s15) {
    if (this instanceof SIMD.Uint8x16) {
      // Lane values go through a Uint8Array and convertArray before storage.
      var lanes = new Uint8Array([s0, s1, s2, s3, s4, s5, s6, s7,
                                  s8, s9, s10, s11, s12, s13, s14, s15]);
      this.s_ = convertArray(_ui8x16, lanes);
    } else {
      // Called without 'new': construct a real instance instead.
      return new SIMD.Uint8x16(s0, s1, s2, s3, s4, s5, s6, s7,
                               s8, s9, s10, s11, s12, s13, s14, s15);
    }
  };

  SIMD.Uint8x16.extractLane = function(v, i) {
    // Type check first, then the lane index check.
    var checked = SIMD.Uint8x16.check(v);
    simdCheckLaneIndex(i, 16);
    return checked.s_[i];
  };
}

// Provide Uint8x16.swizzle only when nothing has defined it yet.
if (typeof SIMD.Uint8x16.swizzle === "undefined") {
  SIMD.Uint8x16.swizzle = function(a, s0, s1, s2, s3, s4, s5, s6, s7,
                                   s8, s9, s10, s11, s12, s13, s14, s15) {
    var laneSelectors = [s0, s1, s2, s3, s4, s5, s6, s7,
                         s8, s9, s10, s11, s12, s13, s14, s15];
    return simdSwizzle(uint8x16, a, laneSelectors);
  };
}

// Provide Uint8x16.shuffle only when nothing has defined it yet.
if (typeof SIMD.Uint8x16.shuffle === "undefined") {
  SIMD.Uint8x16.shuffle = function(a, b, s0, s1, s2, s3, s4, s5, s6, s7,
                                   s8, s9, s10, s11, s12, s13, s14, s15) {
    var laneSelectors = [s0, s1, s2, s3, s4, s5, s6, s7,
                         s8, s9, s10, s11, s12, s13, s14, s15];
    return simdShuffle(uint8x16, a, b, laneSelectors);
  };
}
564 |
565 | // Bool32x4
// Constructor and extractLane are (re)defined together whenever either one is
// missing.
if (typeof SIMD.Bool32x4 === "undefined" ||
    typeof SIMD.Bool32x4.extractLane === "undefined") {
  SIMD.Bool32x4 = function(s0, s1, s2, s3) {
    if (this instanceof SIMD.Bool32x4) {
      // Every lane is coerced to a real boolean and kept in a plain array.
      this.s_ = [!!s0, !!s1, !!s2, !!s3];
    } else {
      // Called without 'new': construct a real instance instead.
      return new SIMD.Bool32x4(s0, s1, s2, s3);
    }
  };

  SIMD.Bool32x4.extractLane = function(v, i) {
    // Type check first, then the lane index check.
    var checked = SIMD.Bool32x4.check(v);
    simdCheckLaneIndex(i, 4);
    return checked.s_[i];
  };
}
581 |
582 | // Bool16x8
// Constructor and extractLane are (re)defined together whenever either one is
// missing.
if (typeof SIMD.Bool16x8 === "undefined" ||
    typeof SIMD.Bool16x8.extractLane === "undefined") {
  SIMD.Bool16x8 = function(s0, s1, s2, s3, s4, s5, s6, s7) {
    if (this instanceof SIMD.Bool16x8) {
      // Every lane is coerced to a real boolean and kept in a plain array.
      this.s_ = [!!s0, !!s1, !!s2, !!s3, !!s4, !!s5, !!s6, !!s7];
    } else {
      // Called without 'new': construct a real instance instead.
      return new SIMD.Bool16x8(s0, s1, s2, s3, s4, s5, s6, s7);
    }
  };

  SIMD.Bool16x8.extractLane = function(v, i) {
    // Type check first, then the lane index check.
    var checked = SIMD.Bool16x8.check(v);
    simdCheckLaneIndex(i, 8);
    return checked.s_[i];
  };
}
598 |
599 | // Bool8x16
// Constructor and extractLane are (re)defined together whenever either one is
// missing.
if (typeof SIMD.Bool8x16 === "undefined" ||
    typeof SIMD.Bool8x16.extractLane === "undefined") {
  SIMD.Bool8x16 = function(s0, s1, s2, s3, s4, s5, s6, s7,
                           s8, s9, s10, s11, s12, s13, s14, s15) {
    if (this instanceof SIMD.Bool8x16) {
      // Every lane is coerced to a real boolean and kept in a plain array.
      this.s_ = [!!s0, !!s1, !!s2, !!s3, !!s4, !!s5, !!s6, !!s7,
                 !!s8, !!s9, !!s10, !!s11, !!s12, !!s13, !!s14, !!s15];
    } else {
      // Called without 'new': construct a real instance instead.
      return new SIMD.Bool8x16(s0, s1, s2, s3, s4, s5, s6, s7,
                               s8, s9, s10, s11, s12, s13, s14, s15);
    }
  };

  SIMD.Bool8x16.extractLane = function(v, i) {
    // Type check first, then the lane index check.
    var checked = SIMD.Bool8x16.check(v);
    simdCheckLaneIndex(i, 16);
    return checked.s_[i];
  };
}
618 |
619 | // Type data to generate the remaining functions.
620 |
// Type descriptor for Float32x4. 'fns' names the entries of simdFns that the
// installer attaches to the constructor.
var float32x4 = {
  name: "Float32x4",
  fn: SIMD.Float32x4,
  lanes: 4,
  laneSize: 4,
  buffer: _f32x4,
  view: Float32Array,
  mulFn: binaryMul,
  fns: [
    "check", "splat", "replaceLane", "select",
    "equal", "notEqual",
    "lessThan", "lessThanOrEqual",
    "greaterThan", "greaterThanOrEqual",
    "add", "sub", "mul", "div", "neg", "abs",
    "min", "max", "minNum", "maxNum",
    "reciprocalApproximation", "reciprocalSqrtApproximation", "sqrt",
    "load", "load1", "load2", "load3",
    "store", "store1", "store2", "store3"
  ]
};
635 |
// Type descriptor for Int32x4. 'fns' names the entries of simdFns that the
// installer attaches to the constructor.
var int32x4 = {
  name: "Int32x4",
  fn: SIMD.Int32x4,
  lanes: 4,
  laneSize: 4,
  minVal: -0x80000000,
  maxVal: 0x7FFFFFFF,
  buffer: _i32x4,
  notFn: unaryBitwiseNot,
  view: Int32Array,
  mulFn: binaryImul,
  fns: [
    "check", "splat", "replaceLane", "select",
    "equal", "notEqual",
    "lessThan", "lessThanOrEqual",
    "greaterThan", "greaterThanOrEqual",
    "and", "or", "xor", "not",
    "add", "sub", "mul", "neg",
    "shiftLeftByScalar", "shiftRightByScalar",
    "load", "load1", "load2", "load3",
    "store", "store1", "store2", "store3"
  ]
};
654 |
// Type descriptor for Int16x8. 'fns' names the entries of simdFns that the
// installer attaches to the constructor; minVal/maxVal bound the saturating
// add and subtract.
var int16x8 = {
  name: "Int16x8",
  fn: SIMD.Int16x8,
  lanes: 8,
  laneSize: 2,
  minVal: -0x8000,
  maxVal: 0x7FFF,
  buffer: _i16x8,
  notFn: unaryBitwiseNot,
  view: Int16Array,
  mulFn: binaryMul,
  fns: [
    "check", "splat", "replaceLane", "select",
    "equal", "notEqual",
    "lessThan", "lessThanOrEqual",
    "greaterThan", "greaterThanOrEqual",
    "and", "or", "xor", "not",
    "add", "sub", "mul", "neg",
    "shiftLeftByScalar", "shiftRightByScalar",
    "addSaturate", "subSaturate",
    "load", "store"
  ]
};
674 |
// Type descriptor for Int8x16. 'fns' names the entries of simdFns that the
// installer attaches to the constructor; minVal/maxVal bound the saturating
// add and subtract.
var int8x16 = {
  name: "Int8x16",
  fn: SIMD.Int8x16,
  lanes: 16,
  laneSize: 1,
  minVal: -0x80,
  maxVal: 0x7F,
  buffer: _i8x16,
  notFn: unaryBitwiseNot,
  view: Int8Array,
  mulFn: binaryMul,
  fns: [
    "check", "splat", "replaceLane", "select",
    "equal", "notEqual",
    "lessThan", "lessThanOrEqual",
    "greaterThan", "greaterThanOrEqual",
    "and", "or", "xor", "not",
    "add", "sub", "mul", "neg",
    "shiftLeftByScalar", "shiftRightByScalar",
    "addSaturate", "subSaturate",
    "load", "store"
  ]
};
694 |
// Type descriptor for Uint32x4. 'fns' names the entries of simdFns that the
// installer attaches to the constructor; 'unsigned' selects the logical
// right shift.
var uint32x4 = {
  name: "Uint32x4",
  fn: SIMD.Uint32x4,
  lanes: 4,
  laneSize: 4,
  minVal: 0,
  maxVal: 0xFFFFFFFF,
  unsigned: true,
  buffer: _ui32x4,
  notFn: unaryBitwiseNot,
  view: Uint32Array,
  mulFn: binaryImul,
  fns: [
    "check", "splat", "replaceLane", "select",
    "equal", "notEqual",
    "lessThan", "lessThanOrEqual",
    "greaterThan", "greaterThanOrEqual",
    "and", "or", "xor", "not",
    "add", "sub", "mul",
    "shiftLeftByScalar", "shiftRightByScalar",
    "load", "load1", "load2", "load3",
    "store", "store1", "store2", "store3"
  ]
};
714 |
// Type descriptor for Uint16x8. 'fns' names the entries of simdFns that the
// installer attaches to the constructor; 'unsigned' selects the logical
// right shift and minVal/maxVal bound the saturating add and subtract.
var uint16x8 = {
  name: "Uint16x8",
  fn: SIMD.Uint16x8,
  lanes: 8,
  laneSize: 2,
  unsigned: true,
  minVal: 0,
  maxVal: 0xFFFF,
  buffer: _ui16x8,
  notFn: unaryBitwiseNot,
  view: Uint16Array,
  mulFn: binaryMul,
  fns: [
    "check", "splat", "replaceLane", "select",
    "equal", "notEqual",
    "lessThan", "lessThanOrEqual",
    "greaterThan", "greaterThanOrEqual",
    "and", "or", "xor", "not",
    "add", "sub", "mul",
    "shiftLeftByScalar", "shiftRightByScalar",
    "addSaturate", "subSaturate",
    "load", "store"
  ]
};
735 |
// Type descriptor for Uint8x16. 'fns' names the entries of simdFns that the
// installer attaches to the constructor; 'unsigned' selects the logical
// right shift and minVal/maxVal bound the saturating add and subtract.
var uint8x16 = {
  name: "Uint8x16",
  fn: SIMD.Uint8x16,
  lanes: 16,
  laneSize: 1,
  unsigned: true,
  minVal: 0,
  maxVal: 0xFF,
  buffer: _ui8x16,
  notFn: unaryBitwiseNot,
  view: Uint8Array,
  mulFn: binaryMul,
  fns: [
    "check", "splat", "replaceLane", "select",
    "equal", "notEqual",
    "lessThan", "lessThanOrEqual",
    "greaterThan", "greaterThanOrEqual",
    "and", "or", "xor", "not",
    "add", "sub", "mul",
    "shiftLeftByScalar", "shiftRightByScalar",
    "addSaturate", "subSaturate",
    "load", "store"
  ]
};
756 |
// Type descriptor for Bool32x4. 'fns' names the entries of simdFns that the
// installer attaches to the constructor.
var bool32x4 = {
  name: "Bool32x4",
  fn: SIMD.Bool32x4,
  lanes: 4,
  laneSize: 4,
  notFn: unaryLogicalNot,
  fns: [
    "check", "splat", "replaceLane",
    "allTrue", "anyTrue",
    "and", "or", "xor", "not"
  ]
};
766 |
// Type descriptor for Bool16x8. 'fns' names the entries of simdFns that the
// installer attaches to the constructor.
var bool16x8 = {
  name: "Bool16x8",
  fn: SIMD.Bool16x8,
  lanes: 8,
  laneSize: 2,
  notFn: unaryLogicalNot,
  fns: [
    "check", "splat", "replaceLane",
    "allTrue", "anyTrue",
    "and", "or", "xor", "not"
  ]
};
776 |
// Type descriptor for Bool8x16. 'fns' names the entries of simdFns that the
// installer attaches to the constructor.
var bool8x16 = {
  name: "Bool8x16",
  fn: SIMD.Bool8x16,
  lanes: 16,
  laneSize: 1,
  notFn: unaryLogicalNot,
  fns: [
    "check", "splat", "replaceLane",
    "allTrue", "anyTrue",
    "and", "or", "xor", "not"
  ]
};
786 |
// Relational ops return the Boolean SIMD type with the same lane count.
[float32x4, int32x4, uint32x4].forEach(function(t) { t.boolType = bool32x4; });
[int16x8, uint16x8].forEach(function(t) { t.boolType = bool16x8; });
[int8x16, uint8x16].forEach(function(t) { t.boolType = bool8x16; });

// Source types for the value-converting 'from<Type>' functions.
float32x4.from = [int32x4, uint32x4];
int32x4.from = [float32x4, uint32x4];
uint32x4.from = [float32x4, int32x4];
int16x8.from = [uint16x8];
uint16x8.from = [int16x8];
int8x16.from = [uint8x16];
uint8x16.from = [int8x16];

// Source types for the 'from<Type>Bits' functions: every other type in this
// list, in list order. Each type gets its own array.
[float32x4,
 int32x4, int16x8, int8x16,
 uint32x4, uint16x8, uint8x16].forEach(function(target, unusedIndex, all) {
  target.fromBits = all.filter(function(source) {
    return source !== target;
  });
});

// Every type the installer processes.
var simdTypes = [
  float32x4,
  int32x4, int16x8, int8x16,
  uint32x4, uint16x8, uint8x16,
  bool32x4, bool16x8, bool8x16
];
815 |
816 | // SIMD Phase2 types.
817 |
// Phase-2 types are set up only when a 'simdPhase2' binding exists.
if (typeof simdPhase2 !== 'undefined') {
  // Float64x2: constructor and extractLane are (re)defined together whenever
  // either one is missing.
  if (typeof SIMD.Float64x2 === "undefined" ||
      typeof SIMD.Float64x2.extractLane === "undefined") {
    SIMD.Float64x2 = function(s0, s1) {
      if (this instanceof SIMD.Float64x2) {
        // Lane values go through a Float64Array and convertArray before
        // storage.
        this.s_ = convertArray(_f64x2, new Float64Array([s0, s1]));
      } else {
        // Called without 'new': construct a real instance instead.
        return new SIMD.Float64x2(s0, s1);
      }
    };

    SIMD.Float64x2.extractLane = function(v, i) {
      // Type check first, then the lane index check.
      var checked = SIMD.Float64x2.check(v);
      simdCheckLaneIndex(i, 2);
      return checked.s_[i];
    };
  }

  if (typeof SIMD.Float64x2.swizzle === "undefined") {
    SIMD.Float64x2.swizzle = function(a, s0, s1) {
      var laneSelectors = [s0, s1];
      return simdSwizzle(float64x2, a, laneSelectors);
    };
  }

  if (typeof SIMD.Float64x2.shuffle === "undefined") {
    SIMD.Float64x2.shuffle = function(a, b, s0, s1) {
      var laneSelectors = [s0, s1];
      return simdShuffle(float64x2, a, b, laneSelectors);
    };
  }

  // Bool64x2: same pattern, lanes coerced to real booleans.
  if (typeof SIMD.Bool64x2 === "undefined" ||
      typeof SIMD.Bool64x2.extractLane === "undefined") {
    SIMD.Bool64x2 = function(s0, s1) {
      if (this instanceof SIMD.Bool64x2) {
        this.s_ = [!!s0, !!s1];
      } else {
        return new SIMD.Bool64x2(s0, s1);
      }
    };

    SIMD.Bool64x2.extractLane = function(v, i) {
      var checked = SIMD.Bool64x2.check(v);
      simdCheckLaneIndex(i, 2);
      return checked.s_[i];
    };
  }

  // Kept as 'var' so the swizzle/shuffle closures above can see the binding.
  var float64x2 = {
    name: "Float64x2",
    fn: SIMD.Float64x2,
    lanes: 2,
    laneSize: 8,
    buffer: _f64x2,
    view: Float64Array,
    mulFn: binaryMul,
    fns: [
      "check", "splat", "replaceLane", "select",
      "equal", "notEqual",
      "lessThan", "lessThanOrEqual",
      "greaterThan", "greaterThanOrEqual",
      "add", "sub", "mul", "div", "neg", "abs",
      "min", "max", "minNum", "maxNum",
      "reciprocalApproximation", "reciprocalSqrtApproximation", "sqrt",
      "load", "store"
    ]
  };

  var bool64x2 = {
    name: "Bool64x2",
    fn: SIMD.Bool64x2,
    lanes: 2,
    laneSize: 8,
    notFn: unaryLogicalNot,
    fns: [
      "check", "splat", "replaceLane",
      "allTrue", "anyTrue",
      "and", "or", "xor", "not"
    ]
  };

  float64x2.boolType = bool64x2;

  // Float64x2 becomes a 'fromBits' source for every phase-1 numeric type...
  [float32x4,
   int32x4, int16x8, int8x16,
   uint32x4, uint16x8, uint8x16].forEach(function(existing) {
    existing.fromBits.push(float64x2);
  });

  // ...and accepts all of them as sources itself.
  float64x2.fromBits = [float32x4, int32x4, int16x8, int8x16,
                        uint32x4, uint16x8, uint8x16];

  simdTypes.push(float64x2, bool64x2);
}
906 |
907 | // SIMD prototype functions.
// Factories for the methods placed on each SIMD prototype. Each entry takes a
// type descriptor and returns the method for that type.
var prototypeFns = {
  // Numeric conversion always throws.
  valueOf: function(type) {
    return function() {
      var message = type.name + " cannot be converted to a number";
      throw new TypeError(message);
    };
  },

  // String conversion is delegated to simdToString with the receiver.
  toString: function(type) {
    return function() {
      var self = this;
      return simdToString(type, self);
    };
  },

  // Locale string conversion is delegated to simdToLocaleString.
  toLocaleString: function(type) {
    return function() {
      var self = this;
      return simdToLocaleString(type, self);
    };
  }
};
930 |
931 | // SIMD constructor functions.
932 |
// Factories for the functions placed on each SIMD constructor. Each entry
// takes a type descriptor and returns the function for that type; the
// installer looks entries up by the names listed in the descriptor's 'fns'.
var simdFns = (function() {
  // Factory for a two-operand lane-wise operation using a fixed operator.
  function binaryOpFactory(op) {
    return function(type) {
      return function(a, b) {
        return simdBinaryOp(type, op, a, b);
      };
    };
  }

  // Factory for a one-operand lane-wise operation using a fixed operator.
  function unaryOpFactory(op) {
    return function(type) {
      return function(a) {
        return simdUnaryOp(type, op, a);
      };
    };
  }

  // Factory for a comparison using a fixed operator.
  function relationalOpFactory(op) {
    return function(type) {
      return function(a, b) {
        return simdRelationalOp(type, op, a, b);
      };
    };
  }

  // Factory for a load that passes a fixed count to simdLoad.
  function partialLoadFactory(count) {
    return function(type) {
      return function(tarray, index) {
        return simdLoad(type, tarray, index, count);
      };
    };
  }

  // Factory for a store that passes a fixed count to simdStore.
  function partialStoreFactory(count) {
    return function(type) {
      return function(tarray, index, a) {
        return simdStore(type, tarray, index, a, count);
      };
    };
  }

  // Factory for a lane-wise operation whose result is clamped to the type's
  // [minVal, maxVal] range.
  function saturatingOpFactory(combine) {
    return function(type) {
      function saturated(a, b) {
        return clamp(combine(a, b), type.minVal, type.maxVal);
      }
      return function(a, b) {
        return simdBinaryOp(type, saturated, a, b);
      };
    };
  }

  // Factory for a shift; the count is masked to the lane width in bits
  // before simdShiftOp is called.
  function shiftBy(type, op) {
    return function(a, bits) {
      var maskedBits = bits & (type.laneSize * 8 - 1);
      return simdShiftOp(type, op, a, maskedBits);
    };
  }

  return {
    // Throws a TypeError unless the argument is an instance of this type.
    check: function(type) {
      return function(a) {
        if (a instanceof type.fn) {
          return a;
        }
        throw new TypeError("Argument is not a " + type.name + ".");
      };
    },

    splat: function(type) {
      return function(s) { return simdSplat(type, s); };
    },

    replaceLane: function(type) {
      return function(a, i, s) { return simdReplaceLane(type, a, i, s); };
    },

    allTrue: function(type) {
      return function(a) { return simdAllTrue(type, a); };
    },

    anyTrue: function(type) {
      return function(a) { return simdAnyTrue(type, a); };
    },

    and: binaryOpFactory(binaryAnd),
    or: binaryOpFactory(binaryOr),
    xor: binaryOpFactory(binaryXor),

    // The 'not' operator comes from the descriptor and is read on each call.
    not: function(type) {
      return function(a) {
        return simdUnaryOp(type, type.notFn, a);
      };
    },

    equal: relationalOpFactory(binaryEqual),
    notEqual: relationalOpFactory(binaryNotEqual),
    lessThan: relationalOpFactory(binaryLess),
    lessThanOrEqual: relationalOpFactory(binaryLessEqual),
    greaterThan: relationalOpFactory(binaryGreater),
    greaterThanOrEqual: relationalOpFactory(binaryGreaterEqual),

    add: binaryOpFactory(binaryAdd),
    sub: binaryOpFactory(binarySub),

    // The multiply operator comes from the descriptor and is read on each
    // call.
    mul: function(type) {
      return function(a, b) {
        return simdBinaryOp(type, type.mulFn, a, b);
      };
    },

    div: binaryOpFactory(binaryDiv),
    neg: unaryOpFactory(unaryNeg),
    abs: unaryOpFactory(Math.abs),
    min: binaryOpFactory(Math.min),
    max: binaryOpFactory(Math.max),
    minNum: binaryOpFactory(minNum),
    maxNum: binaryOpFactory(maxNum),

    // Full-width load/store pass the type's lane count.
    load: function(type) {
      return function(tarray, index) {
        return simdLoad(type, tarray, index, type.lanes);
      };
    },
    load1: partialLoadFactory(1),
    load2: partialLoadFactory(2),
    load3: partialLoadFactory(3),

    store: function(type) {
      return function(tarray, index, a) {
        return simdStore(type, tarray, index, a, type.lanes);
      };
    },
    store1: partialStoreFactory(1),
    store2: partialStoreFactory(2),
    store3: partialStoreFactory(3),

    select: function(type) {
      return function(selector, a, b) {
        return simdSelect(type, selector, a, b);
      };
    },

    // Computed as splat(1.0) / a using the type's own installed functions.
    reciprocalApproximation: function(type) {
      return function(a) {
        var checked = type.fn.check(a);
        var ones = type.fn.splat(1.0);
        return type.fn.div(ones, checked);
      };
    },

    // Computed as reciprocalApproximation(sqrt(a)).
    reciprocalSqrtApproximation: function(type) {
      return function(a) {
        var checked = type.fn.check(a);
        var root = type.fn.sqrt(checked);
        return type.fn.reciprocalApproximation(root);
      };
    },

    sqrt: unaryOpFactory(Math.sqrt),

    shiftLeftByScalar: function(type) {
      return shiftBy(type, binaryShiftLeft);
    },

    // Unsigned types shift right logically, all others arithmetically; the
    // choice is made once, when the function is created.
    shiftRightByScalar: function(type) {
      if (type.unsigned) {
        return shiftBy(type, binaryShiftRightLogical);
      }
      return shiftBy(type, binaryShiftRightArithmetic);
    },

    addSaturate: saturatingOpFactory(function(a, b) { return a + b; }),
    subSaturate: saturatingOpFactory(function(a, b) { return a - b; })
  };
})();
1230 |
1231 | // Install functions.
1232 |
// For every type: attach prototype methods, constructor functions and the
// conversion functions, never overwriting anything that is already present.
simdTypes.forEach(function(type) {
  var ctor = type.fn;
  var proto = ctor.prototype;

  // Prototype methods: skipped when the prototype has its own property of
  // that name.
  for (var protoName in prototypeFns) {
    if (proto.hasOwnProperty(protoName)) {
      continue;
    }
    proto[protoName] = prototypeFns[protoName](type);
  }

  // Regular functions named in the descriptor.
  type.fns.forEach(function(fnName) {
    if (typeof ctor[fnName] !== "undefined") {
      return;
    }
    ctor[fnName] = simdFns[fnName](type);
  });

  // 'fromTIMD' functions, one per source type listed in 'from'.
  (type.from || []).forEach(function(sourceType) {
    var fnName = "from" + sourceType.name;
    if (typeof ctor[fnName] !== "undefined") {
      return;
    }
    ctor[fnName] = function(a) {
      return simdFrom(type, sourceType, a);
    };
  });

  // 'fromTIMDBits' functions, one per source type listed in 'fromBits'.
  (type.fromBits || []).forEach(function(sourceType) {
    var fnName = "from" + sourceType.name + "Bits";
    if (typeof ctor[fnName] !== "undefined") {
      return;
    }
    ctor[fnName] = function(a) {
      return simdFromBits(type, sourceType, a);
    };
  });
});
1269 |
1270 | // If we're in a browser, the global namespace is named 'window'. If we're
1271 | // in node, it's named 'global'. If we're in a web worker, it's named
1272 | // 'self'. If we're in a shell, 'this' might work.
1273 | })(typeof window !== "undefined"
1274 | ? window
1275 | : (typeof process === 'object' &&
1276 | typeof require === 'function' &&
1277 | typeof global === 'object')
1278 | ? global
1279 | : typeof self === 'object'
1280 | ? self
1281 | : this);
1282 |
--------------------------------------------------------------------------------
/src/ecmascript_simd_tests.js:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (C) 2013
3 |
4 | This software is provided 'as-is', without any express or implied
5 | warranty. In no event will the authors be held liable for any damages
6 | arising from the use of this software.
7 |
8 | Permission is granted to anyone to use this software for any purpose,
9 | including commercial applications, and to alter it and redistribute it
10 | freely, subject to the following restrictions:
11 |
12 | 1. The origin of this software must not be misrepresented; you must not
13 | claim that you wrote the original software. If you use this software
14 | in a product, an acknowledgment in the product documentation would be
15 | appreciated but is not required.
16 | 2. Altered source versions must be plainly marked as such, and must not be
17 | misrepresented as being the original software.
18 | 3. This notice may not be removed or altered from any source distribution.
19 | */
20 |
// Minimum of x and y that prefers a number over NaN: when one operand is NaN
// the other is returned; otherwise Math.min decides.
function minNum(x, y) {
  if (x !== x) {
    return y;
  }
  if (y !== y) {
    return x;
  }
  return Math.min(x, y);
}
26 |
// Maximum of x and y that prefers a number over NaN: when one operand is NaN
// the other is returned; otherwise Math.max decides.
function maxNum(x, y) {
  if (x !== x) {
    return y;
  }
  if (y !== y) {
    return x;
  }
  return Math.max(x, y);
}
32 |
// Equality that treats NaN as equal to NaN and tells +0 apart from -0.
function sameValue(x, y) {
  if (x != y) {
    // Unequal operands only match when both are NaN.
    return x != x && y != y;
  }
  // Equal operands differ only when they are zeros of opposite sign, which
  // the reciprocals (+Infinity vs -Infinity) reveal.
  var bothZero = !(x != 0 || y != 0);
  return !bothZero || 1 / x == 1 / y;
}
39 |
// Equality that treats NaN as equal to NaN and does not tell +0 apart from
// -0. Always returns a boolean.
function sameValueZero(x, y) {
  // Loose equality is kept so existing callers see the same matches.
  if (x == y) return true;
  // Logical '&&' (not bitwise '&') so the NaN path yields true/false rather
  // than the numbers 1/0.
  return x != x && y != y;
}
44 |
// Plain JavaScript multiplication of two lane values.
function binaryMul(a, b) {
  return a * b;
}

// 32-bit integer multiplication: Math.imul when available, otherwise a
// fallback that multiplies 16-bit halves.
var binaryImul = typeof Math.imul !== 'undefined'
  ? Math.imul
  : function(a, b) {
      var aHigh = (a >>> 16) & 0xffff;
      var aLow = a & 0xffff;
      var bHigh = (b >>> 16) & 0xffff;
      var bLow = b & 0xffff;
      // the shift by 0 fixes the sign on the high part
      var crossTerms = ((aHigh * bLow + aLow * bHigh) << 16) >>> 0;
      // the final |0 converts the unsigned value into a signed value
      return (aLow * bLow + crossTerms) | 0;
    };
60 |
// Eight typed-array views over one 16-byte buffer, so bytes written through
// one view can be read back through any other.
var _f32x4 = new Float32Array(new ArrayBuffer(16));
var _f64x2 = new Float64Array(_f32x4.buffer);

var _i32x4 = new Int32Array(_f32x4.buffer);
var _i16x8 = new Int16Array(_f32x4.buffer);
var _i8x16 = new Int8Array(_f32x4.buffer);

var _ui32x4 = new Uint32Array(_f32x4.buffer);
var _ui16x8 = new Uint16Array(_f32x4.buffer);
var _ui8x16 = new Uint8Array(_f32x4.buffer);
69 |
// Test descriptor for Float32x4; the flag properties are read by the filter
// functions.
var float32x4 = {
  name: "Float32x4",
  fn: SIMD.Float32x4,
  floatLane: true,
  signed: true,
  numerical: true,
  lanes: 4,
  laneSize: 4,
  interestingValues: [
    0, -0, 1, -1, 0.9, -0.9, 1.414,
    0x7F, -0x80, -0x8000, -0x80000000, 0x7FFF, 0x7FFFFFFF,
    Infinity, -Infinity, NaN
  ],
  view: Float32Array,
  buffer: _f32x4,
  mulFn: binaryMul
};
83 |
// Test descriptor for Int32x4; the flag properties are read by the filter
// functions.
var int32x4 = {
  name: "Int32x4",
  fn: SIMD.Int32x4,
  intLane: true,
  signed: true,
  numerical: true,
  logical: true,
  lanes: 4,
  laneSize: 4,
  minVal: -0x80000000,
  maxVal: 0x7FFFFFFF,
  interestingValues: [
    0, 1, -1,
    0x40000000, 0x7FFFFFFF, -0x80000000
  ],
  view: Int32Array,
  buffer: _i32x4,
  mulFn: binaryImul
};
100 |
// Test descriptor for Int16x8; the flag properties are read by the filter
// functions.
var int16x8 = {
  name: "Int16x8",
  fn: SIMD.Int16x8,
  intLane: true,
  signed: true,
  numerical: true,
  logical: true,
  lanes: 8,
  laneSize: 2,
  laneMask: 0xFFFF,
  minVal: -0x8000,
  maxVal: 0x7FFF,
  interestingValues: [
    0, 1, -1,
    0x4000, 0x7FFF, -0x8000
  ],
  view: Int16Array,
  buffer: _i16x8,
  mulFn: binaryMul
};
118 |
// Test descriptor for Int8x16; the flag properties are read by the filter
// functions.
var int8x16 = {
  name: "Int8x16",
  fn: SIMD.Int8x16,
  intLane: true,
  signed: true,
  numerical: true,
  logical: true,
  lanes: 16,
  laneSize: 1,
  laneMask: 0xFF,
  minVal: -0x80,
  maxVal: 0x7F,
  interestingValues: [
    0, 1, -1,
    0x40, 0x7F, -0x80
  ],
  view: Int8Array,
  buffer: _i8x16,
  mulFn: binaryMul
};
136 |
// Test descriptor for Uint32x4; the flag properties are read by the filter
// functions.
var uint32x4 = {
  name: "Uint32x4",
  fn: SIMD.Uint32x4,
  intLane: true,
  unsigned: true,
  numerical: true,
  logical: true,
  lanes: 4,
  laneSize: 4,
  minVal: 0,
  maxVal: 0xFFFFFFFF,
  interestingValues: [
    0, 1,
    0x40000000, 0x7FFFFFFF, 0xFFFFFFFF
  ],
  view: Uint32Array,
  buffer: _ui32x4,
  mulFn: binaryImul
};
153 |
// Test descriptor for Uint16x8; the flag properties are read by the filter
// functions.
var uint16x8 = {
  name: "Uint16x8",
  fn: SIMD.Uint16x8,
  intLane: true,
  unsigned: true,
  numerical: true,
  logical: true,
  lanes: 8,
  laneSize: 2,
  laneMask: 0xFFFF,
  minVal: 0,
  maxVal: 0xFFFF,
  interestingValues: [
    0, 1,
    0x4000, 0x7FFF, 0xFFFF
  ],
  view: Uint16Array,
  buffer: _ui16x8,
  mulFn: binaryMul
};
171 |
// Test descriptor for Uint8x16; the flag properties are read by the filter
// functions.
var uint8x16 = {
  name: "Uint8x16",
  fn: SIMD.Uint8x16,
  intLane: true,
  unsigned: true,
  numerical: true,
  logical: true,
  lanes: 16,
  laneSize: 1,
  laneMask: 0xFF,
  minVal: 0,
  maxVal: 0xFF,
  interestingValues: [0, 1, 0x40, 0x7F, 0xFF],
  // Unsigned view, consistent with the Uint8Array-backed _ui8x16 buffer and
  // the other unsigned descriptors; an Int8Array would read 0x80..0xFF back
  // as negative numbers.
  view: Uint8Array,
  buffer: _ui8x16,
  mulFn: binaryMul,
}
189 |
// Test descriptor for Bool32x4; the flag properties are read by the filter
// functions.
var bool32x4 = {
  name: "Bool32x4",
  fn: SIMD.Bool32x4,
  boolLane: true,
  logical: true,
  lanes: 4,
  laneSize: 4,
  interestingValues: [true, false]
};
199 |
// Test descriptor for Bool16x8; the flag properties are read by the filter
// functions.
var bool16x8 = {
  name: "Bool16x8",
  fn: SIMD.Bool16x8,
  boolLane: true,
  logical: true,
  lanes: 8,
  laneSize: 2,
  interestingValues: [true, false]
};
209 |
// Test descriptor for Bool8x16; the flag properties are read by the filter
// functions.
var bool8x16 = {
  name: "Bool8x16",
  fn: SIMD.Bool8x16,
  boolLane: true,
  logical: true,
  lanes: 16,
  laneSize: 1,
  interestingValues: [true, false]
};
219 |
220 | // Filter functions.
// Predicates over type descriptors. Most return the descriptor's flag value
// unchanged, so a missing flag yields undefined rather than false.
function isFloatType(type) {
  return type.floatLane;
}

function isIntType(type) {
  return type.intLane;
}

function isBoolType(type) {
  return type.boolLane;
}

function isNumerical(type) {
  return type.numerical;
}

function isLogical(type) {
  return type.logical;
}

function isSigned(type) {
  return type.signed;
}

function isSignedIntType(type) {
  var isInt = type.intLane;
  return isInt && type.signed;
}

function isUnsignedIntType(type) {
  var isInt = type.intLane;
  return isInt && type.unsigned;
}

// "Small" means at least 8 lanes.
function isSmallIntType(type) {
  var isInt = type.intLane;
  return isInt && type.lanes >= 8;
}

function isSmallUnsignedIntType(type) {
  var isUnsignedInt = type.intLane && type.unsigned;
  return isUnsignedInt && type.lanes >= 8;
}

// True for non-Boolean types with exactly four lanes.
function hasLoadStore123(type) {
  if (type.boolLane) {
    return false;
  }
  return type.lanes == 4;
}
232 |
// Relational ops return the Boolean SIMD type with the same lane count.
[float32x4, int32x4, uint32x4].forEach(function(t) { t.boolType = bool32x4; });
[int16x8, uint16x8].forEach(function(t) { t.boolType = bool16x8; });
[int8x16, uint8x16].forEach(function(t) { t.boolType = bool8x16; });

// Source types for the value-converting 'fromTIMD' functions.
float32x4.from = [int32x4, uint32x4];
int32x4.from = [float32x4, uint32x4];
uint32x4.from = [float32x4, int32x4];
int16x8.from = [uint16x8];
uint16x8.from = [int16x8];
int8x16.from = [uint8x16];
uint8x16.from = [int8x16];

// Source types for the 'fromBits' functions: every other type in this list,
// in list order. Each type gets its own array.
[float32x4,
 int32x4, int16x8, int8x16,
 uint32x4, uint16x8, uint8x16].forEach(function(target, unusedIndex, all) {
  target.fromBits = all.filter(function(source) {
    return source !== target;
  });
});

// Every type covered by the tests.
var simdTypes = [
  float32x4,
  int32x4, int16x8, int8x16,
  uint32x4, uint16x8, uint8x16,
  bool32x4, bool16x8, bool8x16
];
261 |
// Phase 2 types are described (and therefore tested) only when a global
// named 'simdPhase2' is defined.
if (typeof simdPhase2 !== 'undefined') {
  var float64x2 = {
    name: "Float64x2",
    fn: SIMD.Float64x2,
    floatLane: true,
    signed: true,
    numerical: true,
    lanes: 2,
    laneSize: 8,
    interestingValues: [0, -0, 1, -1, 1.414, 0x7F, -0x80, -0x8000, -0x80000000, 0x7FFF, 0x7FFFFFFF, Infinity, -Infinity, NaN],
    view: Float64Array,
    buffer: _f64x2,
    mulFn: binaryMul,
  }

  var bool64x2 = {
    name: "Bool64x2",
    fn: SIMD.Bool64x2,
    boolLane: true,
    lanes: 2,
    laneSize: 8,
    interestingValues: [true, false],
  }

  float64x2.boolType = bool64x2;

  // Float64x2 can be bit-cast from every phase 1 numeric type...
  float64x2.fromBits = [float32x4, int32x4, int16x8, int8x16,
                        uint32x4, uint16x8, uint8x16];

  // ...and each of those types can be bit-cast from Float64x2. The lists are
  // extended in place; re-assigning them here would drop float64x2 again and
  // silently skip the corresponding fromFloat64x2Bits tests.
  float64x2.fromBits.forEach(function(phase1Type) {
    phase1Type.fromBits.push(float64x2);
  });

  simdTypes.push(float64x2);
  simdTypes.push(bool64x2);
}
309 |
310 | // SIMD reference functions.
311 |
// Converts a JS value to the lane representation of 'type'. Numeric
// descriptors carry a typed-array 'buffer'; the value is written to its first
// element and read back so the typed array performs the conversion.
// Descriptors without a buffer (the bool types) coerce to a boolean instead.
function simdConvert(type, value) {
  var scratch = type.buffer;
  if (scratch !== undefined) {
    scratch[0] = value;
    return scratch[0];
  }
  return !!value;
}
317 |
// Reference implementation of toString.
// Produces "SIMD.<name>(<lane 0>, <lane 1>, ...)" after validating 'value'
// with the type's check(). Lane 0 is always emitted.
function simdToString(type, value) {
  var checked = type.fn.check(value);
  var text = "SIMD." + type.name + "(" + type.fn.extractLane(checked, 0);
  var lane = 1;
  while (lane < type.lanes) {
    text += ", " + type.fn.extractLane(checked, lane);
    lane++;
  }
  return text + ")";
}
328 |
// Reference implementation of toLocaleString.
// Same layout as simdToString, but every lane is rendered through its own
// toLocaleString().
function simdToLocaleString(type, value) {
  var checked = type.fn.check(value);
  var text = "SIMD." + type.name + "(" +
      type.fn.extractLane(checked, 0).toLocaleString();
  var lane = 1;
  while (lane < type.lanes) {
    text += ", " + type.fn.extractLane(checked, lane).toLocaleString();
    lane++;
  }
  return text + ")";
}
339 |
340 | // Utility functions.
341 |
// Create a value for testing whose lanes are built from the lane indices
// 0, 1, 2, ... The indices are handed to the type's constructor unchanged, so
// how they end up in each lane (e.g. false, true, true, ... for boolean
// types) is decided by that constructor. These test values shouldn't contain
// NaNs or other "interesting" values.
function createTestValue(type) {
  var laneValues = [];
  var lane = 0;
  while (lane < type.lanes) {
    laneValues[lane] = lane;
    lane++;
  }
  return type.fn.apply(type.fn, laneValues);
}
351 |
// Builds a value of 'type' through the lane-wise constructor (not splat())
// with 'v' passed for every lane.
function createSplatValue(type, v) {
  var laneValues = [];
  var remaining = type.lanes;
  while (remaining > 0) {
    laneValues.push(v);
    remaining--;
  }
  return type.fn.apply(type.fn, laneValues);
}
358 |
// Verifies every lane of the SIMD value 'a' against a reference.
//   type   - descriptor of a's SIMD type (uses lanes, name, fn.extractLane).
//   a      - the SIMD value under test.
//   expect - function(laneIndex) returning the reference lane value; its
//            result is run through simdConvert(type, ...) before comparing.
// A lane matches when it is the same value as the reference, or when the two
// differ by less than 0.00001. On any mismatch a single fail() is reported
// that lists all expected lanes next to a.toString().
function checkValue(type, a, expect) {
  var expectedLanes = [];
  var matches = true;
  for (var i = 0; i < type.lanes; i++) {
    var actual = type.fn.extractLane(a, i);
    var wanted = simdConvert(type, expect(i));
    expectedLanes.push(wanted);
    // The tolerance test is written as a negated '<' on purpose: when exactly
    // one side is NaN the difference is NaN, and 'NaN >= tolerance' would be
    // false, letting the mismatch pass unnoticed.
    if (!sameValue(wanted, actual) && !(Math.abs(wanted - actual) < 0.00001))
      matches = false;
  }
  if (!matches) {
    fail('expected SIMD.' + type.name + '(' + expectedLanes + ') but found ' + a.toString());
  }
}
374 |
375 | // Test methods for the different kinds of operations.
376 |
// Test the constructor and splat with the given lane values: for every
// interesting value, a vector built lane-by-lane and one built with splat()
// must both hold the converted value in every lane.
function testConstructor(type) {
  equal('function', typeof type.fn);
  equal('function', typeof type.fn.splat);
  type.interestingValues.forEach(function(laneValue) {
    var converted = simdConvert(type, laneValue);
    function sameInEveryLane(index) { return converted; }
    checkValue(type, createSplatValue(type, laneValue), sameInEveryLane);
    // splat.
    checkValue(type, type.fn.splat(laneValue), sameInEveryLane);
  });
}
390 |
// Tests type.fn.check(): it must hand back a value of its own type unchanged
// and throw for values of every other SIMD type and for non-SIMD values.
function testCheck(type) {
  equal('function', typeof type.fn.check);
  // Other SIMD types shouldn't check for this type.
  var a = type.fn();
  simdTypes.forEach(function(otherType) {
    if (otherType === type) {
      var result = type.fn.check(a);
      checkValue(type, result, function(index) { return type.fn.extractLane(a, index); });
    } else {
      // check() lives on the SIMD constructor (descriptor.fn), not on the
      // descriptor. Calling it on the descriptor would throw a TypeError for
      // the missing method and make this assertion pass vacuously.
      throws(function() { otherType.fn.check(a); });
    }
  });
  // Neither should other types.
  [{}, "", 0, 1, true, false, undefined, null, NaN, Infinity].forEach(function(x) {
    throws(function() { type.fn.check(x); });
  });
}
408 |
// Tests type.fn.replaceLane(): replacing any single lane with any interesting
// value must change exactly that lane, and invalid lane indices must throw.
function testReplaceLane(type) {
  equal('function', typeof type.fn.replaceLane);
  var base = createTestValue(type);
  type.interestingValues.forEach(function(laneValue) {
    var converted = simdConvert(type, laneValue);
    for (var target = 0; target < type.lanes; target++) {
      var replaced = type.fn.replaceLane(base, target, laneValue);
      checkValue(type, replaced, function(lane) {
        if (lane == target) return converted;
        return type.fn.extractLane(base, lane);
      });
    }
  });

  // Out-of-range, fractional and non-numeric lane indices are all rejected.
  var badIndices = [type.lanes, 13.37, null, undefined, {}, true, 'yo', -1, 128];
  badIndices.forEach(function(badIndex) {
    throws(function() { type.fn.replaceLane(base, badIndex, 0); });
  });
}
436 |
// Compare unary op's behavior to ref op at each lane: for every interesting
// value v, type.fn[op](splat(v)) must hold refOp(v), converted to the lane
// type, in every lane.
function testUnaryOp(type, op, refOp) {
  equal('function', typeof type.fn[op]);
  type.interestingValues.forEach(function(input) {
    var wanted = simdConvert(type, refOp(input));
    var actual = type.fn[op](type.fn.splat(input));
    checkValue(type, actual, function(index) { return wanted; });
  });
}
447 |
// Compare binary op's behavior to ref op at each lane with the Cartesian
// product of the given values.
//   type  - descriptor of the SIMD type under test.
//   op    - name of the function on type.fn, e.g. 'add'.
//   refOp - scalar reference function(a, b). It receives both operands
//           already converted to the lane type, and its result is converted
//           again before the comparison.
function testBinaryOp(type, op, refOp) {
  equal('function', typeof type.fn[op]);
  for (var av of type.interestingValues) {
    for (var bv of type.interestingValues) {
      var expected = simdConvert(type, refOp(simdConvert(type, av), simdConvert(type, bv)));
      var a = type.fn.splat(av);
      var b = type.fn.splat(bv);
      var result = type.fn[op](a, b);
      checkValue(type, result, function(index) { return expected; });
    }
  }
}
463 |
// Compare relational op's behavior to ref op at each lane with the Cartesian
// product of the given values.
//   type  - descriptor of the SIMD type under test.
//   op    - name of the function on type.fn, e.g. 'lessThan'.
//   refOp - scalar reference predicate(a, b) on lane-converted operands.
// The result is checked as a value of type.boolType, the Boolean SIMD type
// paired with 'type'.
function testRelationalOp(type, op, refOp) {
  equal('function', typeof type.fn[op]);
  for (var av of type.interestingValues) {
    for (var bv of type.interestingValues) {
      var expected = refOp(simdConvert(type, av), simdConvert(type, bv));
      var a = type.fn.splat(av);
      var b = type.fn.splat(bv);
      var result = type.fn[op](a, b);
      checkValue(type.boolType, result, function(index) { return expected; });
    }
  }
}
479 |
// Compare shift op's behavior to ref op at each lane.
//   type  - descriptor of the SIMD type under test.
//   op    - name of the shift function on type.fn.
//   refOp - scalar reference function(laneValue, bits).
// Every interesting value is shifted by counts around the interesting
// boundaries: negative, zero, small, and one below / at / one above the lane
// width in bits.
function testShiftOp(type, op, refOp) {
  equal('function', typeof type.fn[op]);
  var laneBits = type.laneSize * 8;
  var shiftCounts = [-1, 0, 1, 2, laneBits - 1, laneBits, laneBits + 1];
  for (var v of type.interestingValues) {
    for (var bits of shiftCounts) {
      var expected = simdConvert(type, refOp(simdConvert(type, v), bits));
      var a = type.fn.splat(v);
      var result = type.fn[op](a, bits);
      checkValue(type, result, function(index) { return expected; });
    }
  }
}
494 |
// Tests the value conversion toType.fn[name] (a 'fromXXX' function) against
// every interesting value of fromType. The reference result is the source
// lane value truncated toward zero. When toType declares minVal and the
// truncated value lies outside [minVal, maxVal] (which includes NaN), the
// conversion is expected to throw.
function testFrom(toType, fromType, name) {
  equal('function', typeof toType.fn[name]);
  fromType.interestingValues.forEach(function(laneValue) {
    var source = createSplatValue(fromType, laneValue);
    var truncated = Math.trunc(simdConvert(fromType, laneValue));
    var representable = toType.minVal === undefined ||
        (toType.minVal <= truncated && truncated <= toType.maxVal);
    if (!representable) {
      throws(function() { toType.fn[name](source) });
      return;
    }
    var wanted = simdConvert(toType, truncated);
    var converted = toType.fn[name](source);
    checkValue(toType, converted, function(index) { return wanted; });
  });
}
510 |
// Tests the bit-cast toType.fn[name] (a 'fromXXXBits' function) against every
// interesting value of fromType. The reference is produced by writing the
// source lanes into fromType.buffer and reading the expected lanes back from
// toType.buffer.
// NOTE(review): this presumes both descriptors' buffers are views over the
// same memory; that is set up outside this function - confirm.
function testFromBits(toType, fromType, name) {
  equal('function', typeof toType.fn[name]);
  function expectedLane(index) { return toType.buffer[index]; }
  fromType.interestingValues.forEach(function(laneValue) {
    var source = createSplatValue(fromType, laneValue);
    var reinterpreted = toType.fn[name](source);
    var lane = 0;
    while (lane < fromType.lanes) {
      fromType.buffer[lane] = fromType.fn.extractLane(source, lane);
      lane++;
    }
    checkValue(toType, reinterpreted, expectedLane);
  });
}
521 |
// Tests type.fn.anyTrue() on a Boolean SIMD type: false for an all-false
// vector, true as soon as any single lane is true, true for all-true.
function testAnyTrue(type) {
  equal('function', typeof type.fn.anyTrue);
  // All lanes 'false'.
  var allFalse = type.fn.splat(false);
  ok(!type.fn.anyTrue(allFalse));
  // Exactly one lane 'true'. Each case starts again from the all-false
  // vector; accumulating lanes would leave lane 0 set for every later case,
  // so an implementation that only inspects lane 0 would still pass.
  for (var i = 0; i < type.lanes; i++) {
    var oneTrue = type.fn.replaceLane(allFalse, i, true);
    ok(type.fn.anyTrue(oneTrue));
  }
  // All lanes 'true'.
  var allTrue = type.fn.splat(true);
  ok(type.fn.anyTrue(allTrue));
}
536 |
// Tests type.fn.allTrue() on a Boolean SIMD type: true for an all-true
// vector, false as soon as any single lane is false, false for all-false.
function testAllTrue(type) {
  equal('function', typeof type.fn.allTrue);
  // All lanes 'true'.
  var allTrue = type.fn.splat(true);
  ok(type.fn.allTrue(allTrue));
  // Exactly one lane 'false'. Each case starts again from the all-true
  // vector; accumulating lanes would leave lane 0 cleared for every later
  // case, so an implementation that only inspects lane 0 would still pass.
  for (var i = 0; i < type.lanes; i++) {
    var oneFalse = type.fn.replaceLane(allTrue, i, false);
    ok(!type.fn.allTrue(oneFalse));
  }
  // All lanes 'false'.
  var allFalse = type.fn.splat(false);
  ok(!type.fn.allTrue(allFalse));
}
551 |
// Tests type.fn.select(selector, a, b) with every selector that has exactly
// one 'true' lane: that lane must come from a, all other lanes from b.
function testSelect(type) {
  equal('function', typeof type.fn.select);
  // Two lane values that are different for all numerical types.
  var whenTrue = 1;
  var whenFalse = 2;
  var a = type.fn.splat(whenTrue);
  var b = type.fn.splat(whenFalse);
  var boolFn = type.boolType.fn;
  for (var hot = 0; hot < type.lanes; hot++) {
    // Default-constructed Boolean vector with only lane 'hot' switched on.
    var selector = boolFn.replaceLane(boolFn(), hot, true);
    var picked = type.fn.select(selector, a, b);
    checkValue(type, picked, function(lane) {
      return lane == hot ? whenTrue : whenFalse;
    });
  }
}
567 |
// Tests type.fn.swizzle(value, i0, i1, ...): the identity and the reversing
// permutation must reorder lanes as requested, and an invalid index in any
// argument position must throw.
function testSwizzle(type) {
  equal('function', typeof type.fn.swizzle);
  var source = createTestValue(type);  // 0, 1, 2, 3, 4, 5, 6, ...
  var order = [];
  for (var k = 0; k < type.lanes; k++) order.push(k);

  function swizzleWith(indexList) {
    return type.fn.swizzle.apply(type.fn, [source].concat(indexList));
  }

  // Identity swizzle.
  checkValue(type, swizzleWith(order), function(lane) {
    return type.fn.extractLane(source, lane);
  });
  // Reverse swizzle.
  order.reverse();
  checkValue(type, swizzleWith(order), function(lane) {
    return type.fn.extractLane(source, type.lanes - lane - 1);
  });

  // Each bad index is tried in every index position, with the remaining
  // positions holding the (valid) reversed permutation.
  var badIndices = [type.lanes, 13.37, null, undefined, {}, true, 'yo', -1, 128];
  badIndices.forEach(function(badIndex) {
    for (var slot = 0; slot < type.lanes; slot++) {
      var args = [source].concat(order);
      args[slot + 1] = badIndex;
      throws(function() { type.fn.swizzle.apply(type.fn, args); });
    }
  });
}
598 |
// Tests type.fn.shuffle(a, b, i0, i1, ...), where indices below 'lanes' pick
// from a and indices from 'lanes' upward pick from b. Covers: all lanes from
// a, exactly one lane from b (for each lane), all lanes from b, and an
// invalid index in every argument position.
function testShuffle(type) {
  equal('function', typeof type.fn.shuffle);
  var laneCount = type.lanes;
  var fromA = [];
  for (var k = 0; k < laneCount; k++) fromA.push(k);

  var a = type.fn.apply(type.fn, fromA);                // 0, 1, 2, 3, 4 ...
  var b = type.fn.add(a, type.fn.splat(laneCount));     // lanes, lanes+1 ...

  function shuffleWith(indexList) {
    return type.fn.shuffle.apply(type.fn, [a, b].concat(indexList));
  }

  // All lanes from a.
  checkValue(type, shuffleWith(fromA), function(lane) {
    return type.fn.extractLane(a, lane);
  });
  // One lane from b.
  fromA.forEach(function(pick) {
    var mixed = fromA.slice();
    mixed[pick] += laneCount;
    checkValue(type, shuffleWith(mixed), function(lane) {
      return type.fn.extractLane(lane == pick ? b : a, lane);
    });
  });
  // All lanes from b.
  var fromB = fromA.map(function(index) { return index + laneCount; });
  checkValue(type, shuffleWith(fromB), function(lane) {
    return type.fn.extractLane(b, lane);
  });

  // Each bad index is tried in every index position, with the remaining
  // positions holding the (valid) all-from-b indices.
  var badIndices = [2 * laneCount, 13.37, null, undefined, {}, true, 'yo', -1, 128];
  badIndices.forEach(function(badIndex) {
    for (var slot = 0; slot < laneCount; slot++) {
      var args = [a, b].concat(fromB);
      args[slot + 2] = badIndex;
      throws(function() { type.fn.shuffle.apply(type.fn, args); });
    }
  });
}
641 |
// Tests a load function of a SIMD type.
//   type  - descriptor of the SIMD type (uses fn, lanes, laneSize, view).
//   name  - name of the load function on type.fn ('load', 'load1', ...).
//   count - number of lanes the function actually loads; the remaining lanes
//           of the result are expected to be 0.
// The backing buffer holds 0, 1, 2, ... in the type's own element size, so a
// load at element index i is expected to yield i, i + 1, ...
function testLoad(type, name, count) {
  var loadFn = type.fn[name];
  equal('function', typeof loadFn);
  var bufLanes = 2 * type.lanes; // Test all alignments.
  var bufSize = bufLanes * type.laneSize + 8; // Extra for over-alignment test.
  var ab = new ArrayBuffer(bufSize);
  var buf = new type.view(ab);
  for (var i = 0; i < bufLanes; i++) buf[i] = i; // Number buffer sequentially.
  // Test aligned loads.
  for (var i = 0; i < type.lanes; i++) {
    var a = loadFn(buf, i);
    checkValue(type, a, function(index) { return index < count ? i + index : 0; });
  }

  // Test index coercions.
  // Unlike typedArray[index], non-canonical strings are allowed here.
  // The first five indices coerce to 0, the last three to 1.
  checkValue(type, loadFn(buf, "0"), function(index) { return index < count ? index : 0; });
  checkValue(type, loadFn(buf, " -0.0 "), function(index) { return index < count ? index : 0; });
  checkValue(type, loadFn(buf, "00"), function(index) { return index < count ? index : 0; });
  checkValue(type, loadFn(buf, false), function(index) { return index < count ? index : 0; });
  checkValue(type, loadFn(buf, null), function(index) { return index < count ? index : 0; });
  checkValue(type, loadFn(buf, "01"), function(index) { return index < count ? 1 + index : 0; });
  checkValue(type, loadFn(buf, " +1e0"), function(index) { return index < count ? 1 + index : 0; });
  checkValue(type, loadFn(buf, true), function(index) { return index < count ? 1 + index : 0; });

  // Test over-aligned loads through a Float64Array view of the same buffer.
  // 'stride' is the number of this type's lanes per Float64 element.
  // NOTE(review): the loop bound only exercises index 0, although the buffer
  // is sized with 8 extra bytes for this test - confirm whether index 1 was
  // meant to be covered as well.
  var f64 = new Float64Array(ab);
  var stride = 8 / type.laneSize;
  for (var i = 0; i < 1; i++) {
    var a = loadFn(f64, i);
    checkValue(type, a, function(index) { return index < count ? stride * i + index : 0; });
  }
  // Test the 7 possible mis-alignments.
  var i8 = new Int8Array(ab);
  for (var misalignment = 1; misalignment < 8; misalignment++) {
    // Shift the buffer up by 1 byte. The shifts accumulate across iterations,
    // so after this loop the numbered data starts at byte 'misalignment'.
    for (var i = i8.length - 1; i > 0; i--)
      i8[i] = i8[i - 1];
    var a = loadFn(i8, misalignment);
    // 'i' is the shift loop's counter, which is 0 once that loop has
    // finished, so the expected lanes are 0, 1, 2, ...
    checkValue(type, a, function(index) { return index < count ? i + index : 0; });
  }

  // Invalid indices and a non-typed-array first argument must throw.
  function testIndexCheck(buf, index) {
    throws(function () { loadFn(buf, index); });
  }
  testIndexCheck(buf, -1);
  testIndexCheck(buf, 0.7);
  testIndexCheck(buf, -0.1);
  testIndexCheck(buf, NaN);
  // First index at which 'count' lanes no longer fit into the buffer.
  testIndexCheck(buf, bufSize / type.laneSize - count + 1);
  // A raw ArrayBuffer instead of a typed array.
  testIndexCheck(buf.buffer, 1);
  testIndexCheck(buf, "a");
}
695 |
// Tests a store function of a SIMD type.
//   type  - descriptor of the SIMD type (uses fn, lanes, laneSize, view).
//   name  - name of the store function on type.fn ('store', 'store1', ...).
//   count - number of lanes the function actually stores.
// The stored value has lanes 0, 1, 2, ..., so after a store at element index
// i the buffer must hold 0, 1, ... in the 'count' elements starting at i.
function testStore(type, name, count) {
  var storeFn = type.fn[name];
  equal('function', typeof storeFn);
  var bufLanes = 2 * type.lanes; // Test all alignments.
  var bufSize = bufLanes * type.laneSize + 8; // Extra for over-alignment test.
  var ab = new ArrayBuffer(bufSize);
  var buf = new type.view(ab);
  var a = createTestValue(type); // Value containing 0, 1, 2, 3 ...
  // True when the 'count' elements of buf starting at 'offset' are 0, 1, ...
  function checkBuffer(offset) {
    for (var i = 0; i < count; i++)
      if (buf[offset + i] != i) return false;
    return true;
  }
  // Test aligned stores.
  for (var i = 0; i < type.lanes; i++) {
    storeFn(buf, i, a);
    ok(checkBuffer(i));
  }

  // Test index coercions. Each index coerces to 0 or 1, as reflected by the
  // offset passed to checkBuffer.
  storeFn(buf, "0", a); ok(checkBuffer(0));
  storeFn(buf, "01", a); ok(checkBuffer(1));
  storeFn(buf, " -0.0 ", a); ok(checkBuffer(0));
  storeFn(buf, " +1e0", a); ok(checkBuffer(1));
  storeFn(buf, false, a); ok(checkBuffer(0));
  storeFn(buf, true, a); ok(checkBuffer(1));
  storeFn(buf, null, a); ok(checkBuffer(0));

  // Test over-aligned stores through a Float64Array view of the same buffer.
  // 'stride' is the number of this type's lanes per Float64 element.
  // NOTE(review): the loop bound only exercises index 0, although the buffer
  // is sized with 8 extra bytes for this test - confirm whether index 1 was
  // meant to be covered as well.
  var f64 = new Float64Array(ab);
  var stride = 8 / type.laneSize;
  for (var i = 0; i < 1; i++) {
    storeFn(f64, i, a);
    ok(checkBuffer(stride * i));
  }
  // Test the 7 mis-alignments.
  var i8 = new Int8Array(ab);
  for (var misalignment = 1; misalignment < 8; misalignment++) {
    storeFn(i8, misalignment, a);
    // Shift the buffer down by misalignment, so the bytes just stored end up
    // at the start of the buffer where checkBuffer(0) can read them as lanes.
    for (var i = 0; i < i8.length - misalignment; i++)
      i8[i] = i8[i + misalignment];
    ok(checkBuffer(0));
  }

  // Invalid indices and a non-typed-array first argument must throw.
  function testIndexCheck(buf, index) {
    throws(function () { storeFn(buf, index, type.fn()); });
  }
  testIndexCheck(buf, -1);
  // First index at which 'count' lanes no longer fit into the buffer.
  testIndexCheck(buf, bufSize / type.laneSize - count + 1);
  // A raw ArrayBuffer instead of a typed array.
  testIndexCheck(buf.buffer, 1);
  testIndexCheck(buf, "a");
}
749 |
// Tests how a SIMD value behaves under the built-in JS operators: numeric
// coercion, arithmetic, bitwise and relational operators and calling the
// value must all throw, while property access, truthiness and the string
// conversions must work.
function testOperators(type) {
  var inst = createTestValue(type);

  // Every one of these expressions is expected to throw.
  var mustThrow = [
    function() { Number(inst) },
    function() { +inst },
    function() { -inst },
    function() { ~inst },
    function() { Math.fround(inst) },
    function() { inst|0 },
    function() { inst&0 },
    function() { inst^0 },
    function() { inst>>>0 },
    function() { inst>>0 },
    function() { inst<<0 },
    function() { (inst + inst) },
    function() { inst - inst },
    function() { inst * inst },
    function() { inst / inst },
    function() { inst % inst },
    function() { inst < inst },
    function() { inst > inst },
    function() { inst <= inst },
    function() { inst >= inst },
    function() { inst(); },
  ];
  mustThrow.forEach(function(attempt) {
    throws(attempt);
  });

  // Lanes and arbitrary names are not exposed as properties.
  equal(inst[0], undefined);
  equal(inst.a, undefined);
  // A SIMD value is always truthy.
  equal(!inst, false);
  equal(inst ? 1 : 2, 1);

  equal('function', typeof inst.toString);
  equal(inst.toString(), simdToString(type, inst));
  equal('function', typeof inst.toLocaleString);
  equal(inst.toLocaleString(), simdToLocaleString(type, inst));
  // TODO: test valueOf?
}
787 |
// Tests value semantics for a given type: a SIMD value never compares equal
// (loosely or strictly) to a value of another kind, and two SIMD values that
// differ in one lane compare exactly like the two lane values do.
// TODO: more complete tests for Object wrappers, sameValue, sameValueZero, etc.
function testValueSemantics(type) {
  // Create a vanilla test value.
  var x = createTestValue(type);

  // Values that must never equal x: non-SIMD values plus a test value of
  // every other SIMD type.
  var unrelated = [0, 1.275, NaN, Infinity, "string", null, undefined,
                   {}, function() {}];
  simdTypes.forEach(function(other) {
    if (type !== other)
      unrelated.push(createTestValue(other));
  });
  for (var k = 0; k < unrelated.length; k++) {
    var y = unrelated[k];
    equal(y == x, false);
    equal(x == y, false);
    equal(y != x, true);
    equal(x != y, true);
    equal(y === x, false);
    equal(x === y, false);
    equal(y !== x, true);
    equal(x !== y, true);
  }

  // Test that f(a, b) is the same as f(SIMD(a), SIMD(b)) for equality and
  // strict equality, at every lane.
  function compareAtEveryLane(a, b) {
    for (var lane = 0; lane < type.lanes; lane++) {
      var withA = type.fn.replaceLane(x, lane, a);
      var withB = type.fn.replaceLane(x, lane, b);
      equal(a == b, withA == withB);
      equal(a === b, withA === withB);
    }
  }
  type.interestingValues.forEach(function(a) {
    type.interestingValues.forEach(function(b) {
      compareAtEveryLane(a, b);
    });
  });
}
828 |
829 |
// Tests that apply to every SIMD type.
simdTypes.forEach(function(type) {
  // Registers '<type name> <label>' as a test that runs body(type).
  function register(label, body) {
    test(type.name + ' ' + label, function() {
      body(type);
    });
  }
  register('constructor', testConstructor);
  register('check', testCheck);
  register('operators', testOperators);
  // Note: This fails in the polyfill due to the lack of value semantics.
  register('value semantics', testValueSemantics);
  register('replaceLane', testReplaceLane);
});
848 |
// Tests that apply to the numerical SIMD types: comparisons, basic
// arithmetic, lane selection/permutation and full-width load/store.
simdTypes.filter(isNumerical).forEach(function(type) {
  function relational(op, refOp) {
    test(type.name + ' ' + op, function() {
      testRelationalOp(type, op, refOp);
    });
  }
  function binary(op, refOp) {
    test(type.name + ' ' + op, function() {
      testBinaryOp(type, op, refOp);
    });
  }
  function laneOp(op, body) {
    test(type.name + ' ' + op, function() {
      body(type);
    });
  }

  relational('equal', function(a, b) { return a == b; });
  relational('notEqual', function(a, b) { return a != b; });
  relational('lessThan', function(a, b) { return a < b; });
  relational('lessThanOrEqual', function(a, b) { return a <= b; });
  relational('greaterThan', function(a, b) { return a > b; });
  relational('greaterThanOrEqual', function(a, b) { return a >= b; });

  binary('add', function(a, b) { return a + b; });
  binary('sub', function(a, b) { return a - b; });
  // The multiplication reference is supplied by the descriptor and is looked
  // up when the test runs.
  test(type.name + ' mul', function() {
    testBinaryOp(type, 'mul', type.mulFn);
  });

  laneOp('select', testSelect);
  laneOp('swizzle', testSwizzle);
  laneOp('shuffle', testShuffle);

  test(type.name + ' load', function() {
    testLoad(type, 'load', type.lanes);
  });
  test(type.name + ' store', function() {
    testStore(type, 'store', type.lanes);
  });
});
893 |
// Partial load/store tests (load1..load3, store1..store3) for the types
// selected by hasLoadStore123. Every test is named after the function it
// exercises, so the store2 test is registered as '<type> store2'.
simdTypes.filter(hasLoadStore123).forEach(function(type) {
  var laneCounts = [1, 2, 3];
  laneCounts.forEach(function(count) {
    var name = 'load' + count;
    test(type.name + ' ' + name, function() {
      testLoad(type, name, count);
    });
  });
  laneCounts.forEach(function(count) {
    var name = 'store' + count;
    test(type.name + ' ' + name, function() {
      testStore(type, name, count);
    });
  });
});
914 |
// Bitwise and/or/xor tests for the types flagged as logical. The references
// use the scalar bitwise operators; testBinaryOp converts their result to
// the lane type.
simdTypes.filter(isLogical).forEach(function(type) {
  function bitwise(op, refOp) {
    test(type.name + ' ' + op, function() {
      testBinaryOp(type, op, refOp);
    });
  }
  bitwise('and', function(a, b) { return a & b; });
  bitwise('or', function(a, b) { return a | b; });
  bitwise('xor', function(a, b) { return a ^ b; });
});
926 |
// Negation is tested on the signed types only.
simdTypes.filter(isSigned).forEach(function(type) {
  function negate(a) { return -a; }
  test(type.name + ' neg', function() {
    testUnaryOp(type, 'neg', negate);
  });
});
932 |
// Tests that apply to the floating point types only: division, abs,
// min/max (plus the minNum/maxNum variants), sqrt and the two approximation
// functions.
simdTypes.filter(isFloatType).forEach(function(type) {
  function unary(op, refOp) {
    test(type.name + ' ' + op, function() {
      testUnaryOp(type, op, refOp);
    });
  }
  function binary(op, refOp) {
    test(type.name + ' ' + op, function() {
      testBinaryOp(type, op, refOp);
    });
  }
  binary('div', function(a, b) { return a / b; });
  unary('abs', Math.abs);
  binary('min', Math.min);
  binary('max', Math.max);
  // minNum / maxNum are file-level reference functions; wrapping them keeps
  // their lookup at the time the test runs.
  binary('minNum', function(a, b) { return minNum(a, b); });
  binary('maxNum', function(a, b) { return maxNum(a, b); });
  unary('sqrt', function(a) { return Math.sqrt(a); });
  unary('reciprocalApproximation', function(a) { return 1 / a; });
  unary('reciprocalSqrtApproximation', function(a) { return 1 / Math.sqrt(a); });
});
962 |
// Tests that apply to all integer types: bitwise not and left shift.
simdTypes.filter(isIntType).forEach(function(type) {
  test(type.name + ' not', function() {
    testUnaryOp(type, 'not', function(a) { return ~a; });
  });
  test(type.name + ' shiftLeftByScalar', function() {
    // Reference: the shift count is reduced modulo the lane width in bits.
    testShiftOp(type, 'shiftLeftByScalar', function(a, bits) {
      return a << (bits & (type.laneSize * 8 - 1));
    });
  });
});
975 |
// Right shift on signed integer types is checked against the sign-propagating
// scalar shift (>>).
simdTypes.filter(isSignedIntType).forEach(function(type) {
  test(type.name + ' shiftRightByScalar', function() {
    // Reference: the shift count is reduced modulo the lane width in bits.
    testShiftOp(type, 'shiftRightByScalar', function(a, bits) {
      return a >> (bits & (type.laneSize * 8 - 1));
    });
  });
});
985 |
// Right shift on unsigned integer types is checked against the zero-filling
// scalar shift (>>>).
simdTypes.filter(isUnsignedIntType).forEach(function(type) {
  test(type.name + ' shiftRightByScalar', function() {
    testShiftOp(type, 'shiftRightByScalar', function(a, bits) {
      // When the descriptor provides a laneMask, the operand is masked with
      // it before shifting.
      var lane = type.laneMask ? (a & type.laneMask) : a;
      // The shift count is reduced modulo the lane width in bits.
      return lane >>> (bits & (type.laneSize * 8 - 1));
    });
  });
});
997 |
// Saturating add/sub tests for the small integer types. The reference result
// is the exact sum or difference clamped to the descriptor's
// [minVal, maxVal] range.
simdTypes.filter(isSmallIntType).forEach(function(type) {
  function clampToLane(a) {
    return a < type.minVal ? type.minVal
         : a > type.maxVal ? type.maxVal
         : a;
  }
  test(type.name + ' addSaturate', function() {
    testBinaryOp(type, 'addSaturate', function(a, b) { return clampToLane(a + b); });
  });
  test(type.name + ' subSaturate', function() {
    testBinaryOp(type, 'subSaturate', function(a, b) { return clampToLane(a - b); });
  });
});
1011 |
// Tests that apply to the Boolean SIMD types: logical not and the
// anyTrue / allTrue reductions.
simdTypes.filter(isBoolType).forEach(function(type) {
  test(type.name + ' not', function() {
    testUnaryOp(type, 'not', function(a) { return !a; });
  });
  // testAnyTrue and testAllTrue take the type descriptor only.
  test(type.name + ' anyTrue', function() {
    testAnyTrue(type);
  });
  test(type.name + ' allTrue', function() {
    testAllTrue(type);
  });
});
1023 |
// From functions.
// Registers one '<toType> from<FromType>' test per entry of toType.from.
simdTypes.forEach(function(toType) {
  if (!toType.from) return;
  // forEach gives every registered callback its own fromType / fn. With a
  // 'var' loop all callbacks would share one binding, so a test runner that
  // defers the callbacks would run every test against the last source type.
  toType.from.forEach(function(fromType) {
    var fn = 'from' + fromType.name;
    test(toType.name + ' ' + fn, function() {
      testFrom(toType, fromType, fn);
    });
  });
});
1034 |
// FromBits functions.
// Registers one '<toType> from<FromType>Bits' test per entry of
// toType.fromBits.
simdTypes.forEach(function(toType) {
  if (!toType.fromBits) return;
  // forEach gives every registered callback its own fromType / fn. With a
  // 'var' loop all callbacks would share one binding, so a test runner that
  // defers the callbacks would run every test against the last source type.
  toType.fromBits.forEach(function(fromType) {
    var fn = 'from' + fromType.name + 'Bits';
    test(toType.name + ' ' + fn, function() {
      testFromBits(toType, fromType, fn);
    });
  });
});
1045 |
1046 | // Miscellaneous test methods.
1047 |
test('Float32x4 Int32x4 bit conversion', function() {
  // Asserts that the lanes of 'value' (read through simdFn.extractLane)
  // equal 'lanes', in order.
  function expectLanes(simdFn, value, lanes) {
    for (var i = 0; i < lanes.length; i++)
      equal(lanes[i], simdFn.extractLane(value, i));
  }

  // Integer bit patterns reinterpreted as floats: the patterns of 1.0 .. 4.0.
  var m = SIMD.Int32x4(0x3F800000, 0x40000000, 0x40400000, 0x40800000);
  var n = SIMD.Float32x4.fromInt32x4Bits(m);
  expectLanes(SIMD.Float32x4, n, [1.0, 2.0, 3.0, 4.0]);

  // Floats reinterpreted as integer bit patterns.
  n = SIMD.Float32x4(5.0, 6.0, 7.0, 8.0);
  m = SIMD.Int32x4.fromFloat32x4Bits(n);
  expectLanes(SIMD.Int32x4, m, [0x40A00000, 0x40C00000, 0x40E00000, 0x41000000]);

  // Flip sign using bit-wise operators.
  n = SIMD.Float32x4(9.0, 10.0, 11.0, 12.0);
  m = SIMD.Int32x4(0x80000000, 0x80000000, 0x80000000, 0x80000000);
  var nMask = SIMD.Int32x4.fromFloat32x4Bits(n);
  nMask = SIMD.Int32x4.xor(nMask, m); // flip sign.
  n = SIMD.Float32x4.fromInt32x4Bits(nMask);
  expectLanes(SIMD.Float32x4, n, [-9.0, -10.0, -11.0, -12.0]);

  // Flipping the sign bit a second time restores the original values.
  nMask = SIMD.Int32x4.fromFloat32x4Bits(n);
  nMask = SIMD.Int32x4.xor(nMask, m); // flip sign.
  n = SIMD.Float32x4.fromInt32x4Bits(nMask);
  expectLanes(SIMD.Float32x4, n, [9.0, 10.0, 11.0, 12.0]);
});
1079 |
// Asserts, lane by lane, that two Int32x4 values hold the same four lanes.
function equalInt32x4(a, b) {
  for (var lane = 0; lane < 4; lane++) {
    equal(SIMD.Int32x4.extractLane(a, lane), SIMD.Int32x4.extractLane(b, lane));
  }
}
1086 |
test('Float32x4 Int32x4 round trip', function() {
  // NaNs should stay unmodified across bit conversions: reinterpret the
  // integer lanes as floats and back again.
  var original = SIMD.Int32x4(0xFFFFFFFF, 0xFFFF0000, 0x80000000, 0x0);
  var asFloats = SIMD.Float32x4.fromInt32x4Bits(original);
  var roundTripped = SIMD.Int32x4.fromFloat32x4Bits(asFloats);
  // NaNs may be canonicalized, so these tests may fail in some implementations.
  equalInt32x4(original, roundTripped);
});
1094 |
test('Float32x4 Int32x4 load/store bit preservation', function() {
  var floatArray = new Float32Array(4);
  var intArray = new Int32Array(4);
  var nanBits = SIMD.Int32x4(0x7fc00000,0x7fe00000,0x7ff00000,0x7ff80000);
  var reloaded;

  // NaNs should stay unmodified when storing and loading to Float32Array
  SIMD.Int32x4.store(floatArray, 0, nanBits);
  reloaded = SIMD.Int32x4.load(floatArray, 0);
  equalInt32x4(nanBits, reloaded);

  // NaNs should stay unmodified when loading as Float32x4 and storing as Int32x4
  SIMD.Int32x4.store(floatArray, 0, nanBits);
  var asFloats = SIMD.Float32x4.load(floatArray, 0);
  SIMD.Float32x4.store(intArray, 0, asFloats);
  reloaded = SIMD.Int32x4.load(intArray, 0);
  equalInt32x4(nanBits, reloaded);
});
1112 |
--------------------------------------------------------------------------------
/src/external/qunit.css:
--------------------------------------------------------------------------------
1 | /*!
2 | * QUnit 1.18.0
3 | * http://qunitjs.com/
4 | *
5 | * Copyright jQuery Foundation and other contributors
6 | * Released under the MIT license
7 | * http://jquery.org/license
8 | *
9 | * Date: 2015-04-03T10:23Z
10 | */
11 |
12 | /** Font Family and Sizes */
13 |
14 | #qunit-tests, #qunit-header, #qunit-banner, #qunit-testrunner-toolbar, #qunit-userAgent, #qunit-testresult {
15 | font-family: "Helvetica Neue Light", "HelveticaNeue-Light", "Helvetica Neue", Calibri, Helvetica, Arial, sans-serif;
16 | }
17 |
18 | #qunit-testrunner-toolbar, #qunit-userAgent, #qunit-testresult, #qunit-tests li { font-size: small; }
19 | #qunit-tests { font-size: smaller; }
20 |
21 |
22 | /** Resets */
23 |
24 | #qunit-tests, #qunit-header, #qunit-banner, #qunit-userAgent, #qunit-testresult, #qunit-modulefilter {
25 | margin: 0;
26 | padding: 0;
27 | }
28 |
29 |
30 | /** Header */
31 |
32 | #qunit-header {
33 | padding: 0.5em 0 0.5em 1em;
34 |
35 | color: #8699A4;
36 | background-color: #0D3349;
37 |
38 | font-size: 1.5em;
39 | line-height: 1em;
40 | font-weight: 400;
41 |
42 | border-radius: 5px 5px 0 0;
43 | }
44 |
45 | #qunit-header a {
46 | text-decoration: none;
47 | color: #C2CCD1;
48 | }
49 |
50 | #qunit-header a:hover,
51 | #qunit-header a:focus {
52 | color: #FFF;
53 | }
54 |
55 | #qunit-testrunner-toolbar label {
56 | display: inline-block;
57 | padding: 0 0.5em 0 0.1em;
58 | }
59 |
60 | #qunit-banner {
61 | height: 5px;
62 | }
63 |
64 | #qunit-testrunner-toolbar {
65 | padding: 0.5em 1em 0.5em 1em;
66 | color: #5E740B;
67 | background-color: #EEE;
68 | overflow: hidden;
69 | }
70 |
71 | #qunit-userAgent {
72 | padding: 0.5em 1em 0.5em 1em;
73 | background-color: #2B81AF;
74 | color: #FFF;
75 | text-shadow: rgba(0, 0, 0, 0.5) 2px 2px 1px;
76 | }
77 |
78 | #qunit-modulefilter-container {
79 | float: right;
80 | padding: 0.2em;
81 | }
82 |
83 | .qunit-url-config {
84 | display: inline-block;
85 | padding: 0.1em;
86 | }
87 |
88 | .qunit-filter {
89 | display: block;
90 | float: right;
91 | margin-left: 1em;
92 | }
93 |
94 | /** Tests: Pass/Fail */
95 |
96 | #qunit-tests {
97 | list-style-position: inside;
98 | }
99 |
100 | #qunit-tests li {
101 | padding: 0.4em 1em 0.4em 1em;
102 | border-bottom: 1px solid #FFF;
103 | list-style-position: inside;
104 | }
105 |
106 | #qunit-tests > li {
107 | display: none;
108 | }
109 |
110 | #qunit-tests li.running,
111 | #qunit-tests li.pass,
112 | #qunit-tests li.fail,
113 | #qunit-tests li.skipped {
114 | display: list-item;
115 | }
116 |
117 | #qunit-tests.hidepass li.running,
118 | #qunit-tests.hidepass li.pass {
119 | visibility: hidden;
120 | position: absolute;
121 | width: 0px;
122 | height: 0px;
123 | padding: 0;
124 | border: 0;
125 | margin: 0;
126 | }
127 |
128 | #qunit-tests li strong {
129 | cursor: pointer;
130 | }
131 |
132 | #qunit-tests li.skipped strong {
133 | cursor: default;
134 | }
135 |
136 | #qunit-tests li a {
137 | padding: 0.5em;
138 | color: #C2CCD1;
139 | text-decoration: none;
140 | }
141 |
142 | #qunit-tests li p a {
143 | padding: 0.25em;
144 | color: #6B6464;
145 | }
146 | #qunit-tests li a:hover,
147 | #qunit-tests li a:focus {
148 | color: #000;
149 | }
150 |
151 | #qunit-tests li .runtime {
152 | float: right;
153 | font-size: smaller;
154 | }
155 |
156 | .qunit-assert-list {
157 | margin-top: 0.5em;
158 | padding: 0.5em;
159 |
160 | background-color: #FFF;
161 |
162 | border-radius: 5px;
163 | }
164 |
165 | .qunit-collapsed {
166 | display: none;
167 | }
168 |
169 | #qunit-tests table {
170 | border-collapse: collapse;
171 | margin-top: 0.2em;
172 | }
173 |
174 | #qunit-tests th {
175 | text-align: right;
176 | vertical-align: top;
177 | padding: 0 0.5em 0 0;
178 | }
179 |
180 | #qunit-tests td {
181 | vertical-align: top;
182 | }
183 |
184 | #qunit-tests pre {
185 | margin: 0;
186 | white-space: pre-wrap;
187 | word-wrap: break-word;
188 | }
189 |
190 | #qunit-tests del {
191 | background-color: #E0F2BE;
192 | color: #374E0C;
193 | text-decoration: none;
194 | }
195 |
196 | #qunit-tests ins {
197 | background-color: #FFCACA;
198 | color: #500;
199 | text-decoration: none;
200 | }
201 |
202 | /*** Test Counts */
203 |
204 | #qunit-tests b.counts { color: #000; }
205 | #qunit-tests b.passed { color: #5E740B; }
206 | #qunit-tests b.failed { color: #710909; }
207 |
208 | #qunit-tests li li {
209 | padding: 5px;
210 | background-color: #FFF;
211 | border-bottom: none;
212 | list-style-position: inside;
213 | }
214 |
215 | /*** Passing Styles */
216 |
217 | #qunit-tests li li.pass {
218 | color: #3C510C;
219 | background-color: #FFF;
220 | border-left: 10px solid #C6E746;
221 | }
222 |
223 | #qunit-tests .pass { color: #528CE0; background-color: #D2E0E6; }
224 | #qunit-tests .pass .test-name { color: #366097; }
225 |
226 | #qunit-tests .pass .test-actual,
227 | #qunit-tests .pass .test-expected { color: #999; }
228 |
229 | #qunit-banner.qunit-pass { background-color: #C6E746; }
230 |
231 | /*** Failing Styles */
232 |
233 | #qunit-tests li li.fail {
234 | color: #710909;
235 | background-color: #FFF;
236 | border-left: 10px solid #EE5757;
237 | white-space: pre;
238 | }
239 |
240 | #qunit-tests > li:last-child {
241 | border-radius: 0 0 5px 5px;
242 | }
243 |
244 | #qunit-tests .fail { color: #000; background-color: #EE5757; }
245 | #qunit-tests .fail .test-name,
246 | #qunit-tests .fail .module-name { color: #000; }
247 |
248 | #qunit-tests .fail .test-actual { color: #EE5757; }
249 | #qunit-tests .fail .test-expected { color: #008000; }
250 |
251 | #qunit-banner.qunit-fail { background-color: #EE5757; }
252 |
253 | /*** Skipped tests */
254 |
255 | #qunit-tests .skipped {
256 | background-color: #EBECE9;
257 | }
258 |
259 | #qunit-tests .qunit-skipped-label {
260 | background-color: #F4FF77;
261 | display: inline-block;
262 | font-style: normal;
263 | color: #366097;
264 | line-height: 1.8em;
265 | padding: 0 0.5em;
266 | margin: -0.4em 0.4em -0.4em 0;
267 | }
268 |
269 | /** Result */
270 |
271 | #qunit-testresult {
272 | padding: 0.5em 1em 0.5em 1em;
273 |
274 | color: #2B81AF;
275 | background-color: #D2E0E6;
276 |
277 | border-bottom: 1px solid #FFF;
278 | }
279 | #qunit-testresult .module-name {
280 | font-weight: 700;
281 | }
282 |
283 | /** Fixture */
284 |
285 | #qunit-fixture {
286 | position: absolute;
287 | top: -10000px;
288 | left: -10000px;
289 | width: 1000px;
290 | height: 1000px;
291 | }
292 |
--------------------------------------------------------------------------------
/src/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | EcmaScript SIMD numeric type tests
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/src/shell_test_runner.js:
--------------------------------------------------------------------------------
1 | // This is a simple script for running the tests from a standalone JS shell.
2 |
load("ecmascript_simd.js");

// Name of the test currently running; included in every failure message.
var currentName = '';
// Running total of failed assertions and uncaught test exceptions.
var numFails = 0;

// Either flag may already be defined before this script is loaded. Give each
// one its own default when it is not, so that an unset skipFromBitsTests no
// longer forces skipValueTests to true and silently disables the
// 'value semantics' tests.
// NOTE(review): skipFromBitsTests is not read in this file; presumably the
// test file consults it -- confirm the intended default.
if (typeof skipValueTests === 'undefined')
  skipValueTests = false;
if (typeof skipFromBitsTests === 'undefined')
  skipFromBitsTests = false;
13 |
// Logs a (possibly multi-line) string with every line prefixed by the
// indent, so stack traces stand out from the surrounding output.
function printIndented(str) {
  var lines = str.split('\n');
  var indented = [];
  for (var i = 0; i < lines.length; i++) {
    indented.push(' ' + lines[i]);
  }
  console.log(indented.join('\n'));
}
17 |
// Records one assertion failure: logs the message, prints the stack trace
// when one is available, and bumps the global failure counter.
function fail(str) {
  var e = Error(str);
  console.log(e.toString());
  // Error.prototype.stack is non-standard and may be absent on an Error that
  // was never thrown; guard it the same way test() does so the failure is
  // still counted instead of raising a TypeError inside printIndented.
  if (e.stack)
    printIndented(e.stack);
  numFails++;
}
24 |
// Runs one named test body. Tests whose name contains 'value semantics' are
// skipped when the skipValueTests flag is set. An exception escaping the
// body is logged (with its stack, if any) and counted as a failure.
function test(name, func) {
  currentName = name;

  var skippingValueTests =
      typeof skipValueTests !== 'undefined' && skipValueTests;
  if (skippingValueTests && name.indexOf('value semantics') !== -1) {
    return;
  }

  try {
    func();
  } catch (e) {
    var message = 'exception thrown from ' + currentName + ': ' + e.toString();
    console.log(message);
    if (e.stack) {
      printIndented(e.stack);
    }
    numFails++;
  }
}
38 |
// Asserts that a and b compare equal under loose (==) comparison; any other
// outcome is reported through fail().
function equal(a, b) {
  if (a == b) {
    return;
  }
  fail('equal(' + a + ', ' + b + ') failed in ' + currentName);
}
43 |
// Asserts that a and b differ under loose (==) comparison; a match is
// reported through fail().
function notEqual(a, b) {
  var same = (a == b);
  if (same) {
    fail('notEqual(' + a + ', ' + b + ') failed in ' + currentName);
  }
}
48 |
// Asserts that calling func raises an exception of any kind; a normal
// return is reported through fail().
function throws(func) {
  try {
    func();
  } catch (e) {
    // Any exception satisfies the assertion.
    return;
  }
  fail('throws failed in ' + currentName);
}
59 |
// Asserts that x is truthy; a falsy value is reported through fail().
function ok(x) {
  if (x) {
    return;
  }
  fail('not ok in ' + currentName);
}
64 |
// Run the whole suite; every test() call in the loaded file executes
// immediately and updates numFails.
load('ecmascript_simd_tests.js');

// Report the failure total and exit with status 1 so callers can detect a
// failing run.
if (numFails > 0) {
  var summary = 'total number of fails and exceptions: ';
  print(summary + numFails);
  quit(1);
}
71 |
--------------------------------------------------------------------------------
/src/test.js:
--------------------------------------------------------------------------------
1 | // To specifically test the p(r)olyfill.
2 |
// Clear any existing SIMD global before loading the runner, so the run
// targets the polyfill.
if (typeof SIMD !== 'undefined') {
  SIMD = undefined;
}

load('./shell_test_runner.js');
7 |
--------------------------------------------------------------------------------
/tc39/SIMD-128 TC-39.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tc39/ecmascript_simd/a5529db02b6144256b7458bc96e2e7e117b6e5e9/tc39/SIMD-128 TC-39.pdf
--------------------------------------------------------------------------------