├── LICENSE.txt ├── README.md ├── extended-api.md ├── run_dev_server.sh ├── src ├── benchmarks │ ├── aobench.js │ ├── averageFloat32x4.js │ ├── averageFloat32x4LoadFromInt8Array.js │ ├── averageFloat32x4LoadX.js │ ├── averageFloat32x4LoadXY.js │ ├── averageFloat32x4LoadXYZ.js │ ├── averageInt32x4Load.js │ ├── base.js │ ├── index.html │ ├── inverse4x4.js │ ├── kernel-template.js │ ├── mandelbrot.js │ ├── matrix-multiplication.js │ ├── memcpy.js │ ├── memset.js │ ├── run.js │ ├── run_browser.js │ ├── shiftrows.js │ ├── sinx4.js │ ├── transform.js │ └── transpose4x4.js ├── ecmascript_simd.js ├── ecmascript_simd_tests.js ├── external │ ├── qunit.css │ └── qunit.js ├── index.html ├── shell_test_runner.js └── test.js └── tc39 ├── SIMD-128 TC-39.pdf └── spec.html /LICENSE.txt: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2013 3 | 4 | This software is provided 'as-is', without any express or implied 5 | warranty. In no event will the authors be held liable for any damages 6 | arising from the use of this software. 7 | 8 | Permission is granted to anyone to use this software for any purpose, 9 | including commercial applications, and to alter it and redistribute it 10 | freely, subject to the following restrictions: 11 | 12 | 1. The origin of this software must not be misrepresented; you must not 13 | claim that you wrote the original software. If you use this software 14 | in a product, an acknowledgment in the product documentation would be 15 | appreciated but is not required. 16 | 2. Altered source versions must be plainly marked as such, and must not be 17 | misrepresented as being the original software. 18 | 3. This notice may not be removed or altered from any source distribution. 
19 | */ 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SIMD.js 2 | =============== 3 | 4 | SIMD.js has been taken out of active development in TC39 and removed 5 | from Stage 3, and is not being pursued by web browsers for 6 | implementation. SIMD operations exposed to the web are under active 7 | development within WebAssembly, with operations based on the SIMD.js 8 | operations. With WebAssembly in advanced development or shipping in 9 | multiple browsers, it seems like an adequate vehicle to subsume asm.js 10 | use cases, which are judged to be the broader cases. Although some 11 | developers have expressed interest in using SIMD.js outside of asm.js, 12 | implementers have found that implementing and optimizing for this case 13 | reliably creates a lot of complexity, and have made the decision to 14 | focus instead on delivering WebAssembly and SIMD instructions in WASM. 15 | 16 | See https://github.com/WebAssembly/simd for current development. 17 | 18 | This repository retains a historical snapshot of the SIMD.js specification work: 19 | * The authoritative API reference documentation is generated from tc39/spec.html. You can view a rendered copy at http://tc39.github.io/ecmascript_simd/ . 20 | * A polyfill at src/ecmascript_simd.js, which can't implement value semantics, but includes a correct implementation of all functions 21 | * Extensive tests at src/ecmascript_simd_tests.js, which can be run using other files in src/. Benchmarks and example code live in the same directory. 
22 | * A presentation explaining the motivation and outlining the approach at [tc39/SIMD-128 TC-39.pdf](https://github.com/tc39/ecmascript_simd/blob/master/tc39/SIMD-128%20TC-39.pdf) 23 | -------------------------------------------------------------------------------- /extended-api.md: -------------------------------------------------------------------------------- 1 | SIMD.js Extended API Proposal 2 | ============================= 3 | 4 | This document proposes an extended API for SIMD.js which is meant to provide access 5 | to platform-specific optimizations. It will sit on top of and complement the 6 | base API. 7 | 8 | The expectation is that most users will use the base API most of the time. While 9 | some compromises are being made to serve portability, most of the base API will 10 | still be fast, and it will deliver the most consistent results. The extended API 11 | will offer opportunities for performance tuning, will support specialized code 12 | sequences, and will aid in porting of code from other platforms. 13 | 14 | This proposal splits the problem space into two parts: 15 | - operations which are portable, but with semantic differences 16 | - operations which are only available on some platforms 17 | 18 | Operations which are portable, but with semantic differences 19 | ------------------------------------------------------------ 20 | 21 | Primarily, this will use a new `SIMD.Relaxed` namespace: 22 | 23 | ``` 24 | SIMD.Relaxed.Int32x4.fromFloat32x4 // relaxed on NaN or overflow 25 | SIMD.Relaxed.Float32x4.max // relaxed on NaN, 0 and -0 fungible 26 | SIMD.Relaxed.Int32x4.shiftLeftByScalar // relaxed on shift count overflow 27 | ... 
28 | ``` 29 | 30 | Functions in `SIMD.Relaxed` mimic functions in the base API with corresponding names, 31 | and provide weaker portability with greater potential for performance, for example by 32 | having unspecified results if a NaN appears in any part of the (implied) computation, by 33 | treating negative zero as interchangeable with zero, or by having unspecified 34 | results if an overflow occurs. 35 | 36 | Note that an implementation in which these are all identical to their corresponding 37 | functions in the base namespace will be fully conforming. 38 | 39 | Accompanying this is a new `SIMD.Checked` namespace to help developers find errors: 40 | 41 | ``` 42 | SIMD.Checked.Int32x4.fromFloat32x4 43 | SIMD.Checked.Float32x4.max 44 | SIMD.Checked.Int32x4.shiftLeftByScalar 45 | ... 46 | ``` 47 | 48 | Functions in `SIMD.Checked` all correspond to functions in `SIMD.Relaxed` and 49 | throw on any value which would produce unspecified results. They may also 50 | canonicalize negative zero to positive zero. We'll publish a standard polyfill for 51 | these functions which implementations or users can use if they wish. 52 | 53 | Operations which are only available on some platforms 54 | ----------------------------------------------------- 55 | 56 | Operations from all platforms are collected together in a single `SIMD.Universe` namespace: 57 | 58 | ``` 59 | SIMD.Universe.Float32x4.fma 60 | SIMD.Universe.Int32x4.rotateLeft 61 | SIMD.Universe.Int32x4.rotateRight 62 | SIMD.Universe.Int32x4.signMask // movmskps on x86 63 | SIMD.Universe.Int32x4.bitInsertIfTrue // vbit on ARM 64 | ... 65 | ``` 66 | 67 | Unlike in the `SIMD.Relaxed` namespace, these operations all have fairly strict 68 | semantics. 69 | 70 | We'll publish a standard polyfill that will fill in all functions in the 71 | `SIMD.Universe` namespace that the JIT doesn't predefine. 
This will ensure that 72 | programs continue to at least execute across platforms, though of course the 73 | performance may vary widely. 74 | 75 | Some indication of the performance will be made: 76 | 77 | ``` 78 | SIMD.isFast 79 | ``` 80 | 81 | This function takes a single argument, a function in the `SIMD.Universe` API, 82 | and returns a bool indicating whether the given function is "fast" -- roughly 83 | meaning a single operation in the underlying platform. 84 | -------------------------------------------------------------------------------- /run_dev_server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | python -m SimpleHTTPServer 3 | -------------------------------------------------------------------------------- /src/benchmarks/aobench.js: -------------------------------------------------------------------------------- 1 | // AOBench 2 | // ambient occlusion renderer 3 | // See full demo at https://github.com/wahbahdoo/aobench 4 | 5 | (function () { 6 | 7 | // Kernel configuration 8 | var kernelConfig = { 9 | kernelName: "AOBench", 10 | kernelInit: initAobench, 11 | kernelCleanup: cleanupAobench, 12 | kernelSimd: simdAobench, 13 | kernelNonSimd: nonSimdAobench 14 | }; 15 | 16 | // Hook up to the harness 17 | benchmarks.add (new Benchmark (kernelConfig)); 18 | 19 | // Global variables 20 | var NAO_SAMPLES = 8; 21 | var spheres; 22 | var plane; 23 | var rands1; 24 | var rands2; 25 | var isect0; 26 | 27 | // Initialization and verification 28 | function initAobench () { 29 | init_scene(); 30 | var A = ambient_occlusion(isect0); 31 | var B = ambient_occlusion_simd(isect0); 32 | return ((A.x == B.x) && (A.y == B.y) && (A.z == B.z)); 33 | } 34 | 35 | function cleanupAobench() { 36 | return initAobench(); 37 | } 38 | 39 | // Non SIMD version of the kernel 40 | function nonSimdAobench (n) { 41 | for (var i = 0; i < n; i++) { 42 | ambient_occlusion(isect0); 43 | } 44 | } 45 | 46 | // SIMD version of the kernel 
47 | function simdAobench (n) { 48 | for (var i = 0; i < n; i++) { 49 | ambient_occlusion_simd(isect0); 50 | } 51 | } 52 | 53 | // AOBench initialization of objects and pseudorand numbers (for benchmark predictability) 54 | function init_scene() { 55 | spheres = new Array(); 56 | spheres[0] = { 57 | center: { 58 | x: -2.0, 59 | y: 0.0, 60 | z: -3.5 61 | }, 62 | radius: 0.5 63 | }; 64 | spheres[1] = { 65 | center: { 66 | x: -0.5, 67 | y: 0.0, 68 | z: -3.0 69 | }, 70 | radius: 0.5 71 | }; 72 | spheres[2] = { 73 | center: { 74 | x: 1.0, 75 | y: 0.0, 76 | z: -2.2 77 | }, 78 | radius: 0.5 79 | }; 80 | plane = { 81 | p: { 82 | x: 0.0, 83 | y: -0.5, 84 | z: 0.0 85 | }, 86 | n: { 87 | x: 0.0, 88 | y: 1.0, 89 | z: 0.0 90 | } 91 | }; 92 | rands1 = new Array(0.1352356830611825, 0.288015044759959, 0.7678821850568056, 0.2686317905317992, 93 | 0.3331136927008629, 0.8684257145505399, 0.781927386065945, 0.5896540696267039, 94 | 0.44623699225485325, 0.9686877066269517, 0.07219804194755852, 0.32867410429753363, 95 | 0.25455036014318466, 0.6900878311134875, 0.32115139183588326, 0.8623794671148062, 96 | 0.41069260938093066, 0.999176808167249, 0.31144002149812877, 0.21190544497221708, 97 | 0.589751492254436, 0.618399447761476, 0.7838233797810972, 0.22662024036981165, 98 | 0.5274769144598395, 0.8913978524506092, 0.2461202829144895, 0.575232774252072, 99 | 0.20723191439174116, 0.15211533522233367, 0.5140219402965158, 0.695398824987933, 100 | 0.7201623972505331, 0.1737971710972488, 0.3138047114480287, 0.09142904286272824, 101 | 0.15824169223196805, 0.11588017432950437, 0.4076798539608717, 0.06385629274882376, 102 | 0.9907234299462289, 0.1742915315553546, 0.9236432255711406, 0.8344372694846243, 103 | 0.05793144227936864, 0.35464465571567416, 0.3937969475518912, 0.8209003841038793, 104 | 0.6443945677019656, 0.15443599177524447, 0.8957053178455681, 0.4145913925021887, 105 | 0.4667414356954396, 0.42764953384175897, 0.03486692951992154, 0.13391495239920914, 106 | 0.6122364429756999, 
0.7934473238419741, 0.13505113637074828, 0.7279673060402274, 107 | 0.3638722419273108, 0.30750402715057135, 0.8705337035935372, 0.3060465627349913); 108 | 109 | rands2 = new Array(0.6100146626122296, 0.8141843967605382, 0.7538463387172669, 0.538857217412442, 110 | 0.7884696905966848, 0.2656198723707348, 0.3280213042162359, 0.25133296218700707, 111 | 0.18718935316428542, 0.7374026740435511, 0.8333564973436296, 0.22081619454547763, 112 | 0.08140448946505785, 0.7737920694053173, 0.9531879865098745, 0.385226191021502, 113 | 0.8437968089710921, 0.45293551217764616, 0.11351405014283955, 0.6402874339837581, 114 | 0.9657228307332844, 0.5241556512191892, 0.9501411342062056, 0.7991736396215856, 115 | 0.7572617880068719, 0.6777111298870295, 0.19950113398954272, 0.09956562682054937, 116 | 0.03746219468303025, 0.18719390942715108, 0.1519025124143809, 0.8241845818702132, 117 | 0.9609565436840057, 0.7231316142715514, 0.26712060417048633, 0.7414182834327221, 118 | 0.4706993775907904, 0.9619642498437315, 0.14598079677671194, 0.1517641346435994, 119 | 0.5583144023548812, 0.7664180144201964, 0.8109071112703532, 0.4008640209212899, 120 | 0.10891564912162721, 0.8558103002142161, 0.03816548571921885, 0.4263107746373862, 121 | 0.280488790711388, 0.915016517508775, 0.8379701666999608, 0.5821647725533694, 122 | 0.3671900019980967, 0.6120628621429205, 0.5861144624650478, 0.5639409353025258, 123 | 0.4884668991435319, 0.9718172331340611, 0.4438377188052982, 0.9853541473858058, 124 | 0.021908782655373216,0.6144221667200327, 0.11301262397319078, 0.17565111187286675); 125 | isect0 = { 126 | t: 0.7907924036719444, 127 | hit: 1, 128 | p: { 129 | x: 0.3484251968503937, 130 | y: -0.49999999999999994, 131 | z: -0.5039370078740157 132 | }, 133 | n: { 134 | x: 0, 135 | y: 1, 136 | z: 0 137 | } 138 | }; 139 | } 140 | 141 | // Sequential AO calculation functions ---------------------------------------------- 142 | 143 | function ambient_occlusion(isect) { 144 | var col = {}; 145 | 146 | var ntheta = 
NAO_SAMPLES; 147 | var nphi = NAO_SAMPLES; 148 | var eps = 0.0001; 149 | 150 | var p = { 151 | x: isect.p.x + eps * isect.n.x, 152 | y: isect.p.y + eps * isect.n.y, 153 | z: isect.p.z + eps * isect.n.z 154 | }; 155 | 156 | var basis = new Array({}, {}, {}); 157 | orthoBasis(basis, isect.n); 158 | 159 | var occlusion = 0; 160 | 161 | for (var j = 0; j < ntheta; j++) { 162 | for (var i = 0; i < nphi; i++) { 163 | var theta = Math.sqrt(rands1[j * ntheta + i]); 164 | var phi = 2 * Math.PI * rands2[j * ntheta + i]; 165 | 166 | var x = Math.cos(phi) * theta; 167 | var y = Math.sin(phi) * theta; 168 | var z = Math.sqrt(1 - theta * theta); 169 | 170 | var rx = x * basis[0].x + y * basis[1].x + z * basis[2].x; 171 | var ry = x * basis[0].y + y * basis[1].y + z * basis[2].y; 172 | var rz = x * basis[0].z + y * basis[1].z + z * basis[2].z; 173 | 174 | var ray = { 175 | org: p, 176 | dir: { 177 | x: rx, 178 | y: ry, 179 | z: rz 180 | } 181 | }; 182 | 183 | var occIsectA = { 184 | t: 1e17, 185 | hit: 0 186 | } 187 | var occIsectB = { 188 | p: { x:0, y:0, z:0 }, 189 | n: { x:0, y:0, z:0 } 190 | }; 191 | 192 | ray_sphere_intersect(occIsectA, occIsectB, ray, spheres[0]); 193 | ray_sphere_intersect(occIsectA, occIsectB, ray, spheres[1]); 194 | ray_sphere_intersect(occIsectA, occIsectB, ray, spheres[2]); 195 | ray_plane_intersect(occIsectA, occIsectB, ray, plane); 196 | 197 | if (occIsectA.hit) occlusion += 1.0; 198 | 199 | } 200 | } 201 | 202 | occlusion = (ntheta * nphi - occlusion) / (ntheta * nphi); 203 | 204 | col.x = occlusion; 205 | col.y = occlusion; 206 | col.z = occlusion; 207 | 208 | return col; 209 | } 210 | 211 | function ray_sphere_intersect(isectA, isectB, ray, sphere) { 212 | var rs = { 213 | x: ray.org.x - sphere.center.x, 214 | y: ray.org.y - sphere.center.y, 215 | z: ray.org.z - sphere.center.z 216 | }; 217 | 218 | var B = vdot(rs, ray.dir); 219 | var C = vdot(rs, rs) - sphere.radius * sphere.radius; 220 | var D = B * B - C; 221 | 222 | if (D > 0) { 223 | var t = 
-B - Math.sqrt(D); 224 | if ((t > 0) && (t < isectA.t)) { 225 | 226 | isectA.t = t; 227 | isectA.hit = 1; 228 | 229 | isectB.p.x = ray.org.x + ray.dir.x * t; 230 | isectB.p.y = ray.org.y + ray.dir.y * t; 231 | isectB.p.z = ray.org.z + ray.dir.z * t; 232 | 233 | isectB.n.x = isectB.p.x - sphere.center.x; 234 | isectB.n.y = isectB.p.y - sphere.center.y; 235 | isectB.n.z = isectB.p.z - sphere.center.z; 236 | 237 | vnormalize(isectB.n); 238 | } 239 | } 240 | 241 | } 242 | /* Scalar ray/plane test. If the ray meets `plane` at a distance t with 0 < t < isectA.t, stores t and hit = 1 on isectA and the hit point and plane normal on isectB; otherwise leaves both untouched. */ 243 | function ray_plane_intersect(isectA, isectB, ray, plane) { 244 | var d = -vdot(plane.p, plane.n); 245 | var v = vdot(ray.dir, plane.n); 246 | /* v is dir . n; a magnitude below 1e-17 is treated as "no intersection". */ 247 | if (Math.abs(v) < 1e-17) return; 248 | 249 | var t = -(vdot(ray.org, plane.n) + d) / v; 250 | 251 | if ((t > 0) && (t < isectA.t)) { 252 | isectA.t = t; 253 | isectA.hit = 1; 254 | isectB.p.x = ray.org.x + ray.dir.x * t; 255 | isectB.p.y = ray.org.y + ray.dir.y * t; 256 | isectB.p.z = ray.org.z + ray.dir.z * t; 257 | isectB.n = plane.n; /* stored by reference, not copied: later writes to isectB.n.x/y/z would modify plane.n */ 258 | } 259 | } 260 | 261 | // SIMD AO calculation functions ---------------------------------------------------- 262 | 263 | function ambient_occlusion_simd(isect) { 264 | var col = {}; 265 | 266 | var i, j; 267 | var ntheta = NAO_SAMPLES; 268 | var nphi = NAO_SAMPLES; 269 | var eps = 0.0001; 270 | 271 | var p = { 272 | x: isect.p.x + eps * isect.n.x, 273 | y: isect.p.y + eps * isect.n.y, 274 | z: isect.p.z + eps * isect.n.z 275 | }; 276 | 277 | var basis = new Array({}, {}, {}); 278 | orthoBasis(basis, isect.n); 279 | 280 | var occlusion = 0; 281 | var occlusionx4 = SIMD.Float32x4.splat(0.0); 282 | 283 | for (j = 0; j < ntheta; j++) { 284 | for (i = 0; i < nphi; i += 4) { 285 | var theta = SIMD.Float32x4.sqrt(SIMD.Float32x4(rands1[j * ntheta + i], rands1[j * ntheta + i + 1], rands1[j * ntheta + i + 2], rands1[j * ntheta + i + 3])); 286 | var phi0 = 2 * Math.PI * rands2[j * ntheta + i]; 287 | var phi1 = 2 * Math.PI * rands2[j * ntheta + i + 1]; 288 | var phi2 = 2 * Math.PI * rands2[j * ntheta + i + 2]; 289 | var phi3 
= 2 * Math.PI * rands2[j * ntheta + i + 3]; 290 | var sinphi = SIMD.Float32x4(Math.sin(phi0), Math.sin(phi1), Math.sin(phi2), Math.sin(phi3)); 291 | var cosphi = SIMD.Float32x4(Math.cos(phi0), Math.cos(phi1), Math.cos(phi2), Math.cos(phi3)); 292 | 293 | var x = SIMD.Float32x4.mul(cosphi, theta); 294 | var y = SIMD.Float32x4.mul(sinphi, theta); 295 | var z = SIMD.Float32x4.sqrt(SIMD.Float32x4.sub(SIMD.Float32x4.splat(1.0), SIMD.Float32x4.mul(theta, theta))); 296 | 297 | var dirx = SIMD.Float32x4.add(SIMD.Float32x4.mul(x, SIMD.Float32x4.splat(basis[0].x)), 298 | SIMD.Float32x4.add(SIMD.Float32x4.mul(y, SIMD.Float32x4.splat(basis[1].x)), 299 | SIMD.Float32x4.mul(z, SIMD.Float32x4.splat(basis[2].x)))); 300 | var diry = SIMD.Float32x4.add(SIMD.Float32x4.mul(x, SIMD.Float32x4.splat(basis[0].y)), 301 | SIMD.Float32x4.add(SIMD.Float32x4.mul(y, SIMD.Float32x4.splat(basis[1].y)), 302 | SIMD.Float32x4.mul(z, SIMD.Float32x4.splat(basis[2].y)))); 303 | var dirz = SIMD.Float32x4.add(SIMD.Float32x4.mul(x, SIMD.Float32x4.splat(basis[0].z)), 304 | SIMD.Float32x4.add(SIMD.Float32x4.mul(y, SIMD.Float32x4.splat(basis[1].z)), 305 | SIMD.Float32x4.mul(z, SIMD.Float32x4.splat(basis[2].z)))); 306 | 307 | var orgx = SIMD.Float32x4.splat(p.x); 308 | var orgy = SIMD.Float32x4.splat(p.y); 309 | var orgz = SIMD.Float32x4.splat(p.z); 310 | 311 | var occIsectA = { 312 | t: SIMD.Float32x4.splat(1e17), 313 | hit: SIMD.Bool32x4.splat(false) 314 | }; 315 | var occIsectB = { 316 | p: { 317 | x: SIMD.Float32x4.splat(0.0), 318 | y: SIMD.Float32x4.splat(0.0), 319 | z: SIMD.Float32x4.splat(0.0) 320 | }, 321 | n: { 322 | x: SIMD.Float32x4.splat(0.0), 323 | y: SIMD.Float32x4.splat(0.0), 324 | z: SIMD.Float32x4.splat(0.0) 325 | } 326 | }; 327 | 328 | ray_sphere_intersect_simd(occIsectA, occIsectB, dirx, diry, dirz, orgx, orgy, orgz, spheres[0]); 329 | ray_sphere_intersect_simd(occIsectA, occIsectB, dirx, diry, dirz, orgx, orgy, orgz, spheres[1]); 330 | ray_sphere_intersect_simd(occIsectA, occIsectB, dirx, 
diry, dirz, orgx, orgy, orgz, spheres[2]); 331 | ray_plane_intersect_simd (occIsectA, occIsectB, dirx, diry, dirz, orgx, orgy, orgz, plane); 332 | 333 | occlusionx4 = SIMD.Float32x4.add( 334 | occlusionx4, 335 | SIMD.Float32x4.select(occIsectA.hit, SIMD.Float32x4.splat(1.0), 336 | SIMD.Float32x4.splat(0.0))); 337 | 338 | } 339 | } 340 | 341 | occlusion = SIMD.Float32x4.extractLane(occlusionx4, 0) + 342 | SIMD.Float32x4.extractLane(occlusionx4, 1) + 343 | SIMD.Float32x4.extractLane(occlusionx4, 2) + 344 | SIMD.Float32x4.extractLane(occlusionx4, 3); 345 | 346 | occlusion = (ntheta * nphi - occlusion) / (ntheta * nphi); 347 | 348 | col.x = occlusion; 349 | col.y = occlusion; 350 | col.z = occlusion; 351 | 352 | return col; 353 | } 354 | 355 | function ray_sphere_intersect_simd(isectA, isectB, dirx, diry, dirz, orgx, orgy, orgz, sphere) { 356 | 357 | var rsx = SIMD.Float32x4.sub(orgx, SIMD.Float32x4.splat(sphere.center.x)); 358 | var rsy = SIMD.Float32x4.sub(orgy, SIMD.Float32x4.splat(sphere.center.y)); 359 | var rsz = SIMD.Float32x4.sub(orgz, SIMD.Float32x4.splat(sphere.center.z)); 360 | 361 | var B = SIMD.Float32x4.add(SIMD.Float32x4.mul(rsx, dirx), 362 | SIMD.Float32x4.add(SIMD.Float32x4.mul(rsy, diry), SIMD.Float32x4.mul(rsz, dirz))); 363 | var C = SIMD.Float32x4.sub(SIMD.Float32x4.add(SIMD.Float32x4.mul(rsx, rsx), 364 | SIMD.Float32x4.add(SIMD.Float32x4.mul(rsy, rsy), SIMD.Float32x4.mul(rsz, rsz))), 365 | SIMD.Float32x4.splat(sphere.radius * sphere.radius)); 366 | var D = SIMD.Float32x4.sub(SIMD.Float32x4.mul(B, B), C); 367 | 368 | var cond1 = SIMD.Float32x4.greaterThan(D, SIMD.Float32x4.splat(0.0)); 369 | if (SIMD.Bool32x4.anyTrue(cond1)) { 370 | var t2 = SIMD.Float32x4.select(cond1, SIMD.Float32x4.sub(SIMD.Float32x4.neg(B), SIMD.Float32x4.sqrt(D)), SIMD.Float32x4.splat(0.0)); 371 | var cond2 = SIMD.Bool32x4.and(SIMD.Float32x4.greaterThan(t2, SIMD.Float32x4.splat(0.0)), 372 | SIMD.Float32x4.lessThan(t2, isectA.t)); 373 | if (SIMD.Bool32x4.anyTrue(cond2)) { 374 | 
isectA.t = SIMD.Float32x4.select(cond2, t2, isectA.t); 375 | isectA.hit = SIMD.Bool32x4.or(cond2, isectA.hit); 376 | 377 | isectB.p.x = SIMD.Float32x4.select(cond2, SIMD.Float32x4.add(orgx, SIMD.Float32x4.mul(dirx, isectA.t)), isectB.p.x); 378 | isectB.p.y = SIMD.Float32x4.select(cond2, SIMD.Float32x4.add(orgx, SIMD.Float32x4.mul(diry, isectA.t)), isectB.p.y); 379 | isectB.p.z = SIMD.Float32x4.select(cond2, SIMD.Float32x4.add(orgx, SIMD.Float32x4.mul(dirz, isectA.t)), isectB.p.z); 380 | 381 | isectB.n.x = SIMD.Float32x4.select(cond2, SIMD.Float32x4.sub(isectB.p.x, SIMD.Float32x4.splat(sphere.center.x)), isectB.n.x); 382 | isectB.n.y = SIMD.Float32x4.select(cond2, SIMD.Float32x4.sub(isectB.p.y, SIMD.Float32x4.splat(sphere.center.y)), isectB.n.y); 383 | isectB.n.z = SIMD.Float32x4.select(cond2, SIMD.Float32x4.sub(isectB.p.z, SIMD.Float32x4.splat(sphere.center.z)), isectB.n.z); 384 | 385 | var lengths = SIMD.Float32x4.sqrt(SIMD.Float32x4.add(SIMD.Float32x4.mul(isectB.n.x, isectB.n.x), 386 | SIMD.Float32x4.add(SIMD.Float32x4.mul(isectB.n.y, isectB.n.y), 387 | SIMD.Float32x4.mul(isectB.n.z, isectB.n.z)))); 388 | var cond3 = SIMD.Float32x4.greaterThan(SIMD.Float32x4.abs(lengths), SIMD.Float32x4.splat(1e-17)); 389 | isectB.n.x = SIMD.Float32x4.select(cond3, SIMD.Float32x4.div(isectB.n.x, lengths), isectB.n.x); 390 | isectB.n.y = SIMD.Float32x4.select(cond3, SIMD.Float32x4.div(isectB.n.y, lengths), isectB.n.y); 391 | isectB.n.z = SIMD.Float32x4.select(cond3, SIMD.Float32x4.div(isectB.n.z, lengths), isectB.n.z); 392 | } 393 | } 394 | } 395 | 396 | function ray_plane_intersect_simd(isectA, isectB, dirx, diry, dirz, orgx, orgy, orgz, plane) { 397 | var d = SIMD.Float32x4.neg(SIMD.Float32x4.add(SIMD.Float32x4.mul(SIMD.Float32x4.splat(plane.p.x), SIMD.Float32x4.splat(plane.n.x)), 398 | SIMD.Float32x4.add(SIMD.Float32x4.mul(SIMD.Float32x4.splat(plane.p.y), SIMD.Float32x4.splat(plane.n.y)), 399 | SIMD.Float32x4.mul(SIMD.Float32x4.splat(plane.p.z), 
SIMD.Float32x4.splat(plane.n.z))))); 400 | var v = SIMD.Float32x4.add(SIMD.Float32x4.mul(dirx, SIMD.Float32x4.splat(plane.n.x)), 401 | SIMD.Float32x4.add(SIMD.Float32x4.mul(diry, SIMD.Float32x4.splat(plane.n.y)), 402 | SIMD.Float32x4.mul(dirz, SIMD.Float32x4.splat(plane.n.z)))); 403 | 404 | var cond1 = SIMD.Float32x4.greaterThan(SIMD.Float32x4.abs(v), SIMD.Float32x4.splat(1e-17)); 405 | var dp = SIMD.Float32x4.add(SIMD.Float32x4.mul(orgx, SIMD.Float32x4.splat(plane.n.x)), 406 | SIMD.Float32x4.add(SIMD.Float32x4.mul(orgy, SIMD.Float32x4.splat(plane.n.y)), 407 | SIMD.Float32x4.mul(orgz, SIMD.Float32x4.splat(plane.n.z)))); 408 | var t2 = SIMD.Float32x4.select(cond1, SIMD.Float32x4.div(SIMD.Float32x4.neg(SIMD.Float32x4.add(dp, d)), v), SIMD.Float32x4.splat(0.0)); 409 | var cond2 = SIMD.Bool32x4.and(SIMD.Float32x4.greaterThan(t2, SIMD.Float32x4.splat(0.0)), SIMD.Float32x4.lessThan(t2, isectA.t)); 410 | if (SIMD.Bool32x4.anyTrue(cond2)) { 411 | isectA.t = SIMD.Float32x4.select(cond2, t2, isectA.t); 412 | isectA.hit = SIMD.Bool32x4.or(cond2, isectA.hit); 413 | 414 | isectB.p.x = SIMD.Float32x4.select(cond2, SIMD.Float32x4.add(orgx, SIMD.Float32x4.mul(dirx, isectA.t)), isectB.p.x); 415 | isectB.p.y = SIMD.Float32x4.select(cond2, SIMD.Float32x4.add(orgy, SIMD.Float32x4.mul(diry, isectA.t)), isectB.p.y); 416 | isectB.p.z = SIMD.Float32x4.select(cond2, SIMD.Float32x4.add(orgz, SIMD.Float32x4.mul(dirz, isectA.t)), isectB.p.z); 417 | 418 | isectB.n.x = SIMD.Float32x4.select(cond2, Float32x4.splat(plane.n.x), isectB.n.x); 419 | isectB.n.y = SIMD.Float32x4.select(cond2, Float32x4.splat(plane.n.y), isectB.n.y); 420 | isectB.n.z = SIMD.Float32x4.select(cond2, Float32x4.splat(plane.n.z), isectB.n.z); 421 | } 422 | } 423 | 424 | // Utility calculation functions ---------------------------------------------------- 425 | 426 | function vdot(v0, v1) { 427 | return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z; 428 | } 429 | 430 | function vcross(v0, v1) { 431 | return { 432 | x: v0.y * v1.z - 
v0.z * v1.y, 433 | y: v0.z * v1.x - v0.x * v1.z, 434 | z: v0.x * v1.y - v0.y * v1.x 435 | }; 436 | } 437 | /* Scales c in place to unit length; a vector whose length is not above 1e-17 is left unchanged. */ 438 | function vnormalize(c) { 439 | var length = Math.sqrt(vdot(c, c)); 440 | if (Math.abs(length) > 1e-17) { 441 | c.x /= length; 442 | c.y /= length; 443 | c.z /= length; 444 | } 445 | } 446 | /* Fills basis[0..2] with three mutually perpendicular axes. basis[2] is n itself (stored by reference, assumed to be unit length -- TODO confirm); basis[0] and basis[1] are normalized. */ 447 | function orthoBasis(basis, n) { 448 | basis[2] = n; 449 | basis[1] = { x: 0, y: 0, z: 0 }; 450 | /* Seed basis[1] with the first coordinate axis along which n's component lies strictly within (-0.6, 0.6); fall back to the x axis. */ 451 | if ((n.x < 0.6) && (n.x > -0.6)) { 452 | basis[1].x = 1.0; 453 | } 454 | else if ((n.y < 0.6) && (n.y > -0.6)) { 455 | basis[1].y = 1.0; 456 | } 457 | else if ((n.z < 0.6) && (n.z > -0.6)) { 458 | basis[1].z = 1.0; 459 | } 460 | else { 461 | basis[1].x = 1.0; 462 | } 463 | /* basis[0] = seed x n; basis[1] is then rebuilt as n x basis[0], replacing the seed. */ 464 | basis[0] = vcross(basis[1], basis[2]); 465 | vnormalize(basis[0]); 466 | 467 | basis[1] = vcross(basis[2], basis[0]); 468 | vnormalize(basis[1]); 469 | } 470 | 471 | } ()); 472 | -------------------------------------------------------------------------------- /src/benchmarks/averageFloat32x4.js: -------------------------------------------------------------------------------- 1 | // Simple performance test of SIMD.add operation. Use SIMD.add to average up elements 2 | // in a Float32Array. Compare to scalar implementation of same function. 
3 | // Author: Peter Jensen 4 | 5 | (function () { 6 | 7 | // Kernel configuration 8 | var kernelConfig = { 9 | kernelName: "AverageFloat32x4", 10 | kernelInit: initArray, 11 | kernelCleanup: cleanup, 12 | kernelSimd: simdAverage, 13 | kernelNonSimd: average, 14 | kernelIterations: 1000 15 | }; 16 | 17 | // Hook up to the harness 18 | benchmarks.add(new Benchmark(kernelConfig)); 19 | 20 | // Benchmark data, initialization and kernel functions 21 | var a = new Float32Array(10000); 22 | /* True when one pass of the scalar kernel and one pass of the SIMD kernel agree to within 1e-4. */ 23 | function sanityCheck() { 24 | return Math.abs(average(1) - simdAverage(1)) < 0.0001; 25 | } 26 | /* Registered as kernelInit: fills `a` with 0.1 and returns the sanity-check result. */ 27 | function initArray() { 28 | var j = 0; 29 | for (var i = 0, l = a.length; i < l; ++i) { 30 | a[i] = 0.1; 31 | } 32 | // Check that the two kernel functions yield the same result, roughly 33 | // Account for the fact that the simdAverage() is computed using float32 34 | // precision and the average() is using double precision 35 | return sanityCheck(); 36 | } 37 | /* Registered as kernelCleanup: returns the sanity-check result. */ 38 | function cleanup() { 39 | return sanityCheck(); 40 | } 41 | /* Scalar kernel: sums `a` n times and returns the mean from the final pass. `sum` is only assigned inside the loop, so the result is NaN when n < 1. */ 42 | function average(n) { 43 | for (var i = 0; i < n; ++i) { 44 | var sum = 0.0; 45 | for (var j = 0, l = a.length; j < l; ++j) { 46 | sum += a[j]; 47 | } 48 | } 49 | return sum/a.length; 50 | } 51 | /* SIMD kernel: same computation as average(), but accumulates four elements of `a` per step in a Float32x4 and adds the four lanes together at the end. */ 52 | function simdAverage(n) { 53 | var a_length = a.length; 54 | for (var i = 0; i < n; ++i) { 55 | var sum4 = SIMD.Float32x4.splat(0.0); 56 | for (var j = 0; j < a_length; j += 4) { 57 | sum4 = SIMD.Float32x4.add(sum4, SIMD.Float32x4.load(a, j)); 58 | } 59 | } 60 | return (SIMD.Float32x4.extractLane(sum4, 0) + 61 | SIMD.Float32x4.extractLane(sum4, 1) + 62 | SIMD.Float32x4.extractLane(sum4, 2) + 63 | SIMD.Float32x4.extractLane(sum4, 3)) / a.length; 64 | } 65 | 66 | } ()); 67 | -------------------------------------------------------------------------------- /src/benchmarks/averageFloat32x4LoadFromInt8Array.js: -------------------------------------------------------------------------------- 1 | // Simple performance test of SIMD.add operation. 
Use SIMD.add to average up elements 2 | // in a Float32Array. Compare to scalar implementation of same function. 3 | // Author: Peter Jensen 4 | 5 | (function () { 6 | 7 | // Kernel configuration 8 | var kernelConfig = { 9 | kernelName: "AverageFloat32x4LoadFromInt8Array", 10 | kernelInit: initArray, 11 | kernelCleanup: cleanup, 12 | kernelSimd: simdAverage, 13 | kernelNonSimd: average, 14 | kernelIterations: 1000 15 | }; 16 | 17 | // Hook up to the harness 18 | benchmarks.add(new Benchmark(kernelConfig)); 19 | 20 | // Benchmark data, initialization and kernel functions 21 | var a = new Float32Array(10000); 22 | var b = new Int8Array(a.buffer); 23 | /* True when one pass of the scalar kernel and one pass of the SIMD kernel agree to within 1e-4. */ 24 | function sanityCheck() { 25 | return Math.abs(average(1) - simdAverage(1)) < 0.0001; 26 | } 27 | /* Registered as kernelInit: fills `a` with 0.1 and returns the sanity-check result. */ 28 | function initArray() { 29 | var j = 0; 30 | for (var i = 0, l = a.length; i < l; ++i) { 31 | a[i] = 0.1; 32 | } 33 | // Check that the two kernel functions yield the same result, roughly 34 | // Account for the fact that the simdAverage() is computed using float32 35 | // precision and the average() is using double precision 36 | return sanityCheck(); 37 | } 38 | /* Registered as kernelCleanup: returns the sanity-check result. */ 39 | function cleanup() { 40 | return sanityCheck(); 41 | } 42 | /* Scalar kernel: sums `a` n times and returns the mean from the final pass. `sum` is only assigned inside the loop, so the result is NaN when n < 1. */ 43 | function average(n) { 44 | for (var i = 0; i < n; ++i) { 45 | var sum = 0.0; 46 | for (var j = 0, l = a.length; j < l; ++j) { 47 | sum += a[j]; 48 | } 49 | } 50 | return sum/a.length; 51 | } 52 | /* SIMD kernel: reads the storage of `a` through the Int8Array view `b`. j << 4 advances the index by 16 Int8 elements per step, i.e. the size of four float32 values. */ 53 | function simdAverage(n) { 54 | for (var i = 0; i < n; ++i) { 55 | var sum4 = SIMD.Float32x4.splat(0.0); 56 | for (var j = 0; j < a.length / 4; ++j) { 57 | sum4 = SIMD.Float32x4.add(sum4, SIMD.Float32x4.load(b, j << 4)); 58 | } 59 | } 60 | return (SIMD.Float32x4.extractLane(sum4, 0) + 61 | SIMD.Float32x4.extractLane(sum4, 1) + 62 | SIMD.Float32x4.extractLane(sum4, 2) + 63 | SIMD.Float32x4.extractLane(sum4, 3)) / a.length; 64 | } 65 | 66 | } ()); 67 | -------------------------------------------------------------------------------- /src/benchmarks/averageFloat32x4LoadX.js: 
-------------------------------------------------------------------------------- 1 | // Simple performance test of SIMD.add operation. Use SIMD.add to average up elements 2 | // in a Float32Array. Compare to scalar implementation of same function. 3 | // Author: Peter Jensen 4 | 5 | (function () { 6 | 7 | // Kernel configuration 8 | var kernelConfig = { 9 | kernelName: "AverageFloat32x4LoadX", 10 | kernelInit: initArray, 11 | kernelCleanup: cleanup, 12 | kernelSimd: simdAverageLoad, 13 | kernelNonSimd: average, 14 | kernelIterations: 1000 15 | }; 16 | 17 | // Hook up to the harness 18 | benchmarks.add(new Benchmark(kernelConfig)); 19 | 20 | // Benchmark data, initialization and kernel functions 21 | var a = new Float32Array(10000); 22 | var a1 = new Float32Array(10000); 23 | var b = new Int8Array(a.buffer); 24 | 25 | function sanityCheck() { 26 | return true; 27 | return Math.abs(average(1) - simdAverageLoad(1)) < 0.0001; 28 | } 29 | 30 | function initArray() { 31 | var j = 0; 32 | for (var i = 0, l = a.length; i < l; ++i) { 33 | a[i] = 0.1; 34 | } 35 | // Check that the two kernel functions yields the same result, roughly 36 | // Account for the fact that the simdAverage() is computed using float32 37 | // precision and the average() is using double precision 38 | return sanityCheck(); 39 | } 40 | 41 | function cleanup() { 42 | return sanityCheck(); 43 | } 44 | 45 | function average(n) { 46 | for (var i = 0; i < n; ++i) { 47 | var sum = 0.0; 48 | for (var j = 0, l = a.length; j < l; ++j) { 49 | sum += a[j]; 50 | } 51 | } 52 | return sum/a.length; 53 | } 54 | 55 | function simdAverageLoad(n) { 56 | var a_length = a.length; 57 | for (var i = 0; i < n; ++i) { 58 | var sum4 = SIMD.Float32x4.splat(0.0); 59 | for (var j = 0; j < a_length; ++j) { 60 | sum4 = SIMD.Float32x4.add(sum4, SIMD.Float32x4.load1(a, j)); 61 | } 62 | } 63 | return (SIMD.Float32x4.extractLane(sum4, 0) + 64 | SIMD.Float32x4.extractLane(sum4, 1) + 65 | SIMD.Float32x4.extractLane(sum4, 2) + 66 | 
SIMD.Float32x4.extractLane(sum4, 3)) / a.length; 67 | } 68 | 69 | } ()); 70 | -------------------------------------------------------------------------------- /src/benchmarks/averageFloat32x4LoadXY.js: -------------------------------------------------------------------------------- 1 | // Simple performance test of SIMD.add operation. Use SIMD.add to average up elements 2 | // in a Float32Array. Compare to scalar implementation of same function. 3 | // Author: Peter Jensen 4 | 5 | (function () { 6 | 7 | // Kernel configuration 8 | var kernelConfig = { 9 | kernelName: "AverageFloat32x4LoadXY", 10 | kernelInit: initArray, 11 | kernelCleanup: cleanup, 12 | kernelSimd: simdAverageLoad, 13 | kernelNonSimd: average, 14 | kernelIterations: 1000 15 | }; 16 | 17 | // Hook up to the harness 18 | benchmarks.add(new Benchmark(kernelConfig)); 19 | 20 | // Benchmark data, initialization and kernel functions 21 | var a = new Float32Array(10000); 22 | var a1 = new Float32Array(10000); 23 | var b = new Int8Array(a.buffer); 24 | 25 | function sanityCheck() { 26 | return true; // NOTE(review): this early return makes the comparison on the next line unreachable, so initArray() and cleanup() always report success - confirm this is intentional 27 | return Math.abs(average(1) - simdAverageLoad(1)) < 0.0001; 28 | } 29 | 30 | function initArray() { 31 | var j = 0; 32 | for (var i = 0, l = a.length; i < l; ++i) { 33 | a[i] = 0.1; 34 | } 35 | // Check that the two kernel functions yield roughly the same result. 36 | // The tolerance in sanityCheck() allows for simdAverageLoad() summing in float32 37 | // precision while average() sums in double precision 38 | return sanityCheck(); 39 | } 40 | 41 | function cleanup() { 42 | return sanityCheck(); 43 | } 44 | 45 | function average(n) { 46 | for (var i = 0; i < n; ++i) { 47 | var sum = 0.0; 48 | for (var j = 0, l = a.length; j < l; ++j) { 49 | sum += a[j]; 50 | } 51 | } 52 | return sum/a.length; 53 | } 54 | 55 | function simdAverageLoad(n) { 56 | var a_length = a.length; 57 | for (var i = 0; i < n; ++i) { 58 | var sum4 = SIMD.Float32x4.splat(0.0); 59 | for (var j = 0; j < a_length / 2; ++j) { 60 | sum4 = 
SIMD.Float32x4.add(sum4, SIMD.Float32x4.load2(a, j << 1)); 61 | //SIMD.Float32x4.store(a1, j << 2, sum4); 62 | } 63 | } 64 | return (SIMD.Float32x4.extractLane(sum4, 0) + 65 | SIMD.Float32x4.extractLane(sum4, 1) + 66 | SIMD.Float32x4.extractLane(sum4, 2) + 67 | SIMD.Float32x4.extractLane(sum4, 3)) / a.length; 68 | } 69 | 70 | } ()); 71 | -------------------------------------------------------------------------------- /src/benchmarks/averageFloat32x4LoadXYZ.js: -------------------------------------------------------------------------------- 1 | // Simple performance test of SIMD.add operation. Use SIMD.add to average up elements 2 | // in a Float32Array. Compare to scalar implementation of same function. 3 | // Author: Peter Jensen 4 | 5 | (function () { 6 | 7 | // Kernel configuration 8 | var kernelConfig = { 9 | kernelName: "AverageFloat32x4LoadXYZ", 10 | kernelInit: initArray, 11 | kernelCleanup: cleanup, 12 | kernelSimd: simdAverageLoad, 13 | kernelNonSimd: average, 14 | kernelIterations: 1000 15 | }; 16 | 17 | // Hook up to the harness 18 | benchmarks.add(new Benchmark(kernelConfig)); 19 | 20 | // Benchmark data, initialization and kernel functions 21 | var a = new Float32Array(9999); 22 | var a1 = new Float32Array(9999); 23 | var b = new Int8Array(a.buffer); 24 | 25 | function sanityCheck() { 26 | return true; // NOTE(review): this early return makes the comparison on the next line unreachable, so initArray() and cleanup() always report success - confirm this is intentional 27 | return Math.abs(average(1) - simdAverageLoad(1)) < 0.0001; 28 | } 29 | 30 | function initArray() { 31 | var j = 0; 32 | for (var i = 0, l = a.length; i < l; ++i) { 33 | a[i] = 0.1; 34 | } 35 | // Check that the two kernel functions yield roughly the same result. 36 | // The tolerance in sanityCheck() allows for simdAverageLoad() summing in float32 37 | // precision while average() sums in double precision 38 | return sanityCheck(); 39 | } 40 | 41 | function cleanup() { 42 | return sanityCheck(); 43 | } 44 | 45 | function average(n) { 46 | for (var i = 0; i < n; ++i) { 47 | var sum = 0.0; 48 | for (var j = 0, l = a.length; j < l; ++j) { 49 | sum += a[j]; 
50 | } 51 | } 52 | return sum/a.length; 53 | } 54 | 55 | function simdAverageLoad(n) { 56 | var a_length = a.length; 57 | for (var i = 0; i < n; ++i) { 58 | var sum4 = SIMD.Float32x4.splat(0.0); 59 | for (var j = 0; j < a_length / 3 ; ++j) { 60 | sum4 = SIMD.Float32x4.add(sum4, SIMD.Float32x4.load3(a, j * 3)); 61 | //SIMD.Float32x4.store(a1, j << 2, sum4); 62 | } 63 | } 64 | return (SIMD.Float32x4.extractLane(sum4, 0) + 65 | SIMD.Float32x4.extractLane(sum4, 1) + 66 | SIMD.Float32x4.extractLane(sum4, 2) + 67 | SIMD.Float32x4.extractLane(sum4, 3)) / a.length; 68 | } 69 | 70 | } ()); 71 | -------------------------------------------------------------------------------- /src/benchmarks/averageInt32x4Load.js: -------------------------------------------------------------------------------- 1 | // Simple performance test of SIMD.add operation. Use SIMD.add to average up elements 2 | // in an Int32Array. Compare to scalar implementation of same function. 3 | // Author: Peter Jensen 4 | 5 | (function () { 6 | 7 | // Kernel configuration 8 | var kernelConfig = { 9 | kernelName: "AverageInt32x4Load", 10 | kernelInit: initArray, 11 | kernelCleanup: cleanup, 12 | kernelSimd: simdAverageLoad, 13 | kernelNonSimd: average, 14 | kernelIterations: 1000 15 | }; 16 | 17 | // Hook up to the harness 18 | benchmarks.add(new Benchmark(kernelConfig)); 19 | 20 | // Benchmark data, initialization and kernel functions 21 | var a = new Int32Array(10000); 22 | var a1 = new Int32Array(10000); 23 | var b = new Int8Array(a.buffer); 24 | 25 | function sanityCheck() { 26 | return true; // NOTE(review): this early return makes the comparison on the next line unreachable, so initArray() and cleanup() always report success - confirm this is intentional 27 | return Math.abs(average(1) - simdAverageLoad(1)) < 0.0001; 28 | } 29 | 30 | function initArray() { 31 | var j = 0; 32 | for (var i = 0, l = a.length; i < l; ++i) { 33 | a[i] = 1; 34 | } 35 | // Check that the two kernel functions yield roughly the same result. 36 | // NOTE(review): simdAverageLoad() in this file accumulates with SIMD.Int32x4.add, 37 | // so the float32-vs-double remark carried by the Float32x4 variants does not apply here 38 | return 
sanityCheck(); 39 | } 40 | 41 | function cleanup() { 42 | return sanityCheck(); 43 | } 44 | 45 | function average(n) { 46 | for (var i = 0; i < n; ++i) { 47 | var sum = 0.0; 48 | for (var j = 0, l = a.length; j < l; ++j) { 49 | sum += a[j]; 50 | } 51 | } 52 | return sum/a.length; 53 | } 54 | 55 | function simdAverageLoad(n) { 56 | var a_length = a.length; 57 | for (var i = 0; i < n; ++i) { 58 | var sum4 = SIMD.Int32x4.splat(0); 59 | for (var j = 0; j < a_length / 4; ++j) { 60 | sum4 = SIMD.Int32x4.add(sum4, SIMD.Int32x4.load(a, j << 2)); 61 | } 62 | } 63 | return (SIMD.Int32x4.extractLane(sum4, 0) + 64 | SIMD.Int32x4.extractLane(sum4, 1) + 65 | SIMD.Int32x4.extractLane(sum4, 2) + 66 | SIMD.Int32x4.extractLane(sum4, 3)) / a.length; 67 | } 68 | 69 | } ()); 70 | -------------------------------------------------------------------------------- /src/benchmarks/base.js: -------------------------------------------------------------------------------- 1 | // SIMD Kernel Benchmark Harness 2 | // Author: Peter Jensen 3 | 4 | function Benchmark (config) { 5 | this.config = config; 6 | this.initOk = true; // Initialize all properties used on a Benchmark object 7 | this.cleanupOk = true; 8 | this.useAutoIterations = true; 9 | this.autoIterations = 0; 10 | this.actualIterations = 0; 11 | this.simdTime = 0; 12 | this.nonSimdTime = 0; 13 | } 14 | 15 | function Benchmarks () { 16 | this.benchmarks = []; 17 | } 18 | 19 | Benchmarks.prototype.add = function (benchmark) { 20 | this.benchmarks.push (benchmark); 21 | return this.benchmarks.length - 1; 22 | } 23 | 24 | Benchmarks.prototype.runOne = function (benchmark) { 25 | 26 | function timeKernel(kernel, iterations) { 27 | var start, stop; 28 | start = Date.now(); 29 | kernel(iterations); 30 | stop = Date.now(); 31 | return stop - start; 32 | } 33 | 34 | function computeIterations() { 35 | var desiredRuntime = 1000; // milliseconds for longest running kernel 36 | var testIterations = 10; // iterations used to determine time for 
desiredRuntime 37 | 38 | // Make the slowest kernel run for at least 500ms 39 | var simdTime = timeKernel(benchmark.config.kernelSimd, testIterations); 40 | var nonSimdTime = timeKernel(benchmark.config.kernelNonSimd, testIterations); 41 | var maxTime = simdTime > nonSimdTime ? simdTime : nonSimdTime; 42 | while (maxTime < 500) { 43 | testIterations *= 2; 44 | simdTime = timeKernel(benchmark.config.kernelSimd, testIterations); 45 | nonSimdTime = timeKernel(benchmark.config.kernelNonSimd, testIterations); 46 | maxTime = simdTime > nonSimdTime ? simdTime : nonSimdTime; 47 | } 48 | maxTime = simdTime > nonSimdTime ? simdTime : nonSimdTime; 49 | 50 | // Compute iteration count for 1 second run of slowest kernel 51 | var iterations = Math.ceil(desiredRuntime * testIterations / maxTime); 52 | return iterations; 53 | } 54 | 55 | // Initialize the kernels and check the correctness status 56 | if (!benchmark.config.kernelInit()) { 57 | benchmark.initOk = false; 58 | return false; 59 | } 60 | 61 | // Determine how many iterations to use. 
62 | if (benchmark.useAutoIterations) { 63 | benchmark.autoIterations = computeIterations(); 64 | benchmark.actualIterations = benchmark.autoIterations; 65 | } 66 | else { 67 | benchmark.actualIterations = benchmark.config.kernelIterations; 68 | } 69 | 70 | // Run the SIMD kernel 71 | benchmark.simdTime = timeKernel(benchmark.config.kernelSimd, benchmark.actualIterations); 72 | 73 | // Run the non-SIMD kernel 74 | benchmark.nonSimdTime = timeKernel(benchmark.config.kernelNonSimd, benchmark.actualIterations); 75 | 76 | // Do the final sanity check 77 | if (!benchmark.config.kernelCleanup()) { 78 | benchmark.cleanupOk = false; 79 | return false; 80 | } 81 | 82 | return true; 83 | } 84 | 85 | Benchmarks.prototype.report = function (benchmark, outputFunctions) { 86 | 87 | function fillRight(str, width) { 88 | str += ""; // make sure it's a string 89 | while (str.length < width) { 90 | str += " "; 91 | } 92 | return str; 93 | } 94 | 95 | function fillLeft(str, width) { 96 | str += ""; // make sure it's a string 97 | while (str.length < width) { 98 | str = " " + str; 99 | } 100 | return str; 101 | } 102 | 103 | if (!benchmark.initOk) { 104 | outputFunctions.notifyError(fillRight(benchmark.config.kernelName + ": ", 23) + "FAILED INIT"); 105 | return; 106 | } 107 | if (!benchmark.cleanupOk) { 108 | outputFunctions.notifyError(fillRight(benchmark.config.kernelName + ": ", 23) + "FAILED CLEANUP"); 109 | return; 110 | } 111 | 112 | var ratio = benchmark.nonSimdTime / benchmark.simdTime; 113 | outputFunctions.notifyResult( 114 | fillRight(benchmark.config.kernelName + ": ", 23) + 115 | "Iterations(" + fillLeft(benchmark.actualIterations, 10) + ")" + 116 | ", SIMD(" + fillLeft(benchmark.simdTime + "ms)", 8) + 117 | ", Non-SIMD(" + fillLeft(benchmark.nonSimdTime + "ms)", 8) + 118 | ", Speedup(" + ratio.toFixed(3) + ")"); 119 | } 120 | 121 | Benchmarks.prototype.runAll = function (outputFunctions, useAutoIterations) { 122 | if (typeof useAutoIterations === "undefined") { 123 | 
useAutoIterations = false; 124 | } 125 | for (var i = 0, n = this.benchmarks.length; i < n; ++i) { 126 | var benchmark = this.benchmarks[i]; 127 | benchmark.useAutoIterations = useAutoIterations; 128 | this.runOne(benchmark); 129 | this.report(benchmark, outputFunctions); 130 | } 131 | } 132 | 133 | var benchmarks = new Benchmarks (); 134 | -------------------------------------------------------------------------------- /src/benchmarks/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | EcmaScript SIMD benchmarks 6 | 7 | 8 |
Running benchmarks...

9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /src/benchmarks/inverse4x4.js: -------------------------------------------------------------------------------- 1 | // Kernel for doing a 4x4 Matrix Inverse operation 2 | // Based on Cramer's rule. 3 | // See: ftp://download.intel.com/design/PentiumIII/sml/24504301.pdf 4 | // Author: Peter Jensen 5 | (function () { 6 | 7 | // Kernel configuration 8 | var kernelConfig = { 9 | kernelName: "Matrix4x4Inverse", 10 | kernelInit: init, 11 | kernelCleanup: cleanup, 12 | kernelSimd: simdMatrixInverseN, 13 | kernelNonSimd: nonSimdMatrixInverseN, 14 | kernelIterations: 1000 15 | }; 16 | 17 | // Hook up to the harness 18 | benchmarks.add (new Benchmark (kernelConfig)); 19 | 20 | // Global Variables 21 | var src = new Float32Array(16); // Source matrix 22 | var dst = new Float32Array(16); // Result matrix 23 | var tsrc = new Float32Array(16); // Transposed version of 'src' 24 | var tmp = new Float32Array(12); // Temporary array of multiply results 25 | var ident = new Float32Array( 26 | [1,0,0,0, 27 | 0,1,0,0, 28 | 0,0,1,0, 29 | 0,0,0,1]); 30 | 31 | function printMatrix(matrix) { 32 | for (var r = 0; r < 4; ++r) { 33 | var str = ""; 34 | var ri = r*4; 35 | for (var c = 0; c < 4; ++c) { 36 | var value = matrix[ri + c]; 37 | str += " " + value.toFixed(2); 38 | } 39 | print(str); 40 | } 41 | } 42 | 43 | function initMatrix(matrix) { 44 | // These values were chosen somewhat randomly, but they will at least yield a solution. 
45 | matrix [0] = 0; matrix[1] = 1; matrix[2] = 2; matrix[3] = 3; 46 | matrix [4] = -1; matrix[5] = -2; matrix[6] = -3; matrix[7] = -4; 47 | matrix [8] = 0; matrix[9] = 0; matrix[10] = 2; matrix[11] = 3; 48 | matrix [12] = -1; matrix[13] = -2; matrix[14] = 0; matrix[15] = -4; 49 | } 50 | 51 | function mulMatrix(dst, op1, op2) { 52 | for (var r = 0; r < 4; ++r) { 53 | for (var c = 0; c < 4; ++c) { 54 | var ri = 4*r; 55 | dst[ri + c] = op1[ri]*op2[c] + op1[ri+1]*op2[c+4] + op1[ri+2]*op2[c+8] + op1[ri+3]*op2[c+12] 56 | } 57 | } 58 | } 59 | 60 | function checkMatrix(matrix) { 61 | // when multiplied with the src matrix it should yield the identity matrix 62 | mulMatrix(tsrc, src, matrix); 63 | for (var i = 0; i < 16; ++i) { 64 | if (Math.abs (tsrc[i] - ident[i]) > 0.00001) { 65 | return false; 66 | } 67 | } 68 | // printMatrix (tsrc); 69 | return true; 70 | } 71 | 72 | // Kernel Initializer 73 | function init() { 74 | initMatrix(src); 75 | // printMatrix(src); 76 | nonSimdMatrixInverseN(1); 77 | // printMatrix(dst); 78 | if (!checkMatrix(dst)) { 79 | return false; 80 | } 81 | 82 | initMatrix(src); 83 | simdMatrixInverseN(1); 84 | // printMatrix(dst); 85 | if (!checkMatrix(dst)) { 86 | return false; 87 | } 88 | 89 | return true; 90 | } 91 | 92 | function cleanup() { 93 | return init(); 94 | } 95 | 96 | function simdMatrixInverse() { 97 | var src0, src1, src2, src3; 98 | var row0, row1, row2, row3; 99 | var tmp1; 100 | var minor0, minor1, minor2, minor3; 101 | var det; 102 | 103 | // Load the 4 rows 104 | var src0 = SIMD.Float32x4.load(src, 0); 105 | var src1 = SIMD.Float32x4.load(src, 4); 106 | var src2 = SIMD.Float32x4.load(src, 8); 107 | var src3 = SIMD.Float32x4.load(src, 16); 108 | 109 | // Transpose the source matrix. Sort of. 
Not a true transpose operation 110 | 111 | tmp1 = SIMD.Float32x4.shuffle(src0, src1, 0, 1, 4, 5); 112 | row1 = SIMD.Float32x4.shuffle(src2, src3, 0, 1, 4, 5); 113 | row0 = SIMD.Float32x4.shuffle(tmp1, row1, 0, 2, 4, 6); 114 | row1 = SIMD.Float32x4.shuffle(row1, tmp1, 1, 3, 5, 7); 115 | 116 | tmp1 = SIMD.Float32x4.shuffle(src0, src1, 2, 3, 6, 7); 117 | row3 = SIMD.Float32x4.shuffle(src2, src3, 2, 3, 6, 7); 118 | row2 = SIMD.Float32x4.shuffle(tmp1, row3, 0, 2, 4, 6); 119 | row3 = SIMD.Float32x4.shuffle(row3, tmp1, 1, 3, 5, 7); 120 | 121 | // This is a true transposition, but it will lead to an incorrect result 122 | 123 | //tmp1 = SIMD.Float32x4.shuffle(src0, src1, 0, 1, 4, 5); 124 | //tmp2 = SIMD.Float32x4.shuffle(src2, src3, 0, 1, 4, 5); 125 | //row0 = SIMD.Float32x4.shuffle(tmp1, tmp2, 0, 2, 4, 6); 126 | //row1 = SIMD.Float32x4.shuffle(tmp1, tmp2, 1, 3, 5, 7); 127 | 128 | //tmp1 = SIMD.Float32x4.shuffle(src0, src1, 2, 3, 6, 7); 129 | //tmp2 = SIMD.Float32x4.shuffle(src2, src3, 2, 3, 6, 7); 130 | //row2 = SIMD.Float32x4.shuffle(tmp1, tmp2, 0, 2, 4, 6); 131 | //row3 = SIMD.Float32x4.shuffle(tmp1, tmp2, 1, 3, 5, 7); 132 | 133 | // ---- 134 | tmp1 = SIMD.Float32x4.mul(row2, row3); 135 | tmp1 = SIMD.Float32x4.swizzle(tmp1, 1, 0, 3, 2); // 0xB1 = 10110001 136 | minor0 = SIMD.Float32x4.mul(row1, tmp1); 137 | minor1 = SIMD.Float32x4.mul(row0, tmp1); 138 | tmp1 = SIMD.Float32x4.swizzle(tmp1, 2, 3, 0, 1); // 0x4E = 01001110 139 | minor0 = SIMD.Float32x4.sub(SIMD.Float32x4.mul(row1, tmp1), minor0); 140 | minor1 = SIMD.Float32x4.sub(SIMD.Float32x4.mul(row0, tmp1), minor1); 141 | minor1 = SIMD.Float32x4.swizzle(minor1, 2, 3, 0, 1); // 0x4E = 01001110 142 | 143 | // ---- 144 | tmp1 = SIMD.Float32x4.mul(row1, row2); 145 | tmp1 = SIMD.Float32x4.swizzle(tmp1, 1, 0, 3, 2); // 0xB1 = 10110001 146 | minor0 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row3, tmp1), minor0); 147 | minor3 = SIMD.Float32x4.mul(row0, tmp1); 148 | tmp1 = SIMD.Float32x4.swizzle(tmp1, 2, 3, 0, 1); // 0x4E = 
01001110 149 | minor0 = SIMD.Float32x4.sub(minor0, SIMD.Float32x4.mul(row3, tmp1)); 150 | minor3 = SIMD.Float32x4.sub(SIMD.Float32x4.mul(row0, tmp1), minor3); 151 | minor3 = SIMD.Float32x4.swizzle(minor3, 2, 3, 0, 1); // 0x4E = 01001110 152 | 153 | // ---- 154 | tmp1 = SIMD.Float32x4.mul(SIMD.Float32x4.swizzle(row1, 2, 3, 0, 1), row3); // 0x4E = 01001110 155 | tmp1 = SIMD.Float32x4.swizzle(tmp1, 1, 0, 3, 2); // 0xB1 = 10110001 156 | row2 = SIMD.Float32x4.swizzle(row2, 2, 3, 0, 1); // 0x4E = 01001110 157 | minor0 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row2, tmp1), minor0); 158 | minor2 = SIMD.Float32x4.mul(row0, tmp1); 159 | tmp1 = SIMD.Float32x4.swizzle(tmp1, 2, 3, 0, 1); // 0x4E = 01001110 160 | minor0 = SIMD.Float32x4.sub(minor0, SIMD.Float32x4.mul(row2, tmp1)); 161 | minor2 = SIMD.Float32x4.sub(SIMD.Float32x4.mul(row0, tmp1), minor2); 162 | minor2 = SIMD.Float32x4.swizzle(minor2, 2, 3, 0, 1); // 0x4E = 01001110 163 | 164 | // ---- 165 | tmp1 = SIMD.Float32x4.mul(row0, row1); 166 | tmp1 = SIMD.Float32x4.swizzle(tmp1, 1, 0, 3, 2); // 0xB1 = 10110001 167 | minor2 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row3, tmp1), minor2); 168 | minor3 = SIMD.Float32x4.sub(SIMD.Float32x4.mul(row2, tmp1), minor3); 169 | tmp1 = SIMD.Float32x4.swizzle(tmp1, 2, 3, 0, 1); // 0x4E = 01001110 170 | minor2 = SIMD.Float32x4.sub(SIMD.Float32x4.mul(row3, tmp1), minor2); 171 | minor3 = SIMD.Float32x4.sub(minor3, SIMD.Float32x4.mul(row2, tmp1)); 172 | 173 | // ---- 174 | tmp1 = SIMD.Float32x4.mul(row0, row3); 175 | tmp1 = SIMD.Float32x4.swizzle(tmp1, 1, 0, 3, 2); // 0xB1 = 10110001 176 | minor1 = SIMD.Float32x4.sub(minor1, SIMD.Float32x4.mul(row2, tmp1)); 177 | minor2 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row1, tmp1), minor2); 178 | tmp1 = SIMD.Float32x4.swizzle(tmp1, 2, 3, 0, 1); // 0x4E = 01001110 179 | minor1 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row2, tmp1), minor1); 180 | minor2 = SIMD.Float32x4.sub(minor2, SIMD.Float32x4.mul(row1, tmp1)); 181 | 182 | // ---- 183 | tmp1 = 
SIMD.Float32x4.mul(row0, row2); 184 | tmp1 = SIMD.Float32x4.swizzle(tmp1, 1, 0, 3, 2); // 0xB1 = 10110001 185 | minor1 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row3, tmp1), minor1); 186 | minor3 = SIMD.Float32x4.sub(minor3, SIMD.Float32x4.mul(row1, tmp1)); 187 | tmp1 = SIMD.Float32x4.swizzle(tmp1, 2, 3, 0, 1); // 0x4E = 01001110 188 | minor1 = SIMD.Float32x4.sub(minor1, SIMD.Float32x4.mul(row3, tmp1)); 189 | minor3 = SIMD.Float32x4.add(SIMD.Float32x4.mul(row1, tmp1), minor3); 190 | 191 | // Compute determinant 192 | det = SIMD.Float32x4.mul(row0, minor0); 193 | det = SIMD.Float32x4.add(SIMD.Float32x4.swizzle(det, 2, 3, 0, 1), det); // 0x4E = 01001110 194 | det = SIMD.Float32x4.add(SIMD.Float32x4.swizzle(det, 1, 0, 3, 2), det); // 0xB1 = 10110001 195 | tmp1 = SIMD.Float32x4.reciprocalApproximation(det); 196 | det = SIMD.Float32x4.sub(SIMD.Float32x4.add(tmp1, tmp1), SIMD.Float32x4.mul(det, SIMD.Float32x4.mul(tmp1, tmp1))); 197 | det = SIMD.Float32x4.swizzle(det, 0, 0, 0, 0); 198 | 199 | // These shuffles aren't necessary if the faulty transposition is done 200 | // up at the top of this function. 
201 | //minor0 = SIMD.Float32x4.swizzle(minor0, 2, 1, 0, 3); 202 | //minor1 = SIMD.Float32x4.swizzle(minor1, 2, 1, 0, 3); 203 | //minor2 = SIMD.Float32x4.swizzle(minor2, 2, 1, 0, 3); 204 | //minor3 = SIMD.Float32x4.swizzle(minor3, 2, 1, 0, 3); 205 | 206 | // Compute final values by multiplying with 1/det 207 | minor0 = SIMD.Float32x4.mul(det, minor0); 208 | minor1 = SIMD.Float32x4.mul(det, minor1); 209 | minor2 = SIMD.Float32x4.mul(det, minor2); 210 | minor3 = SIMD.Float32x4.mul(det, minor3); 211 | 212 | SIMD.Float32x4.store(dst, 0, minor0); 213 | SIMD.Float32x4.store(dst, 4, minor1); 214 | SIMD.Float32x4.store(dst, 8, minor2); 215 | SIMD.Float32x4.store(dst, 12, minor3); 216 | } 217 | 218 | function nonSimdMatrixInverse() { 219 | 220 | // Transpose the source matrix 221 | for (var i = 0; i < 4; i++) { 222 | tsrc[i] = src[i*4]; 223 | tsrc[i + 4] = src[i*4 + 1]; 224 | tsrc[i + 8] = src[i*4 + 2]; 225 | tsrc[i + 12] = src[i*4 + 3]; 226 | } 227 | 228 | // Calculate pairs for first 8 elements (cofactors) 229 | tmp[0] = tsrc[10] * tsrc[15]; 230 | tmp[1] = tsrc[11] * tsrc[14]; 231 | tmp[2] = tsrc[9] * tsrc[15]; 232 | tmp[3] = tsrc[11] * tsrc[13]; 233 | tmp[4] = tsrc[9] * tsrc[14]; 234 | tmp[5] = tsrc[10] * tsrc[13]; 235 | tmp[6] = tsrc[8] * tsrc[15]; 236 | tmp[7] = tsrc[11] * tsrc[12]; 237 | tmp[8] = tsrc[8] * tsrc[14]; 238 | tmp[9] = tsrc[10] * tsrc[12]; 239 | tmp[10] = tsrc[8] * tsrc[13]; 240 | tmp[11] = tsrc[9] * tsrc[12]; 241 | 242 | // calculate first 8 elements (cofactors) 243 | dst[0] = tmp[0]*tsrc[5] + tmp[3]*tsrc[6] + tmp[4]*tsrc[7]; 244 | dst[0] -= tmp[1]*tsrc[5] + tmp[2]*tsrc[6] + tmp[5]*tsrc[7]; 245 | dst[1] = tmp[1]*tsrc[4] + tmp[6]*tsrc[6] + tmp[9]*tsrc[7]; 246 | dst[1] -= tmp[0]*tsrc[4] + tmp[7]*tsrc[6] + tmp[8]*tsrc[7]; 247 | dst[2] = tmp[2]*tsrc[4] + tmp[7]*tsrc[5] + tmp[10]*tsrc[7]; 248 | dst[2] -= tmp[3]*tsrc[4] + tmp[6]*tsrc[5] + tmp[11]*tsrc[7]; 249 | dst[3] = tmp[5]*tsrc[4] + tmp[8]*tsrc[5] + tmp[11]*tsrc[6]; 250 | dst[3] -= tmp[4]*tsrc[4] + 
tmp[9]*tsrc[5] + tmp[10]*tsrc[6]; 251 | dst[4] = tmp[1]*tsrc[1] + tmp[2]*tsrc[2] + tmp[5]*tsrc[3]; 252 | dst[4] -= tmp[0]*tsrc[1] + tmp[3]*tsrc[2] + tmp[4]*tsrc[3]; 253 | dst[5] = tmp[0]*tsrc[0] + tmp[7]*tsrc[2] + tmp[8]*tsrc[3]; 254 | dst[5] -= tmp[1]*tsrc[0] + tmp[6]*tsrc[2] + tmp[9]*tsrc[3]; 255 | dst[6] = tmp[3]*tsrc[0] + tmp[6]*tsrc[1] + tmp[11]*tsrc[3]; 256 | dst[6] -= tmp[2]*tsrc[0] + tmp[7]*tsrc[1] + tmp[10]*tsrc[3]; 257 | dst[7] = tmp[4]*tsrc[0] + tmp[9]*tsrc[1] + tmp[10]*tsrc[2]; 258 | dst[7] -= tmp[5]*tsrc[0] + tmp[8]*tsrc[1] + tmp[11]*tsrc[2]; 259 | 260 | // calculate pairs for second 8 elements (cofactors) 261 | tmp[0] = tsrc[2]*tsrc[7]; 262 | tmp[1] = tsrc[3]*tsrc[6]; 263 | tmp[2] = tsrc[1]*tsrc[7]; 264 | tmp[3] = tsrc[3]*tsrc[5]; 265 | tmp[4] = tsrc[1]*tsrc[6]; 266 | tmp[5] = tsrc[2]*tsrc[5]; 267 | tmp[6] = tsrc[0]*tsrc[7]; 268 | tmp[7] = tsrc[3]*tsrc[4]; 269 | tmp[8] = tsrc[0]*tsrc[6]; 270 | tmp[9] = tsrc[2]*tsrc[4]; 271 | tmp[10] = tsrc[0]*tsrc[5]; 272 | tmp[11] = tsrc[1]*tsrc[4]; 273 | 274 | // calculate second 8 elements (cofactors) 275 | dst[8] = tmp[0]*tsrc[13] + tmp[3]*tsrc[14] + tmp[4]*tsrc[15]; 276 | dst[8] -= tmp[1]*tsrc[13] + tmp[2]*tsrc[14] + tmp[5]*tsrc[15]; 277 | dst[9] = tmp[1]*tsrc[12] + tmp[6]*tsrc[14] + tmp[9]*tsrc[15]; 278 | dst[9] -= tmp[0]*tsrc[12] + tmp[7]*tsrc[14] + tmp[8]*tsrc[15]; 279 | dst[10] = tmp[2]*tsrc[12] + tmp[7]*tsrc[13] + tmp[10]*tsrc[15]; 280 | dst[10]-= tmp[3]*tsrc[12] + tmp[6]*tsrc[13] + tmp[11]*tsrc[15]; 281 | dst[11] = tmp[5]*tsrc[12] + tmp[8]*tsrc[13] + tmp[11]*tsrc[14]; 282 | dst[11]-= tmp[4]*tsrc[12] + tmp[9]*tsrc[13] + tmp[10]*tsrc[14]; 283 | dst[12] = tmp[2]*tsrc[10] + tmp[5]*tsrc[11] + tmp[1]*tsrc[9]; 284 | dst[12]-= tmp[4]*tsrc[11] + tmp[0]*tsrc[9] + tmp[3]*tsrc[10]; 285 | dst[13] = tmp[8]*tsrc[11] + tmp[0]*tsrc[8] + tmp[7]*tsrc[10]; 286 | dst[13]-= tmp[6]*tsrc[10] + tmp[9]*tsrc[11] + tmp[1]*tsrc[8]; 287 | dst[14] = tmp[6]*tsrc[9] + tmp[11]*tsrc[11] + tmp[3]*tsrc[8]; 288 | dst[14]-= tmp[10]*tsrc[11] + 
// SIMD version of the kernel.
// Runs the 4x4 inversion `n` times. Every pass loads 16 floats from the
// enclosing-scope `src` array and stores the 16 resulting floats into the
// enclosing-scope `dst` array.
function simdMatrixInverseN(n) {
  var f4 = SIMD.Float32x4;
  var src0, src1, src2, src3;
  var row0, row1, row2, row3;
  var t;
  var minor0, minor1, minor2, minor3;
  var det;

  for (var pass = 0; pass < n; ++pass) {
    // Load the 4 rows
    src0 = f4.load(src, 0);
    src1 = f4.load(src, 4);
    src2 = f4.load(src, 8);
    src3 = f4.load(src, 12);

    // Transpose the source matrix. Sort of: row1 and row3 come out with their
    // two halves swapped (lanes taken from src2, src3, src0, src1). The
    // cofactor steps below are written for exactly this layout; a true
    // transposition here leads to an incorrect result.
    t    = f4.shuffle(src0, src1, 0, 1, 4, 5);
    row1 = f4.shuffle(src2, src3, 0, 1, 4, 5);
    row0 = f4.shuffle(t, row1, 0, 2, 4, 6);
    row1 = f4.shuffle(row1, t, 1, 3, 5, 7);

    t    = f4.shuffle(src0, src1, 2, 3, 6, 7);
    row3 = f4.shuffle(src2, src3, 2, 3, 6, 7);
    row2 = f4.shuffle(t, row3, 0, 2, 4, 6);
    row3 = f4.shuffle(row3, t, 1, 3, 5, 7);

    // ---- row2 * row3 products
    t = f4.swizzle(f4.mul(row2, row3), 1, 0, 3, 2); // 0xB1 = 10110001
    minor0 = f4.mul(row1, t);
    minor1 = f4.mul(row0, t);
    t = f4.swizzle(t, 2, 3, 0, 1); // 0x4E = 01001110
    minor0 = f4.sub(f4.mul(row1, t), minor0);
    minor1 = f4.sub(f4.mul(row0, t), minor1);
    minor1 = f4.swizzle(minor1, 2, 3, 0, 1); // 0x4E = 01001110

    // ---- row1 * row2 products
    t = f4.swizzle(f4.mul(row1, row2), 1, 0, 3, 2); // 0xB1 = 10110001
    minor0 = f4.add(f4.mul(row3, t), minor0);
    minor3 = f4.mul(row0, t);
    t = f4.swizzle(t, 2, 3, 0, 1); // 0x4E = 01001110
    minor0 = f4.sub(minor0, f4.mul(row3, t));
    minor3 = f4.sub(f4.mul(row0, t), minor3);
    minor3 = f4.swizzle(minor3, 2, 3, 0, 1); // 0x4E = 01001110

    // ---- (row1 with halves swapped) * row3 products
    t = f4.mul(f4.swizzle(row1, 2, 3, 0, 1), row3); // 0x4E = 01001110
    t = f4.swizzle(t, 1, 0, 3, 2); // 0xB1 = 10110001
    row2 = f4.swizzle(row2, 2, 3, 0, 1); // 0x4E = 01001110
    minor0 = f4.add(f4.mul(row2, t), minor0);
    minor2 = f4.mul(row0, t);
    t = f4.swizzle(t, 2, 3, 0, 1); // 0x4E = 01001110
    minor0 = f4.sub(minor0, f4.mul(row2, t));
    minor2 = f4.sub(f4.mul(row0, t), minor2);
    minor2 = f4.swizzle(minor2, 2, 3, 0, 1); // 0x4E = 01001110

    // ---- row0 * row1 products
    t = f4.swizzle(f4.mul(row0, row1), 1, 0, 3, 2); // 0xB1 = 10110001
    minor2 = f4.add(f4.mul(row3, t), minor2);
    minor3 = f4.sub(f4.mul(row2, t), minor3);
    t = f4.swizzle(t, 2, 3, 0, 1); // 0x4E = 01001110
    minor2 = f4.sub(f4.mul(row3, t), minor2);
    minor3 = f4.sub(minor3, f4.mul(row2, t));

    // ---- row0 * row3 products
    t = f4.swizzle(f4.mul(row0, row3), 1, 0, 3, 2); // 0xB1 = 10110001
    minor1 = f4.sub(minor1, f4.mul(row2, t));
    minor2 = f4.add(f4.mul(row1, t), minor2);
    t = f4.swizzle(t, 2, 3, 0, 1); // 0x4E = 01001110
    minor1 = f4.add(f4.mul(row2, t), minor1);
    minor2 = f4.sub(minor2, f4.mul(row1, t));

    // ---- row0 * row2 products
    t = f4.swizzle(f4.mul(row0, row2), 1, 0, 3, 2); // 0xB1 = 10110001
    minor1 = f4.add(f4.mul(row3, t), minor1);
    minor3 = f4.sub(minor3, f4.mul(row1, t));
    t = f4.swizzle(t, 2, 3, 0, 1); // 0x4E = 01001110
    minor1 = f4.sub(minor1, f4.mul(row3, t));
    minor3 = f4.add(f4.mul(row1, t), minor3);

    // Compute determinant: horizontal sum of row0 * minor0 into every lane
    det = f4.mul(row0, minor0);
    det = f4.add(f4.swizzle(det, 2, 3, 0, 1), det); // 0x4E = 01001110
    det = f4.add(f4.swizzle(det, 1, 0, 3, 2), det); // 0xB1 = 10110001
    // Refine the reciprocal estimate: 1/det ~= 2*t - det*t*t
    t = f4.reciprocalApproximation(det);
    det = f4.sub(f4.add(t, t), f4.mul(det, f4.mul(t, t)));
    det = f4.swizzle(det, 0, 0, 0, 0);

    // Compute final values by multiplying with 1/det and write them out
    minor0 = f4.mul(det, minor0);
    minor1 = f4.mul(det, minor1);
    minor2 = f4.mul(det, minor2);
    minor3 = f4.mul(det, minor3);

    f4.store(dst, 0, minor0);
    f4.store(dst, 4, minor1);
    f4.store(dst, 8, minor2);
    f4.store(dst, 12, minor3);
  }
}

// Non SIMD version of the kernel.
// Runs the scalar cofactor-based 4x4 inversion `n` times, reading the
// enclosing-scope `src` array and writing `dst`; `tsrc` (16 entries) and
// `tmp` (12 entries) are enclosing-scope scratch arrays.
function nonSimdMatrixInverseN(n) {
  for (var pass = 0; pass < n; ++pass) {
    // Transpose the source matrix
    for (var r = 0; r < 4; r++) {
      for (var c = 0; c < 4; c++) {
        tsrc[c * 4 + r] = src[r * 4 + c];
      }
    }

    // Calculate pairs for first 8 elements (cofactors)
    tmp[0] = tsrc[10] * tsrc[15];
    tmp[1] = tsrc[11] * tsrc[14];
    tmp[2] = tsrc[9] * tsrc[15];
    tmp[3] = tsrc[11] * tsrc[13];
    tmp[4] = tsrc[9] * tsrc[14];
    tmp[5] = tsrc[10] * tsrc[13];
    tmp[6] = tsrc[8] * tsrc[15];
    tmp[7] = tsrc[11] * tsrc[12];
    tmp[8] = tsrc[8] * tsrc[14];
    tmp[9] = tsrc[10] * tsrc[12];
    tmp[10] = tsrc[8] * tsrc[13];
    tmp[11] = tsrc[9] * tsrc[12];

    // Calculate first 8 elements (cofactors). Each element is written and then
    // decremented in a second statement so the intermediate value is rounded
    // by the destination array exactly as before.
    dst[0] = tmp[0] * tsrc[5] + tmp[3] * tsrc[6] + tmp[4] * tsrc[7];
    dst[0] -= tmp[1] * tsrc[5] + tmp[2] * tsrc[6] + tmp[5] * tsrc[7];
    dst[1] = tmp[1] * tsrc[4] + tmp[6] * tsrc[6] + tmp[9] * tsrc[7];
    dst[1] -= tmp[0] * tsrc[4] + tmp[7] * tsrc[6] + tmp[8] * tsrc[7];
    dst[2] = tmp[2] * tsrc[4] + tmp[7] * tsrc[5] + tmp[10] * tsrc[7];
    dst[2] -= tmp[3] * tsrc[4] + tmp[6] * tsrc[5] + tmp[11] * tsrc[7];
    dst[3] = tmp[5] * tsrc[4] + tmp[8] * tsrc[5] + tmp[11] * tsrc[6];
    dst[3] -= tmp[4] * tsrc[4] + tmp[9] * tsrc[5] + tmp[10] * tsrc[6];
    dst[4] = tmp[1] * tsrc[1] + tmp[2] * tsrc[2] + tmp[5] * tsrc[3];
    dst[4] -= tmp[0] * tsrc[1] + tmp[3] * tsrc[2] + tmp[4] * tsrc[3];
    dst[5] = tmp[0] * tsrc[0] + tmp[7] * tsrc[2] + tmp[8] * tsrc[3];
    dst[5] -= tmp[1] * tsrc[0] + tmp[6] * tsrc[2] + tmp[9] * tsrc[3];
    dst[6] = tmp[3] * tsrc[0] + tmp[6] * tsrc[1] + tmp[11] * tsrc[3];
    dst[6] -= tmp[2] * tsrc[0] + tmp[7] * tsrc[1] + tmp[10] * tsrc[3];
    dst[7] = tmp[4] * tsrc[0] + tmp[9] * tsrc[1] + tmp[10] * tsrc[2];
    dst[7] -= tmp[5] * tsrc[0] + tmp[8] * tsrc[1] + tmp[11] * tsrc[2];

    // Calculate pairs for second 8 elements (cofactors)
    tmp[0] = tsrc[2] * tsrc[7];
    tmp[1] = tsrc[3] * tsrc[6];
    tmp[2] = tsrc[1] * tsrc[7];
    tmp[3] = tsrc[3] * tsrc[5];
    tmp[4] = tsrc[1] * tsrc[6];
    tmp[5] = tsrc[2] * tsrc[5];
    tmp[6] = tsrc[0] * tsrc[7];
    tmp[7] = tsrc[3] * tsrc[4];
    tmp[8] = tsrc[0] * tsrc[6];
    tmp[9] = tsrc[2] * tsrc[4];
    tmp[10] = tsrc[0] * tsrc[5];
    tmp[11] = tsrc[1] * tsrc[4];

    // Calculate second 8 elements (cofactors)
    dst[8] = tmp[0] * tsrc[13] + tmp[3] * tsrc[14] + tmp[4] * tsrc[15];
    dst[8] -= tmp[1] * tsrc[13] + tmp[2] * tsrc[14] + tmp[5] * tsrc[15];
    dst[9] = tmp[1] * tsrc[12] + tmp[6] * tsrc[14] + tmp[9] * tsrc[15];
    dst[9] -= tmp[0] * tsrc[12] + tmp[7] * tsrc[14] + tmp[8] * tsrc[15];
    dst[10] = tmp[2] * tsrc[12] + tmp[7] * tsrc[13] + tmp[10] * tsrc[15];
    dst[10] -= tmp[3] * tsrc[12] + tmp[6] * tsrc[13] + tmp[11] * tsrc[15];
    dst[11] = tmp[5] * tsrc[12] + tmp[8] * tsrc[13] + tmp[11] * tsrc[14];
    dst[11] -= tmp[4] * tsrc[12] + tmp[9] * tsrc[13] + tmp[10] * tsrc[14];
    dst[12] = tmp[2] * tsrc[10] + tmp[5] * tsrc[11] + tmp[1] * tsrc[9];
    dst[12] -= tmp[4] * tsrc[11] + tmp[0] * tsrc[9] + tmp[3] * tsrc[10];
    dst[13] = tmp[8] * tsrc[11] + tmp[0] * tsrc[8] + tmp[7] * tsrc[10];
    dst[13] -= tmp[6] * tsrc[10] + tmp[9] * tsrc[11] + tmp[1] * tsrc[8];
    dst[14] = tmp[6] * tsrc[9] + tmp[11] * tsrc[11] + tmp[3] * tsrc[8];
    dst[14] -= tmp[10] * tsrc[11] + tmp[2] * tsrc[8] + tmp[7] * tsrc[9];
    dst[15] = tmp[10] * tsrc[10] + tmp[4] * tsrc[8] + tmp[9] * tsrc[9];
    dst[15] -= tmp[8] * tsrc[9] + tmp[11] * tsrc[10] + tmp[5] * tsrc[8];

    // Calculate determinant from the first row and its cofactors
    var det = tsrc[0] * dst[0] + tsrc[1] * dst[1] + tsrc[2] * dst[2] + tsrc[3] * dst[3];

    // Calculate matrix inverse: scale every cofactor by 1/det
    det = 1 / det;
    var k = 0;
    while (k < 16) {
      dst[k] *= det;
      k++;
    }
  }
}
// ---- kernel-template.js ----

// Kernel Initializer.
// Runs one iteration of each kernel and reports whether they agree. This is
// the place to set up any data the kernels need, so the kernels themselves do
// minimal object creation.
// Returns:
//   true:  First run (unoptimized) of the kernels passed
//   false: First run (unoptimized) of the kernels failed
function init() {
  return simd(1) === nonSimd(1);
}

// Kernel Cleanup.
// Called when all the kernel iterations have been executed, so this repeats
// the sanity check against the (probably optimized) final kernel versions.
// Returns:
//   true:  Last run (optimized) of the kernels passed
//   false: Last run (optimized) of the kernels failed
function cleanup() {
  return simd(1) === nonSimd(1);
}

// SIMD version of the kernel (placeholder): sums the integers 0 .. n-1.
function simd(n) {
  var total = 0;
  var i = 0;
  while (i < n) {
    total += i;
    ++i;
  }
  return total;
}

// Non SIMD version of the kernel (placeholder): sums the integers 0 .. n-1.
function nonSimd(n) {
  var total = 0;
  var i = 0;
  while (i < n) {
    total += i;
    ++i;
  }
  return total;
}

// ---- mandelbrot.js helpers ----

// Formats the four lanes of a Float32x4 as "[a,b,c,d]".
function Float32x4ToString(f4) {
  var lanes = [];
  for (var lane = 0; lane < 4; ++lane) {
    lanes.push(SIMD.Float32x4.extractLane(f4, lane));
  }
  return "[" + lanes.join(",") + "]";
}

// Formats the four lanes of an Int32x4 as "[a,b,c,d]".
function Int32x4ToString(i4) {
  var lanes = [];
  for (var lane = 0; lane < 4; ++lane) {
    lanes.push(SIMD.Int32x4.extractLane(i4, lane));
  }
  return "[" + lanes.join(",") + "]";
}

// Scalar Mandelbrot iteration for the point c = c_re + i*c_im.
// Returns the number of iterations completed before |z|^2 exceeded 4.0,
// capped at max_iterations.
function mandelx1(c_re, c_im, max_iterations) {
  var z_re = c_re;
  var z_im = c_im;
  var count = 0;
  while (count < max_iterations) {
    var z_re2 = z_re * z_re;
    var z_im2 = z_im * z_im;
    if (z_re2 + z_im2 > 4.0) {
      break;
    }
    // Both parts of z*z + c are derived from the old z before z is updated.
    var new_im = 2.0 * z_re * z_im;
    z_re = c_re + (z_re2 - z_im2);
    z_im = c_im + new_im;
    count++;
  }
  return count;
}

// Four-lane Mandelbrot iteration: lane k handles c = c_re4[k] + i*c_im4[k].
// Returns an Int32x4 with one iteration count per lane, capped at
// max_iterations.
function mandelx4(c_re4, c_im4, max_iterations) {
  var z_re4 = c_re4;
  var z_im4 = c_im4;
  var four4 = SIMD.Float32x4.splat(4.0);
  var two4 = SIMD.Float32x4.splat(2.0);
  var count4 = SIMD.Int32x4.splat(0);
  var zero4 = SIMD.Int32x4.splat(0);
  var one4 = SIMD.Int32x4.splat(1);

  for (var i = 0; i < max_iterations; ++i) {
    var z_re24 = SIMD.Float32x4.mul(z_re4, z_re4);
    var z_im24 = SIMD.Float32x4.mul(z_im4, z_im4);

    // Lane mask: true while |z|^2 <= 4.0 for that lane.
    var mb4 = SIMD.Float32x4.lessThanOrEqual(SIMD.Float32x4.add(z_re24, z_im24), four4);
    // Stop only once all 4 values are greater than 4.0. Stopping as soon as
    // one lane escaped (allTrue) would cut the other lanes' counts short and
    // disagree with mandelx1.
    if (!SIMD.Bool32x4.anyTrue(mb4)) {
      break;
    }

    var new_re4 = SIMD.Float32x4.sub(z_re24, z_im24);
    var new_im4 = SIMD.Float32x4.mul(SIMD.Float32x4.mul(two4, z_re4), z_im4);
    z_re4 = SIMD.Float32x4.add(c_re4, new_re4);
    z_im4 = SIMD.Float32x4.add(c_im4, new_im4);
    // Only lanes that are still bounded get their count incremented.
    count4 = SIMD.Int32x4.add(count4, SIMD.Int32x4.select(mb4, one4, zero4));
  }
  return count4;
}
// Runs one iteration of both kernels and compares their per-lane results.
// Returns true when both result arrays have the same length and contents.
function sanityCheck() {
  var simdCounts = simdMandelbrot(1);
  var scalarCounts = nonSimdMandelbrot(1);
  if (simdCounts.length !== scalarCounts.length) {
    return false;
  }
  var idx = 0;
  while (idx < simdCounts.length) {
    if (simdCounts[idx] !== scalarCounts[idx]) {
      return false;
    }
    ++idx;
  }
  return true;
}

// Kernel initializer: the kernels must agree before the timed runs.
function initMandelbrot() {
  return sanityCheck();
}

// Kernel cleanup: the kernels must still agree after the timed runs.
function cleanupMandelbrot() {
  return sanityCheck();
}

// Non SIMD version of the kernel.
// Each of the n iterations evaluates the point (0.01, 0.01) four times with
// mandelx1, once per result slot. Returns the 4-element result array.
function nonSimdMandelbrot(n) {
  var result = new Array(4);
  for (var iter = 0; iter < n; ++iter) {
    for (var slot = 0; slot < 4; ++slot) {
      result[slot] = mandelx1(0.01, 0.01, 100);
    }
  }
  return result;
}

// SIMD version of the kernel.
// Each of the n iterations evaluates four copies of the point (0.01, 0.01)
// with a single mandelx4 call and unpacks the lanes into the result array.
function simdMandelbrot(n) {
  var result = new Array(4);
  var vec0 = SIMD.Float32x4.splat(0.01);
  for (var iter = 0; iter < n; ++iter) {
    var counts = mandelx4(vec0, vec0, 100);
    for (var lane = 0; lane < 4; ++lane) {
      result[lane] = SIMD.Int32x4.extractLane(counts, lane);
    }
  }
  return result;
}
// Returns true when the first 16 elements of A equal, lane by lane, the four
// Float32x4 rows loaded from b at offsets 0, 4, 8 and 12.
function equals(A, b) {
  for (var base = 0; base < 16; base += 4) {
    var row = SIMD.Float32x4.load(b, base);
    for (var lane = 0; lane < 4; ++lane) {
      if (A[base + lane] != SIMD.Float32x4.extractLane(row, lane)) {
        return false;
      }
    }
  }
  return true;
}

// Kernel initializer.
// Writes 1.0 / 2.0 onto the diagonals of T1 / T2, stores the matching
// diagonal rows into T1x / T2x, runs each kernel once and checks that inputs
// and outputs of the scalar and SIMD variants agree.
function init() {
  var d;
  for (d = 0; d < 16; d += 5) {
    T1[d] = 1.0;
  }
  for (d = 0; d < 16; d += 5) {
    T2[d] = 2.0;
  }

  var r;
  for (r = 0; r < 4; ++r) {
    SIMD.Float32x4.store(T1x, r * 4, SIMD.Float32x4(
        r === 0 ? 1.0 : 0.0, r === 1 ? 1.0 : 0.0, r === 2 ? 1.0 : 0.0, r === 3 ? 1.0 : 0.0));
  }
  for (r = 0; r < 4; ++r) {
    SIMD.Float32x4.store(T2x, r * 4, SIMD.Float32x4(
        r === 0 ? 2.0 : 0.0, r === 1 ? 2.0 : 0.0, r === 2 ? 2.0 : 0.0, r === 3 ? 2.0 : 0.0));
  }

  multiply(1);
  simdMultiply(1);
  return equals(T1, T1x) && equals(T2, T2x) && equals(Out, Outx);
}

// Kernel cleanup.
function cleanup() {
  return init(); // Sanity checking before and after are the same
}

// Non SIMD version of the kernel.
// Computes, n times, Out[r][c] = sum over k of T2[r][k] * T1[k][c] on the
// 16-element row-major arrays.
function multiply(n) {
  for (var i = 0; i < n; i++) {
    // Snapshot all of T1 before any element of Out is written.
    var a00 = T1[0],  a01 = T1[1],  a02 = T1[2],  a03 = T1[3];
    var a10 = T1[4],  a11 = T1[5],  a12 = T1[6],  a13 = T1[7];
    var a20 = T1[8],  a21 = T1[9],  a22 = T1[10], a23 = T1[11];
    var a30 = T1[12], a31 = T1[13], a32 = T1[14], a33 = T1[15];

    for (var row = 0; row < 16; row += 4) {
      var b0 = T2[row];
      var b1 = T2[row + 1];
      var b2 = T2[row + 2];
      var b3 = T2[row + 3];
      Out[row]     = b0*a00 + b1*a10 + b2*a20 + b3*a30;
      Out[row + 1] = b0*a01 + b1*a11 + b2*a21 + b3*a31;
      Out[row + 2] = b0*a02 + b1*a12 + b2*a22 + b3*a32;
      Out[row + 3] = b0*a03 + b1*a13 + b2*a23 + b3*a33;
    }
  }
}

// SIMD version of the kernel.
// Same product as multiply(), on T1x / T2x / Outx: every output row is the sum
// of the four T1x rows, each scaled by one broadcast lane of the T2x row.
function simdMultiply(n) {
  var f4 = SIMD.Float32x4;
  for (var i = 0; i < n; i++) {
    var a0 = f4.load(T1x, 0);
    var a1 = f4.load(T1x, 4);
    var a2 = f4.load(T1x, 8);
    var a3 = f4.load(T1x, 12);
    for (var row = 0; row < 16; row += 4) {
      var b = f4.load(T2x, row);
      // Association order r0 + (r1 + (r2 + r3)) is kept as in the unrolled form.
      f4.store(Outx, row,
          f4.add(
              f4.mul(f4.swizzle(b, 0, 0, 0, 0), a0),
              f4.add(
                  f4.mul(f4.swizzle(b, 1, 1, 1, 1), a1),
                  f4.add(
                      f4.mul(f4.swizzle(b, 2, 2, 2, 2), a2),
                      f4.mul(f4.swizzle(b, 3, 3, 3, 3), a3)))));
    }
  }
}
// Simple performance test memcpy using SIMD.
// Author: Moh Haghighat
// January 20, 2015

(function () {

  // Kernel configuration
  var kernelConfig = {
    kernelName:       "Memcpy",
    kernelInit:       initArray,
    kernelCleanup:    cleanup,
    kernelSimd:       simdMemcpy,
    kernelNonSimd:    memcpy,
    kernelIterations: 1000
  };

  // Hook up to the harness
  benchmarks.add(new Benchmark(kernelConfig));

  // Benchmark data, initialization and kernel functions.
  // One ArrayBuffer is shared by all typed-array views and by both asm.js
  // modules below.
  var TOTAL_MEMORY = 4096*32;
  var buffer = new ArrayBuffer(TOTAL_MEMORY);
  var HEAP8 = new Int8Array(buffer);
  var HEAP32 = new Int32Array(buffer);
  var HEAPU8 = new Uint8Array(buffer);

  // Three regions spaced 2*LEN bytes apart: ptr1 is the copy source, ptr2 the
  // destination of the scalar kernel, ptr3 the destination of the SIMD kernel.
  var LEN = TOTAL_MEMORY/32;
  var ptr1 = 0;
  var ptr2 = ptr1 + 2 * LEN;
  var ptr3 = ptr2 + 2 * LEN;
  var VAL = 200;

  // Returns true when the first LEN bytes at ptr2 and ptr3 are identical,
  // i.e. when both kernels left the same bytes in their destinations.
  function sanityCheck() {
    for (var j = 0; j < LEN; ++j) {
      if (HEAP8[ptr2+j] != HEAP8[ptr3+j]) {
        return false;
      }
    }
    return true;
  }

  // Kernel initializer: fills the three regions with different byte patterns
  // (values wrap on the store into the Int8Array). Always returns true.
  function initArray() {
    for (var j = 0; j < LEN; ++j) {
      HEAP8[ptr1+j] = (VAL+1*j)|0;
      HEAP8[ptr2+j] = (VAL+2*j)|0;
      HEAP8[ptr3+j] = (VAL+3*j)|0;
    }
    return true;
  }

  // Kernel cleanup: verifies that both kernels produced the same bytes.
  function cleanup() {
    return sanityCheck();
  }

  // Bulk-copy fallback imported by both asm.js modules: copies `num` bytes
  // from byte offset `src` to byte offset `dest` and returns `dest`.
  function _emscripten_memcpy_big(dest, src, num) {
    // The self-assignments have no effect.
    dest = dest; src = src; num = num;
    HEAPU8.set(HEAPU8.subarray(src, src+num), dest);
    return dest;
  }

  // asm.js-style module holding the scalar memcpy.
  //   global: object providing the typed-array constructors
  //   imp:    imports; must provide _emscripten_memcpy_big
  //   buffer: ArrayBuffer used as the heap
  // Returns the _memcpy function.
  // NOTE(review): the statement forms below (|0 coercions, shifted heap
  // indices) follow asm.js conventions - presumably they must stay in this
  // exact shape for the module to validate; confirm before restyling.
  function NonSimdAsmjsModule (global, imp, buffer) {
    "use asm"

    var HEAP8 = new global.Int8Array(buffer);
    var HEAP32 = new global.Int32Array(buffer);
    var _emscripten_memcpy_big = imp._emscripten_memcpy_big;

    // Copies `num` bytes from byte offset `src` to byte offset `dest` and
    // returns the original `dest`.
    function _memcpy(dest, src, num) {
      dest = dest|0; src = src|0; num = num|0;
      var ret = 0;
      // Copies of 4096 bytes or more are delegated to the imported bulk copy.
      if ((num|0) >= 4096) return _emscripten_memcpy_big(dest|0, src|0, num|0)|0;
      ret = dest|0;
      // Word copies are only used when both offsets share the same alignment
      // modulo 4.
      if ((dest&3) == (src&3)) {
        // Byte-copy until dest is 4-byte aligned.
        while (dest & 3) {
          if ((num|0) == 0) return ret|0;
          HEAP8[((dest)>>0)]=((HEAP8[((src)>>0)])|0);
          dest = (dest+1)|0;
          src = (src+1)|0;
          num = (num-1)|0;
        }
        // Copy 4 bytes per iteration through the Int32 view.
        while ((num|0) >= 4) {
          HEAP32[((dest)>>2)]=((HEAP32[((src)>>2)])|0);
          dest = (dest+4)|0;
          src = (src+4)|0;
          num = (num-4)|0;
        }
      }
      // Remaining bytes (or everything, when the alignments differ).
      while ((num|0) > 0) {
        HEAP8[((dest)>>0)]=((HEAP8[((src)>>0)])|0);
        dest = (dest+1)|0;
        src = (src+1)|0;
        num = (num-1)|0;
      }
      return ret|0;
    }

    return _memcpy;
  }

  // asm.js-style module holding the SIMD memcpy. Same parameters as
  // NonSimdAsmjsModule; `global` must additionally provide SIMD.Int32x4.
  // Returns the _memcpy function.
  function SimdAsmjsModule (global, imp, buffer) {
    "use asm"

    var HEAP8 = new global.Int8Array(buffer);
    var HEAP32 = new global.Int32Array(buffer);
    var HEAPU8 = new global.Uint8Array(buffer);
    var _emscripten_memcpy_big = imp._emscripten_memcpy_big;
    var i4 = global.SIMD.Int32x4;
    var i4load = i4.load;
    var i4store = i4.store;

    // Copies `num` bytes from byte offset `src` to byte offset `dest` and
    // returns the original `dest`.
    function _memcpy(dest, src, num) {
      dest = dest|0; src = src|0; num = num|0;
      var ret = 0;
      // Copies of 4096 bytes or more are delegated to the imported bulk copy.
      if ((num|0) >= 4096) return _emscripten_memcpy_big(dest|0, src|0, num|0)|0;
      ret = dest|0;

      if ((num|0) >= 16) {
        // Byte-copy until dest is 16-byte aligned (src alignment is not
        // considered here).
        while (dest & 15) {
          if ((num|0) == 0) return ret|0;
          HEAP8[((dest)>>0)]=((HEAP8[((src)>>0)])|0);
          dest = (dest+1)|0;
          src = (src+1)|0;
          num = (num-1)|0;
        }
        // Copy 16 bytes per iteration with an Int32x4 load/store on the
        // Uint8 view.
        while ((num|0) >= 16) {
          i4store(HEAPU8, ((dest)>>0), i4load(HEAPU8, ((src)>>0)));
          dest = (dest+16)|0;
          src = (src+16)|0;
          num = (num-16)|0;
        }
        if ((num|0) == 0) return ret|0;
      }

      // Tail (fewer than 16 bytes left): same scheme as the scalar module.
      if ((dest&3) == (src&3)) {
        while (dest & 3) {
          if ((num|0) == 0) return ret|0;
          HEAP8[((dest)>>0)]=((HEAP8[((src)>>0)])|0);
          dest = (dest+1)|0;
          src = (src+1)|0;
          num = (num-1)|0;
        }
        while ((num|0) >= 4) {
          HEAP32[((dest)>>2)]=((HEAP32[((src)>>2)])|0);
          dest = (dest+4)|0;
          src = (src+4)|0;
          num = (num-4)|0;
        }
      }

      while ((num|0) > 0) {
        HEAP8[((dest)>>0)]=((HEAP8[((src)>>0)])|0);
        dest = (dest+1)|0;
        src = (src+1)|0;
        num = (num-1)|0;
      }

      return ret|0;
    }

    return _memcpy;
  }

  // Non SIMD version of the kernel: instantiates the scalar module, then for
  // each of the n iterations copies every length 0 .. LEN-1 from ptr1 to
  // ptr2+k for k = 0 .. 15. Always returns true.
  function memcpy(n) {
    // NOTE(review): `this` is passed as the module's `global`; that presumably
    // relies on the harness calling the kernel with `this` bound to the global
    // object - confirm against base.js.
    var func = NonSimdAsmjsModule(this, {"_emscripten_memcpy_big": _emscripten_memcpy_big}, buffer);
    for (var i = 0; i < n; ++i) {
      // try memcpy of variable lengths, from 0 to LEN
      for (var j = 0; j < LEN; ++j) {
        // try different (alignment mod 16) from 0 to 15
        for (var k = 0; k < 16; k++){
          func (ptr2+k, ptr1, j);
        }
      }
    }
    return true;
  }

  // SIMD version of the kernel: same copy schedule as memcpy(), using the
  // SIMD module and ptr3 as the destination. Always returns true.
  function simdMemcpy(n) {
    var func = SimdAsmjsModule(this, {"_emscripten_memcpy_big": _emscripten_memcpy_big}, buffer);
    for (var i = 0; i < n; ++i) {
      // try memcpy of variable lengths, from 0 to LEN
      for (var j = 0; j < LEN; ++j) {
        // try different (alignment mod 16) from 0 to 15
        for (var k = 0; k < 16; k++){
          func (ptr3+k, ptr1, j);
        }
      }
    }
    return true;
  }

} ());

// Simple performance test memset using SIMD.
// Author: Moh Haghighat
// December 10, 2014

(function () {

  // Kernel configuration
  var kernelConfig = {
    kernelName:       "Memset",
    kernelInit:       initArray,
    kernelCleanup:    cleanup,
    kernelSimd:       simdMemset,
    kernelNonSimd:    memset,
    kernelIterations: 1000
  };

  // Hook up to the harness
  benchmarks.add(new Benchmark(kernelConfig));

  // Benchmark data, initialization and kernel functions.
  // One ArrayBuffer is shared by all typed-array views and by both asm.js
  // modules below.
  var TOTAL_MEMORY = 4096*32;
  var buffer = new ArrayBuffer(TOTAL_MEMORY);
  var HEAP8 = new Int8Array(buffer);
  var HEAP32 = new Int32Array(buffer);
  var HEAPU8 = new Uint8Array(buffer);

  // Two adjacent LEN-byte regions: ptr1 is filled by the scalar kernel, ptr2
  // by the SIMD kernel.
  var LEN = TOTAL_MEMORY/16;
  var ptr1 = 0;
  var ptr2 = ptr1 + LEN;
  var VAL = 200;

  // Returns true when the LEN bytes at ptr1 and at ptr2 are identical, i.e.
  // when both kernels left the same bytes in their regions.
  function sanityCheck() {
    for (var j = 0; j < LEN; ++j) {
      if (HEAP8[ptr1+j] != HEAP8[ptr2+j]) {
        return false;
      }
    }
    return true;
  }

  // Kernel initializer: nothing to set up. Always returns true.
  function initArray() {
    return true;
  }

  // Kernel cleanup: verifies that both kernels produced the same bytes.
  function cleanup() {
    return sanityCheck();
  }

  // asm.js-style module holding the scalar memset.
  //   global: object providing the typed-array constructors
  //   imp:    imports (none are read)
  //   buffer: ArrayBuffer used as the heap
  // Returns the _memset function.
  // NOTE(review): the statement forms below (|0 coercions, shifted heap
  // indices) follow asm.js conventions - presumably they must stay in this
  // exact shape for the module to validate; confirm before restyling.
  function NonSimdAsmjsModule (global, imp, buffer) {
    "use asm"

    var HEAP8 = new global.Int8Array(buffer);
    var HEAP32 = new global.Int32Array(buffer);

    // Stores `value` into the `num` bytes starting at byte offset `ptr`.
    // Returns (end offset - num), which is the original `ptr` once the loops
    // have advanced ptr to ptr + num.
    function _memset(ptr, value, num) {
      ptr = ptr|0;
      value = value|0;
      num = num|0;
      var stop = 0, value4 = 0, stop4 = 0, unaligned = 0;
      stop = (ptr + num)|0;
      if ((num|0) >= 20) {
        // This is unaligned, but quite large, so work hard to get to aligned settings
        value = value & 0xff;
        unaligned = ptr & 3;
        // Replicate the byte into all four bytes of a 32-bit word.
        value4 = value | (value << 8) | (value << 16) | (value << 24);
        // Last 4-byte boundary at or below the end of the range.
        stop4 = stop & ~3;
        if (unaligned) {
          // From here on `unaligned` holds the next 4-byte boundary.
          unaligned = (ptr + 4 - unaligned)|0;
          while ((ptr|0) < (unaligned|0)) { // no need to check for stop, since we have large num
            HEAP8[((ptr)>>0)]=value;
            ptr = (ptr+1)|0;
          }
        }
        // Fill 4 bytes per iteration through the Int32 view.
        while ((ptr|0) < (stop4|0)) {
          HEAP32[((ptr)>>2)]=value4;
          ptr = (ptr+4)|0;
        }
      }
      // Remaining bytes (or the whole range when num < 20).
      while ((ptr|0) < (stop|0)) {
        HEAP8[((ptr)>>0)]=value;
        ptr = (ptr+1)|0;
      }
      return (ptr-num)|0;
    }

    return _memset;
  }

  // asm.js-style module holding the SIMD memset. Same parameters as
  // NonSimdAsmjsModule; `global` must additionally provide SIMD.Int32x4.
  // Returns the _simdMemset function.
  function SimdAsmjsModule (global, imp, buffer) {
    "use asm"

    var HEAP8 = new global.Int8Array(buffer);
    var HEAP32 = new global.Int32Array(buffer);
    var HEAPU8 = new global.Uint8Array(buffer);
    var i4 = global.SIMD.Int32x4;
    var i4splat = i4.splat;
    var i4store = i4.store;

    // Stores `value` into the `num` bytes starting at byte offset `ptr`.
    // Returns (end offset - num), which is the original `ptr` once the loops
    // have advanced ptr to ptr + num.
    function _simdMemset(ptr, value, num) {
      ptr = ptr|0;
      value = value|0;
      num = num|0;

      var value2 = 0, value4 = 0, value16 = i4(0, 0, 0, 0), stop = 0, stop4 = 0, stop16 = 0, unaligned = 0;

      stop = (ptr + num)|0;
      if ((num|0) >= 16) {
        // This is unaligned, but quite large, so work hard to get to aligned settings
        value = value & 0xff;

        unaligned = ptr & 0xf;
        if (unaligned) {
          // Initialize the 16-byte unaligned leading part
          unaligned = (ptr + 16 - unaligned)|0;
          while ((ptr|0) < (unaligned|0)) { // no need to check for stop, since we have large num
            HEAP8[((ptr)>>0)]=value;
            ptr = (ptr+1)|0;
          }
        }

        // Replicate the byte into 2, then 4 bytes, then into all four
        // 32-bit lanes.
        value2 = (value | (value << 8))|0;
        value4 = (value2 | (value2 << 16))|0;
        value16 =i4splat(value4);
        // Last 16-byte boundary at or below the end of the range.
        stop16 = stop & ~15;


        // Fill 16 bytes per iteration with an Int32x4 store on the Uint8 view.
        while ((ptr|0) < (stop16|0)) {
          i4store(HEAPU8, ((ptr)>>0), value16);
          ptr = (ptr+16)|0;
        }

        // Then 4 bytes per iteration up to the last 4-byte boundary.
        stop4 = stop & ~3;
        while ((ptr|0) < (stop4|0)) {
          HEAP32[((ptr)>>2)]=value4;
          ptr = (ptr+4)|0;
        }
      }
      // Remaining bytes (or the whole range when num < 16).
      while ((ptr|0) < (stop|0)) {
        HEAP8[((ptr)>>0)]=value;
        ptr = (ptr+1)|0;
      }
      return (ptr-num)|0;
    }

    return _simdMemset;
  }

  // Non SIMD version of the kernel: instantiates the scalar module and fills
  // the LEN bytes at ptr1 with VAL, n times. Always returns true.
  function memset(n) {
    // NOTE(review): `this` is passed as the module's `global`; that presumably
    // relies on the harness calling the kernel with `this` bound to the global
    // object - confirm against base.js.
    var func = NonSimdAsmjsModule(this, {}, buffer);
    for (var i = 0; i < n; ++i) {
      func (ptr1, VAL, LEN);
    }
    return true;
  }

// Shell entry point for the benchmark suite: loads the SIMD polyfill, the
// harness and every kernel, then runs them all, printing through the shell's
// print().
"use strict";

load ('../ecmascript_simd.js');
load ('base.js');

// load individual benchmarks
// Each file registers itself with the harness when it is loaded, so every
// file must be listed exactly once; a second load of the same file would
// register (and run) that benchmark twice.

load ('kernel-template.js');
load ('averageFloat32x4.js');
load ('averageFloat32x4LoadFromInt8Array.js');
load ('averageFloat32x4LoadX.js');
load ('averageFloat32x4LoadXY.js');
load ('averageFloat32x4LoadXYZ.js');
load ('averageInt32x4Load.js');
load ('mandelbrot.js');
load ('matrix-multiplication.js');
load ('transform.js');
load ('shiftrows.js');
load ('aobench.js');
load ('transpose4x4.js');
load ('inverse4x4.js');
load ('sinx4.js');
load ('memset.js');
load ('memcpy.js');

// Reporting callbacks handed to the harness; all three write to stdout.
function printResult (str) {
  print (str);
}

function printError (str) {
  print (str);
}

function printScore (str) {
  print (str);
}

benchmarks.runAll ({notifyResult: printResult,
                    notifyError:  printError,
                    notifyScore:  printScore},
                   true);
'; 6 | } 7 | 8 | function printError(str) { 9 | console.log(str); 10 | logs.innerHTML += str + '
'; 11 | } 12 | 13 | function printScore(str) { 14 | console.log(str); 15 | logs.innerHTML += str + '
// Debug helper: print the 4x4 state matrix, one row per line.
// Cells below 10 get one extra leading space so columns line up.
function printState() {
  var row = 0;
  while (row < 4) {
    var line = "";
    for (var col = 0; col < 4; col++) {
      var cell = state[4 * row + col];
      line += (cell < 10 ? "  " : " ") + cell;
    }
    print(line);
    row++;
  }
}
// Scalar ShiftRows: rotate row r (r = 1..3) of the row-major state matrix
// left by r positions. Nc is the number of columns per row. Row 0 is left
// untouched. Uses the shared 'temp' array as scratch space.
function shiftRows(state, Nc) {
  var row = 1;
  while (row < 4) {
    var base = row * Nc;  // index of the first element of this row
    var k = 0;
    // Stage the rotated row in the scratch buffer first, so no element is
    // overwritten before it has been read.
    while (k < Nc) {
      temp[k] = state[base + (k + row) % Nc];
      k += 1;
    }
    // Copy the rotated row back in place.
    for (k = 0; k < Nc; k += 1) {
      state[base + k] = temp[k];
    }
    row += 1;
  }
}
// The SIMD optimized version of the shiftRows function.
// The function is special cased for a 4 column setting (Nc == 4), which is
// the value used for AES blocks (see documentation for details).
//   state: Int32Array holding the row-major state matrix; modified in place
//   Nc:    number of columns per row
// For any other Nc the work is delegated to the scalar shiftRows() and the
// SIMD path is skipped: it assumes 4-element rows and would otherwise rotate
// elements 4..15 a second time using the wrong row layout.
function simdShiftRows(state, Nc) {
  if (Nc !== 4) {
    shiftRows(state, Nc);
    return;
  }
  for (var r = 1; r < 4; ++r) {
    // Load row r (4 lanes), rotate it left by r lanes, store it back.
    var rx4 = SIMD.Int32x4.load(state, r << 2);
    if (r === 1) {
      SIMD.Int32x4.store(state, 4, SIMD.Int32x4.swizzle(rx4, 1, 2, 3, 0));
    }
    else if (r === 2) {
      SIMD.Int32x4.store(state, 8, SIMD.Int32x4.swizzle(rx4, 2, 3, 0, 1));
    }
    else { // r === 3
      SIMD.Int32x4.store(state, 12, SIMD.Int32x4.swizzle(rx4, 3, 0, 1, 2));
    }
  }
}
// Lane-wise approximate equality of two 4-element result arrays.
//   a, b: indexable collections holding at least 4 numbers
// Returns true only when every one of the 4 lanes differs by at most 1e-5.
function almostEqual(a, b) {
  for (var i = 0; i < 4; ++i) {
    // Compare lane i of each array (subtracting the arrays themselves yields
    // NaN, which made the old check pass unconditionally). The negated '<='
    // also rejects a NaN difference instead of silently accepting it.
    if (!(Math.abs (a[i] - b[i]) <= 0.00001)) {
      return false;
    }
  }
  return true;
}
// Compute sin() of each of the 4 lanes of the Float32x4 'x' and return the
// results as a Float32x4. Works on the absolute value, reduces it using the
// _ps_minus_cephes_DP* constants, evaluates two polynomials (cosine and sine
// coefficients) and selects between them per lane, then restores the sign.
// Variable names (xmm*, emm*) follow the SSE original this was adopted from.
function sinx4 (x) {
  var xmm1;
  var xmm2;
  var xmm3;
  var sign_bit;
  var swap_sign_bit;
  var poly_mask;
  var y;
  var y2;
  var z;
  var tmp;

  var emm0;
  var emm2;
  var emm2mask;

  sign_bit = x;
  // x = |x|: clear the sign bit of every lane.
  x             = SIMD.Float32x4.fromInt32x4Bits(SIMD.Int32x4.and(SIMD.Int32x4.fromFloat32x4Bits(x), _ps_inv_sign_mask));
  // sign_bit keeps only the original sign bit of every lane.
  sign_bit      = SIMD.Float32x4.fromInt32x4Bits(SIMD.Int32x4.and(SIMD.Int32x4.fromFloat32x4Bits(sign_bit), _ps_sign_mask));
  // Scale by _ps_cephes_FOPI (1.2732..., i.e. 4/pi).
  y             = SIMD.Float32x4.mul(x, _ps_cephes_FOPI);
  //printFloat32x4 ("Probe 6", y);
  // Integer part j, then j = (j + 1) & ~1 to make it even.
  emm2          = SIMD.Int32x4.fromFloat32x4(y);
  emm2          = SIMD.Int32x4.add(emm2, _pi32_1);
  emm2          = SIMD.Int32x4.and(emm2, _pi32_inv1);
  //printInt32x4 ("Probe 8", emm2);
  y             = SIMD.Float32x4.fromInt32x4(emm2);
  //printFloat32x4 ("Probe 7", y);
  // Bit 2 of j, moved up to bit 31, becomes a sign-flip mask.
  emm0          = SIMD.Int32x4.and(emm2, _pi32_4);
  emm0          = SIMD.Int32x4.shiftLeftByScalar(emm0, 29);

  // Polynomial selector: all-ones in lanes where (j & 2) == 0, else zero.
  emm2          = SIMD.Int32x4.and(emm2, _pi32_2);
  emm2mask      = SIMD.Int32x4.equal(emm2, SIMD.Int32x4.splat(0));
  emm2          = SIMD.Int32x4.select(emm2mask, SIMD.Int32x4.splat(-1), SIMD.Int32x4.splat(0));

  swap_sign_bit = SIMD.Float32x4.fromInt32x4Bits(emm0);
  poly_mask     = SIMD.Float32x4.fromInt32x4Bits(emm2);
  // Final sign = original sign XOR the flip derived from j.
  sign_bit      = SIMD.Float32x4.fromInt32x4Bits(SIMD.Int32x4.xor(SIMD.Int32x4.fromFloat32x4Bits(sign_bit), SIMD.Int32x4.fromFloat32x4Bits(swap_sign_bit)));
  //printFloat32x4 ("Probe 1", sign_bit);

  //printFloat32x4 ("Probe 4", y);
  //printFloat32x4 ("Probe 5", _ps_minus_cephes_DP1);
  // Range reduction: x = x + y*DP1 + y*DP2 + y*DP3, where the three negative
  // DP constants are applied one after another.
  xmm1          = SIMD.Float32x4.mul(y, _ps_minus_cephes_DP1);
  //printFloat32x4 ("Probe 3", xmm1);
  xmm2          = SIMD.Float32x4.mul(y, _ps_minus_cephes_DP2);
  xmm3          = SIMD.Float32x4.mul(y, _ps_minus_cephes_DP3);
  x             = SIMD.Float32x4.add(x, xmm1);
  x             = SIMD.Float32x4.add(x, xmm2);
  x             = SIMD.Float32x4.add(x, xmm3);
  //printFloat32x4 ("Probe 2", x);

  // First polynomial (coscof coefficients), with z = x*x:
  //   y = ((p0*z + p1)*z + p2)*z*z - 0.5*z + 1
  y             = _ps_coscof_p0;
  z             = SIMD.Float32x4.mul(x, x);
  y             = SIMD.Float32x4.mul(y, z);
  y             = SIMD.Float32x4.add(y, _ps_coscof_p1);
  y             = SIMD.Float32x4.mul(y, z);
  y             = SIMD.Float32x4.add(y, _ps_coscof_p2);
  y             = SIMD.Float32x4.mul(y, z);
  y             = SIMD.Float32x4.mul(y, z);
  tmp           = SIMD.Float32x4.mul(z, _ps_0p5);
  y             = SIMD.Float32x4.sub(y, tmp);
  y             = SIMD.Float32x4.add(y, _ps_1);

  // Second polynomial (sincof coefficients):
  //   y2 = ((p0*z + p1)*z + p2)*z*x + x
  y2            = _ps_sincof_p0;
  //printFloat32x4 ("Probe 11", y2);
  //printFloat32x4 ("Probe 12", z);
  y2            = SIMD.Float32x4.mul(y2, z);
  y2            = SIMD.Float32x4.add(y2, _ps_sincof_p1);
  //printFloat32x4 ("Probe 13", y2);
  y2            = SIMD.Float32x4.mul(y2, z);
  y2            = SIMD.Float32x4.add(y2, _ps_sincof_p2);
  y2            = SIMD.Float32x4.mul(y2, z);
  y2            = SIMD.Float32x4.mul(y2, x);
  y2            = SIMD.Float32x4.add(y2, x);

  // Per-lane select done with bit masks: keep y2 where poly_mask is set and
  // y where it is clear; the add merges the two disjoint halves.
  xmm3          = poly_mask;
  y2            = SIMD.Float32x4.fromInt32x4Bits(SIMD.Int32x4.and(SIMD.Int32x4.fromFloat32x4Bits(xmm3), SIMD.Int32x4.fromFloat32x4Bits(y2)));
  //printFloat32x4 ("Probe 10", y2);
  y             = SIMD.Float32x4.fromInt32x4Bits(SIMD.Int32x4.and(SIMD.Int32x4.not(SIMD.Int32x4.fromFloat32x4Bits(xmm3)), SIMD.Int32x4.fromFloat32x4Bits(y)));
  y             = SIMD.Float32x4.add(y, y2);

  //printFloat32x4 ("Probe 9", y);
  // Apply the computed sign by XOR-ing it into the result bits.
  y             = SIMD.Float32x4.fromInt32x4Bits(SIMD.Int32x4.xor(SIMD.Int32x4.fromFloat32x4Bits(y), SIMD.Int32x4.fromFloat32x4Bits(sign_bit)));
  return y;
}
// Kernel initializer: set up an identity transform and the vertex (1, 2, 3, 1)
// for both the scalar (T, V) and SIMD (Tx, Vx) kernels, run each kernel once,
// and report whether the SIMD output matches the scalar output in all 4 lanes.
function init() {
  var f4 = SIMD.Float32x4;
  var k;

  // Scalar data: only the diagonal of T is written.
  for (k = 0; k < 4; k++) {
    T[5 * k] = 1.0;
  }
  V[0] = 1.0;
  V[1] = 2.0;
  V[2] = 3.0;
  V[3] = 1.0;

  // SIMD data: every row of Tx is stored in full.
  f4.store(Tx, 0,  f4(1.0, 0.0, 0.0, 0.0));
  f4.store(Tx, 4,  f4(0.0, 1.0, 0.0, 0.0));
  f4.store(Tx, 8,  f4(0.0, 0.0, 1.0, 0.0));
  f4.store(Tx, 12, f4(0.0, 0.0, 0.0, 1.0));
  f4.store(Vx, 0,  f4(1.0, 2.0, 3.0, 1.0));

  simdVertexTransform(1);
  vertexTransform(1);

  // Compare lane by lane, stopping at the first mismatch.
  for (k = 0; k < 4; k++) {
    if (f4.extractLane(f4.load(Outx, 0), k) != Out[k]) {
      return false;
    }
  }
  return true;
}
// SIMD kernel: n times, multiply the vertex in Vx by the 4x4 matrix in Tx and
// store the 4-lane result in Outx. Each vertex component is broadcast to all
// lanes and multiplied with the matching group of 4 matrix elements; the four
// products are summed into one accumulator.
function simdVertexTransform(n) {
  var f4 = SIMD.Float32x4;
  var iter = 0;
  while (iter < n) {
    var bx = f4.swizzle(f4.load(Vx, 0), 0, 0, 0, 0);
    var acc = f4.splat(0.0);
    acc = f4.add(acc, f4.mul(bx, f4.load(Tx, 0)));

    var by = f4.swizzle(f4.load(Vx, 0), 1, 1, 1, 1);
    acc = f4.add(acc, f4.mul(by, f4.load(Tx, 4)));

    var bz = f4.swizzle(f4.load(Vx, 0), 2, 2, 2, 2);
    acc = f4.add(acc, f4.mul(bz, f4.load(Tx, 8)));

    var bw = f4.swizzle(f4.load(Vx, 0), 3, 3, 3, 3);
    acc = f4.add(acc, f4.mul(bw, f4.load(Tx, 12)));

    f4.store(Outx, 0, acc);
    iter++;
  }
}
// Fill 'matrix' with 0..15 in row-major order and 'matrixTransposed' with the
// transpose of that same 4x4 matrix.
function initMatrix(matrix, matrixTransposed) {
  for (var idx = 0; idx < 16; idx++) {
    var row = idx >> 2;
    var col = idx & 3;
    matrix[idx] = idx;
    matrixTransposed[row + 4 * col] = idx;
  }
}
// SIMD version of the kernel built from select + swizzle only: transposes the
// 4x4 matrix in 'src' into 'dst'. sel_ttff picks lanes 0-1 from the first
// operand and lanes 2-3 from the second.
function simdTranspose() {
  var f4 = SIMD.Float32x4;
  var row0 = f4.load(src, 0);
  var row1 = f4.load(src, 4);
  var row2 = f4.load(src, 8);
  var row3 = f4.load(src, 12);

  // Columns 0 and 1: pair up the low halves of rows 0/1 and rows 2/3.
  var lo01 = f4.select(sel_ttff, row0, f4.swizzle(row1, 0, 0, 0, 1));
  var lo23 = f4.select(sel_ttff, row2, f4.swizzle(row3, 0, 0, 0, 1));
  var col0 = f4.select(sel_ttff, f4.swizzle(lo01, 0, 2, 0, 0), f4.swizzle(lo23, 0, 0, 0, 2));
  var col1 = f4.select(sel_ttff, f4.swizzle(lo01, 1, 3, 0, 0), f4.swizzle(lo23, 0, 0, 1, 3));

  // Columns 2 and 3: same again with the high halves.
  var hi01 = f4.select(sel_ttff, f4.swizzle(row0, 2, 3, 0, 0), row1);
  var hi23 = f4.select(sel_ttff, f4.swizzle(row2, 2, 3, 0, 0), row3);
  var col2 = f4.select(sel_ttff, f4.swizzle(hi01, 0, 2, 0, 0), f4.swizzle(hi23, 0, 0, 0, 2));
  var col3 = f4.select(sel_ttff, f4.swizzle(hi01, 1, 3, 0, 0), f4.swizzle(hi23, 0, 0, 1, 3));

  f4.store(dst, 0, col0);
  f4.store(dst, 4, col1);
  f4.store(dst, 8, col2);
  f4.store(dst, 12, col3);
}
// SIMD benchmark kernel: transpose the 4x4 matrix in 'src' into 'dst' n times
// using two rounds of Float32x4.shuffle per pair of output rows.
function simdTransposeN(n) {
  var f4 = SIMD.Float32x4;
  var pass = 0;
  while (pass < n) {
    var row0 = f4.load(src, 0);
    var row1 = f4.load(src, 4);
    var row2 = f4.load(src, 8);
    var row3 = f4.load(src, 12);

    // Low halves of each row pair -> output rows 0 and 1.
    var lo01 = f4.shuffle(row0, row1, 0, 1, 4, 5);
    var lo23 = f4.shuffle(row2, row3, 0, 1, 4, 5);
    var out0 = f4.shuffle(lo01, lo23, 0, 2, 4, 6);
    var out1 = f4.shuffle(lo01, lo23, 1, 3, 5, 7);

    // High halves of each row pair -> output rows 2 and 3.
    var hi01 = f4.shuffle(row0, row1, 2, 3, 6, 7);
    var hi23 = f4.shuffle(row2, row3, 2, 3, 6, 7);
    var out2 = f4.shuffle(hi01, hi23, 0, 2, 4, 6);
    var out3 = f4.shuffle(hi01, hi23, 1, 3, 5, 7);

    f4.store(dst, 0, out0);
    f4.store(dst, 4, out1);
    f4.store(dst, 8, out2);
    f4.store(dst, 12, out3);
    pass++;
  }
}
is provided 'as-is', without any express or implied 6 | warranty. In no event will the authors be held liable for any damages 7 | arising from the use of this software. 8 | 9 | Permission is granted to anyone to use this software for any purpose, 10 | including commercial applications, and to alter it and redistribute it 11 | freely, subject to the following restrictions: 12 | 13 | 1. The origin of this software must not be misrepresented; you must not 14 | claim that you wrote the original software. If you use this software 15 | in a product, an acknowledgment in the product documentation would be 16 | appreciated but is not required. 17 | 2. Altered source versions must be plainly marked as such, and must not be 18 | misrepresented as being the original software. 19 | 3. This notice may not be removed or altered from any source distribution. 20 | */ 21 | 22 | // A conforming SIMD.js implementation may contain the following deviations to 23 | // normal JS numeric behavior: 24 | // - Subnormal numbers may or may not be flushed to zero on input or output of 25 | // any SIMD operation. 26 | 27 | // Many of the operations in SIMD.js have semantics which correspond to scalar 28 | // operations in JS, however there are a few differences: 29 | // - Vector shifts don't mask the shift count. 30 | // - Conversions from float to int32 throw on error. 31 | // - Load and store operations throw when out of bounds. 32 | 33 | (function(global) { 34 | 35 | if (typeof global.SIMD === "undefined") { 36 | // SIMD module. 37 | global.SIMD = {}; 38 | } 39 | 40 | if (typeof module !== "undefined") { 41 | // For CommonJS modules 42 | module.exports = global.SIMD; 43 | } 44 | 45 | var SIMD = global.SIMD; 46 | 47 | // Buffers for bit casting and coercing lane values to those representable in 48 | // the underlying lane type. 
// Minimum of two numbers that ignores a NaN operand: if exactly one argument
// is NaN the other is returned; otherwise the result is Math.min(x, y).
function minNum(x, y) {
  // A value that is not equal to itself is NaN.
  if (x != x) {
    return y;
  }
  if (y != y) {
    return x;
  }
  return Math.min(x, y);
}
// Validate a lane index against a lane count.
// Throws TypeError when 'index' is not an int32 value, and RangeError when it
// is outside [0, lanes). Returns nothing on success.
function simdCheckLaneIndex(index, lanes) {
  var isInt32Value = (index | 0) === index;
  if (!isInt32Value) {
    throw new TypeError('Lane index must be an int32');
  }
  var inBounds = !(index < 0) && !(index >= lanes);
  if (!inBounds) {
    throw new RangeError('Lane index must be in bounds');
  }
}
// Lane-wise value conversion from one SIMD type to another.
// Every source lane is truncated toward zero. When the target type declares
// minVal, a truncated value outside [minVal, maxVal] (including NaN) raises
// RangeError. The converted lanes are written to the shared 'lanes' array and
// the result is built with simdCreate(toType).
function simdFrom(toType, fromType, a) {
  var source = fromType.fn.check(a);
  var lane = 0;
  while (lane < fromType.lanes) {
    var truncated = Math.trunc(fromType.fn.extractLane(source, lane));
    if (toType.minVal !== undefined) {
      // Written as a positive range test so that NaN fails it.
      var inRange = toType.minVal <= truncated && truncated <= toType.maxVal;
      if (!inRange) {
        throw new RangeError("Can't convert value");
      }
    }
    lanes[lane] = truncated;
    lane++;
  }
  return simdCreate(toType);
}
// Build a value of 'type' by picking lanes from the concatenation of a and b:
// an index below type.lanes selects from a, otherwise index - type.lanes
// selects from b. Each index is validated against 2 * type.lanes first.
function simdShuffle(type, a, b, indices) {
  var first = type.fn.check(a);
  var second = type.fn.check(b);
  var out = 0;
  while (out < indices.length) {
    simdCheckLaneIndex(indices[out], 2 * type.lanes);
    if (indices[out] < type.lanes) {
      lanes[out] = type.fn.extractLane(first, indices[out]);
    } else {
      lanes[out] = type.fn.extractLane(second, indices[out] - type.lanes);
    }
    out++;
  }
  return simdCreate(type);
}
// Runs the two-argument scalar function `op` over matching lanes of `a` and
// `b`. Both operands are validated with type.fn.check first. Results are
// written to the outer `lanes` array and simdCreate(type) is returned.
function simdBinaryOp(type, op, a, b) {
  a = type.fn.check(a);
  b = type.fn.check(b);
  for (var lane = 0; lane < type.lanes; lane++) {
    var lhs = type.fn.extractLane(a, lane);
    var rhs = type.fn.extractLane(b, lane);
    lanes[lane] = op(lhs, rhs);
  }
  return simdCreate(type);
}

// Scalar comparison helpers, passed as `op` to simdRelationalOp.
function binaryEqual(a, b) {
  return a == b;
}
function binaryNotEqual(a, b) {
  return a != b;
}
function binaryLess(a, b) {
  return a < b;
}
function binaryLessEqual(a, b) {
  return a <= b;
}
function binaryGreater(a, b) {
  return a > b;
}
function binaryGreaterEqual(a, b) {
  return a >= b;
}

// Same lane walk as simdBinaryOp, except that the result is created with
// type.boolType instead of type.
function simdRelationalOp(type, op, a, b) {
  a = type.fn.check(a);
  b = type.fn.check(b);
  for (var lane = 0; lane < type.lanes; lane++) {
    var lhs = type.fn.extractLane(a, lane);
    var rhs = type.fn.extractLane(b, lane);
    lanes[lane] = op(lhs, rhs);
  }
  return simdCreate(type.boolType);
}

// Returns true as soon as one lane of `a` is truthy, false otherwise.
function simdAnyTrue(type, a) {
  a = type.fn.check(a);
  var lane = 0;
  while (lane < type.lanes) {
    if (type.fn.extractLane(a, lane)) {
      return true;
    }
    lane++;
  }
  return false;
}

// Returns false as soon as one lane of `a` is falsy, true otherwise.
function simdAllTrue(type, a) {
  a = type.fn.check(a);
  var lane = 0;
  while (lane < type.lanes) {
    if (!type.fn.extractLane(a, lane)) {
      return false;
    }
    lane++;
  }
  return true;
}

// Scalar shift helpers, passed as `op` to simdShiftOp.
function binaryShiftLeft(a, bits) {
  return a << bits;
}
function binaryShiftRightArithmetic(a, bits) {
  return a >> bits;
}
function binaryShiftRightLogical(a, bits) {
  return a >>> bits;
}

// Applies op(laneValue, bits) to every lane of `a`, with the same shift
// count for all lanes.
function simdShiftOp(type, op, a, bits) {
  a = type.fn.check(a);
  for (var lane = 0; lane < type.lanes; lane++) {
    var value = type.fn.extractLane(a, lane);
    lanes[lane] = op(value, bits);
  }
  return simdCreate(type);
}

// Reads `count` lanes of `type` from `tarray`, starting at element `index`.
// Throws TypeError when `tarray` fails isTypedArray and RangeError when the
// requested bytes do not fit inside `tarray`. Bytes of the result beyond
// the ones read are set to zero.
function simdLoad(type, tarray, index, count) {
  if (!isTypedArray(tarray)) {
    throw new TypeError("The 1st argument must be a typed array.");
  }
  index = simdCoerceIndex(index);
  var bpe = tarray.BYTES_PER_ELEMENT;
  var bytes = count * type.laneSize;
  var byteStart = index * bpe;
  if (index < 0 || (byteStart + bytes) > tarray.byteLength) {
    throw new RangeError("The value of index is invalid.");
  }

  var newValue = type.fn();
  var dst = new Uint8Array(newValue.s_.buffer);
  var src = new Uint8Array(tarray.buffer, tarray.byteOffset + byteStart, bytes);

  // Copy the requested bytes, then clear whatever is left of the value.
  var pos = 0;
  while (pos < bytes) {
    dst[pos] = src[pos];
    pos++;
  }
  var typeBytes = type.lanes * type.laneSize;
  for (pos = bytes; pos < typeBytes; pos++) {
    dst[pos] = 0;
  }
  return newValue;
}

// Writes the first `count` lanes of `a` into `tarray`, starting at element
// `index`, and returns `a`. The array and index checks run before `a` is
// validated, and throw the same errors as simdLoad.
function simdStore(type, tarray, index, a, count) {
  if (!isTypedArray(tarray)) {
    throw new TypeError("The 1st argument must be a typed array.");
  }
  index = simdCoerceIndex(index);
  var bpe = tarray.BYTES_PER_ELEMENT;
  var bytes = count * type.laneSize;
  var byteStart = index * bpe;
  if (index < 0 || (byteStart + bytes) > tarray.byteLength) {
    throw new RangeError("The value of index is invalid.");
  }

  a = type.fn.check(a);

  // Byte-wise copy of the underlying buffer, so that float values are not
  // canonicalized on the way through.
  var src = new Uint8Array(a.s_.buffer);
  var dst = new Uint8Array(tarray.buffer, tarray.byteOffset + byteStart, bytes);
  var pos = 0;
  while (pos < bytes) {
    dst[pos] = src[pos];
    pos++;
  }
  return a;
}

// Constructors and extractLane functions are closely related and must be
// polyfilled together.

// Helpers shared by the constructor polyfills below.

// True when SIMD[typeName], or its extractLane function, is not defined yet.
function simdLacksType(typeName) {
  return typeof SIMD[typeName] === "undefined" ||
         typeof SIMD[typeName].extractLane === "undefined";
}

// True when SIMD[typeName] has no function named fnName yet.
function simdLacksFn(typeName, fnName) {
  return typeof SIMD[typeName][fnName] === "undefined";
}

// Builds extractLane for SIMD[typeName]: validate the vector, validate the
// lane index against laneCount, then read the lane out of the `s_` storage.
function simdLaneExtractor(typeName, laneCount) {
  return function(v, i) {
    v = SIMD[typeName].check(v);
    simdCheckLaneIndex(i, laneCount);
    return v.s_[i];
  };
}

// Every constructor below also works without `new`: a plain call forwards
// its arguments to a `new` call.

// ---- Float32x4 ----
if (simdLacksType("Float32x4")) {
  SIMD.Float32x4 = function(s0, s1, s2, s3) {
    if (this instanceof SIMD.Float32x4) {
      this.s_ = convertArray(_f32x4, new Float32Array([s0, s1, s2, s3]));
    } else {
      return new SIMD.Float32x4(s0, s1, s2, s3);
    }
  };
  SIMD.Float32x4.extractLane = simdLaneExtractor("Float32x4", 4);
}

// swizzle and shuffle take one index argument per lane, so they are written
// out per type rather than generated.

if (simdLacksFn("Float32x4", "swizzle")) {
  SIMD.Float32x4.swizzle = function(a, s0, s1, s2, s3) {
    var indices = [s0, s1, s2, s3];
    return simdSwizzle(float32x4, a, indices);
  };
}

if (simdLacksFn("Float32x4", "shuffle")) {
  SIMD.Float32x4.shuffle = function(a, b, s0, s1, s2, s3) {
    var indices = [s0, s1, s2, s3];
    return simdShuffle(float32x4, a, b, indices);
  };
}

// ---- Int32x4 ----
if (simdLacksType("Int32x4")) {
  SIMD.Int32x4 = function(s0, s1, s2, s3) {
    if (this instanceof SIMD.Int32x4) {
      this.s_ = convertArray(_i32x4, new Int32Array([s0, s1, s2, s3]));
    } else {
      return new SIMD.Int32x4(s0, s1, s2, s3);
    }
  };
  SIMD.Int32x4.extractLane = simdLaneExtractor("Int32x4", 4);
}

if (simdLacksFn("Int32x4", "swizzle")) {
  SIMD.Int32x4.swizzle = function(a, s0, s1, s2, s3) {
    var indices = [s0, s1, s2, s3];
    return simdSwizzle(int32x4, a, indices);
  };
}

if (simdLacksFn("Int32x4", "shuffle")) {
  SIMD.Int32x4.shuffle = function(a, b, s0, s1, s2, s3) {
    var indices = [s0, s1, s2, s3];
    return simdShuffle(int32x4, a, b, indices);
  };
}

// ---- Int16x8 ----
if (simdLacksType("Int16x8")) {
  SIMD.Int16x8 = function(s0, s1, s2, s3, s4, s5, s6, s7) {
    if (this instanceof SIMD.Int16x8) {
      this.s_ = convertArray(
          _i16x8, new Int16Array([s0, s1, s2, s3, s4, s5, s6, s7]));
    } else {
      return new SIMD.Int16x8(s0, s1, s2, s3, s4, s5, s6, s7);
    }
  };
  SIMD.Int16x8.extractLane = simdLaneExtractor("Int16x8", 8);
}

if (simdLacksFn("Int16x8", "swizzle")) {
  SIMD.Int16x8.swizzle = function(a, s0, s1, s2, s3, s4, s5, s6, s7) {
    var indices = [s0, s1, s2, s3, s4, s5, s6, s7];
    return simdSwizzle(int16x8, a, indices);
  };
}

if (simdLacksFn("Int16x8", "shuffle")) {
  SIMD.Int16x8.shuffle = function(a, b, s0, s1, s2, s3, s4, s5, s6, s7) {
    var indices = [s0, s1, s2, s3, s4, s5, s6, s7];
    return simdShuffle(int16x8, a, b, indices);
  };
}

// ---- Int8x16 ----
if (simdLacksType("Int8x16")) {
  SIMD.Int8x16 = function(s0, s1, s2, s3, s4, s5, s6, s7,
                          s8, s9, s10, s11, s12, s13, s14, s15) {
    if (this instanceof SIMD.Int8x16) {
      this.s_ = convertArray(
          _i8x16, new Int8Array([s0, s1, s2, s3, s4, s5, s6, s7,
                                 s8, s9, s10, s11, s12, s13, s14, s15]));
    } else {
      return new SIMD.Int8x16(s0, s1, s2, s3, s4, s5, s6, s7,
                              s8, s9, s10, s11, s12, s13, s14, s15);
    }
  };
  SIMD.Int8x16.extractLane = simdLaneExtractor("Int8x16", 16);
}

if (simdLacksFn("Int8x16", "swizzle")) {
  SIMD.Int8x16.swizzle = function(a, s0, s1, s2, s3, s4, s5, s6, s7,
                                  s8, s9, s10, s11, s12, s13, s14, s15) {
    var indices = [s0, s1, s2, s3, s4, s5, s6, s7,
                   s8, s9, s10, s11, s12, s13, s14, s15];
    return simdSwizzle(int8x16, a, indices);
  };
}

if (simdLacksFn("Int8x16", "shuffle")) {
  SIMD.Int8x16.shuffle = function(a, b, s0, s1, s2, s3, s4, s5, s6, s7,
                                  s8, s9, s10, s11, s12, s13, s14, s15) {
    var indices = [s0, s1, s2, s3, s4, s5, s6, s7,
                   s8, s9, s10, s11, s12, s13, s14, s15];
    return simdShuffle(int8x16, a, b, indices);
  };
}

// ---- Uint32x4 ----
if (simdLacksType("Uint32x4")) {
  SIMD.Uint32x4 = function(s0, s1, s2, s3) {
    if (this instanceof SIMD.Uint32x4) {
      this.s_ = convertArray(_ui32x4, new Uint32Array([s0, s1, s2, s3]));
    } else {
      return new SIMD.Uint32x4(s0, s1, s2, s3);
    }
  };
  SIMD.Uint32x4.extractLane = simdLaneExtractor("Uint32x4", 4);
}

if (simdLacksFn("Uint32x4", "swizzle")) {
  SIMD.Uint32x4.swizzle = function(a, s0, s1, s2, s3) {
    var indices = [s0, s1, s2, s3];
    return simdSwizzle(uint32x4, a, indices);
  };
}

if (simdLacksFn("Uint32x4", "shuffle")) {
  SIMD.Uint32x4.shuffle = function(a, b, s0, s1, s2, s3) {
    var indices = [s0, s1, s2, s3];
    return simdShuffle(uint32x4, a, b, indices);
  };
}

// ---- Uint16x8 ----
if (simdLacksType("Uint16x8")) {
  SIMD.Uint16x8 = function(s0, s1, s2, s3, s4, s5, s6, s7) {
    if (this instanceof SIMD.Uint16x8) {
      this.s_ = convertArray(
          _ui16x8, new Uint16Array([s0, s1, s2, s3, s4, s5, s6, s7]));
    } else {
      return new SIMD.Uint16x8(s0, s1, s2, s3, s4, s5, s6, s7);
    }
  };
  SIMD.Uint16x8.extractLane = simdLaneExtractor("Uint16x8", 8);
}

if (simdLacksFn("Uint16x8", "swizzle")) {
  SIMD.Uint16x8.swizzle = function(a, s0, s1, s2, s3, s4, s5, s6, s7) {
    var indices = [s0, s1, s2, s3, s4, s5, s6, s7];
    return simdSwizzle(uint16x8, a, indices);
  };
}

if (simdLacksFn("Uint16x8", "shuffle")) {
  SIMD.Uint16x8.shuffle = function(a, b, s0, s1, s2, s3, s4, s5, s6, s7) {
    var indices = [s0, s1, s2, s3, s4, s5, s6, s7];
    return simdShuffle(uint16x8, a, b, indices);
  };
}

// ---- Uint8x16 ----
if (simdLacksType("Uint8x16")) {
  SIMD.Uint8x16 = function(s0, s1, s2, s3, s4, s5, s6, s7,
                           s8, s9, s10, s11, s12, s13, s14, s15) {
    if (this instanceof SIMD.Uint8x16) {
      this.s_ = convertArray(
          _ui8x16, new Uint8Array([s0, s1, s2, s3, s4, s5, s6, s7,
                                   s8, s9, s10, s11, s12, s13, s14, s15]));
    } else {
      return new SIMD.Uint8x16(s0, s1, s2, s3, s4, s5, s6, s7,
                               s8, s9, s10, s11, s12, s13, s14, s15);
    }
  };
  SIMD.Uint8x16.extractLane = simdLaneExtractor("Uint8x16", 16);
}

if (simdLacksFn("Uint8x16", "swizzle")) {
  SIMD.Uint8x16.swizzle = function(a, s0, s1, s2, s3, s4, s5, s6, s7,
                                   s8, s9, s10, s11, s12, s13, s14, s15) {
    var indices = [s0, s1, s2, s3, s4, s5, s6, s7,
                   s8, s9, s10, s11, s12, s13, s14, s15];
    return simdSwizzle(uint8x16, a, indices);
  };
}

if (simdLacksFn("Uint8x16", "shuffle")) {
  SIMD.Uint8x16.shuffle = function(a, b, s0, s1, s2, s3, s4, s5, s6, s7,
                                   s8, s9, s10, s11, s12, s13, s14, s15) {
    var indices = [s0, s1, s2, s3, s4, s5, s6, s7,
                   s8, s9, s10, s11, s12, s13, s14, s15];
    return simdShuffle(uint8x16, a, b, indices);
  };
}

// The Boolean types keep their lanes in a plain array of booleans; every
// constructor argument is coerced to a boolean.

// ---- Bool32x4 ----
if (simdLacksType("Bool32x4")) {
  SIMD.Bool32x4 = function(s0, s1, s2, s3) {
    if (this instanceof SIMD.Bool32x4) {
      this.s_ = [Boolean(s0), Boolean(s1), Boolean(s2), Boolean(s3)];
    } else {
      return new SIMD.Bool32x4(s0, s1, s2, s3);
    }
  };
  SIMD.Bool32x4.extractLane = simdLaneExtractor("Bool32x4", 4);
}

// ---- Bool16x8 ----
if (simdLacksType("Bool16x8")) {
  SIMD.Bool16x8 = function(s0, s1, s2, s3, s4, s5, s6, s7) {
    if (this instanceof SIMD.Bool16x8) {
      this.s_ = [Boolean(s0), Boolean(s1), Boolean(s2), Boolean(s3),
                 Boolean(s4), Boolean(s5), Boolean(s6), Boolean(s7)];
    } else {
      return new SIMD.Bool16x8(s0, s1, s2, s3, s4, s5, s6, s7);
    }
  };
  SIMD.Bool16x8.extractLane = simdLaneExtractor("Bool16x8", 8);
}

// ---- Bool8x16 ----
if (simdLacksType("Bool8x16")) {
  SIMD.Bool8x16 = function(s0, s1, s2, s3, s4, s5, s6, s7,
                           s8, s9, s10, s11, s12, s13, s14, s15) {
    if (this instanceof SIMD.Bool8x16) {
      this.s_ = [Boolean(s0), Boolean(s1), Boolean(s2), Boolean(s3),
                 Boolean(s4), Boolean(s5), Boolean(s6), Boolean(s7),
                 Boolean(s8), Boolean(s9), Boolean(s10), Boolean(s11),
                 Boolean(s12), Boolean(s13), Boolean(s14), Boolean(s15)];
    } else {
      return new SIMD.Bool8x16(s0, s1, s2, s3, s4, s5, s6, s7,
                               s8, s9, s10, s11, s12, s13, s14, s15);
    }
  };
  SIMD.Bool8x16.extractLane = simdLaneExtractor("Bool8x16", 16);
}

// Type data to generate the remaining functions.

// Function-name groups that the descriptors below combine. Each descriptor
// gets its own array via concat(); the order of names within each `fns`
// list is the order in which the installer defines them.
var simdCommonFnNames = ["check", "splat", "replaceLane"];
var simdCompareFnNames = ["select",
    "equal", "notEqual", "lessThan", "lessThanOrEqual",
    "greaterThan", "greaterThanOrEqual"];
var simdLogicalFnNames = ["and", "or", "xor", "not"];
var simdFloatMathFnNames = ["add", "sub", "mul", "div", "neg", "abs",
    "min", "max", "minNum", "maxNum",
    "reciprocalApproximation", "reciprocalSqrtApproximation", "sqrt"];
var simdSignedMathFnNames = ["add", "sub", "mul", "neg"];
var simdUnsignedMathFnNames = ["add", "sub", "mul"];
var simdShiftFnNames = ["shiftLeftByScalar", "shiftRightByScalar"];
var simdSaturateFnNames = ["addSaturate", "subSaturate"];
var simdLoadStoreFnNames = ["load", "store"];
var simdPartialLoadStoreFnNames = ["load", "load1", "load2", "load3",
                                   "store", "store1", "store2", "store3"];

// One descriptor per SIMD type: display name, constructor, lane count, lane
// size in bytes, conversion buffer and typed-array view, the scalar
// functions used for `mul` / `not`, and the list of functions to install.
// Integer descriptors also carry minVal / maxVal.

var float32x4 = {
  name: "Float32x4",
  fn: SIMD.Float32x4,
  lanes: 4,
  laneSize: 4,
  buffer: _f32x4,
  view: Float32Array,
  mulFn: binaryMul,
  fns: simdCommonFnNames.concat(simdCompareFnNames, simdFloatMathFnNames,
                                simdPartialLoadStoreFnNames)
};

var int32x4 = {
  name: "Int32x4",
  fn: SIMD.Int32x4,
  lanes: 4,
  laneSize: 4,
  minVal: -0x80000000,
  maxVal: 0x7FFFFFFF,
  buffer: _i32x4,
  notFn: unaryBitwiseNot,
  view: Int32Array,
  mulFn: binaryImul,
  fns: simdCommonFnNames.concat(simdCompareFnNames, simdLogicalFnNames,
                                simdSignedMathFnNames, simdShiftFnNames,
                                simdPartialLoadStoreFnNames)
};

var int16x8 = {
  name: "Int16x8",
  fn: SIMD.Int16x8,
  lanes: 8,
  laneSize: 2,
  minVal: -0x8000,
  maxVal: 0x7FFF,
  buffer: _i16x8,
  notFn: unaryBitwiseNot,
  view: Int16Array,
  mulFn: binaryMul,
  fns: simdCommonFnNames.concat(simdCompareFnNames, simdLogicalFnNames,
                                simdSignedMathFnNames, simdShiftFnNames,
                                simdSaturateFnNames, simdLoadStoreFnNames)
};

var int8x16 = {
  name: "Int8x16",
  fn: SIMD.Int8x16,
  lanes: 16,
  laneSize: 1,
  minVal: -0x80,
  maxVal: 0x7F,
  buffer: _i8x16,
  notFn: unaryBitwiseNot,
  view: Int8Array,
  mulFn: binaryMul,
  fns: simdCommonFnNames.concat(simdCompareFnNames, simdLogicalFnNames,
                                simdSignedMathFnNames, simdShiftFnNames,
                                simdSaturateFnNames, simdLoadStoreFnNames)
};

var uint32x4 = {
  name: "Uint32x4",
  fn: SIMD.Uint32x4,
  lanes: 4,
  laneSize: 4,
  minVal: 0,
  maxVal: 0xFFFFFFFF,
  unsigned: true,
  buffer: _ui32x4,
  notFn: unaryBitwiseNot,
  view: Uint32Array,
  mulFn: binaryImul,
  fns: simdCommonFnNames.concat(simdCompareFnNames, simdLogicalFnNames,
                                simdUnsignedMathFnNames, simdShiftFnNames,
                                simdPartialLoadStoreFnNames)
};

var uint16x8 = {
  name: "Uint16x8",
  fn: SIMD.Uint16x8,
  lanes: 8,
  laneSize: 2,
  unsigned: true,
  minVal: 0,
  maxVal: 0xFFFF,
  buffer: _ui16x8,
  notFn: unaryBitwiseNot,
  view: Uint16Array,
  mulFn: binaryMul,
  fns: simdCommonFnNames.concat(simdCompareFnNames, simdLogicalFnNames,
                                simdUnsignedMathFnNames, simdShiftFnNames,
                                simdSaturateFnNames, simdLoadStoreFnNames)
};

var uint8x16 = {
  name: "Uint8x16",
  fn: SIMD.Uint8x16,
  lanes: 16,
  laneSize: 1,
  unsigned: true,
  minVal: 0,
  maxVal: 0xFF,
  buffer: _ui8x16,
  notFn: unaryBitwiseNot,
  view: Uint8Array,
  mulFn: binaryMul,
  fns: simdCommonFnNames.concat(simdCompareFnNames, simdLogicalFnNames,
                                simdUnsignedMathFnNames, simdShiftFnNames,
                                simdSaturateFnNames, simdLoadStoreFnNames)
};

var bool32x4 = {
  name: "Bool32x4",
  fn: SIMD.Bool32x4,
  lanes: 4,
  laneSize: 4,
  notFn: unaryLogicalNot,
  fns: simdCommonFnNames.concat(["allTrue", "anyTrue"], simdLogicalFnNames)
};

var bool16x8 = {
  name: "Bool16x8",
  fn: SIMD.Bool16x8,
  lanes: 8,
  laneSize: 2,
  notFn: unaryLogicalNot,
  fns: simdCommonFnNames.concat(["allTrue", "anyTrue"], simdLogicalFnNames)
};

var bool8x16 = {
  name: "Bool8x16",
  fn: SIMD.Bool8x16,
  lanes: 16,
  laneSize: 1,
  notFn: unaryLogicalNot,
  fns: simdCommonFnNames.concat(["allTrue", "anyTrue"], simdLogicalFnNames)
};

// Relational ops return the Boolean SIMD type with the same lane count, so
// each numeric descriptor records that type as `boolType`.
uint32x4.boolType = bool32x4;
int32x4.boolType = bool32x4;
float32x4.boolType = bool32x4;
uint16x8.boolType = bool16x8;
int16x8.boolType = bool16x8;
uint8x16.boolType = bool8x16;
int8x16.boolType = bool8x16;

// Source types for the value-converting 'from<Type>' functions.
float32x4.from = [int32x4, uint32x4];
int32x4.from = [float32x4, uint32x4];
int16x8.from = [uint16x8];
int8x16.from = [uint8x16];
uint32x4.from = [float32x4, int32x4];
uint16x8.from = [int16x8];
uint8x16.from = [int8x16];

// SIMD fromBits types.
// Every type in this list gets a 'from<Type>Bits' function for each of the
// other types in the list, in list order.
var simdBitCastTypes = [float32x4,
                        int32x4, int16x8, int8x16,
                        uint32x4, uint16x8, uint8x16];
simdBitCastTypes.forEach(function(target) {
  target.fromBits = simdBitCastTypes.filter(function(source) {
    return source !== target;
  });
});

var simdTypes = [float32x4,
                 int32x4, int16x8, int8x16,
                 uint32x4, uint16x8, uint8x16,
                 bool32x4, bool16x8, bool8x16];

// SIMD Phase2 types.
// This section only runs when a `simdPhase2` binding exists.

if (typeof simdPhase2 !== 'undefined') {
  // Float64x2 constructor and extractLane are polyfilled as a pair.
  if (typeof SIMD.Float64x2 === "undefined" ||
      typeof SIMD.Float64x2.extractLane === "undefined") {
    SIMD.Float64x2 = function(s0, s1) {
      if (this instanceof SIMD.Float64x2) {
        this.s_ = convertArray(_f64x2, new Float64Array([s0, s1]));
      } else {
        // Called without `new`: forward to a proper construction.
        return new SIMD.Float64x2(s0, s1);
      }
    };

    SIMD.Float64x2.extractLane = function(v, i) {
      var vec = SIMD.Float64x2.check(v);
      simdCheckLaneIndex(i, 2);
      return vec.s_[i];
    };
  }

  if (typeof SIMD.Float64x2.swizzle === "undefined") {
    SIMD.Float64x2.swizzle = function(a, s0, s1) {
      var indices = [s0, s1];
      return simdSwizzle(float64x2, a, indices);
    };
  }

  if (typeof SIMD.Float64x2.shuffle === "undefined") {
    SIMD.Float64x2.shuffle = function(a, b, s0, s1) {
      var indices = [s0, s1];
      return simdShuffle(float64x2, a, b, indices);
    };
  }

  // Bool64x2 stores its lanes as a plain array of booleans.
  if (typeof SIMD.Bool64x2 === "undefined" ||
      typeof SIMD.Bool64x2.extractLane === "undefined") {
    SIMD.Bool64x2 = function(s0, s1) {
      if (this instanceof SIMD.Bool64x2) {
        this.s_ = [Boolean(s0), Boolean(s1)];
      } else {
        return new SIMD.Bool64x2(s0, s1);
      }
    };

    SIMD.Bool64x2.extractLane = function(v, i) {
      var vec = SIMD.Bool64x2.check(v);
      simdCheckLaneIndex(i, 2);
      return vec.s_[i];
    };
  }

  var float64x2 = {
    name: "Float64x2",
    fn: SIMD.Float64x2,
    lanes: 2,
    laneSize: 8,
    buffer: _f64x2,
    view: Float64Array,
    mulFn: binaryMul,
    fns: ["check", "splat", "replaceLane", "select",
          "equal", "notEqual", "lessThan", "lessThanOrEqual",
          "greaterThan", "greaterThanOrEqual",
          "add", "sub", "mul", "div", "neg", "abs",
          "min", "max", "minNum", "maxNum",
          "reciprocalApproximation", "reciprocalSqrtApproximation", "sqrt",
          "load", "store"]
  };

  var bool64x2 = {
    name: "Bool64x2",
    fn: SIMD.Bool64x2,
    lanes: 2,
    laneSize: 8,
    notFn: unaryLogicalNot,
    fns: ["check", "splat", "replaceLane",
          "allTrue", "anyTrue", "and", "or", "xor", "not"]
  };

  float64x2.boolType = bool64x2;

  // Float64x2 becomes a bit-cast source for each existing type, and each of
  // those types becomes a bit-cast source for Float64x2.
  simdBitCastTypes.forEach(function(target) {
    target.fromBits.push(float64x2);
  });
  float64x2.fromBits = simdBitCastTypes.slice();

  simdTypes.push(float64x2);
  simdTypes.push(bool64x2);
}

// SIMD prototype functions.
908 | var prototypeFns = { 909 | valueOf: 910 | function(type) { 911 | return function() { 912 | throw new TypeError(type.name + " cannot be converted to a number"); 913 | } 914 | }, 915 | 916 | toString: 917 | function(type) { 918 | return function() { 919 | return simdToString(type, this); 920 | } 921 | }, 922 | 923 | toLocaleString: 924 | function(type) { 925 | return function() { 926 | return simdToLocaleString(type, this); 927 | } 928 | }, 929 | }; 930 | 931 | // SIMD constructor functions. 932 | 933 | var simdFns = { 934 | check: 935 | function(type) { 936 | return function(a) { 937 | if (!(a instanceof type.fn)) { 938 | throw new TypeError("Argument is not a " + type.name + "."); 939 | } 940 | return a; 941 | } 942 | }, 943 | 944 | splat: 945 | function(type) { 946 | return function(s) { return simdSplat(type, s); } 947 | }, 948 | 949 | replaceLane: 950 | function(type) { 951 | return function(a, i, s) { return simdReplaceLane(type, a, i, s); } 952 | }, 953 | 954 | allTrue: 955 | function(type) { 956 | return function(a) { return simdAllTrue(type, a); } 957 | }, 958 | 959 | anyTrue: 960 | function(type) { 961 | return function(a) { return simdAnyTrue(type, a); } 962 | }, 963 | 964 | and: 965 | function(type) { 966 | return function(a, b) { 967 | return simdBinaryOp(type, binaryAnd, a, b); 968 | } 969 | }, 970 | 971 | or: 972 | function(type) { 973 | return function(a, b) { 974 | return simdBinaryOp(type, binaryOr, a, b); 975 | } 976 | }, 977 | 978 | xor: 979 | function(type) { 980 | return function(a, b) { 981 | return simdBinaryOp(type, binaryXor, a, b); 982 | } 983 | }, 984 | 985 | not: 986 | function(type) { 987 | return function(a) { 988 | return simdUnaryOp(type, type.notFn, a); 989 | } 990 | }, 991 | 992 | equal: 993 | function(type) { 994 | return function(a, b) { 995 | return simdRelationalOp(type, binaryEqual, a, b); 996 | } 997 | }, 998 | 999 | notEqual: 1000 | function(type) { 1001 | return function(a, b) { 1002 | return simdRelationalOp(type, 
binaryNotEqual, a, b); 1003 | } 1004 | }, 1005 | 1006 | lessThan: 1007 | function(type) { 1008 | return function(a, b) { 1009 | return simdRelationalOp(type, binaryLess, a, b); 1010 | } 1011 | }, 1012 | 1013 | lessThanOrEqual: 1014 | function(type) { 1015 | return function(a, b) { 1016 | return simdRelationalOp(type, binaryLessEqual, a, b); 1017 | } 1018 | }, 1019 | 1020 | greaterThan: 1021 | function(type) { 1022 | return function(a, b) { 1023 | return simdRelationalOp(type, binaryGreater, a, b); 1024 | } 1025 | }, 1026 | 1027 | greaterThanOrEqual: 1028 | function(type) { 1029 | return function(a, b) { 1030 | return simdRelationalOp(type, binaryGreaterEqual, a, b); 1031 | } 1032 | }, 1033 | 1034 | add: 1035 | function(type) { 1036 | return function(a, b) { 1037 | return simdBinaryOp(type, binaryAdd, a, b); 1038 | } 1039 | }, 1040 | 1041 | sub: 1042 | function(type) { 1043 | return function(a, b) { 1044 | return simdBinaryOp(type, binarySub, a, b); 1045 | } 1046 | }, 1047 | 1048 | mul: 1049 | function(type) { 1050 | return function(a, b) { 1051 | return simdBinaryOp(type, type.mulFn, a, b); 1052 | } 1053 | }, 1054 | 1055 | div: 1056 | function(type) { 1057 | return function(a, b) { 1058 | return simdBinaryOp(type, binaryDiv, a, b); 1059 | } 1060 | }, 1061 | 1062 | neg: 1063 | function(type) { 1064 | return function(a) { 1065 | return simdUnaryOp(type, unaryNeg, a); 1066 | } 1067 | }, 1068 | 1069 | abs: 1070 | function(type) { 1071 | return function(a) { 1072 | return simdUnaryOp(type, Math.abs, a); 1073 | } 1074 | }, 1075 | 1076 | min: 1077 | function(type) { 1078 | return function(a, b) { 1079 | return simdBinaryOp(type, Math.min, a, b); 1080 | } 1081 | }, 1082 | 1083 | max: 1084 | function(type) { 1085 | return function(a, b) { 1086 | return simdBinaryOp(type, Math.max, a, b); 1087 | } 1088 | }, 1089 | 1090 | minNum: 1091 | function(type) { 1092 | return function(a, b) { 1093 | return simdBinaryOp(type, minNum, a, b); 1094 | } 1095 | }, 1096 | 1097 | maxNum: 1098 
| function(type) { 1099 | return function(a, b) { 1100 | return simdBinaryOp(type, maxNum, a, b); 1101 | } 1102 | }, 1103 | 1104 | load: 1105 | function(type) { 1106 | return function(tarray, index) { 1107 | return simdLoad(type, tarray, index, type.lanes); 1108 | } 1109 | }, 1110 | 1111 | load1: 1112 | function(type) { 1113 | return function(tarray, index) { 1114 | return simdLoad(type, tarray, index, 1); 1115 | } 1116 | }, 1117 | 1118 | load2: 1119 | function(type) { 1120 | return function(tarray, index) { 1121 | return simdLoad(type, tarray, index, 2); 1122 | } 1123 | }, 1124 | 1125 | load3: 1126 | function(type) { 1127 | return function(tarray, index) { 1128 | return simdLoad(type, tarray, index, 3); 1129 | } 1130 | }, 1131 | 1132 | store: 1133 | function(type) { 1134 | return function(tarray, index, a) { 1135 | return simdStore(type, tarray, index, a, type.lanes); 1136 | } 1137 | }, 1138 | 1139 | store1: 1140 | function(type) { 1141 | return function(tarray, index, a) { 1142 | return simdStore(type, tarray, index, a, 1); 1143 | } 1144 | }, 1145 | 1146 | store2: 1147 | function(type) { 1148 | return function(tarray, index, a) { 1149 | return simdStore(type, tarray, index, a, 2); 1150 | } 1151 | }, 1152 | 1153 | store3: 1154 | function(type) { 1155 | return function(tarray, index, a) { 1156 | return simdStore(type, tarray, index, a, 3); 1157 | } 1158 | }, 1159 | 1160 | select: 1161 | function(type) { 1162 | return function(selector, a, b) { 1163 | return simdSelect(type, selector, a, b); 1164 | } 1165 | }, 1166 | 1167 | 1168 | reciprocalApproximation: 1169 | function(type) { 1170 | return function(a) { 1171 | a = type.fn.check(a); 1172 | return type.fn.div(type.fn.splat(1.0), a); 1173 | } 1174 | }, 1175 | 1176 | reciprocalSqrtApproximation: 1177 | function(type) { 1178 | return function(a) { 1179 | a = type.fn.check(a); 1180 | return type.fn.reciprocalApproximation(type.fn.sqrt(a)); 1181 | } 1182 | }, 1183 | 1184 | sqrt: 1185 | function(type) { 1186 | return 
function(a) { 1187 | return simdUnaryOp(type, Math.sqrt, a); 1188 | } 1189 | }, 1190 | 1191 | shiftLeftByScalar: 1192 | function(type) { 1193 | return function(a, bits) { 1194 | bits &= type.laneSize * 8 - 1; 1195 | return simdShiftOp(type, binaryShiftLeft, a, bits); 1196 | } 1197 | }, 1198 | 1199 | shiftRightByScalar: 1200 | function(type) { 1201 | if (type.unsigned) { 1202 | return function(a, bits) { 1203 | bits &= type.laneSize * 8 - 1; 1204 | return simdShiftOp(type, binaryShiftRightLogical, a, bits); 1205 | } 1206 | } else { 1207 | return function(a, bits) { 1208 | bits &= type.laneSize * 8 - 1; 1209 | return simdShiftOp(type, binaryShiftRightArithmetic, a, bits); 1210 | } 1211 | } 1212 | }, 1213 | 1214 | addSaturate: 1215 | function(type) { 1216 | function addSaturate(a, b) { 1217 | return clamp(a + b, type.minVal, type.maxVal); 1218 | } 1219 | return function(a, b) { return simdBinaryOp(type, addSaturate, a, b); } 1220 | }, 1221 | 1222 | subSaturate: 1223 | function(type) { 1224 | function subSaturate(a, b) { 1225 | return clamp(a - b, type.minVal, type.maxVal); 1226 | } 1227 | return function(a, b) { return simdBinaryOp(type, subSaturate, a, b); } 1228 | }, 1229 | } 1230 | 1231 | // Install functions. 1232 | 1233 | simdTypes.forEach(function(type) { 1234 | // Install each prototype function on each SIMD prototype. 1235 | var simdFn = type.fn; 1236 | var proto = simdFn.prototype; 1237 | for (var name in prototypeFns) { 1238 | if (!proto.hasOwnProperty(name)) 1239 | proto[name] = prototypeFns[name](type); 1240 | } 1241 | // Install regular functions. 1242 | type.fns.forEach(function(name) { 1243 | if (typeof simdFn[name] === "undefined") 1244 | simdFn[name] = simdFns[name](type); 1245 | }); 1246 | // Install 'fromTIMD' functions. 1247 | if (type.from) { 1248 | type.from.forEach(function(fromType) { 1249 | var name = "from" + fromType.name; 1250 | var toType = type; // pull type into closure. 
1251 | if (typeof type.fn[name] === "undefined") { 1252 | type.fn[name] = 1253 | function(a) { return simdFrom(toType, fromType, a); } 1254 | } 1255 | }); 1256 | } 1257 | // Install 'fromTIMDBits' functions. 1258 | if (type.fromBits) { 1259 | type.fromBits.forEach(function(fromType) { 1260 | var name = "from" + fromType.name + "Bits"; 1261 | var toType = type; // pull type into closure. 1262 | if (typeof type.fn[name] === "undefined") { 1263 | type.fn[name] = 1264 | function(a) { return simdFromBits(toType, fromType, a); } 1265 | } 1266 | }); 1267 | } 1268 | }); 1269 | 1270 | // If we're in a browser, the global namespace is named 'window'. If we're 1271 | // in node, it's named 'global'. If we're in a web worker, it's named 1272 | // 'self'. If we're in a shell, 'this' might work. 1273 | })(typeof window !== "undefined" 1274 | ? window 1275 | : (typeof process === 'object' && 1276 | typeof require === 'function' && 1277 | typeof global === 'object') 1278 | ? global 1279 | : typeof self === 'object' 1280 | ? self 1281 | : this); 1282 | -------------------------------------------------------------------------------- /src/ecmascript_simd_tests.js: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2013 3 | 4 | This software is provided 'as-is', without any express or implied 5 | warranty. In no event will the authors be held liable for any damages 6 | arising from the use of this software. 7 | 8 | Permission is granted to anyone to use this software for any purpose, 9 | including commercial applications, and to alter it and redistribute it 10 | freely, subject to the following restrictions: 11 | 12 | 1. The origin of this software must not be misrepresented; you must not 13 | claim that you wrote the original software. If you use this software 14 | in a product, an acknowledgment in the product documentation would be 15 | appreciated but is not required. 16 | 2. 
Altered source versions must be plainly marked as such, and must not be 17 | misrepresented as being the original software. 18 | 3. This notice may not be removed or altered from any source distribution. 19 | */ 20 | 21 | function minNum(x, y) { 22 | return x != x ? y : 23 | y != y ? x : 24 | Math.min(x, y); 25 | } 26 | 27 | function maxNum(x, y) { 28 | return x != x ? y : 29 | y != y ? x : 30 | Math.max(x, y); 31 | } 32 | 33 | function sameValue(x, y) { 34 | if (x == y) 35 | return x != 0 || y != 0 || (1/x == 1/y); 36 | 37 | return x != x && y != y; 38 | } 39 | 40 | function sameValueZero(x, y) { 41 | if (x == y) return true; 42 | return x != x & y != y; 43 | } 44 | 45 | function binaryMul(a, b) { return a * b; } 46 | var binaryImul; 47 | if (typeof Math.imul !== 'undefined') { 48 | binaryImul = Math.imul; 49 | } else { 50 | binaryImul = function(a, b) { 51 | var ah = (a >>> 16) & 0xffff; 52 | var al = a & 0xffff; 53 | var bh = (b >>> 16) & 0xffff; 54 | var bl = b & 0xffff; 55 | // the shift by 0 fixes the sign on the high part 56 | // the final |0 converts the unsigned value into a signed value 57 | return ((al * bl) + (((ah * bl + al * bh) << 16) >>> 0)|0); 58 | }; 59 | } 60 | 61 | var _f32x4 = new Float32Array(4); 62 | var _f64x2 = new Float64Array(_f32x4.buffer); 63 | var _i32x4 = new Int32Array(_f32x4.buffer); 64 | var _i16x8 = new Int16Array(_f32x4.buffer); 65 | var _i8x16 = new Int8Array(_f32x4.buffer); 66 | var _ui32x4 = new Uint32Array(_f32x4.buffer); 67 | var _ui16x8 = new Uint16Array(_f32x4.buffer); 68 | var _ui8x16 = new Uint8Array(_f32x4.buffer); 69 | 70 | var float32x4 = { 71 | name: "Float32x4", 72 | fn: SIMD.Float32x4, 73 | floatLane: true, 74 | signed: true, 75 | numerical: true, 76 | lanes: 4, 77 | laneSize: 4, 78 | interestingValues: [0, -0, 1, -1, 0.9, -0.9, 1.414, 0x7F, -0x80, -0x8000, -0x80000000, 0x7FFF, 0x7FFFFFFF, Infinity, -Infinity, NaN], 79 | view: Float32Array, 80 | buffer: _f32x4, 81 | mulFn: binaryMul, 82 | } 83 | 84 | var int32x4 = 
{ 85 | name: "Int32x4", 86 | fn: SIMD.Int32x4, 87 | intLane: true, 88 | signed: true, 89 | numerical: true, 90 | logical: true, 91 | lanes: 4, 92 | laneSize: 4, 93 | minVal: -0x80000000, 94 | maxVal: 0x7FFFFFFF, 95 | interestingValues: [0, 1, -1, 0x40000000, 0x7FFFFFFF, -0x80000000], 96 | view: Int32Array, 97 | buffer: _i32x4, 98 | mulFn: binaryImul, 99 | } 100 | 101 | var int16x8 = { 102 | name: "Int16x8", 103 | fn: SIMD.Int16x8, 104 | intLane: true, 105 | signed: true, 106 | numerical: true, 107 | logical: true, 108 | lanes: 8, 109 | laneSize: 2, 110 | laneMask: 0xFFFF, 111 | minVal: -0x8000, 112 | maxVal: 0x7FFF, 113 | interestingValues: [0, 1, -1, 0x4000, 0x7FFF, -0x8000], 114 | view: Int16Array, 115 | buffer: _i16x8, 116 | mulFn: binaryMul, 117 | } 118 | 119 | var int8x16 = { 120 | name: "Int8x16", 121 | fn: SIMD.Int8x16, 122 | intLane: true, 123 | signed: true, 124 | numerical: true, 125 | logical: true, 126 | lanes: 16, 127 | laneSize: 1, 128 | laneMask: 0xFF, 129 | minVal: -0x80, 130 | maxVal: 0x7F, 131 | interestingValues: [0, 1, -1, 0x40, 0x7F, -0x80], 132 | view: Int8Array, 133 | buffer: _i8x16, 134 | mulFn: binaryMul, 135 | } 136 | 137 | var uint32x4 = { 138 | name: "Uint32x4", 139 | fn: SIMD.Uint32x4, 140 | intLane: true, 141 | unsigned: true, 142 | numerical: true, 143 | logical: true, 144 | lanes: 4, 145 | laneSize: 4, 146 | minVal: 0, 147 | maxVal: 0xFFFFFFFF, 148 | interestingValues: [0, 1, 0x40000000, 0x7FFFFFFF, 0xFFFFFFFF], 149 | view: Uint32Array, 150 | buffer: _ui32x4, 151 | mulFn: binaryImul, 152 | } 153 | 154 | var uint16x8 = { 155 | name: "Uint16x8", 156 | fn: SIMD.Uint16x8, 157 | intLane: true, 158 | unsigned: true, 159 | numerical: true, 160 | logical: true, 161 | lanes: 8, 162 | laneSize: 2, 163 | laneMask: 0xFFFF, 164 | minVal: 0, 165 | maxVal: 0xFFFF, 166 | interestingValues: [0, 1, 0x4000, 0x7FFF, 0xFFFF], 167 | view: Uint16Array, 168 | buffer: _ui16x8, 169 | mulFn: binaryMul, 170 | } 171 | 172 | var uint8x16 = { 173 | name: 
"Uint8x16", 174 | fn: SIMD.Uint8x16, 175 | intLane: true, 176 | unsigned: true, 177 | numerical: true, 178 | logical: true, 179 | lanes: 16, 180 | laneSize: 1, 181 | laneMask: 0xFF, 182 | minVal: 0, 183 | maxVal: 0xFF, 184 | interestingValues: [0, 1, 0x40, 0x7F, 0xFF], 185 | view: Int8Array, 186 | buffer: _ui8x16, 187 | mulFn: binaryMul, 188 | } 189 | 190 | var bool32x4 = { 191 | name: "Bool32x4", 192 | fn: SIMD.Bool32x4, 193 | boolLane: true, 194 | logical: true, 195 | lanes: 4, 196 | laneSize: 4, 197 | interestingValues: [true, false], 198 | } 199 | 200 | var bool16x8 = { 201 | name: "Bool16x8", 202 | fn: SIMD.Bool16x8, 203 | boolLane: true, 204 | logical: true, 205 | lanes: 8, 206 | laneSize: 2, 207 | interestingValues: [true, false], 208 | } 209 | 210 | var bool8x16 = { 211 | name: "Bool8x16", 212 | fn: SIMD.Bool8x16, 213 | boolLane: true, 214 | logical: true, 215 | lanes: 16, 216 | laneSize: 1, 217 | interestingValues: [true, false], 218 | } 219 | 220 | // Filter functions. 221 | function isFloatType(type) { return type.floatLane; } 222 | function isIntType(type) { return type.intLane; } 223 | function isBoolType(type) { return type.boolLane; } 224 | function isNumerical(type) { return type.numerical; } 225 | function isLogical(type) { return type.logical; } 226 | function isSigned(type) { return type.signed; } 227 | function isSignedIntType(type) { return type.intLane && type.signed; } 228 | function isUnsignedIntType(type) { return type.intLane && type.unsigned; } 229 | function isSmallIntType(type) { return type.intLane && type.lanes >= 8; } 230 | function isSmallUnsignedIntType(type) { return type.intLane && type.unsigned && type.lanes >= 8; } 231 | function hasLoadStore123(type) { return !type.boolLane && type.lanes == 4; } 232 | 233 | // Each SIMD type has a corresponding Boolean SIMD type, which is returned by 234 | // relational ops. 
float32x4.boolType = int32x4.boolType = uint32x4.boolType = bool32x4;
int16x8.boolType = uint16x8.boolType = bool16x8;
int8x16.boolType = uint8x16.boolType = bool8x16;

// Source types for the value-converting from<Type> functions of each SIMD type.
float32x4.from = [int32x4, uint32x4];
int32x4.from = [float32x4, uint32x4];
int16x8.from = [uint16x8];
int8x16.from = [uint8x16];
uint32x4.from = [float32x4, int32x4];
uint16x8.from = [int16x8];
uint8x16.from = [int8x16];

// Source types for the bit-casting from<Type>Bits functions of each SIMD type.
float32x4.fromBits = [int32x4, int16x8, int8x16, uint32x4, uint16x8, uint8x16];
int32x4.fromBits = [float32x4, int16x8, int8x16, uint32x4, uint16x8, uint8x16];
int16x8.fromBits = [float32x4, int32x4, int8x16, uint32x4, uint16x8, uint8x16];
int8x16.fromBits = [float32x4, int32x4, int16x8, uint32x4, uint16x8, uint8x16];
uint32x4.fromBits = [float32x4, int32x4, int16x8, int8x16, uint16x8, uint8x16];
uint16x8.fromBits = [float32x4, int32x4, int16x8, int8x16, uint32x4, uint8x16];
uint8x16.fromBits = [float32x4, int32x4, int16x8, int8x16, uint32x4, uint16x8];

// All type descriptors the generic tests iterate over.
var simdTypes = [float32x4,
                 int32x4, int16x8, int8x16,
                 uint32x4, uint16x8, uint8x16,
                 bool32x4, bool16x8, bool8x16];

// Phase 2 types are only described (and tested) when the embedder defines
// simdPhase2.
if (typeof simdPhase2 !== 'undefined') {
  var float64x2 = {
    name: "Float64x2",
    fn: SIMD.Float64x2,
    floatLane: true,
    signed: true,
    numerical: true,
    lanes: 2,
    laneSize: 8,
    interestingValues: [0, -0, 1, -1, 1.414, 0x7F, -0x80, -0x8000, -0x80000000, 0x7FFF, 0x7FFFFFFF, Infinity, -Infinity, NaN],
    view: Float64Array,
    buffer: _f64x2,
    mulFn: binaryMul,
  };

  var bool64x2 = {
    name: "Bool64x2",
    fn: SIMD.Bool64x2,
    boolLane: true,
    lanes: 2,
    laneSize: 8,
    interestingValues: [true, false],
  };

  float64x2.boolType = bool64x2;

  // Every 128-bit numeric type can also be bit-cast from Float64x2. The
  // arrays are extended in place; they must not be reassigned afterwards or
  // the Float64x2 entry would be lost again.
  float32x4.fromBits.push(float64x2);
  int32x4.fromBits.push(float64x2);
  int16x8.fromBits.push(float64x2);
  int8x16.fromBits.push(float64x2);
  uint32x4.fromBits.push(float64x2);
  uint16x8.fromBits.push(float64x2);
  uint8x16.fromBits.push(float64x2);

  float64x2.fromBits = [float32x4, int32x4, int16x8, int8x16,
                        uint32x4, uint16x8, uint8x16];

  simdTypes.push(float64x2);
  simdTypes.push(bool64x2);
}

// SIMD reference functions.

// Converts a JS value to the lane type of the given SIMD type. Types without
// a scratch buffer (the boolean types) convert with !!; all others round-trip
// the value through element 0 of the type's typed-array view.
function simdConvert(type, value) {
  if (type.buffer === undefined) return !!value;  // bool types
  type.buffer[0] = value;
  return type.buffer[0];
}

// Reference implementation of toString.
function simdToString(type, value) {
  value = type.fn.check(value);
  var str = "SIMD." + type.name + "(";
  str += type.fn.extractLane(value, 0);
  for (var i = 1; i < type.lanes; i++) {
    str += ", " + type.fn.extractLane(value, i);
  }
  return str + ")";
}

// Reference implementation of toLocaleString.
function simdToLocaleString(type, value) {
  value = type.fn.check(value);
  var str = "SIMD." + type.name + "(";
  str += type.fn.extractLane(value, 0).toLocaleString();
  for (var i = 1; i < type.lanes; i++) {
    str += ", " + type.fn.extractLane(value, i).toLocaleString();
  }
  return str + ")";
}

// Utility functions.

// Create a value for testing, with vanilla lane values, i.e. [0, 1, 2, ..]
// for numeric types, [false, true, true, ..] for boolean types. These test
// values shouldn't contain NaNs or other "interesting" values.
function createTestValue(type) {
  var lanes = [];
  for (var i = 0; i < type.lanes; i++)
    lanes.push(i);
  return type.fn.apply(type.fn, lanes);
}

// Create a value of the given type by passing v to the constructor for
// every lane.
function createSplatValue(type, v) {
  var lanes = [];
  for (var i = 0; i < type.lanes; i++)
    lanes.push(v);
  return type.fn.apply(type.fn, lanes);
}

// Check every lane of SIMD value a against expect(laneIndex), after converting
// the expectation to the lane type. Lanes match when they are the same value
// or differ by less than 0.00001. Reports a single failure listing all
// expected lanes if any lane mismatches.
function checkValue(type, a, expect) {
  var ok = true;
  for (var i = 0; i < type.lanes; i++) {
    var v = type.fn.extractLane(a, i);
    var ev = simdConvert(type, expect(i));
    if (!sameValue(ev, v) && Math.abs(ev - v) >= 0.00001)
      ok = false;
  }
  if (!ok) {
    var lanes = [];
    for (var i = 0; i < type.lanes; i++)
      lanes.push(simdConvert(type, expect(i)));
    fail('expected SIMD.' + type.name + '(' + lanes + ') but found ' + a.toString());
  }
}

// Test methods for the different kinds of operations.

// Test the constructor and splat with the given lane values.
function testConstructor(type) {
  equal('function', typeof type.fn);
  equal('function', typeof type.fn.splat);
  for (var v of type.interestingValues) {
    var expected = simdConvert(type, v);
    var result = createSplatValue(type, v);
    checkValue(type, result, function(index) { return expected; });
    // splat.
    result = type.fn.splat(v);
    checkValue(type, result, function(index) { return expected; });
  }
}

// Test that check() returns values of its own type unchanged and throws for
// everything else.
function testCheck(type) {
  equal('function', typeof type.fn.check);
  // Other SIMD types shouldn't check for this type.
  var a = type.fn();
  for (var otherType of simdTypes) {
    if (otherType === type) {
      var result = type.fn.check(a);
      checkValue(type, result, function(index) { return type.fn.extractLane(a, index); });
    } else {
      // check lives on the SIMD constructor (fn), not on the descriptor;
      // calling it on the descriptor would throw for the wrong reason.
      throws(function() { otherType.fn.check(a); });
    }
  }
  // Neither should other types.
  for (var x of [ {}, "", 0, 1, true, false, undefined, null, NaN, Infinity]) {
    throws(function() { type.fn.check(x); });
  }
}

// Test replaceLane with every interesting value at every lane, and that
// invalid lane indices throw.
function testReplaceLane(type) {
  equal('function', typeof type.fn.replaceLane);
  var a = createTestValue(type);
  for (var v of type.interestingValues) {
    var expected = simdConvert(type, v);
    for (var i = 0; i < type.lanes; i++) {
      var result = type.fn.replaceLane(a, i, v);
      checkValue(type, result,
                 function(index) {
                   return index == i ? expected : type.fn.extractLane(a, index);
                 });
    }
  }

  function testIndexCheck(index) {
    throws(function() { type.fn.replaceLane(a, index, 0); });
  }
  testIndexCheck(type.lanes);
  testIndexCheck(13.37);
  testIndexCheck(null);
  testIndexCheck(undefined);
  testIndexCheck({});
  testIndexCheck(true);
  testIndexCheck('yo');
  testIndexCheck(-1);
  testIndexCheck(128);
}

// Compare unary op's behavior to ref op at each lane.
function testUnaryOp(type, op, refOp) {
  equal('function', typeof type.fn[op]);
  for (var v of type.interestingValues) {
    var expected = simdConvert(type, refOp(v));
    var a = type.fn.splat(v);
    var result = type.fn[op](a);
    checkValue(type, result, function(index) { return expected; });
  }
}

// Compare binary op's behavior to ref op at each lane with the Cartesian
// product of the given values.
// type:  type descriptor; op: name of the function on type.fn;
// refOp: scalar reference function (a, b) -> result. Both operands are
// converted to the lane type before refOp is applied, and the reference
// result is converted again before comparison.
function testBinaryOp(type, op, refOp) {
  equal('function', typeof type.fn[op]);
  for (var av of type.interestingValues) {
    for (var bv of type.interestingValues) {
      var expected = simdConvert(type, refOp(simdConvert(type, av), simdConvert(type, bv)));
      var a = type.fn.splat(av);
      var b = type.fn.splat(bv);
      var result = type.fn[op](a, b);
      checkValue(type, result, function(index) { return expected; });
    }
  }
}

// Compare relational op's behavior to ref op at each lane with the Cartesian
// product of the given values. The result is checked against the type's
// corresponding Boolean SIMD type (type.boolType).
function testRelationalOp(type, op, refOp) {
  equal('function', typeof type.fn[op]);
  for (var av of type.interestingValues) {
    for (var bv of type.interestingValues) {
      var expected = refOp(simdConvert(type, av), simdConvert(type, bv));
      var a = type.fn.splat(av);
      var b = type.fn.splat(bv);
      var result = type.fn[op](a, b);
      checkValue(type.boolType, result, function(index) { return expected; });
    }
  }
}

// Compare shift op's behavior to ref op at each lane.
// Shift counts cover negative, zero, small, and counts at and around the lane
// width in bits.
function testShiftOp(type, op, refOp) {
  equal('function', typeof type.fn[op]);
  for (var v of type.interestingValues) {
    var s = type.laneSize * 8;
    for (var bits of [-1, 0, 1, 2, s - 1, s, s + 1]) {
      var expected = simdConvert(type, refOp(simdConvert(type, v), bits));
      var a = type.fn.splat(v);
      var result = type.fn[op](a, bits);
      checkValue(type, result, function(index) { return expected; });
    }
  }
}

// Test the value-converting toType.fn[name](fromValue). Source values are
// truncated toward zero; when toType declares a [minVal, maxVal] range and the
// truncated value (or NaN) falls outside it, the conversion must throw.
function testFrom(toType, fromType, name) {
  equal('function', typeof toType.fn[name]);
  for (var v of fromType.interestingValues) {
    var fromValue = createSplatValue(fromType, v);
    v = Math.trunc(simdConvert(fromType, v));
    if (toType.minVal !== undefined &&
        !(toType.minVal <= v && v <= toType.maxVal)) {
      throws(function() { toType.fn[name](fromValue) });
    } else {
      v = simdConvert(toType, v);
      var result = toType.fn[name](fromValue);
      checkValue(toType, result, function(index) { return v; });
    }
  }
}

// Test the bit-casting toType.fn[name](fromValue). The source lanes are
// written into fromType's scratch view and the expected lanes are read back
// through toType's scratch view.
function testFromBits(toType, fromType, name) {
  equal('function', typeof toType.fn[name]);
  for (var v of fromType.interestingValues) {
    var fromValue = createSplatValue(fromType, v);
    var result = toType.fn[name](fromValue);
    for (var i = 0; i < fromType.lanes; i++)
      fromType.buffer[i] = fromType.fn.extractLane(fromValue, i);
    checkValue(toType, result, function(index) { return toType.buffer[index]; });
  }
}

function testAnyTrue(type) {
  equal('function', typeof type.fn.anyTrue);
  // All lanes 'false'.
  var a = type.fn.splat(false);
  ok(!type.fn.anyTrue(a));
  // One lane 'true'.
  for (var i = 0; i < type.lanes; i++) {
    a = type.fn.replaceLane(a, i, true);
    ok(type.fn.anyTrue(a));
  }
  // All lanes 'true'.
  a = type.fn.splat(true);
  ok(type.fn.anyTrue(a));
}

function testAllTrue(type) {
  equal('function', typeof type.fn.allTrue);
  // All lanes 'true'.
  var a = type.fn.splat(true);
  ok(type.fn.allTrue(a));
  // One lane 'false'.
  for (var i = 0; i < type.lanes; i++) {
    a = type.fn.replaceLane(a, i, false);
    ok(!type.fn.allTrue(a));
  }
  // All lanes 'false'.
  a = type.fn.splat(false);
  ok(!type.fn.allTrue(a));
}

function testSelect(type) {
  equal('function', typeof type.fn.select);
  // set a and b to values that are different for all numerical types.
  var av = 1;
  var bv = 2;
  var a = type.fn.splat(av);
  var b = type.fn.splat(bv);
  // test all selectors with a single 'true' lane.
  for (var i = 0; i < type.lanes; i++) {
    var selector = type.boolType.fn();
    selector = type.boolType.fn.replaceLane(selector, i, true);
    var result = type.fn.select(selector, a, b);
    checkValue(type, result, function(index) { return index == i ? av : bv; });
  }
}

function testSwizzle(type) {
  equal('function', typeof type.fn.swizzle);
  var a = createTestValue(type);  // 0, 1, 2, 3, 4, 5, 6, ...
  var indices = [];
  // Identity swizzle.
  for (var i = 0; i < type.lanes; i++) indices.push(i);
  var result = type.fn.swizzle.apply(type.fn, [a].concat(indices));
  checkValue(type, result, function(index) { return type.fn.extractLane(a, index); });
  // Reverse swizzle.
  indices.reverse();
  var result = type.fn.swizzle.apply(type.fn, [a].concat(indices));
  checkValue(type, result, function(index) { return type.fn.extractLane(a, type.lanes - index - 1); });

  // Every index position must reject an invalid lane index.
  function testIndexCheck(index) {
    for (var i = 0; i < type.lanes; i++) {
      var args = [a].concat(indices);
      args[i + 1] = index;
      throws(function() { type.fn.swizzle.apply(type.fn, args); });
    }
  }
  testIndexCheck(type.lanes);
  testIndexCheck(13.37);
  testIndexCheck(null);
  testIndexCheck(undefined);
  testIndexCheck({});
  testIndexCheck(true);
  testIndexCheck('yo');
  testIndexCheck(-1);
  testIndexCheck(128);
}

function testShuffle(type) {
  equal('function', typeof type.fn.shuffle);
  var indices = [];
  for (var i = 0; i < type.lanes; i++) indices.push(i);

  var a = type.fn.apply(type.fn, indices);            // 0, 1, 2, 3, 4 ...
  var b = type.fn.add(a, type.fn.splat(type.lanes));  // lanes, lanes+1 ...
  // All lanes from a.
  var result = type.fn.shuffle.apply(type.fn, [a, b].concat(indices));
  checkValue(type, result, function(index) { return type.fn.extractLane(a, index); });
  // One lane from b.
  for (var i = 0; i < type.lanes; i++) {
    var args = [a, b].concat(indices);
    args[2 + i] += type.lanes;
    var result = type.fn.shuffle.apply(type.fn, args);
    checkValue(type, result, function(index) {
      var val = index == i ? b : a;
      return type.fn.extractLane(val, index);
    });
  }
  // All lanes from b.
  for (var i = 0; i < type.lanes; i++) indices[i] += type.lanes;
  var result = type.fn.shuffle.apply(type.fn, [a, b].concat(indices));
  checkValue(type, result, function(index) { return type.fn.extractLane(b, index); });

  // Every index position must reject an invalid lane index.
  function testIndexCheck(index) {
    for (var i = 0; i < type.lanes; i++) {
      var args = [a, b].concat(indices);
      args[i + 2] = index;
      throws(function() { type.fn.shuffle.apply(type.fn, args); });
    }
  }
  testIndexCheck(2 * type.lanes);
  testIndexCheck(13.37);
  testIndexCheck(null);
  testIndexCheck(undefined);
  testIndexCheck({});
  testIndexCheck(true);
  testIndexCheck('yo');
  testIndexCheck(-1);
  testIndexCheck(128);
}

// Test the load function type.fn[name], which is expected to load `count`
// lanes from a typed array and leave the remaining lanes zero.
function testLoad(type, name, count) {
  var loadFn = type.fn[name];
  equal('function', typeof loadFn);
  var bufLanes = 2 * type.lanes;               // Test all alignments.
  var bufSize = bufLanes * type.laneSize + 8;  // Extra for over-alignment test.
  var ab = new ArrayBuffer(bufSize);
  var buf = new type.view(ab);
  for (var i = 0; i < bufLanes; i++) buf[i] = i;  // Number buffer sequentially.
  // Test aligned loads.
  for (var i = 0; i < type.lanes; i++) {
    var a = loadFn(buf, i);
    checkValue(type, a, function(index) { return index < count ? i + index : 0; });
  }

  // Test index coercions.
  // Unlike typedArray[index], non-canonical strings are allowed here.
  checkValue(type, loadFn(buf, "0"), function(index) { return index < count ? index : 0; });
  checkValue(type, loadFn(buf, " -0.0 "), function(index) { return index < count ? index : 0; });
  checkValue(type, loadFn(buf, "00"), function(index) { return index < count ? index : 0; });
  checkValue(type, loadFn(buf, false), function(index) { return index < count ? index : 0; });
  checkValue(type, loadFn(buf, null), function(index) { return index < count ? index : 0; });
  checkValue(type, loadFn(buf, "01"), function(index) { return index < count ? 1 + index : 0; });
  checkValue(type, loadFn(buf, " +1e0"), function(index) { return index < count ? 1 + index : 0; });
  checkValue(type, loadFn(buf, true), function(index) { return index < count ? 1 + index : 0; });

  // Test the 2 possible over-alignments.
  var f64 = new Float64Array(ab);
  var stride = 8 / type.laneSize;
  for (var i = 0; i < 1; i++) {
    var a = loadFn(f64, i);
    checkValue(type, a, function(index) { return index < count ? stride * i + index : 0; });
  }
  // Test the 7 possible mis-alignments.
  var i8 = new Int8Array(ab);
  for (var misalignment = 1; misalignment < 8; misalignment++) {
    // Shift the buffer up by 1 byte.
    for (var i = i8.length - 1; i > 0; i--)
      i8[i] = i8[i - 1];
    var a = loadFn(i8, misalignment);
    // The shift loop above leaves i at 0, so the expected lanes are 0, 1, ...
    checkValue(type, a, function(index) { return index < count ? i + index : 0; });
  }

  function testIndexCheck(buf, index) {
    throws(function () { loadFn(buf, index); });
  }
  testIndexCheck(buf, -1);
  testIndexCheck(buf, 0.7);
  testIndexCheck(buf, -0.1);
  testIndexCheck(buf, NaN);
  testIndexCheck(buf, bufSize / type.laneSize - count + 1);
  testIndexCheck(buf.buffer, 1);
  testIndexCheck(buf, "a");
}

// Test the store function type.fn[name], which is expected to store the first
// `count` lanes of a value into a typed array.
function testStore(type, name, count) {
  var storeFn = type.fn[name];
  equal('function', typeof storeFn);
  var bufLanes = 2 * type.lanes;               // Test all alignments.
  var bufSize = bufLanes * type.laneSize + 8;  // Extra for over-alignment test.
  var ab = new ArrayBuffer(bufSize);
  var buf = new type.view(ab);
  var a = createTestValue(type);  // Value containing 0, 1, 2, 3 ...
  // True when buf holds 0, 1, ..., count - 1 starting at the given offset.
  function checkBuffer(offset) {
    for (var i = 0; i < count; i++)
      if (buf[offset + i] != i) return false;
    return true;
  }
  // Test aligned stores.
  for (var i = 0; i < type.lanes; i++) {
    storeFn(buf, i, a);
    ok(checkBuffer(i));
  }

  // Test index coercions.
  storeFn(buf, "0", a);      ok(checkBuffer(0));
  storeFn(buf, "01", a);     ok(checkBuffer(1));
  storeFn(buf, " -0.0 ", a); ok(checkBuffer(0));
  storeFn(buf, " +1e0", a);  ok(checkBuffer(1));
  storeFn(buf, false, a);    ok(checkBuffer(0));
  storeFn(buf, true, a);     ok(checkBuffer(1));
  storeFn(buf, null, a);     ok(checkBuffer(0));

  // Test the 2 over-alignments.
  var f64 = new Float64Array(ab);
  var stride = 8 / type.laneSize;
  for (var i = 0; i < 1; i++) {
    storeFn(f64, i, a);
    ok(checkBuffer(stride * i));
  }
  // Test the 7 mis-alignments.
  var i8 = new Int8Array(ab);
  for (var misalignment = 1; misalignment < 8; misalignment++) {
    storeFn(i8, misalignment, a);
    // Shift the buffer down by misalignment.
    for (var i = 0; i < i8.length - misalignment; i++)
      i8[i] = i8[i + misalignment];
    ok(checkBuffer(0));
  }

  function testIndexCheck(buf, index) {
    throws(function () { storeFn(buf, index, type.fn()); });
  }
  testIndexCheck(buf, -1);
  testIndexCheck(buf, bufSize / type.laneSize - count + 1);
  testIndexCheck(buf.buffer, 1);
  testIndexCheck(buf, "a");
}

// Test that JS operators that would coerce a SIMD value to a number throw,
// and that property access, truthiness and the string conversions behave as
// expected.
function testOperators(type) {
  var inst = createTestValue(type);
  throws(function() { Number(inst) });
  throws(function() { +inst });
  throws(function() { -inst });
  throws(function() { ~inst });
  throws(function() { Math.fround(inst) });
  throws(function() { inst|0} );
  throws(function() { inst&0 });
  throws(function() { inst^0 });
  throws(function() { inst>>>0 });
  throws(function() { inst>>0 });
  throws(function() { inst<<0 });
  throws(function() { (inst + inst) });
  throws(function() { inst - inst });
  throws(function() { inst * inst });
  throws(function() { inst / inst });
  throws(function() { inst % inst });
  throws(function() { inst < inst });
  throws(function() { inst > inst });
  throws(function() { inst <= inst });
  throws(function() { inst >= inst });
  throws(function() { inst(); });

  equal(inst[0], undefined);
  equal(inst.a, undefined);
  equal(!inst, false);
  equal(!inst, false);
  equal(inst ? 1 : 2, 1);
  equal(inst ? 1 : 2, 1);

  equal('function', typeof inst.toString);
  equal(inst.toString(), simdToString(type, inst));
  equal('function', typeof inst.toLocaleString);
  equal(inst.toLocaleString(), simdToLocaleString(type, inst));
  // TODO: test valueOf?
}

// Tests value semantics for a given type.
// TODO: more complete tests for Object wrappers, sameValue, sameValueZero, etc.
function testValueSemantics(type) {
  // Create a vanilla test value.
  var x = createTestValue(type);

  // Check against non-SIMD types.
  var otherTypeValues = [0, 1.275, NaN, Infinity, "string", null, undefined,
                         {}, function() {}];
  for (var other of simdTypes) {
    if (type !== other)
      otherTypeValues.push(createTestValue(other));
  }
  otherTypeValues.forEach(function(y) {
    equal(y == x, false);
    equal(x == y, false);
    equal(y != x, true);
    equal(x != y, true);
    equal(y === x, false);
    equal(x === y, false);
    equal(y !== x, true);
    equal(x !== y, true);
  });

  // Test that f(a, b) is the same as f(SIMD(a), SIMD(b)) for equality and
  // strict equality, at every lane.
  // Note: this local helper shadows the harness-level test() inside this
  // function only.
  function test(a, b) {
    for (var i = 0; i < type.lanes; i++) {
      var aval = type.fn.replaceLane(x, i, a);
      var bval = type.fn.replaceLane(x, i, b);
      equal(a == b, aval == bval);
      equal(a === b, aval === bval);
    }
  }
  for (var a of type.interestingValues) {
    for (var b of type.interestingValues) {
      test(a, b);
    }
  }
}


// Tests that apply to every SIMD type.
simdTypes.forEach(function(type) {
  test(type.name + ' constructor', function() {
    testConstructor(type);
  });
  test(type.name + ' check', function() {
    testCheck(type);
  });
  test(type.name + ' operators', function() {
    testOperators(type);
  });
  // Note: This fails in the polyfill due to the lack of value semantics.
  test(type.name + ' value semantics', function() {
    testValueSemantics(type);
  });
  test(type.name + ' replaceLane', function() {
    testReplaceLane(type);
  });
});

// Tests for the numerical (float and integer) types.
simdTypes.filter(isNumerical).forEach(function(type) {
  test(type.name + ' equal', function() {
    testRelationalOp(type, 'equal', function(a, b) { return a == b; });
  });
  test(type.name + ' notEqual', function() {
    testRelationalOp(type, 'notEqual', function(a, b) { return a != b; });
  });
  test(type.name + ' lessThan', function() {
    testRelationalOp(type, 'lessThan', function(a, b) { return a < b; });
  });
  test(type.name + ' lessThanOrEqual', function() {
    testRelationalOp(type, 'lessThanOrEqual', function(a, b) { return a <= b; });
  });
  test(type.name + ' greaterThan', function() {
    testRelationalOp(type, 'greaterThan', function(a, b) { return a > b; });
  });
  test(type.name + ' greaterThanOrEqual', function() {
    testRelationalOp(type, 'greaterThanOrEqual', function(a, b) { return a >= b; });
  });
  test(type.name + ' add', function() {
    testBinaryOp(type, 'add', function(a, b) { return a + b; });
  });
  test(type.name + ' sub', function() {
    testBinaryOp(type, 'sub', function(a, b) { return a - b; });
  });
  test(type.name + ' mul', function() {
    testBinaryOp(type, 'mul', type.mulFn);
  });
  test(type.name + ' select', function() {
    testSelect(type);
  });
  test(type.name + ' swizzle', function() {
    testSwizzle(type);
  });
  test(type.name + ' shuffle', function() {
    testShuffle(type);
  });
  test(type.name + ' load', function() {
    testLoad(type, 'load', type.lanes);
  });
  test(type.name + ' store', function() {
    testStore(type, 'store', type.lanes);
  });
});

// Partial loads and stores exist only for the 4-lane non-boolean types.
simdTypes.filter(hasLoadStore123).forEach(function(type) {
  test(type.name + ' load1', function() {
    testLoad(type, 'load1', 1);
  });
  test(type.name + ' load2', function() {
    testLoad(type, 'load2', 2);
  });
  test(type.name + ' load3', function() {
    testLoad(type, 'load3', 3);
  });
  test(type.name + ' store1', function() {
    testStore(type, 'store1', 1);
  });
  test(type.name + ' store2', function() {
    testStore(type, 'store2', 2);
  });
  test(type.name + ' store3', function() {
    testStore(type, 'store3', 3);
  });
});

// Bitwise / logical operations.
simdTypes.filter(isLogical).forEach(function(type) {
  test(type.name + ' and', function() {
    testBinaryOp(type, 'and', function(a, b) { return a & b; });
  });
  test(type.name + ' or', function() {
    testBinaryOp(type, 'or', function(a, b) { return a | b; });
  });
  test(type.name + ' xor', function() {
    testBinaryOp(type, 'xor', function(a, b) { return a ^ b; });
  });
});

// Negation is only defined for the signed types.
simdTypes.filter(isSigned).forEach(function(type) {
  test(type.name + ' neg', function() {
    testUnaryOp(type, 'neg', function(a) { return -a; });
  });
});

// Floating point only operations.
simdTypes.filter(isFloatType).forEach(function(type) {
  test(type.name + ' div', function() {
    testBinaryOp(type, 'div', function(a, b) { return a / b; });
  });
  test(type.name + ' abs', function() {
    testUnaryOp(type, 'abs', Math.abs);
  });
  test(type.name + ' min', function() {
    testBinaryOp(type, 'min', Math.min);
  });
  test(type.name + ' max', function() {
    testBinaryOp(type, 'max', Math.max);
  });
  test(type.name + ' minNum', function() {
    testBinaryOp(type, 'minNum', minNum);
  });
  test(type.name + ' maxNum', function() {
    testBinaryOp(type, 'maxNum', maxNum);
  });
  test(type.name + ' sqrt', function() {
    testUnaryOp(type, 'sqrt', function(a) { return Math.sqrt(a); });
  });
  test(type.name + ' reciprocalApproximation', function() {
    testUnaryOp(type, 'reciprocalApproximation', function(a) { return 1 / a; });
  });
  test(type.name + ' reciprocalSqrtApproximation', function() {
    testUnaryOp(type, 'reciprocalSqrtApproximation', function(a) { return 1 / Math.sqrt(a); });
  });
});

// Integer only operations. Shift counts are reduced modulo the lane width in
// bits before the reference shift is applied.
simdTypes.filter(isIntType).forEach(function(type) {
  test(type.name + ' not', function() {
    testUnaryOp(type, 'not', function(a) { return ~a; });
  });
  test(type.name + ' shiftLeftByScalar', function() {
    function shift(a, bits) {
      bits &= type.laneSize * 8 - 1;
      return a << bits;
    }
    testShiftOp(type, 'shiftLeftByScalar', shift);
  });
});

// Signed right shift is arithmetic (sign-propagating).
simdTypes.filter(isSignedIntType).forEach(function(type) {
  test(type.name + ' shiftRightByScalar', function() {
    function shift(a, bits) {
      bits &= type.laneSize * 8 - 1;
      return a >> bits;
    }
    testShiftOp(type, 'shiftRightByScalar', shift);
  });
});

// Unsigned right shift is logical (zero-filling).
simdTypes.filter(isUnsignedIntType).forEach(function(type) {
  test(type.name + ' shiftRightByScalar', function() {
    function shift(a, bits) {
      bits &= type.laneSize * 8 - 1;
      // Narrow lanes are masked to the lane width before the 32-bit shift.
      if (type.laneMask)
        a &= type.laneMask;
      return a >>> bits;
    }
    testShiftOp(type, 'shiftRightByScalar', shift);
  });
});

// Saturating arithmetic exists only for the 8 and 16 lane integer types.
simdTypes.filter(isSmallIntType).forEach(function(type) {
  // Clamp a to the [minVal, maxVal] range of the given type.
  function saturate(type, a) {
    if (a < type.minVal) return type.minVal;
    if (a > type.maxVal) return type.maxVal;
    return a;
  }
  test(type.name + ' addSaturate', function() {
    testBinaryOp(type, 'addSaturate', function(a, b) { return saturate(type, a + b); });
  });
  test(type.name + ' subSaturate', function() {
    testBinaryOp(type, 'subSaturate', function(a, b) { return saturate(type, a - b); });
  });
});

// Boolean only operations.
simdTypes.filter(isBoolType).forEach(function(type) {
  test(type.name + ' not', function() {
    testUnaryOp(type, 'not', function(a) { return !a; });
  });
  test(type.name + ' anyTrue', function() {
    testAnyTrue(type);
  });
  test(type.name + ' allTrue', function() {
    testAllTrue(type);
  });
});

// From functions.
// forEach gives every registered callback its own fromType/fn binding, so the
// right conversion is exercised even when the harness runs callbacks after
// registration has finished.
simdTypes.forEach(function(toType) {
  if (!toType.from) return;
  toType.from.forEach(function(fromType) {
    var fn = 'from' + fromType.name;
    test(toType.name + ' ' + fn, function() {
      testFrom(toType, fromType, fn);
    });
  });
});

// FromBits functions.
// Same per-callback binding as for the From functions above.
simdTypes.forEach(function(toType) {
  if (!toType.fromBits) return;
  toType.fromBits.forEach(function(fromType) {
    var fn = 'from' + fromType.name + 'Bits';
    test(toType.name + ' ' + fn, function() {
      testFromBits(toType, fromType, fn);
    });
  });
});

// Miscellaneous test methods.
1047 | 1048 | test('Float32x4 Int32x4 bit conversion', function() { 1049 | var m = SIMD.Int32x4(0x3F800000, 0x40000000, 0x40400000, 0x40800000); 1050 | var n = SIMD.Float32x4.fromInt32x4Bits(m); 1051 | equal(1.0, SIMD.Float32x4.extractLane(n, 0)); 1052 | equal(2.0, SIMD.Float32x4.extractLane(n, 1)); 1053 | equal(3.0, SIMD.Float32x4.extractLane(n, 2)); 1054 | equal(4.0, SIMD.Float32x4.extractLane(n, 3)); 1055 | n = SIMD.Float32x4(5.0, 6.0, 7.0, 8.0); 1056 | m = SIMD.Int32x4.fromFloat32x4Bits(n); 1057 | equal(0x40A00000, SIMD.Int32x4.extractLane(m, 0)); 1058 | equal(0x40C00000, SIMD.Int32x4.extractLane(m, 1)); 1059 | equal(0x40E00000, SIMD.Int32x4.extractLane(m, 2)); 1060 | equal(0x41000000, SIMD.Int32x4.extractLane(m, 3)); 1061 | // Flip sign using bit-wise operators. 1062 | n = SIMD.Float32x4(9.0, 10.0, 11.0, 12.0); 1063 | m = SIMD.Int32x4(0x80000000, 0x80000000, 0x80000000, 0x80000000); 1064 | var nMask = SIMD.Int32x4.fromFloat32x4Bits(n); 1065 | nMask = SIMD.Int32x4.xor(nMask, m); // flip sign. 1066 | n = SIMD.Float32x4.fromInt32x4Bits(nMask); 1067 | equal(-9.0, SIMD.Float32x4.extractLane(n, 0)); 1068 | equal(-10.0, SIMD.Float32x4.extractLane(n, 1)); 1069 | equal(-11.0, SIMD.Float32x4.extractLane(n, 2)); 1070 | equal(-12.0, SIMD.Float32x4.extractLane(n, 3)); 1071 | nMask = SIMD.Int32x4.fromFloat32x4Bits(n); 1072 | nMask = SIMD.Int32x4.xor(nMask, m); // flip sign. 
1073 | n = SIMD.Float32x4.fromInt32x4Bits(nMask); 1074 | equal(9.0, SIMD.Float32x4.extractLane(n, 0)); 1075 | equal(10.0, SIMD.Float32x4.extractLane(n, 1)); 1076 | equal(11.0, SIMD.Float32x4.extractLane(n, 2)); 1077 | equal(12.0, SIMD.Float32x4.extractLane(n, 3)); 1078 | }); 1079 | 1080 | function equalInt32x4(a, b) { 1081 | equal(SIMD.Int32x4.extractLane(a, 0), SIMD.Int32x4.extractLane(b, 0)); 1082 | equal(SIMD.Int32x4.extractLane(a, 1), SIMD.Int32x4.extractLane(b, 1)); 1083 | equal(SIMD.Int32x4.extractLane(a, 2), SIMD.Int32x4.extractLane(b, 2)); 1084 | equal(SIMD.Int32x4.extractLane(a, 3), SIMD.Int32x4.extractLane(b, 3)); 1085 | } 1086 | 1087 | test('Float32x4 Int32x4 round trip', function() { 1088 | // NaNs should stay unmodified across bit conversions 1089 | var m = SIMD.Int32x4(0xFFFFFFFF, 0xFFFF0000, 0x80000000, 0x0); 1090 | var m2 = SIMD.Int32x4.fromFloat32x4Bits(SIMD.Float32x4.fromInt32x4Bits(m)); 1091 | // NaNs may be canonicalized, so these tests may fail in some implementations. 
1092 | equalInt32x4(m, m2); 1093 | }); 1094 | 1095 | test('Float32x4 Int32x4 load/store bit preservation', function() { 1096 | // NaNs should stay unmodified when storing and loading to Float32Array 1097 | var taf32 = new Float32Array(4); 1098 | var tai32 = new Int32Array(4); 1099 | var i4a, i4b; 1100 | i4a = SIMD.Int32x4(0x7fc00000,0x7fe00000,0x7ff00000,0x7ff80000); 1101 | SIMD.Int32x4.store(taf32, 0, i4a); 1102 | i4b = SIMD.Int32x4.load(taf32, 0); 1103 | equalInt32x4(i4a, i4b); 1104 | 1105 | // NaNs should stay unmodified when loading as Float32x4 and storing as Int32x4 1106 | SIMD.Int32x4.store(taf32, 0, i4a); 1107 | var f4 = SIMD.Float32x4.load(taf32, 0); 1108 | SIMD.Float32x4.store(tai32, 0, f4); 1109 | i4b = SIMD.Int32x4.load(tai32, 0); 1110 | equalInt32x4(i4a, i4b); 1111 | }); 1112 | -------------------------------------------------------------------------------- /src/external/qunit.css: -------------------------------------------------------------------------------- 1 | /*! 2 | * QUnit 1.18.0 3 | * http://qunitjs.com/ 4 | * 5 | * Copyright jQuery Foundation and other contributors 6 | * Released under the MIT license 7 | * http://jquery.org/license 8 | * 9 | * Date: 2015-04-03T10:23Z 10 | */ 11 | 12 | /** Font Family and Sizes */ 13 | 14 | #qunit-tests, #qunit-header, #qunit-banner, #qunit-testrunner-toolbar, #qunit-userAgent, #qunit-testresult { 15 | font-family: "Helvetica Neue Light", "HelveticaNeue-Light", "Helvetica Neue", Calibri, Helvetica, Arial, sans-serif; 16 | } 17 | 18 | #qunit-testrunner-toolbar, #qunit-userAgent, #qunit-testresult, #qunit-tests li { font-size: small; } 19 | #qunit-tests { font-size: smaller; } 20 | 21 | 22 | /** Resets */ 23 | 24 | #qunit-tests, #qunit-header, #qunit-banner, #qunit-userAgent, #qunit-testresult, #qunit-modulefilter { 25 | margin: 0; 26 | padding: 0; 27 | } 28 | 29 | 30 | /** Header */ 31 | 32 | #qunit-header { 33 | padding: 0.5em 0 0.5em 1em; 34 | 35 | color: #8699A4; 36 | background-color: #0D3349; 37 | 38 | 
font-size: 1.5em; 39 | line-height: 1em; 40 | font-weight: 400; 41 | 42 | border-radius: 5px 5px 0 0; 43 | } 44 | 45 | #qunit-header a { 46 | text-decoration: none; 47 | color: #C2CCD1; 48 | } 49 | 50 | #qunit-header a:hover, 51 | #qunit-header a:focus { 52 | color: #FFF; 53 | } 54 | 55 | #qunit-testrunner-toolbar label { 56 | display: inline-block; 57 | padding: 0 0.5em 0 0.1em; 58 | } 59 | 60 | #qunit-banner { 61 | height: 5px; 62 | } 63 | 64 | #qunit-testrunner-toolbar { 65 | padding: 0.5em 1em 0.5em 1em; 66 | color: #5E740B; 67 | background-color: #EEE; 68 | overflow: hidden; 69 | } 70 | 71 | #qunit-userAgent { 72 | padding: 0.5em 1em 0.5em 1em; 73 | background-color: #2B81AF; 74 | color: #FFF; 75 | text-shadow: rgba(0, 0, 0, 0.5) 2px 2px 1px; 76 | } 77 | 78 | #qunit-modulefilter-container { 79 | float: right; 80 | padding: 0.2em; 81 | } 82 | 83 | .qunit-url-config { 84 | display: inline-block; 85 | padding: 0.1em; 86 | } 87 | 88 | .qunit-filter { 89 | display: block; 90 | float: right; 91 | margin-left: 1em; 92 | } 93 | 94 | /** Tests: Pass/Fail */ 95 | 96 | #qunit-tests { 97 | list-style-position: inside; 98 | } 99 | 100 | #qunit-tests li { 101 | padding: 0.4em 1em 0.4em 1em; 102 | border-bottom: 1px solid #FFF; 103 | list-style-position: inside; 104 | } 105 | 106 | #qunit-tests > li { 107 | display: none; 108 | } 109 | 110 | #qunit-tests li.running, 111 | #qunit-tests li.pass, 112 | #qunit-tests li.fail, 113 | #qunit-tests li.skipped { 114 | display: list-item; 115 | } 116 | 117 | #qunit-tests.hidepass li.running, 118 | #qunit-tests.hidepass li.pass { 119 | visibility: hidden; 120 | position: absolute; 121 | width: 0px; 122 | height: 0px; 123 | padding: 0; 124 | border: 0; 125 | margin: 0; 126 | } 127 | 128 | #qunit-tests li strong { 129 | cursor: pointer; 130 | } 131 | 132 | #qunit-tests li.skipped strong { 133 | cursor: default; 134 | } 135 | 136 | #qunit-tests li a { 137 | padding: 0.5em; 138 | color: #C2CCD1; 139 | text-decoration: none; 140 | } 141 | 
142 | #qunit-tests li p a { 143 | padding: 0.25em; 144 | color: #6B6464; 145 | } 146 | #qunit-tests li a:hover, 147 | #qunit-tests li a:focus { 148 | color: #000; 149 | } 150 | 151 | #qunit-tests li .runtime { 152 | float: right; 153 | font-size: smaller; 154 | } 155 | 156 | .qunit-assert-list { 157 | margin-top: 0.5em; 158 | padding: 0.5em; 159 | 160 | background-color: #FFF; 161 | 162 | border-radius: 5px; 163 | } 164 | 165 | .qunit-collapsed { 166 | display: none; 167 | } 168 | 169 | #qunit-tests table { 170 | border-collapse: collapse; 171 | margin-top: 0.2em; 172 | } 173 | 174 | #qunit-tests th { 175 | text-align: right; 176 | vertical-align: top; 177 | padding: 0 0.5em 0 0; 178 | } 179 | 180 | #qunit-tests td { 181 | vertical-align: top; 182 | } 183 | 184 | #qunit-tests pre { 185 | margin: 0; 186 | white-space: pre-wrap; 187 | word-wrap: break-word; 188 | } 189 | 190 | #qunit-tests del { 191 | background-color: #E0F2BE; 192 | color: #374E0C; 193 | text-decoration: none; 194 | } 195 | 196 | #qunit-tests ins { 197 | background-color: #FFCACA; 198 | color: #500; 199 | text-decoration: none; 200 | } 201 | 202 | /*** Test Counts */ 203 | 204 | #qunit-tests b.counts { color: #000; } 205 | #qunit-tests b.passed { color: #5E740B; } 206 | #qunit-tests b.failed { color: #710909; } 207 | 208 | #qunit-tests li li { 209 | padding: 5px; 210 | background-color: #FFF; 211 | border-bottom: none; 212 | list-style-position: inside; 213 | } 214 | 215 | /*** Passing Styles */ 216 | 217 | #qunit-tests li li.pass { 218 | color: #3C510C; 219 | background-color: #FFF; 220 | border-left: 10px solid #C6E746; 221 | } 222 | 223 | #qunit-tests .pass { color: #528CE0; background-color: #D2E0E6; } 224 | #qunit-tests .pass .test-name { color: #366097; } 225 | 226 | #qunit-tests .pass .test-actual, 227 | #qunit-tests .pass .test-expected { color: #999; } 228 | 229 | #qunit-banner.qunit-pass { background-color: #C6E746; } 230 | 231 | /*** Failing Styles */ 232 | 233 | #qunit-tests li li.fail { 
234 | color: #710909; 235 | background-color: #FFF; 236 | border-left: 10px solid #EE5757; 237 | white-space: pre; 238 | } 239 | 240 | #qunit-tests > li:last-child { 241 | border-radius: 0 0 5px 5px; 242 | } 243 | 244 | #qunit-tests .fail { color: #000; background-color: #EE5757; } 245 | #qunit-tests .fail .test-name, 246 | #qunit-tests .fail .module-name { color: #000; } 247 | 248 | #qunit-tests .fail .test-actual { color: #EE5757; } 249 | #qunit-tests .fail .test-expected { color: #008000; } 250 | 251 | #qunit-banner.qunit-fail { background-color: #EE5757; } 252 | 253 | /*** Skipped tests */ 254 | 255 | #qunit-tests .skipped { 256 | background-color: #EBECE9; 257 | } 258 | 259 | #qunit-tests .qunit-skipped-label { 260 | background-color: #F4FF77; 261 | display: inline-block; 262 | font-style: normal; 263 | color: #366097; 264 | line-height: 1.8em; 265 | padding: 0 0.5em; 266 | margin: -0.4em 0.4em -0.4em 0; 267 | } 268 | 269 | /** Result */ 270 | 271 | #qunit-testresult { 272 | padding: 0.5em 1em 0.5em 1em; 273 | 274 | color: #2B81AF; 275 | background-color: #D2E0E6; 276 | 277 | border-bottom: 1px solid #FFF; 278 | } 279 | #qunit-testresult .module-name { 280 | font-weight: 700; 281 | } 282 | 283 | /** Fixture */ 284 | 285 | #qunit-fixture { 286 | position: absolute; 287 | top: -10000px; 288 | left: -10000px; 289 | width: 1000px; 290 | height: 1000px; 291 | } 292 | -------------------------------------------------------------------------------- /src/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | EcmaScript SIMD numeric type tests 6 | 7 | 8 | 9 |
10 |
// This is a simple script for running the tests from a standalone JS shell.
//
// It relies on the shell-provided globals load(), print() and quit(), and
// defines minimal versions of the harness functions used by the test file
// (test, equal, notEqual, throws, ok) before loading it.

load("ecmascript_simd.js");

// clearer marking
var currentName = '';  // Name of the test currently running, used in messages.
var numFails = 0;      // Failed assertions plus exceptions escaping a test.

// Skip flags may be set by the embedder before this script is loaded; each one
// gets its own default only when it is still undefined. (The second default
// used to assign skipValueTests, clobbering both the default above it and any
// value supplied by the embedder.)
if (typeof skipValueTests === 'undefined')
  skipValueTests = false;
if (typeof skipFromBitsTests === 'undefined')
  skipFromBitsTests = true;

// Logs str with every line indented by two spaces.
function printIndented(str) {
  console.log(str.split('\n').map(function (s) { return '  ' + s }).join('\n'));
}

// Records one failure and logs the message together with a stack trace.
function fail(str) {
  var e = new Error(str);
  console.log(e.toString());
  printIndented(e.stack);
  numFails++;
}

// Runs func immediately as the test called name. A thrown exception is logged
// and counted as a failure; it does not abort the remaining tests.
function test(name, func) {
  currentName = name;
  // Tests whose name mentions 'value semantics' are skipped on request.
  if (typeof skipValueTests !== 'undefined' && skipValueTests &&
      name.indexOf('value semantics') !== -1) return;
  try {
    func();
  } catch (e) {
    console.log('exception thrown from ' + currentName + ': ' + e.toString());
    if (e.stack)
      printIndented(e.stack);
    numFails++;
  }
}

// Fails unless a and b are loosely equal. The comparison is deliberately
// non-strict (==), like QUnit's equal().
function equal(a, b) {
  if (a != b)
    fail('equal(' + a + ', ' + b + ') failed in ' + currentName);
}

// Fails if a and b are loosely equal.
function notEqual(a, b) {
  if (a == b)
    fail('notEqual(' + a + ', ' + b + ') failed in ' + currentName);
}

// Fails unless calling func throws. Any thrown value counts.
function throws(func) {
  var pass = false;
  try {
    func();
  } catch (e) {
    pass = true;
  }
  if (!pass)
    fail('throws failed in ' + currentName);
}

// Fails unless x is truthy.
function ok(x) {
  if (!x)
    fail('not ok in ' + currentName);
}

load("ecmascript_simd_tests.js");

// Report the total and exit with a non-zero status so callers can detect it.
if (numFails > 0) {
  print('total number of fails and exceptions: ' + numFails);
  quit(1);
}
// To specifically test the p(r)olyfill.

// Drop any SIMD global that already exists, so the polyfill loaded by the
// shell test runner is the implementation the tests exercise.
if (typeof SIMD !== 'undefined') {
  SIMD = undefined;
}

load('./shell_test_runner.js');