├── .npmignore ├── .travis.yml ├── .gitignore ├── .jshintrc ├── src ├── import │ ├── formats │ │ ├── index.js │ │ ├── json.js │ │ ├── dsv.js │ │ ├── treejson.js │ │ └── topojson.js │ ├── read.js │ ├── readers.js │ ├── type.js │ └── load.js ├── aggregate │ ├── groupby.js │ ├── collector.js │ ├── measures.js │ └── aggregator.js ├── index.js ├── generate.js ├── bins │ ├── histogram.js │ └── bins.js ├── print.js ├── format.js ├── time.js ├── template.js ├── util.js └── stats.js ├── bower.json ├── test ├── util-alt-env.test.js ├── readers.test.js ├── lib │ └── XMLHttpRequest.js ├── print.test.js ├── generate.test.js ├── format.test.js ├── load.test.js ├── template.test.js ├── read.test.js ├── bins.test.js ├── data │ ├── flare.json │ └── stocks.csv ├── time.test.js └── stats.test.js ├── scripts └── deploy.sh ├── LICENSE ├── package.json └── README.md /.npmignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | coverage 3 | scripts 4 | temp 5 | test 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - "0.10" 4 | - "0.12" 5 | install: npm install 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .idea 3 | _site 4 | coverage 5 | node_modules 6 | temp 7 | datalib.js 8 | datalib.js.map 9 | datalib.min.js 10 | -------------------------------------------------------------------------------- /.jshintrc: -------------------------------------------------------------------------------- 1 | { 2 | "undef": true, 3 | "unused": true, 4 | "eqnull": true, 5 | "freeze": true, 6 | "noarg": true, 7 | "node": true, 8 | "browser": true 9 | } -------------------------------------------------------------------------------- 
/src/import/formats/index.js: -------------------------------------------------------------------------------- 1 | var dsv = require('./dsv'); 2 | 3 | module.exports = { 4 | json: require('./json'), 5 | topojson: require('./topojson'), 6 | treejson: require('./treejson'), 7 | dsv: dsv, 8 | csv: dsv.delimiter(','), 9 | tsv: dsv.delimiter('\t') 10 | }; -------------------------------------------------------------------------------- /src/import/formats/json.js: -------------------------------------------------------------------------------- 1 | var util = require('../../util'); 2 | 3 | module.exports = function(data, format) { 4 | var d = util.isObject(data) && !util.isBuffer(data) ? 5 | data : JSON.parse(data); 6 | if (format && format.property) { 7 | d = util.accessor(format.property)(d); 8 | } 9 | return d; 10 | }; 11 | -------------------------------------------------------------------------------- /src/aggregate/groupby.js: -------------------------------------------------------------------------------- 1 | var util = require('../util'); 2 | var Aggregator = require('./aggregator'); 3 | 4 | module.exports = function() { 5 | // flatten arguments into a single array 6 | var args = [].reduce.call(arguments, function(a, x) { 7 | return a.concat(util.array(x)); 8 | }, []); 9 | // create and return an aggregator 10 | return new Aggregator() 11 | .groupby(args) 12 | .summarize({'*':'values'}); 13 | }; 14 | -------------------------------------------------------------------------------- /src/import/formats/dsv.js: -------------------------------------------------------------------------------- 1 | var util = require('../../util'); 2 | var d3_dsv = require('d3-dsv'); 3 | 4 | function dsv(data, format) { 5 | if (data) { 6 | var h = format.header; 7 | data = (h ? 
// Create a reader bound to a fixed delimiter (e.g. ',' for csv, '\t' for tsv).
// The returned function has the same (data, format) signature as dsv, but
// always parses with `delim`, overriding any delimiter in `format`.
dsv.delimiter = function(delim) {
  var fmt = {delimiter: delim};
  return function(data, format) {
    // Merge into a fresh object: the caller's format must not be mutated
    // (it may be shared across readers or carry its own delimiter).
    return dsv(data, format ? util.extend(util.extend({}, format), fmt) : fmt);
  };
};
36 | git add datalib* -f 37 | git commit -m "Release $version $gitsha" 38 | git tag -am "Release v$version." "v$version" 39 | 40 | # now swap back to the clean master and push the new tag 41 | git checkout master 42 | git push --tags 43 | 44 | # Woo hoo! Now the published tag contains compiled files which works great with bower. 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, University of Washington Interactive Data Lab 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of the copyright holder nor the names of its contributors 15 | may be used to endorse or promote products derived from this software 16 | without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "datalib", 3 | "version": "1.4.5", 4 | "description": "JavaScript utilities for loading, summarizing and working with data.", 5 | "keywords": [ 6 | "data", 7 | "table", 8 | "statistics", 9 | "parse", 10 | "csv", 11 | "tsv", 12 | "json", 13 | "utility" 14 | ], 15 | "repository": { 16 | "type": "git", 17 | "url": "http://github.com/vega/datalib.git" 18 | }, 19 | "author": { 20 | "name": "Jeffrey Heer", 21 | "url": "http://idl.cs.washington.edu" 22 | }, 23 | "license": "BSD-3-Clause", 24 | "dependencies": { 25 | "d3-dsv": "latest", 26 | "d3-format": "latest", 27 | "d3-time": "latest", 28 | "d3-time-format": "latest", 29 | "topojson": "^1.6.19", 30 | "request": "^2.60.0", 31 | "sync-request": "^2.0.1" 32 | }, 33 | "devDependencies": { 34 | "browserify": "^10.2.6", 35 | "browserify-shim": "^3.8.9", 36 | "browserify-versionify": "^1.0.4", 37 | "chai": "^3.0.0", 38 | "istanbul": "latest", 39 | "jshint": "^2.8.0", 40 | "mocha": "^2.2.5", 41 | "uglify-js": "^2.4.24" 42 | }, 43 | "main": "src/index.js", 44 | "scripts": { 45 | "deploy": "npm run lint && npm run test && scripts/deploy.sh", 46 | "lint": "jshint src/", 47 | "test": "TZ=America/Los_Angeles mocha --recursive test/", 48 | "cover": "TZ=America/Los_Angeles istanbul cover _mocha -- 
--recursive test/", 49 | "build": "browserify src/index.js -d -s dl -o datalib.js", 50 | "postbuild": "uglifyjs datalib.js -c -m -o datalib.min.js" 51 | }, 52 | "browserify": { 53 | "transform": [ 54 | "browserify-shim", 55 | "browserify-versionify" 56 | ] 57 | }, 58 | "browserify-shim": { 59 | "topojson": "global:topojson" 60 | }, 61 | "browser": { 62 | "buffer": false, 63 | "fs": false, 64 | "http": false, 65 | "request": false, 66 | "sync-request": false, 67 | "url": false 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /test/readers.test.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var assert = require('chai').assert; 4 | var util = require('../src/util'); 5 | var read = require('../src/import/read'); 6 | var readers = require('../src/import/readers'); 7 | 8 | var file = './test/data/stocks.csv'; 9 | var text = require('fs').readFileSync(file, 'utf8'); 10 | var csv = read(text, {type: 'csv', parse: 'auto'}); 11 | 12 | describe('readers', function() { 13 | 14 | it('should read synchronously', function() { 15 | var data = readers.csv('file://' + file); 16 | assert.deepEqual(csv, data); 17 | }); 18 | 19 | it('should read asynchronously', function(done) { 20 | readers.csv('file://' + file, function(error, data) { 21 | assert.deepEqual(csv, data); 22 | done(); 23 | }); 24 | }); 25 | 26 | it('should read using options hash', function() { 27 | var data = readers.csv({ 28 | url: '//' + file, 29 | defaultProtocol: 'file' 30 | }); 31 | assert.deepEqual(csv, data); 32 | }); 33 | 34 | it('should read synchronously using format argument', function() { 35 | var data = readers.csv('file://' + file, {type: 'csv', parse: 'auto'}); 36 | assert.deepEqual(csv, data); 37 | }); 38 | 39 | it('should read asynchronously using format argument', function(done) { 40 | readers.csv('file://' + file, {type: 'csv', parse: 'auto'}, function(error, data) { 41 | 
// Random number generator factories. Each factory returns a zero-argument
// function that draws one value, plus a `samples(n)` method that draws n.
gen.random = {};

// Attach `samples(n)` to generator `f`: an array of n values, each produced
// by one call to f (gen.zeros supplies an array of the right length).
function withSamples(f) {
  f.samples = function(n) { return gen.zeros(n).map(f); };
  return f;
}

// Uniform reals in [min, max).
// uniform() -> [0, 1); uniform(max) -> [0, max); uniform(min, max).
gen.random.uniform = function(min, max) {
  if (max === undefined) {
    max = min === undefined ? 1 : min;
    min = 0;
  }
  var d = max - min;
  return withSamples(function() {
    return min + d * Math.random();
  });
};

// Uniform integers in [a, b).
// integer(b) -> [0, b); integer(a, b) -> [a, b).
gen.random.integer = function(a, b) {
  if (b === undefined) {
    b = a;
    a = 0;
  }
  var d = b - a;
  return withSamples(function() {
    return a + Math.floor(d * Math.random());
  });
};

// Normally distributed reals with the given mean (default 0) and standard
// deviation (default 1). An explicit stdev of 0 is honored and yields a
// generator that always returns the mean.
gen.random.normal = function(mean, stdev) {
  mean = mean || 0;
  // `stdev || 1` alone would silently replace a requested 0 with 1
  stdev = (stdev === 0) ? 0 : (stdev || 1);
  // each round produces two independent values; the second is cached here
  var next;
  return withSamples(function() {
    var x = 0, y = 0, rds, c;
    if (next !== undefined) {
      x = next;
      next = undefined;
      return x;
    }
    // rejection-sample a point strictly inside the unit circle (not the origin)
    do {
      x = Math.random()*2-1;
      y = Math.random()*2-1;
      rds = x*x + y*y;
    } while (rds === 0 || rds > 1);
    c = Math.sqrt(-2*Math.log(rds)/rds); // Box-Muller transform
    next = mean + y*c*stdev;
    return mean + x*c*stdev;
  });
};
read : readSync; 18 | 19 | // force state change for testing purposes 20 | if (self.onreadystatechange) self.onreadystatechange(); 21 | }; 22 | 23 | self.setRequestHeader = function(n, v) { 24 | if (/^Accept$/i.test(n)) info.mimeType = v.split(/,/g)[0]; 25 | }; 26 | 27 | function read() { 28 | self.readyState = 2; 29 | fs.readFile(info.url, "binary", function(e, d) { 30 | if (e) { 31 | self.status = 404; // assumed 32 | } else { 33 | self.status = self.type ? null : 200; 34 | self.responseType = self.type || 'text'; 35 | self.response = d; 36 | self.responseText = d; 37 | self.responseXML = {_xml: d}; 38 | headers["Content-Length"] = d.length; 39 | } 40 | self.readyState = 4; 41 | XMLHttpRequest._last = self; 42 | if (self.onload) self.onload(); 43 | if (self.onreadystatechange) self.onreadystatechange(); 44 | }); 45 | } 46 | 47 | function readSync() { 48 | self.readyState = 2; 49 | try { 50 | var d = fs.readFileSync(info.url, "binary"); 51 | self.status = self.type ? null : 200; 52 | self.responseType = self.type || 'text'; 53 | self.response = d; 54 | self.responseText = d; 55 | self.responseXML = {_xml: d}; 56 | headers["Content-Length"] = d.length; 57 | } catch (e) { 58 | self.status = 404; // assumed 59 | } 60 | self.readyState = 4; 61 | XMLHttpRequest._last = self; 62 | if (self.onload) self.onload(); 63 | if (self.onreadystatechange) self.onreadystatechange(); 64 | } 65 | 66 | self.getResponseHeader = function(n) { 67 | return headers[n]; 68 | }; 69 | }; -------------------------------------------------------------------------------- /src/bins/histogram.js: -------------------------------------------------------------------------------- 1 | var bins = require('./bins'), 2 | gen = require('../generate'), 3 | type = require('../import/type'), 4 | util = require('../util'), 5 | stats = require('../stats'); 6 | 7 | var qtype = { 8 | 'integer': 1, 9 | 'number': 1, 10 | 'date': 1 11 | }; 12 | 13 | function $bin(values, f, opt) { 14 | opt = options(values, f, opt); 
// Normalize the flexible ([values], [accessor], [options]) argument list
// shared by $bin and histogram into a single options object.
//  - values:   optional array of data values
//  - accessor: optional function or field-name string (wrapped via util.$)
//  - options:  optional object, copied so the caller's object is not changed
// When values are given, the type is inferred if not set and, for
// quantitative types, min/max defaults are filled in from the data extent.
// The accessor, if any, is stored as `opt.accessor`.
function options() {
  var a = arguments,
      n = a.length,
      i = 0,
      values, f, opt;

  values = util.isArray(a[i]) ? a[i++] : null;

  // Skip null/undefined placeholders that precede a later argument, so that
  // calls such as histogram(values, null, opt) do not drop `opt`.
  while (i < n - 1 && a[i] == null) ++i;

  f = util.isFunction(a[i]) || util.isString(a[i]) ? util.$(a[i++]) : null;
  opt = util.extend({}, a[i]);

  if (values) {
    opt.type = opt.type || type(values, f);
    if (qtype[opt.type]) {
      // explicit min/max in opt take precedence over the data extent
      var ext = stats.extent(values, f);
      opt = util.extend({min: ext[0], max: ext[1]}, opt);
    }
  }
  if (f) { opt.accessor = f; }
  return opt;
}
// Predicates used for type inference: each returns true if value `x`
// (typically a raw string, but possibly an already-typed value) can be
// interpreted as the named type.
var TESTS = {
  // literal 'true'/'false' strings or actual booleans
  boolean: function(x) { return x==='true' || x==='false' || util.isBoolean(x); },
  // a number with no fractional part. Math.floor is used rather than `~~x`
  // because `~~` truncates to 32 bits and so rejects whole numbers >= 2^31.
  integer: function(x) { return TESTS.number(x) && isFinite(x=+x) && Math.floor(x) === x; },
  // anything that coerces to a non-NaN number, except Date objects
  number: function(x) { return !isNaN(+x) && !util.isDate(x); },
  // anything Date.parse can turn into a timestamp
  date: function(x) { return !isNaN(Date.parse(x)); }
};
| it('should print summary', function() { 79 | var summary = stats.summary(data); 80 | var s1 = print.summary(data); 81 | var s2 = print.summary(summary); 82 | var s3 = (summary.toString = print.summary, summary.toString()); 83 | assert.strictEqual(s1, s2); 84 | assert.strictEqual(s1, s3); 85 | assert.equal(2, count(s1, '-- ')); 86 | assert.equal(2, count(s1, 'valid')); 87 | assert.equal(1, count(s1, 'top values')); 88 | assert.equal(1, count(s1, 'median')); 89 | }); 90 | }); 91 | 92 | }); 93 | -------------------------------------------------------------------------------- /src/print.js: -------------------------------------------------------------------------------- 1 | var util = require('./util'); 2 | var type = require('./import/type'); 3 | var stats = require('./stats'); 4 | var template = require('./template'); 5 | 6 | var FMT = { 7 | 'date': '|time:"%m/%d/%Y %H:%M:%S"', 8 | 'number': '|number:".4f"', 9 | 'integer': '|number:"d"' 10 | }; 11 | 12 | var POS = { 13 | 'number': 'left', 14 | 'integer': 'left' 15 | }; 16 | 17 | module.exports.table = function(data, opt) { 18 | opt = util.extend({separator:' ', minwidth: 8, maxwidth: 15}, opt); 19 | var fields = opt.fields || util.keys(data[0]), 20 | types = type.all(data); 21 | 22 | if (opt.start || opt.limit) { 23 | var a = opt.start || 0, 24 | b = opt.limit ? a + opt.limit : data.length; 25 | data = data.slice(a, b); 26 | } 27 | 28 | // determine char width of fields 29 | var lens = fields.map(function(name) { 30 | var format = FMT[types[name]] || '', 31 | t = template('{{' + name + format + '}}'), 32 | l = stats.max(data, function(x) { return t(x).length; }); 33 | l = Math.max(Math.min(name.length, opt.minwidth), l); 34 | return opt.maxwidth > 0 ? 
// Binary search over the slice [lo, hi) of sorted array `a`.
// Returns the first index whose element does not compare (via util.cmp)
// as less than `x`, i.e. the leftmost insertion point for x.
function bisect(a, x, lo, hi) {
  var left = lo,
      right = hi,
      middle;

  for (; left < right; ) {
    middle = (left + right) >>> 1;
    if (util.cmp(a[middle], x) < 0) {
      // a[middle] sorts before x: answer lies to the right
      left = middle + 1;
      continue;
    }
    right = middle;
  }
  return left;
}
| 91 | // find time step, then bin 92 | var units = opt.utc ? time.utc : time, 93 | dmin = opt.min, 94 | dmax = opt.max, 95 | maxb = opt.maxbins || 20, 96 | minb = opt.minbins || 4, 97 | span = (+dmax) - (+dmin), 98 | unit = opt.unit ? units[opt.unit] : units.find(span, minb, maxb), 99 | spec = bins({ 100 | min: unit.min != null ? unit.min : unit.unit(dmin), 101 | max: unit.max != null ? unit.max : unit.unit(dmax), 102 | maxbins: maxb, 103 | minstep: unit.minstep, 104 | steps: unit.step 105 | }); 106 | 107 | spec.unit = unit; 108 | spec.index = date_index; 109 | if (!opt.raw) spec.value = date_value; 110 | return spec; 111 | }; 112 | 113 | module.exports = bins; 114 | -------------------------------------------------------------------------------- /test/generate.test.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var assert = require('chai').assert; 4 | var gen = require('../src/generate'); 5 | 6 | describe('generate', function() { 7 | 8 | describe('repeat', function() { 9 | it('should generate repeated values', function() { 10 | assert.deepEqual([2,2,2], gen.repeat(2, 3)); 11 | assert.deepEqual(['a','a'], gen.repeat('a', 2)); 12 | assert.deepEqual([], gen.repeat(1, 0)); 13 | }); 14 | it('should throw error for negative lengths', function() { 15 | assert.throws(function() { return gen.repeat(1, -1); }); 16 | }); 17 | }); 18 | 19 | describe('zeros', function() { 20 | it('should generate repeated zeros', function() { 21 | assert.deepEqual([0,0,0], gen.zeros(3)); 22 | assert.deepEqual([0], gen.zeros(1)); 23 | assert.deepEqual([], gen.zeros(0)); 24 | }); 25 | it('should throw error for negative lengths', function() { 26 | assert.throws(function() { return gen.zeros(-1); }); 27 | }); 28 | }); 29 | 30 | describe('range', function() { 31 | it('should generate value ranges', function() { 32 | assert.deepEqual([0,1,2], gen.range(3)); 33 | assert.deepEqual([2,3,4], gen.range(2, 5)); 34 | 
// Assert that the mean of `samples` is statistically consistent with the
// expected mean `u`: it must lie within 4 standard errors, where the
// standard error is (sample standard deviation) / sqrt(n).
// `s` (the nominal standard deviation) is accepted for call-site symmetry
// but the spread is estimated from the samples themselves.
function normalTest(u, s, samples) {
  var n = samples.length;
  var sum = samples.reduce(function(a,b) { return a+b; }, 0);
  var avg = sum / n;
  var sse = samples.reduce(function(a,b) { return a+(b-avg)*(b-avg); }, 0);
  // take the square root: the tolerance must be based on the standard
  // deviation, not on the variance
  var dev = Math.sqrt(sse / (n-1));
  // mean within 4 standard errors of the expected value
  assert.closeTo(u, avg, 4*dev/Math.sqrt(n));
}
samples', function() { 88 | normalTest(0, 1, gen.random.normal().samples(1000)); 89 | normalTest(5, 1, gen.random.normal(5).samples(1000)); 90 | normalTest(1, 10, gen.random.normal(1, 10).samples(1000)); 91 | }); 92 | }); 93 | 94 | }); -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # datalib 2 | 3 | [![Build Status](https://travis-ci.org/vega/datalib.svg?branch=master)](https://travis-ci.org/vega/datalib) 4 | [![npm version](https://img.shields.io/npm/v/datalib.svg)](https://www.npmjs.com/package/datalib) 5 | 6 | Datalib is a JavaScript data utility library. It provides facilities for data loading, type inference, common statistics, and string templates. While created to power [Vega](http://vega.github.io) and related projects, datalib is a standalone library useful for data-driven JavaScript applications on both the client (web browser) and server (e.g., node.js). 7 | 8 | For documentation, see the datalib [API Reference](../../wiki/API-Reference). 9 | 10 | ## Use 11 | 12 | Datalib provides a set of utilities for working with data. These include: 13 | 14 | - Loading and parsing data files (JSON, TopoJSON, CSV, TSV). 15 | - Summary statistics (mean, deviation, median, correlation, histograms, etc). 16 | - Group-by aggregation queries, including streaming data support. 17 | - Data-driven string templates with expressive formatting filters. 18 | - Utilities for working with JavaScript functions, objects and arrays. 19 | 20 | Datalib can be used both server-side and client-side. For use in node.js, 21 | simply `npm install datalib` or include datalib as a dependency in your package.json file. For use on the client, install datalib via `bower install datalib` or include datalib.min.js on your web page. The minified JS file is built using browserify (see below for details). 22 | 23 | ### Example 24 | 25 | ```javascript 26 | // Load datalib. 
27 | var dl = require('datalib'); 28 | 29 | // Load and parse a CSV file. Datalib does type inference for you. 30 | // The result is an array of JavaScript objects with named values. 31 | // Parsed dates are stored as UNIX timestamp values. 32 | var data = dl.csv('http://vega.github.io/datalib/data/stocks.csv'); 33 | 34 | // Show summary statistics for each column of the data table. 35 | console.log(dl.print.summary(data)); 36 | 37 | // Compute mean and standard deviation by ticker symbol. 38 | var rollup = dl.groupby('symbol') 39 | .summarize({'price': ['mean', 'stdev']}) 40 | .execute(data); 41 | console.log(dl.print.table(rollup)); 42 | 43 | // Compute correlation measures between price and date. 44 | console.log( 45 | dl.cor(data, 'price', 'date'), // Pearson product-moment correlation 46 | dl.cor.rank(data, 'price', 'date'), // Spearman rank correlation 47 | dl.cor.dist(data, 'price', 'date') // Distance correlation 48 | ); 49 | 50 | // Compute mutual information distance between years and binned price. 51 | var bin_price = dl.$bin(data, 'price'); // returns binned price values 52 | var year_date = dl.$year('date'); // returns year from date field 53 | var counts = dl.groupby(year_date, bin_price).count().execute(data); 54 | console.log(dl.mutual.dist(counts, 'bin_price', 'year_date', 'count')); 55 | ``` 56 | 57 | ## Build Process 58 | 59 | To use datalib in the browser, you need to build the datalib.js and datalib.min.js files. We assume that you have [npm](https://www.npmjs.com/) installed. 60 | 61 | 1. Run `npm install` in the datalib folder to install dependencies. 62 | 2. Run `npm run build`. This will invoke [browserify](http://browserify.org/) to bundle the source files into datalib.js, and then [uglify-js](http://lisperator.net/uglifyjs/) to create the minified datalib.min.js. 63 | 64 | ## Dependencies 65 | 66 | When used in the browser, datalib has one (weak) dependency to note. 
// Switch the module-wide number formatter to locale `l`.
// Throws if d3-format does not produce a formatter for it; in that case the
// current formatter is left in place.
function numberLocale(l) {
  var formatter = d3_numberF.localeFormat(l);
  if (formatter === null || formatter === undefined) {
    throw Error('Unrecognized locale: ' + l);
  }
  numberF = formatter;
}

// Switch the module-wide time formatter to locale `l`.
// Throws if d3-time-format does not produce a formatter for it; in that case
// the current formatter is left in place.
function timeLocale(l) {
  var formatter = d3_timeF.localeFormat(l);
  if (formatter === null || formatter === undefined) {
    throw Error('Unrecognized locale: ' + l);
  }
  timeF = formatter;
}
// Compute a "nice" stepped interval covering a numeric domain.
// domain: array of numbers; only the first and last entries are read,
//   and they may be given in either order
// count: the desired number of steps (10 if null or undefined)
// Returns a three-element array:
//   [first step multiple >= start,
//    last step multiple <= stop plus half a step (so the stop is inclusive),
//    step size]
function intervals(domain, count) {
  var n = (count == null) ? 10 : count,
      lo = domain[0],
      hi = domain[domain.length - 1],
      swap;

  // normalize the bounds so that lo <= hi
  if (hi < lo) {
    swap = hi;
    hi = lo;
    lo = swap;
  }

  // start from the largest power of ten not exceeding the raw step
  var raw = (hi - lo) / n,
      step = Math.pow(10, Math.floor(Math.log(raw) / Math.LN10)),
      ratio = raw / step;

  // Scale the step by 10, 5 or 2 to get closer to the desired count.
  // The thresholds e10, e5 and e2 are module-level constants.
  step *= (ratio >= e10) ? 10
        : (ratio >= e5) ? 5
        : (ratio >= e2) ? 2
        : 1;

  // Round start and stop values to the step interval.
  var first = Math.ceil(lo / step) * step,
      last = Math.floor(hi / step) * step + step / 2; // inclusive

  return [first, last, step];
}
// Create an automatic time formatter based on the current time locale.
// Returns a function(date) that selects one of eight format patterns by
// comparing the date's timestamp against d3_time.<unit>(date) for
// successively coarser units (second, minute, hour, day, month/week, year),
// then applies the selected format to the date.
// NOTE(review): presumably d3_time.<unit>(date) floors the date to the
// start of that unit, so the first unit with a non-zero remainder decides
// the format -- confirm against the d3-time version in use.
function timeAutoFormat() {
  // formats are created once per call, in a fixed order
  var make = timeF.format,
      asMillisecond = make('.%L'),
      asSecond = make(':%S'),
      asMinute = make('%I:%M'),
      asHour = make('%I %p'),
      asDay = make('%a %d'),
      asWeek = make('%b %d'),
      asMonth = make('%B'),
      asYear = make('%Y');

  // choose the formatter for a single date
  function choose(date, t) {
    if (d3_time.second(date) < t) return asMillisecond;
    if (d3_time.minute(date) < t) return asSecond;
    if (d3_time.hour(date) < t) return asMinute;
    if (d3_time.day(date) < t) return asHour;
    if (d3_time.month(date) < t) {
      // within a month: distinguish week starts from other days
      return d3_time.week(date) < t ? asDay : asWeek;
    }
    if (d3_time.year(date) < t) return asMonth;
    return asYear;
  }

  return function(date) {
    var pick = choose(date, +date);
    return pick(date);
  };
}
// Create a time unit entry (a plain descriptor object).
// type: the unit name
// date: function stored under the 'date' property
// unit: function stored under the 'unit' property
// step: optional step specification; when falsy the entry
//   instead receives minstep = 1
// min, max: optional bounds, stored only when not null/undefined
function entry(type, date, unit, step, min, max) {
  var spec = {type: type, date: date, unit: unit};

  if (!step) {
    spec.minstep = 1;
  } else {
    spec.step = step;
  }

  if (min != null) { spec.min = min; }
  if (max != null) { spec.max = max; }

  return spec;
}
date(d).getMinutes(); }, 52 | null, 0, 59 53 | ), 54 | entry('hours', 55 | function(d) { return new Date(1970, 0, 1, d); }, 56 | function(d) { return date(d).getHours(); }, 57 | null, 0, 23 58 | ), 59 | entry('weekdays', 60 | function(d) { return new Date(1970, 0, 4+d); }, 61 | function(d) { return date(d).getDay(); }, 62 | [1], 0, 6 63 | ), 64 | entry('dates', 65 | function(d) { return new Date(1970, 0, d); }, 66 | function(d) { return date(d).getDate(); }, 67 | [1], 1, 31 68 | ), 69 | entry('months', 70 | function(d) { return new Date(1970, d % 12, 1); }, 71 | function(d) { return date(d).getMonth(); }, 72 | [1], 0, 11 73 | ) 74 | ]; 75 | 76 | var utc = [ 77 | create('second', d3_time.utcSecond, utcBaseDate), 78 | create('minute', d3_time.utcMinute, utcBaseDate), 79 | create('hour', d3_time.utcHour, utcBaseDate), 80 | create('day', d3_time.utcDay, utcBaseDate, [1, 7]), 81 | create('month', d3_time.utcMonth, utcBaseDate, [1, 3, 6]), 82 | create('year', d3_time.utcYear, utcBaseDate), 83 | 84 | // periodic units 85 | entry('seconds', 86 | function(d) { return new Date(Date.UTC(1970, 0, 1, 0, 0, d)); }, 87 | function(d) { return date(d).getUTCSeconds(); }, 88 | null, 0, 59 89 | ), 90 | entry('minutes', 91 | function(d) { return new Date(Date.UTC(1970, 0, 1, 0, d)); }, 92 | function(d) { return date(d).getUTCMinutes(); }, 93 | null, 0, 59 94 | ), 95 | entry('hours', 96 | function(d) { return new Date(Date.UTC(1970, 0, 1, d)); }, 97 | function(d) { return date(d).getUTCHours(); }, 98 | null, 0, 23 99 | ), 100 | entry('weekdays', 101 | function(d) { return new Date(Date.UTC(1970, 0, 4+d)); }, 102 | function(d) { return date(d).getUTCDay(); }, 103 | [1], 0, 6 104 | ), 105 | entry('dates', 106 | function(d) { return new Date(Date.UTC(1970, 0, d)); }, 107 | function(d) { return date(d).getUTCDate(); }, 108 | [1], 1, 31 109 | ), 110 | entry('months', 111 | function(d) { return new Date(Date.UTC(1970, d % 12, 1)); }, 112 | function(d) { return date(d).getUTCMonth(); }, 113 | 
[1], 0, 11 114 | ) 115 | ]; 116 | 117 | var STEPS = [ 118 | [31536e6, 5], // 1-year 119 | [7776e6, 4], // 3-month 120 | [2592e6, 4], // 1-month 121 | [12096e5, 3], // 2-week 122 | [6048e5, 3], // 1-week 123 | [1728e5, 3], // 2-day 124 | [864e5, 3], // 1-day 125 | [432e5, 2], // 12-hour 126 | [216e5, 2], // 6-hour 127 | [108e5, 2], // 3-hour 128 | [36e5, 2], // 1-hour 129 | [18e5, 1], // 30-minute 130 | [9e5, 1], // 15-minute 131 | [3e5, 1], // 5-minute 132 | [6e4, 1], // 1-minute 133 | [3e4, 0], // 30-second 134 | [15e3, 0], // 15-second 135 | [5e3, 0], // 5-second 136 | [1e3, 0] // 1-second 137 | ]; 138 | 139 | function find(units, span, minb, maxb) { 140 | var step = STEPS[0], i, n, bins; 141 | 142 | for (i=1, n=STEPS.length; i step[0]) { 145 | bins = span / step[0]; 146 | if (bins > maxb) { 147 | return units[STEPS[i-1][1]]; 148 | } 149 | if (bins >= minb) { 150 | return units[step[1]]; 151 | } 152 | } 153 | } 154 | return units[STEPS[n-1][1]]; 155 | } 156 | 157 | function toUnitMap(units) { 158 | var map = {}, i, n; 159 | for (i=0, n=units.length; i 1 && domain[idx-1] === '.' && domain.lastIndexOf(d) === idx); 55 | }); 56 | if (!whiteListed) { 57 | throw 'URL is not whitelisted: ' + url; 58 | } 59 | } 60 | } 61 | return url; 62 | } 63 | 64 | function load(opt, callback) { 65 | var error = callback || function(e) { throw e; }, url; 66 | 67 | try { 68 | url = load.sanitizeUrl(opt); // enable override 69 | } catch (err) { 70 | error(err); 71 | return; 72 | } 73 | 74 | if (!url) { 75 | error('Invalid URL: ' + opt.url); 76 | } else if (load.useXHR) { 77 | // on client, use xhr 78 | return xhr(url, callback); 79 | } else if (startsWith(url, fileProtocol)) { 80 | // on server, if url starts with 'file://', strip it and load from file 81 | return file(url.slice(fileProtocol.length), callback); 82 | } else if (url.indexOf('://') < 0) { // TODO better protocol check? 
// Extract the payload of a request, or a falsy value if there is none.
// For a response type other than 'text' the `response` property is read;
// otherwise `responseText` is read.
function xhrHasResponse(request) {
  var type = request.responseType;
  return type && type !== 'text' ?
    request.response : // null on error
    request.responseText; // '' on error
}

// Issue a GET request for a URL using XMLHttpRequest.
// url: the URL to load
// callback: optional function(error, data). When provided the request is
//   asynchronous: on success it is called with (null, responseText), and
//   on failure with (request, null), i.e. the request object is the error.
//   When omitted the request is synchronous and the response text is
//   returned (undefined if the request produced no response).
function xhr(url, callback) {
  var isAsync = !!callback;
  var request = new XMLHttpRequest();

  // If IE does not support CORS, use XDomainRequest (copied from d3.xhr).
  // The constructor is detected with typeof rather than through `this`:
  // xhr is called as a plain function, so in strict-mode code `this` is
  // undefined and `this.XDomainRequest` would throw a TypeError.
  if (typeof XDomainRequest !== 'undefined' && XDomainRequest &&
      !('withCredentials' in request) &&
      /^(http(s)?:)?\/\//.test(url)) {
    request = new XDomainRequest();
  }

  // Report the outcome of a finished request to the callback.
  function respond() {
    var status = request.status;
    // a zero/absent status with a response body counts as success
    if (!status && xhrHasResponse(request) ||
        status >= 200 && status < 300 ||
        status === 304) {
      callback(null, request.responseText);
    } else {
      callback(request, null);
    }
  }

  if (isAsync) {
    if ('onload' in request) {
      request.onload = request.onerror = respond;
    } else {
      // fallback for request objects without onload support
      request.onreadystatechange = function() {
        if (request.readyState > 3) respond();
      };
    }
  }

  request.open('GET', url, isAsync);
  request.send();

  if (!isAsync && xhrHasResponse(request)) {
    return request.responseText;
  }
}
// Test whether a string begins with the given search string.
// Returns false when string is null or undefined.
function startsWith(string, searchString) {
  if (string == null) {
    return false;
  }
  // searching backwards from index 0 can only match at the very start
  var at = string.lastIndexOf(searchString, 0);
  return at === 0;
}
this.dev / this.valid : 0', 51 | req: ['variance'], idx: 2 52 | }), 53 | 'stdev': measure({ 54 | name: 'stdev', 55 | set: 'this.valid > 1 ? Math.sqrt(this.dev / (this.valid-1)) : 0', 56 | req: ['variance'], idx: 2 57 | }), 58 | 'stdevp': measure({ 59 | name: 'stdevp', 60 | set: 'this.valid > 1 ? Math.sqrt(this.dev / this.valid) : 0', 61 | req: ['variance'], idx: 2 62 | }), 63 | 'median': measure({ 64 | name: 'median', 65 | set: 'cell.data.q2(this.get)', 66 | req: ['values'], idx: 3 67 | }), 68 | 'q1': measure({ 69 | name: 'q1', 70 | set: 'cell.data.q1(this.get)', 71 | req: ['values'], idx: 3 72 | }), 73 | 'q3': measure({ 74 | name: 'q3', 75 | set: 'cell.data.q3(this.get)', 76 | req: ['values'], idx: 3 77 | }), 78 | 'distinct': measure({ 79 | name: 'distinct', 80 | set: 'this.distinct(cell.data.values(), this.get)', 81 | req: ['values'], idx: 3 82 | }), 83 | 'argmin': measure({ 84 | name: 'argmin', 85 | add: 'if (v < this.min) this.argmin = t;', 86 | rem: 'if (v <= this.min) this.argmin = null;', 87 | set: 'this.argmin = this.argmin || cell.data.argmin(this.get)', 88 | req: ['min'], str: ['values'], idx: 3 89 | }), 90 | 'argmax': measure({ 91 | name: 'argmax', 92 | add: 'if (v > this.max) this.argmax = t;', 93 | rem: 'if (v >= this.max) this.argmax = null;', 94 | set: 'this.argmax = this.argmax || cell.data.argmax(this.get)', 95 | req: ['max'], str: ['values'], idx: 3 96 | }), 97 | 'min': measure({ 98 | name: 'min', 99 | init: 'this.min = +Infinity;', 100 | add: 'if (v < this.min) this.min = v;', 101 | rem: 'if (v <= this.min) this.min = NaN;', 102 | set: 'this.min = (isNaN(this.min) ? cell.data.min(this.get) : this.min)', 103 | str: ['values'], idx: 4 104 | }), 105 | 'max': measure({ 106 | name: 'max', 107 | init: 'this.max = -Infinity;', 108 | add: 'if (v > this.max) this.max = v;', 109 | rem: 'if (v >= this.max) this.max = NaN;', 110 | set: 'this.max = (isNaN(this.max) ? 
// Resolve the full set of measures needed to compute the requested ones.
// agg: array of requested measure objects (each with name and idx, and
//   optionally req / str arrays naming other measure types)
// stream: if truthy, measures listed under str are included as well
// Returns an array with the requested measures plus every transitively
// required measure (instantiated from types), sorted by ascending idx.
function resolve(agg, stream) {
  var byName = {}, i, n = agg.length;

  // instantiate a dependency unless a measure of that name already exists
  function include(name) {
    if (byName[name]) return;
    var dep = types[name]();
    byName[name] = dep;
    expand(dep);
  }

  // pull in everything a single measure depends on
  function expand(m) {
    if (m.req) {
      m.req.forEach(function(name) { include(name); });
    }
    if (stream && m.str) {
      m.str.forEach(function(name) { include(name); });
    }
  }

  // register all requested measures first, so that they take precedence
  // over freshly instantiated dependencies of the same name
  for (i = 0; i < n; ++i) {
    byName[agg[i].name] = agg[i];
  }
  for (i = 0; i < n; ++i) {
    expand(agg[i]);
  }

  var all = util.vals(byName);
  all.sort(function(a, b) { return a.idx - b.idx; });
  return all;
}
ctr.prototype.rem = Function('t', 'var v = this.get(t);' + rem); 172 | ctr.prototype.set = Function(set); 173 | ctr.prototype.get = accessor; 174 | ctr.prototype.distinct = require('../stats').count.distinct; 175 | ctr.prototype.isValid = util.isValid; 176 | ctr.fields = agg.map(util.$('out')); 177 | return ctr; 178 | } 179 | 180 | types.create = create; 181 | module.exports = types; -------------------------------------------------------------------------------- /src/template.js: -------------------------------------------------------------------------------- 1 | var util = require('./util'), 2 | format = require('./format'); 3 | 4 | var context = { 5 | formats: [], 6 | format_map: {}, 7 | truncate: util.truncate, 8 | pad: util.pad 9 | }; 10 | 11 | function template(text) { 12 | var src = source(text, 'd'); 13 | src = 'var __t; return ' + src + ';'; 14 | 15 | /* jshint evil: true */ 16 | return (new Function('d', src)).bind(context); 17 | } 18 | 19 | template.source = source; 20 | template.context = context; 21 | module.exports = template; 22 | 23 | // Clear cache of format objects. 24 | // This can *break* prior template functions, so invoke with care! 25 | template.clearFormatCache = function() { 26 | context.formats = []; 27 | context.format_map = {}; 28 | }; 29 | 30 | // Generate property access code for use within template source. 31 | // object: the name of the object (variable) containing template data 32 | // property: the property access string, verbatim from template tag 33 | template.property = function(object, property) { 34 | var src = util.field(property).map(util.str).join(']['); 35 | return object + '[' + src + ']'; 36 | }; 37 | 38 | // Generate source code for a template function. 
// Generate source code for a template function.
// text: the template text
// variable: the name of the data object variable ('obj' by default)
// properties: optional hash for collecting all accessed properties
// Returns the source of a single-quoted string expression in which each
// template tag is replaced by a null-safe interpolation of its value.
function source(text, variable, properties) {
  var name = variable || 'obj',
      parts = ['\''],
      cursor = 0;

  // Handle one regex match: emit the escaped literal text that precedes
  // it, followed by the interpolation code for the tag (if any).
  function emit(match, interpolate, offset) {
    var literal = text.slice(cursor, offset);
    parts.push(literal.replace(template_escaper, template_escapeChar));
    cursor = offset + match.length;

    if (interpolate) {
      parts.push(
        '\'\n+((__t=(' +
        template_var(interpolate, name, properties) +
        '))==null?\'\':__t)+\n\''
      );
    }

    // Adobe VMs need the match returned to produce the correct offset.
    return match;
  }

  // replace is used only to visit each tag (and the end of the text);
  // its result is discarded
  text.replace(template_re, emit);

  parts.push('\'');
  return parts.join('');
}
= f.trim(); 112 | 113 | switch (f) { 114 | case 'length': 115 | strcall('.length'); 116 | break; 117 | case 'lower': 118 | strcall('.toLowerCase()'); 119 | break; 120 | case 'upper': 121 | strcall('.toUpperCase()'); 122 | break; 123 | case 'lower-locale': 124 | strcall('.toLocaleLowerCase()'); 125 | break; 126 | case 'upper-locale': 127 | strcall('.toLocaleUpperCase()'); 128 | break; 129 | case 'trim': 130 | strcall('.trim()'); 131 | break; 132 | case 'left': 133 | a = util.number(args[0]); 134 | strcall('.slice(0,' + a + ')'); 135 | break; 136 | case 'right': 137 | a = util.number(args[0]); 138 | strcall('.slice(-' + a +')'); 139 | break; 140 | case 'mid': 141 | a = util.number(args[0]); 142 | b = a + util.number(args[1]); 143 | strcall('.slice(+'+a+','+b+')'); 144 | break; 145 | case 'slice': 146 | a = util.number(args[0]); 147 | strcall('.slice('+ a + 148 | (args.length > 1 ? ',' + util.number(args[1]) : '') + 149 | ')'); 150 | break; 151 | case 'truncate': 152 | a = util.number(args[0]); 153 | b = args[1]; 154 | b = (b!=='left' && b!=='middle' && b!=='center') ? 'right' : b; 155 | src = 'this.truncate(' + strcall() + ',' + a + ',\'' + b + '\')'; 156 | break; 157 | case 'pad': 158 | a = util.number(args[0]); 159 | b = args[1]; 160 | b = (b!=='left' && b!=='middle' && b!=='center') ? 
// Look up (or create and cache) a format function for a quoted pattern.
// pattern: the format pattern, wrapped in single or double quotes
// key: the format category, used to namespace the cache entry
// fmt: factory function(pattern) that creates the format function
// Returns the index of the format function within context.formats.
// Throws an Error if the pattern is missing or not quoted.
function template_format(pattern, key, fmt) {
  // a missing (non-string) pattern is reported as unquoted, rather than
  // failing with a TypeError on the character access below
  var n = (typeof pattern === 'string') ? pattern.length : 0;
  var quoted = n > 0 && (
    (pattern[0] === '\'' && pattern[n-1] === '\'') ||
    (pattern[0] === '"' && pattern[n-1] === '"')
  );
  if (!quoted) {
    throw Error('Format pattern must be quoted: ' + pattern);
  }
  pattern = pattern.slice(1, -1);

  key = key + ':' + pattern;

  // Test for key presence, not truthiness: the first cached format is
  // stored at index 0, which a truthiness test would treat as a miss,
  // creating and pushing a duplicate format on every later lookup.
  if (!Object.prototype.hasOwnProperty.call(context.format_map, key)) {
    var f = fmt(pattern);
    var i = context.formats.length;
    context.formats.push(f);
    context.format_map[key] = i;
  }
  return context.format_map[key];
}
var dir = '/datalib/'; 9 | var base = 'http://' + host + dir; 10 | var uri = 'data/flare.json'; 11 | var url = base + uri; 12 | var rel = '//' + host + dir + uri; 13 | var file = './test/' + uri; 14 | var fake = 'http://globalhost/invalid.dne'; 15 | var text = require('fs').readFileSync(file, 'utf8'); 16 | 17 | describe('load', function() { 18 | 19 | global.XMLHttpRequest = require('./lib/XMLHttpRequest'); 20 | 21 | it('should not use xhr on server', function() { 22 | assert.isFalse(load.useXHR); 23 | }); 24 | 25 | it('should sanitize url', function() { 26 | assert.equal('file://a.txt', load.sanitizeUrl({ 27 | file: 'a.txt' 28 | })); 29 | assert.equal('hostname/a.txt', load.sanitizeUrl({ 30 | url: 'a.txt', 31 | baseURL: 'hostname' 32 | })); 33 | assert.equal('hostname/a.txt', load.sanitizeUrl({ 34 | url: 'a.txt', 35 | baseURL: 'hostname/' 36 | })); 37 | assert.equal('http://h.com/a.txt', load.sanitizeUrl({ 38 | url: '//h.com/a.txt' 39 | })); 40 | assert.equal('https://h.com/a.txt', load.sanitizeUrl({ 41 | url: '//h.com/a.txt', 42 | defaultProtocol: 'https' 43 | })); 44 | assert.equal(null, load.sanitizeUrl({url: undefined})); 45 | assert.equal(null, load.sanitizeUrl({url: null})); 46 | }); 47 | 48 | it('should handle client-side sanitization', function() { 49 | var host = ''; 50 | load.useXHR = true; 51 | global.window = {location: {hostname: 'localhost'}}; 52 | global.document = { 53 | createElement: function() { 54 | return {host: host, href: '', hostname: 'localhost'}; 55 | } 56 | }; 57 | 58 | assert.equal('http://localhost/a.txt', load.sanitizeUrl({ 59 | url: 'http://localhost/a.txt', 60 | domainWhiteList: ['localhost'] 61 | })); 62 | 63 | var host = 'localhost'; 64 | assert.equal('http://localhost/a.txt', load.sanitizeUrl({ 65 | url: 'http://localhost/a.txt', 66 | domainWhiteList: ['localhost'] 67 | })); 68 | 69 | load.useXHR = false; 70 | delete global.document; 71 | delete global.window; 72 | }); 73 | 74 | it('should throw error for invalid path', function() 
{ 75 | assert.throws(function() { return load({}); }); 76 | }); 77 | 78 | it('should throw error for empty url', function() { 79 | assert.throws(function() { return load({url: ''}); }); 80 | }); 81 | 82 | it('should load from file path', function(done) { 83 | load({file: file}, function(error, data) { 84 | assert.equal(text, data); 85 | done(); 86 | }); 87 | }); 88 | 89 | it('should load from file path synchronously', function() { 90 | assert.equal(text, load({file: file})); 91 | }); 92 | 93 | it('should infer file load in node', function() { 94 | assert.equal(text, load({url: file})); 95 | }); 96 | 97 | it('should load from file url', function(done) { 98 | load({url: 'file://' + file}, function(error, data) { 99 | assert.equal(text, data); 100 | done(); 101 | }); 102 | }); 103 | 104 | it('should load from http url', function(done) { 105 | load({url: url}, function(error, data) { 106 | assert.equal(text, data); 107 | done(); 108 | }); 109 | }); 110 | 111 | it('should error with invalid url', function(done) { 112 | load({url: url+'.invalid'}, function(error, data) { 113 | assert.isNull(data); 114 | assert.isNotNull(error); 115 | done(); 116 | }); 117 | }); 118 | 119 | it('should load from http url synchronously', function() { 120 | assert.equal(text, load({url: url})); 121 | }); 122 | 123 | it('should load from http base url + uri', function(done) { 124 | load( 125 | {baseURL: base, url: uri}, 126 | function(error, data) { 127 | assert.equal(text, data); 128 | done(); 129 | } 130 | ); 131 | }); 132 | 133 | it('should load from relative protocol http url', function(done) { 134 | load({url: rel}, 135 | function(error, data) { 136 | assert.equal(text, data); 137 | done(); 138 | } 139 | ); 140 | }); 141 | 142 | it('should load from relative protocol file url', function(done) { 143 | load({url: '//'+file, defaultProtocol: 'file'}, 144 | function(error, data) { 145 | assert.equal(text, data); 146 | done(); 147 | } 148 | ); 149 | }); 150 | 151 | it('should load from 
white-listed http domain', function(done) { 152 | load({url: url, domainWhiteList: [host]}, 153 | function(error, data) { 154 | assert.equal(text, data); 155 | done(); 156 | } 157 | ); 158 | }); 159 | 160 | it('should load from white-listed http subdomain', function(done) { 161 | load({url: url, domainWhiteList: [hostsub]}, 162 | function(error, data) { 163 | assert.equal(text, data); 164 | done(); 165 | } 166 | ); 167 | }); 168 | 169 | it('should not load from un-white-listed http domain', function(done) { 170 | load({url: url, domainWhiteList: []}, 171 | function(error, data) { 172 | assert.isNotNull(error); 173 | done(); 174 | } 175 | ); 176 | }); 177 | 178 | it('should return error for invalid protocol', function(done) { 179 | load({url: 'htsp://globalhost/invalid.dne'}, 180 | function(error, data) { 181 | assert.isNull(data); 182 | assert.isNotNull(error); 183 | done(); 184 | } 185 | ); 186 | }); 187 | 188 | it('should support xhr async', function(done) { 189 | load.useXHR = true; 190 | load({url: file}, function(error, data) { 191 | load.useXHR = false; 192 | assert.equal(text, data); 193 | done(); 194 | }); 195 | }); 196 | 197 | it('should support xhr async fallbacks', function(done) { 198 | load.useXHR = true; 199 | XMLHttpRequest.prototype.type = 'data'; 200 | load({url: file}, function(error, data) { 201 | load.useXHR = false; 202 | delete XMLHttpRequest.prototype.type; 203 | assert.equal(text, data); 204 | done(); 205 | }); 206 | }); 207 | 208 | it('should support xhr sync', function() { 209 | load.useXHR = true; 210 | assert.equal(text, load({url: file})); 211 | load.useXHR = false; 212 | }); 213 | 214 | it('should return error on failed xhr', function(done) { 215 | load.useXHR = true; 216 | load({url: fake}, function(error, data) { 217 | load.useXHR = false; 218 | assert.isNotNull(error); 219 | assert.isNull(data); 220 | done(); 221 | }); 222 | }); 223 | 224 | it('should use XDomainRequest for xhr if available', function(done) { 225 | load.useXHR = 
true; 226 | global.XDomainRequest = global.XMLHttpRequest; 227 | load({url: fake}, function(error, data) { 228 | load.useXHR = false; 229 | delete global.XDomainRequest; 230 | assert.isNotNull(error); 231 | done(); 232 | }); 233 | }); 234 | 235 | it('should use onload for xhr if available', function(done) { 236 | load.useXHR = true; 237 | XMLHttpRequest.prototype.onload = function() {}; 238 | load({url: fake}, function(error, data) { 239 | load.useXHR = false; 240 | delete XMLHttpRequest.prototype.onload; 241 | assert.isNotNull(error); 242 | done(); 243 | }); 244 | }); 245 | 246 | }); 247 | -------------------------------------------------------------------------------- /src/aggregate/aggregator.js: -------------------------------------------------------------------------------- 1 | var util = require('../util'), 2 | Measures = require('./measures'), 3 | Collector = require('./collector'); 4 | 5 | function Aggregator() { 6 | this._cells = {}; 7 | this._aggr = []; 8 | this._stream = false; 9 | } 10 | 11 | var Flags = Aggregator.Flags = { 12 | ADD_CELL: 1, 13 | MOD_CELL: 2 14 | }; 15 | 16 | var proto = Aggregator.prototype; 17 | 18 | // Parameters 19 | 20 | proto.stream = function(v) { 21 | if (v == null) return this._stream; 22 | this._stream = !!v; 23 | this._aggr = []; 24 | return this; 25 | }; 26 | 27 | // key accessor to use for streaming removes 28 | proto.key = function(key) { 29 | if (key == null) return this._key; 30 | this._key = util.$(key); 31 | return this; 32 | }; 33 | 34 | // Input: array of objects of the form 35 | // {name: string, get: function} 36 | proto.groupby = function(dims) { 37 | this._dims = util.array(dims).map(function(d, i) { 38 | d = util.isString(d) ? {name: d, get: util.$(d)} 39 | : util.isFunction(d) ? {name: util.name(d) || d.name || ('_' + i), get: d} 40 | : (d.name && util.isFunction(d.get)) ? 
d : null; 41 | if (d == null) throw 'Invalid groupby argument: ' + d; 42 | return d; 43 | }); 44 | return this.clear(); 45 | }; 46 | 47 | // Input: array of objects of the form 48 | // {name: string, ops: [string, ...]} 49 | proto.summarize = function(fields) { 50 | fields = summarize_args(fields); 51 | this._count = true; 52 | var aggr = (this._aggr = []), 53 | m, f, i, j, op, as, get; 54 | 55 | for (i=0; i 0) { 212 | // consolidate collector values 213 | if (cell.collect) { 214 | cell.data.values(); 215 | } 216 | // update tuple properties 217 | for (i=0; i 1 ? 153 | function(x) { return s.reduce(function(x,f) { return x[f]; }, x); } : 154 | function(x) { return x[f]; } 155 | ); 156 | }; 157 | 158 | // short-cut for accessor 159 | u.$ = u.accessor; 160 | 161 | u.mutator = function(f) { 162 | var s; 163 | return u.isString(f) && (s=u.field(f)).length > 1 ? 164 | function(x, v) { 165 | for (var i=0; i y) return sign[i]; 223 | } 224 | return 0; 225 | }; 226 | }; 227 | 228 | u.cmp = function(a, b) { 229 | if (a < b) { 230 | return -1; 231 | } else if (a > b) { 232 | return 1; 233 | } else if (a >= b) { 234 | return 0; 235 | } else if (a === null) { 236 | return -1; 237 | } else if (b === null) { 238 | return 1; 239 | } 240 | return NaN; 241 | }; 242 | 243 | u.numcmp = function(a, b) { return a - b; }; 244 | 245 | u.stablesort = function(array, sortBy, keyFn) { 246 | var indices = array.reduce(function(idx, v, i) { 247 | return (idx[keyFn(v)] = i, idx); 248 | }, {}); 249 | 250 | array.sort(function(a, b) { 251 | var sa = sortBy(a), 252 | sb = sortBy(b); 253 | return sa < sb ? -1 : sa > sb ? 
1 254 | : (indices[keyFn(a)] - indices[keyFn(b)]); 255 | }); 256 | 257 | return array; 258 | }; 259 | 260 | 261 | // string functions 262 | 263 | u.pad = function(s, length, pos, padchar) { 264 | padchar = padchar || " "; 265 | var d = length - s.length; 266 | if (d <= 0) return s; 267 | switch (pos) { 268 | case 'left': 269 | return strrep(d, padchar) + s; 270 | case 'middle': 271 | case 'center': 272 | return strrep(Math.floor(d/2), padchar) + 273 | s + strrep(Math.ceil(d/2), padchar); 274 | default: 275 | return s + strrep(d, padchar); 276 | } 277 | }; 278 | 279 | function strrep(n, str) { 280 | var s = "", i; 281 | for (i=0; i b) b = v; 177 | } 178 | } 179 | return [a, b]; 180 | }; 181 | 182 | // Find the integer indices of the minimum and maximum values. 183 | stats.extent.index = function(values, f) { 184 | f = util.$(f); 185 | var x = -1, y = -1, a, b, v, i, n = values.length; 186 | for (i=0; i b) { b = v; y = i; } 195 | } 196 | } 197 | return [x, y]; 198 | }; 199 | 200 | // Compute the dot product of two arrays of numbers. 
201 | stats.dot = function(values, a, b) { 202 | var sum = 0, i, v; 203 | if (!b) { 204 | if (values.length !== a.length) { 205 | throw Error('Array lengths must match.'); 206 | } 207 | for (i=0; i -1 && p !== v) { 240 | mu = 1 + (i-1 + tie) / 2; 241 | for (; tie -1) { 249 | mu = 1 + (n-1 + tie) / 2; 250 | for (; tie max) max = x; 448 | delta = x - mean; 449 | mean = mean + delta / (++valid); 450 | M2 = M2 + delta * (x - mean); 451 | vals.push(x); 452 | } 453 | } 454 | M2 = M2 / (valid - 1); 455 | sd = Math.sqrt(M2); 456 | 457 | // sort values for median and iqr 458 | vals.sort(util.cmp); 459 | 460 | return { 461 | type: type(values, f), 462 | unique: u, 463 | count: values.length, 464 | valid: valid, 465 | missing: missing, 466 | distinct: distinct, 467 | min: min, 468 | max: max, 469 | mean: mean, 470 | stdev: sd, 471 | median: (v = stats.quantile(vals, 0.5)), 472 | q1: stats.quantile(vals, 0.25), 473 | q3: stats.quantile(vals, 0.75), 474 | modeskew: sd === 0 ? 0 : (mean - v) / sd 475 | }; 476 | }; 477 | 478 | // Compute profiles for all variables in a data set. 
479 | stats.summary = function(data, fields) { 480 | fields = fields || util.keys(data[0]); 481 | var s = fields.map(function(f) { 482 | var p = stats.profile(data, util.$(f)); 483 | return (p.field = f, p); 484 | }); 485 | return (s.__summary__ = true, s); 486 | }; 487 | 488 | module.exports = stats; -------------------------------------------------------------------------------- /test/data/stocks.csv: -------------------------------------------------------------------------------- 1 | symbol,date,price 2 | MSFT,Jan 1 2000,39.81 3 | MSFT,Feb 1 2000,36.35 4 | MSFT,Mar 1 2000,43.22 5 | MSFT,Apr 1 2000,28.37 6 | MSFT,May 1 2000,25.45 7 | MSFT,Jun 1 2000,32.54 8 | MSFT,Jul 1 2000,28.4 9 | MSFT,Aug 1 2000,28.4 10 | MSFT,Sep 1 2000,24.53 11 | MSFT,Oct 1 2000,28.02 12 | MSFT,Nov 1 2000,23.34 13 | MSFT,Dec 1 2000,17.65 14 | MSFT,Jan 1 2001,24.84 15 | MSFT,Feb 1 2001,24 16 | MSFT,Mar 1 2001,22.25 17 | MSFT,Apr 1 2001,27.56 18 | MSFT,May 1 2001,28.14 19 | MSFT,Jun 1 2001,29.7 20 | MSFT,Jul 1 2001,26.93 21 | MSFT,Aug 1 2001,23.21 22 | MSFT,Sep 1 2001,20.82 23 | MSFT,Oct 1 2001,23.65 24 | MSFT,Nov 1 2001,26.12 25 | MSFT,Dec 1 2001,26.95 26 | MSFT,Jan 1 2002,25.92 27 | MSFT,Feb 1 2002,23.73 28 | MSFT,Mar 1 2002,24.53 29 | MSFT,Apr 1 2002,21.26 30 | MSFT,May 1 2002,20.71 31 | MSFT,Jun 1 2002,22.25 32 | MSFT,Jul 1 2002,19.52 33 | MSFT,Aug 1 2002,19.97 34 | MSFT,Sep 1 2002,17.79 35 | MSFT,Oct 1 2002,21.75 36 | MSFT,Nov 1 2002,23.46 37 | MSFT,Dec 1 2002,21.03 38 | MSFT,Jan 1 2003,19.31 39 | MSFT,Feb 1 2003,19.34 40 | MSFT,Mar 1 2003,19.76 41 | MSFT,Apr 1 2003,20.87 42 | MSFT,May 1 2003,20.09 43 | MSFT,Jun 1 2003,20.93 44 | MSFT,Jul 1 2003,21.56 45 | MSFT,Aug 1 2003,21.65 46 | MSFT,Sep 1 2003,22.69 47 | MSFT,Oct 1 2003,21.45 48 | MSFT,Nov 1 2003,21.1 49 | MSFT,Dec 1 2003,22.46 50 | MSFT,Jan 1 2004,22.69 51 | MSFT,Feb 1 2004,21.77 52 | MSFT,Mar 1 2004,20.46 53 | MSFT,Apr 1 2004,21.45 54 | MSFT,May 1 2004,21.53 55 | MSFT,Jun 1 2004,23.44 56 | MSFT,Jul 1 2004,23.38 57 | MSFT,Aug 1 
2004,22.47 58 | MSFT,Sep 1 2004,22.76 59 | MSFT,Oct 1 2004,23.02 60 | MSFT,Nov 1 2004,24.6 61 | MSFT,Dec 1 2004,24.52 62 | MSFT,Jan 1 2005,24.11 63 | MSFT,Feb 1 2005,23.15 64 | MSFT,Mar 1 2005,22.24 65 | MSFT,Apr 1 2005,23.28 66 | MSFT,May 1 2005,23.82 67 | MSFT,Jun 1 2005,22.93 68 | MSFT,Jul 1 2005,23.64 69 | MSFT,Aug 1 2005,25.35 70 | MSFT,Sep 1 2005,23.83 71 | MSFT,Oct 1 2005,23.8 72 | MSFT,Nov 1 2005,25.71 73 | MSFT,Dec 1 2005,24.29 74 | MSFT,Jan 1 2006,26.14 75 | MSFT,Feb 1 2006,25.04 76 | MSFT,Mar 1 2006,25.36 77 | MSFT,Apr 1 2006,22.5 78 | MSFT,May 1 2006,21.19 79 | MSFT,Jun 1 2006,21.8 80 | MSFT,Jul 1 2006,22.51 81 | MSFT,Aug 1 2006,24.13 82 | MSFT,Sep 1 2006,25.68 83 | MSFT,Oct 1 2006,26.96 84 | MSFT,Nov 1 2006,27.66 85 | MSFT,Dec 1 2006,28.13 86 | MSFT,Jan 1 2007,29.07 87 | MSFT,Feb 1 2007,26.63 88 | MSFT,Mar 1 2007,26.35 89 | MSFT,Apr 1 2007,28.3 90 | MSFT,May 1 2007,29.11 91 | MSFT,Jun 1 2007,27.95 92 | MSFT,Jul 1 2007,27.5 93 | MSFT,Aug 1 2007,27.34 94 | MSFT,Sep 1 2007,28.04 95 | MSFT,Oct 1 2007,35.03 96 | MSFT,Nov 1 2007,32.09 97 | MSFT,Dec 1 2007,34 98 | MSFT,Jan 1 2008,31.13 99 | MSFT,Feb 1 2008,26.07 100 | MSFT,Mar 1 2008,27.21 101 | MSFT,Apr 1 2008,27.34 102 | MSFT,May 1 2008,27.25 103 | MSFT,Jun 1 2008,26.47 104 | MSFT,Jul 1 2008,24.75 105 | MSFT,Aug 1 2008,26.36 106 | MSFT,Sep 1 2008,25.78 107 | MSFT,Oct 1 2008,21.57 108 | MSFT,Nov 1 2008,19.66 109 | MSFT,Dec 1 2008,18.91 110 | MSFT,Jan 1 2009,16.63 111 | MSFT,Feb 1 2009,15.81 112 | MSFT,Mar 1 2009,17.99 113 | MSFT,Apr 1 2009,19.84 114 | MSFT,May 1 2009,20.59 115 | MSFT,Jun 1 2009,23.42 116 | MSFT,Jul 1 2009,23.18 117 | MSFT,Aug 1 2009,24.43 118 | MSFT,Sep 1 2009,25.49 119 | MSFT,Oct 1 2009,27.48 120 | MSFT,Nov 1 2009,29.27 121 | MSFT,Dec 1 2009,30.34 122 | MSFT,Jan 1 2010,28.05 123 | MSFT,Feb 1 2010,28.67 124 | MSFT,Mar 1 2010,28.8 125 | AMZN,Jan 1 2000,64.56 126 | AMZN,Feb 1 2000,68.87 127 | AMZN,Mar 1 2000,67 128 | AMZN,Apr 1 2000,55.19 129 | AMZN,May 1 2000,48.31 130 | AMZN,Jun 1 2000,36.31 
131 | AMZN,Jul 1 2000,30.12 132 | AMZN,Aug 1 2000,41.5 133 | AMZN,Sep 1 2000,38.44 134 | AMZN,Oct 1 2000,36.62 135 | AMZN,Nov 1 2000,24.69 136 | AMZN,Dec 1 2000,15.56 137 | AMZN,Jan 1 2001,17.31 138 | AMZN,Feb 1 2001,10.19 139 | AMZN,Mar 1 2001,10.23 140 | AMZN,Apr 1 2001,15.78 141 | AMZN,May 1 2001,16.69 142 | AMZN,Jun 1 2001,14.15 143 | AMZN,Jul 1 2001,12.49 144 | AMZN,Aug 1 2001,8.94 145 | AMZN,Sep 1 2001,5.97 146 | AMZN,Oct 1 2001,6.98 147 | AMZN,Nov 1 2001,11.32 148 | AMZN,Dec 1 2001,10.82 149 | AMZN,Jan 1 2002,14.19 150 | AMZN,Feb 1 2002,14.1 151 | AMZN,Mar 1 2002,14.3 152 | AMZN,Apr 1 2002,16.69 153 | AMZN,May 1 2002,18.23 154 | AMZN,Jun 1 2002,16.25 155 | AMZN,Jul 1 2002,14.45 156 | AMZN,Aug 1 2002,14.94 157 | AMZN,Sep 1 2002,15.93 158 | AMZN,Oct 1 2002,19.36 159 | AMZN,Nov 1 2002,23.35 160 | AMZN,Dec 1 2002,18.89 161 | AMZN,Jan 1 2003,21.85 162 | AMZN,Feb 1 2003,22.01 163 | AMZN,Mar 1 2003,26.03 164 | AMZN,Apr 1 2003,28.69 165 | AMZN,May 1 2003,35.89 166 | AMZN,Jun 1 2003,36.32 167 | AMZN,Jul 1 2003,41.64 168 | AMZN,Aug 1 2003,46.32 169 | AMZN,Sep 1 2003,48.43 170 | AMZN,Oct 1 2003,54.43 171 | AMZN,Nov 1 2003,53.97 172 | AMZN,Dec 1 2003,52.62 173 | AMZN,Jan 1 2004,50.4 174 | AMZN,Feb 1 2004,43.01 175 | AMZN,Mar 1 2004,43.28 176 | AMZN,Apr 1 2004,43.6 177 | AMZN,May 1 2004,48.5 178 | AMZN,Jun 1 2004,54.4 179 | AMZN,Jul 1 2004,38.92 180 | AMZN,Aug 1 2004,38.14 181 | AMZN,Sep 1 2004,40.86 182 | AMZN,Oct 1 2004,34.13 183 | AMZN,Nov 1 2004,39.68 184 | AMZN,Dec 1 2004,44.29 185 | AMZN,Jan 1 2005,43.22 186 | AMZN,Feb 1 2005,35.18 187 | AMZN,Mar 1 2005,34.27 188 | AMZN,Apr 1 2005,32.36 189 | AMZN,May 1 2005,35.51 190 | AMZN,Jun 1 2005,33.09 191 | AMZN,Jul 1 2005,45.15 192 | AMZN,Aug 1 2005,42.7 193 | AMZN,Sep 1 2005,45.3 194 | AMZN,Oct 1 2005,39.86 195 | AMZN,Nov 1 2005,48.46 196 | AMZN,Dec 1 2005,47.15 197 | AMZN,Jan 1 2006,44.82 198 | AMZN,Feb 1 2006,37.44 199 | AMZN,Mar 1 2006,36.53 200 | AMZN,Apr 1 2006,35.21 201 | AMZN,May 1 2006,34.61 202 | AMZN,Jun 1 
2006,38.68 203 | AMZN,Jul 1 2006,26.89 204 | AMZN,Aug 1 2006,30.83 205 | AMZN,Sep 1 2006,32.12 206 | AMZN,Oct 1 2006,38.09 207 | AMZN,Nov 1 2006,40.34 208 | AMZN,Dec 1 2006,39.46 209 | AMZN,Jan 1 2007,37.67 210 | AMZN,Feb 1 2007,39.14 211 | AMZN,Mar 1 2007,39.79 212 | AMZN,Apr 1 2007,61.33 213 | AMZN,May 1 2007,69.14 214 | AMZN,Jun 1 2007,68.41 215 | AMZN,Jul 1 2007,78.54 216 | AMZN,Aug 1 2007,79.91 217 | AMZN,Sep 1 2007,93.15 218 | AMZN,Oct 1 2007,89.15 219 | AMZN,Nov 1 2007,90.56 220 | AMZN,Dec 1 2007,92.64 221 | AMZN,Jan 1 2008,77.7 222 | AMZN,Feb 1 2008,64.47 223 | AMZN,Mar 1 2008,71.3 224 | AMZN,Apr 1 2008,78.63 225 | AMZN,May 1 2008,81.62 226 | AMZN,Jun 1 2008,73.33 227 | AMZN,Jul 1 2008,76.34 228 | AMZN,Aug 1 2008,80.81 229 | AMZN,Sep 1 2008,72.76 230 | AMZN,Oct 1 2008,57.24 231 | AMZN,Nov 1 2008,42.7 232 | AMZN,Dec 1 2008,51.28 233 | AMZN,Jan 1 2009,58.82 234 | AMZN,Feb 1 2009,64.79 235 | AMZN,Mar 1 2009,73.44 236 | AMZN,Apr 1 2009,80.52 237 | AMZN,May 1 2009,77.99 238 | AMZN,Jun 1 2009,83.66 239 | AMZN,Jul 1 2009,85.76 240 | AMZN,Aug 1 2009,81.19 241 | AMZN,Sep 1 2009,93.36 242 | AMZN,Oct 1 2009,118.81 243 | AMZN,Nov 1 2009,135.91 244 | AMZN,Dec 1 2009,134.52 245 | AMZN,Jan 1 2010,125.41 246 | AMZN,Feb 1 2010,118.4 247 | AMZN,Mar 1 2010,128.82 248 | IBM,Jan 1 2000,100.52 249 | IBM,Feb 1 2000,92.11 250 | IBM,Mar 1 2000,106.11 251 | IBM,Apr 1 2000,99.95 252 | IBM,May 1 2000,96.31 253 | IBM,Jun 1 2000,98.33 254 | IBM,Jul 1 2000,100.74 255 | IBM,Aug 1 2000,118.62 256 | IBM,Sep 1 2000,101.19 257 | IBM,Oct 1 2000,88.5 258 | IBM,Nov 1 2000,84.12 259 | IBM,Dec 1 2000,76.47 260 | IBM,Jan 1 2001,100.76 261 | IBM,Feb 1 2001,89.98 262 | IBM,Mar 1 2001,86.63 263 | IBM,Apr 1 2001,103.7 264 | IBM,May 1 2001,100.82 265 | IBM,Jun 1 2001,102.35 266 | IBM,Jul 1 2001,94.87 267 | IBM,Aug 1 2001,90.25 268 | IBM,Sep 1 2001,82.82 269 | IBM,Oct 1 2001,97.58 270 | IBM,Nov 1 2001,104.5 271 | IBM,Dec 1 2001,109.36 272 | IBM,Jan 1 2002,97.54 273 | IBM,Feb 1 2002,88.82 274 | IBM,Mar 1 
2002,94.15 275 | IBM,Apr 1 2002,75.82 276 | IBM,May 1 2002,72.97 277 | IBM,Jun 1 2002,65.31 278 | IBM,Jul 1 2002,63.86 279 | IBM,Aug 1 2002,68.52 280 | IBM,Sep 1 2002,53.01 281 | IBM,Oct 1 2002,71.76 282 | IBM,Nov 1 2002,79.16 283 | IBM,Dec 1 2002,70.58 284 | IBM,Jan 1 2003,71.22 285 | IBM,Feb 1 2003,71.13 286 | IBM,Mar 1 2003,71.57 287 | IBM,Apr 1 2003,77.47 288 | IBM,May 1 2003,80.48 289 | IBM,Jun 1 2003,75.42 290 | IBM,Jul 1 2003,74.28 291 | IBM,Aug 1 2003,75.12 292 | IBM,Sep 1 2003,80.91 293 | IBM,Oct 1 2003,81.96 294 | IBM,Nov 1 2003,83.08 295 | IBM,Dec 1 2003,85.05 296 | IBM,Jan 1 2004,91.06 297 | IBM,Feb 1 2004,88.7 298 | IBM,Mar 1 2004,84.41 299 | IBM,Apr 1 2004,81.04 300 | IBM,May 1 2004,81.59 301 | IBM,Jun 1 2004,81.19 302 | IBM,Jul 1 2004,80.19 303 | IBM,Aug 1 2004,78.17 304 | IBM,Sep 1 2004,79.13 305 | IBM,Oct 1 2004,82.84 306 | IBM,Nov 1 2004,87.15 307 | IBM,Dec 1 2004,91.16 308 | IBM,Jan 1 2005,86.39 309 | IBM,Feb 1 2005,85.78 310 | IBM,Mar 1 2005,84.66 311 | IBM,Apr 1 2005,70.77 312 | IBM,May 1 2005,70.18 313 | IBM,Jun 1 2005,68.93 314 | IBM,Jul 1 2005,77.53 315 | IBM,Aug 1 2005,75.07 316 | IBM,Sep 1 2005,74.7 317 | IBM,Oct 1 2005,76.25 318 | IBM,Nov 1 2005,82.98 319 | IBM,Dec 1 2005,76.73 320 | IBM,Jan 1 2006,75.89 321 | IBM,Feb 1 2006,75.09 322 | IBM,Mar 1 2006,77.17 323 | IBM,Apr 1 2006,77.05 324 | IBM,May 1 2006,75.04 325 | IBM,Jun 1 2006,72.15 326 | IBM,Jul 1 2006,72.7 327 | IBM,Aug 1 2006,76.35 328 | IBM,Sep 1 2006,77.26 329 | IBM,Oct 1 2006,87.06 330 | IBM,Nov 1 2006,86.95 331 | IBM,Dec 1 2006,91.9 332 | IBM,Jan 1 2007,93.79 333 | IBM,Feb 1 2007,88.18 334 | IBM,Mar 1 2007,89.44 335 | IBM,Apr 1 2007,96.98 336 | IBM,May 1 2007,101.54 337 | IBM,Jun 1 2007,100.25 338 | IBM,Jul 1 2007,105.4 339 | IBM,Aug 1 2007,111.54 340 | IBM,Sep 1 2007,112.6 341 | IBM,Oct 1 2007,111 342 | IBM,Nov 1 2007,100.9 343 | IBM,Dec 1 2007,103.7 344 | IBM,Jan 1 2008,102.75 345 | IBM,Feb 1 2008,109.64 346 | IBM,Mar 1 2008,110.87 347 | IBM,Apr 1 2008,116.23 348 | IBM,May 1 
2008,125.14 349 | IBM,Jun 1 2008,114.6 350 | IBM,Jul 1 2008,123.74 351 | IBM,Aug 1 2008,118.16 352 | IBM,Sep 1 2008,113.53 353 | IBM,Oct 1 2008,90.24 354 | IBM,Nov 1 2008,79.65 355 | IBM,Dec 1 2008,82.15 356 | IBM,Jan 1 2009,89.46 357 | IBM,Feb 1 2009,90.32 358 | IBM,Mar 1 2009,95.09 359 | IBM,Apr 1 2009,101.29 360 | IBM,May 1 2009,104.85 361 | IBM,Jun 1 2009,103.01 362 | IBM,Jul 1 2009,116.34 363 | IBM,Aug 1 2009,117 364 | IBM,Sep 1 2009,118.55 365 | IBM,Oct 1 2009,119.54 366 | IBM,Nov 1 2009,125.79 367 | IBM,Dec 1 2009,130.32 368 | IBM,Jan 1 2010,121.85 369 | IBM,Feb 1 2010,127.16 370 | IBM,Mar 1 2010,125.55 371 | GOOG,Aug 1 2004,102.37 372 | GOOG,Sep 1 2004,129.6 373 | GOOG,Oct 1 2004,190.64 374 | GOOG,Nov 1 2004,181.98 375 | GOOG,Dec 1 2004,192.79 376 | GOOG,Jan 1 2005,195.62 377 | GOOG,Feb 1 2005,187.99 378 | GOOG,Mar 1 2005,180.51 379 | GOOG,Apr 1 2005,220 380 | GOOG,May 1 2005,277.27 381 | GOOG,Jun 1 2005,294.15 382 | GOOG,Jul 1 2005,287.76 383 | GOOG,Aug 1 2005,286 384 | GOOG,Sep 1 2005,316.46 385 | GOOG,Oct 1 2005,372.14 386 | GOOG,Nov 1 2005,404.91 387 | GOOG,Dec 1 2005,414.86 388 | GOOG,Jan 1 2006,432.66 389 | GOOG,Feb 1 2006,362.62 390 | GOOG,Mar 1 2006,390 391 | GOOG,Apr 1 2006,417.94 392 | GOOG,May 1 2006,371.82 393 | GOOG,Jun 1 2006,419.33 394 | GOOG,Jul 1 2006,386.6 395 | GOOG,Aug 1 2006,378.53 396 | GOOG,Sep 1 2006,401.9 397 | GOOG,Oct 1 2006,476.39 398 | GOOG,Nov 1 2006,484.81 399 | GOOG,Dec 1 2006,460.48 400 | GOOG,Jan 1 2007,501.5 401 | GOOG,Feb 1 2007,449.45 402 | GOOG,Mar 1 2007,458.16 403 | GOOG,Apr 1 2007,471.38 404 | GOOG,May 1 2007,497.91 405 | GOOG,Jun 1 2007,522.7 406 | GOOG,Jul 1 2007,510 407 | GOOG,Aug 1 2007,515.25 408 | GOOG,Sep 1 2007,567.27 409 | GOOG,Oct 1 2007,707 410 | GOOG,Nov 1 2007,693 411 | GOOG,Dec 1 2007,691.48 412 | GOOG,Jan 1 2008,564.3 413 | GOOG,Feb 1 2008,471.18 414 | GOOG,Mar 1 2008,440.47 415 | GOOG,Apr 1 2008,574.29 416 | GOOG,May 1 2008,585.8 417 | GOOG,Jun 1 2008,526.42 418 | GOOG,Jul 1 2008,473.75 419 | GOOG,Aug 
1 2008,463.29 420 | GOOG,Sep 1 2008,400.52 421 | GOOG,Oct 1 2008,359.36 422 | GOOG,Nov 1 2008,292.96 423 | GOOG,Dec 1 2008,307.65 424 | GOOG,Jan 1 2009,338.53 425 | GOOG,Feb 1 2009,337.99 426 | GOOG,Mar 1 2009,348.06 427 | GOOG,Apr 1 2009,395.97 428 | GOOG,May 1 2009,417.23 429 | GOOG,Jun 1 2009,421.59 430 | GOOG,Jul 1 2009,443.05 431 | GOOG,Aug 1 2009,461.67 432 | GOOG,Sep 1 2009,495.85 433 | GOOG,Oct 1 2009,536.12 434 | GOOG,Nov 1 2009,583 435 | GOOG,Dec 1 2009,619.98 436 | GOOG,Jan 1 2010,529.94 437 | GOOG,Feb 1 2010,526.8 438 | GOOG,Mar 1 2010,560.19 439 | AAPL,Jan 1 2000,25.94 440 | AAPL,Feb 1 2000,28.66 441 | AAPL,Mar 1 2000,33.95 442 | AAPL,Apr 1 2000,31.01 443 | AAPL,May 1 2000,21 444 | AAPL,Jun 1 2000,26.19 445 | AAPL,Jul 1 2000,25.41 446 | AAPL,Aug 1 2000,30.47 447 | AAPL,Sep 1 2000,12.88 448 | AAPL,Oct 1 2000,9.78 449 | AAPL,Nov 1 2000,8.25 450 | AAPL,Dec 1 2000,7.44 451 | AAPL,Jan 1 2001,10.81 452 | AAPL,Feb 1 2001,9.12 453 | AAPL,Mar 1 2001,11.03 454 | AAPL,Apr 1 2001,12.74 455 | AAPL,May 1 2001,9.98 456 | AAPL,Jun 1 2001,11.62 457 | AAPL,Jul 1 2001,9.4 458 | AAPL,Aug 1 2001,9.27 459 | AAPL,Sep 1 2001,7.76 460 | AAPL,Oct 1 2001,8.78 461 | AAPL,Nov 1 2001,10.65 462 | AAPL,Dec 1 2001,10.95 463 | AAPL,Jan 1 2002,12.36 464 | AAPL,Feb 1 2002,10.85 465 | AAPL,Mar 1 2002,11.84 466 | AAPL,Apr 1 2002,12.14 467 | AAPL,May 1 2002,11.65 468 | AAPL,Jun 1 2002,8.86 469 | AAPL,Jul 1 2002,7.63 470 | AAPL,Aug 1 2002,7.38 471 | AAPL,Sep 1 2002,7.25 472 | AAPL,Oct 1 2002,8.03 473 | AAPL,Nov 1 2002,7.75 474 | AAPL,Dec 1 2002,7.16 475 | AAPL,Jan 1 2003,7.18 476 | AAPL,Feb 1 2003,7.51 477 | AAPL,Mar 1 2003,7.07 478 | AAPL,Apr 1 2003,7.11 479 | AAPL,May 1 2003,8.98 480 | AAPL,Jun 1 2003,9.53 481 | AAPL,Jul 1 2003,10.54 482 | AAPL,Aug 1 2003,11.31 483 | AAPL,Sep 1 2003,10.36 484 | AAPL,Oct 1 2003,11.44 485 | AAPL,Nov 1 2003,10.45 486 | AAPL,Dec 1 2003,10.69 487 | AAPL,Jan 1 2004,11.28 488 | AAPL,Feb 1 2004,11.96 489 | AAPL,Mar 1 2004,13.52 490 | AAPL,Apr 1 2004,12.89 491 | 
AAPL,May 1 2004,14.03 492 | AAPL,Jun 1 2004,16.27 493 | AAPL,Jul 1 2004,16.17 494 | AAPL,Aug 1 2004,17.25 495 | AAPL,Sep 1 2004,19.38 496 | AAPL,Oct 1 2004,26.2 497 | AAPL,Nov 1 2004,33.53 498 | AAPL,Dec 1 2004,32.2 499 | AAPL,Jan 1 2005,38.45 500 | AAPL,Feb 1 2005,44.86 501 | AAPL,Mar 1 2005,41.67 502 | AAPL,Apr 1 2005,36.06 503 | AAPL,May 1 2005,39.76 504 | AAPL,Jun 1 2005,36.81 505 | AAPL,Jul 1 2005,42.65 506 | AAPL,Aug 1 2005,46.89 507 | AAPL,Sep 1 2005,53.61 508 | AAPL,Oct 1 2005,57.59 509 | AAPL,Nov 1 2005,67.82 510 | AAPL,Dec 1 2005,71.89 511 | AAPL,Jan 1 2006,75.51 512 | AAPL,Feb 1 2006,68.49 513 | AAPL,Mar 1 2006,62.72 514 | AAPL,Apr 1 2006,70.39 515 | AAPL,May 1 2006,59.77 516 | AAPL,Jun 1 2006,57.27 517 | AAPL,Jul 1 2006,67.96 518 | AAPL,Aug 1 2006,67.85 519 | AAPL,Sep 1 2006,76.98 520 | AAPL,Oct 1 2006,81.08 521 | AAPL,Nov 1 2006,91.66 522 | AAPL,Dec 1 2006,84.84 523 | AAPL,Jan 1 2007,85.73 524 | AAPL,Feb 1 2007,84.61 525 | AAPL,Mar 1 2007,92.91 526 | AAPL,Apr 1 2007,99.8 527 | AAPL,May 1 2007,121.19 528 | AAPL,Jun 1 2007,122.04 529 | AAPL,Jul 1 2007,131.76 530 | AAPL,Aug 1 2007,138.48 531 | AAPL,Sep 1 2007,153.47 532 | AAPL,Oct 1 2007,189.95 533 | AAPL,Nov 1 2007,182.22 534 | AAPL,Dec 1 2007,198.08 535 | AAPL,Jan 1 2008,135.36 536 | AAPL,Feb 1 2008,125.02 537 | AAPL,Mar 1 2008,143.5 538 | AAPL,Apr 1 2008,173.95 539 | AAPL,May 1 2008,188.75 540 | AAPL,Jun 1 2008,167.44 541 | AAPL,Jul 1 2008,158.95 542 | AAPL,Aug 1 2008,169.53 543 | AAPL,Sep 1 2008,113.66 544 | AAPL,Oct 1 2008,107.59 545 | AAPL,Nov 1 2008,92.67 546 | AAPL,Dec 1 2008,85.35 547 | AAPL,Jan 1 2009,90.13 548 | AAPL,Feb 1 2009,89.31 549 | AAPL,Mar 1 2009,105.12 550 | AAPL,Apr 1 2009,125.83 551 | AAPL,May 1 2009,135.81 552 | AAPL,Jun 1 2009,142.43 553 | AAPL,Jul 1 2009,163.39 554 | AAPL,Aug 1 2009,168.21 555 | AAPL,Sep 1 2009,185.35 556 | AAPL,Oct 1 2009,188.5 557 | AAPL,Nov 1 2009,199.91 558 | AAPL,Dec 1 2009,210.73 559 | AAPL,Jan 1 2010,192.06 560 | AAPL,Feb 1 2010,204.62 561 | AAPL,Mar 1 
2010,223.02 -------------------------------------------------------------------------------- /test/stats.test.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var assert = require('chai').assert; 4 | var stats = require('../src/stats'); 5 | var EPSILON = 1e-15; 6 | 7 | var a = function(x) { return x.a; }, 8 | b = function(x) { return x.b; }, 9 | c = function(x) { return x.c; }, 10 | d = function(x) { return x.d; }; 11 | 12 | describe('stats', function() { 13 | 14 | describe('unique', function() { 15 | it('should return unique values in the original order', function() { 16 | var u = stats.unique([3, 1, 2]); 17 | [3, 1, 2].forEach(function(v, i) { assert.equal(v, u[i]); }); 18 | }); 19 | 20 | it('should filter out repeated occurrences of values', function() { 21 | var u = stats.unique([1, 1, 2, 1, 2, 3, 1, 2, 3, 3, 3]); 22 | [1, 2, 3].forEach(function(v, i) { assert.equal(v, u[i]); }); 23 | }); 24 | 25 | it('should treat undefined as a value and remove duplicates', function() { 26 | var u = stats.unique([1, undefined, 2, undefined]); 27 | [1, undefined, 2].forEach(function(v, i) { assert.equal(v, u[i]); }); 28 | }); 29 | 30 | it('should apply transformation to array elements', function() { 31 | var u = stats.unique([1,2,3], function (d) { return -2 * d; }); 32 | [-2, -4, -6].forEach(function(v, i) { assert.equal(v, u[i]); }); 33 | }); 34 | 35 | it('should filter out repeated occurrences of transformed values', function() { 36 | var u = stats.unique([1,1,2,3], function (d) { return d<3 ? 
1 : 3; }); 37 | [1, 3].forEach(function(v, i) { assert.equal(v, u[i]); }); 38 | }); 39 | }); 40 | 41 | describe('count', function() { 42 | it('should count all values', function() { 43 | assert.equal(stats.count([]), 0); 44 | assert.equal(stats.count([3, 1, 2]), 3); 45 | assert.equal(stats.count([null, 1, 2, null]), 4); 46 | assert.equal(stats.count([NaN, 1, 2]), 3); 47 | assert.equal(stats.count([1, undefined, 2, undefined, 3]), 5); 48 | }); 49 | }); 50 | 51 | describe('valid', function() { 52 | it('should ignore null values', function() { 53 | assert.equal(stats.count.valid([3, 1, 2]), 3); 54 | assert.equal(stats.count.valid([null, 1, 2, null]), 2); 55 | }); 56 | 57 | it('should ignore NaN values', function() { 58 | assert.equal(stats.count.valid([NaN, 1, 2]), 2); 59 | }); 60 | 61 | it('should ignore undefined values', function() { 62 | assert.equal(stats.count.valid([1, undefined, 2, undefined, 3]), 3); 63 | }); 64 | 65 | it('should support accessor', function() { 66 | assert.equal(stats.count.valid([{a:3}, {a:1}, {a:2}], 'a'), 3); 67 | }); 68 | }); 69 | 70 | describe('count.distinct', function() { 71 | it('should count distinct values', function() { 72 | assert.equal(stats.count.distinct([3, 1, 2]), 3); 73 | assert.equal(stats.count.distinct([1, 1, 2, 1, 2, 3, 1, 2, 3, 3, 3]), 3); 74 | }); 75 | 76 | it('should recognize null values', function() { 77 | assert.equal(stats.count.distinct([null, 1, 2]), 3); 78 | }); 79 | 80 | it('should recognize undefined values', function() { 81 | assert.equal(stats.count.distinct([1, undefined, 2, undefined, 3]), 4); 82 | }); 83 | 84 | it('should support accessor', function() { 85 | assert.equal(stats.count.distinct([{a:3}, {a:1}, {a:2}], 'a'), 3); 86 | }); 87 | }); 88 | 89 | describe('count.missing', function() { 90 | it('should count null values', function() { 91 | assert.equal(stats.count.missing([3, 1, 2]), 0); 92 | assert.equal(stats.count.missing([null, 0, 1, 2, null]), 2); 93 | }); 94 | 95 | it('should ignore NaN values', 
function() { 96 | assert.equal(stats.count.missing([NaN, 1, 2]), 0); 97 | }); 98 | 99 | it('should count undefined values', function() { 100 | assert.equal(stats.count.missing([1, undefined, 2, undefined, 3]), 2); 101 | }); 102 | 103 | it('should support accessor', function() { 104 | assert.equal(stats.count.missing([{a:3}, {a:1}, {a:2}], 'a'), 0); 105 | }); 106 | }); 107 | 108 | describe('count.map', function() { 109 | it('should create count hash', function() { 110 | var map = stats.count.map(['a', 'a', 'b']); 111 | assert.equal(map.a, 2); 112 | assert.equal(map.b, 1); 113 | assert.isUndefined(map.c); 114 | }); 115 | 116 | it('should support accessor', function() { 117 | var map = stats.count.map([{a:'a'}, {a:'a'}, {a:'b'}], 'a'); 118 | assert.equal(map.a, 2); 119 | assert.equal(map.b, 1); 120 | assert.isUndefined(map.c); 121 | }); 122 | }); 123 | 124 | describe('extent/min/max', function() { 125 | it('should calculate min and max values', function() { 126 | var a = [1, 2, 3, 4, 5]; 127 | var e = stats.extent(a); 128 | assert.equal(e[0], 1); 129 | assert.equal(e[1], 5); 130 | assert.equal(e[0], stats.min(a)); 131 | assert.equal(e[1], stats.max(a)); 132 | 133 | a = [1.1, 2.2, 3.3, 4.4, 5.5]; 134 | e = stats.extent(a); 135 | assert.equal(e[0], 1.1); 136 | assert.equal(e[1], 5.5); 137 | assert.equal(e[0], stats.min(a)); 138 | assert.equal(e[1], stats.max(a)); 139 | }); 140 | 141 | it('should handle non-numeric values', function() { 142 | var a = ['aa', 'eeeee', 'bbb', 'cccc', 'dddddd']; 143 | var e = stats.extent(a); 144 | assert.equal(e[0], 'aa'); 145 | assert.equal(e[1], 'eeeee'); 146 | assert.equal(e[0], stats.min(a)); 147 | assert.equal(e[1], stats.max(a)); 148 | }); 149 | 150 | it('should ignore null values', function() { 151 | var a = [2, 1, null, 5, 4]; 152 | var e = stats.extent(a); 153 | assert.equal(e[0], 1); 154 | assert.equal(e[1], 5); 155 | assert.equal(e[0], stats.min(a)); 156 | assert.equal(e[1], stats.max(a)); 157 | }); 158 | }); 159 | 160 | 
describe('extent.index', function() { 161 | it('should calculate min and max indices', function() { 162 | var e = stats.extent.index([1, 2, 3, 4, 5]); 163 | assert.equal(e[0], 0); 164 | assert.equal(e[1], 4); 165 | 166 | e = stats.extent.index([1.1, 2.2, 3.3, 4.4, 5.5]); 167 | assert.equal(e[0], 0); 168 | assert.equal(e[1], 4); 169 | }); 170 | 171 | it('should handle non-numeric values', function() { 172 | var e = stats.extent.index(['aa', 'eeeee', 'bbb', 'cccc', 'dddddd']); 173 | assert.equal(e[0], 0); 174 | assert.equal(e[1], 1); 175 | }); 176 | 177 | it('should ignore null values', function() { 178 | var e = stats.extent.index([2, 1, null, 5, 4]); 179 | assert.equal(e[0], 1); 180 | assert.equal(e[1], 3); 181 | }); 182 | }); 183 | 184 | describe('median', function() { 185 | it('should calculate median values', function() { 186 | assert.equal(stats.median([3, 1, 2]), 2); 187 | assert.equal(stats.median([-2, -2, -1, 1, 2, 2]), 0); 188 | }); 189 | 190 | it('should ignore null values', function() { 191 | assert.equal(stats.median([1, 2, null]), 1.5); 192 | }); 193 | 194 | it('should support accessor', function() { 195 | assert.equal(stats.median([{a:3}, {a:1}, {a:2}], 'a'), 2); 196 | }); 197 | }); 198 | 199 | describe('quantile', function() { 200 | it('should calculate quantile values', function() { 201 | var a = [1, 2, 3, 4, 5]; 202 | assert.equal(stats.quantile(a, 0.00), 1); 203 | assert.equal(stats.quantile(a, 0.25), 2); 204 | assert.equal(stats.quantile(a, 0.50), 3); 205 | assert.equal(stats.quantile(a, 0.75), 4); 206 | assert.equal(stats.quantile(a, 1.00), 5); 207 | 208 | var a = [1, 2, 3, 4]; 209 | assert.equal(stats.quantile(a, 0.00), 1); 210 | assert.equal(stats.quantile(a, 0.25), 1.75); 211 | assert.equal(stats.quantile(a, 0.50), 2.5); 212 | assert.equal(stats.quantile(a, 0.75), 3.25); 213 | assert.equal(stats.quantile(a, 1.00), 4); 214 | }); 215 | 216 | it('should support accessor', function() { 217 | var a = [{a:1}, {a:2}, {a:3}, {a:4}, {a:5}]; 218 | 
assert.equal(stats.quantile(a, 'a', 0.00), 1); 219 | assert.equal(stats.quantile(a, 'a', 0.25), 2); 220 | assert.equal(stats.quantile(a, 'a', 0.50), 3); 221 | assert.equal(stats.quantile(a, 'a', 0.75), 4); 222 | assert.equal(stats.quantile(a, 'a', 1.00), 5); 223 | }); 224 | }); 225 | 226 | describe('mean', function() { 227 | it('should calculate mean values', function() { 228 | assert.closeTo(stats.mean([3, 1, 2]), 2, EPSILON); 229 | assert.closeTo(stats.mean([-2, -2, -1, 1, 2, 2]), 0, EPSILON); 230 | assert.closeTo(stats.mean([4, 5]), 4.5, EPSILON); 231 | }); 232 | 233 | it('should ignore null values', function() { 234 | assert.closeTo(stats.mean([1, 2, null]), 1.5, EPSILON); 235 | }); 236 | 237 | it('should support accessor', function() { 238 | assert.equal(stats.mean([{a:1}, {a:2}], 'a'), 1.5); 239 | }); 240 | }); 241 | 242 | describe('variance & stdev', function() { 243 | it('should calculate variance and stdev values', function() { 244 | assert.closeTo(stats.variance([3, 1, 2]), 1, EPSILON); 245 | assert.closeTo(stats.variance([1, 3]), 2, EPSILON); 246 | assert.closeTo(stats.variance([-2, -2, -1, 1, 2, 2]), 3.6, EPSILON); 247 | assert.equal(Math.sqrt(stats.variance([3, 1, 2])), stats.stdev([3, 1, 2])); 248 | assert.equal(Math.sqrt(stats.variance([1, 3])), stats.stdev([1, 3])); 249 | assert.equal( 250 | Math.sqrt(stats.variance([-2, -2, -1, 1, 2, 2])), 251 | stats.stdev([-2, -2, -1, 1, 2, 2]) 252 | ); 253 | assert.equal(0, stats.variance([])); 254 | assert.equal(0, stats.variance([1])); 255 | assert.equal(0, stats.stdev([])); 256 | assert.equal(0, stats.stdev([1])); 257 | }); 258 | 259 | it('should ignore null values', function() { 260 | assert.equal(stats.variance([3, 1, 2, null]), 1); 261 | assert.equal(stats.stdev([3, 1, 2, null]), 1); 262 | }); 263 | 264 | it('should support accessor', function() { 265 | assert.equal(stats.variance([{a:1}, {a:3}], 'a'), 2); 266 | assert.equal(stats.stdev([{a:1}, {a:3}], 'a'), Math.sqrt(2)); 267 | }); 268 | }); 269 | 270 | 
describe('modeskew', function() { 271 | it('should calculate modeskew values', function() { 272 | assert.equal(stats.modeskew([]), 0); 273 | assert.equal(stats.modeskew([1]), 0); 274 | assert.equal(stats.modeskew([1,3]), 0); 275 | assert.equal(stats.modeskew([1,1,4]), 1/Math.sqrt(3)); 276 | }); 277 | 278 | it('should support accessor', function() { 279 | assert.equal(stats.modeskew([{a:1}, {a:2}], 'a'), 0); 280 | assert.equal(stats.modeskew([{a:1}, {a:1} ,{a:4}], 'a'), 1/Math.sqrt(3)); 281 | }); 282 | }); 283 | 284 | describe('rank', function() { 285 | it('should calculate rank values', function() { 286 | assert.deepEqual([1, 3, 2, 4], stats.rank([3,5,4,6])); 287 | assert.deepEqual([1.5, 1.5, 3, 4], stats.rank([3,3,4,5])); 288 | assert.deepEqual([1, 2.5, 2.5, 4], stats.rank([3,4,4,5])); 289 | assert.deepEqual([1, 2, 3.5, 3.5], stats.rank([3,4,5,5])); 290 | }); 291 | }); 292 | 293 | describe('dot', function() { 294 | var table = [ 295 | {a:1, b:2, c:3}, 296 | {a:4, b:5, c:6}, 297 | {a:7, b:8, c:9} 298 | ]; 299 | 300 | it('should accept object array and accessors', function() { 301 | assert.equal(1*2+4*5+7*8, stats.dot(table, a, b)); 302 | assert.equal(1*2+4*5+7*8, stats.dot(table, b, a)); 303 | assert.equal(1*3+4*6+7*9, stats.dot(table, a, c)); 304 | assert.equal(1*3+4*6+7*9, stats.dot(table, c, a)); 305 | assert.equal(2*3+5*6+8*9, stats.dot(table, b, c)); 306 | assert.equal(2*3+5*6+8*9, stats.dot(table, c, b)); 307 | }); 308 | 309 | it('should accept two arrays', function() { 310 | var x = table.map(a), y = table.map(b), z = table.map(c); 311 | assert.equal(1*2+4*5+7*8, stats.dot(x, y)); 312 | assert.equal(1*2+4*5+7*8, stats.dot(y, x)); 313 | assert.equal(1*3+4*6+7*9, stats.dot(x, z)); 314 | assert.equal(1*3+4*6+7*9, stats.dot(z, x)); 315 | assert.equal(2*3+5*6+8*9, stats.dot(y, z)); 316 | assert.equal(2*3+5*6+8*9, stats.dot(z, y)); 317 | }); 318 | 319 | it('should throw error with inputs of unequal length', function() { 320 | assert.throws(function() { 
stats.dot([1,2,3], [1,2]); }); 321 | }); 322 | 323 | it('should ignore NaN values', function() { 324 | var a = [1, 2, NaN]; 325 | var b = [3, 2, 1]; 326 | assert.equal(stats.dot(a, b), 1*3 + 2*2); 327 | var t = [{a:1, b:3}, {a:2, b:2}, {a:NaN, b:1}]; 328 | assert.equal(stats.dot(t, 'a', 'b'), 1*3 + 2*2); 329 | }); 330 | }); 331 | 332 | describe('cor', function() { 333 | var table = [ 334 | {a:1, b:0, c:-1}, 335 | {a:0, b:1, c:0}, 336 | {a:-1, b:0, c:1} 337 | ]; 338 | 339 | it('should accept object array and accessors', function() { 340 | assert.closeTo( 0, stats.cor(table, a, b), EPSILON); 341 | assert.closeTo( 0, stats.cor(table, b, a), EPSILON); 342 | assert.closeTo(-1, stats.cor(table, a, c), EPSILON); 343 | assert.closeTo(-1, stats.cor(table, c, a), EPSILON); 344 | assert.closeTo( 0, stats.cor(table, b, c), EPSILON); 345 | assert.closeTo( 0, stats.cor(table, c, b), EPSILON); 346 | assert.closeTo( 1, stats.cor(table, a, a), EPSILON); 347 | assert.closeTo( 1, stats.cor(table, b, b), EPSILON); 348 | assert.closeTo( 1, stats.cor(table, c, c), EPSILON); 349 | }); 350 | 351 | it('should accept two arrays', function() { 352 | var x = table.map(a), y = table.map(b), z = table.map(c); 353 | assert.closeTo( 0, stats.cor(x, y), EPSILON); 354 | assert.closeTo( 0, stats.cor(y, x), EPSILON); 355 | assert.closeTo(-1, stats.cor(x, z), EPSILON); 356 | assert.closeTo(-1, stats.cor(z, x), EPSILON); 357 | assert.closeTo( 0, stats.cor(y, z), EPSILON); 358 | assert.closeTo( 0, stats.cor(z, y), EPSILON); 359 | assert.closeTo( 1, stats.cor(x, x), EPSILON); 360 | assert.closeTo( 1, stats.cor(y, y), EPSILON); 361 | assert.closeTo( 1, stats.cor(z, z), EPSILON); 362 | }); 363 | 364 | it('should return NaN with zero-valued input', function() { 365 | assert(isNaN(stats.cor([0,0,0], [0,0,0]))); 366 | assert(isNaN(stats.cor([0,0,0], [1,2,3]))); 367 | assert(isNaN(stats.cor([1,2,3], [0,0,0]))); 368 | }); 369 | }); 370 | 371 | describe('cor.rank', function() { 372 | var table = [ 373 | {a:1, 
b:5, c:8, d:3}, 374 | {a:2, b:6, c:7, d:1}, 375 | {a:3, b:7, c:6, d:4}, 376 | {a:4, b:8, c:5, d:2} 377 | ]; 378 | 379 | it('should accept two arrays', function() { 380 | assert.equal( 1, stats.cor.rank([1,2,3,4],[5,6,7,8])); 381 | assert.equal(-1, stats.cor.rank([1,2,3,4],[8,7,6,5])); 382 | assert.equal( 0, stats.cor.rank([1,2,3,4],[3,1,4,2])); 383 | }); 384 | 385 | it('should accept object array and accessors', function() { 386 | assert.equal( 1, stats.cor.rank(table, a, b)); 387 | assert.equal(-1, stats.cor.rank(table, a, c)); 388 | assert.equal( 0, stats.cor.rank(table, a, d)); 389 | }); 390 | }); 391 | 392 | describe('cor.dist', function() { 393 | var table = [ 394 | {a:1, b:-1}, 395 | {a:0, b:0}, 396 | {a:-1, b:1} 397 | ]; 398 | 399 | it('should accept object array and accessors', function() { 400 | assert.closeTo(1, stats.cor.dist(table, a, b), EPSILON); 401 | assert.closeTo(1, stats.cor.dist(table, b, a), EPSILON); 402 | assert.closeTo(1, stats.cor.dist(table, a, a), EPSILON); 403 | assert.closeTo(1, stats.cor.dist(table, b, b), EPSILON); 404 | }); 405 | 406 | it('should accept two arrays', function() { 407 | var x = table.map(a), y = table.map(b), z = table.map(c); 408 | assert.closeTo(1, stats.cor.dist(x, y), EPSILON); 409 | assert.closeTo(1, stats.cor.dist(y, x), EPSILON); 410 | assert.closeTo(1, stats.cor.dist(x, x), EPSILON); 411 | assert.closeTo(1, stats.cor.dist(y, y), EPSILON); 412 | }); 413 | 414 | it('should return NaN with zero-valued input', function() { 415 | assert(isNaN(stats.cor.dist([0,0,0], [0,0,0]))); 416 | assert(isNaN(stats.cor.dist([0,0,0], [1,2,3]))); 417 | assert(isNaN(stats.cor.dist([1,2,3], [0,0,0]))); 418 | }); 419 | }); 420 | 421 | describe('dist', function() { 422 | var table = [ 423 | {a:1, b:-1}, 424 | {a:0, b:0}, 425 | {a:-1, b:1} 426 | ]; 427 | 428 | it('should accept object array and accessors', function() { 429 | assert.equal(0, stats.dist(table, a, a)); 430 | assert.equal(0, stats.dist(table, b, b)); 431 | 
assert.equal(Math.sqrt(8), stats.dist(table, a, b)); 432 | assert.equal(Math.sqrt(8), stats.dist(table, b, a)); 433 | }); 434 | 435 | it('should accept two arrays', function() { 436 | var x = table.map(a), y = table.map(b); 437 | assert.equal(0, stats.dist(x, x)); 438 | assert.equal(0, stats.dist(y, y)); 439 | assert.equal(Math.sqrt(8), stats.dist(x, y)); 440 | assert.equal(Math.sqrt(8), stats.dist(y, x)); 441 | }); 442 | 443 | it('should compute non-Euclidean distances', function() { 444 | assert.equal(2, stats.dist([1,1], [2,2], 1)); 445 | assert.equal(4, stats.dist([1,1], [2,2], 0.5)); 446 | assert.equal(Math.pow(2, 1/3), stats.dist([1,1], [2,2], 3)); 447 | }); 448 | }); 449 | 450 | describe('entropy', function() { 451 | var even = [1, 1, 1, 1, 1, 1], ee = -Math.log(1/6)/Math.LN2; 452 | var skew = [6, 0, 0, 0, 0, 0], se = 0; 453 | 454 | it('should calculate entropy', function() { 455 | assert.equal(ee, stats.entropy(even)); 456 | assert.equal(se, stats.entropy(skew)); 457 | }); 458 | 459 | it('should handle accessor argument', function() { 460 | var wrap = function(a, x) { return (a.push({a:x}), a); }; 461 | assert.equal(ee, stats.entropy(even.reduce(wrap, []), a)); 462 | assert.equal(se, stats.entropy(skew.reduce(wrap, []), a)); 463 | }); 464 | 465 | it('should handle zero vectors', function() { 466 | assert.equal(0, stats.entropy([0,0,0,0])); 467 | }); 468 | 469 | it('should handle zero vectors', function() { 470 | assert.equal(0, stats.entropy([0,0,0,0])); 471 | }); 472 | }); 473 | 474 | describe('mutual', function() { 475 | var table = [ 476 | {a:'a', b:1, c:1, d:1}, 477 | {a:'a', b:2, c:0, d:1}, 478 | {a:'b', b:1, c:0, d:0}, 479 | {a:'b', b:2, c:1, d:0} 480 | ]; 481 | 482 | it('should accept object array and accessors', function() { 483 | assert.deepEqual([1, 0], stats.mutual(table, a, b, c)); 484 | assert.deepEqual([0, 1], stats.mutual(table, a, b, d)); 485 | }); 486 | 487 | it('should handle zero vectors', function() { 488 | var u = table.map(a), v = 
table.map(b), 489 | x = table.map(c), y = table.map(d); 490 | assert.deepEqual([1, 0], stats.mutual(u, v, x)); 491 | assert.deepEqual([0, 1], stats.mutual(u, v, y)); 492 | }); 493 | 494 | it('should support info/dist sub-methods', function() { 495 | var m = stats.mutual(table, a, b, c); 496 | assert.equal(m[0], stats.mutual.info(table, a, b, c)); 497 | assert.equal(m[1], stats.mutual.dist(table, a, b, c)); 498 | m = stats.mutual(table, a, b, d); 499 | assert.equal(m[0], stats.mutual.info(table, a, b, d)); 500 | assert.equal(m[1], stats.mutual.dist(table, a, b, d)); 501 | }); 502 | }); 503 | 504 | describe('profile', function() { 505 | it('should compute q1 correctly', function() { 506 | assert.equal(1.00, stats.profile([1]).q1); 507 | assert.equal(1.25, stats.profile([1,2]).q1); 508 | assert.equal(1.50, stats.profile([1,2,3]).q1); 509 | assert.equal(1.75, stats.profile([1,2,3,4]).q1); 510 | assert.equal(2.00, stats.profile([1,2,3,4,5]).q1); 511 | assert.equal(2.25, stats.profile([1,2,3,4,5,6]).q1); 512 | assert.equal(2.50, stats.profile([1,2,3,4,5,6,7]).q1); 513 | assert.equal(2.75, stats.profile([1,2,3,4,5,6,7,8]).q1); 514 | }); 515 | 516 | it('should compute q3 correctly', function() { 517 | assert.equal(1.00, stats.profile([1]).q3); 518 | assert.equal(1.75, stats.profile([1,2]).q3); 519 | assert.equal(2.50, stats.profile([1,2,3]).q3); 520 | assert.equal(3.25, stats.profile([1,2,3,4]).q3); 521 | assert.equal(4.00, stats.profile([1,2,3,4,5]).q3); 522 | assert.equal(4.75, stats.profile([1,2,3,4,5,6]).q3); 523 | assert.equal(5.50, stats.profile([1,2,3,4,5,6,7]).q3); 524 | assert.equal(6.25, stats.profile([1,2,3,4,5,6,7,8]).q3); 525 | }); 526 | 527 | it('should match stand-alone statistics', function() { 528 | var v = [1, 1, 3, 4, 20, null, undefined, NaN]; 529 | var p = stats.profile(v); 530 | assert.equal(8, p.count); 531 | assert.equal(5, p.valid); 532 | assert.equal(2, p.missing); 533 | assert.equal(7, p.distinct); 534 | assert.equal(stats.count(v), p.count); 535 
| assert.equal(stats.count.valid(v), p.valid); 536 | assert.equal(stats.count.missing(v), p.missing); 537 | assert.equal(stats.count.distinct(v), p.distinct); 538 | assert.equal(stats.extent(v)[0], p.min); 539 | assert.equal(stats.extent(v)[1], p.max); 540 | assert.equal(stats.mean(v), p.mean); 541 | assert.equal(stats.stdev(v), p.stdev); 542 | assert.equal(stats.median(v), p.median); 543 | assert.equal(stats.quartile(v)[0], p.q1); 544 | assert.equal(stats.quartile(v)[2], p.q3); 545 | assert.equal(stats.modeskew(v), p.modeskew); 546 | assert.deepEqual(stats.count.map(v), p.unique); 547 | }); 548 | 549 | it('should return length statistics for strings', function(){ 550 | var p = stats.profile(['aa', 'eeeeeeeee', 'bbb', 'cccc', 'dddddd']); 551 | assert.equal(p.min, 2); 552 | assert.equal(p.max, 9); 553 | }); 554 | }); 555 | 556 | }); 557 | --------------------------------------------------------------------------------