├── .gitignore ├── .travis.yml ├── README.md ├── lib ├── chart.js └── datakit.js ├── package.json ├── plot.png └── spec ├── support └── jasmine.json └── test ├── test.csv ├── test2.csv └── testSpec.js /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | node_modules 3 | *.swp 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - '0.12' 4 | - '0.11' 5 | - '0.10' 6 | - 'iojs' 7 | before_script: 8 | - 'npm i -g jasmine' 9 | deploy: 10 | provider: npm 11 | email: ne2210@columbia.edu 12 | api_key: 13 | secure: WDjnZSc8q9Oh2ro0Gpoa75/s2Nb4k1CguyZeQwibt3dl2+QveuTdbg9mAsKmbwSBSy+e+EWiZI5/C0j+pDGlZy/pjqse7c1VGcLzVqeYrSbJtvFXKl5eEUXbM0tuvw9ASKlHGM7P88xY/oe8osqoXmUnkjUHPWsXaTHhsG3HqDc= 14 | on: 15 | tags: true 16 | repo: NathanEpstein/datakit -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # datakit 2 | 3 | 4 | 5 | 6 | ## About 7 | A lightweight library/framework for data analysis in JavaScript. 
8 | 9 | ## Usage 10 | 11 | ```npm install datakitjs --save``` 12 | 13 | ## Documentation & Examples 14 | 15 | ### Reading, Filtering, & Plotting Data 16 | ```javascript 17 | var dk = require('datakitjs'); 18 | 19 | //READ A CSV FILE 20 | 21 | //file.csv 22 | // COL1, COL2 23 | // val11, val12 24 | // val21, val22 25 | 26 | dk.csv('file.csv', function(data) { 27 | console.log(data); 28 | }); 29 | 30 | //Output: 31 | //[{ COL1: val11, COL2: val12 }, { COL1: val21, COL2: val22 }] 32 | 33 | 34 | //GET A COLUMN FROM AN ARRAY OF ROW OBJECTS 35 | dk.csv('file.csv', function(data) { 36 | var c2 = dk.col(data, 'COL2'); 37 | console.log(c2); 38 | }); 39 | 40 | //Output: 41 | //[val12, val22] 42 | 43 | // By default, dk.csv will convert all values to strings. You can convert select 44 | // columns to numbers by passing an array of column names to 'dk.numeric'. 45 | 46 | //file2.csv 47 | // COL1, COL2 48 | // val11, 1 49 | // val21, 2 50 | 51 | dk.csv('file2.csv', function(data) { 52 | var d = dk.numeric(data, ['COL2'], 0) // The third parameter value will be filled 53 | // in to blank cells. Its default value is 0. 
54 | var c2 = dk.col(d, 'COL2'); 55 | console.log(c2); 56 | }); 57 | 58 | //Output: 59 | //[1, 2] 60 | 61 | 62 | //PLOT ARRAY(S) OF DATA 63 | 64 | var chart = new dk.Chart({ 65 | //optional config 66 | height: 500, 67 | width: 500, 68 | xLab: 'x-Axis Label', 69 | yLab: 'y-Axis Label' 70 | }); 71 | 72 | chart.addDataSet({ 73 | x: [1, 2, 3], 74 | y: [4, 5, 6], 75 | z: [2, 3, 5], 76 | colors: ['blue', 'green', 'red'] 77 | }).addDataSet({ 78 | x: [1, 10], 79 | y: [2, -1], 80 | type: 'line' 81 | }).addDataSet({ 82 | x: [10, 5, 1], 83 | y: [4, 5, 2], 84 | labels: ["first", "second", "third"] 85 | }).plot(); 86 | ``` 87 | 88 | ### Statistical Methods 89 | 90 | ```javascript 91 | var dk = require('datakitjs'); 92 | 93 | //MEAN OF AN ARRAY 94 | dk.mean([1, 2, 3]); //returns 2 95 | 96 | //STANDARD DEVIATION AND VARIANCE OF AN ARRAY 97 | dk.sd([1, 2, 3]); //returns 1 98 | dk.vari([1, 2, 3]); //returns 1 99 | 100 | //COVARIANCE OF TWO ARRAYS 101 | dk.cov([1, 2, 3], [3, 2, 1]); //returns -1 102 | 103 | //SIMPLE LINEAR REGRESSION 104 | 105 | var x = [1, 2, 3]; 106 | var y = [2, 1, 3]; 107 | 108 | var model = dk.reg(x, y); 109 | 110 | // model.f is a function that returns the estimated y for an input x (estimated via standard OLS regression) 111 | // model.f = function(x) { 112 | // return (a + b * x); 113 | // }; 114 | 115 | // model.pts is an array of the estimated y for each element of x 116 | // model.pts = [1.5, 2, 2.5]; 117 | 118 | // model.endPoints is an object with the coordinates of the boundary points 119 | // model.endPoints = { x1: 1, x2: 3, y1: 1.5, y2: 2.5 }; 120 | 121 | ``` 122 | 123 | ### Convenience Methods 124 | ```javascript 125 | var dk = require('datakitjs'); 126 | 127 | //GENERATE AN ARRAY WITH A SEQUENCE OF NUMBERS 128 | 129 | dk.seq(1, 5); //returns [1, 2, 3, 4, 5] 130 | 131 | dk.seq(0, 1, 0.25); //returns [0, 0.25, 0.5, 0.75, 1] 132 | 133 | //GENERATE AN ARRAY WITH REPEATED VALUE 134 | 135 | dk.rep(1, 5); //returns [1, 1, 1, 1, 1] 136 | 137 | //CHECK IF 
NUMBERS ARE CLOSE 138 | dk.isclose(0, Math.pow(10, -15)); //returns true 139 | 140 | dk.isclose(0, Math.pow(10, -5)); //returns false 141 | 142 | //SUM AN ARRAY OF NUMBERS 143 | //uses Kahan summation 144 | 145 | dk.sum([1, 2, 3]); //returns 6 146 | 147 | //PRODUCT OF AN ARRAY OF NUMBERS 148 | //implementation from 'Accurate Floating Point Product' - Stef Graillat 149 | 150 | dk.prod([1, 2, 3]); //returns 6 151 | 152 | //MAX AND MIN OF AN ARRAY 153 | var x = [1, 2, 3]; 154 | dk.min(x); //returns 1 155 | dk.max(x); //returns 3 156 | 157 | ``` 158 | 159 | ### Random Numbers 160 | ```javascript 161 | var dk = require('datakitjs'); 162 | 163 | //GET AN ARRAY OF EXPONENTIALLY DISTRIBUTED VALUES 164 | 165 | dk.exp(3, 1); //returns [0.3584189321510761, 1.0466439500242446, 0.08887770301056963] 166 | 167 | 168 | //GET AN ARRAY OF NORMALLY DISTRIBUTED VALUES 169 | 170 | dk.norm(3, 0, 1); //returns [-1.709768103193772, 0.23530041388459744, 0.4431320382580479] 171 | 172 | //GET AN ARRAY OF UNIFORMLY DISTRIBUTED VALUES 173 | 174 | dk.uni(3); //returns [0.30658303829841316, 0.1601463456172496, 0.8538850131444633] 175 | 176 | ``` 177 | 178 | ## Testing 179 | 180 | Just run `npm test` to run the tests. 181 | 182 | 183 | ## Contributing 184 | 185 | Additional methods for random number generation, data filtration, convenience functions, and common statistical analyses are welcome additions. Just add tests following the structure in `spec/test/testSpec.js`. 
186 | 187 | ## License 188 | 189 | **The MIT License (MIT)** 190 | 191 | > Copyright (c) 2015 Nathan Epstein 192 | > 193 | > Permission is hereby granted, free of charge, to any person obtaining a copy 194 | > of this software and associated documentation files (the "Software"), to deal 195 | > in the Software without restriction, including without limitation the rights 196 | > to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 197 | > copies of the Software, and to permit persons to whom the Software is 198 | > furnished to do so, subject to the following conditions: 199 | > 200 | > The above copyright notice and this permission notice shall be included in 201 | > all copies or substantial portions of the Software. 202 | > 203 | > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 204 | > IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 205 | > FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 206 | > AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 207 | > LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 208 | > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 209 | > THE SOFTWARE. 
// -----------------------------------------------------------------------------
// /lib/chart.js:
// -----------------------------------------------------------------------------

// Chart: collects datasets and draws them as an SVG plot with d3.
//
// config (optional object):
//   height, width - outer SVG size in pixels (each defaults to 500)
//   xLab, yLab    - axis label text
//   selector      - d3 selector of the element the SVG is appended to
//                   (defaults to 'body')
//
// Each dataset is an object with parallel arrays `x` and `y`, plus optionally:
//   type: 'line'  - draw connected line segments instead of points
//   labels        - draw these strings at the points instead of circles
//   z             - per-point magnitude; turns the scatter into a bubble chart
//   colors        - per-point fill colours for the scatter/bubble chart
//   color         - stroke colour for `labels` text
//
// NOTE(review): `d3` is used as a free global and is never required in this
// file, so render() presumably runs in a page that has loaded d3 (the calls
// match the d3 v3 API: d3.scale.linear, d3.svg.axis) - confirm. All helpers are
// kept inside the constructor so the function stays self-contained.
var Chart = function(config) {
  var self = this;

  // Private list of datasets; exposed only through datasets()/addDataSet().
  var datasets = [];

  // Size configuration. The "buffer" is the inner plotting area: the outer
  // size minus a margin of 20% of the height / 40% of the width, with the
  // margin capped at 200px.
  self.config = config || {};
  self.config.height = self.config.height || 500;
  self.config.width = self.config.width || 500;
  self.config.bufferHeight = self.config.height - Math.min(200, self.config.height * 0.2);
  self.config.bufferWidth = self.config.width - Math.min(200, self.config.width * 0.4);

  // Public: build the SVG, the scales and the axes, then draw every dataset.
  // Returns the chart for chaining. Throws an Error when no dataset has been
  // added, before anything is appended to the document.
  this.render = function() {
    if (datasets.length === 0) {
      throw new Error('Chart has no datasets to plot; call addDataSet() before render().');
    }
    buildCanvas();
    buildMapping();
    buildAxes();
    plotDataSets();
    return self;
  };

  // Public: shallow copy of the dataset list (the internal array is not exposed).
  this.datasets = function() {
    return datasets.slice();
  };

  // Public: append a dataset; returns the chart for chaining.
  this.addDataSet = function(dataset) {
    datasets.push(dataset);
    return self;
  };

  // Private: smallest / largest value of an array.
  function mini(arr) {
    return Math.min.apply(null, arr);
  }

  function maxi(arr) {
    return Math.max.apply(null, arr);
  }

  // Private: create the <svg> element and the translated <g> that everything
  // else is drawn into. The translation centres the buffer inside the SVG.
  function buildCanvas() {
    self.canvas = d3.select(self.config.selector || 'body')
      .append('svg')
      .attr('height', self.config.height)
      .attr('width', self.config.width);

    self.buffer = self.canvas.append('g');

    var xTranslate = (self.config.width - self.config.bufferWidth) / 2;
    var yTranslate = (self.config.height - self.config.bufferHeight) / 2;

    self.buffer.attr(
      'transform',
      'translate(' + xTranslate + ', ' + yTranslate + ')'
    );
  }

  // Private: linear scales from the data extent (over ALL datasets) to buffer
  // pixels. The y domain is reversed because SVG y grows downwards.
  function buildMapping() {
    var xMin = Infinity;
    var yMin = Infinity;
    var xMax = -Infinity;
    var yMax = -Infinity;

    datasets.forEach(function(dataset) {
      xMin = Math.min(mini(dataset.x), xMin);
      yMin = Math.min(mini(dataset.y), yMin);
      xMax = Math.max(maxi(dataset.x), xMax);
      yMax = Math.max(maxi(dataset.y), yMax);
    });

    self.xMap = d3.scale.linear()
      .domain([xMin, xMax])
      .range([0, self.config.bufferWidth]);

    self.yMap = d3.scale.linear()
      .domain([yMax, yMin])
      .range([0, self.config.bufferHeight]);
  }

  // Private: draw the bottom and left axes and their labels.
  function buildAxes() {
    var xAxis = d3.svg.axis()
      .scale(self.xMap);

    var yAxis = d3.svg.axis()
      .scale(self.yMap)
      .orient('left');

    self.buffer.append('g')
      .attr('transform', 'translate(0,' + self.config.bufferHeight + ')')
      .call(xAxis);

    self.buffer.append('g')
      .call(yAxis);

    self.buffer.append('text')
      .attr('x', self.config.bufferWidth * 0.5)
      .attr('y', self.config.bufferHeight + 50)
      .text(self.config.xLab)
      .attr('text-anchor', 'middle');

    // The y label is rotated by -90 degrees, so its 'x' runs along the
    // vertical axis and its 'y' is the horizontal offset.
    self.buffer.append('text')
      .attr('x', -self.config.bufferHeight * 0.5)
      .attr('y', -50)
      .attr('transform', 'rotate(-90)')
      .text(self.config.yLab)
      .attr('text-anchor', 'middle');
  }

  // Private: one black segment between each pair of consecutive points.
  function drawLine(dataset) {
    for (var i = 1; i < dataset.x.length; i++) {
      self.buffer.append('line')
        .attr('stroke-width', 1)
        .attr('stroke', 'black')
        .attr('x1', self.xMap(dataset.x[i - 1]))
        .attr('x2', self.xMap(dataset.x[i]))
        .attr('y1', self.yMap(dataset.y[i - 1]))
        .attr('y2', self.yMap(dataset.y[i]));
    }
  }

  // Private: one centred text label per point.
  function drawLabels(dataset) {
    for (var i = 0; i < dataset.x.length; i++) {
      self.buffer.append('text')
        .attr('x', self.xMap(dataset.x[i]))
        .attr('y', self.yMap(dataset.y[i]))
        .text(dataset.labels[i])
        .attr('text-anchor', 'middle')
        .attr('stroke', dataset.color || 'black');
    }
  }

  // Private: scatter plot, or bubble chart when the dataset has `z`.
  function drawScatter(dataset) {
    var hasZ = typeof dataset.z !== 'undefined';
    var hasColors = typeof dataset.colors !== 'undefined';
    var area = self.config.height * self.config.width;

    // The z extent does not change per point, so compute it once.
    var zMin = hasZ ? mini(dataset.z) : null;
    var zMax = hasZ ? maxi(dataset.z) : null;
    var zRange = hasZ ? zMax - zMin : 0;

    for (var i = 0; i < dataset.x.length; i++) {
      var radius;
      if (!hasZ) {
        radius = area * 0.00001;
      }
      else {
        var minSize = area * 0.000025;
        var sizeMultiplier = area * 0.0001;
        // When every z is equal the range is 0; use the minimum size instead
        // of dividing 0 by 0 and emitting r="NaN".
        var proportionOfMaxValue = zRange === 0 ? 0 : (dataset.z[i] - zMin) / zRange;
        radius = minSize + sizeMultiplier * proportionOfMaxValue;
      }

      self.buffer.append('circle')
        .attr('r', radius)
        .attr('cx', self.xMap(dataset.x[i]))
        .attr('cy', self.yMap(dataset.y[i]))
        // Bubbles are translucent so overlapping ones stay visible. The check
        // must look at dataset.z; a bare `z` is never defined here.
        .attr('opacity', hasZ ? 0.3 : 1)
        .attr('fill', hasColors ? dataset.colors[i] : 'none')
        .attr('stroke', 'black');
    }
  }

  // Private: dispatch each dataset to its renderer. `type: 'line'` wins over
  // `labels`; anything else is drawn as a scatter plot.
  function plotDataSets() {
    datasets.forEach(function(dataset) {
      if (dataset.type == 'line') {
        drawLine(dataset);
      }
      else if (typeof dataset.labels !== 'undefined') {
        drawLabels(dataset);
      }
      else {
        drawScatter(dataset);
      }
    });
  }
};

module.exports = Chart;

// -----------------------------------------------------------------------------
// /lib/datakit.js:
// -----------------------------------------------------------------------------
'use strict'
var fs = require('fs');
var exec = require('child_process').exec;
var express = require('express');

// SUMMARY STATISTICS/CONVENIENCE METHODS

// Check whether a is close to b (implementation from numpy): true when
// |a - b| <= atol + rtol * |b| with atol = 1e-8 and rtol = 1e-5. The relative
// part scales with |b| only, so the test is not symmetric in its arguments.
var isclose = module.exports.isclose = function(a, b) {
  var atol = Math.pow(10, -8);
  var rtol = Math.pow(10, -5);
  return (Math.abs(a - b) <= (atol + rtol * Math.abs(b)));
};

// Array sum (Kahan summation algorithm). `c` carries the rounding error of
// each addition into the next one. Returns 0 for an empty array.
var sum = module.exports.sum = function(arr) {
  var s = 0;
  var c = 0;
  for (var i = 0; i < arr.length; i++) {
    var y = arr[i] - c;
    var t = s + y;
    c = (t - s) - y;
    s = t;
  }
  return s;
};

// from 'Accurate Floating Point Product' - Stef Graillat
// EF split of a float into 2 parts [x, y] with x + y = val.
var split = function(val) {
  var factor = Math.pow(2, 27) + 1;
  var c = factor * val;
  var x = c - (c - val);
  var y = val - x;
  return [x, y];
};

// EFT of the product of 2 floats: returns [x, y] where x is the rounded
// product a * b and y is the rounding error recovered from the split parts.
var twoProd = function(a, b) {
  var x = a * b;
  var A = split(a);
  var B = split(b);
  var y = A[1] * B[1] - (((x - A[0] * B[0]) - A[1] * B[0]) - A[0] * B[1]);
  return [x, y];
};

// Array product (compensated product method): the running product is kept in
// p_ and the accumulated rounding error in e_, added back at the end.
// Returns 1 (the empty product) for an empty array.
var prod = module.exports.prod = function(arr) {
  if (arr.length === 0) return 1;
  var p_ = arr[0];
  var e_ = 0;
  for (var i = 1; i < arr.length; i++) {
    var step = twoProd(p_, arr[i]);
    p_ = step[0];
    e_ = e_ * arr[i] + step[1];
  }
  return (p_ + e_);
};

// Array mean, computed as sum(arr) * (1 / n) with the compensated product.
var mean = module.exports.mean = function(arr) {
  return prod([sum(arr), 1 / arr.length]);
};

// Array max and min.
var min = module.exports.min = function(arr) {
  return Math.min.apply(null, arr);
};
var max = module.exports.max = function(arr) {
  return Math.max.apply(null, arr);
};

// Sample covariance (divides by n - 1) of two arrays. Values are shifted by
// their means first to stabilize against catastrophic cancellation. Returns 0
// when arr1 has fewer than two elements; n is taken from arr1.
var cov = module.exports.cov = function(arr1, arr2) {
  var n = arr1.length;
  if (n < 2) return 0;
  var m1 = mean(arr1),
      m2 = mean(arr2),
      res = 0;

  for (var i = 0; i < arr1.length; i++) {
    var a = (arr1[i] - m1),
        b = (arr2[i] - m2);
    res += a * b / (n - 1);
  }
  return res;
};

// Sample variance and standard deviation, both derived from cov(arr, arr).
var vari = module.exports.vari = function(arr) {
  return cov(arr, arr);
};
var sd = module.exports.sd = function(arr) {
  return Math.sqrt(cov(arr, arr));
};

//READ AND MANIPULATE DATA

// Read a csv file and pass an array of row objects to `callback`. The first
// line supplies the keys; every value is left as a string. Cells are split on
// ',' only - quoted fields are not interpreted.
//
// callback(rows, err): `err` is set only when the file could not be read, in
// which case `rows` is an empty array. Empty lines at the end of the file
// (e.g. from a terminating newline) do not produce rows.
var csv = module.exports.csv = function(path, callback) {
  fs.readFile(path, function(err, data) {
    if (err) {
      callback([], err);
      return;
    }

    var lines = String(data)
      .replace(/\r/g, '')
      .split('\n');

    // Drop trailing empty lines but always keep the header line.
    while (lines.length > 1 && lines[lines.length - 1] === '') {
      lines.pop();
    }

    var colnames = lines[0].split(',');

    var res = [];
    for (var i = 1; i < lines.length; i++) {
      var rowObj = {};
      lines[i].split(',').forEach(function(el, j) {
        rowObj[colnames[j]] = el;
      });
      res.push(rowObj);
    }
    callback(res);
  });
};

//Given an array of objects (arr), get all values associated with a key (key).
var col = module.exports.col = function(arr, key) {
  return arr.map(function(row) {
    return row[key];
  });
};

// Assumes headings is an array of headings. Would be nice if it could also be an
// array of column numbers.
// Convert the named columns of an array of row objects to numbers, in place.
//
//   data          - array of row objects (as produced by csv); it is mutated
//                   and also returned
//   headings      - array of column names, or a single column name
//   replaceBlanks - value written into cells that hold the empty string
//                   (defaults to 0)
//
// Throws an Error when no headings are supplied. Logs a warning when any
// converted value ends up NaN.
var numeric = module.exports.numeric = function(data, headings, replaceBlanks) {
  // Test for a missing argument BEFORE reading .length, otherwise a missing
  // `headings` fails with a TypeError instead of the intended message.
  if (headings === undefined || headings === null || headings.length === 0) {
    throw new Error('No headings supplied to numeric.');
  }
  var heads = (typeof headings === 'string') ? [headings] : headings;
  var fill = replaceBlanks || 0;
  var includeNaN = false;

  for (var i = 0; i < heads.length; i++) {
    var head = heads[i];
    for (var j = 0; j < data.length; j++) {
      if (data[j][head] === '') {
        data[j][head] = fill;
      }
      else {
        data[j][head] = Number(data[j][head]);
      }
      if (isNaN(data[j][head])) includeNaN = true;
    }
  }

  if (includeNaN === true) console.log("Warning: Some values are NaN.");

  return data;
};

//RANDOM NUMBERS

// Array of n exponential random variables with rate lambda (default 1),
// generated by inverting a uniform sample.
var exp = module.exports.exp = function(n, lambda) {
  lambda = lambda || 1;
  var res = [];
  for (var i = 0; i < n; i++) {
    // Math.random() is in [0, 1); 1 - Math.random() is in (0, 1], which keeps
    // Math.log away from 0 (and the sample away from Infinity).
    var U = 1 - Math.random();
    res.push(-Math.log(U) / lambda);
  }
  return res;
};

// Array of n uniform random variables on [0, 1).
var uni = module.exports.uni = function(n) {
  var res = [];
  for (var i = 0; i < n; i++) {
    res.push(Math.random());
  }
  return res;
};

// Array of n normal random variables with mean mu (default 0) and standard
// deviation sig (default 1).
var norm = module.exports.norm = function(n, mu, sig) {
  mu = mu || 0;
  sig = sig || 1;

  //makes a pair of normals with specified parameters via Box-Muller
  function box(mu_, sig_) {
    // (0, 1] rather than [0, 1): Math.log(0) would give an infinite radius.
    var u1 = 1 - Math.random();
    var u2 = Math.random();
    var radius = Math.sqrt(-2 * Math.log(u1));
    var z1 = radius * Math.cos(2 * u2 * Math.PI);
    var z2 = radius * Math.sin(2 * u2 * Math.PI);
    return [(mu_ + (sig_ * z1)), (mu_ + (sig_ * z2))];
  }

  var res = [];
  var iter;
  if (n % 2 == 0) {
    iter = n / 2;
  }
  else {
    // Odd n: take one value from an extra pair, then fill the rest in pairs.
    iter = (n - 1) / 2;
    res.push(box(mu, sig).pop());
  }
  for (var i = 0; i < iter; i++) {
    res = res.concat(box(mu, sig));
  }
  return res;
};

//SEQUENCE METHOD
// Array of numbers from start towards end (inclusive) in steps of incr
// (default 1). A negative incr counts down, e.g. seq(5, 1, -1). Returns []
// when end cannot be reached in the direction of incr.
var seq = module.exports.seq = function(start, end, incr) {
  var res = [];
  incr = incr || 1;

  // Work out the number of steps up front instead of accumulating `num += incr`:
  // accumulation lets rounding error drop the endpoint (0.1 + 0.1 + 0.1 > 0.3)
  // and never terminates when incr points away from end. The 1e-9 slack (in
  // units of one step) absorbs the rounding of the division.
  var steps = Math.floor((end - start) / incr + 1e-9);

  for (var i = 0; i <= steps; i++) {
    res.push(start + i * incr);
  }
  return res;
};

// LINEAR REGRESSION
// Simple linear regression of y on x (slope = cov(x, y) / vari(x)).
// Returns an object with:
//   f         - function giving the estimated y for an input x
//   pts       - estimated y for each element of x
//   endPoints - { x1, x2, y1, y2 }: the fitted line at min(x) and max(x)
var reg = module.exports.reg = function(x, y) {
  // infer the regression line
  var beta = cov(x, y) / vari(x);
  var alpha = mean(y) - (beta * mean(x));

  var res = {};
  res.f = function(input) {
    return (alpha + (beta * input));
  };

  // set array of estimated y values for each x
  res.pts = x.map(function(point) {
    return res.f(point);
  });

  // set endpoints
  var xMin = min(x);
  var xMax = max(x);
  res.endPoints = {
    x1: xMin,
    x2: xMax,
    y1: res.f(xMin),
    y2: res.f(xMax)
  };

  return res;
};

//REPEATED VALUES
// Array holding val repeated n times.
var rep = module.exports.rep = function(val, n) {
  var res = [];
  for (var i = 0; i < n; i++) {
    res.push(val);
  }
  return res;
};


//PLOTTING
var server, html, app;

// The express app that serves the most recently plotted page at '/'. It is
// created on first use, so loading the module for its statistics helpers does
// not build an HTTP app.
var getApp = function() {
  if (typeof app === 'undefined') {
    app = express();
    app.get('/', function(req, res) {
      res.send(html);
    });
  }
  return app;
};

// Shell command that opens a URL in the default browser on this platform.
var openCommand = function() {
  if (process.platform === 'darwin') return 'open';
  if (process.platform === 'win32') return 'start ""';
  return 'xdg-open';
};

var Chart = module.exports.Chart = require('./chart.js');

// Build the chart's HTML page, serve it on http://localhost:2000/ (the server
// is started once and reused) and open it in the browser. Returns the HTML.
//
// NOTE(review): the page template is incomplete in this copy of the file - the
// embedded line numbers jump from 264 to 273, the `head`/`body` literals have
// lost their markup, and `dataStrings`/`tail` are not defined anywhere visible.
// The three statements below are reproduced as found; restore them from the
// upstream repository before relying on plot().
Chart.prototype.plot = function() {
  var self = this;
  var head = '\n\n\n\n\n'

  var body = '\n\n'

  html = head + body + dataStrings + tail;
  if (typeof server === 'undefined') server = getApp().listen(2000);
  exec(openCommand() + ' http://localhost:2000/');
  return html;
};

// -----------------------------------------------------------------------------
// /package.json:
// -----------------------------------------------------------------------------
// {
//   "name":
"datakitjs", 3 | "version": "0.1.4", 4 | "description": "A lightweight library for data analysis in JavaScript.", 5 | "main": "lib/datakit.js", 6 | "scripts": { 7 | "test": "jasmine" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "https://github.com/NathanEpstein/datakit.git" 12 | }, 13 | "keywords": [ 14 | "data", 15 | "csv", 16 | "statistics" 17 | ], 18 | "author": "Nathan Epstein", 19 | "license": "MIT", 20 | "bugs": { 21 | "url": "https://github.com/NathanEpstein/datakit/issues" 22 | }, 23 | "homepage": "https://github.com/NathanEpstein/datakit", 24 | "dependencies": { 25 | "express": "^4.12.2" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NathanEpstein/datakit/890ab8178218400421844c1748025510ff17b366/plot.png -------------------------------------------------------------------------------- /spec/support/jasmine.json: -------------------------------------------------------------------------------- 1 | { 2 | "spec_dir": "spec", 3 | "spec_files": [ 4 | "**/*[sS]pec.js" 5 | ], 6 | "helpers": [ 7 | "helpers/**/*.js" 8 | ] 9 | } 10 | -------------------------------------------------------------------------------- /spec/test/test.csv: -------------------------------------------------------------------------------- 1 | COL1,COL2,COL3 2 | val11,val12,val13 3 | val21,val22,val23 4 | val31,val32,val33 -------------------------------------------------------------------------------- /spec/test/test2.csv: -------------------------------------------------------------------------------- 1 | COL1,COL2,COL3 2 | Group1,1,2 3 | Group2,2,3 4 | Group3,,3 5 | -------------------------------------------------------------------------------- /spec/test/testSpec.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | var dk = 
require('../../lib/datakit.js'); 3 | var fs = require('fs'); 4 | var exec = require('child_process').exec; 5 | 6 | describe('isclose', function() { 7 | it('should return true for close numbers', function() { 8 | expect(dk.isclose(0, Math.pow(10, -15))).toBe(true); 9 | }); 10 | it('should return false for distant numbers', function() { 11 | expect(dk.isclose(0,Math.pow(10, -5))).toBe(false); 12 | }); 13 | }); 14 | 15 | var floats = []; 16 | for (var i = 0; i < 10000; i++) { 17 | floats.push(0.1); 18 | floats.push(0.2); 19 | floats.push(0.3); 20 | } 21 | 22 | describe('sum', function() { 23 | var top = 10000; 24 | var nums = []; 25 | for (var i = 0; i <= top; i++) { 26 | nums.push(i); 27 | } 28 | it('should accurately sum numbers', function() { 29 | expect(dk.sum(nums)).toBe((top * (top + 1) / 2)); 30 | }); 31 | 32 | it('should minimize errors in floating point arithmetic', function() { 33 | expect(dk.isclose(6000, dk.sum(floats))).toBe(true); 34 | }); 35 | }); 36 | 37 | describe('mean',function() { 38 | var nums = [-10, 15, 25, 0, -5]; 39 | it('should accurately compute the mean of numbers', function() { 40 | expect(dk.mean(nums)).toBe(5); 41 | }); 42 | 43 | it('should minimize errors in floating point arithmetic', function() { 44 | expect(dk.isclose(0.2, dk.mean(floats))).toBe(true); 45 | }); 46 | }); 47 | 48 | describe('prod', function() { 49 | var top = 10; 50 | var nums = []; 51 | for (var i = 1; i <= top; i++) { 52 | nums.push(i); 53 | } 54 | it('should accurately compute a product of numbers', function() { 55 | expect(dk.prod(nums)).toBe(3628800); 56 | }); 57 | 58 | var floats_ = [Math.pow(10, 75)].concat(floats.slice(0, 99)); 59 | it('should minimize errors in floating point arithmetic', function() { 60 | expect(dk.isclose(dk.prod(floats_), 47.751966659678405306351616)).toBe(true); 61 | }); 62 | }); 63 | 64 | describe('max and min',function() { 65 | var nums = [10, -2, 23, 12, 43, 123213, 2]; 66 | it('should accurately identify the maximum', function() { 67 | 
expect(dk.max(nums)).toBe(123213); 68 | }); 69 | 70 | it('should accurately identify the minimum', function() { 71 | expect(dk.min(nums)).toBe(-2); 72 | }); 73 | }); 74 | 75 | describe('cov, vari, and sd', function() { 76 | var nums = [-10, -5, 0, 5, 10]; 77 | var nums_ = [0, 10, 20, 30, 40]; 78 | it('should accurately compute covariance', function() { 79 | expect(dk.cov(nums,nums_)).toBe(125); 80 | }); 81 | it('should accurately compute variance', function() { 82 | expect(dk.vari(nums)).toBe(62.5); 83 | }); 84 | it('should accurately compute standard deviation', function() { 85 | expect(dk.sd(nums_)).toBe(Math.sqrt(250)); 86 | }); 87 | }); 88 | 89 | describe('random number generators', function() { 90 | var u = dk.uni(100); 91 | var n = dk.norm(100); 92 | var e = dk.exp(100); 93 | var n_ = dk.norm(3); 94 | it('should have the right length', function() { 95 | expect(u.length).toBe(100); 96 | expect(n.length).toBe(100); 97 | expect(e.length).toBe(100); 98 | expect(n_.length).toBe(3); 99 | }); 100 | it('should generate normals from Box-Muller', function() { 101 | var z1 = n[0]; 102 | var z2 = n[1]; 103 | var u = Math.exp(Math.pow(Math.pow(z1, 2) + Math.pow(z2, 2), 2) * (-2)); 104 | expect(u).toBeGreaterThan(0); 105 | expect(u).toBeLessThan(1); 106 | }); 107 | it('should have the correct uniform boundaries', function() { 108 | expect(dk.min(u)).toBeGreaterThan(0); 109 | expect(dk.max(u)).toBeLessThan(1); 110 | }); 111 | it('should generate exponentials from uniforms', function() { 112 | u = Math.exp(-e[0]); 113 | expect(u).toBeGreaterThan(0); 114 | expect(u).toBeLessThan(1); 115 | }); 116 | }); 117 | 118 | describe('csv and col', function() { 119 | var val, d; 120 | beforeEach(function(done) { 121 | dk.csv('spec/test/test.csv', function(data) { 122 | d = data; 123 | val = d[0].COL1; 124 | done(); 125 | }); 126 | }); 127 | 128 | it('should correctly read a csv file and find columns', function(done) { 129 | expect(val).toBe('val11'); 130 | 
expect(dk.col(d,'COL2')[0]).toBe('val12'); 131 | done(); 132 | }); 133 | }); 134 | 135 | describe('seq', function() { 136 | it('should return accurate sequences', function() { 137 | expect(dk.seq(1, 10).length).toBe(10); 138 | expect(dk.seq(-10, 10, 2)[5]).toBe(0); 139 | }); 140 | }); 141 | 142 | describe('reg', function() { 143 | var x = [1, 2, 3, 5]; 144 | var y = [2, 4, 6, 10]; 145 | var m = dk.reg(x,y); 146 | it('should return an accurate model', function() { 147 | expect(m.f(4)).toBe(8); 148 | }); 149 | it('should return an accurate interpolation of the linear model', function() { 150 | expect(m.pts[0]).toBe(2); 151 | }); 152 | it('should have the correct end points', function() { 153 | expect(m.endPoints).toEqual({ x1: 1, x2: 5, y1: 2, y2: 10 }); 154 | }); 155 | }); 156 | 157 | describe('rep', function() { 158 | var arr = dk.rep(0.5, 100); 159 | it('should have the correct length', function() { 160 | expect(arr.length).toBe(100); 161 | }); 162 | it('should have the correct value', function() { 163 | expect(arr[0]).toBe(0.5); 164 | }); 165 | }); 166 | 167 | describe('plot', function() { 168 | var p; 169 | beforeEach(function(done) { 170 | p = new dk.Chart([1, 2, 3]).addDataSet({ 171 | x: [1, 2, 3], 172 | y: [4, 5, 1] 173 | }).plot(); 174 | done(); 175 | }); 176 | 177 | it('should return an html string', function(done) { 178 | expect(p.slice(0,6)).toBe(''); 179 | expect(p.slice(p.length-7,p.length)).toBe('') 180 | done(); 181 | }); 182 | }); 183 | 184 | describe('numeric', function() { 185 | var d; 186 | beforeEach(function(done) { 187 | dk.csv('spec/test/test2.csv', function(data) { 188 | d = dk.numeric(data, ['COL2', 'COL3'], 6253); 189 | done(); 190 | }); 191 | }); 192 | 193 | it('should convert string values to numbers', function(done) { 194 | expect(dk.col(d, 'COL2')[0]).toBe(1); 195 | expect(dk.col(d, 'COL3')[0]).toBe(2); 196 | done(); 197 | }); 198 | 199 | it('should convert empty cells to the supplied default', function(done) { 200 | expect(dk.col(d, 
'COL2')[2]).toBe(6253); 201 | done(); 202 | }); 203 | }); 204 | --------------------------------------------------------------------------------