├── .gitignore ├── LICENSE ├── README.md ├── benchmark ├── QUESTIONS.md ├── create.js ├── datavore │ ├── README.md │ ├── create.js │ ├── datavore │ │ └── index.js │ ├── table.query.sum.js │ └── table.query.sum.multi.js ├── groupby.sum.js ├── groupby_sum.js ├── median.js ├── sum.js ├── where.js └── where_sum.js ├── lib ├── frame-index.js ├── frame.js ├── stream-reducers.js └── test.js ├── package.json ├── requirements.txt └── test ├── argmax.js ├── count.js ├── create.js ├── data ├── binary_matrix.py ├── generate.js ├── generate.py ├── groupby.count │ ├── operation.py │ └── small.json ├── groupby.mean │ ├── operation.py │ └── small.json ├── groupby.sum │ ├── operation.py │ └── small.json ├── groupby.where.sum │ ├── operation.py │ └── small.json ├── mean │ ├── operation.py │ └── small.json ├── where.in.sum │ ├── operation.py │ └── small.json └── where.mean │ ├── operation.py │ └── small.json ├── groupby.count.js ├── groupby.js ├── groupby.mean.js ├── groupby.sum.js ├── groupby.where.sum.js ├── join.js ├── mean.js ├── ungroup.js ├── where.in.sum.js ├── where.js └── where.mean.js /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | __pycache__ 3 | *.pyc 4 | 5 | test/data/*/00* 6 | 7 | # Logs 8 | logs 9 | *.log 10 | npm-debug.log* 11 | 12 | # Runtime data 13 | pids 14 | *.pid 15 | *.seed 16 | 17 | # Directory for instrumented libs generated by jscoverage/JSCover 18 | lib-cov 19 | 20 | # Coverage directory used by tools like istanbul 21 | coverage 22 | 23 | # nyc test coverage 24 | .nyc_output 25 | 26 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 27 | .grunt 28 | 29 | # node-waf configuration 30 | .lock-wscript 31 | 32 | # Compiled binary addons (http://nodejs.org/api/addons.html) 33 | build/Release 34 | 35 | # Dependency directories 36 | node_modules 37 | jspm_packages 38 | 39 | # Optional npm cache directory 40 | .npm 41 | 42 | # Optional REPL history 43 | .node_repl_history 44 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Dataship 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # frame 2 | 3 | a DataFrame for Javascript. 4 | 5 | _crunch numbers in Node or the Browser_ 6 | 7 | ## features 8 | * Interactive performance (<100ms) on millions of rows 9 | * Syntax similar to SQL and Pandas 10 | * Compatible with `PapaParse` and [`BabyParse`](https://github.com/Rich-Harris/BabyParse) 11 | 12 | ## examples 13 | Parse the [Iris](https://vincentarelbundock.github.io/Rdatasets/datasets.html) 14 | dataset (with [`BabyParse`](https://github.com/Rich-Harris/BabyParse)) and create a `Frame` from the result. 15 | 16 | ```javascript 17 | var baby = require('babyparse'), 18 | Frame = require('frame'); 19 | 20 | // parse the csv file 21 | config = {"header" :true, "dynamicTyping" : true, "skipEmptyLines" : true}; 22 | iris = baby.parseFiles('iris.csv', config).data; 23 | 24 | // create a frame from the parsed results 25 | frame = new Frame(iris); 26 | ``` 27 | ### groupby 28 | 29 | Group on `Species` and find the average value (`mean`) for `Sepal.Length`. 30 | ```javascript 31 | g = frame.groupby("Species"); 32 | g.mean("Sepal.Length"); 33 | ``` 34 | ```json 35 | { "virginica": 6.58799, "versicolor": 5.9360, "setosa": 5.006 } 36 | ``` 37 | Using the same grouping, find the average value for `Sepal.Width`. 38 | ```javascript 39 | g.mean("Sepal.Width"); 40 | ``` 41 | ```json 42 | { "virginica": 2.97399, "versicolor": 2.770, "setosa": 3.4279 } 43 | ``` 44 | 45 | ### where 46 | Filter by `Species` value `virginica` then find the average. 47 | ```javascript 48 | f = frame.where("Species", "virginica"); 49 | f.mean("Sepal.Length"); 50 | ``` 51 | ```json 52 | 6.58799 53 | ``` 54 | Get the number of rows that match the filter. 55 | ```javascript 56 | f.count(); 57 | ``` 58 | ```json 59 | 50 60 | ``` 61 | Columns can also be accessed directly (with the filter applied). 62 | ```javascript 63 | f["Species"] 64 | ``` 65 | ```javascript 66 | ["virginica", "virginica", "virginica", ..., "virginica"] 67 | ``` 68 | # tests 69 | Hundreds of tests verify correctness on millions of data points (against a Pandas reference). 70 | 71 | `npm run data && npm run test` 72 | 73 | # benchmarks 74 | `npm run bench` 75 | 76 | typical performance on one million rows 77 | 78 | operation | time 79 | ----------|------ 80 | `groupby` | 54ms 81 | `where` | 29ms 82 | `sum` | 5ms 83 | 84 | # design goals and inspiration 85 | 86 | * compatibility with [feather](https://github.com/wesm/feather) 87 | 88 | ## interface 89 | 90 | * pandas 91 | * R 92 | * Linq 93 | * rethinkDB 94 | * Matlab 95 | 96 | ## performance 97 | 98 | * [datavore](https://github.com/StanfordHCI/datavore) 99 | -------------------------------------------------------------------------------- /benchmark/QUESTIONS.md: -------------------------------------------------------------------------------- 1 | 2 | ### Why are my dv results not consistent with their benchmark webpage? 3 | because it slows down with consecutive runs, dropping to a quarter of initial performance. 4 | 5 | ok 1 table.query.sum: 1000000x3 6 | # 12.019 MFlops/sec ±16.51% n = 15 µ = 83ms : [0.022,0.02225,0.0935,0.092,0.0925,0.0925,0.09325,0.092,0.093,0.09275,0.09275,0.09225,0.09275,0.092,0.0925] 7 | 8 | 9 | ### Can I make Frame as fast as dv by encoding the strings? 10 | likely it will give a 3x speedup. 
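
A minimal sketch of what that encoding step could look like. The `encode` helper below is illustrative, not part of the library; `Frame` already accepts integer codes plus a decode key as its `(columns, keys)` arguments, as documented in `lib/frame.js`, and the column names in the usage comment are hypothetical.

```javascript
// map each distinct string to a small integer code and keep the
// lookup array as the decode key for that column
function encode(values){
	var key = [], codes = new Array(values.length);
	var seen = Object.create(null); // avoid prototype-key collisions
	for(var i = 0; i < values.length; i++){
		var v = values[i];
		if(!(v in seen)){
			seen[v] = key.length;
			key.push(v);
		}
		codes[i] = seen[v];
	}
	return {"codes" : codes, "key" : key};
}

// usage (hypothetical column):
// var e = encode(["a", "b", "a", "c"]);
// var frame = new Frame({"group-col" : e.codes}, {"group-col" : e.key});
```

The integer vs. string benchmark numbers below give a rough sense of the gap such an encoding would close.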
11 | 12 | #### integers 13 | ok 1 groupby.sum: 1000000x3 14 | # 13.952 MFlops/sec ±2.01% n = 29 µ = 72ms : [0.0645,0.0625,0.0635,0.062,0.0775,0.0725,0.0715,0.073,0.0725,0.073,0.0735,0.0745,0.0725,0.0725,0.074,0.073,0.0715,0.077,0.072,0.071,0.072,0.0715,0.0725,0.0735,0.0725,0.073,0.0745,0.0715,0.0735] 15 | 16 | #### strings 17 | ok 1 groupby.sum: 1000000x3 18 | # 4.120 MFlops/sec ±3.74% n = 14 µ = 243ms : [0.239,0.235,0.232,0.234,0.267,0.267,0.24,0.235,0.235,0.236,0.279,0.233,0.233,0.233] 19 | 20 | ### Is the FrameIndex.reduce faster than dv.query, when Frame.groupby has already been run? 21 | yes, but not quite faster than the ultra-fast first two runs of dv.query 22 | 23 | ok 1 sum: 1000000x3 24 | # 23.298 MFlops/sec ±1.40% n = 34 µ = 43ms : [0.037333333333333336,0.042333333333333334,0.042333333333333334,0.042666666666666665,0.042333333333333334,0.042666666666666665,0.042333333333333334,0.043333333333333335,0.042666666666666665,0.04566666666666667,0.044000000000000004,0.042666666666666665,0.044333333333333336,0.042666666666666665,0.04733333333333333,0.043666666666666666,0.042,0.042333333333333334,0.043000000000000003,0.042333333333333334,0.042666666666666665,0.042,0.044000000000000004,0.042666666666666665,0.042666666666666665,0.042666666666666665,0.042,0.042333333333333334,0.043000000000000003,0.048666666666666664,0.041666666666666664,0.041666666666666664,0.042333333333333334,0.043000000000000003] 25 | 26 | ### Can I make FrameIndex.reduce faster than the ultra-fast dv.query? 27 | 28 | try: 29 | 1. reproducing results 30 | 2. removing the function call 31 | 32 | 33 | ### Is the dv setup longer? 34 | 35 | ### Why is dv faster initially? 36 | -------------------------------------------------------------------------------- /benchmark/create.js: -------------------------------------------------------------------------------- 1 | var benchtap = require('benchtap'), 2 | gen = require('../lib/test').generate, 3 | Frame = require('../lib/frame'); 4 | 5 | function createSetup(N, K, useStrings){ 6 | return function(event){ 7 | // generate data 8 | this.groupCol = gen.Array.int(N, K); 9 | this.valueCol = gen.Array.int(N, 100); 10 | 11 | // map to strings 12 | if(useStrings) 13 | this.groupCol = this.groupCol.map(i => ["a", "b", "c"][i]); 14 | 15 | }; 16 | } 17 | 18 | function test(){ 19 | 20 | // create frame 21 | var columnDict = { 22 | "group-col" : this.groupCol, 23 | "reduce-col" : this.valueCol 24 | }; 25 | 26 | this.frame = new Frame(columnDict); 27 | } 28 | 29 | var N = 100000, 30 | K = 3; 31 | 32 | var name = "create: " + N + "x" + K; 33 | benchtap(name, {"operations": N}, createSetup(N, K), test); 34 | 35 | 36 | name += " (strings)"; 37 | benchtap(name, {"operations": N}, createSetup(N, K, true), test); 38 | 39 | 40 | 41 | var N = 1000000; 42 | 43 | name = "create: " + N + "x" + K; 44 | benchtap(name, {"operations": N}, createSetup(N, K), test); 45 | 46 | 47 | name += " (strings)"; 48 | benchtap(name, {"operations": N}, createSetup(N, K, true), test); 49 | -------------------------------------------------------------------------------- /benchmark/datavore/README.md: -------------------------------------------------------------------------------- 1 | Comparison benchmarks of similar operations for [datavore](https://github.com/StanfordHCI/datavore) 2 | -------------------------------------------------------------------------------- /benchmark/datavore/create.js: -------------------------------------------------------------------------------- 1 | var benchtap = require('benchtap'), 2 | gen = 
require('../../generate'), 3 | dv = require('./datavore'); 4 | 5 | function createSetup(N, K, useStrings){ 6 | return function(event){ 7 | 8 | this.groupCol = gen.Array.int(N, K); 9 | this.valueCol = gen.Array.int(N, 100); 10 | 11 | if(useStrings) 12 | this.groupCol = this.groupCol.map(i => ["a", "b", "c"][i]); 13 | }; 14 | } 15 | 16 | function test(){ 17 | 18 | // create table 19 | var table = dv.table([ 20 | {name:"group-col", type:"nominal", values:this.groupCol}, 21 | {name:"reduce-col", type:"numeric", values:this.valueCol} 22 | ]); 23 | } 24 | 25 | 26 | // 1 hundred thousand data points/rows 27 | var N = 100000, 28 | K = 3; 29 | 30 | var name = "create: " + N + "x" + K; 31 | benchtap(name, {"operations" : 2*N}, createSetup(N, K), test); 32 | 33 | name += " (strings)"; 34 | benchtap(name, {"operations" : 2*N}, createSetup(N, K, true), test); 35 | 36 | // 1 million data points/rows 37 | var N = 1000000; 38 | 39 | name = "create: " + N + "x" + K; 40 | benchtap(name, {"operations" : 2*N}, createSetup(N, K), test); 41 | 42 | name += " (strings)"; 43 | benchtap(name, {"operations" : 2*N}, createSetup(N, K, true), test); 44 | -------------------------------------------------------------------------------- /benchmark/datavore/datavore/index.js: -------------------------------------------------------------------------------- 1 | 2 | 3 | module.exports = (function() { 4 | /** 5 | * The top-level Datavore namespace. All public methods and fields should be 6 | * registered on this object. Note that core Datavore source is surrounded by an 7 | * anonymous function, so any other declared globals will not be visible outside 8 | * of core methods. This also allows multiple versions of Datavore to coexist, 9 | * since each version will see their own dv namespace. 10 | * 11 | * @namespace The top-level Datavore namespace, dv. 
12 | */ 13 | var dv = {version: "1.0.0"}; 14 | 15 | dv.array = function(n) { 16 | var a = Array(n); 17 | for (var i = n; --i >= 0;) { a[i] = 0; } 18 | return a; 19 | } 20 | 21 | // -- RANDOM NUMBER GENERATORS ------------------------------------------------ 22 | 23 | dv.rand = {}; 24 | 25 | dv.rand.uniform = function(min, max) { 26 | min = min || 0; 27 | max = max || 1; 28 | var delta = max - min; 29 | return function() { 30 | return min + delta * Math.random(); 31 | } 32 | }; 33 | 34 | dv.rand.integer = function(a, b) { 35 | if (b === undefined) { 36 | b = a; 37 | a = 0; 38 | } 39 | return function() { 40 | return a + Math.max(0, Math.floor(b * (Math.random() - 0.001))); 41 | } 42 | } 43 | 44 | dv.rand.normal = function(mean, stdev) { 45 | mean = mean || 0; 46 | stdev = stdev || 1; 47 | var next = undefined; 48 | return function() { 49 | var x = 0, y = 0, rds, c; 50 | if (next !== undefined) { 51 | x = next; 52 | next = undefined; 53 | return x; 54 | } 55 | do { 56 | x = Math.random() * 2 - 1; 57 | y = Math.random() * 2 - 1; 58 | rds = x * x + y * y; 59 | } while (rds == 0 || rds > 1); 60 | c = Math.sqrt(-2 * Math.log(rds) / rds); // Box-Muller transform 61 | next = mean + y * c * stdev; 62 | return mean + x * c * stdev; 63 | } 64 | } 65 | // -- DATA TABLE -------------------------------------------------------------- 66 | 67 | dv.type = { 68 | nominal: "nominal", 69 | ordinal: "ordinal", 70 | numeric: "numeric", 71 | unknown: "unknown" 72 | }; 73 | 74 | dv.table = function(input) 75 | { 76 | var table = []; // the data table 77 | 78 | table.addColumn = function(name, values, type, iscolumn) { 79 | type = type || dv.type.unknown; 80 | var compress = (type === dv.type.nominal || type === dv.type.ordinal); 81 | var vals = values; 82 | 83 | if (compress && !iscolumn) { 84 | vals = []; 85 | vals.lut = code(values); 86 | for (var i = 0, map=dict(vals.lut); i < values.length; ++i) { 87 | vals.push(map[values[i]]); 88 | } 89 | vals.get = function(idx) { return this.lut[this[idx]]; } 90 | } else if (!iscolumn) { 91 | vals.get = function(idx) { return this[idx]; } 92 | } 93 | vals.name = name; 94 | vals.index = table.length; 95 | vals.type = type; 96 | 97 | table.push(vals); 98 | table[name] = vals; 99 | }; 100 | 101 | table.removeColumn = function(col) { 102 | col = table[col] || null; 103 | if (col != null) { 104 | delete table[col.name]; 105 | table.splice(col.index, 1); 106 | } 107 | return col; 108 | }; 109 | 110 | table.rows = function() { return table[0] ? table[0].length : 0; }; 111 | 112 | table.cols = function() { return table.length; }; 113 | 114 | table.get = function(col, row) { return table[col].get(row); } 115 | 116 | table.dense_query = function(q) { 117 | var tab = q.where ? 
table.where(q.where) : table; 118 | var dims = [], sz = [1], hasDims = q.dims; 119 | if (hasDims) { 120 | sz = []; 121 | for (i = 0; i < q.dims.length; ++i) { 122 | var dim = q.dims[i], type = typeof dim; 123 | if (type === "string" || type === "number") { 124 | col = tab[dim]; 125 | } else if (dim.array) { 126 | col = dim.array(tab[dim.value]); 127 | } 128 | dims.push(col); 129 | sz.push(col.lut.length); 130 | } 131 | } 132 | 133 | var vals = q.vals, // aggregate query operators 134 | C = sz.reduce(function(a,b) { return a * b; }, 1), // cube cardinality 135 | N = tab[0].length, p, col, v, name, expr, // temp vars 136 | cnt, sum, ssq, min, max, // aggregate values 137 | _cnt, _sum, _ssq, _min, _max, // aggregate flags 138 | ctx = {}, emap = {}, exp = [], lut, // aggregate state vars 139 | i = 0, j = 0, k = 0, l = 0, idx = 0, len, slen = sz.length; // indices 140 | 141 | // Identify Requested Aggregates 142 | var star = false; 143 | for (i = 0; i < vals.length; ++i) { 144 | var req = vals[i].init(); 145 | for (expr in req) { 146 | if (expr == "*") { 147 | req[expr].map(function(func) { 148 | ctx[func] = dv.array(C); 149 | }); 150 | star = true; 151 | } else { 152 | idx = tab[expr].index; 153 | name = tab[expr].name; 154 | req[expr].map(function(func) { 155 | ctx[func + "_" + name] = (ctx[func + "_" + idx] = dv.array(C)); 156 | }); 157 | if (!emap[idx]) { 158 | emap[idx] = true; 159 | exp.push(idx); 160 | } 161 | } 162 | } 163 | } 164 | if (exp.length == 0 && star) { exp.push(-1) }; 165 | 166 | // Compute Cube Index Coefficients 167 | for (i = 0, p = [1]; i < slen; ++i) { 168 | p.push(p[i] * sz[i]); 169 | } 170 | 171 | // Execute Query: Compute Aggregates 172 | for (j = 0, len = exp.length; j < len; ++j) { 173 | expr = exp[j]; 174 | cnt = ctx["cnt"]; _cnt = (cnt && j==0); 175 | sum = ctx["sum_" + expr]; _sum = (sum !== undefined); 176 | ssq = ctx["ssq_" + expr]; _ssq = (ssq !== undefined); 177 | min = ctx["min_" + expr]; _min = (min !== undefined); 178 | max = ctx["max_" + expr]; _max = (max !== undefined); 179 | col = tab[expr]; 180 | outer: 181 | for (i = 0; i < N; ++i) { 182 | for (idx = 0, k = 0; k < slen; ++k) { 183 | // compute cube index 184 | l = (hasDims ? dims[k][i] : 0); 185 | if (l < 0) continue outer; 186 | idx += p[k] * l; 187 | } 188 | if (col) { v = col[i]; } 189 | if (_cnt) { cnt[idx] += 1; } 190 | if (_sum) { sum[idx] += v; } 191 | if (_ssq) { ssq[idx] += v * v; } 192 | if (_min && v < min[idx]) { min[idx] = v; } 193 | if (_max && v > max[idx]) { max[idx] = v; } 194 | } 195 | } 196 | 197 | // Generate Results 198 | var result = [], stride = 1, s, val, code = q.code || false; 199 | for (i = 0; i < dims.length; ++i) { 200 | col = []; 201 | lut = dims[i].lut; 202 | s = sz[i]; 203 | val = 0; 204 | for (j = 0, k = 0, c = -1; j < C; ++j, ++k) { 205 | if (k == stride) { k = 0; val = (val + 1) % s; } 206 | col[j] = code ? val : lut[val]; 207 | } 208 | stride *= s; 209 | col.unique = lut.length; 210 | result.push(col); 211 | } 212 | vals.map(function(op) { result.push(op.done(ctx)); }); 213 | return result; 214 | }; 215 | 216 | table.query = table.dense_query; 217 | 218 | table.sparse_query = function(q) { 219 | var tab = q.where ? 
table.where(q.where) : table; 220 | var dims = [], sz = [1], hasDims = q.dims; 221 | if (hasDims) { 222 | sz = []; 223 | for (i=0; i max[idx])) { 306 | max[idx] = v; 307 | } 308 | } 309 | } 310 | 311 | // Generate Results 312 | var rr = vals.map(function(op) { return op.done(ctx); }); 313 | var keys = rr[0]; 314 | if (rr.length > 1) { 315 | keys = {}; 316 | rr.forEach(function(o) { for (var k in o) keys[k] = 1; }); 317 | } 318 | var result = dims.map(function() { return []; }); 319 | vals.forEach(function() { result.push([]); }); 320 | len = dims.length; 321 | 322 | for (k in keys) { 323 | // map index i to dimensional indices 324 | var nn = C, uv, div; 325 | for (i = k, j = len; --j >= 0;) { 326 | uv = dims[j].lut.length; 327 | div = ~~(nn / uv); 328 | result[j].push(dims[j].lut[~~(i / div)]); 329 | i = i % div; 330 | nn = ~~(nn / uv); 331 | } 332 | for (j = 0; j < rr.length; ++j) { 333 | val = rr[j][k]; 334 | result[len + j].push(val === undefined ? 0 : val); 335 | } 336 | } 337 | return result; 338 | }; 339 | 340 | table.where = function(f) { 341 | var nrows = table.rows(), 342 | ncols = table.cols(); 343 | 344 | // initialize result table 345 | var result = dv.table([]); 346 | for (var i = 0; i < ncols; ++i) { 347 | result.push([]); 348 | result[i].name = table[i].name; 349 | result[i].type = table[i].type; 350 | result[i].index = i; 351 | result[table[i].name] = result[i]; 352 | if (table[i].lut) { result[i].lut = table[i].lut; } 353 | } 354 | 355 | // populate result table 356 | for (var row = 0, j = -1; row < nrows; ++row) { 357 | if (f(table, row)) { 358 | for (i = 0, ++j; i < ncols; ++i) { 359 | result[i][j] = table[i][row]; 360 | } 361 | } 362 | } 363 | return result; 364 | }; 365 | 366 | /** @private */ 367 | function code(a) { 368 | var c = [], d = {}, v; 369 | for (var i=0, len=a.length; i maxv) { maxv = val; } 517 | } 518 | if (minb) { minv = Math.floor(minv / step) * step; } 519 | if (maxb) { maxv = Math.ceil(maxv / step) * step; } 520 | } 521 | // compute index array 522 | var a = [], lut = (a.lut = []), 523 | range = (maxv - minv), unique = Math.ceil(range / step); 524 | for (i = 0; i < N; ++i) { 525 | val = values[i]; 526 | if (val < minv || val > maxv) { a.push(-1); } 527 | else if (val == maxv) { a.push(unique - 1); } 528 | else { a.push(~~((values[i] - minv) / step)); } 529 | } 530 | for (i = 0; i < unique; ++i) { 531 | // multiply b/c adding garners round-off error 532 | lut.push(minv + i * step); 533 | } 534 | return a; 535 | }; 536 | op.step = function(x) { 537 | if (x === undefined) return step; 538 | step = x; 539 | return op; 540 | }; 541 | op.min = function(x) { 542 | if (x === undefined) return min; 543 | min = x; 544 | return op; 545 | }; 546 | op.max = function(x) { 547 | if (x === undefined) return max; 548 | max = x; 549 | return op; 550 | }; 551 | op.value = expr; 552 | return op; 553 | }; 554 | 555 | dv.quantile = function(expr, n) { 556 | function search(array, value) { 557 | var low = 0, high = array.length - 1; 558 | while (low <= high) { 559 | var mid = (low + high) >> 1, midValue = array[mid]; 560 | if (midValue < value) { low = mid + 1; } 561 | else if (midValue > value) { high = mid - 1; } 562 | else { return mid; } 563 | } 564 | var i = -low - 1; 565 | return (i < 0) ? 
(-i - 1) : i; 566 | } 567 | 568 | var op = {}; 569 | op.array = function(values) { 570 | // get sorted data values 571 | var i, d = values.sorted; 572 | if (!d) { 573 | var cmp; 574 | if (values.type && values.type === "numeric") { 575 | cmp = function(a,b) { return a - b; } 576 | } else { 577 | cmp = function(a,b) { return a < b ? -1 : a > b ? 1 : 0; } 578 | } 579 | values.sorted = (d = values.slice().sort(cmp)); 580 | } 581 | // compute quantile boundaries 582 | var q = [d[0]], a = [], lut = (a.lut = []); 583 | for (i = 1; i <= n; ++i) { 584 | q[i] = d[~~(i * (d.length - 1) / n)]; 585 | lut.push(i - 1); 586 | } 587 | // iterate through data and label quantiles 588 | for (i = 0; i < values.length; ++i) { 589 | a.push(Math.max(0, search(q, values[i]) - 1)); 590 | } 591 | return a; 592 | } 593 | op.bins = function(x) { 594 | if (x === undefined) return n; 595 | n = x; 596 | return op; 597 | } 598 | op.value = expr; 599 | return op; 600 | }; 601 | 602 | return dv; })(); 603 | -------------------------------------------------------------------------------- /benchmark/datavore/table.query.sum.js: -------------------------------------------------------------------------------- 1 | var benchtap = require('benchtap'), 2 | gen = require('../../generate'), 3 | dv = require('./datavore'); 4 | 5 | function createSetup(N, K, useStrings){ 6 | return function(event){ 7 | 8 | var groupCol = gen.Array.int(N, K); 9 | var valueCol = gen.Array.int(N, 100); 10 | 11 | if(useStrings) 12 | groupCol = groupCol.map(i => ["a", "b", "c"][i]); 13 | 14 | // create table 15 | this.table = dv.table([ 16 | {name:"group-col", type:"nominal", values:groupCol}, 17 | {name:"reduce-col", type:"numeric", values:valueCol} 18 | ]); 19 | 20 | // generate data 21 | /* 22 | this.table = dv.table(); 23 | this.table.addColumn("group-col", groupCol, dv.type.nominal); 24 | this.table.addColumn("reduce-col", valueCol, dv.type.numeric); 25 | */ 26 | 27 | }; 28 | } 29 | 30 | function test(){ 31 | 32 | var result = this.table.query({ 33 | "dims" : [0], 34 | "vals" : [dv.sum("reduce-col")] 35 | }); 36 | } 37 | 38 | 39 | var N = 100000, 40 | K = 3; 41 | 42 | var name = "table.query.sum: " + N + "x" + K; 43 | benchtap(name, {"operations" : 2*N}, createSetup(N, K), test); 44 | 45 | name += " (strings)"; 46 | benchtap(name, {"operations" : 2*N}, createSetup(N, K, true), test); 47 | 48 | 49 | var N = 1000000; 50 | 51 | name = "table.query.sum: " + N + "x" + K; 52 | benchtap(name, {"operations" : 2*N}, createSetup(N, K), test); 53 | 54 | name += " (strings)"; 55 | benchtap(name, {"operations" : 2*N}, createSetup(N, K, true), test); 56 | -------------------------------------------------------------------------------- /benchmark/datavore/table.query.sum.multi.js: -------------------------------------------------------------------------------- 1 | var benchtap = require('benchtap'), 2 | gen = require('../../generate'), 3 | dv = require('./datavore'); 4 | 5 | var STRINGS = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o"]; 6 | 7 | function createSetup(N, K, M, useStrings){ 8 | return function(event){ 9 | 10 | var columns = [ 11 | {"name" : "value", "type":"numeric", "values": gen.Array.int(N, 100)} 12 | ]; 13 | var names = []; 14 | for (var m = 0; m < M; m++){ 15 | var name = "id_"+m; 16 | var column = { 17 | "name" : name, 18 | "type" : "ordinal", 19 | "values" : gen.Array.int(N, K) 20 | }; 21 | 22 | // map to strings 23 | if(useStrings){ 24 | column.values = column.values.map(i => STRINGS[i]); 25 | } 26 | 27 | 
columns.push(column); 28 | 29 | 30 | names[m] = name; 31 | } 32 | 33 | // create table 34 | this.table = dv.table(columns); 35 | 36 | // generate data 37 | /* 38 | this.table = dv.table(); 39 | this.table.addColumn("group-col", groupCol, dv.type.nominal); 40 | this.table.addColumn("reduce-col", valueCol, dv.type.numeric); 41 | */ 42 | 43 | }; 44 | } 45 | 46 | 47 | function test(){ 48 | 49 | //var names = this.names; 50 | //"dims" : ["id_0", "id_1"], 51 | var result = this.table.query({ 52 | "dims" : ["id_0", "id_1", "id_2", "id_3"], 53 | "vals" : [dv.sum("value")] 54 | }); 55 | } 56 | 57 | 58 | var N = 100000, 59 | K = 3, 60 | M = 4; 61 | 62 | var name = "table.query.sum.multi: " + N + "x" + K + "x" + M; 63 | benchtap(name, {"operations" : 2*N}, createSetup(N, K, M), test); 64 | 65 | name += " (strings)"; 66 | benchtap(name, {"operations" : 2*N}, createSetup(N, K, M, true), test); 67 | 68 | 69 | var N = 1000000; 70 | 71 | name = "table.query.sum.multi: " + N + "x" + K + "x" + M; 72 | benchtap(name, {"operations" : 2*N}, createSetup(N, K, M), test); 73 | 74 | name += " (strings)"; 75 | benchtap(name, {"operations" : 2*N}, createSetup(N, K, M, true), test); 76 | -------------------------------------------------------------------------------- /benchmark/groupby.sum.js: -------------------------------------------------------------------------------- 1 | var benchtap = require('benchtap'), 2 | gen = require('../lib/test').generate, 3 | Frame = require('../lib/frame'); 4 | 5 | 6 | var STRINGS = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o"]; 7 | 8 | // create a frame for multidimensional groupby 9 | function createSetup(N, K, M, useStrings){ 10 | return function(event){ 11 | // generate data 12 | var columns = { 13 | "value" : gen.Array.int(N, 100) 14 | }; 15 | var names = []; 16 | for (var m = 0; m < M; m++){ 17 | var name = "id_"+m; 18 | columns[name] = gen.Array.int(N, K); 19 | 20 | // map to strings 21 | if(useStrings){ 22 | columns[name] = columns[name].map(i => STRINGS[i]); 23 | } 24 | 25 | names[m] = name; 26 | } 27 | //console.log(names); 28 | 29 | 30 | // create frame 31 | this.frame = new Frame(columns); 32 | }; 33 | } 34 | 35 | 36 | var N = 100000, 37 | K = 3, 38 | M = 1; 39 | 40 | var groups = []; 41 | for(var i = 0; i < M; i ++) groups.push("id_"+i); 42 | 43 | var name = "groupby.sum: " + N + "x" + K + "x" + M; 44 | 45 | benchtap(name, {"operations" : 2*N}, createSetup(N, K, M), function(){ 46 | 47 | //var group = this.frame.groupbymulti(["group-col0", "group-col1"]); 48 | var group = this.frame.groupby(groups); 49 | var result = group.sum("value"); 50 | }); 51 | 52 | name += " (strings)"; 53 | 54 | benchtap(name, {"operations" : 2*N}, createSetup(N, K, M, true), function(){ 55 | 56 | //var group = this.frame.groupbymulti(["group-col0", "group-col1"]); 57 | var group = this.frame.groupby(groups); 58 | var result = group.sum("value"); 59 | }); 60 | 61 | N = 1000000; 62 | name = "groupby.sum: " + N + "x" + K + "x" + M; 63 | 64 | benchtap(name, {"operations" : 2*N}, createSetup(N, K, M), function(){ 65 | 66 | //var group = this.frame.groupbymulti(["group-col0", "group-col1"]); 67 | var group = this.frame.groupby(groups); 68 | var result = group.sum("value"); 69 | }); 70 | 71 | name += " (strings)"; 72 | 73 | benchtap(name, {"operations" : 2*N}, createSetup(N, K, M, true), function(){ 74 | 75 | //var group = this.frame.groupbymulti(["group-col0", "group-col1"]); 76 | var group = this.frame.groupby(groups); 77 | var result = group.sum("value"); 78 | }); 79 | 
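// M = 2: the setup now generates two id columns (id_0, id_1);
// `groups` (built above while M = 1) still lists only id_0, so the groupby below runs on that single column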
80 | M = 2; 81 | 82 | name = "groupby.sum: " + N + "x" + K + "x" + M; 83 | 84 | benchtap(name, {"operations" : 2*N}, createSetup(N, K, M), function(){ 85 | 86 | //var group = this.frame.groupbymulti(["group-col0", "group-col1"]); 87 | var group = this.frame.groupby(groups); 88 | var result = group.sum("value"); 89 | }); 90 | 91 | K = 200; 92 | M = 2; 93 | name = "groupby.sum: " + N + "x" + K + "x" + M; 94 | benchtap(name, {"operations" : 2*N}, createSetup(N, K, M, true), function(){ 95 | 96 | //var group = this.frame.groupbymulti(["group-col0", "group-col1"]); 97 | var group = this.frame.groupby(groups); 98 | var result = group.sum("value"); 99 | }); 100 | /* 101 | var tests = [ 102 | 0 103 | ]; 104 | 105 | var RTOL = 1e-05, // 1e-05 106 | ATOL = 1e-12; // 1e-12 107 | 108 | var dataDirectory = 'test/data/sum/', 109 | testFile = 'small.json'; 110 | 111 | var floader = require('floader'), 112 | dtest = require('../lib/test'); 113 | 114 | floader.load(dataDirectory + testFile, function(err, config){ 115 | 116 | var suite = JSON.parse(config); 117 | 118 | for(var j = 0; j < tests.length; j++){ 119 | 120 | var i = tests[j]; 121 | var prefix = String("0000" + (i + 1)).slice(-4); 122 | 123 | // directory containing matrix data files for current test 124 | var directory = dataDirectory + prefix + '/'; 125 | 126 | var test = suite[i]; 127 | 128 | var names = test.id.map(function(spec, i){ return "id_" + i;}); 129 | var types = test.id.map(function(spec, i){ return spec['type'];}); 130 | 131 | var value_names = ["value_0"]; 132 | var value_types = [test.value[0].type]; 133 | 134 | var N = test.N; // number of rows 135 | var distincts = test.id.map(function(spec, i){ return spec.K; }); 136 | 137 | var testName = "groupby.summulti: " + N + " x " + "(" + distincts.join(", ") + ")" 138 | //tape(testName, generateTestCase(directory, names, types, ["value_0"], [test.value[0].type])); 139 | 140 | //var name = "groupby.sum.multi: " + N + "x" + K + "x" + M; 141 | 142 | benchtap(testName, {"operations" : 2*N}, 143 | createSetup(directory, names, types, value_names, value_types), 144 | function(event){ 145 | 146 | var g = this.frame.groupbymulti(names); 147 | var actual = g.summulti(value_names[0]); 148 | 149 | event.resolve(); 150 | }); 151 | } 152 | }); 153 | 154 | var OUT_FILENAME = "out.json"; 155 | 156 | function createSetup(directory, id_names, id_types, value_names, value_types){ 157 | return function(event){ 158 | 159 | var self = this; 160 | var names = id_names.concat(value_names); 161 | var types = id_types.concat(value_types); 162 | 163 | // which columns require a key file? 
164 | var key_names = id_names.filter(function(item, i){ 165 | return id_types[i] in dtest.string_types 166 | }); 167 | var key_types = id_types.filter(function(item, i){ 168 | return item in dtest.string_types 169 | }); 170 | 171 | console.log(directory); 172 | // load columns from files 173 | dtest.load(directory, names, types, function(err, columns){ 174 | 175 | if(err) return console.log(err); 176 | 177 | console.log("running setup."); 178 | // load key files 179 | dtest.load_key(directory, key_names, key_types, function(err, keys){ 180 | 181 | floader.load(directory + OUT_FILENAME, function(err, out){ 182 | var expected = JSON.parse(out); 183 | 184 | var column_set = {}; 185 | for (var i = 0; i < names.length; i++){ 186 | var name = names[i]; 187 | var column = columns[i]; 188 | column_set[name] = column; 189 | } 190 | // keys map a small set of integers to other things (like strings) 191 | // they're a very simple form of fixed length coding 192 | var key_set = {}; 193 | for (var i = 0; i < keys.length; i++){ 194 | var name = key_names[i]; 195 | var key = keys[i]; 196 | key_set[name] = key; 197 | } 198 | 199 | self.frame = new Frame(column_set, key_set); 200 | 201 | event.resolve(); 202 | 203 | }); 204 | 205 | }); 206 | }); 207 | }; 208 | } 209 | */ 210 | -------------------------------------------------------------------------------- /benchmark/groupby_sum.js: -------------------------------------------------------------------------------- 1 | var benchtap = require('benchtap'), 2 | gen = require('../lib/test').generate, 3 | Frame = require('../lib/frame'); 4 | 5 | var STRINGS = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o"]; 6 | /* 7 | N - number of rows 8 | K - number of distinct values in id columns 9 | M - number of id columns 10 | */ 11 | function createSetup(N, K, M, useStrings){ 12 | return function(event){ 13 | // generate data 14 | var columns = { 15 | "value" : gen.Array.int(N, 100) 16 | }; 17 | var names = []; 18 | for (var m = 0; m < M; m++){ 19 | var name = "id_"+m; 20 | columns[name] = gen.Array.int(N, K); 21 | 22 | // map to strings 23 | if(useStrings){ 24 | columns[name] = columns[name].map(i => STRINGS[i]); 25 | } 26 | 27 | names[m] = name; 28 | } 29 | 30 | // create frame 31 | this.frame = new Frame(columns); 32 | // group on all id columns 33 | this.group = this.frame.groupby(names); 34 | }; 35 | } 36 | 37 | var N = 100000, 38 | K = 3, 39 | M = 1; 40 | 41 | var name = "sum: " + N + "x" + K + "x" + M; 42 | 43 | benchtap(name, {"operations": N}, createSetup(N, K, M), function(){ 44 | var result = this.group.sum("value"); 45 | }); 46 | 47 | /* 48 | name += " (strings)"; 49 | 50 | benchtap(name, {"operations": N}, createSetup(N, K, true), function(){ 51 | var result = this.group.reduce("reduce-col"); 52 | }); 53 | */ 54 | 55 | 56 | var N = 1000000; 57 | 58 | name = "sum: " + N + "x" + K + "x" + M; 59 | 60 | benchtap(name, {"operations": N}, createSetup(N, K, M), function(){ 61 | var result = this.group.sum("value"); 62 | }); 63 | 64 | M = 2; 65 | 66 | name = "sum: " + N + "x" + K + "x" + M; 67 | 68 | benchtap(name, {"operations": N}, createSetup(N, K, M), function(){ 69 | var result = this.group.sum("value"); 70 | }); 71 | 72 | /* 73 | name += " (strings)"; 74 | 75 | benchtap(name, {"operations": N}, createSetup(N, K, true), function(){ 76 | var result = this.group.reduce("reduce-col"); 77 | }); 78 | */ 79 | 80 | K = 200; 81 | M = 2; 82 | 83 | var name = "sum: " + N + "x" + K + "x" + M; 84 | 85 | benchtap(name, {"operations": N}, 
createSetup(N, K, M), function(){ 86 | var result = this.group.sum("value"); 87 | }); 88 | -------------------------------------------------------------------------------- /benchmark/median.js: -------------------------------------------------------------------------------- 1 | var benchtap = require('benchtap'), 2 | gen = require('../lib/test').generate, 3 | Frame = require('../lib/frame'); 4 | 5 | var STRINGS = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o"]; 6 | /* 7 | N - number of rows 8 | K - number of distinct values in id columns 9 | M - number of id columns 10 | */ 11 | function createSetup(N, K, M, useStrings){ 12 | return function(event){ 13 | // generate data 14 | var columns = { 15 | "value" : gen.Array.int(N, 100) 16 | }; 17 | var names = []; 18 | for (var m = 0; m < M; m++){ 19 | var name = "id_"+m; 20 | columns[name] = gen.Array.int(N, K); 21 | 22 | // map to strings 23 | if(useStrings){ 24 | columns[name] = columns[name].map(i => STRINGS[i]); 25 | } 26 | 27 | names[m] = name; 28 | } 29 | 30 | // create frame 31 | this.frame = new Frame(columns); 32 | }; 33 | } 34 | 35 | var N = 100000, 36 | K = 3, 37 | M = 1; 38 | 39 | var name = "median: " + N + "x" + K + "x" + M; 40 | 41 | benchtap(name, {"operations": N}, createSetup(N, K, M), function(){ 42 | var result = this.frame.median("value"); 43 | }); 44 | 45 | /* 46 | name += " (strings)"; 47 | 48 | benchtap(name, {"operations": N}, createSetup(N, K, true), function(){ 49 | var result = this.group.reduce("reduce-col"); 50 | }); 51 | */ 52 | 53 | 54 | var N = 1000000; 55 | 56 | name = "median: " + N + "x" + K + "x" + M; 57 | 58 | benchtap(name, {"operations": N}, createSetup(N, K, M), function(){ 59 | var result = this.frame.median("value"); 60 | }); 61 | 62 | /* 63 | name += " (strings)"; 64 | 65 | benchtap(name, {"operations": N}, createSetup(N, K, true), function(){ 66 | var result = this.group.reduce("reduce-col"); 67 | }); 68 | */ 69 | 70 | K = 200; 71 | M = 2; 72 | 73 | var name = "median: " + N + "x" + K + "x" + M; 74 | 75 | benchtap(name, {"operations": N}, createSetup(N, K, M), function(){ 76 | var result = this.frame.median("value"); 77 | }); 78 | -------------------------------------------------------------------------------- /benchmark/sum.js: -------------------------------------------------------------------------------- 1 | var benchtap = require('benchtap'), 2 | gen = require('../lib/test').generate, 3 | Frame = require('../lib/frame'); 4 | 5 | var STRINGS = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o"]; 6 | /* 7 | N - number of rows 8 | K - number of distinct values in id columns 9 | M - number of id columns 10 | */ 11 | function createSetup(N, K, M, useStrings){ 12 | return function(event){ 13 | // generate data 14 | var columns = { 15 | "value" : gen.Array.int(N, 100) 16 | }; 17 | var names = []; 18 | for (var m = 0; m < M; m++){ 19 | var name = "id_"+m; 20 | columns[name] = gen.Array.int(N, K); 21 | 22 | // map to strings 23 | if(useStrings){ 24 | columns[name] = columns[name].map(i => STRINGS[i]); 25 | } 26 | 27 | names[m] = name; 28 | } 29 | 30 | // create frame 31 | this.frame = new Frame(columns); 32 | }; 33 | } 34 | 35 | var N = 100000, 36 | K = 3, 37 | M = 1; 38 | 39 | var name = "sum: " + N + "x" + K + "x" + M; 40 | 41 | benchtap(name, {"operations": N}, createSetup(N, K, M), function(){ 42 | var result = this.frame.sum("value"); 43 | }); 44 | 45 | /* 46 | name += " (strings)"; 47 | 48 | benchtap(name, {"operations": N}, createSetup(N, K, true), 
function(){ 49 | var result = this.group.reduce("reduce-col"); 50 | }); 51 | */ 52 | 53 | 54 | var N = 1000000; 55 | 56 | name = "sum: " + N + "x" + K + "x" + M; 57 | 58 | benchtap(name, {"operations": N}, createSetup(N, K, M), function(){ 59 | var result = this.frame.sum("value"); 60 | }); 61 | 62 | /* 63 | name += " (strings)"; 64 | 65 | benchtap(name, {"operations": N}, createSetup(N, K, true), function(){ 66 | var result = this.group.reduce("reduce-col"); 67 | }); 68 | */ 69 | 70 | K = 200; 71 | M = 2; 72 | 73 | var name = "sum: " + N + "x" + K + "x" + M; 74 | 75 | benchtap(name, {"operations": N}, createSetup(N, K, M), function(){ 76 | var result = this.frame.sum("value"); 77 | }); 78 | -------------------------------------------------------------------------------- /benchmark/where.js: -------------------------------------------------------------------------------- 1 | var benchtap = require('benchtap'), 2 | gen = require('../lib/test').generate, 3 | Frame = require('../lib/frame'); 4 | 5 | var STRINGS = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o"]; 6 | /* 7 | N - number of rows 8 | K - number of distinct values in id columns 9 | M - number of id columns 10 | */ 11 | function createSetup(N, K, M, useStrings){ 12 | return function(event){ 13 | // generate data 14 | var columns = { 15 | "value" : gen.Array.int(N, 100) 16 | }; 17 | var names = []; 18 | for (var m = 0; m < M; m++){ 19 | var name = "id_"+m; 20 | columns[name] = gen.Array.int(N, K); 21 | 22 | // map to strings 23 | if(useStrings){ 24 | columns[name] = columns[name].map(i => STRINGS[i]); 25 | } 26 | 27 | names[m] = name; 28 | } 29 | 30 | // create frame 31 | this.frame = new Frame(columns); 32 | //this.frame.where(row => row["id_1"] == 1); 33 | //this.frame.where("id_1", id => id == 1); 34 | }; 35 | } 36 | 37 | var N = 100000, 38 | K = 3, 39 | M = 2; 40 | 41 | var name = "where.function: " + N + "x" + K + "x" + M; 42 | 43 | benchtap(name, {"operations": N}, createSetup(N, K, M), function(){ 44 | //var result = this.frame.where(row => row["id_0"] == 1); 45 | var result = this.frame.where("id_0", id => id == 1); 46 | }); 47 | 48 | /* 49 | name += " (strings)"; 50 | 51 | benchtap(name, {"operations": N}, createSetup(N, K, true), function(){ 52 | var result = this.group.reduce("reduce-col"); 53 | }); 54 | */ 55 | 56 | 57 | var N = 1000000; 58 | 59 | name = "where.function: " + N + "x" + K + "x" + M; 60 | 61 | benchtap(name, {"operations": N}, createSetup(N, K, M), function(){ 62 | //var result = this.frame.where(row => row["id_0"] == 1); 63 | var result = this.frame.where("id_0", id => id == 1); 64 | }); 65 | 66 | /* 67 | name += " (strings)"; 68 | 69 | benchtap(name, {"operations": N}, createSetup(N, K, true), function(){ 70 | var result = this.group.reduce("reduce-col"); 71 | }); 72 | */ 73 | 74 | N = 1000000; 75 | K = 200; 76 | M = 2; 77 | 78 | var name = "where.equal: " + N + "x" + K + "x" + M; 79 | 80 | benchtap(name, {"operations": N}, createSetup(N, K, M), function(){ 81 | //var result = this.frame.where(row => row["id_0"] == 1); 82 | var result = this.frame.where("id_0", 1); 83 | }); 84 | 85 | var name = "where.in: " + N + "x" + K + "x" + M; 86 | 87 | benchtap(name, {"operations": N}, createSetup(N, K, M), function(){ 88 | //var result = this.frame.where(row => row["id_0"] == 1); 89 | var result = this.frame.where("id_0", [0, 1, 3, 10, 12, 18, 101, 52, 23, 18, 7, 12, 154, 34, 117, 5]); 90 | }); 91 | -------------------------------------------------------------------------------- 
/benchmark/where_sum.js: -------------------------------------------------------------------------------- 1 | var benchtap = require('benchtap'), 2 | gen = require('../lib/test').generate, 3 | Frame = require('../lib/frame'); 4 | 5 | var STRINGS = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o"]; 6 | /* 7 | N - number of rows 8 | K - number of distinct values in id columns 9 | M - number of id columns 10 | */ 11 | function createSetup(N, K, M, useStrings){ 12 | return function(event){ 13 | // generate data 14 | var columns = { 15 | "value" : gen.Array.int(N, 100) 16 | }; 17 | var names = []; 18 | for (var m = 0; m < M; m++){ 19 | var name = "id_"+m; 20 | columns[name] = gen.Array.int(N, K); 21 | 22 | // map to strings 23 | if(useStrings){ 24 | columns[name] = columns[name].map(i => STRINGS[i]); 25 | } 26 | 27 | names[m] = name; 28 | } 29 | 30 | // create frame 31 | this.frame = new Frame(columns); 32 | this.frame = this.frame.where("id_0", 0); 33 | //this.frame.where(row => row["id_1"] == 1); 34 | //this.frame.where("id_1", id => id == 1); 35 | }; 36 | } 37 | 38 | var N = 100000, 39 | K = 3, 40 | M = 2; 41 | 42 | var name = "where.sum: " + N + "x" + K + "x" + M; 43 | 44 | benchtap(name, {"operations": N}, createSetup(N, K, M), function(){ 45 | //var result = this.frame.where(row => row["id_0"] == 1); 46 | var result = this.frame.sum("value"); 47 | }); 48 | 49 | /* 50 | name += " (strings)"; 51 | 52 | benchtap(name, {"operations": N}, createSetup(N, K, true), function(){ 53 | var result = this.group.reduce("reduce-col"); 54 | }); 55 | */ 56 | 57 | 58 | var N = 1000000; 59 | 60 | name = "where.sum: " + N + "x" + K + "x" + M; 61 | 62 | benchtap(name, {"operations": N}, createSetup(N, K, M), function(){ 63 | //var result = this.frame.where(row => row["id_0"] == 1); 64 | var result = this.frame.sum("value"); 65 | }); 66 | 67 | /* 68 | name += " (strings)"; 69 | 70 | benchtap(name, {"operations": N}, createSetup(N, K, true), function(){ 71 | var result = this.group.reduce("reduce-col"); 72 | }); 73 | */ 74 | 75 | N = 1000000; 76 | K = 200; 77 | M = 2; 78 | 79 | var name = "where.sum: " + N + "x" + K + "x" + M; 80 | 81 | benchtap(name, {"operations": N}, createSetup(N, K, M), function(){ 82 | var result = this.frame.sum("value") 83 | }); 84 | -------------------------------------------------------------------------------- /lib/frame-index.js: -------------------------------------------------------------------------------- 1 | 2 | var reducers = require('./stream-reducers'); 3 | 4 | function isobject(obj){ return Object.prototype.toString.call(obj) === "[object Object]";} 5 | 6 | /* A heirarchical index for the Frame data structure, the result of a call to 7 | * Frame.groupby 8 | */ 9 | function FrameIndex(frame, index, groups){ 10 | this._frame = frame; 11 | this._index = index; 12 | this._groups = groups; 13 | } 14 | 15 | module.exports = FrameIndex; 16 | 17 | /* 18 | */ 19 | FrameIndex.prototype.columns = function(){ 20 | return this._frame.columns(); 21 | }; 22 | 23 | FrameIndex.prototype.groups = function(){ 24 | return this._groups; 25 | }; 26 | 27 | FrameIndex.prototype.count = function(){ 28 | var reduced = {}; 29 | var index = this._index; 30 | 31 | // depth first iteration 32 | var todo = [[index, reduced, 0]]; 33 | 34 | var result; 35 | while (todo.length > 0){ 36 | n = todo.pop();// object 37 | index = n[0]; 38 | result = n[1]; 39 | level = n[2]; 40 | 41 | var c, name; 42 | for(key in index){ // keys in object 43 | c = index[key]; 44 | name = 
this._groups[level]; 45 | 46 | // decode the key, if possible 47 | if(this._frame._keys && name in this._frame._keys){ 48 | decoder = this._frame._keys[name]; 49 | key = decoder[key]; 50 | } 51 | 52 | if(isobject(c)){ 53 | result[key] = {}; 54 | todo.push([c, result[key], level + 1]); 55 | } else { 56 | result[key] = c.length; // reduce 57 | } 58 | } 59 | } 60 | 61 | return reduced; 62 | 63 | }; 64 | 65 | FrameIndex.prototype.sum = function(selector){ 66 | return this.reduce(selector, reducers.sum); 67 | }; 68 | 69 | FrameIndex.prototype.reduce = function(selector, reducer, initial){ 70 | 71 | var reduced = {}; 72 | var index = this._index; 73 | var column = this._frame._cols[selector]; 74 | 75 | reducer = reducer || 76 | ((column.length > 0 && Object.prototype.toString.call(column[0]) == "[object Number]") ? 77 | reducers.sum : 78 | reducers.max); 79 | 80 | // depth first traversal 81 | var todo = [[index, reduced, 0]]; 82 | 83 | var result; 84 | while (todo.length > 0){ 85 | n = todo.pop();// object 86 | index = n[0]; 87 | result = n[1]; 88 | level = n[2]; 89 | 90 | var c, name; 91 | for(key in index){ // keys in object 92 | c = index[key]; 93 | group = this._groups[level]; 94 | 95 | // decode the key, if possible 96 | if(this._frame._keys && group in this._frame._keys){ 97 | decoder = this._frame._keys[group]; 98 | key = decoder[key]; 99 | } 100 | 101 | if(isobject(c)){ 102 | result[key] = {}; 103 | todo.push([c, result[key], level + 1]); 104 | } else { 105 | var indices = c; 106 | var value = indexreduce(column, indices, reducer, initial); 107 | 108 | result[key] = value; 109 | } 110 | } 111 | } 112 | 113 | return reduced; 114 | 115 | }; 116 | 117 | /* reduce a subset of an array given by a set of indices using a supplied 118 | reducing function. 
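If initial is omitted, the first indexed element seeds the reduction; an empty index set reduces to 0.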
119 | */ 120 | function indexreduce(column, indices, reducer, initial){ 121 | 122 | var start, 123 | value; 124 | 125 | // chose initial values and start of loop based on number of inputs and 126 | // supplied initial value 127 | if(initial !== void(0)){ 128 | start = 0; 129 | value = initial; 130 | } else if(indices.length > 0) { 131 | start = 1; 132 | value = column[indices[0]]; 133 | } else { 134 | start = 0; 135 | value = 0; 136 | } 137 | 138 | for(var i = start; i < indices.length; i++){ 139 | index = indices[i]; 140 | value = reducer(value, column[index], i); 141 | } 142 | 143 | return value; 144 | 145 | } 146 | -------------------------------------------------------------------------------- /lib/frame.js: -------------------------------------------------------------------------------- 1 | 2 | var reducers = require('./stream-reducers'); 3 | var BitArray = require('bit-array'); 4 | 5 | 6 | function isarray(obj){ return Object.prototype.toString.call(obj) === "[object Array]";} 7 | function isobject(obj){ return Object.prototype.toString.call(obj) === "[object Object]";} 8 | function isnumber(obj){ return Object.prototype.toString.call(obj) === "[object Number]";} 9 | function isinteger(num){ return num % 1 === 0;} 10 | function isstring(obj){ return Object.prototype.toString.call(obj) === "[object String]";} 11 | function isfunction(obj){ return Object.prototype.toString.call(obj) === "[object Function]"; } 12 | function isdate(obj){ return Object.prototype.toString.call(obj) === "[object Date]";} 13 | var typed_array_constructors = { 14 | "[object Int32Array]" : true, 15 | "[object Uint32Array]" : true, 16 | "[object Float32Array]" : true, 17 | "[object Int8Array]" : true, 18 | "[object Uint8Array]" : true, 19 | "[object Int16Array]" : true, 20 | "[object Uint16Array]" : true, 21 | "[object Float64Array]" : true 22 | } 23 | function istypedarray(obj){ 24 | var tag = Object.prototype.toString.call(obj); 25 | return tag in typed_array_constructors; 26 | } 27 | 28 | 29 | function shallowcopy(obj){ 30 | if(obj == null) return obj; // null or undefined 31 | 32 | var copy = {}; 33 | for(var key in obj){ 34 | copy[key] = obj[key]; 35 | } 36 | 37 | return copy; 38 | } 39 | 40 | //function isframe(obj){ return isarray(obj) && (obj.length == 0 || isobject(obj[0])); } 41 | 42 | 43 | /* A lightweight, high performance Columnar Data Store disguised as a Data Frame 44 | * 45 | * Interface similarity targets and inspiration: 46 | * pandas, R, Linq, rethinkDB, Matlab 47 | * 48 | * column names: 49 | * columns.values.tolist(), colnames(f), 50 | * 51 | * aggregation: 52 | * groupby, , , 53 | * 54 | * filtering: 55 | * 56 | * # References 57 | * https://github.com/StanfordHCI/datavore 58 | * http://vincentarelbundock.github.io/Rdatasets/datasets.html 59 | * https://galeascience.wordpress.com/2016/08/10/top-10-pandas-numpy-and-scipy-functions-on-github/ 60 | * https://github.com/visualfabriq/bquery/blob/master/bquery/khash.h 61 | * ## R 62 | * http://www.r-tutor.com/r-introduction/data-frame 63 | * https://www.datacamp.com/community/tutorials/15-easy-solutions-data-frame-problems-r#gs.ArNaS44 64 | * ## Pandas 65 | * http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.groupby.html 66 | * http://chrisalbon.com/python/pandas_index_select_and_filter.html 67 | * ## Linq 68 | * https://msdn.microsoft.com/en-us/library/bb534304(v=vs.110).aspx?cs-save-lang=1&cs-lang=csharp#code-snippet-1 69 | */ 70 | /* Create a data frame object from some data, like the Pandas and R objects 71 | * of 
similar name. 72 | * 73 | * @examples 74 | * 75 | * // an array of row objects, like the output from babyparse and papaparse 76 | * 77 | * rows = 78 | [ 79 | { "name" : "Finn", "age" : 16, "title" : "Finn the Human"}, 80 | { "name" : "Jake", "age" : 32 , "title" : "Jake the Dog"}, 81 | { "name" : "Simon", "age" : 1043, "title" : "Ice King"}, 82 | { "name" : "Bonnibel", "age" : 827, "title" : "Princess Bubblegum"}, 83 | { "name" : "Marceline", "age" : 1004, "title" : "Marceline the Vampire Queen"} 84 | ]; 85 | * df = Frame(rows); 86 | * 87 | * // an object (dict) mapping column names to arrays of values 88 | * 89 | * columns = 90 | * { 91 | * "name" : ["Finn", "Jake", "Simon", "Bonnibel", "Marceline"], 92 | * "age" : [16, 32, 1043, 827, 1004], 93 | * "title" : ["Finn the Human", "Jake the Dog", "Ice King", "Princess Bubblegum", "Marceline the Vampire Queen"] 94 | * }; 95 | * 96 | * df = Frame(columns); 97 | * 98 | * // an optional keys argument allows string columns to be more compactly 99 | * // represented when duplicates are present 100 | * 101 | * columns = 102 | * { 103 | * "name" : [0, 1, 2, 3, 4], 104 | * "age" : [16, 32, 1043, 827, 1004], 105 | * "title" : [0, 1, 2, 3, 4] 106 | * }; 107 | * 108 | * keys = { 109 | * "name" : ["Finn", "Jake", "Simon", "Bonnibel", "Marceline"], 110 | * "title" : ["Finn the Human", "Jake the Dog", "Ice King", "Princess Bubblegum", "Marceline the Vampire Queen"] 111 | * } 112 | * 113 | * df = Frame(columns, keys); 114 | * 115 | */ 116 | function Frame(data, keys, index, groups, filters){ 117 | // f.constructor.name return "Frame" 118 | if(!(this instanceof Frame)) return new Frame(data, keys, index, groups, filters); 119 | 120 | if(Symbol && Symbol.toStringTag) this[Symbol.toStringTag] = 'Frame'; 121 | 122 | 123 | // TODO: deep copy index 124 | if(index){ 125 | Object.defineProperty(this, "_index", { 126 | "enumerable" : false, 127 | "value" : index 128 | }); 129 | } 130 | 131 | // was a filters argument provided? 132 | if(filters){ 133 | // yes, construct a single filter from the values 134 | var filter; 135 | for(key in filters){ 136 | if(filter == null){ 137 | filter = filters[key].copy(); 138 | } else { 139 | filter.and(filters[key]); 140 | } 141 | } 142 | // copy of all defined filters 143 | Object.defineProperty(this, "_filters", { 144 | "enumerable" : false, 145 | "value" : filters 146 | }); 147 | // single filter produced from combining all filters 148 | Object.defineProperty(this, "_filter", { 149 | "enumerable" : false, 150 | "value" : filter 151 | }); 152 | Object.defineProperty(this, "_count", { 153 | "enumerable" : false, 154 | "value" : filter.count() 155 | }); 156 | } 157 | if(groups){ 158 | Object.defineProperty(this, "_groups", { 159 | "enumerable" : false, 160 | "value" : groups.slice(0) 161 | }); 162 | } 163 | 164 | // do we have input? 165 | if(data == null){ 166 | // no, just return an empty Frame 167 | return; 168 | } 169 | 170 | // what type of data input do we have? 171 | if(isobject(data)){ 172 | // object, check it's values 173 | var column, length; 174 | 175 | for(var key in data){ 176 | column = data[key]; 177 | 178 | // are the items arrays? 
179 | if(isarray(column) || istypedarray(column)){ 180 | // yes, check for consistent lengths 181 | 182 | if(length == null){ 183 | length = column.length; 184 | } else if(length !== column.length){ 185 | throw new Error("Invalid data, arrays in object must be of equal length"); 186 | } 187 | } else { 188 | // no, invalid data 189 | throw new Error("Invalid data, must be array of rows or dict of columns"); 190 | } 191 | } 192 | 193 | Object.defineProperty(this, "length", { 194 | "enumerable" : false, 195 | "value" : length 196 | }); 197 | 198 | // all checks pass use data as columns 199 | Object.defineProperty(this, "_cols", { 200 | "enumerable" : false, 201 | "value" : shallowcopy(data) 202 | }); 203 | 204 | // do we also have a key/decoding object? 205 | if(keys && isobject(keys)){ 206 | 207 | // check validity 208 | for(var key in keys){ 209 | if(!(key in this._cols)) throw new Error("Invalid data, keys object doesn't match columns"); 210 | } 211 | 212 | Object.defineProperty(this, "_keys", { 213 | "enumerable" : false, 214 | "value" : shallowcopy(keys) 215 | }); 216 | } 217 | 218 | } else if(isarray(data)) { 219 | // array, check it's elements 220 | if(data.length == 0){ 221 | return; 222 | } 223 | 224 | Object.defineProperty(this, "length", { 225 | "enumerable" : false, 226 | "value" : data.length 227 | }); 228 | // all checks pass use data as columns 229 | Object.defineProperty(this, "_cols", { 230 | "enumerable" : false, 231 | "value" : {} 232 | }); 233 | 234 | var row; 235 | for(key in data[0]){ 236 | this._cols[key] = []; 237 | } 238 | for(var i = 0; i < data.length; i++){ 239 | row = data[i]; 240 | 241 | // are the rows objects? 242 | if(isobject(row)){ 243 | // yes 244 | for(key in this._cols){ 245 | if(key in row) 246 | this._cols[key][i] = row[key]; 247 | else 248 | this._cols[key][i] = null; 249 | } 250 | } else { 251 | // no, invalid data 252 | throw new Error("Invalid data, must be array of rows or dict of columns"); 253 | } 254 | } 255 | } 256 | 257 | // expose columns as properties 258 | for(name in this._cols){ 259 | addColumn(this, name); 260 | } 261 | } 262 | 263 | Object.defineProperty(Frame.prototype, "add", { 264 | enumerable: false, 265 | value : function(name, values){ 266 | 267 | if(this.length !== values.length) 268 | throw new Error("Invalid data, arrays in object must be of equal length"); 269 | 270 | this._cols[name] = values; 271 | addColumn(this, name); 272 | } 273 | }); 274 | 275 | // internal function for exposing a data column as a property on the Frame 276 | function addColumn(frame, name){ 277 | Object.defineProperty(frame, name, { 278 | enumerable : true, 279 | configurable: true, 280 | get: function(){ 281 | // decode? 
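// if this column has a decode key in _keys, map its stored integer codes back to their decoded values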
282 | var result = []; 283 | if(frame._keys && name in frame._keys){ 284 | // yes, get keys 285 | var keys = frame._keys[name]; 286 | 287 | // map data column onto decoded column 288 | // data column should be an array of indices into 289 | // the keys array 290 | var column = frame._cols[name]; 291 | result = new Array(column.length); 292 | for(var i = 0; i < column.length;i++){ 293 | result[i] = keys[column[i]]; 294 | } 295 | } else { 296 | // no, just return the column 297 | result = frame._cols[name]; 298 | } 299 | 300 | if(frame._filter){ 301 | return result.filter(function(item, i){ return frame._filter.get(i);}); 302 | } else { 303 | return result; 304 | } 305 | }, 306 | set : function(data){ 307 | if(!isarray(data)) throw new Error("data must be an array"); 308 | if(data.length != frame.length) throw new Error("array must match length"); 309 | 310 | if(frame._keys && name in frame._keys){ 311 | throw new Error("setting keyed column not supported yet"); 312 | } else { 313 | frame._cols[name] = data.slice(0); 314 | } 315 | } 316 | }); 317 | } 318 | 319 | /* 320 | // alternate syntax for toStringTag 321 | get [Symbol.toStringTag]() { 322 | return 'Validator'; 323 | } 324 | */ 325 | module.exports = Frame; 326 | 327 | /* 328 | Get column names 329 | */ 330 | Object.defineProperty(Frame.prototype, "columns", { 331 | enumerable: false, 332 | get : function(){ 333 | return Object.keys(this._cols); 334 | } 335 | }); 336 | 337 | Object.defineProperty(Frame.prototype, "rename", { 338 | enumerable: false, 339 | value : function(old_name, new_name){ 340 | if(!(old_name in this._cols)) 341 | throw new Error("Couldn't find a column named '" + selector + "'"); 342 | 343 | // copy column to new name 344 | var column = this._cols[old_name]; 345 | this._cols[new_name] = column; 346 | 347 | // delete old column 348 | delete this._cols[old_name]; 349 | delete this[old_name]; 350 | 351 | // rename any decode key 352 | if(this._keys && old_name in this._keys){ 353 | this._keys[new_name] = this._keys[old_name]; 354 | delete this._keys[old_name] 355 | } 356 | 357 | addColumn(this, new_name); 358 | 359 | } 360 | }) 361 | 362 | Object.defineProperty(Frame.prototype, "distinct", {"enumerable": false, "value" : distinct}); 363 | 364 | function distinct(selector){ 365 | if(!(selector in this._cols)) 366 | throw new Error("Couldn't find a column named '" + selector + "'"); 367 | 368 | var key; 369 | if(this._keys) key = this._keys[selector]; 370 | 371 | var column = this._cols[selector]; 372 | var set = {}; 373 | var value; 374 | for(var i = 0; i < column.length; i++){ 375 | if(key) value = key[column[i]]; 376 | else value = column[i]; 377 | if(this._filter){ 378 | if(this._filter.get(i)) set[value] = value; 379 | } else { 380 | set[value] = value; 381 | } 382 | } 383 | 384 | // this step enables non-string values 385 | var vals = []; 386 | for(key in set) vals.push(set[key]); 387 | 388 | return vals; 389 | }; 390 | 391 | Object.defineProperty(Frame.prototype, "where", {"enumerable" : false, "value" : where}); 392 | 393 | /* element of, takes an array as an argument 394 | create and return a function that takes a single argument and returns true if 395 | that argument is contained in the given array 396 | 397 | NOTE: null and undefined may both be present in arr, and will be distinct from one another 398 | */ 399 | function el(arr){ 400 | var set = {}; 401 | for (var i = 0; i < arr.length; i++) set[arr[i]] = true; 402 | return function(v){ return set[v] != null;}; 403 | } 404 | 405 | function eq(a){ 406 | 
return function(v){ return v == a; }; 407 | } 408 | 409 | function where(selector, condition){ 410 | 411 | if(!(selector in this._cols)) 412 | throw new Error("Couldn't find a column named '" + selector + "'"); 413 | 414 | var column = this._cols[selector]; 415 | var filter = new BitArray(this.length); 416 | 417 | var bits = filter.wordArray; 418 | var index = 0; 419 | var word = 0|0; 420 | var offset; 421 | var max = column.length - 1; 422 | 423 | if(isnumber(condition) || isstring(condition)){ 424 | // keyed selector column? 425 | if(isstring(condition) && this._keys && selector in this._keys){ 426 | // yes, encode condition 427 | var keys = this._keys[selector]; 428 | condition = keys.indexOf(condition); 429 | } 430 | for(var i = 0; i < bits.length; i++){ 431 | word = 0|0; 432 | offset = i * 32; 433 | var j = 31 + offset; 434 | if(j > max) j = max; 435 | for(; j >= offset; j--){ 436 | if(column[j] === condition) word |= 1; 437 | if(j > offset) word <<= 1; 438 | } 439 | bits[i] = word; 440 | } 441 | } else { 442 | if(isarray(condition) || istypedarray(condition)){ 443 | condition = el(condition); 444 | } 445 | if(this._keys && selector in this._keys){ 446 | // yes, encode condition 447 | var keys = this._keys[selector]; 448 | } 449 | 450 | var value; 451 | for(var i = 0; i < bits.length; i++){ 452 | word = 0|0; 453 | offset = i * 32; 454 | var j = 31 + offset; 455 | if(j > max) j = max; 456 | for(; j >= offset; j--){ 457 | if(keys) value = keys[column[j]]; 458 | else value = column[j]; 459 | if(condition(value)) word |= 1; 460 | if(j > offset) word <<= 1; 461 | } 462 | bits[i] = word; 463 | } 464 | } 465 | 466 | // create and return a new Frame with the new filter 467 | var filters = {}; 468 | if(this._filters){ 469 | Object.assign(filters, this._filters); 470 | } 471 | filters[selector] = filter; 472 | 473 | return new Frame(this._cols, this._keys, this._index, this._groups, filters); 474 | 475 | } 476 | 477 | Object.defineProperty(Frame.prototype, "join", {"enumerable" : false, "value" : join}); 478 | Object.defineProperty(Frame.prototype, "groupby", {"enumerable" : false, "value" : groupby}); 479 | Object.defineProperty(Frame.prototype, "ungroup", {"enumerable" : false, "value" : ungroup}); 480 | Object.defineProperty(Frame.prototype, "count", {"enumerable" : false, "value" : count}); 481 | Object.defineProperty(Frame.prototype, "argmax", {"enumerable" : false, "value": argmax}); 482 | Object.defineProperty(Frame.prototype, "argmin", {"enumerable" : false, "value": argmin}); 483 | Object.defineProperty(Frame.prototype, "min", {"enumerable" : false, "value": min}); 484 | Object.defineProperty(Frame.prototype, "max", {"enumerable" : false, "value": max}); 485 | Object.defineProperty(Frame.prototype, "sum", {"enumerable" : false, "value": sum}); 486 | Object.defineProperty(Frame.prototype, "mean", {"enumerable" : false, "value": mean}); 487 | Object.defineProperty(Frame.prototype, "median", {"enumerable" : false, "value": median}); 488 | Object.defineProperty(Frame.prototype, "reduce", {"enumerable" : false, "value": reduce}); 489 | 490 | 491 | /* use the partition method to find the median */ 492 | function median(selector){ 493 | 494 | var column = this._cols[selector]; 495 | var key = selector && this._keys ? 
this._keys[selector] : null; 496 | 497 | if (column.length == 0) return null; 498 | 499 | var p, m; 500 | 501 | middle = column.length / 2 | 0; 502 | 503 | var low = 0, 504 | high = column.length - 1; 505 | 506 | var i = 0; 507 | // partition the array 508 | while(p != middle && i < column.length){ 509 | i++; 510 | p = partition(column, low, high); 511 | 512 | if( p < middle) low = p + 1; 513 | else high = p - 1; 514 | } 515 | 516 | if(i == column.length){ 517 | console.error("Maximum partition reached"); 518 | } 519 | 520 | if(key) return key[column[p]]; 521 | else return column[p]; 522 | } 523 | 524 | /* partition an array, in place */ 525 | function partition(arr, low, high){ 526 | 527 | if (low >= high) return high; 528 | 529 | // choose a random index for the pivot 530 | var pivot = randint(low, high); 531 | 532 | // swap pivot into last location 533 | swap(arr, high, pivot); 534 | 535 | pivot = low; // location of pivot in result 536 | // scan array and swap elements less than pivot into low end 537 | for(var i = low; i < high; i++){ 538 | if (arr[i] < arr[high]){ 539 | swap(arr, i, pivot); 540 | pivot++; 541 | } 542 | } 543 | 544 | swap(arr, high, pivot); 545 | 546 | return pivot; 547 | 548 | } 549 | 550 | /* get random integer in the inclusive interval [a, b] 551 | a and b must be integers for correct performance 552 | */ 553 | function randint(a, b){ 554 | r = Math.random(); //[0, 1) 555 | return a + Math.floor((b - a + 1)*r); 556 | } 557 | 558 | function swap(arr, i, j){ 559 | var temp = arr[i]; 560 | arr[i] = arr[j]; 561 | arr[j] = temp; 562 | } 563 | 564 | function join(frame, link){ 565 | 566 | // verify length of link column 567 | if(link.length !== this.length) throw new Error("Length of link column must match frame."); 568 | 569 | if(!("_cols" in frame)) throw new Error("First argument must be a frame."); 570 | 571 | // duplicate columns and keys 572 | var columns = shallowcopy(this._cols), 573 | keys = shallowcopy(this._keys) || {}; 574 | 575 | // add virtual columns for each column in the joining frame 576 | for(name in frame._cols){ 577 | // skip columns with duplicate names 578 | if(name in columns) continue; 579 | 580 | // don't join encoded columns 581 | if(frame._keys && name in frame._keys) continue; 582 | 583 | // add link column as encoded column data 584 | columns[name] = link; 585 | // add joining frame column as key column 586 | keys[name] = frame._cols[name]; 587 | } 588 | 589 | return new Frame(columns, keys, this._index, this._groups, this._filters); 590 | 591 | } 592 | 593 | 594 | /* 595 | * group the data in the frame by a selector or set of selectors 596 | */ 597 | function groupby(){ 598 | 599 | if(arguments.length == 0) throw new Error("No arguments provided"); 600 | 601 | // collect arguments into list of selectors 602 | var selectors = [], 603 | arg; 604 | if(arguments.length === 1){ 605 | arg = arguments[0]; 606 | if(isstring(arg)) selectors = [arg]; 607 | else if(isarray(arg)) selectors = arg; 608 | } else { 609 | for(var i = 0; i < arguments.length; i++){ 610 | arg = arguments[i]; 611 | if(!isstring(arg)) throw new Error("Invalid arguments"); 612 | 613 | selectors.push(arg); 614 | } 615 | } 616 | 617 | var index = {}; 618 | if(this._index){ 619 | index = this._index; 620 | selectors = this._groups.concat(selectors); 621 | } 622 | 623 | // get references to all the columns involved in groups 624 | var columns = Array(selectors.length); 625 | var keys = {}; 626 | for (var m = 0; m < selectors.length; m++){ 627 | selector = selectors[m]; 628 | 629 | 
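// Each selector must name an existing column. Once the columns (and any
// decode keys) are collected, the rows are scanned to build a nested index:
// one level per selector, with leaf arrays of row indices. Illustrative
// sketch for groupby("a", "b") over a = [0, 0, 1], b = [1, 2, 1] (made-up):
//   { 0: { 1: [0], 2: [1] }, 1: { 1: [2] } }
// Reductions (count, sum, mean, ...) later walk this tree via treereduce.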
if(!(selector in this._cols)) 630 | throw new Error("Couldn't find a column named '" + selector + "'"); 631 | 632 | columns[m] = this._cols[selector]; 633 | if(this._keys && selector in this._keys) keys[m] = this._keys[selector]; 634 | } 635 | 636 | var N = columns[0].length; 637 | var path = Array(columns.length); 638 | // iterate through rows 639 | for(var i = 0; i < N; i++){ 640 | 641 | // compute distinct values for group columns describing the bin for 642 | // the current row 643 | for (var m = 0; m < columns.length; m++){ 644 | var column = columns[m]; 645 | if(m in keys) path[m] = keys[m][column[i]]; 646 | else path[m] = column[i]; 647 | } 648 | 649 | // add this row to the index using the group column values 650 | // by descending the hierarchy to the correct leaf 651 | var level = index; 652 | for(var j = 0; j < path.length - 1; j++){ 653 | 654 | key = path[j]; 655 | next = level[key]; 656 | if(next == null || isarray(next)){ 657 | next = {}; 658 | level[key] = next; 659 | } 660 | level = next; 661 | } 662 | 663 | // update array of row indices stored in leaf 664 | key = path[path.length - 1]; 665 | var arr = level[key]; 666 | if(arr == null){ 667 | level[key] = [i]; 668 | } else { 669 | arr[arr.length] = i; 670 | } 671 | } 672 | 673 | /* 674 | this._index = index; 675 | this._groups = selectors.slice(0); 676 | return this; 677 | */ 678 | return new Frame(this._cols, this._keys, index, selectors, this._filters); 679 | } 680 | 681 | /* remove the grouping created by the last remaining groupby selector */ 682 | function ungroup(){ 683 | if(this._index == null || this._groups.length < 1) 684 | throw new Error("Not enough groups") 685 | 686 | var frame = new Frame(this._cols, this._keys, null, null, this._filters); 687 | 688 | // handle special case of single group 689 | if(this._groups.length == 1) 690 | return frame; 691 | 692 | // for other cases do new groupby with one fewer groups 693 | return frame.groupby(this._groups.slice(0, -1)); 694 | } 695 | 696 | function count(){ 697 | if(this._index) return this.reduce(); 698 | 699 | if(this._filter) return this._count; 700 | 701 | return this.length; 702 | } 703 | 704 | function min(selector){ 705 | return this.reduce(selector, reducers.min); 706 | } 707 | 708 | function max(selector){ 709 | return this.reduce(selector, reducers.max); 710 | } 711 | 712 | function sum(selector){ 713 | return this.reduce(selector, reducers.sum); 714 | } 715 | 716 | function mean(selector){ 717 | return this.reduce(selector, reducers.mean); 718 | } 719 | 720 | function argmax(selector){ 721 | return this.reduce(selector, reducers.argmax); 722 | } 723 | 724 | function argmin(selector){ 725 | return this.reduce(selector, reducers.argmin); 726 | } 727 | 728 | function reduce(selector, reducer, initial){ 729 | 730 | var column = selector ? this._cols[selector] : null; 731 | var key = selector && this._keys ? this._keys[selector] : null; 732 | 733 | // choose default reduce, if none was supplied 734 | var is_numeric = column && column.length > 0 && Object.prototype.toString.call(column[0]) == "[object Number]"; 735 | reducer = reducer || (is_numeric ? 
reducers.sum : reducers.max); 736 | 737 | if(this._index){ 738 | return treereduce(column, key, this._index, this._keys, this._groups, this._filter, reducer, initial); 739 | } else if(this._filter) { 740 | return filterreduce(column, key, this._filter, reducer, initial); 741 | } else { 742 | return fullreduce(column, key, reducer, initial); 743 | } 744 | } 745 | 746 | function treereduce(column, rkey, index, keys, groups, filter, reducer, initial){ 747 | 748 | var reduced = {}; 749 | var parents = {}; 750 | 751 | // depth first traversal 752 | var todo = [[index, null, 0]]; 753 | var leaves = []; 754 | 755 | var result, pkey, level, n; 756 | while (todo.length > 0){ 757 | n = todo.pop();// object 758 | index = n[0]; 759 | pkey = n[1]; 760 | level = n[2]; 761 | result = {}; // container for this subtree in result 762 | 763 | var c, name; 764 | for(key in index){ // keys in object 765 | c = index[key]; 766 | group = groups[level]; 767 | 768 | // decode the key, if possible 769 | /* 770 | if(keys && group in keys){ 771 | decoder = keys[group]; 772 | key = decoder[key]; 773 | }*/ 774 | 775 | ckey = pkey ? pkey + "@" + key : key; 776 | 777 | if(isobject(c)){ 778 | todo.push([c, ckey, level + 1]); 779 | } else { 780 | var indices = c; 781 | var filtered = filterindices(indices, filter); 782 | if(filtered.length != 0){ 783 | var value; 784 | if(column){ 785 | value = subsetreduce(column, rkey, filtered, reducer, initial); 786 | } else { 787 | value = filtered.length; // default to count 788 | } 789 | leaves.push([ckey, value]); 790 | } 791 | } 792 | parents[ckey] = [pkey, result]; 793 | } 794 | } 795 | 796 | var root; 797 | while (leaves.length > 0){ 798 | n = leaves.pop(); 799 | ckey = n[0]; // composite key, parent + child 800 | value = n[1]; 801 | 802 | p = parents[ckey]; 803 | pkey = p[0]; 804 | index = p[1]; 805 | 806 | key = pkey ? ckey.slice(pkey.length + 1) : ckey; 807 | index[key] = value; 808 | if(pkey == null){ 809 | root = index; 810 | } else { 811 | leaves.push([pkey, index]); 812 | } 813 | } 814 | 815 | return root; 816 | }; 817 | 818 | function empty (obj){ 819 | for (var key in obj) { 820 | if (obj.hasOwnProperty(key)) { 821 | return false 822 | } 823 | } 824 | return true 825 | } 826 | 827 | function filterindices(indices, filter){ 828 | if(!filter) return indices; 829 | 830 | result = []; 831 | for(var i = 0; i < indices.length; i++){ 832 | index = indices[i]; 833 | if(filter.get(index)){ 834 | result.push(index); 835 | } 836 | } 837 | return result; 838 | } 839 | 840 | /* reduce a subset of an array given by a set of indices using a supplied 841 | reducing function. 842 | 843 | Extracting this code into a function produces an order of magnitude speedup. 844 | I don't know why. 
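A plausible (unverified) explanation: the extracted loop becomes a small,
monomorphic function that the JIT can optimize and inline on its own, instead
of sitting inside the much larger treereduce body where type feedback is mixed.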
845 | */ 846 | function subsetreduce(column, key, indices, reducer, initial){ 847 | 848 | var value = null; 849 | if(initial) value = initial; 850 | 851 | if(key){ 852 | for(var i = 0; i < indices.length; i++){ 853 | index = indices[i]; 854 | if(value === null) value = key[column[index]]; 855 | else value = reducer(value, key[column[index]], i); 856 | } 857 | } else { 858 | for(var i = 0; i < indices.length; i++){ 859 | index = indices[i]; 860 | if(value === null) value = column[index]; 861 | else value = reducer(value, column[index], i); 862 | } 863 | } 864 | 865 | return value || 0; 866 | } 867 | 868 | function filterreduce(column, key, filter, reducer, initial){ 869 | 870 | var value = null; 871 | if(initial) value = initial; 872 | 873 | var word, 874 | mask, 875 | cutoff; 876 | var bits = filter.wordArray; 877 | var total = 0; 878 | var max = column.length; 879 | 880 | if(key){ 881 | for(var i = 0; i < bits.length; i++){ 882 | word = bits[i]; 883 | if(word !== 0){ 884 | cutoff = (i + 1) * 32; 885 | if(cutoff > max) cutoff = max; 886 | mask = 1; 887 | for(var j = i * 32; j < cutoff; j++){ 888 | if((word & mask) !== 0) { 889 | if(value === null) value = key[column[j]]; 890 | else value = reducer(value, key[column[j]], total); 891 | total++; 892 | } 893 | mask <<= 1; 894 | } 895 | } 896 | } 897 | } else { 898 | for(var i = 0; i < bits.length; i++){ 899 | word = bits[i]; 900 | if(word !== 0){ 901 | cutoff = (i + 1) * 32; 902 | if(cutoff > max) cutoff = max; 903 | mask = 1; 904 | for(var j = i * 32; j < cutoff; j++){ 905 | if((word & mask) !== 0) { 906 | if(value === null) value = column[j]; 907 | else value = reducer(value, column[j], total); 908 | total++; 909 | } 910 | mask <<= 1; 911 | } 912 | } 913 | } 914 | } 915 | 916 | 917 | return value || 0; 918 | } 919 | 920 | function fullreduce(column, key, reducer, initial){ 921 | 922 | var start, 923 | value; 924 | 925 | // chose initial values and start of loop based on number of inputs and 926 | // supplied initial value 927 | if(initial !== void(0)){ 928 | start = 0; 929 | value = initial; 930 | } else if(column.length > 0) { 931 | start = 1; 932 | value = key ? key[column[0]] : column[0]; 933 | } else { 934 | start = 0; 935 | value = 0; 936 | } 937 | 938 | if(key){ 939 | for(var i = start; i < column.length; i++){ 940 | value = reducer(value, key[column[i]], i); 941 | } 942 | } else { 943 | for(var i = start; i < column.length; i++){ 944 | value = reducer(value, column[i], i); 945 | } 946 | } 947 | 948 | return value; 949 | } 950 | -------------------------------------------------------------------------------- /lib/stream-reducers.js: -------------------------------------------------------------------------------- 1 | 2 | module.exports = { 3 | "count" : count, 4 | "sum" : sum, 5 | "max" : max, 6 | "min" : min, 7 | "mean" : mean, 8 | "mode" : mode, 9 | "median" : median, 10 | "argmax" : argmax, 11 | "argmin" : argmin 12 | }; 13 | 14 | /* Array.prototype.reduce style function for finding the maximum 15 | * @examples 16 | * [1, 1, 1].reduce(ds.reduce.max); // => 1 17 | * [3, 1, 3, 5].reduce(ds.reduce.max); // => 5 18 | * reduce({"a" : 1, "b" : 0, "c" : 2}, ds.reduce.max); // => 2 19 | */ 20 | function max(agg, val) { return agg > val ? 
agg : val; }; 21 | 22 | /* Array.prototype.reduce style function for finding the minimum 23 | * @examples 24 | * [1, 1, 1].reduce(ds.reduce.min); // => 1 25 | * [3, 1, 3, 5].reduce(ds.reduce.min); // => 1 26 | * reduce({"a" : 1, "b" : 0, "c" : 2}, ds.reduce.min); // => 0 27 | */ 28 | function min(agg, val) { return agg < val ? agg : val; }; 29 | 30 | /* Array.prototype.reduce style function for finding the most common value 31 | * @examples 32 | * [1, 1, 1].reduce(ds.reduce.mode); // => 1 33 | * [1, 3, 3, 7].reduce(ds.reduce.mode); // => 3 34 | * reduce({"a" : 1, "b" : 0, "c" : 2}, ds.reduce.mode); // => 1 35 | */ 36 | function mode(agg, val, n) { 37 | if(n === 0) return val; 38 | 39 | var self; 40 | if(n === 1){ 41 | // internal state hack (compatible with groupby) 42 | self = mode.state = {}; 43 | self.values = {}; 44 | self.values[agg] = 1; 45 | self.argmax = agg; 46 | } else { 47 | self = mode.state; 48 | } 49 | 50 | if(val in self.values) 51 | self.values[val] += 1; 52 | else 53 | self.values[val] = 1; 54 | 55 | if(self.values[val] > self.values[agg]) 56 | self.argmax = val; 57 | 58 | return self.argmax; 59 | } 60 | 61 | function argmax(agg, val, n){ 62 | var self; 63 | if(n === 0){ 64 | // internal state hack (compatible with groupby) 65 | self = argmax.state = {}; 66 | self.max = val; 67 | return 0; 68 | } 69 | 70 | if(n === 1){ 71 | if(argmax.state == null) self = argmax.state = {}; 72 | else self = argmax.state; 73 | // is this the first time we've called this function on this array? 74 | if(self.max != null && self.argmax == null){ 75 | // no 76 | } else { 77 | // yes 78 | self.max = agg; 79 | } 80 | self.argmax = 0; 81 | } else { 82 | self = argmax.state; 83 | } 84 | 85 | if(val > self.max){ 86 | self.max = val; 87 | self.argmax = n; 88 | } 89 | 90 | return self.argmax; 91 | } 92 | 93 | 94 | function argmin(agg, val, n){ 95 | var self; 96 | if(n === 0){ 97 | // internal state hack (compatible with groupby) 98 | self = argmin.state = {}; 99 | self.min = val; 100 | return 0; 101 | } 102 | 103 | if(n === 1){ 104 | if(argmin.state == null) self = argmin.state = {}; 105 | else self = argmin.state; 106 | // is this the first time we've called this function on this array? 107 | if(self.min != null && self.argmin == null){ 108 | // no 109 | } else { 110 | // yes 111 | self.min = agg; 112 | } 113 | self.argmin = 0; 114 | } else { 115 | self = argmin.state; 116 | } 117 | 118 | if(val < self.min){ 119 | self.min = val; 120 | self.argmin = n; 121 | } 122 | 123 | return self.argmin; 124 | } 125 | 126 | /* Array.prototype.reduce style function for finding the middle value 127 | * @examples 128 | * [1, 1, 1].reduce(ds.reduce.median); // => 1 129 | * [1, 3, 3, 7].reduce(ds.reduce.median); // => 3 130 | * [4, 1, 7].reduce(ds.reduce.median); // => 4 131 | * reduce({"a" : 4, "b" : 1, "c" : 7}, ds.reduce.median); // => 4 132 | 133 | DON'T USE THIS FUNCTION, IT'S VERY SLOW 134 | */ 135 | function median(agg, val, n) { 136 | if(n === 0) return val; 137 | 138 | if(n === 1){ 139 | // internal state hack (compatible with groupby) 140 | self = median.state = {}; 141 | self.values = [agg]; 142 | } else { 143 | self = median.state; 144 | } 145 | 146 | // insert the new value into the sorted array 147 | insert(self.values, val); 148 | 149 | var middle = self.values.length / 2 | 0; 150 | // even number of elements? 
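// (worked trace for [4, 1, 7], matching the @example above: the sorted buffer
// grows [4] -> [1, 4] -> [1, 4, 7]; the intermediate even-length step returns
// (1 + 4) / 2 = 2.5 and the final odd-length step returns 4)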
151 | if(self.values.length % 2 !== 0){ 152 | // no, return the middle one 153 | return self.values[middle]; 154 | } else { 155 | // yes, return the average of the middle two 156 | return (self.values[middle - 1] + self.values[middle]) / 2; 157 | } 158 | } 159 | 160 | /* Array.prototype.reduce style function for counting number of elements 161 | * @examples 162 | * [1, 1, 1].reduce(ds.reduce.count); // => 3 163 | * [3, 1, 3, 5].reduce(ds.reduce.count); // => 4 164 | * reduce({"a" : 1, "b" : 0, "c" : 2}, ds.reduce.count); // => 3 165 | */ 166 | function count(agg, val, n){ return n + 1; }; 167 | 168 | /* Array.prototype.reduce style function for finding the sum 169 | * @examples 170 | * [1, 1, 1].reduce(ds.reduce.sum); // => 3 171 | * [3, 1, 3, 5].reduce(ds.reduce.sum); // => 12 172 | * reduce({"a" : 1, "b" : 0, "c" : 2}, ds.reduce.sum); // => 3 173 | */ 174 | function sum(agg, val){ return agg + val; }; 175 | 176 | /* Array.prototype.reduce style function for finding the arithmetic mean 177 | * @examples 178 | * [1, 1, 1].reduce(ds.reduce.mean); // => 1 179 | * [3, 1, 3, 5].reduce(ds.reduce.mean); // => 3 180 | * reduce({"a" : 1, "b" : 0, "c" : 2}, ds.reduce.mean); // => 1 181 | */ 182 | function mean(agg, val, n){ return (agg + ((val - agg)/(n + 1))); }; 183 | 184 | var d = function(a, b){ return a > b ? 1 : a < b ? -1 : 0;}; 185 | 186 | function insert(arr, el){ 187 | var index = binarySearch(arr, el, d); 188 | arr.splice(index, 0, el); 189 | 190 | return arr; 191 | }; 192 | 193 | var binarySearch = function binarySearch(arr, el, comparator) { 194 | 195 | var m = 0; 196 | var n = arr.length - 1; 197 | while (m <= n) { 198 | var k = (n + m) >> 1; 199 | var cmp = comparator(el, arr[k]); // comparator(arr[k], el); 200 | if (cmp > 0) { 201 | m = k + 1; 202 | } else if(cmp < 0) { 203 | n = k - 1; 204 | } else { 205 | return k; 206 | } 207 | } 208 | 209 | return m; 210 | } 211 | -------------------------------------------------------------------------------- /lib/test.js: -------------------------------------------------------------------------------- 1 | var async = require('async'), 2 | path = require('path'), 3 | floader = require('floader'), 4 | aloader = require('arrayloader'); 5 | 6 | test = {}; 7 | 8 | test.DEFAULT_TYPE = DEFAULT_TYPE = "int32"; 9 | 10 | test.type_map = type_map = { 11 | "int8" : ".i8", 12 | "uint8" : ".u8", 13 | "int16" : ".i16", 14 | "uint16" : ".u16", 15 | "int32" : ".i32", 16 | "uint32" : ".u32", 17 | "float32" : ".f32", 18 | "float64" : ".f64", 19 | "str8" : ".s8", 20 | "str16" : ".s16" 21 | }; 22 | 23 | test.extension_map = extension_map = { 24 | ".i8" : Int8Array, 25 | ".u8" : Uint8Array, 26 | ".i16" : Int16Array, 27 | ".u16" : Uint16Array, 28 | ".i32" : Int32Array, 29 | ".u32" : Uint32Array, 30 | ".f32" : Float32Array, 31 | ".f64" : Float64Array, 32 | ".s8" : Int8Array, 33 | ".s16" : Int16Array 34 | }; 35 | 36 | test.float_types = { 37 | "float32" : true, 38 | "float64" : true 39 | }; 40 | 41 | test.string_types = { 42 | "str8" : true, 43 | "str16" : true 44 | }; 45 | 46 | 47 | /* load a binary file as a TypedArray with the type given by the extension */ 48 | function loadArray(filePath, cb){ 49 | 50 | var ext = path.extname(filePath); 51 | ext = ext.toLowerCase(); 52 | 53 | if (ext in extension_map) 54 | constructor = extension_map[ext]; 55 | else 56 | constructor = Int32Array; 57 | 58 | return aloader.load(filePath, constructor, cb); 59 | } 60 | 61 | test.load = function(directory, names, types, callback){ 62 | 63 | // array of paths to matrix data files for 
current test 64 | var paths = names.map(function(name, i){ 65 | type = types[i]; 66 | if (!(type in type_map)) type = DEFAULT_TYPE; 67 | 68 | ext = type_map[types[i]]; 69 | 70 | return directory + name + ext; 71 | }); 72 | 73 | //console.log(testFiles); 74 | async.map(paths, loadArray, 75 | function(err, results){ 76 | 77 | if(err) return callback(err); 78 | 79 | callback(err, results); 80 | } 81 | ); 82 | } 83 | /* a key file is just a JSON array of strings 84 | the index of the string in the array is it's code 85 | */ 86 | function loadKey(filePath, cb){ 87 | 88 | floader.load(filePath, function(err, key){ 89 | if(err) return cb(err); 90 | 91 | return cb(null, JSON.parse(key)); 92 | }); 93 | } 94 | 95 | test.load_key = function(directory, names, types, callback){ 96 | 97 | // array of paths to matrix data files for current test 98 | var paths = names.map(function(name, i){ 99 | return directory + name + ".key"; 100 | }); 101 | 102 | //console.log(testFiles); 103 | async.map(paths, loadKey, 104 | function(err, results){ 105 | 106 | if(err) return callback(err); 107 | 108 | callback(err, results); 109 | } 110 | ); 111 | } 112 | 113 | function isobject(obj){ return Object.prototype.toString.call(obj) === "[object Object]";} 114 | 115 | /* is there a key in object 'a' not found in object 'b'? 116 | if so, return the first key that's not found 117 | if not, return null 118 | */ 119 | function diffkeys(a, b){ 120 | same_keys = true; 121 | for(key in a){ 122 | same_keys &= (key in b); 123 | if(!same_keys){ 124 | return key; 125 | } 126 | } 127 | 128 | return null; 129 | } 130 | 131 | /* comp is s comparison function for leaves 132 | a - actual 133 | b - expected 134 | */ 135 | function treediff(a, b, comp){ 136 | 137 | var p = "(r)"; 138 | var todo = [[a, b, p]]; // tuple of (a, b, p) 139 | var parents = { p : null }; 140 | 141 | var diff_key = null, 142 | diff_a = null, 143 | diff_b = null; 144 | var t; 145 | while (todo.length > 0 && !diff_key){ 146 | t = todo.pop(); 147 | n_a = t[0]; 148 | n_b = t[1]; 149 | p = t[2]; 150 | 151 | // are all the keys the same? 152 | diff_b = diffkeys(n_b, n_a); 153 | if(diff_b){ 154 | diff_key = p; 155 | break; 156 | } 157 | diff_a = diffkeys(n_a, n_b); 158 | if(diff_a){ 159 | diff_key = p; 160 | break; 161 | } 162 | 163 | // check children 164 | for(key in n_b){ 165 | // both objects/internal nodes? 166 | if(isobject(n_b[key]) && isobject(n_a[key])){ 167 | // yes, add to stack 168 | parents[key] = p; 169 | todo.push([n_a[key], n_b[key], key]); 170 | 171 | // both leaves? 172 | } else if(!isobject(n_b[key]) && !isobject(n_a[key])) { 173 | // yes, compare values 174 | if(!comp(n_b[key], n_a[key])){ 175 | diff_key = key; 176 | diff_a = n_a[key]; 177 | diff_b = n_b[key]; 178 | break; 179 | } 180 | } else { 181 | // one is leaf the other is internal 182 | diff_key = key; 183 | if(isobject(n_b)){ 184 | diff_a = n_a[key]; 185 | } else { 186 | diff_b = n_b[key]; 187 | } 188 | break; 189 | } 190 | } 191 | } 192 | 193 | var path; 194 | // difference found? 
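// (if so, the code below walks the recorded parents to rebuild the path from
// the root and returns {"path": [...], "a": actual, "b": expected};
// a null return means the two trees were equivalent)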
195 | if(diff_key){ 196 | // yes, reconstruct the path 197 | var n = diff_key; 198 | path = [n]; 199 | while(parents[n]){ 200 | n = parents[n]; 201 | path.push(n); 202 | } 203 | 204 | // diff_a and diff_b are both present on a leaf difference 205 | // only one is present for an internal node difference 206 | return {"path" : path.reverse(), "a" : diff_a, "b" : diff_b}; 207 | } 208 | 209 | return null; 210 | 211 | } 212 | 213 | test.assert = {}; 214 | test.assert.tree = {}; 215 | 216 | /* determine whether two trees are equivalent 217 | */ 218 | test.assert.tree.equal = function(t, a, b, msg) { 219 | var fail = treediff(a, b, function(a_n, b_n){ 220 | return a_n === b_n; 221 | }); 222 | 223 | msg = msg || 'trees should be equal'; 224 | return treeassert(t, fail, msg); 225 | }; 226 | 227 | /* determine whether two trees are approximately equivalent: 228 | internal nodes are identical 229 | leaves are within specified floating point tolerances 230 | */ 231 | test.assert.tree.allclose = function(t, a, b, msg, RTOL, ATOL) { 232 | RTOL= RTOL || 1e-05; // for 32 bit precision: 1e-06 233 | ATOL= ATOL || 1e-08; 234 | 235 | // treeequal with a floating point comparison function 236 | var fail = treediff(a, b, function(a_n, b_n){ 237 | return Math.abs(a_n - b_n) <= ATOL + RTOL * Math.abs(b_n) 238 | }); 239 | 240 | msg = msg || 'trees should be allclose'; 241 | return treeassert(t, fail, msg); 242 | }; 243 | 244 | test.assert.close = function(t, a, b, msg, RTOL, ATOL){ 245 | RTOL= RTOL || 1e-05; // for 32 bit precision: 1e-06 246 | ATOL= ATOL || 1e-08; 247 | 248 | // treeequal with a floating point comparison function 249 | var success = Math.abs(a - b) <= ATOL + RTOL * Math.abs(b) 250 | 251 | t._assert(success, { 252 | message : msg, 253 | operator : 'close', 254 | actual : a, 255 | expected : b, 256 | extra : null 257 | }); 258 | 259 | return success; 260 | } 261 | 262 | var NULL_PLACEHOLDER = "(null)"; 263 | function treeassert(t, fail, msg){ 264 | 265 | if(fail){ 266 | var actual = fail.path.join(" -> "), 267 | expected = fail.path.join(" -> "); 268 | 269 | fail.a = fail.a || NULL_PLACEHOLDER; 270 | fail.b = fail.b || NULL_PLACEHOLDER; 271 | actual += " -> " + fail.a; 272 | expected += " -> " + fail.b; 273 | } 274 | 275 | t._assert(!fail, { 276 | message : msg, 277 | operator : 'tree.equal', 278 | actual : actual, 279 | expected : expected, 280 | extra : null 281 | }); 282 | 283 | return !fail; 284 | }; 285 | 286 | test.generate = { 287 | "Array" : { 288 | "int" : randomIntArray, 289 | "float" : randomFloatArray 290 | } 291 | }; 292 | 293 | function randomIntArray(N, K){ 294 | 295 | var data = []; 296 | 297 | for(var i = 0; i < N; i++){ 298 | data.push(Math.random() * K | 0); 299 | } 300 | 301 | return data; 302 | } 303 | 304 | function randomFloatArray(N){ 305 | 306 | var data = []; 307 | 308 | for(var i = 0; i < N; i++){ 309 | data.push(Math.random() / Math.sqrt(N)); 310 | } 311 | 312 | return data; 313 | } 314 | 315 | module.exports = test; 316 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "dataship-frame", 3 | "version": "2.1.1", 4 | "description": "A Data Frame for Javascript. 
Crunch numbers in node and the browser.", 5 | "main": "lib/frame.js", 6 | "directories": { 7 | "test": "test" 8 | }, 9 | "scripts": { 10 | "data": "node test/data/generate.js", 11 | "test": "browserify test/*.js | testling -x $npm_config_browser", 12 | "dist": "mkdir -p dist && browserify lib/frame.js -s Frame > dist/frame.js", 13 | "bench": "browserify benchmark/*.js | testling -x $npm_config_browser", 14 | "bench-datavore": "browserify benchmark/datavore/*.js | testling -x $npm_config_browser" 15 | }, 16 | "repository": { 17 | "type": "git", 18 | "url": "git+https://github.com/dataship/frame.git" 19 | }, 20 | "keywords": [ 21 | "dataframe", 22 | "statistics", 23 | "math", 24 | "pandas", 25 | "R" 26 | ], 27 | "author": "", 28 | "license": "MIT", 29 | "bugs": { 30 | "url": "https://github.com/dataship/frame/issues" 31 | }, 32 | "homepage": "https://github.com/dataship/frame#readme", 33 | "devDependencies": { 34 | "arrayloader": "^1.1.2", 35 | "async": "^2.1.5", 36 | "benchtap": "^1.0.0", 37 | "floader": "^1.0.1", 38 | "tape": "^4.6.3" 39 | }, 40 | "dependencies": { 41 | "bit-array": "^0.2.2" 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pandas 2 | docopt 3 | -------------------------------------------------------------------------------- /test/argmax.js: -------------------------------------------------------------------------------- 1 | var tape = require('tape'), 2 | Frame = require('../lib/frame'); 3 | 4 | // simple instructive test cases 5 | function simpleTestCases(){ 6 | 7 | tape("argmax works with integers", function(t){ 8 | t.plan(1); 9 | var frame = new Frame({ 10 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 11 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 12 | }); 13 | 14 | var expected = 6; // 1 + 2 + 2 + 3 + 1 + 3 + 4 + 2 + 1 15 | 16 | var actual = frame.argmax("value"); 17 | 18 | t.equal(actual, expected); 19 | }); 20 | 21 | tape("argmax works with integers", function(t){ 22 | t.plan(1); 23 | var frame = new Frame({ 24 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1], 25 | "value" : [4, 2, 7, 1, 3, 6, 5, 2, 1, 7, 8] 26 | }); 27 | 28 | var expected = 10; // (4 + 2 + 7 + 1 + 3 + 6 + 5 + 2 + 1 + 7 + 8) / 11 29 | 30 | var actual = frame.argmax("value"); 31 | 32 | t.equal(actual, expected); 33 | }); 34 | 35 | tape("argmax works floats", function(t){ 36 | t.plan(1); 37 | var frame = new Frame({ 38 | "id" : [0, 2, 0, 1, 1, 0, 2, 0, 1], 39 | "value" : [1.2, 6.4, 2.3, 12.1, 1.6, 3.5, 7.2, 2.1, 10.2] 40 | }); 41 | 42 | var expected = 3; // (1.2 + 6.4 + 2.3 + 12.1 + 1.6 + 3.5 + 7.2 + 2.1 + 10.2) / 9 43 | var actual = frame.argmax("value"); 44 | 45 | t.equal(actual, expected); 46 | }); 47 | 48 | tape("argmax works floats", function(t){ 49 | t.plan(1); 50 | var frame = new Frame({ 51 | "id" : [0, 2, 0, 1, 1, 0, 2, 0, 1], 52 | "value" : [1.2, 6.4, 2.3, 1.1, 10.6, 3.5, 7.2, 2.1, 9.2] 53 | }); 54 | 55 | var expected = 4; // (1.2 + 6.4 + 2.3 + 12.1 + 1.6 + 3.5 + 7.2 + 2.1 + 10.2) / 9 56 | var actual = frame.argmax("value"); 57 | 58 | t.equal(actual, expected); 59 | }); 60 | 61 | tape("argmax wonky edge case", function(t){ 62 | t.plan(1); 63 | var frame = new Frame({ 64 | "id" : [0, 2, 0, 1, 1, 0, 2, 0, 1], 65 | "value" : [11.2, 6.4, 2.3, 1.1, 10.6, 3.5, 7.2, 2.1, 9.2] 66 | }); 67 | 68 | // zero argmax 69 | var primer = frame.argmax("value"); 70 | 71 | var expected = 4; 72 | var frame2 = new Frame({ 73 | "id" : [0, 2, 0, 1, 1, 0, 2, 0, 1], 74 | "value" : 
[1.2, 6.4, 2.3, 1.1, 10.6, 3.5, 7.2, 2.1, 9.2] 75 | }); 76 | 77 | var actual = frame2.argmax("value"); 78 | t.equal(actual, expected); 79 | }); 80 | } 81 | 82 | simpleTestCases(); 83 | /* 84 | var RTOL = 1e-05, // 1e-05 85 | ATOL = 1e-12; // 1e-12 86 | 87 | var dataDirectory = 'test/data/mean/', 88 | testFile = 'small.json'; 89 | 90 | var floader = require('floader'), 91 | dtest = require('../lib/test'); 92 | 93 | floader.load(dataDirectory + testFile, function(err, config){ 94 | 95 | var suite = JSON.parse(config); 96 | simpleTestCases(); 97 | 98 | for(var i = 0; i < suite.length; i++){ 99 | 100 | var prefix = String("0000" + (i + 1)).slice(-4); 101 | 102 | // directory containing matrix data files for current test 103 | var directory = dataDirectory + prefix + '/'; 104 | 105 | var test = suite[i]; 106 | 107 | var names = test.id.map(function(spec, i){ return "id_" + i;}); 108 | var types = test.id.map(function(spec, i){ return spec['type'];}); 109 | 110 | var N = test.N; // number of rows 111 | var distincts = test.id.map(function(spec, i){ return spec.K; }); 112 | 113 | var testName = "mean: " + N + " x " + "(" + distincts.join(", ") + ")" 114 | tape(testName, generateTestCase(directory, names, types, ["value_0"], [test.value[0].type])); 115 | } 116 | }); 117 | 118 | var OUT_FILENAME = "out.json"; 119 | 120 | 121 | function generateTestCase(directory, id_names, id_types, value_names, value_types){ 122 | return function(t){ 123 | t.plan(1); 124 | 125 | var names = id_names.concat(value_names); 126 | var types = id_types.concat(value_types); 127 | 128 | // which columns require a key file? 129 | var key_names = id_names.filter(function(item, i){ 130 | return id_types[i] in dtest.string_types 131 | }); 132 | var key_types = id_types.filter(function(item, i){ 133 | return item in dtest.string_types 134 | }); 135 | 136 | // load columns from files 137 | dtest.load(directory, names, types, function(err, columns){ 138 | 139 | // load key files 140 | dtest.load_key(directory, key_names, key_types, function(err, keys){ 141 | 142 | floader.load(directory + OUT_FILENAME, function(err, out){ 143 | var expected = JSON.parse(out); 144 | 145 | var column_set = {}; 146 | for (var i = 0; i < names.length; i++){ 147 | var name = names[i]; 148 | var column = columns[i]; 149 | column_set[name] = column; 150 | } 151 | // keys map a small set of integers to other things (like strings) 152 | // they're a very simple form of fixed length coding 153 | var key_set = {}; 154 | for (var i = 0; i < keys.length; i++){ 155 | var name = key_names[i]; 156 | var key = keys[i]; 157 | key_set[name] = key; 158 | } 159 | 160 | var frame = new Frame(column_set, key_set); 161 | 162 | //console.log(subset); 163 | var actual = frame.mean("value_0"); 164 | 165 | dtest.assert.close(t, actual, expected, "close", RTOL, ATOL); 166 | }); 167 | 168 | }); 169 | }); 170 | }; 171 | } 172 | */ 173 | -------------------------------------------------------------------------------- /test/count.js: -------------------------------------------------------------------------------- 1 | var tape = require('tape'), 2 | Frame = require('../lib/frame'); 3 | 4 | tape("count gives length with no filter", function(t){ 5 | t.plan(1); 6 | 7 | var frame = new Frame({ 8 | "id" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 9 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 10 | }); 11 | 12 | var expected = 9; 13 | 14 | var actual = frame.count(); 15 | t.equals(actual, expected); 16 | }); 17 | 18 | tape("count works with where", function(t){ 19 | t.plan(1); 20 | 21 | var frame = new 
Frame({ 22 | "id" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 23 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 24 | }); 25 | 26 | //frame.where(row => row.id == 1); 27 | frame = frame.where("id", v => v == 1); 28 | 29 | var expected = 4; 30 | 31 | var actual = frame.count(); 32 | t.equals(actual, expected); 33 | }); 34 | 35 | tape("count works with where.equals", function(t){ 36 | t.plan(1); 37 | 38 | var frame = new Frame({ 39 | "id" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 40 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 41 | }); 42 | 43 | frame = frame.where("id", 1); 44 | 45 | var expected = 4; 46 | 47 | var actual = frame.count(); 48 | t.equals(actual, expected); 49 | }); 50 | 51 | tape("count works with where.in", function(t){ 52 | t.plan(1); 53 | 54 | var frame = new Frame({ 55 | "id" : [0, 2, 0, 1, 1, 0, 2, 0, 1], 56 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 57 | }); 58 | 59 | frame = frame.where("id", [0, 2]); 60 | 61 | var expected = 6; 62 | 63 | var actual = frame.count(); 64 | t.equals(actual, expected); 65 | }); 66 | 67 | tape("count works with multiple where", function(t){ 68 | t.plan(1); 69 | 70 | var frame = new Frame({ 71 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 72 | "id_1" : [0, 0, 1, 1, 0, 1, 0, 0, 1], 73 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 74 | }); 75 | 76 | //frame.where(row => row.id == 1); 77 | frame = frame.where("id_1", id => id == 1); 78 | frame = frame.where("id_0", id => id == 1); 79 | 80 | var expected = 2; 81 | 82 | var actual = frame.count(); 83 | t.equals(actual, expected); 84 | }); 85 | 86 | 87 | /* 88 | function eq(a){ 89 | return function(v){ v == a; }; 90 | } 91 | 92 | function in(arr){ 93 | var set = {}; 94 | for (a in arr) set[a] = true; 95 | return function(v){ return v in set;}; 96 | }*/ 97 | -------------------------------------------------------------------------------- /test/create.js: -------------------------------------------------------------------------------- 1 | var test = require('tape'), 2 | Frame = require('../lib/frame'); 3 | 4 | test("access column from hidden property", function(t){ 5 | t.plan(1); 6 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 7 | var b = [1, 2, 2, 3, 1, 3, 4, 2, 1]; 8 | 9 | var frame = new Frame({ 10 | "a" : a, 11 | "b" : b 12 | }); 13 | 14 | t.equals(JSON.stringify(frame._cols["a"]), JSON.stringify(a)); 15 | }); 16 | 17 | test("access keys from hidden property", function(t){ 18 | t.plan(1); 19 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 20 | var b = [1, 2, 2, 3, 1, 3, 4, 2, 1]; 21 | var k = ["one", "two"]; 22 | 23 | var frame = new Frame({ 24 | "a" : a, 25 | "b" : b 26 | }, 27 | { 28 | "a" : k 29 | }); 30 | 31 | 32 | t.equals(JSON.stringify(frame._keys["a"]), JSON.stringify(k)); 33 | }); 34 | 35 | test("row based constructor creates columns correctly", function(t){ 36 | t.plan(2); 37 | var rows = [ 38 | {"a" : 0, "b" : 1}, 39 | {"a" : 0, "b" : 2}, 40 | {"a" : 0, "b" : 2}, 41 | {"a" : 1, "b" : 3}, 42 | {"a" : 1, "b" : 1}, 43 | {"a" : 0, "b" : 3}, 44 | {"a" : 1, "b" : 4}, 45 | {"a" : 0, "b" : 2}, 46 | {"a" : 1, "b" : 1}, 47 | ]; 48 | 49 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 50 | var b = [1, 2, 2, 3, 1, 3, 4, 2, 1]; 51 | 52 | var frame = new Frame(rows); 53 | 54 | 55 | t.equals(JSON.stringify(frame._cols["a"]), JSON.stringify(a)); 56 | t.equals(JSON.stringify(frame._cols["b"]), JSON.stringify(b)); 57 | }); 58 | 59 | test("access column as property", function(t){ 60 | t.plan(1); 61 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 62 | var b = [1, 2, 2, 3, 1, 3, 4, 2, 1]; 63 | 64 | var frame = new Frame({ 65 | "a" : a, 66 | "b" : b 67 | }); 68 | 69 | 70 | 
t.equals(JSON.stringify(frame["a"]), JSON.stringify(a)); 71 | }); 72 | 73 | test("accessing column as property decodes when key is present", function(t){ 74 | t.plan(1); 75 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 76 | var b = [1, 2, 2, 3, 1, 3, 4, 2, 1]; 77 | var k = ["one", "two"]; 78 | 79 | var frame = new Frame({ 80 | "a" : a, 81 | "b" : b 82 | }, 83 | { 84 | "a" : k 85 | }); 86 | 87 | 88 | var expected = ["one", "one", "one", "two", "two", "one", "two", "one", "two"]; 89 | t.equals(JSON.stringify(frame["a"]), JSON.stringify(expected)); 90 | }); 91 | 92 | test("only columns are enumerable", function(t){ 93 | t.plan(2); 94 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 95 | var b = [1, 2, 2, 3, 1, 3, 4, 2, 1]; 96 | 97 | var frame = new Frame({ 98 | "a" : a, 99 | "b" : b 100 | }); 101 | 102 | var expected = ["a", "b"]; 103 | 104 | t.equals(JSON.stringify(Object.keys(frame)), JSON.stringify(expected)); 105 | 106 | var found = []; 107 | 108 | for(name in frame){ 109 | found.push(name); 110 | } 111 | 112 | t.equals(JSON.stringify(found), JSON.stringify(expected)); 113 | }); 114 | 115 | test("Symbol.toStringTag correctly overridden", function(t){ 116 | t.plan(1); 117 | var frame = new Frame({ 118 | "a" : [0], 119 | "b" : [1] 120 | }); 121 | 122 | var expected = "[object Frame]"; 123 | 124 | t.equals(Object.prototype.toString.call(frame), expected); 125 | }); 126 | 127 | test("rename column correctly modifies frame properties", function(t){ 128 | t.plan(2); 129 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 130 | var b = [1, 2, 2, 3, 1, 3, 4, 2, 1]; 131 | 132 | var frame = new Frame({ 133 | "a" : a, 134 | "b" : b 135 | }); 136 | 137 | var expected = ["a", "c"]; 138 | 139 | frame.rename("b", "c"); 140 | 141 | t.equals(JSON.stringify(Object.keys(frame)), JSON.stringify(expected)); 142 | 143 | var found = []; 144 | 145 | for(name in frame){ 146 | found.push(name); 147 | } 148 | 149 | t.equals(JSON.stringify(found), JSON.stringify(expected)); 150 | }); 151 | 152 | test("rename column correctly adds accessor", function(t){ 153 | t.plan(1); 154 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 155 | var b = [1, 2, 2, 3, 1, 3, 4, 2, 1]; 156 | 157 | var frame = new Frame({ 158 | "a" : a, 159 | "b" : b 160 | }); 161 | 162 | var expected = b; 163 | 164 | frame.rename("b", "c"); 165 | 166 | t.equals(JSON.stringify(frame["c"]), JSON.stringify(expected)); 167 | }); 168 | 169 | test("rename column correctly converts key", function(t){ 170 | t.plan(1); 171 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 172 | var b = [1, 2, 2, 3, 1, 3, 4, 2, 1]; 173 | 174 | var frame = new Frame({ 175 | "a" : a, 176 | "b" : b 177 | }, 178 | { 179 | "b" : ["zero", "one", "two", "three", "four"] 180 | }); 181 | 182 | var expected = ["one", "two", "two", "three", "one", "three", "four", "two", "one"]; 183 | 184 | frame.rename("b", "c"); 185 | 186 | t.equals(JSON.stringify(frame["c"]), JSON.stringify(expected)); 187 | }); 188 | 189 | test("setting via property accessor works correctly", function(t){ 190 | t.plan(1); 191 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 192 | var b = [1, 2, 2, 3, 1, 3, 4, 2, 1]; 193 | 194 | var frame = new Frame({ 195 | "a" : a, 196 | "b" : b 197 | }); 198 | var c = [3, 4, 1, 0, 2, 1, 2, 3, 3]; 199 | 200 | frame["b"] = c; 201 | 202 | var expected = c.slice(0); 203 | t.equals(JSON.stringify(frame["b"]), JSON.stringify(expected)); 204 | }); 205 | 206 | test("distinct works correctly", function(t){ 207 | t.plan(2); 208 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 209 | var b = [1, 2, 2, 3, 1, 3, 4, 2, 1]; 210 | 211 | var frame = new Frame({ 212 | "a" : 
a, 213 | "b" : b 214 | }); 215 | 216 | var expected = [1, 2, 3, 4]; 217 | var actual = frame.distinct("b"); 218 | 219 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 220 | 221 | var expected = [0, 1]; 222 | var actual = frame.distinct("a"); 223 | 224 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 225 | }); 226 | 227 | test("distinct works with keyed column", function(t){ 228 | t.plan(1); 229 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 230 | var b = [1, 2, 2, 3, 1, 3, 4, 2, 1]; 231 | 232 | var frame = new Frame({ 233 | "a" : a, 234 | "b" : b 235 | }, { 236 | "a" : ["zero", "one"] 237 | }); 238 | 239 | var expected = ["zero", "one"]; 240 | var actual = frame.distinct("a"); 241 | 242 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 243 | }); 244 | 245 | test("distinct works with where", function(t){ 246 | t.plan(2); 247 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 248 | var b = [1, 2, 2, 3, 1, 3, 4, 2, 1]; 249 | 250 | var frame = new Frame({ 251 | "a" : a, 252 | "b" : b 253 | }); 254 | 255 | var expected = [1, 3, 4]; 256 | frame = frame.where("a", 1); 257 | var actual = frame.distinct("b"); 258 | 259 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 260 | 261 | var expected = [1]; 262 | var actual = frame.distinct("a"); 263 | 264 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 265 | }); 266 | 267 | test("argmax works correctly", function(t){ 268 | t.plan(1); 269 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 270 | var b = [1, 2, 2, 3, 1, 0, 4, 2, 1]; 271 | 272 | var frame = new Frame({ 273 | "a" : a, 274 | "b" : b 275 | }); 276 | 277 | var expected = 6; 278 | var actual = frame.argmax("b"); 279 | 280 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 281 | 282 | }); 283 | 284 | test("argmin works correctly", function(t){ 285 | t.plan(1); 286 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 287 | var b = [1, 2, 2, 3, 1, 0, 4, 2, 1]; 288 | 289 | var frame = new Frame({ 290 | "a" : a, 291 | "b" : b 292 | }); 293 | 294 | var expected = 5; 295 | var actual = frame.argmin("b"); 296 | 297 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 298 | 299 | }); 300 | 301 | test("median works correctly", function(t){ 302 | t.plan(2); 303 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 304 | var b = [1, 2, 2, 3, 4, 3, 4, 2, 1]; 305 | 306 | var frame = new Frame({ 307 | "a" : a, 308 | "b" : b 309 | }); 310 | 311 | var expected = 2; 312 | var actual = frame.median("b"); 313 | 314 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 315 | 316 | var expected = 0; 317 | var actual = frame.median("a"); 318 | 319 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 320 | }); 321 | 322 | test("min works correctly", function(t){ 323 | t.plan(2); 324 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 325 | var b = [1, 2, 2, 3, 1, 3, 4, 2, 1]; 326 | 327 | var frame = new Frame({ 328 | "a" : a, 329 | "b" : b 330 | }); 331 | 332 | var expected = 1; 333 | var actual = frame.min("b"); 334 | 335 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 336 | 337 | var expected = 0; 338 | var actual = frame.min("a"); 339 | 340 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 341 | }); 342 | 343 | test("min works with where", function(t){ 344 | t.plan(2); 345 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 346 | var b = [1, 2, 2, 3, 4, 3, 4, 2, 3]; 347 | 348 | var frame = new Frame({ 349 | "a" : a, 350 | "b" : b 351 | }); 352 | 353 | var expected = 3; 354 | frame = frame.where("a", 1); 355 | var actual = frame.min("b"); 356 | 357 | 
t.equals(JSON.stringify(actual), JSON.stringify(expected)); 358 | 359 | var expected = 1; 360 | var actual = frame.min("a"); 361 | 362 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 363 | }); 364 | 365 | test("min works correctly on ISO date strings", function(t){ 366 | t.plan(1); 367 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 368 | var b = ["2016-03-11", "2016-05-11", "2016-04-10", "2016-03-15", 369 | "2016-03-03", "2016-04-21", "2016-05-28", "2016-03-17", 370 | "2016-04-04"]; 371 | 372 | var frame = new Frame({ 373 | "a" : a, 374 | "b" : b 375 | }); 376 | 377 | var expected = "2016-03-03"; 378 | var actual = frame.min("b"); 379 | 380 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 381 | }); 382 | 383 | test("min works correctly on keyed column", function(t){ 384 | t.plan(1); 385 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 386 | var b = [1, 2, 2, 3, 1, 3, 4, 2, 1]; 387 | var k = ["b", "a"]; 388 | 389 | var frame = new Frame({ 390 | "a" : a, 391 | "b" : b 392 | }, { 393 | "a" : k 394 | }); 395 | 396 | var expected = "a"; 397 | var actual = frame.min("a"); 398 | 399 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 400 | }); 401 | 402 | test("min works with where on keyed column", function(t){ 403 | t.plan(1); 404 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 405 | var b = [1, 2, 2, 3, 4, 3, 4, 2, 3]; 406 | var k = ["b", "a"]; 407 | 408 | var frame = new Frame({ 409 | "a" : a, 410 | "b" : b 411 | }, { 412 | "a" : k 413 | }); 414 | 415 | var expected = "a"; 416 | frame = frame.where("b", 3); 417 | var actual = frame.min("a"); 418 | 419 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 420 | }); 421 | 422 | test("max works correctly", function(t){ 423 | t.plan(2); 424 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 425 | var b = [1, 2, 2, 3, 1, 3, 4, 2, 1]; 426 | 427 | var frame = new Frame({ 428 | "a" : a, 429 | "b" : b 430 | }); 431 | 432 | var expected = 4; 433 | var actual = frame.max("b"); 434 | 435 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 436 | 437 | var expected = 1; 438 | var actual = frame.max("a"); 439 | 440 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 441 | }); 442 | 443 | test("max works correctly on ISO date strings", function(t){ 444 | t.plan(1); 445 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 446 | var b = ["2016-03-11", "2016-05-11", "2016-04-10", "2016-03-15", 447 | "2016-03-03", "2016-04-21", "2016-05-28", "2016-03-17", 448 | "2016-04-04"]; 449 | 450 | var frame = new Frame({ 451 | "a" : a, 452 | "b" : b 453 | }); 454 | 455 | var expected = "2016-05-28"; 456 | var actual = frame.max("b"); 457 | 458 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 459 | }); 460 | 461 | test("max works correctly on keyed column", function(t){ 462 | t.plan(1); 463 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 464 | var b = [1, 2, 2, 3, 1, 3, 4, 2, 1]; 465 | var k = ["b", "a"]; 466 | 467 | var frame = new Frame({ 468 | "a" : a, 469 | "b" : b 470 | }, { 471 | "a" : k 472 | }); 473 | 474 | var expected = "b"; 475 | var actual = frame.max("a"); 476 | 477 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 478 | }); 479 | 480 | test("max works with where on keyed column", function(t){ 481 | t.plan(1); 482 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 483 | var b = [1, 2, 2, 3, 4, 3, 4, 2, 3]; 484 | var k = ["b", "a"]; 485 | 486 | var frame = new Frame({ 487 | "a" : a, 488 | "b" : b 489 | }, { 490 | "a" : k 491 | }); 492 | 493 | var expected = "b"; 494 | frame = frame.where("b", 3); 495 | var actual = frame.max("a"); 496 | 497 | 
t.equals(JSON.stringify(actual), JSON.stringify(expected)); 498 | }); 499 | 500 | test("add creates new column", function(t){ 501 | t.plan(3); 502 | var a = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 503 | var b = [1, 2, 2, 3, 1, 3, 4, 2, 1]; 504 | var c = [2, 7, 2, 1, 9, 3, 2, 1, 1]; 505 | 506 | var frame = new Frame({ 507 | "a" : a, 508 | "b" : b 509 | }); 510 | 511 | var expected = ["a", "b", "c"]; 512 | 513 | frame.add("c", c); 514 | 515 | t.equals(JSON.stringify(Object.keys(frame)), JSON.stringify(expected)); 516 | 517 | var found = []; 518 | 519 | for(name in frame){ 520 | found.push(name); 521 | } 522 | 523 | t.equals(JSON.stringify(found), JSON.stringify(expected)); 524 | 525 | t.equals(JSON.stringify(frame["c"]), JSON.stringify(c)); 526 | }); 527 | -------------------------------------------------------------------------------- /test/data/binary_matrix.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Create two randomly generated matrices, of the specified sizes and write them 3 | to JSON files. 4 | 5 | """ 6 | import json 7 | import numpy as np 8 | import os 9 | 10 | 11 | type_map = { 12 | '.i8' : np.int8, 13 | '.u8' : np.uint8, 14 | '.i16' : np.int16, 15 | '.u16' : np.uint16, 16 | '.i32' : np.int32, 17 | '.u32' : np.uint32, 18 | '.f32' : np.float32, 19 | '.i64' : np.int64, # not compatible with javascript 20 | '.u64' : np.uint64,# not compatible with javascript 21 | '.f64' : np.float64, 22 | '.s8' : np.int8, 23 | '.s16' : np.int16 24 | } 25 | 26 | def get_extension(path): 27 | filename, file_extension = os.path.splitext(path) 28 | return file_extension 29 | 30 | def read(path): 31 | 32 | extension = get_extension(path) 33 | if extension in type_map: 34 | dtype = type_map[extension] 35 | else: 36 | dtype=np.float32 37 | 38 | with open(path, 'rb') as f: 39 | matrix = np.fromfile(f, dtype=dtype) 40 | 41 | return matrix 42 | 43 | def write(path, matrix): 44 | 45 | extension = get_extension(path) 46 | if extension in type_map: 47 | dtype = type_map[extension] 48 | else: 49 | dtype=np.float32 50 | 51 | with open(path, 'wb') as f: 52 | f.write(matrix.astype(dtype=dtype).tostring()) 53 | 54 | return matrix 55 | -------------------------------------------------------------------------------- /test/data/generate.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | var spawn = require('child_process').spawn, 3 | async = require('async'); 4 | 5 | 6 | /* 7 | ./generate.py count/ count/small.json 8 | ./generate.py sum/ sum/small.json 9 | */ 10 | 11 | var tasks = [ 12 | ['generate.py', 'groupby.count/', 'groupby.count/small.json'], 13 | ['generate.py', 'groupby.sum/', 'groupby.sum/small.json'], 14 | ['generate.py', 'groupby.mean/', 'groupby.mean/small.json'], 15 | ['generate.py', 'groupby.where.sum/', 'groupby.where.sum/small.json'], 16 | ['generate.py', 'where.in.sum/', 'where.in.sum/small.json'], 17 | ['generate.py', 'mean/', 'mean/small.json'], 18 | ['generate.py', 'where.mean/', 'where.mean/small.json'] 19 | ]; 20 | var options = { 21 | "cwd" : __dirname, 22 | "stdio": ["inherit", "inherit", "inherit"] 23 | }; 24 | 25 | 26 | async.eachSeries(tasks, function(task, callback){ 27 | spawn('python', task, options).on('close', callback); 28 | }, 29 | function(){ 30 | // all done 31 | }); 32 | -------------------------------------------------------------------------------- /test/data/generate.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python 2 | """Create data for the test suite described by the given specification. 3 | Deleting the file out.json in a subdirectory will cause it to be recreated 4 | with existing data and new "args". Deleting all files in a subdirectory will 5 | case all data to be recreated. 6 | 7 | spec.json contains an array of objects, each object contains 8 | 9 | "N" - a number of rows to generate 10 | "id" - a list of id columns to generate 11 | K - number of distinct values to generate 12 | type - type of data to generate for column 13 | [{"K" : 3, "type": "int32"}, {"K" : 3, "type": "int32"}], 14 | "value" - a list of value columns to generate 15 | [{"K" : 100, "type": "int32"}, {"K" : 100, "type": "int32"}] 16 | 17 | NOTE: 64 bit integer types are not compatible with Javascript. 18 | This includes np.int64 and np.uint64 19 | 20 | Implementing test data generation for a new operation involves two things: 21 | 1. creation of a json spec 22 | 2. implementing an operation file with a single function named execute 23 | 24 | Usage: 25 | generate.py 26 | """ 27 | from docopt import docopt 28 | import os 29 | import sys 30 | import json 31 | import math 32 | import collections 33 | import numpy as np 34 | import binary_matrix 35 | 36 | OUT_FILENAME = "out.json" 37 | 38 | KEY_EXT = ".key" 39 | 40 | extension_map = { 41 | "int8" : ".i8", 42 | "uint8" : ".u8", 43 | "int16" : ".i16", 44 | "uint16" : ".u16", 45 | "int32" : ".i32", 46 | "uint32" : '.u32', 47 | "float32" : '.f32', 48 | "int64" : '.i64', # not compatible with javascript 49 | "uint64" : '.u64', # not compatible with javascript 50 | "float64" : '.f64' 51 | } 52 | 53 | # function adapted from this issue request on numpy 54 | # https://github.com/numpy/numpy/issues/3155 55 | def random_sample(size=None, dtype=np.float64): 56 | 57 | if type(dtype) == str or type(dtype) == unicode: 58 | dtype = np.dtype(dtype).type 59 | 60 | type_max = 1 << np.finfo(dtype).nmant 61 | sample = np.empty(size, dtype=dtype) 62 | sample[...] 
= np.random.randint(0, type_max, size=size) / dtype(type_max) 63 | if size is None: 64 | sample = sample[()] 65 | return sample 66 | 67 | int_types = set([ 68 | "int8", "uint8", 69 | "int16", "uint16", 70 | "int32", "uint32", 71 | "int64", "uint64"]) # not compatible with javascript 72 | float_types = set(["float32", "float64"]) 73 | 74 | def create_column(N, K, type="int32"): 75 | 76 | if type in int_types: 77 | return np.random.randint(0, K, N, dtype=type) 78 | 79 | if type in float_types: 80 | return K * random_sample(N, dtype=type) 81 | 82 | 83 | 84 | return np.random.randint(0, K, N, dtype="int32") 85 | 86 | def write_result(result, location): 87 | """write a dict to a file as a json document""" 88 | try: 89 | with open(location, 'w') as f: 90 | json.dump(result, f, indent=4) 91 | except Exception as e: 92 | print("Couldn't write output JSON file: {0}".format(e.message)) 93 | sys.exit(1) 94 | 95 | def write_code(result, location): 96 | write_result(result, location) 97 | 98 | def read_code(location): 99 | with open(location, 'r') as f: 100 | code = json.load(f) 101 | 102 | return code 103 | 104 | class queue(collections.deque): 105 | def pop(self): 106 | return self.popleft() 107 | def push(self, n): 108 | self.append(n) 109 | 110 | def gen_strings(N): 111 | chars = [chr(i) for i in range(ord('a'), ord('z') + 1)] 112 | L = int(math.ceil(math.log(N) / math.log(len(chars)))) 113 | 114 | results = queue([""]) 115 | 116 | for i in range(L): 117 | for j in range(len(results)): 118 | r = results.pop() 119 | 120 | for c in chars: 121 | results.push(r+c) 122 | 123 | return list(results)[:N] 124 | 125 | if __name__ == '__main__': 126 | arguments = docopt(__doc__, version='JSON Groupby Generator') 127 | 128 | # arguments parsed from Usage statement by docopt 129 | base_directory = os.path.join(arguments[''], '') 130 | test_file = arguments[''] 131 | 132 | sys.path.insert(0, './' + base_directory) 133 | 134 | operation = __import__("operation") 135 | 136 | with open(test_file, 'r') as f: 137 | try: 138 | tests = json.load(f) 139 | except Exception as e: 140 | print("Couldn't parse JSON configuration file: {0}".format(e.message)) 141 | sys.exit(1) 142 | 143 | 144 | for i in range(len(tests)): 145 | 146 | options = tests[i] 147 | N = options['N'] 148 | 149 | # test directory is a string of four numbers starting at 0001 150 | directory = base_directory + "{0:0>4}/".format(i + 1) 151 | 152 | if not os.path.exists(directory): 153 | os.makedirs(directory) 154 | 155 | # if a result exists, skip this data set 156 | if os.path.exists(directory + OUT_FILENAME): 157 | print("Skipping {0}".format(directory)) 158 | continue 159 | 160 | id_columns = {} 161 | for i in range(len(options['id'])): 162 | name = "id_{0}".format(i) 163 | spec = options['id'][i] 164 | K = spec['K'] 165 | dtype = spec['type'] 166 | if dtype[:3] == 'str': 167 | 168 | if K <= 256 and dtype == 'str8': 169 | dtype = "int8" 170 | extension = ".s8" 171 | elif K <= 65536 and dtype == 'str16': 172 | dtype = "int16" 173 | extension = ".s16" 174 | else: 175 | raise Exception("Too many strings!") 176 | 177 | if os.path.exists(directory + name + KEY_EXT) and os.path.exists(directory + name + extension): 178 | # read binary row file 179 | rows = binary_matrix.read(directory + name + extension) 180 | # read key file 181 | code = read_code(directory + name + KEY_EXT) 182 | 183 | else: 184 | rows = create_column(N, K, dtype) 185 | # map integers onto random strings 186 | code = gen_strings(K) 187 | # write key file 188 | write_code(code, directory + 
name + KEY_EXT) 189 | binary_matrix.write(directory + name + extension, rows) 190 | 191 | column = [code[index] for index in rows] 192 | 193 | else: 194 | if dtype not in extension_map: 195 | dtype = "int32" 196 | 197 | extension = extension_map[dtype] 198 | 199 | if os.path.exists(directory + name + extension): 200 | column = binary_matrix.read(directory + name + extension) 201 | else: 202 | column = create_column(N, K, dtype) 203 | binary_matrix.write(directory + name + extension, column) 204 | 205 | id_columns[name] = column 206 | 207 | value_columns = {} 208 | for i in range(len(options['value'])): 209 | name = "value_{0}".format(i) 210 | spec = options['value'][i] 211 | K = spec['K'] 212 | dtype = spec['type'] 213 | if dtype not in extension_map: 214 | dtype = "int32" 215 | 216 | extension = extension_map[dtype] 217 | if os.path.exists(directory + name + extension): 218 | column = binary_matrix.read(directory + name + extension) 219 | else: 220 | column = create_column(N, K, dtype) 221 | binary_matrix.write(directory + name + extension, column) 222 | 223 | value_columns[name] = column 224 | 225 | # run reduction 226 | arguments = options['arg'] if 'arg' in options else {} 227 | out = operation.execute(arguments, id_columns, value_columns) 228 | 229 | # write result 230 | #binary_matrix.write(directory + "out.arr", out.flatten()) 231 | write_result(out, directory + OUT_FILENAME) 232 | 233 | print("Created {0}".format(directory)) 234 | -------------------------------------------------------------------------------- /test/data/groupby.count/operation.py: -------------------------------------------------------------------------------- 1 | """count operation 2 | """ 3 | import pandas as pd 4 | 5 | def convert_to_dict(r): 6 | 7 | # returns a dictionary whose keys are tuples 8 | tupled = r.to_dict() 9 | 10 | # convert tuple keys to nested dictionaries 11 | dicted = {} 12 | for (t, k) in tupled.items(): 13 | level = dicted 14 | 15 | # create a nested dictionary for each item in the tuple 16 | for l in t[:-1]: 17 | if l in level: 18 | level = level[l] 19 | else: 20 | level[l] = {} 21 | level = level[l] 22 | 23 | # the last level points to the value 24 | l = t[-1] 25 | level[l] = k.item() # convert numpy type to python type 26 | 27 | return dicted 28 | 29 | def execute(options, id_columns, value_columns): 30 | 31 | columns = id_columns.copy() 32 | columns.update(value_columns) 33 | #print(columns) 34 | 35 | frame = pd.DataFrame(columns) 36 | 37 | g = frame.groupby(by=list(id_columns.keys())) 38 | return convert_to_dict(g.count()["value_0"]) 39 | -------------------------------------------------------------------------------- /test/data/groupby.count/small.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"N" : 10000, 3 | "id" : [{"K" : 3, "type" : "int8"}, {"K" : 3, "type" : "int8"}], 4 | "value" : [{"K" : 100, "type" : "int32"}] 5 | }, 6 | {"N" : 10000, 7 | "id" : [{"K" : 3, "type" : "int32"}, {"K" : 3, "type" : "int32"}], 8 | "value" : [{"K" : 100, "type" : "float32"}] 9 | }, 10 | {"N" : 10000, 11 | "id" : [{"K" : 3, "type" : "int32"}, {"K" : 3, "type" : "int32"}], 12 | "value" : [{"K" : 100, "type" : "int32"}] 13 | }, 14 | {"N" : 100000, 15 | "id" : [{"K" : 3, "type" : "int32"}, {"K" : 3, "type" : "int32"}], 16 | "value" : [{"K" : 100, "type" : "int32"}] 17 | }, 18 | {"N" : 1000000, 19 | "id" : [{"K" : 3, "type" : "uint8"}, {"K" : 3, "type" : "int32"}], 20 | "value" : [{"K" : 100, "type" : "float64"}] 21 | } 22 | ] 23 | 
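The generator above stores `str8`/`str16` id columns (used by several of the specs that follow) as small integer codes (`int8`/`int16`) plus a key file listing one string per code, so decoding a column back to strings is a plain table lookup — the Python side does it with `column = [code[index] for index in rows]`. A minimal JavaScript sketch of the same lookup; the `decodeColumn` helper is illustrative only and not part of the library:

```javascript
// Minimal sketch (not the library's implementation): decode an
// integer-coded string column using its key array, mirroring the
// generator's `column = [code[index] for index in rows]`.
function decodeColumn(rows, key){
	var out = new Array(rows.length);
	for(var i = 0; i < rows.length; i++){
		out[i] = key[rows[i]];
	}
	return out;
}

// decodeColumn([0, 2, 1, 0], ["aa", "ab", "ac"]) -> ["aa", "ac", "ab", "aa"]
```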
-------------------------------------------------------------------------------- /test/data/groupby.mean/operation.py: -------------------------------------------------------------------------------- 1 | """groupby mean operation 2 | """ 3 | import pandas as pd 4 | 5 | def convert_to_dict(r): 6 | 7 | # returns a dictionary whose keys are tuples 8 | tupled = r.to_dict() 9 | 10 | # convert tuple keys to nested dictionaries 11 | dicted = {} 12 | for (t, k) in tupled.items(): 13 | level = dicted 14 | 15 | # create a nested dictionary for each item in the tuple 16 | for l in t[:-1]: 17 | if l in level: 18 | level = level[l] 19 | else: 20 | level[l] = {} 21 | level = level[l] 22 | 23 | # the last level points to the value 24 | l = t[-1] 25 | level[l] = k.item() # convert numpy type to python type 26 | 27 | return dicted 28 | 29 | def execute(options, id_columns, value_columns): 30 | 31 | columns = id_columns.copy() 32 | columns.update(value_columns) 33 | #print(columns) 34 | 35 | frame = pd.DataFrame(columns) 36 | 37 | g = frame.groupby(by=list(id_columns.keys())) 38 | return convert_to_dict(g.mean()["value_0"]) 39 | -------------------------------------------------------------------------------- /test/data/groupby.mean/small.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"N" : 10000, 3 | "id" : [{"K" : 3, "type" : "int8"}, {"K" : 3, "type" : "uint8"}], 4 | "value" : [{"K" : 100, "type" : "int32"}] 5 | }, 6 | {"N" : 10000, 7 | "id" : [{"K" : 3, "type" : "int8"}, {"K" : 3, "type" : "str8"}], 8 | "value" : [{"K" : 100, "type" : "int32"}] 9 | }, 10 | {"N" : 10000, 11 | "id" : [{"K" : 100, "type" : "int8"}, {"K" : 3, "type" : "str8"}], 12 | "value" : [{"K" : 1000, "type" : "int32"}] 13 | }, 14 | {"N" : 10000, 15 | "id" : [{"K" : 100, "type" : "int8"}, {"K" : 100, "type" : "str8"}], 16 | "value" : [{"K" : 1000, "type" : "int32"}] 17 | }, 18 | {"N" : 10000, 19 | "id" : [{"K" : 3, "type" : "int16"}, {"K" : 3, "type" : "uint16"}], 20 | "value" : [{"K" : 100, "type" : "float32"}] 21 | }, 22 | {"N" : 10000, 23 | "id" : [{"K" : 3, "type" : "int32"}, {"K" : 3, "type" : "uint32"}], 24 | "value" : [{"K" : 100, "type" : "int32"}] 25 | }, 26 | {"N" : 100000, 27 | "id" : [{"K" : 3, "type" : "int32"}, {"K" : 3, "type" : "int32"}], 28 | "value" : [{"K" : 100, "type" : "int32"}] 29 | }, 30 | {"N" : 100000, 31 | "id" : [{"K" : 1000, "type" : "int32"}, {"K" : 300, "type" : "str16"}], 32 | "value" : [{"K" : 100, "type" : "int32"}] 33 | }, 34 | {"N" : 100000, 35 | "id" : [{"K" : 1000, "type" : "int32"}, {"K" : 300, "type" : "str16"}], 36 | "value" : [{"K" : 100, "type" : "float32"}] 37 | }, 38 | {"N" : 1000000, 39 | "id" : [{"K" : 3, "type" : "uint8"}, {"K" : 3, "type" : "int32"}], 40 | "value" : [{"K" : 100, "type" : "float64"}] 41 | } 42 | ] 43 | -------------------------------------------------------------------------------- /test/data/groupby.sum/operation.py: -------------------------------------------------------------------------------- 1 | """sum operation 2 | """ 3 | import pandas as pd 4 | 5 | def convert_to_dict(r): 6 | 7 | # returns a dictionary whose keys are tuples 8 | tupled = r.to_dict() 9 | 10 | # convert tuple keys to nested dictionaries 11 | dicted = {} 12 | for (t, k) in tupled.items(): 13 | level = dicted 14 | 15 | # create a nested dictionary for each item in the tuple 16 | for l in t[:-1]: 17 | if l in level: 18 | level = level[l] 19 | else: 20 | level[l] = {} 21 | level = level[l] 22 | 23 | # the last level points to the value 24 | l = t[-1] 25 
| level[l] = k.item() # convert numpy type to python type 26 | 27 | return dicted 28 | 29 | def execute(options, id_columns, value_columns): 30 | 31 | columns = id_columns.copy() 32 | columns.update(value_columns) 33 | #print(columns) 34 | 35 | frame = pd.DataFrame(columns) 36 | 37 | g = frame.groupby(by=list(id_columns.keys())) 38 | return convert_to_dict(g.sum()["value_0"]) 39 | -------------------------------------------------------------------------------- /test/data/groupby.sum/small.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"N" : 10000, 3 | "id" : [{"K" : 3, "type" : "int8"}, {"K" : 3, "type" : "uint8"}], 4 | "value" : [{"K" : 100, "type" : "int32"}] 5 | }, 6 | {"N" : 10000, 7 | "id" : [{"K" : 3, "type" : "int8"}, {"K" : 3, "type" : "str8"}], 8 | "value" : [{"K" : 100, "type" : "int32"}] 9 | }, 10 | {"N" : 10000, 11 | "id" : [{"K" : 100, "type" : "int8"}, {"K" : 3, "type" : "str8"}], 12 | "value" : [{"K" : 1000, "type" : "int32"}] 13 | }, 14 | {"N" : 10000, 15 | "id" : [{"K" : 100, "type" : "int8"}, {"K" : 100, "type" : "str8"}], 16 | "value" : [{"K" : 1000, "type" : "int32"}] 17 | }, 18 | {"N" : 10000, 19 | "id" : [{"K" : 3, "type" : "int16"}, {"K" : 3, "type" : "uint16"}], 20 | "value" : [{"K" : 100, "type" : "float32"}] 21 | }, 22 | {"N" : 10000, 23 | "id" : [{"K" : 3, "type" : "int32"}, {"K" : 3, "type" : "uint32"}], 24 | "value" : [{"K" : 100, "type" : "int32"}] 25 | }, 26 | {"N" : 100000, 27 | "id" : [{"K" : 3, "type" : "int32"}, {"K" : 3, "type" : "int32"}], 28 | "value" : [{"K" : 100, "type" : "int32"}] 29 | }, 30 | {"N" : 100000, 31 | "id" : [{"K" : 1000, "type" : "int32"}, {"K" : 300, "type" : "str16"}], 32 | "value" : [{"K" : 100, "type" : "int32"}] 33 | }, 34 | {"N" : 100000, 35 | "id" : [{"K" : 1000, "type" : "int32"}, {"K" : 300, "type" : "str16"}], 36 | "value" : [{"K" : 100, "type" : "float32"}] 37 | }, 38 | {"N" : 1000000, 39 | "id" : [{"K" : 3, "type" : "uint8"}, {"K" : 3, "type" : "int32"}], 40 | "value" : [{"K" : 100, "type" : "float64"}] 41 | } 42 | ] 43 | -------------------------------------------------------------------------------- /test/data/groupby.where.sum/operation.py: -------------------------------------------------------------------------------- 1 | """sum operation 2 | """ 3 | import pandas as pd 4 | import math 5 | 6 | def convert_to_dict(r): 7 | 8 | # returns a dictionary whose keys are tuples 9 | tupled = r.to_dict() 10 | 11 | # convert tuple keys to nested dictionaries 12 | dicted = {} 13 | for (t, k) in tupled.items(): 14 | level = dicted 15 | 16 | # create a nested dictionary for each item in the tuple 17 | for l in t[:-1]: 18 | if l in level: 19 | level = level[l] 20 | else: 21 | level[l] = {} 22 | level = level[l] 23 | 24 | # the last level points to the value 25 | l = t[-1] 26 | level[l] = k.item() # convert numpy type to python type 27 | 28 | return dicted 29 | 30 | SAMPLE = 10 31 | 32 | def execute(options, id_columns, value_columns): 33 | ''' 34 | id_columns - a dictionary mapping names (strings) to numpy arrays 35 | value_columns - a dictionary mapping names (strings) to numpy arrays 36 | 37 | ''' 38 | 39 | columns = id_columns.copy() 40 | columns.update(value_columns) 41 | 42 | frame = pd.DataFrame(columns) 43 | 44 | id_name = "id_0" 45 | value_name = "value_0" 46 | 47 | # create a subset of the column values 48 | column = id_columns[id_name] 49 | uniques = set(column[:SAMPLE]) 50 | l = int(math.ceil(len(uniques)/2.0)) 51 | subset = sorted(list(uniques))[:l] 52 | #print(subset) 53 
| 54 | #frame.loc[frame[id_name] == 1, value_name].sum() 55 | #v = frame.loc[frame[id_name].isin(subset), value_name].sum() 56 | filtered = frame.loc[frame[id_name].isin(subset)] 57 | grouped = filtered.groupby(by=list(id_columns.keys())) 58 | 59 | return convert_to_dict(grouped.sum()["value_0"]) 60 | -------------------------------------------------------------------------------- /test/data/groupby.where.sum/small.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"N" : 10000, 3 | "id" : [{"K" : 3, "type" : "int8"}, {"K" : 3, "type" : "uint8"}], 4 | "value" : [{"K" : 100, "type" : "int32"}] 5 | }, 6 | {"N" : 10000, 7 | "id" : [{"K" : 3, "type" : "int8"}, {"K" : 3, "type" : "str8"}], 8 | "value" : [{"K" : 100, "type" : "int32"}] 9 | }, 10 | {"N" : 10000, 11 | "id" : [{"K" : 100, "type" : "int8"}, {"K" : 3, "type" : "str8"}], 12 | "value" : [{"K" : 1000, "type" : "int32"}] 13 | }, 14 | {"N" : 10000, 15 | "id" : [{"K" : 100, "type" : "int8"}, {"K" : 100, "type" : "str8"}], 16 | "value" : [{"K" : 1000, "type" : "int32"}] 17 | }, 18 | {"N" : 10000, 19 | "id" : [{"K" : 3, "type" : "int16"}, {"K" : 3, "type" : "uint16"}], 20 | "value" : [{"K" : 100, "type" : "float32"}] 21 | }, 22 | {"N" : 10000, 23 | "id" : [{"K" : 3, "type" : "int32"}, {"K" : 3, "type" : "uint32"}], 24 | "value" : [{"K" : 100, "type" : "int32"}] 25 | }, 26 | {"N" : 100000, 27 | "id" : [{"K" : 3, "type" : "int32"}, {"K" : 3, "type" : "int32"}], 28 | "value" : [{"K" : 100, "type" : "int32"}] 29 | }, 30 | {"N" : 100000, 31 | "id" : [{"K" : 1000, "type" : "int32"}, {"K" : 300, "type" : "str16"}], 32 | "value" : [{"K" : 100, "type" : "int32"}] 33 | }, 34 | {"N" : 100000, 35 | "id" : [{"K" : 1000, "type" : "int32"}, {"K" : 300, "type" : "str16"}], 36 | "value" : [{"K" : 100, "type" : "float32"}] 37 | }, 38 | {"N" : 1000000, 39 | "id" : [{"K" : 3, "type" : "uint8"}, {"K" : 3, "type" : "int32"}], 40 | "value" : [{"K" : 100, "type" : "float64"}] 41 | } 42 | ] 43 | -------------------------------------------------------------------------------- /test/data/mean/operation.py: -------------------------------------------------------------------------------- 1 | """mean operation 2 | find the mean (average) of a column 3 | """ 4 | import pandas as pd 5 | import math 6 | 7 | def execute(options, id_columns, value_columns): 8 | ''' 9 | id_columns - a dictionary mapping names (strings) to numpy arrays 10 | value_columns - a dictionary mapping names (strings) to numpy arrays 11 | 12 | ''' 13 | 14 | columns = id_columns.copy() 15 | columns.update(value_columns) 16 | 17 | frame = pd.DataFrame(columns) 18 | 19 | v = frame.mean()["value_0"] 20 | 21 | return v.item() # convert from numpy type to python type 22 | -------------------------------------------------------------------------------- /test/data/mean/small.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"N" : 10000, 3 | "id" : [{"K" : 3, "type" : "int8"}], 4 | "value" : [{"K" : 100, "type" : "int32"}] 5 | }, 6 | {"N" : 10000, 7 | "id" : [{"K" : 3, "type" : "int8"}], 8 | "value" : [{"K" : 100, "type" : "int32"}] 9 | }, 10 | {"N" : 10000, 11 | "id" : [{"K" : 100, "type" : "int8"}], 12 | "value" : [{"K" : 1000, "type" : "int32"}] 13 | }, 14 | {"N" : 10000, 15 | "id" : [{"K" : 100, "type" : "int8"}], 16 | "value" : [{"K" : 1000, "type" : "int32"}] 17 | }, 18 | {"N" : 10000, 19 | "id" : [{"K" : 3, "type" : "int16"}], 20 | "value" : [{"K" : 100, "type" : "float32"}] 21 | }, 22 | {"N" : 10000, 23 | "id" : 
[{"K" : 3, "type" : "int32"}], 24 | "value" : [{"K" : 100, "type" : "float32"}] 25 | }, 26 | {"N" : 100000, 27 | "id" : [{"K" : 3, "type" : "int32"}], 28 | "value" : [{"K" : 100, "type" : "int32"}] 29 | }, 30 | {"N" : 100000, 31 | "id" : [{"K" : 1000, "type" : "int32"}], 32 | "value" : [{"K" : 100, "type" : "int32"}] 33 | }, 34 | {"N" : 100000, 35 | "id" : [{"K" : 1000, "type" : "int32"}], 36 | "value" : [{"K" : 100, "type" : "float32"}] 37 | }, 38 | {"N" : 1000000, 39 | "id" : [{"K" : 3, "type" : "uint8"}], 40 | "value" : [{"K" : 100, "type" : "float64"}] 41 | } 42 | ] 43 | -------------------------------------------------------------------------------- /test/data/where.in.sum/operation.py: -------------------------------------------------------------------------------- 1 | """where.in sum operation 2 | filter by inclusion in a list, then sum the matches 3 | """ 4 | import pandas as pd 5 | import math 6 | 7 | SAMPLE = 10 8 | 9 | def execute(options, id_columns, value_columns): 10 | ''' 11 | id_columns - a dictionary mapping names (strings) to numpy arrays 12 | value_columns - a dictionary mapping names (strings) to numpy arrays 13 | 14 | ''' 15 | 16 | columns = id_columns.copy() 17 | columns.update(value_columns) 18 | 19 | frame = pd.DataFrame(columns) 20 | 21 | id_name = "id_0" 22 | value_name = "value_0" 23 | 24 | # create a subset of the column values 25 | column = id_columns[id_name] 26 | uniques = set(column[:SAMPLE]) 27 | l = int(math.ceil(len(uniques)/2.0)) 28 | subset = sorted(list(uniques))[:l] 29 | 30 | #frame.loc[frame[id_name] == 1, value_name].sum() 31 | v = frame.loc[frame[id_name].isin(subset), value_name].sum() 32 | 33 | return v.item() # convert from numpy type to python type 34 | -------------------------------------------------------------------------------- /test/data/where.in.sum/small.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"N" : 10000, 3 | "id" : [{"K" : 3, "type" : "int8"}, {"K" : 3, "type" : "uint8"}], 4 | "value" : [{"K" : 100, "type" : "int32"}] 5 | }, 6 | {"N" : 10000, 7 | "id" : [{"K" : 3, "type" : "int8"}, {"K" : 3, "type" : "str8"}], 8 | "value" : [{"K" : 100, "type" : "int32"}] 9 | }, 10 | {"N" : 10000, 11 | "id" : [{"K" : 100, "type" : "int8"}, {"K" : 3, "type" : "str8"}], 12 | "value" : [{"K" : 1000, "type" : "int32"}] 13 | }, 14 | {"N" : 10000, 15 | "id" : [{"K" : 100, "type" : "int8"}, {"K" : 100, "type" : "str8"}], 16 | "value" : [{"K" : 1000, "type" : "int32"}] 17 | }, 18 | {"N" : 10000, 19 | "id" : [{"K" : 3, "type" : "int16"}, {"K" : 3, "type" : "uint16"}], 20 | "value" : [{"K" : 100, "type" : "float32"}] 21 | }, 22 | {"N" : 10000, 23 | "id" : [{"K" : 3, "type" : "int32"}, {"K" : 3, "type" : "uint32"}], 24 | "value" : [{"K" : 100, "type" : "int32"}] 25 | }, 26 | {"N" : 100000, 27 | "id" : [{"K" : 3, "type" : "int32"}, {"K" : 3, "type" : "int32"}], 28 | "value" : [{"K" : 100, "type" : "int32"}] 29 | }, 30 | {"N" : 100000, 31 | "id" : [{"K" : 1000, "type" : "int32"}, {"K" : 300, "type" : "str16"}], 32 | "value" : [{"K" : 100, "type" : "int32"}] 33 | }, 34 | {"N" : 100000, 35 | "id" : [{"K" : 1000, "type" : "int32"}, {"K" : 300, "type" : "str16"}], 36 | "value" : [{"K" : 100, "type" : "float32"}] 37 | }, 38 | {"N" : 1000000, 39 | "id" : [{"K" : 3, "type" : "uint8"}, {"K" : 3, "type" : "int32"}], 40 | "value" : [{"K" : 100, "type" : "float64"}] 41 | } 42 | ] 43 | -------------------------------------------------------------------------------- /test/data/where.mean/operation.py: 
-------------------------------------------------------------------------------- 1 | """where mean operation 2 | filter by equality with a value, then take the mean of (average) the matches 3 | """ 4 | import pandas as pd 5 | import math 6 | 7 | SAMPLE = 10 8 | 9 | def execute(options, id_columns, value_columns): 10 | ''' 11 | id_columns - a dictionary mapping names (strings) to numpy arrays 12 | value_columns - a dictionary mapping names (strings) to numpy arrays 13 | 14 | ''' 15 | 16 | columns = id_columns.copy() 17 | columns.update(value_columns) 18 | 19 | frame = pd.DataFrame(columns) 20 | 21 | id_name = "id_0" 22 | value_name = "value_0" 23 | 24 | # create a subset of the column values 25 | column = id_columns[id_name] 26 | first = column[0] 27 | 28 | #frame.loc[frame[id_name] == 1, value_name].sum() 29 | v = frame.loc[frame[id_name] == first, value_name].mean() 30 | 31 | return v.item() # convert from numpy type to python type 32 | -------------------------------------------------------------------------------- /test/data/where.mean/small.json: -------------------------------------------------------------------------------- 1 | [ 2 | {"N" : 10000, 3 | "id" : [{"K" : 3, "type" : "int8"}, {"K" : 3, "type" : "uint8"}], 4 | "value" : [{"K" : 100, "type" : "int32"}] 5 | }, 6 | {"N" : 10000, 7 | "id" : [{"K" : 3, "type" : "int8"}, {"K" : 3, "type" : "str8"}], 8 | "value" : [{"K" : 100, "type" : "int32"}] 9 | }, 10 | {"N" : 10000, 11 | "id" : [{"K" : 100, "type" : "int8"}, {"K" : 3, "type" : "str8"}], 12 | "value" : [{"K" : 1000, "type" : "int32"}] 13 | }, 14 | {"N" : 10000, 15 | "id" : [{"K" : 100, "type" : "int8"}, {"K" : 100, "type" : "str8"}], 16 | "value" : [{"K" : 1000, "type" : "int32"}] 17 | }, 18 | {"N" : 10000, 19 | "id" : [{"K" : 3, "type" : "int16"}, {"K" : 3, "type" : "uint16"}], 20 | "value" : [{"K" : 100, "type" : "float32"}] 21 | }, 22 | {"N" : 10000, 23 | "id" : [{"K" : 3, "type" : "int32"}, {"K" : 3, "type" : "uint32"}], 24 | "value" : [{"K" : 100, "type" : "int32"}] 25 | }, 26 | {"N" : 100000, 27 | "id" : [{"K" : 3, "type" : "int32"}, {"K" : 3, "type" : "int32"}], 28 | "value" : [{"K" : 100, "type" : "int32"}] 29 | }, 30 | {"N" : 100000, 31 | "id" : [{"K" : 1000, "type" : "int32"}, {"K" : 300, "type" : "str16"}], 32 | "value" : [{"K" : 100, "type" : "int32"}] 33 | }, 34 | {"N" : 100000, 35 | "id" : [{"K" : 1000, "type" : "int32"}, {"K" : 300, "type" : "str16"}], 36 | "value" : [{"K" : 100, "type" : "float32"}] 37 | }, 38 | {"N" : 1000000, 39 | "id" : [{"K" : 3, "type" : "uint8"}, {"K" : 3, "type" : "int32"}], 40 | "value" : [{"K" : 100, "type" : "float64"}] 41 | } 42 | ] 43 | -------------------------------------------------------------------------------- /test/groupby.count.js: -------------------------------------------------------------------------------- 1 | var tape = require('tape'), 2 | Frame = require('../lib/frame'); 3 | 4 | 5 | tape("groupby.count", function(t){ 6 | t.plan(1); 7 | var frame = new Frame({ 8 | "id" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 9 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 10 | }); 11 | 12 | var expected = { 13 | 0 : 5, 14 | 1 : 4 15 | } 16 | 17 | var g = frame.groupby("id"); 18 | var actual = g.count(); 19 | 20 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 21 | 22 | }); 23 | 24 | tape("groupby.count", function(t){ 25 | t.plan(2); 26 | var frame = new Frame({ 27 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 28 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 29 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 30 | }); 31 | 32 | var expected = { 33 | "0" : { 
34 | "0" : 4, 35 | "1" : 1 36 | }, 37 | "1" : { 38 | "0" : 1, 39 | "1" : 3 40 | } 41 | }; 42 | 43 | 44 | var g = frame.groupby(["id_0", "id_1"]); 45 | var actual = g.count(); 46 | 47 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 48 | 49 | 50 | var g = frame.groupby("id_0", "id_1"); 51 | var actual = g.count(); 52 | 53 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 54 | }); 55 | 56 | 57 | 58 | var dataDirectory = 'test/data/groupby.count/', 59 | testFile = 'small.json'; 60 | 61 | var RTOL = 1e-05, // 1e-05 62 | ATOL = 1e-12; // 1e-12 63 | 64 | var floader = require('floader'), 65 | dtest = require('../lib/test'); 66 | 67 | floader.load(dataDirectory + testFile, function(err, config){ 68 | 69 | var suite = JSON.parse(config); 70 | 71 | for(var i = 0; i < suite.length; i++){ 72 | 73 | var prefix = String("0000" + (i + 1)).slice(-4); 74 | 75 | // directory containing matrix data files for current test 76 | var directory = dataDirectory + prefix + '/'; 77 | 78 | var test = suite[i]; 79 | /* 80 | "N" : 10000, 81 | "id" : [{"M" : 3, "strings" : false}, {"M" : 3, "strings" : false}], 82 | "value" : [{"M" : 100}, {"M" : 100}] 83 | */ 84 | 85 | var names = test.id.map(function(spec, i){ return "id_" + i;}); 86 | var types = test.id.map(function(spec, i){ return spec['type'];}); 87 | 88 | var N = test.N; // number of rows 89 | distincts = test.id.map(function(spec, i){ return spec.K; }); 90 | 91 | var testName = "groupby.count: " + N + " x " + "(" + distincts.join(", ") + ")" 92 | tape(testName, generateTestCase(directory, names, types, ["value_0"], [test.value[0].type])); 93 | } 94 | }); 95 | 96 | function generateTestCase(directory, id_names, id_types, value_names, value_types){ 97 | return function(t){ 98 | t.plan(1); 99 | 100 | var names = id_names.concat(value_names); 101 | var types = id_types.concat(value_types); 102 | // load columns from files 103 | dtest.load(directory, names, types, function(err, columns){ 104 | 105 | floader.load(directory + "out.json", function(err, out){ 106 | var expected = JSON.parse(out); 107 | 108 | var column_set = {}; 109 | for (var i = 0; i < names.length; i++){ 110 | var name = names[i]; 111 | var column = columns[i]; 112 | column_set[name] = column; 113 | } 114 | var frame = new Frame(column_set); 115 | 116 | var g = frame.groupby(id_names); 117 | var actual = g.count(); 118 | 119 | var assert; 120 | if(value_types[0] in dtest.float_types){ 121 | assert = dtest.assert.tree.allclose; 122 | } else { 123 | assert = dtest.assert.tree.equal; 124 | } 125 | 126 | assert(t, actual, expected, null, RTOL, ATOL); 127 | }); 128 | 129 | }); 130 | }; 131 | } 132 | -------------------------------------------------------------------------------- /test/groupby.js: -------------------------------------------------------------------------------- 1 | var tape = require('tape'), 2 | Frame = require('../lib/frame'); 3 | 4 | tape("groupby has correct index", function(t){ 5 | t.plan(1); 6 | var frame = new Frame({ 7 | "id" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 8 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 9 | }); 10 | 11 | var expected = { 12 | "0" : [0, 1, 2, 5, 7], 13 | "1" : [3, 4, 6, 8] 14 | }; 15 | 16 | var g = frame.groupby("id"); 17 | var actual = g._index; 18 | 19 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 20 | 21 | }); 22 | 23 | tape("groupby with two arguments has correct index", function(t){ 24 | t.plan(1); 25 | var frame = new Frame({ 26 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 27 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 28 | "value" : 
[1, 2, 2, 3, 1, 3, 4, 2, 1] 29 | }); 30 | 31 | var expected = { 32 | "0" : { 33 | "0" : [0, 1, 5, 7], 34 | "1" : [2] 35 | }, 36 | "1" : { 37 | "0" : [4], 38 | "1" : [3, 6, 8] 39 | } 40 | }; 41 | 42 | var g = frame.groupby("id_0", "id_1"); 43 | var actual = g._index; 44 | 45 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 46 | }); 47 | 48 | tape("successive groupby has correct index", function(t){ 49 | t.plan(1); 50 | var frame = new Frame({ 51 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 52 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 53 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 54 | }); 55 | 56 | var expected = { 57 | "0" : { 58 | "0" : [0, 1, 5, 7], 59 | "1" : [2] 60 | }, 61 | "1" : { 62 | "0" : [4], 63 | "1" : [3, 6, 8] 64 | } 65 | }; 66 | 67 | var g = frame.groupby("id_0"); 68 | g = g.groupby("id_1"); 69 | var actual = g._index; 70 | 71 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 72 | }); 73 | 74 | 75 | var dataDirectory = 'test/data/groupby.count/', 76 | testFile = 'small.json'; 77 | 78 | var RTOL = 1e-05, // 1e-05 79 | ATOL = 1e-12; // 1e-12 80 | 81 | var floader = require('floader'), 82 | dtest = require('../lib/test'); 83 | 84 | floader.load(dataDirectory + testFile, function(err, config){ 85 | 86 | var suite = JSON.parse(config); 87 | 88 | for(var i = 0; i < suite.length; i++){ 89 | 90 | var prefix = String("0000" + (i + 1)).slice(-4); 91 | 92 | // directory containing matrix data files for current test 93 | var directory = dataDirectory + prefix + '/'; 94 | 95 | var test = suite[i]; 96 | var names = test.id.map(function(spec, i){ return "id_" + i;}); 97 | var types = test.id.map(function(spec, i){ return spec['type'];}); 98 | 99 | var N = test.N; // number of rows 100 | distincts = test.id.map(function(spec, i){ return spec.K; }); 101 | 102 | var testName = "groupby.count: " + N + " x " + "(" + distincts.join(", ") + ")" 103 | tape(testName, generateTestCase(directory, names, types, ["value_0"], [test.value[0].type])); 104 | } 105 | }); 106 | 107 | function generateTestCase(directory, id_names, id_types, value_names, value_types){ 108 | return function(t){ 109 | t.plan(1); 110 | 111 | var names = id_names.concat(value_names); 112 | var types = id_types.concat(value_types); 113 | // load columns from files 114 | dtest.load(directory, names, types, function(err, columns){ 115 | 116 | floader.load(directory + "out.json", function(err, out){ 117 | var expected = JSON.parse(out); 118 | 119 | var column_set = {}; 120 | for (var i = 0; i < names.length; i++){ 121 | var name = names[i]; 122 | var column = columns[i]; 123 | column_set[name] = column; 124 | } 125 | var frame = new Frame(column_set); 126 | 127 | var g = frame; 128 | for(var i = 0; i < id_names.length; i++){ 129 | id_name = id_names[i]; 130 | g = g.groupby(id_name); 131 | } 132 | var actual = g.count(); 133 | 134 | var assert; 135 | if(value_types[0] in dtest.float_types){ 136 | assert = dtest.assert.tree.allclose; 137 | } else { 138 | assert = dtest.assert.tree.equal; 139 | } 140 | 141 | assert(t, actual, expected, null, RTOL, ATOL); 142 | }); 143 | 144 | }); 145 | }; 146 | } 147 | -------------------------------------------------------------------------------- /test/groupby.mean.js: -------------------------------------------------------------------------------- 1 | var tape = require('tape'), 2 | Frame = require('../lib/frame'), 3 | dtest = require('../lib/test'); 4 | 5 | var RTOL = 1e-05, // 1e-05 6 | ATOL = 1e-12; // 1e-12 7 | 8 | // simple instructive test cases 9 | function simpleTestCases(){ 10 | 
11 | tape("groupby accepts single string", function(t){ 12 | t.plan(1); 13 | var frame = new Frame({ 14 | "id" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 15 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 16 | }); 17 | 18 | var expected = { 19 | 0: 2, // 1 + 2 + 2 + 3 + 2 20 | 1: 2.25 // 3 + 1 + 4 + 1 21 | }; 22 | 23 | frame = frame.groupby("id"); 24 | var actual = frame.mean("value"); 25 | 26 | dtest.assert.tree.allclose(t, actual, expected, "mean", RTOL, ATOL); 27 | 28 | }); 29 | 30 | tape("groupby accepts single string", function(t){ 31 | t.plan(1); 32 | var frame = new Frame({ 33 | "id" : [ 0, 0, 0, 1, 1, 0, 1, 0, 1], 34 | "value" : [1.4, 10.3, 24.2, 31.2, 1.9, 8.6, 4.7, 21.2, 7.4] 35 | }); 36 | 37 | var expected = { 38 | 0: 13.14, // 1.4 + 10.3 + 24.2 + 8.6 + 21.2 39 | 1: 11.3 // 31.2 + 1.9 + 4.7 + 7.4 40 | }; 41 | 42 | frame = frame.groupby("id"); 43 | var actual = frame.mean("value"); 44 | 45 | dtest.assert.tree.allclose(t, actual, expected, "mean", RTOL, ATOL); 46 | 47 | }); 48 | 49 | tape("groupby accepts single string argument over string variable", function(t){ 50 | t.plan(1); 51 | var frame = new Frame({ 52 | "id" : ["b", "a", "a", "a", "b", "a", "b", "a", "b"], 53 | "value" : [ 3, 1, 2, 2, 1, 3, 4, 2, 1] 54 | }); 55 | expected = { 56 | "a": 2, // 1 + 2 + 2 + 3 + 2 57 | "b": 2.25 // 3 + 1 + 4 + 1 58 | }; 59 | 60 | frame = frame.groupby("id"); 61 | var actual = frame.mean("value"); 62 | 63 | dtest.assert.tree.allclose(t, actual, expected, "mean", RTOL, ATOL); 64 | }); 65 | 66 | tape("groupby accepts array argument", function(t){ 67 | t.plan(1); 68 | var frame = new Frame({ 69 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 70 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 71 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 72 | }); 73 | 74 | var expected = { 75 | "0" : { 76 | "0" : 2, // 1 + 2 + 3 + 2 77 | "1" : 2 // 2 78 | }, 79 | "1" : { 80 | "0" : 1, // 1 81 | "1" : 2.6666666666 // 3 + 4 + 1 82 | } 83 | }; 84 | 85 | frame = frame.groupby(["id_0", "id_1"]); 86 | var actual = frame.mean("value"); 87 | 88 | dtest.assert.tree.allclose(t, actual, expected, "mean", RTOL, ATOL); 89 | }); 90 | 91 | tape("groupby accepts multiple string arguments", function(t){ 92 | t.plan(1); 93 | var frame = new Frame({ 94 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 95 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 96 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 97 | }); 98 | 99 | var expected = { 100 | "0" : { 101 | "0" : 2, // 1 + 2 + 3 + 2 102 | "1" : 2 // 2 103 | }, 104 | "1" : { 105 | "0" : 1, // 1 106 | "1" : 2.6666666666 // 3 + 4 + 1 107 | } 108 | }; 109 | 110 | 111 | frame = frame.groupby("id_0", "id_1"); 112 | var actual = frame.mean("value"); 113 | 114 | dtest.assert.tree.allclose(t, actual, expected, "mean", RTOL, ATOL); 115 | }); 116 | 117 | tape("mean works without groupby", function(t){ 118 | t.plan(1); 119 | var frame = new Frame({ 120 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 121 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 122 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 123 | }); 124 | 125 | var expected = 2.11111111111111; // (1 + 2 + 2 + 3 + 1 + 3 + 4 + 2 + 1) / 9 126 | 127 | var actual = frame.mean("value"); 128 | 129 | dtest.assert.close(t, actual, expected, "close", RTOL, ATOL); 130 | }); 131 | 132 | tape("mean works without groupby", function(t){ 133 | t.plan(1); 134 | var frame = new Frame({ 135 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 136 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 137 | "value" : [3.5, 4.0, 2.1, 3.4, 1.3, 3.8, 4.2, 2.0, 1.5] 138 | }); 139 | 140 | var expected = 2.8666666666; // (3.5, 4.0, 2.1, 3.4, 1.3, 3.8, 4.2, 2.0, 1.5) / 9 141 
| 142 | var actual = frame.mean("value"); 143 | 144 | dtest.assert.close(t, actual, expected, "close", RTOL, ATOL); 145 | }); 146 | } 147 | 148 | 149 | var dataDirectory = 'test/data/groupby.mean/', 150 | testFile = 'small.json'; 151 | 152 | var floader = require('floader'), 153 | dtest = require('../lib/test'); 154 | 155 | floader.load(dataDirectory + testFile, function(err, config){ 156 | 157 | var suite = JSON.parse(config); 158 | simpleTestCases(); 159 | 160 | for(var i = 0; i < suite.length; i++){ 161 | 162 | var prefix = String("0000" + (i + 1)).slice(-4); 163 | 164 | // directory containing matrix data files for current test 165 | var directory = dataDirectory + prefix + '/'; 166 | 167 | var test = suite[i]; 168 | 169 | var names = test.id.map(function(spec, i){ return "id_" + i;}); 170 | var types = test.id.map(function(spec, i){ return spec['type'];}); 171 | 172 | var N = test.N; // number of rows 173 | var distincts = test.id.map(function(spec, i){ return spec.K; }); 174 | 175 | var testName = "groupby.mean: " + N + " x " + "(" + distincts.join(", ") + ")" 176 | tape(testName, generateTestCase(directory, names, types, ["value_0"], [test.value[0].type])); 177 | } 178 | }); 179 | 180 | var OUT_FILENAME = "out.json"; 181 | 182 | 183 | function generateTestCase(directory, id_names, id_types, value_names, value_types){ 184 | return function(t){ 185 | t.plan(1); 186 | 187 | var names = id_names.concat(value_names); 188 | var types = id_types.concat(value_types); 189 | 190 | // which columns require a key file? 191 | var key_names = id_names.filter(function(item, i){ 192 | return id_types[i] in dtest.string_types 193 | }); 194 | var key_types = id_types.filter(function(item, i){ 195 | return item in dtest.string_types 196 | }); 197 | 198 | // load columns from files 199 | dtest.load(directory, names, types, function(err, columns){ 200 | 201 | // load key files 202 | dtest.load_key(directory, key_names, key_types, function(err, keys){ 203 | 204 | floader.load(directory + OUT_FILENAME, function(err, out){ 205 | var expected = JSON.parse(out); 206 | 207 | var column_set = {}; 208 | for (var i = 0; i < names.length; i++){ 209 | var name = names[i]; 210 | var column = columns[i]; 211 | column_set[name] = column; 212 | } 213 | // keys map a small set of integers to other things (like strings) 214 | // they're a very simple form of fixed length coding 215 | var key_set = {}; 216 | for (var i = 0; i < keys.length; i++){ 217 | var name = key_names[i]; 218 | var key = keys[i]; 219 | key_set[name] = key; 220 | } 221 | 222 | var frame = new Frame(column_set, key_set); 223 | 224 | var g = frame.groupby(id_names); 225 | var actual = g.mean(value_names[0]); 226 | 227 | var assert = dtest.assert.tree.allclose; 228 | 229 | //console.log(actual); 230 | assert(t, actual, expected, null, RTOL, ATOL); 231 | }); 232 | 233 | }); 234 | }); 235 | }; 236 | } 237 | -------------------------------------------------------------------------------- /test/groupby.sum.js: -------------------------------------------------------------------------------- 1 | var tape = require('tape'), 2 | Frame = require('../lib/frame'); 3 | 4 | // simple instructive test cases 5 | function simpleTestCases(){ 6 | 7 | tape("groupby accepts single string", function(t){ 8 | t.plan(1); 9 | var frame = new Frame({ 10 | "id" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 11 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 12 | }); 13 | 14 | var expected = { 15 | 0: 10, // 1 + 2 + 2 + 3 + 2 16 | 1: 9 // 3 + 1 + 4 + 1 17 | }; 18 | 19 | frame = frame.groupby("id"); 20 | 
var actual = frame.sum("value"); 21 | 22 | t.equals(JSON.stringify(actual), JSON.stringify(expected), "reduce"); 23 | 24 | }); 25 | 26 | tape("groupby accepts single string argument over string variable", function(t){ 27 | t.plan(1); 28 | var frame = new Frame({ 29 | "id" : ["b", "a", "a", "a", "b", "a", "b", "a", "b"], 30 | "value" : [ 3, 1, 2, 2, 1, 3, 4, 2, 1] 31 | }); 32 | expected = { 33 | "a": 10, // 1 + 2 + 2 + 3 + 2 34 | "b": 9 // 3 + 1 + 4 + 1 35 | }; 36 | 37 | frame = frame.groupby("id"); 38 | var actual = frame.sum("value"); 39 | 40 | t.equals(JSON.stringify(actual), JSON.stringify(expected), "reduce"); 41 | 42 | }); 43 | 44 | tape("groupby accepts array argument", function(t){ 45 | t.plan(1); 46 | var frame = new Frame({ 47 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 48 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 49 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 50 | }); 51 | 52 | var expected = { 53 | "0" : { 54 | "0" : 8, // 1 + 2 + 3 + 2 55 | "1" : 2 // 2 56 | }, 57 | "1" : { 58 | "0" : 1, // 1 59 | "1" : 8 // 3 + 4 + 1 60 | } 61 | }; 62 | 63 | 64 | frame = frame.groupby(["id_0", "id_1"]); 65 | var actual = frame.sum("value"); 66 | 67 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 68 | }); 69 | 70 | tape("groupby accepts multiple string arguments", function(t){ 71 | t.plan(1); 72 | var frame = new Frame({ 73 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 74 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 75 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 76 | }); 77 | 78 | var expected = { 79 | "0" : { 80 | "0" : 8, // 1 + 2 + 3 + 2 81 | "1" : 2 // 2 82 | }, 83 | "1" : { 84 | "0" : 1, // 1 85 | "1" : 8 // 3 + 4 + 1 86 | } 87 | }; 88 | 89 | 90 | frame = frame.groupby("id_0", "id_1"); 91 | var actual = frame.sum("value"); 92 | 93 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 94 | }); 95 | 96 | tape("sum works without groupby", function(t){ 97 | t.plan(1); 98 | var frame = new Frame({ 99 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 100 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 101 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 102 | }); 103 | 104 | var expected = 19; // 1 + 2 + 2 + 3 + 1 + 3 + 4 + 2 + 1 105 | 106 | var actual = frame.sum("value"); 107 | 108 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 109 | }); 110 | 111 | tape("groupby sum, reduce over keyed column", function(t){ 112 | t.plan(1); 113 | var frame = new Frame({ 114 | "id" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 115 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 116 | },{ 117 | "value" : [1, 2, 3, 4, 5] 118 | }); 119 | 120 | var expected = { 121 | 0 : 15, // 2 + 3 + 3 + 4 + 3 122 | 1 : 13 // 4 + 2 + 5 + 2 123 | } 124 | 125 | var g = frame.groupby("id"); 126 | var actual = g.sum("value"); 127 | 128 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 129 | 130 | }); 131 | } 132 | 133 | var RTOL = 1e-05, // 1e-05 134 | ATOL = 1e-12; // 1e-12 135 | 136 | var dataDirectory = 'test/data/groupby.sum/', 137 | testFile = 'small.json'; 138 | 139 | var floader = require('floader'), 140 | dtest = require('../lib/test'); 141 | 142 | floader.load(dataDirectory + testFile, function(err, config){ 143 | 144 | var suite = JSON.parse(config); 145 | simpleTestCases(); 146 | 147 | for(var i = 0; i < suite.length; i++){ 148 | 149 | var prefix = String("0000" + (i + 1)).slice(-4); 150 | 151 | // directory containing matrix data files for current test 152 | var directory = dataDirectory + prefix + '/'; 153 | 154 | var test = suite[i]; 155 | 156 | var names = test.id.map(function(spec, i){ return "id_" + i;}); 157 | var types = 
test.id.map(function(spec, i){ return spec['type'];}); 158 | 159 | var N = test.N; // number of rows 160 | var distincts = test.id.map(function(spec, i){ return spec.K; }); 161 | 162 | var testName = "groupby.sum: " + N + " x " + "(" + distincts.join(", ") + ")" 163 | tape(testName, generateTestCase(directory, names, types, ["value_0"], [test.value[0].type])); 164 | } 165 | }); 166 | 167 | var OUT_FILENAME = "out.json"; 168 | 169 | 170 | function generateTestCase(directory, id_names, id_types, value_names, value_types){ 171 | return function(t){ 172 | t.plan(1); 173 | 174 | var names = id_names.concat(value_names); 175 | var types = id_types.concat(value_types); 176 | 177 | // which columns require a key file? 178 | var key_names = id_names.filter(function(item, i){ 179 | return id_types[i] in dtest.string_types 180 | }); 181 | var key_types = id_types.filter(function(item, i){ 182 | return item in dtest.string_types 183 | }); 184 | 185 | // load columns from files 186 | dtest.load(directory, names, types, function(err, columns){ 187 | 188 | // load key files 189 | dtest.load_key(directory, key_names, key_types, function(err, keys){ 190 | 191 | floader.load(directory + OUT_FILENAME, function(err, out){ 192 | var expected = JSON.parse(out); 193 | 194 | var column_set = {}; 195 | for (var i = 0; i < names.length; i++){ 196 | var name = names[i]; 197 | var column = columns[i]; 198 | column_set[name] = column; 199 | } 200 | // keys map a small set of integers to other things (like strings) 201 | // they're a very simple form of fixed length coding 202 | var key_set = {}; 203 | for (var i = 0; i < keys.length; i++){ 204 | var name = key_names[i]; 205 | var key = keys[i]; 206 | key_set[name] = key; 207 | } 208 | 209 | var frame = new Frame(column_set, key_set); 210 | 211 | var g = frame.groupby(id_names); 212 | var actual = g.sum(value_names[0]); 213 | 214 | var assert; 215 | if(value_types[0] in dtest.float_types){ 216 | assert = dtest.assert.tree.allclose; 217 | } else { 218 | assert = dtest.assert.tree.equal; 219 | } 220 | 221 | //console.log(actual); 222 | assert(t, actual, expected, null, RTOL, ATOL); 223 | }); 224 | 225 | }); 226 | }); 227 | }; 228 | } 229 | -------------------------------------------------------------------------------- /test/groupby.where.sum.js: -------------------------------------------------------------------------------- 1 | var tape = require('tape'), 2 | Frame = require('../lib/frame'); 3 | 4 | // simple instructive test cases 5 | function simpleTestCases(){ 6 | 7 | tape("sum works with where before groupby", function(t){ 8 | t.plan(1); 9 | var frame = new Frame({ 10 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 11 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 12 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 13 | }); 14 | 15 | var expected = { 16 | "0" : 1, // 1 17 | "1" : 8 // 3 + 4 + 1 18 | }; 19 | 20 | var actual = frame.where("id_0", 1).groupby("id_1").sum("value"); 21 | 22 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 23 | }); 24 | 25 | tape("sum works with where before groupby", function(t){ 26 | t.plan(1); 27 | var frame = new Frame({ 28 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 29 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 30 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 31 | }); 32 | 33 | var expected = { 34 | "0" : 8, 35 | "1" : 2 36 | }; 37 | 38 | var actual = frame.where("id_0", 0).groupby("id_1").sum("value"); 39 | 40 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 41 | }); 42 | 43 | tape("sum works with groupby before where", function(t){ 44 | 
t.plan(1); 45 | var frame = new Frame({ 46 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 47 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 48 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 49 | }); 50 | 51 | var expected = { 52 | "0" : 1, 53 | "1" : 8 54 | }; 55 | 56 | var actual = frame.groupby("id_1").where("id_0", 1).sum("value"); 57 | 58 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 59 | }); 60 | 61 | 62 | 63 | tape("sum works with where.in before groupby", function(t){ 64 | t.plan(1); 65 | var frame = new Frame({ 66 | "id_0" : [0, 2, 0, 1, 1, 0, 2, 0, 1], 67 | "id_1" : [0, 0, 1, 0, 0, 0, 1, 1, 1], 68 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 69 | }); 70 | 71 | var expected = { 72 | "0" : 6, // 1 + 2 + 3 73 | "1" : 8 // 2 + 4 + 2 74 | }; 75 | frame = frame.where("id_0", [0, 2]).groupby("id_1"); 76 | var actual = frame.sum("value"); 77 | 78 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 79 | }); 80 | } 81 | 82 | //simpleTestCases(); 83 | 84 | var SAMPLE = 10; 85 | function numberCompare(a, b){ return a - b; } 86 | // get a predefined subset of a column (matches test data generation) 87 | function generate_subset(column){ 88 | //column = id_columns[id_name] 89 | var uniques = {}; 90 | for(var i = 0; i < SAMPLE; i++){ 91 | uniques[column[i]] = column[i]; 92 | } 93 | var keys = Object.keys(uniques); 94 | var subset = keys.map(function(k){ return uniques[k]}); 95 | 96 | l = Math.ceil(subset.length / 2); 97 | return subset.sort(numberCompare).slice(0, l); 98 | } 99 | 100 | var RTOL = 1e-05, // 1e-05 101 | ATOL = 1e-12; // 1e-12 102 | 103 | var dataDirectory = 'test/data/groupby.where.sum/', 104 | testFile = 'small.json'; 105 | 106 | var floader = require('floader'), 107 | dtest = require('../lib/test'); 108 | 109 | floader.load(dataDirectory + testFile, function(err, config){ 110 | 111 | var suite = JSON.parse(config); 112 | simpleTestCases(); 113 | 114 | for(var i = 0; i < suite.length; i++){ 115 | 116 | var prefix = String("0000" + (i + 1)).slice(-4); 117 | 118 | // directory containing matrix data files for current test 119 | var directory = dataDirectory + prefix + '/'; 120 | 121 | var test = suite[i]; 122 | 123 | var names = test.id.map(function(spec, i){ return "id_" + i;}); 124 | var types = test.id.map(function(spec, i){ return spec['type'];}); 125 | 126 | var N = test.N; // number of rows 127 | var distincts = test.id.map(function(spec, i){ return spec.K; }); 128 | 129 | var testName = "groupby.where.sum: " + N + " x " + "(" + distincts.join(", ") + ")" 130 | tape(testName, generateTestCase(directory, names, types, ["value_0"], [test.value[0].type])); 131 | } 132 | }); 133 | 134 | var OUT_FILENAME = "out.json"; 135 | 136 | 137 | function generateTestCase(directory, id_names, id_types, value_names, value_types){ 138 | return function(t){ 139 | t.plan(1); 140 | 141 | var names = id_names.concat(value_names); 142 | var types = id_types.concat(value_types); 143 | 144 | // which columns require a key file? 
145 | var key_names = id_names.filter(function(item, i){ 146 | return id_types[i] in dtest.string_types 147 | }); 148 | var key_types = id_types.filter(function(item, i){ 149 | return item in dtest.string_types 150 | }); 151 | 152 | // load columns from files 153 | dtest.load(directory, names, types, function(err, columns){ 154 | 155 | // load key files 156 | dtest.load_key(directory, key_names, key_types, function(err, keys){ 157 | 158 | floader.load(directory + OUT_FILENAME, function(err, out){ 159 | var expected = JSON.parse(out); 160 | 161 | var column_set = {}; 162 | for (var i = 0; i < names.length; i++){ 163 | var name = names[i]; 164 | var column = columns[i]; 165 | column_set[name] = column; 166 | } 167 | // keys map a small set of integers to other things (like strings) 168 | // they're a very simple form of fixed length coding 169 | var key_set = {}; 170 | for (var i = 0; i < keys.length; i++){ 171 | var name = key_names[i]; 172 | var key = keys[i]; 173 | key_set[name] = key; 174 | } 175 | 176 | var frame = new Frame(column_set, key_set); 177 | 178 | var subset = generate_subset(column_set["id_0"]); 179 | //console.log(subset); 180 | frame = frame.where("id_0", subset).groupby(id_names); 181 | 182 | var actual = frame.sum(value_names[0]); 183 | 184 | var assert; 185 | if(value_types[0] in dtest.float_types){ 186 | assert = dtest.assert.tree.allclose; 187 | } else { 188 | assert = dtest.assert.tree.equal; 189 | } 190 | 191 | //console.log(actual); 192 | var success = assert(t, actual, expected, null, RTOL, ATOL); 193 | /* 194 | if(!success){ 195 | console.log(actual); 196 | console.log(expected); 197 | }*/ 198 | }); 199 | 200 | }); 201 | }); 202 | }; 203 | } 204 | -------------------------------------------------------------------------------- /test/join.js: -------------------------------------------------------------------------------- 1 | var tape = require('tape'), 2 | Frame = require('../lib/frame'); 3 | 4 | tape("join to smaller frame produces correct virtual column", function(t){ 5 | t.plan(1); 6 | var frame0 = new Frame({ 7 | "value0" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 8 | }); 9 | 10 | //console.log(JSON.stringify(frame0._cols)); 11 | var frame1 = new Frame({ 12 | "value1" : [1, 2] 13 | }); 14 | 15 | var link = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 16 | 17 | var joined = frame0.join(frame1, link); 18 | 19 | var expected = [1, 1, 1, 2, 2, 1, 2, 1, 2]; // 1 + 1 + 1 + 2 + 2 + 1 + 2 + 1 + 2 20 | 21 | var actual = joined["value1"]; 22 | 23 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 24 | 25 | }); 26 | 27 | tape("join to smaller frame produces correct sum", function(t){ 28 | t.plan(1); 29 | var frame0 = new Frame({ 30 | "value0" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 31 | }); 32 | 33 | var frame1 = new Frame({ 34 | "value1" : [1, 2] 35 | }); 36 | 37 | var link = [0, 0, 0, 1, 1, 0, 1, 0, 1]; 38 | 39 | var joined = frame0.join(frame1, link); 40 | 41 | var expected = 13; // 1 + 1 + 1 + 2 + 2 + 1 + 2 + 1 + 2 42 | 43 | var actual = joined.sum("value1"); 44 | 45 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 46 | 47 | }); 48 | 49 | tape("join to larger frame produces correct virtual column", function(t){ 50 | t.plan(1); 51 | var frame0 = new Frame({ 52 | "value0" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 53 | }); 54 | 55 | //console.log(JSON.stringify(frame0._cols)); 56 | var frame1 = new Frame({ 57 | "value1" : [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] 58 | }); 59 | 60 | var link = [9, 1, 12, 2, 3, 7, 10, 5, 11]; 61 | 62 | var joined = frame0.join(frame1, link); 63 | 64 | var 
expected = [10, 2, 13, 3, 4, 8, 11, 6, 12]; // 1 + 1 + 1 + 2 + 2 + 1 + 2 + 1 + 2 65 | 66 | var actual = joined["value1"]; 67 | 68 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 69 | 70 | }); 71 | 72 | tape("join to larger frame produces correct argmax and argmin", function(t){ 73 | t.plan(2); 74 | var frame0 = new Frame({ 75 | "value0" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 76 | }); 77 | 78 | //console.log(JSON.stringify(frame0._cols)); 79 | var frame1 = new Frame({ 80 | "value1" : [5, 2, 13, 4, 6, 1, 7, 8, 9, 10, 11, 12, 3] 81 | }); 82 | 83 | var link = [9, 3, 12, 2, 1, 7, 10, 5, 11]; 84 | 85 | var joined = frame0.join(frame1, link); 86 | 87 | var expected = 3; 88 | 89 | var actual = joined.argmax("value1"); 90 | 91 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 92 | 93 | var expected = 7; 94 | 95 | var actual = joined.argmin("value1"); 96 | 97 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 98 | }); 99 | 100 | tape("join to larger frame produces correct sum", function(t){ 101 | t.plan(1); 102 | var frame0 = new Frame({ 103 | "value0" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 104 | }); 105 | 106 | //console.log(JSON.stringify(frame0._cols)); 107 | var frame1 = new Frame({ 108 | "value1" : [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] 109 | }); 110 | 111 | var link = [9, 1, 12, 2, 3, 7, 10, 5, 11]; 112 | 113 | var joined = frame0.join(frame1, link); 114 | 115 | var expected = 69; // 10 + 2 + 13 + 3 + 4 + 8 + 11 + 6 + 12 116 | 117 | var actual = joined.sum("value1"); 118 | 119 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 120 | 121 | }); 122 | 123 | tape("join with where produces correct sum", function(t){ 124 | t.plan(1); 125 | var frame0 = new Frame({ 126 | "value0" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 127 | }); 128 | 129 | //console.log(JSON.stringify(frame0._cols)); 130 | var frame1 = new Frame({ 131 | "value1" : [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] 132 | }); 133 | 134 | var link = [9, 1, 12, 2, 3, 7, 10, 5, 11]; 135 | 136 | var joined = frame0.join(frame1, link); 137 | 138 | var filtered = joined.where("value0", function(v){ return v > 2; }); 139 | var expected = 22; // 3 + 8 + 11 140 | 141 | var actual = filtered.sum("value1"); 142 | 143 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 144 | 145 | }); 146 | 147 | tape("join with where produces correct argmax", function(t){ 148 | t.plan(2); 149 | var frame0 = new Frame({ 150 | "value0" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 151 | }); 152 | 153 | //console.log(JSON.stringify(frame0._cols)); 154 | var frame1 = new Frame({ 155 | "value1" : [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] 156 | }); 157 | 158 | var link = [9, 1, 12, 2, 3, 7, 10, 5, 11]; 159 | 160 | var joined = frame0.join(frame1, link); 161 | 162 | var filtered = joined.where("value0", function(v){ return v > 2; }); 163 | var expected = 2; // 3 + 8 + 11 164 | 165 | var actual = filtered.argmax("value1"); 166 | 167 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 168 | 169 | var expected = 11; 170 | 171 | var argmax = actual; 172 | var actual = filtered["value1"][argmax]; 173 | 174 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 175 | }); 176 | 177 | /* 178 | tape("groupby has correct index", function(t){ 179 | t.plan(1); 180 | var frame = new Frame({ 181 | "id" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 182 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 183 | }); 184 | 185 | var expected = { 186 | "0" : [0, 1, 2, 5, 7], 187 | "1" : [3, 4, 6, 8] 188 | }; 189 | 190 | var g = frame.groupby("id"); 191 | var actual = g._index; 192 | 
193 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 194 | 195 | }); 196 | 197 | tape("groupby with two arguments has correct index", function(t){ 198 | t.plan(1); 199 | var frame = new Frame({ 200 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 201 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 202 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 203 | }); 204 | 205 | var expected = { 206 | "0" : { 207 | "0" : [0, 1, 5, 7], 208 | "1" : [2] 209 | }, 210 | "1" : { 211 | "0" : [4], 212 | "1" : [3, 6, 8] 213 | } 214 | }; 215 | 216 | var g = frame.groupby("id_0", "id_1"); 217 | var actual = g._index; 218 | 219 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 220 | }); 221 | 222 | tape("successive groupby has correct index", function(t){ 223 | t.plan(1); 224 | var frame = new Frame({ 225 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 226 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 227 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 228 | }); 229 | 230 | var expected = { 231 | "0" : { 232 | "0" : [0, 1, 5, 7], 233 | "1" : [2] 234 | }, 235 | "1" : { 236 | "0" : [4], 237 | "1" : [3, 6, 8] 238 | } 239 | }; 240 | 241 | var g = frame.groupby("id_0"); 242 | g = g.groupby("id_1"); 243 | var actual = g._index; 244 | 245 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 246 | }); 247 | 248 | */ 249 | /* 250 | var dataDirectory = 'test/data/groupby.count/', 251 | testFile = 'small.json'; 252 | 253 | var RTOL = 1e-05, // 1e-05 254 | ATOL = 1e-12; // 1e-12 255 | 256 | var floader = require('floader'), 257 | dtest = require('../lib/test'); 258 | 259 | floader.load(dataDirectory + testFile, function(err, config){ 260 | 261 | var suite = JSON.parse(config); 262 | 263 | for(var i = 0; i < suite.length; i++){ 264 | 265 | var prefix = String("0000" + (i + 1)).slice(-4); 266 | 267 | // directory containing matrix data files for current test 268 | var directory = dataDirectory + prefix + '/'; 269 | 270 | var test = suite[i]; 271 | var names = test.id.map(function(spec, i){ return "id_" + i;}); 272 | var types = test.id.map(function(spec, i){ return spec['type'];}); 273 | 274 | var N = test.N; // number of rows 275 | distincts = test.id.map(function(spec, i){ return spec.K; }); 276 | 277 | var testName = "groupby.count: " + N + " x " + "(" + distincts.join(", ") + ")" 278 | tape(testName, generateTestCase(directory, names, types, ["value_0"], [test.value[0].type])); 279 | } 280 | }); 281 | 282 | function generateTestCase(directory, id_names, id_types, value_names, value_types){ 283 | return function(t){ 284 | t.plan(1); 285 | 286 | var names = id_names.concat(value_names); 287 | var types = id_types.concat(value_types); 288 | // load columns from files 289 | dtest.load(directory, names, types, function(err, columns){ 290 | 291 | floader.load(directory + "out.json", function(err, out){ 292 | var expected = JSON.parse(out); 293 | 294 | var column_set = {}; 295 | for (var i = 0; i < names.length; i++){ 296 | var name = names[i]; 297 | var column = columns[i]; 298 | column_set[name] = column; 299 | } 300 | var frame = new Frame(column_set); 301 | 302 | var g = frame; 303 | for(var i = 0; i < id_names.length; i++){ 304 | id_name = id_names[i]; 305 | g = g.groupby(id_name); 306 | } 307 | var actual = g.count(); 308 | 309 | var assert; 310 | if(value_types[0] in dtest.float_types){ 311 | assert = dtest.assert.tree.allclose; 312 | } else { 313 | assert = dtest.assert.tree.equal; 314 | } 315 | 316 | assert(t, actual, expected, null, RTOL, ATOL); 317 | }); 318 | 319 | }); 320 | }; 321 | } 322 | */ 323 | 
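The join tests above attach `frame1` to `frame0` through a `link` array of row indices; conceptually the joined (virtual) column is a gather over that link. A minimal sketch of the idea — the `gather` helper is illustrative, not the library's implementation:

```javascript
// Conceptual sketch: row i of the joined column is frame1's column
// value at index link[i].
function gather(column, link){
	var out = new Array(link.length);
	for(var i = 0; i < link.length; i++){
		out[i] = column[link[i]];
	}
	return out;
}

// Matches the first join test above:
// gather([1, 2], [0, 0, 0, 1, 1, 0, 1, 0, 1]) -> [1, 1, 1, 2, 2, 1, 2, 1, 2]
```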
-------------------------------------------------------------------------------- /test/mean.js: -------------------------------------------------------------------------------- 1 | var tape = require('tape'), 2 | Frame = require('../lib/frame'); 3 | 4 | // simple instructive test cases 5 | function simpleTestCases(){ 6 | 7 | tape("mean works with integers", function(t){ 8 | t.plan(1); 9 | var frame = new Frame({ 10 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 11 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 12 | }); 13 | 14 | var expected = 2.111111111; // 1 + 2 + 2 + 3 + 1 + 3 + 4 + 2 + 1 15 | 16 | var actual = frame.mean("value"); 17 | 18 | dtest.assert.close(t, actual, expected); 19 | }); 20 | 21 | tape("mean works with integers", function(t){ 22 | t.plan(1); 23 | var frame = new Frame({ 24 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1], 25 | "value" : [4, 2, 7, 1, 3, 6, 5, 2, 1, 7, 8] 26 | }); 27 | 28 | var expected = 4.1818181818; // (4 + 2 + 7 + 1 + 3 + 6 + 5 + 2 + 1 + 7 + 8) / 11 29 | 30 | var actual = frame.mean("value"); 31 | 32 | dtest.assert.close(t, actual, expected); 33 | }); 34 | 35 | tape("mean works floats", function(t){ 36 | t.plan(1); 37 | var frame = new Frame({ 38 | "id" : [0, 2, 0, 1, 1, 0, 2, 0, 1], 39 | "value" : [1.2, 6.4, 2.3, 12.1, 1.6, 3.5, 7.2, 2.1, 10.2] 40 | }); 41 | 42 | var expected = 5.177777777777779; // (1.2 + 6.4 + 2.3 + 12.1 + 1.6 + 3.5 + 7.2 + 2.1 + 10.2) / 9 43 | var actual = frame.mean("value"); 44 | 45 | dtest.assert.close(t, actual, expected); 46 | }); 47 | } 48 | 49 | //simpleTestCases(); 50 | 51 | var RTOL = 1e-05, // 1e-05 52 | ATOL = 1e-12; // 1e-12 53 | 54 | var dataDirectory = 'test/data/mean/', 55 | testFile = 'small.json'; 56 | 57 | var floader = require('floader'), 58 | dtest = require('../lib/test'); 59 | 60 | floader.load(dataDirectory + testFile, function(err, config){ 61 | 62 | var suite = JSON.parse(config); 63 | simpleTestCases(); 64 | 65 | for(var i = 0; i < suite.length; i++){ 66 | 67 | var prefix = String("0000" + (i + 1)).slice(-4); 68 | 69 | // directory containing matrix data files for current test 70 | var directory = dataDirectory + prefix + '/'; 71 | 72 | var test = suite[i]; 73 | 74 | var names = test.id.map(function(spec, i){ return "id_" + i;}); 75 | var types = test.id.map(function(spec, i){ return spec['type'];}); 76 | 77 | var N = test.N; // number of rows 78 | var distincts = test.id.map(function(spec, i){ return spec.K; }); 79 | 80 | var testName = "mean: " + N + " x " + "(" + distincts.join(", ") + ")" 81 | tape(testName, generateTestCase(directory, names, types, ["value_0"], [test.value[0].type])); 82 | } 83 | }); 84 | 85 | var OUT_FILENAME = "out.json"; 86 | 87 | 88 | function generateTestCase(directory, id_names, id_types, value_names, value_types){ 89 | return function(t){ 90 | t.plan(1); 91 | 92 | var names = id_names.concat(value_names); 93 | var types = id_types.concat(value_types); 94 | 95 | // which columns require a key file? 
96 | var key_names = id_names.filter(function(item, i){ 97 | return id_types[i] in dtest.string_types 98 | }); 99 | var key_types = id_types.filter(function(item, i){ 100 | return item in dtest.string_types 101 | }); 102 | 103 | // load columns from files 104 | dtest.load(directory, names, types, function(err, columns){ 105 | 106 | // load key files 107 | dtest.load_key(directory, key_names, key_types, function(err, keys){ 108 | 109 | floader.load(directory + OUT_FILENAME, function(err, out){ 110 | var expected = JSON.parse(out); 111 | 112 | var column_set = {}; 113 | for (var i = 0; i < names.length; i++){ 114 | var name = names[i]; 115 | var column = columns[i]; 116 | column_set[name] = column; 117 | } 118 | // keys map a small set of integers to other things (like strings) 119 | // they're a very simple form of fixed length coding 120 | var key_set = {}; 121 | for (var i = 0; i < keys.length; i++){ 122 | var name = key_names[i]; 123 | var key = keys[i]; 124 | key_set[name] = key; 125 | } 126 | 127 | var frame = new Frame(column_set, key_set); 128 | 129 | //console.log(subset); 130 | var actual = frame.mean("value_0"); 131 | 132 | dtest.assert.close(t, actual, expected, "close", RTOL, ATOL); 133 | }); 134 | 135 | }); 136 | }); 137 | }; 138 | } 139 | -------------------------------------------------------------------------------- /test/ungroup.js: -------------------------------------------------------------------------------- 1 | var tape = require('tape'), 2 | Frame = require('../lib/frame'); 3 | 4 | tape("ungroup single groupby has correct index", function(t){ 5 | t.plan(1); 6 | var frame = new Frame({ 7 | "id" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 8 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 9 | }); 10 | 11 | var expected; // undefined 12 | 13 | var g = frame.groupby("id"); 14 | var g = g.ungroup(); 15 | var actual = g._index; 16 | 17 | t.equals(actual, expected); 18 | 19 | }); 20 | 21 | tape("ungroup on multiple groupby has correct index", function(t){ 22 | t.plan(1); 23 | var frame = new Frame({ 24 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 25 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 26 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 27 | }); 28 | 29 | 30 | var expected = { 31 | "0" : [0, 1, 2, 5, 7], 32 | "1" : [3, 4, 6, 8] 33 | }; 34 | 35 | var g = frame.groupby("id_0", "id_1"); 36 | g = g.ungroup(); 37 | var actual = g._index; 38 | 39 | t.equals(JSON.stringify(actual), JSON.stringify(expected)); 40 | }); 41 | 42 | tape("successive ungroup on multiple groupby has correct index", function(t){ 43 | t.plan(1); 44 | var frame = new Frame({ 45 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 46 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 47 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 48 | }); 49 | 50 | var expected; // undefined 51 | 52 | var g = frame.groupby("id_0", "id_1"); 53 | g = g.ungroup(); 54 | g = g.ungroup(); 55 | var actual = g._index; 56 | 57 | t.equals(actual, expected); 58 | }); 59 | 60 | /* 61 | var dataDirectory = 'test/data/groupby.count/', 62 | testFile = 'small.json'; 63 | 64 | var RTOL = 1e-05, // 1e-05 65 | ATOL = 1e-12; // 1e-12 66 | 67 | var floader = require('floader'), 68 | dtest = require('../lib/test'); 69 | 70 | floader.load(dataDirectory + testFile, function(err, config){ 71 | 72 | var suite = JSON.parse(config); 73 | 74 | for(var i = 0; i < suite.length; i++){ 75 | 76 | var prefix = String("0000" + (i + 1)).slice(-4); 77 | 78 | // directory containing matrix data files for current test 79 | var directory = dataDirectory + prefix + '/'; 80 | 81 | var test = suite[i]; 82 | var names = 
test.id.map(function(spec, i){ return "id_" + i;}); 83 | var types = test.id.map(function(spec, i){ return spec['type'];}); 84 | 85 | var N = test.N; // number of rows 86 | distincts = test.id.map(function(spec, i){ return spec.K; }); 87 | 88 | var testName = "groupby.count: " + N + " x " + "(" + distincts.join(", ") + ")" 89 | tape(testName, generateTestCase(directory, names, types, ["value_0"], [test.value[0].type])); 90 | } 91 | }); 92 | 93 | function generateTestCase(directory, id_names, id_types, value_names, value_types){ 94 | return function(t){ 95 | t.plan(1); 96 | 97 | var names = id_names.concat(value_names); 98 | var types = id_types.concat(value_types); 99 | // load columns from files 100 | dtest.load(directory, names, types, function(err, columns){ 101 | 102 | floader.load(directory + "out.json", function(err, out){ 103 | var expected = JSON.parse(out); 104 | 105 | var column_set = {}; 106 | for (var i = 0; i < names.length; i++){ 107 | var name = names[i]; 108 | var column = columns[i]; 109 | column_set[name] = column; 110 | } 111 | var frame = new Frame(column_set); 112 | 113 | var g = frame; 114 | for(var i = 0; i < id_names.length; i++){ 115 | id_name = id_names[i]; 116 | g = g.groupby(id_name); 117 | } 118 | var actual = g.count(); 119 | 120 | var assert; 121 | if(value_types[0] in dtest.float_types){ 122 | assert = dtest.assert.tree.allclose; 123 | } else { 124 | assert = dtest.assert.tree.equal; 125 | } 126 | 127 | assert(t, actual, expected, null, RTOL, ATOL); 128 | }); 129 | 130 | }); 131 | }; 132 | } 133 | */ 134 | -------------------------------------------------------------------------------- /test/where.in.sum.js: -------------------------------------------------------------------------------- 1 | var tape = require('tape'), 2 | Frame = require('../lib/frame'); 3 | 4 | // simple instructive test cases 5 | function simpleTestCases(){ 6 | 7 | tape("sum works with where", function(t){ 8 | t.plan(1); 9 | var frame = new Frame({ 10 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 11 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 12 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 13 | }); 14 | 15 | var expected = 9; // 3 + 1 + 4 + 1 16 | 17 | var actual = frame.where("id_0", 1).sum("value"); 18 | 19 | t.equals(actual, expected); 20 | }); 21 | 22 | tape("sum works with where", function(t){ 23 | t.plan(1); 24 | var frame = new Frame({ 25 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 26 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 27 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 28 | }); 29 | 30 | var expected = 10; // 1 + 2 + 2 + 3 + 2 31 | 32 | var actual = frame.where("id_0", 0).sum("value"); 33 | 34 | t.equals(actual, expected); 35 | }); 36 | 37 | tape("where does not modify sum on original Frame", function(t){ 38 | t.plan(1); 39 | var frame = new Frame({ 40 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 41 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 42 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 43 | }); 44 | 45 | var expected = 19; // 1 + 2 + 2 + 3 + 1 + 3 + 4 + 2 + 1 46 | 47 | var fw = frame.where("id_0", 0); 48 | var actual = frame.sum("value"); 49 | 50 | t.equals(actual, expected); 51 | }); 52 | 53 | tape("sum works with multiple wheres", function(t){ 54 | t.plan(1); 55 | var frame = new Frame({ 56 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 57 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 58 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 59 | }); 60 | 61 | var expected = 8; // 3 + 4 + 1 62 | var actual = frame.where("id_0", 1).where("id_1", 1).sum("value"); 63 | 64 | t.equals(actual, expected); 65 | }); 66 | 67 | 68 | tape("sum works 
with where in", function(t){ 69 | t.plan(1); 70 | var frame = new Frame({ 71 | "id" : [0, 2, 0, 1, 1, 0, 2, 0, 1], 72 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 73 | }); 74 | 75 | var expected = 14; // 1 + 2 + 2 + 3 + 4 + 2 76 | frame = frame.where("id", [0, 2]); 77 | var actual = frame.sum("value"); 78 | 79 | t.equals(actual, expected); 80 | }); 81 | 82 | 83 | tape("sum works with where in undefined", function(t){ 84 | t.plan(1); 85 | var frame = new Frame({ 86 | "id" : [0, 2, 0, 1, 1, 0, 2, 0, 1], 87 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 88 | }); 89 | 90 | var a; // undefined 91 | var expected = 14; // 1 + 2 + 2 + 3 + 4 + 2 92 | frame = frame.where("id", [0, 2, a]); 93 | var actual = frame.sum("value"); 94 | 95 | t.equals(actual, expected); 96 | }); 97 | } 98 | 99 | //simpleTestCases(); 100 | 101 | var SAMPLE = 10; 102 | function numberCompare(a, b){ return a - b; } 103 | // get a predefined subset of a column (matches test data generation) 104 | function generate_subset(column){ 105 | //column = id_columns[id_name] 106 | var uniques = {}; 107 | for(var i = 0; i < SAMPLE; i++){ 108 | uniques[column[i]] = column[i]; 109 | } 110 | var keys = Object.keys(uniques); 111 | var subset = keys.map(function(k){ return uniques[k]}); 112 | 113 | l = Math.ceil(subset.length / 2); 114 | return subset.sort(numberCompare).slice(0, l); 115 | } 116 | 117 | var RTOL = 1e-05, // 1e-05 118 | ATOL = 1e-12; // 1e-12 119 | 120 | var dataDirectory = 'test/data/where.in.sum/', 121 | testFile = 'small.json'; 122 | 123 | var floader = require('floader'), 124 | dtest = require('../lib/test'); 125 | 126 | floader.load(dataDirectory + testFile, function(err, config){ 127 | 128 | var suite = JSON.parse(config); 129 | simpleTestCases(); 130 | 131 | for(var i = 0; i < suite.length; i++){ 132 | 133 | var prefix = String("0000" + (i + 1)).slice(-4); 134 | 135 | // directory containing matrix data files for current test 136 | var directory = dataDirectory + prefix + '/'; 137 | 138 | var test = suite[i]; 139 | 140 | var names = test.id.map(function(spec, i){ return "id_" + i;}); 141 | var types = test.id.map(function(spec, i){ return spec['type'];}); 142 | 143 | var N = test.N; // number of rows 144 | var distincts = test.id.map(function(spec, i){ return spec.K; }); 145 | 146 | var testName = "where.in.sum: " + N + " x " + "(" + distincts.join(", ") + ")" 147 | tape(testName, generateTestCase(directory, names, types, ["value_0"], [test.value[0].type])); 148 | } 149 | }); 150 | 151 | var OUT_FILENAME = "out.json"; 152 | 153 | 154 | function generateTestCase(directory, id_names, id_types, value_names, value_types){ 155 | return function(t){ 156 | t.plan(1); 157 | 158 | var names = id_names.concat(value_names); 159 | var types = id_types.concat(value_types); 160 | 161 | // which columns require a key file? 
162 | var key_names = id_names.filter(function(item, i){ 163 | return id_types[i] in dtest.string_types 164 | }); 165 | var key_types = id_types.filter(function(item, i){ 166 | return item in dtest.string_types 167 | }); 168 | 169 | // load columns from files 170 | dtest.load(directory, names, types, function(err, columns){ 171 | 172 | // load key files 173 | dtest.load_key(directory, key_names, key_types, function(err, keys){ 174 | 175 | floader.load(directory + OUT_FILENAME, function(err, out){ 176 | var expected = JSON.parse(out); 177 | 178 | var column_set = {}; 179 | for (var i = 0; i < names.length; i++){ 180 | var name = names[i]; 181 | var column = columns[i]; 182 | column_set[name] = column; 183 | } 184 | // keys map a small set of integers to other things (like strings) 185 | // they're a very simple form of fixed length coding 186 | var key_set = {}; 187 | for (var i = 0; i < keys.length; i++){ 188 | var name = key_names[i]; 189 | var key = keys[i]; 190 | key_set[name] = key; 191 | } 192 | 193 | var frame = new Frame(column_set, key_set); 194 | 195 | var subset = generate_subset(column_set["id_0"]); 196 | //console.log(subset); 197 | frame = frame.where("id_0", subset); 198 | var actual = frame.sum("value_0"); 199 | 200 | dtest.assert.close(t, actual, expected, "close", RTOL, ATOL); 201 | }); 202 | 203 | }); 204 | }); 205 | }; 206 | } 207 | -------------------------------------------------------------------------------- /test/where.js: -------------------------------------------------------------------------------- 1 | var tape = require('tape'), 2 | BitArray = require('bit-array'), 3 | Frame = require('../lib/frame'); 4 | 5 | tape("where creates correct filter", function(t){ 6 | t.plan(1); 7 | 8 | var frame = new Frame({ 9 | "id" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 10 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 11 | }); 12 | 13 | //frame.where(row => row.id == 1); 14 | frame = frame.where("id", v => v == 1); 15 | 16 | var expected = new BitArray(9); 17 | 18 | expected.set(3, true); 19 | expected.set(4, true); 20 | expected.set(6, true); 21 | expected.set(8, true); 22 | 23 | var actual = frame._filter; 24 | t.equals(actual.toString(), expected.toString()); 25 | }); 26 | 27 | tape("where with numerical argument creates correct filter", function(t){ 28 | t.plan(1); 29 | 30 | var frame = new Frame({ 31 | "id" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 32 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 33 | }); 34 | 35 | frame = frame.where("id", 1); 36 | 37 | var expected = new BitArray(9); 38 | 39 | expected.set(3, true); 40 | expected.set(4, true); 41 | expected.set(6, true); 42 | expected.set(8, true); 43 | 44 | var actual = frame._filter; 45 | t.equals(actual.toString(), expected.toString()); 46 | }); 47 | 48 | tape("where with array argument creates correct filter", function(t){ 49 | t.plan(1); 50 | 51 | var frame = new Frame({ 52 | "id" : [0, 2, 0, 1, 1, 0, 2, 0, 1], 53 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 54 | }); 55 | 56 | frame = frame.where("id", [0, 2]); 57 | 58 | var expected = new BitArray(9); 59 | 60 | expected.set(0, true); 61 | expected.set(1, true); 62 | expected.set(2, true); 63 | expected.set(5, true); 64 | expected.set(6, true); 65 | expected.set(7, true); 66 | 67 | var actual = frame._filter; 68 | t.equals(actual.toString(), expected.toString()); 69 | }); 70 | 71 | tape("where creates second filter correctly", function(t){ 72 | t.plan(1); 73 | 74 | var frame = new Frame({ 75 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 76 | "id_1" : [0, 0, 1, 1, 0, 1, 0, 0, 1], 77 | "value" : [1, 2, 2, 3, 1, 
3, 4, 2, 1] 78 | }); 79 | 80 | //frame.where(row => row.id == 1); 81 | frame = frame.where("id_1", id => id == 1); 82 | frame = frame.where("id_0", id => id == 1); 83 | 84 | var expected = new BitArray(9); 85 | 86 | expected.set(3, true); 87 | expected.set(8, true); 88 | 89 | var actual = frame._filter; 90 | t.equals(actual.toString(), expected.toString()); 91 | }); 92 | 93 | tape("where filters column via accessor", function(t){ 94 | t.plan(1); 95 | 96 | var frame = new Frame({ 97 | "id" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 98 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 99 | }); 100 | 101 | //frame.where(row => row.id == 1); 102 | frame = frame.where("id", v => v == 1); 103 | 104 | var expected = [3, 1, 4, 1]; 105 | 106 | 107 | var actual = frame["value"]; 108 | t.equals(actual.toString(), expected.toString()); 109 | }); 110 | 111 | tape("where filters keyed column via accessor", function(t){ 112 | t.plan(1); 113 | 114 | var columns = { 115 | "id" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 116 | "value" : [6, 1, 5, 3, 1, 2, 4, 0, 1] 117 | }; 118 | var keys = { 119 | "value" : ["fare", "fish", "my", "red", "blue", "to", "add"] 120 | }; 121 | 122 | var frame = new Frame(columns, keys); 123 | 124 | frame = frame.where("id", v => v == 1); 125 | 126 | var expected = ["red", "fish", "blue", "fish"]; 127 | 128 | 129 | var actual = frame["value"]; 130 | t.equals(actual.toString(), expected.toString()); 131 | }); 132 | 133 | tape("where accepts string filter on keyed column", function(t){ 134 | t.plan(1); 135 | 136 | var columns = { 137 | "id" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 138 | "value" : [6, 1, 5, 3, 1, 2, 4, 0, 1] 139 | }; 140 | var keys = { 141 | "id" : ["thoreau", "seuss"], 142 | "value" : ["fare", "fish", "my", "red", "blue", "to", "add"] 143 | }; 144 | 145 | var frame = new Frame(columns, keys); 146 | 147 | frame = frame.where("id", "thoreau"); 148 | 149 | var expected = ["add", "fish", "to", "my", "fare"]; 150 | 151 | 152 | var actual = frame["value"]; 153 | t.equals(actual.toString(), expected.toString()); 154 | }); 155 | 156 | tape("where accepts function with string on keyed column", function(t){ 157 | t.plan(1); 158 | 159 | var columns = { 160 | "id" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 161 | "value" : [6, 1, 5, 3, 1, 2, 4, 0, 1] 162 | }; 163 | var keys = { 164 | "id" : ["thoreau", "seuss"], 165 | "value" : ["fare", "fish", "my", "red", "blue", "to", "add"] 166 | }; 167 | 168 | var frame = new Frame(columns, keys); 169 | 170 | frame = frame.where("id", v => v == "seuss"); 171 | 172 | var expected = ["red", "fish", "blue", "fish"]; 173 | 174 | var actual = frame["value"]; 175 | t.equals(actual.toString(), expected.toString()); 176 | }); 177 | 178 | tape("where filter can be modified", function(t){ 179 | t.plan(2); 180 | 181 | var columns = { 182 | "id" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 183 | "value" : [6, 1, 5, 3, 1, 2, 4, 0, 1] 184 | }; 185 | var keys = { 186 | "id" : ["thoreau", "seuss"], 187 | "value" : ["fare", "fish", "my", "red", "blue", "to", "add"] 188 | }; 189 | 190 | var frame = new Frame(columns, keys); 191 | 192 | 193 | frame = frame.where("id", "thoreau"); 194 | var expected = ["add", "fish", "to", "my", "fare"]; 195 | 196 | var actual = frame["value"]; 197 | t.equals(actual.toString(), expected.toString()); 198 | 199 | frame = frame.where("id", v => v == "seuss"); 200 | var expected = ["red", "fish", "blue", "fish"]; 201 | 202 | var actual = frame["value"]; 203 | t.equals(actual.toString(), expected.toString()); 204 | }); 205 | /* 206 | function eq(a){ 207 | return function(v){ v == a; }; 208 | } 209 | 
210 | function in(arr){ 211 | var set = {}; 212 | for (a in arr) set[a] = true; 213 | return function(v){ return v in set;}; 214 | }*/ 215 | -------------------------------------------------------------------------------- /test/where.mean.js: -------------------------------------------------------------------------------- 1 | var tape = require('tape'), 2 | Frame = require('../lib/frame'); 3 | 4 | // simple instructive test cases 5 | function simpleTestCases(){ 6 | 7 | tape("mean works with where", function(t){ 8 | t.plan(1); 9 | var frame = new Frame({ 10 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 11 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 12 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 13 | }); 14 | 15 | var expected = 2.25; // 3 + 1 + 4 + 1 16 | 17 | var actual = frame.where("id_0", 1).mean("value"); 18 | 19 | dtest.assert.close(t, actual, expected); 20 | }); 21 | 22 | tape("mean works with where", function(t){ 23 | t.plan(1); 24 | var frame = new Frame({ 25 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 26 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 27 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 28 | }); 29 | 30 | var expected = 2; // 1 + 2 + 2 + 3 + 2 31 | 32 | var actual = frame.where("id_0", 0).mean("value"); 33 | 34 | dtest.assert.close(t, actual, expected); 35 | }); 36 | 37 | tape("where does not modify mean on original Frame", function(t){ 38 | t.plan(1); 39 | var frame = new Frame({ 40 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 41 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 42 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 43 | }); 44 | 45 | var expected = 2.1111111111; // (1 + 2 + 2 + 3 + 1 + 3 + 4 + 2 + 1) / 9 46 | 47 | var fw = frame.where("id_0", 0); 48 | var actual = frame.mean("value"); 49 | 50 | dtest.assert.close(t, actual, expected); 51 | }); 52 | 53 | tape("mean works with multiple wheres", function(t){ 54 | t.plan(1); 55 | var frame = new Frame({ 56 | "id_0" : [0, 0, 0, 1, 1, 0, 1, 0, 1], 57 | "id_1" : [0, 0, 1, 1, 0, 0, 1, 0, 1], 58 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 59 | }); 60 | 61 | var expected = 2.666666666; // (3 + 4 + 1) / 3 62 | var actual = frame.where("id_0", 1).where("id_1", 1).mean("value"); 63 | 64 | dtest.assert.close(t, actual, expected); 65 | }); 66 | 67 | 68 | tape("mean works with where in", function(t){ 69 | t.plan(1); 70 | var frame = new Frame({ 71 | "id" : [0, 2, 0, 1, 1, 0, 2, 0, 1], 72 | "value" : [1, 2, 2, 3, 1, 3, 4, 2, 1] 73 | }); 74 | 75 | var expected = 2.3333333333333; // 1 + 2 + 2 + 3 + 4 + 2 76 | frame = frame.where("id", [0, 2]); 77 | var actual = frame.mean("value"); 78 | 79 | dtest.assert.close(t, actual, expected); 80 | }); 81 | } 82 | 83 | //simpleTestCases(); 84 | 85 | var RTOL = 1e-05, // 1e-05 86 | ATOL = 1e-12; // 1e-12 87 | 88 | var dataDirectory = 'test/data/where.mean/', 89 | testFile = 'small.json'; 90 | 91 | var floader = require('floader'), 92 | dtest = require('../lib/test'); 93 | 94 | floader.load(dataDirectory + testFile, function(err, config){ 95 | 96 | var suite = JSON.parse(config); 97 | simpleTestCases(); 98 | 99 | for(var i = 0; i < suite.length; i++){ 100 | 101 | var prefix = String("0000" + (i + 1)).slice(-4); 102 | 103 | // directory containing matrix data files for current test 104 | var directory = dataDirectory + prefix + '/'; 105 | 106 | var test = suite[i]; 107 | 108 | var names = test.id.map(function(spec, i){ return "id_" + i;}); 109 | var types = test.id.map(function(spec, i){ return spec['type'];}); 110 | 111 | var N = test.N; // number of rows 112 | var distincts = test.id.map(function(spec, i){ return spec.K; }); 113 | 114 | var 
testName = "where.mean: " + N + " x " + "(" + distincts.join(", ") + ")" 115 | tape(testName, generateTestCase(directory, names, types, ["value_0"], [test.value[0].type])); 116 | } 117 | }); 118 | 119 | var OUT_FILENAME = "out.json"; 120 | 121 | 122 | function generateTestCase(directory, id_names, id_types, value_names, value_types){ 123 | return function(t){ 124 | t.plan(1); 125 | 126 | var names = id_names.concat(value_names); 127 | var types = id_types.concat(value_types); 128 | 129 | // which columns require a key file? 130 | var key_names = id_names.filter(function(item, i){ 131 | return id_types[i] in dtest.string_types 132 | }); 133 | var key_types = id_types.filter(function(item, i){ 134 | return item in dtest.string_types 135 | }); 136 | 137 | // load columns from files 138 | dtest.load(directory, names, types, function(err, columns){ 139 | 140 | // load key files 141 | dtest.load_key(directory, key_names, key_types, function(err, keys){ 142 | 143 | floader.load(directory + OUT_FILENAME, function(err, out){ 144 | var expected = JSON.parse(out); 145 | 146 | var column_set = {}; 147 | for (var i = 0; i < names.length; i++){ 148 | var name = names[i]; 149 | var column = columns[i]; 150 | column_set[name] = column; 151 | } 152 | // keys map a small set of integers to other things (like strings) 153 | // they're a very simple form of fixed length coding 154 | var key_set = {}; 155 | for (var i = 0; i < keys.length; i++){ 156 | var name = key_names[i]; 157 | var key = keys[i]; 158 | key_set[name] = key; 159 | } 160 | 161 | var frame = new Frame(column_set, key_set); 162 | 163 | var value = column_set["id_0"][0]; 164 | //console.log(subset); 165 | frame = frame.where("id_0", value); 166 | var actual = frame.mean("value_0"); 167 | 168 | dtest.assert.close(t, actual, expected, "close", RTOL, ATOL); 169 | }); 170 | 171 | }); 172 | }); 173 | }; 174 | } 175 | --------------------------------------------------------------------------------