├── .gitignore ├── package.json ├── LICENSE ├── richflow.min.js ├── README.md └── richflow.js /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | /index.html 3 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "richflow", 3 | "description": "A framework for javascript data pipeline processing and data sharing.", 4 | "version": "0.0.6", 5 | "license": "MIT", 6 | "repository": { 7 | "type": "git", 8 | "url": "https://github.com/ominibyte/richflow" 9 | }, 10 | "dependencies": { 11 | "n-readlines": "~0.2.7" 12 | }, 13 | "engines": { 14 | "node": ">= 7.0.0" 15 | }, 16 | "os": [ 17 | "darwin", 18 | "linux" 19 | ], 20 | "preferGlobal": true, 21 | "files": [ 22 | "*.js" 23 | ], 24 | "main": "richflow.js", 25 | "scripts": { 26 | "test": "console.log(Flow.from([1,2,3,4,5]).count());" 27 | }, 28 | "keywords": [ 29 | "data stream processing", 30 | "flow", 31 | "pipeline computing", 32 | "data processing", 33 | "javascript map reduce data processing framework" 34 | ], 35 | "author": "Richboy David", 36 | "bugs": { 37 | "url": "https://github.com/ominibyte/richflow/issues" 38 | }, 39 | "homepage": "https://github.com/ominibyte/richflow#readme" 40 | } 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 David Richboy Echomgbe 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software 
is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /richflow.min.js: -------------------------------------------------------------------------------- 1 | "use strict";(function(t,n){if("object"==typeof exports&&exports)n(exports);else{var s={};n(s),"function"==typeof define&&define.amd?define(s):t.RichFlow=s}})(this,function(t){function n(L,I){I.prev=L,L.next=I,I.rootFlow=L.rootFlow}function s(L,I){var k=function(O){setTimeout(()=>I._prePush(O,L),0)};L.subscribe(k),I.subscribers[L.key]=k}function o(L,I){I.subscribers[L.key]&&(L.unsubscribe(I.subscribers[L.key]),delete I.subscribers[L.key])}function u(L){var I;if(A.isNumber(L))I=function(k){var P=0;return{isDataEnd:function(){try{return!(P=L.length&&(L=L.slice(I),I=0),k}},this.peek=function(){return 0L,this.rootFlow=null,this.terminalFunc=()=>{},this.isDiscretized=!1,this.elements=[],this.ended=!1,this.elemPos=0}process(){if(this.ended)return this._getElement();this.prev.next!=this&&(this.prev.next=this);var L=this.prev.process();return null==L&&this._addElement(null),L}startPush(){this.prev.startPush()}stopPush(){this.prev.stopPush()}reset(){this.ended=!1,this.elements=[]}pipe(L){var I=this.pipeFunc(L);return this._addElement(I),null===this.next?I:this.next.pipe(I)}push(L){var 
I=this.pipeFunc(L);null===this.next?this.terminalFunc(I):this.next.push(I)}setTerminalFunction(L){A.isFunction(L)&&(this.terminalFunc=L)}static from(L){return j.getFlow(L)}static of(){return 0==arguments.length?j.getFlow([]):1k+L))}static fromFile(L){return new f(j.createIteratorFromFileSystem(L))}static toArray(){return"toArray"}static toSet(){return"toSet"}static ASC(){return"ASC"}static DESC(){return"DESC"}static NUM_ASC(){return"NUM_ASC"}static NUM_DESC(){return"NUM_DESC"}limit(L){if(0>=L)throw new Error("Limit value must be greater than 0");var I=new x(0,L);return n(this,I),I}skip(L){if(0>=L)throw new Error("Skip value must be greater than 0");var I=new x(L,Number.MAX_VALUE);return n(this,I),I}skipUntil(L){if(!A.isFunction(L))throw new Error("skipUntil requires a function");var I=new y(L,1);return n(this,I),I}skipWhile(L){if(!A.isFunction(L))throw new Error("skipWhile requires a function");var I=new y(L,2);return n(this,I),I}takeUntil(L){if(!A.isFunction(L))throw new Error("takeUntil requires a function");var I=new y(L,3);return n(this,I),I}takeWhile(L){if(!A.isFunction(L))throw new Error("takeWhile requires a function");var I=new y(L,4);return n(this,I),I}range(L,I){if(0>L)throw new Error("Start Index cannot be negative");if(0>=I)throw new Error("End Index must be greater than 0");if(L>I)throw new Error("End Index cannot be less than Start Index");var k=new x(L,I);return n(this,k),k}select(L){var I=new m;return I.pipeFunc=A.isFunction(L)?L:function(k){return k[L]},n(this,I),I}map(L){return this.select(L)}selectExpand(L){var I=new h(L);return n(this,I),I}selectFlatten(){return this.selectExpand(L=>L)}where(L){var I=new g(L);return n(this,I),I}filter(L){return this.where(L)}static _sort(L){return 
function(I,k){return"num_asc"==L?I-k:"num_desc"==L?k-I:Ik?"asc"==L?1:-1:0}}orderBy(L){L||(L=m._sort("asc")),A.isFunction(L)?L==m.ASC?L=m._sort("asc"):L==m.DESC?L=m._sort("desc"):L==m.NUM_ASC?L=m._sort("num_asc"):L==m.NUM_DESC&&(L=m._sort("num_desc")):A.isString(L)&&(L=m._sort(L.toLowerCase()));var I=new F(L);return n(this,I),I}partitionBy(L){var I=L;A.isFunction(L)||(I=function(O){return O[L]});var k=new E(I);return n(this,k),k}discretize(L,I,k){void 0===k&&(k=!0);var O=new S(L,u(I),k);return n(this,O),this.isDiscretized=!0,O}count(){var I,k,L=0;for(k=this.next,this.next=null;null!=(I=this.process());)L++;return this.next=k,L}static toMap(L){var I=L;return A.isFunction(L)||(I=function(k){return k[L]}),I}_toSet(){var I,k,L=new Set;for(k=this.next,this.next=null;null!=(I=this.process());)L.add(I);return this.next=k,L}_toArray(){var I,k,L=[];for(k=this.next,this.next=null;null!=(I=this.process());)L.push(I);return this.next=k,L}groupBy(L){var I=L;A.isFunction(L)||(I=function(C){return C[L]});var k,O,M,P={};for(M=this.next,this.next=null;null!=(k=this.process());)O=I(k),P[O]||(P[O]=[]),P[O].push(k);return this.next=M,P}join(L){L||(L=",");var O,P,I="",k=0;for(P=this.next,this.next=null;null!=(O=this.process());)0k.set(O.key,O.value)),k}return this._toArray()}foreach(L){var I,k;for(k=this.next,this.next=null;null!=(I=this.process());)L(I);this.next=k}forEach(L){this.foreach(L)}anyMatch(L){var I,k;for(k=this.next,this.next=null;null!=(I=this.process());)if(L(I))return!0;return this.next=k,!1}allMatch(L){var I,k;for(k=this.next,this.next=null;null!=(I=this.process());)if(!L(I))return!1;return this.next=k,!0}noneMatch(L){return!this.anyMatch(L)}findFirst(){var L,I;return I=this.next,this.next=null,L=this.process(),this.next=I,L}findAny(){return this.findFirst()}findLast(){var L,k,I=null;for(k=this.next,this.next=null;null!=(L=this.process());)I=L;return this.next=k,I}sum(){var L,k,I=0;for(k=this.next,this.next=null;null!=(L=this.process());)I+=L;return 
this.next=k,I}max(){var L,k,I=null;for(k=this.next,this.next=null;null!=(L=this.process());)(null==I||L>I)&&(I=L);return this.next=k,I}min(){var L,k,I=null;for(k=this.next,this.next=null;null!=(L=this.process());)(null==I||L!1},this.recall={},this.isParallel=!1,this.pFlow=null,this.shouldCache=!0}isStream(){return 0=this.iterators.length)return this.pos=0,null;if(!this.isDiscretized){for(var L=this.iterators[this.pos].next();L.done&&this.pos=this.iterators.length));)L=this.iterators[this.pos].next();return L.done?(this.pos=0,null):null===this.next?L.value:this.next.pipe(L.value)}var I=[];if(this.discreteStreamLength=Math.min(this.discreteStreamLength,this.iterators.length),1==this.discreteStreamLength){for(L=this.iterators[this.pos].next();L.done&&this.pos=this.iterators.length));)L=this.iterators[this.pos].next();if(L.done)return this.pos=0,null;for(;!L.done;){if(I.push(L.value),this.isDataEndObject.isDataEnd(L.value,I.length))return null===this.next?I:this.next.pipe(I);L=this.iterators[this.pos].next()}if(0O))break;var k=[];for(let O=0;Onull==O))break;if(this.streamElements.push(k),this.isDataEndObject.isDataEnd(k,this.streamElements.length)){this.recall.justEnded=!0;try{return null===this.next?this.streamElements.slice():this.next.pipe(this.streamElements.slice())}finally{this.streamElements=[]}}else this.recall.justEnded=!1}while(!0);return this.pos=0,delete this.recall.ended,delete this.recall.justEnded,this.streamElements=[],null}}merge(L){var I=this.isStream(),k=j.getIterator(L);if(!I&&k.streamer||I&&!k.streamer)throw new Error("Streamer cannot be merged with other data types");return this.iterators.push(k),this}startPush(){this.isListening||(this.isListening=!0,this.isStream()?this._listen():this._doPush())}stopPush(){if(this.isListening&&(this.isListening=!1,this.isStream()))for(let L of this.iterators)o(L.streamer,this)}_doPush(){var 
L;if(!this.isDiscretized)for(;this.isListening&&!(this.pos>=this.iterators.length);){for(L=this.iterators[this.pos].next();L.done&&this.pos=this.iterators.length));)L=this.iterators[this.pos].next();if(L.done)break;this.push(L.value)}else if(this.discreteStreamLength=Math.min(this.discreteStreamLength,this.iterators.length),1==this.discreteStreamLength)do{for(L=this.iterators[this.pos].next();!L.done;)this.streamElements.push(L.value),this.isDataEndObject.isDataEnd(L.value,this.streamElements.length)&&(this.push(this.streamElements.slice()),this.streamElements=[]),L=this.iterators[this.pos].next();if(0null==P))break;if(this.streamElements.push(O),!this.isDataEndObject.isDataEnd(O,this.streamElements.length))k=!1;else if(k=!0,this.push(this.streamElements.slice()),this.streamElements=[],m.from(I).allMatch(P=>P))break}while(!0)}this.isListening=!1,this.pos=0}_listen(){if(this.isDiscretized){var L=[];for(let I of this.iterators)I.streamer&&(L.push(I.streamer),s(I.streamer,this));this.recall.streamKeys=m.from(L).select(I=>I.key).collect(m.toArray),this.recall.queues=m.from(L).select(()=>new p).collect(m.toArray),this.discreteStreamLength=Math.min(L.length,this.discreteStreamLength),this.recall.ready=!0,this.recall.called=!1,this.streamElements=[]}else for(let I of this.iterators)I.streamer&&s(I.streamer,this)}_prePush(L,I){if(!this.isDiscretized)this.push(L);else{this.recall.queues[this.recall.streamKeys.indexOf(I.key)].enqueue(L);var k=this;if(!this.recall.ready)return;this.recall.ready=!1,setTimeout(function(){Outer:do{var O=[];for(let P=0;P=this.end)return;null===this.next?this.terminalFunc(L):this.next.push(L)}}}class g extends m{constructor(L){super(),this.pipeFunc=L}pipe(L){var I=this.pipeFunc(L);return I?(this._addElement(L),null===this.next?L:this.next.pipe(L)):this.prev.process()}push(L){var I=this.pipeFunc(L);I&&(null===this.next?this.terminalFunc(L):this.next.push(L))}}class F extends 
m{constructor(L){super(),this.pipeFunc=L,this.items=[],this.obtainedAll=!1,this.pos=0}process(){if(this.ended)return this._getElement();this.prev.next!=this&&(this.prev.next=this);var L;if(!this.obtainedAll){if(L=this.prev.process(),null!=L)return L;this.obtainedAll=!0,this.items.sort(this.pipeFunc)}if(this.posI.length)for(;this.span!=I.length;)I.push(null);if(this.items.push(I),!this.isDataEndObject.isDataEnd(I,this.items.length))return this.prev.process()}else if(this.items.push(I),!this.isDataEndObject.isDataEnd(I,this.items.length))return this.prev.process()}var M=this.spawnFlows?new v(this.items):this.items;try{return null===this.next?M:this.next.pipe(M)}finally{this.items=[]}}push(L){if(this.prev instanceof f&&!this.isDiscretized)this.items=L;else{var I=L;if(1!=this.span){if(!A.isArray(L)){var k=j.getFlow(L);I=[];var P,O=!1;for(let C=0;CI.length)for(;this.span!=I.length;)I.push(null);if(this.items.push(I),!this.isDataEndObject.isDataEnd(I,this.items.length))return}else if(this.items.push(L),!this.isDataEndObject.isDataEnd(L,this.items.length))return}var M;M=this.spawnFlows?new v(this.items):this.items,null===this.next?this.terminalFunc(M):this.next.push(M),this.items=[]}}class v extends f{constructor(L){super(j.createIteratorFromArray(L)),this.elements=L}elementSize(){return this.elements.length}streamSize(){try{return this.elements[0].length}catch(L){return 1}}}class _{constructor(L){this.listeners=[],this.receiver=L&&A.isFunction(L)?L:null,this.key=A.generateUUID()}push(L,I){null!=this.receiver&&this.receiver(L,I)}subscribe(L){if(L.notify&&A.isFunction(L.notify)||A.isFunction(L))this.listeners.push(L);else throw new Error("Listener object must either be a function or an object with a `notify` function.")}send(L){m.from(this.listeners).where(I=>I.notify&&A.isFunction(I.notify)).foreach(I=>I.notify(L)),m.from(this.listeners).where(I=>!(I.notify&&A.isFunction(I.notify))&&A.isFunction(I)).foreach(I=>I(L))}unsubscribe(L){let 
I=this.listeners.indexOf(L);0<=I&&this.listeners.splice(I,1)}size(){return 0}get(){return{}}}class j{static getFlow(L){return new f(j.getIterator(L))}static getIterator(L){let I;return I=A.isArray(L)&&!A.isString(L)?j.createIteratorFromArray(L):A.isMap(L)?j.createIteratorFromMap(L):A.isSet(L)?j.createIteratorFromSet(L):A.isString(L)&&L.toLowerCase().startsWith("fs://")?j.createIteratorFromFileSystem(L):A.isStreamer(L)?j.createIteratorFromStreamer(L):A.isGenerator(L)?j.createIteratorFromGenerator(L):A.isIterable(L)&&!A.isString(L)?j.createIteratorFromIterable(L):A.isObject(L)&&!A.isString(L)?j.createIteratorFromObject(L):j.createIteratorFromValue(L),I}static createIteratorFromArray(L){return function(I){let k=I,O=0;return{next:function(){try{return Ok.length&&(O=0)}}}}(L)}static createIteratorWithEmptyArraysFromNumber(L){L=Math.ceil(L);for(var I=[];L--;)I.push([]);return j.createIteratorFromArray(I)}static createIteratorFromMap(L){return function(I){let k=I,O=k.entries();return{next:function(){let P=O.next();return P.done?(O=k.entries(),P):{value:{key:P.value[0],value:P.value[1]},done:!1}}}}(L)}static createIteratorFromSet(L){return j.createIteratorFromArray(Array.from(L))}static createIteratorFromIterable(L){return L}static createIteratorFromGenerator(L){return function(I){let k=I,O=k(),P;return{next:function(){try{return P=O.next(),P}finally{P.done&&(O=k())}}}}(L)}static createIteratorFromObject(L){return function(I){let k=I,O=Object.keys(k),P=0,M=O.length;return{next:function(){try{return PM&&(P=0,O=Object.keys(k),M=O.length)}}}}(L)}static createIteratorFromFileSystem(L){return function(){"fs://"==L.trim().substring(0,"fs://".length)&&(L=L.trim().substring("fs://".length));let I=require("n-readlines"),k=new I(L);return{next:function(){let O=k.next();return O?{value:O.toString("utf8"),done:!1}:(k=new I(L),{done:!0})}}}()}static createIteratorFromValue(L){return function(){let I=!1;return{next:function(){try{return 
I?{done:!0}:{value:L,done:!1}}finally{I=!I}}}}()}static createIteratorFromStreamer(L){return function(){let I=L.size(),k={},O=0,P;return{next:function(){try{return O>=I?{done:!0}:(P=L.get(O),{value:null==P?k:P,done:!1})}finally{O++,O>I&&(O=0,I=L.size())}},streamer:L}}()}}var A={isFunction:function(L){return"function"==typeof L||!1},isArray:function(L){return"number"==typeof L.length&&0<=L.length},isSet:function(L){return L instanceof Set&&A.isFunction(L.values)},isMap:function(L){return L instanceof Map&&A.isFunction(L.values)},isObject:function(L){return!!L&&"object"==typeof L},isString:function(L){return"[object String]"===Object.prototype.toString.call(L)},isIn:function(L,I){for(let k=0;kO}start(){this.rootFlow.startPush()}stop(){this.rootFlow.stopPush()}push(L){this.streamer.push(L,this.key)}},t.Streamer=_,Object.defineProperty(t,"__esModule",{value:!0})}); -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | RichFlow: Data processing for JavaScript 2 | ======================================== 3 | A framework for javascript data pipeline processing, data sharing and stream processing. Actionable & Transformable Pipeline data processing. 4 | 5 | RichFlow is an extract of Flow, a node.js library built for data processing in the [JAMScript Framework](https://github.com/anrl/JAMScript-beta) 6 | 7 | Installation 8 | ------------ 9 | 10 | `npm install richflow` 11 | 12 | For use in the browser, download richflow.js from [github.com/ominibyte/richflow](https://github.com/ominibyte/richflow) 13 | 14 | Online in-browser testing of RichFlow is available at [richflow.richboy.me](http://richflow.richboy.me) 15 | 16 | Usage 17 | ----- 18 | The RichFlow library comes with several classes which can be used for different purposes. RichFlow is mostly based on JavaScript ES6. 
19 | For the basic Flow, 'require' it as follows: 20 | 21 | ```javascript 22 | //in node.js 23 | var {Flow} = require('richflow'); 24 | 25 | //in your browser 26 | 27 | 30 | ``` 31 | 32 | 33 | Flow can be used to operate on several data types including Arrays, Sets, Maps, FileSystem, Objects, Generators. 34 | In addition, RichFlow comes with a Streamer object that enables data stream processing and data sharing. 35 | 36 | ```javascript 37 | var array = [1, 2, 3, 4, 5]; 38 | 39 | //For a very simple example, let us count the number of even numbers in the array 40 | var count = Flow.from(array).where(elem => elem % 2 == 0).count(); 41 | 42 | //create a data window and return a new array 43 | var range = Flow.from(array).skip(1).limit(3).collect(); 44 | //The above line is equivalent to 45 | var range = Flow.from(array).range(1, 4).collect(); 46 | 47 | //a few more possibilities 48 | var anotherArray = [6, 7, 8, 9]; 49 | var average = Flow.from(array).merge(anotherArray).select(elem => elem * 5).average(); 50 | 51 | //check if all students passed 52 | var studentScores = [71, 90, 55, 50, 88, 67]; 53 | var allPassed = Flow.from(studentScores).allMatch(score => score >= 50); 54 | 55 | //an example of selectExpand: prints ["my","name","is","richboy"] 56 | console.log(Flow.from("my name is richboy").selectExpand(input => input.split(" ")).collect()); 57 | 58 | //an example of selectFlatten: prints [1,2,3,4,5,6,7,8,9] 59 | console.log(Flow.from([[1,2,3],[4,5,6],[7,8,9]]).selectFlatten().collect()); 60 | ``` 61 | 62 | Understanding RichFlow 63 | ---------------------- 64 | A Flow is a data abstraction encapsulated within a JS ES6 class object that allows several operations on several data structures. Large collections of data can be processed efficiently. Flow allows programmers to operate on data in a way somewhat similar to SQL operations, and it uses similar query words. 
65 | Flow operations can either be methods/transformations (operations that yield other Flows) or actions (operations that yield a result). 66 | 67 | 68 | Flow Creation 69 | ------------- 70 | A Flow can be created from several Javascript data structures including: Array, Set, Map, Object, FileSystem, Generator, and Streamer (an in-built bare-bones class for supporting data streaming). The last two could potentially produce an infinite stream of data. 71 | 72 | Here is an example of how a Flow can be created from a simple array: 73 | 74 | ```javascript 75 | var array = [1, 0, 5, 13, -1]; 76 | var flow = Flow.from(array); 77 | ``` 78 | 79 | The above example creates an Iterator from the array from which data is pipelined. 80 | Flow can also be created from a number range using: 81 | 82 | ```javascript 83 | var flow = Flow.fromRange(3, 8); //creates a Flow with [3,4,5,6,7,8] 84 | ``` 85 | 86 | Flow can also be created from several arguments using: 87 | 88 | ```javascript 89 | var flow = Flow.of(1, 3, 4, 7); //creates a Flow with [1,3,4,7] 90 | ``` 91 | 92 | The Flow.of(…) also allows creating Flow with empty array elements which could be operated on later. Flow.of(…) defaults to Flow.from(…) when the argument to the method is not a number and is a single argument. An example is shown below: 93 | 94 | ```javascript 95 | var flow = Flow.of(3); //creates a Flow with [[],[],[]] 96 | ``` 97 | 98 | Let us show a very simple use case for Flow.of(…) that is actually used within the Flow implementation: 99 | 100 | ```javascript 101 | //A lazy way to create 5 queues. 102 | var flow = Flow.of(5).map(array => new Queue()); 103 | ``` 104 | 105 | 106 | Flow Methods 107 | ------------ 108 | Flow methods are data transformations that yield other Flows. Each Flow maintains a link to the Flow operation before it. 109 | Flow methods are computed lazily: nothing happens to the underlying data until an action is called. 
110 | When an action is called on a Flow, data is continually streamed/piped down to the next Flow level for further processing as it is produced. 111 | This can reduce the execution time because some operations can be handled together. The currently supported methods are listed below: 112 | 113 | For most of the examples, we will be using the following extracted sample dataset of nobel prize winners for physics in 2016. The complete dataset is available at: [http://api.nobelprize.org/v1/prize.json](http://api.nobelprize.org/v1/prize.json) 114 | 115 | ```javascript 116 | var winners = [ 117 | { 118 | "id": "928", 119 | "firstname": "David J.", 120 | "surname": "Thouless", 121 | "motivation": "\"for theoretical discoveries of topological phase transitions and topological phases of matter\"", 122 | "share": "2" 123 | }, 124 | { 125 | "id": "929", 126 | "firstname": "F. Duncan M.", 127 | "surname": "Haldane", 128 | "motivation": "\"for theoretical discoveries of topological phase transitions and topological phases of matter\"", 129 | "share": "4" 130 | }, 131 | { 132 | "id": "930", 133 | "firstname": "J. Michael", 134 | "surname": "Kosterlitz", 135 | "motivation": "\"for theoretical discoveries of topological phase transitions and topological phases of matter\"", 136 | "share": "4" 137 | } 138 | ]; 139 | ``` 140 | 141 | #### select(function | String) \[alias: map\] 142 | This is similar to map in map-reduce operations. It selects one or more parts of each data item in a given dataset. For example: 143 | 144 | ```javascript 145 | //we wish to get the surnames of all the winners 146 | var selectFlow = Flow.from(winners).select(winner => winner.surname); //returns a Flow object 147 | 148 | //For objects as with the working example, we can also do: 149 | var selectFlow = Flow.from(winners).select("surname"); //returns a Flow object 150 | ``` 151 | 152 | #### limit(Number) 153 | To limit the number of results obtained after the previous operation. 
154 | 155 | ```javascript 156 | //let us say we want to restrict the result to the first two winners 157 | var limitFlow = Flow.from(winners).limit(2); //returns a Flow object 158 | 159 | //get the ids of the first two winners 160 | var ids = Flow.from(winners).limit(2).select("id").collect(); //returns an array 161 | ``` 162 | 163 | #### skip(Number) 164 | To ignore the first given number of results found after a previous operation. 165 | 166 | ```javascript 167 | //skip the first result 168 | var skipFlow = Flow.from(winners).skip(1); //returns a Flow object 169 | ``` 170 | 171 | #### range(startIndex: Number, endIndex: Number) 172 | This method combines the implementations of limit and skip. It creates a bound for the data to be used for further processing. 173 | startIndex is inclusive while endIndex is exclusive. 174 | 175 | ```javascript 176 | //so if we want to get only the second person: 177 | var rangeFlow = Flow.from(winners).range(1,2); //returns a Flow object 178 | ``` 179 | 180 | #### skipUntil(function) 181 | Skip until the condition in the function argument returns true. The function will receive each piped input and should return a boolean 182 | 183 | ```javascript 184 | var remaining = Flow.fromRange(1,10).skipUntil(num => num > 6).collect(); 185 | //returns [7, 8, 9, 10] 186 | ``` 187 | 188 | #### skipWhile(function) 189 | Skip while the condition in the function argument returns true. The function will receive each piped input and should return a boolean 190 | 191 | ```javascript 192 | var remaining = Flow.fromRange(1,10).skipWhile(num => num != 6).collect(); 193 | //returns [6, 7, 8, 9, 10] 194 | ``` 195 | 196 | #### takeUntil(function) 197 | Keep accepting the piped data until the condition in the function argument returns true. This method also takes the data that meets the condition but skips after. 
The function will receive each piped input and should return a boolean 198 | 199 | ```javascript 200 | var taken = Flow.fromRange(1,10).takeUntil(num => num == 4).collect(); 201 | //returns [1, 2, 3, 4] 202 | ``` 203 | 204 | #### takeWhile(function) 205 | Keep accepting the piped data while the condition in the function argument returns true. The function will receive each piped input and should return a boolean 206 | 207 | ```javascript 208 | var taken = Flow.fromRange(1,10).takeWhile(num => num * 8 <= 50).collect(); 209 | //returns [1, 2, 3, 4, 5, 6] 210 | ``` 211 | 212 | 213 | #### selectExpand(function) 214 | This maps one input to many outputs as generated by the function. The collection generated by function must be supported by Flow.from(…). 215 | 216 | ```javascript 217 | var sentence = "my name is richboy"; 218 | var parts = Flow.from(sentence).selectExpand(input => input.split(" ")).collect(); 219 | //returns ["my","name","is","richboy"] 220 | 221 | //Another Example: rewrite the sentence with only words that are above 2 chars long 222 | sentence = Flow.from(sentence).selectExpand(input => input.split(" ")).where(word => word.length > 2).join(" "); 223 | //returns "name richboy" 224 | ``` 225 | 226 | #### selectFlatten() 227 | This is similar to selectExpand, except that this doesn't take a function. Select flatten assumes that the input from the pipe is a collection that is supported by Flow.from(…). 228 | 229 | ```javascript 230 | var flattened = Flow.from([[1,2,3],[4,5,6],[7,8,9]]).selectFlatten().collect(); 231 | //returns [1,2,3,4,5,6,7,8,9] 232 | ``` 233 | 234 | #### where(function) \[alias: filter\] 235 | This method performs a filtering operation on the data to match a constraint. 
236 | 237 | ```javascript 238 | //get all the even numbers from the array 239 | var whereFlow = Flow.from([1,2,3,4,5,6,7,8,9]).where(num => num % 2 == 0); //returns a Flow object 240 | ``` 241 | 242 | #### orderBy(function | Flow.ASC | Flow.DESC | Flow.NUM_ASC | Flow.NUM_DESC) 243 | This performs a sorting operation on the data based on a given function. Flow has internal operations to sort based on descending and ascending order. 244 | You can provide your own sorting implementation which will normally be passed to the Array.prototype.sort() function. 245 | *Flow.ASC* and *Flow.DESC* will sort according to each character's Unicode code point value, according to the string conversion of each element with the only difference being that *Flow.ASC* will sort in ascending order and *Flow.DESC* in descending order. 246 | *Flow.NUM_ASC* and *Flow.NUM_DESC* will sort the elements as numbers. 247 | 248 | ```javascript 249 | //sort the winners based on their surname 250 | var orderedFlow = Flow.from(winners).select("surname").orderBy(Flow.ASC); //returns a Flow object 251 | ``` 252 | 253 | #### partitionBy(function | String) 254 | This performs data grouping on the elements of the data, determined by the function. This is similar to the Flow action - groupBy, but this returns a Flow for further pipelining. 255 | The argument can either be a function (that receives an item each time to generate the group/partition that the item belongs to) or a key (from which the group/partition will be determined using JS object syntax like `input[key]`). 
256 | After partitioning, data is emitted one partition at a time in the format: 257 | `{key: "partition key", value:[...array of elements in that partition]}` 258 | 259 | As an example: 260 | 261 | ```javascript 262 | var array = [ 263 | {entity: "book", bookID: 12}, 264 | {entity: "student", studentID: 23434}, 265 | {entity: "student", studentID: 12233}, 266 | {entity: "book", bookID: 998} 267 | ]; 268 | 269 | //we want to partition by entity so all entries with same entity value would be grouped together 270 | let partitions = Flow.from(array).partitionBy("entity").collect(); 271 | //partitions will contain: 272 | /* 273 | [ 274 | {key: "book", value: [{entity: "book", bookID: 12}, {entity: "book", bookID: 998}]}, 275 | {key: "student", value: [{entity: "student", studentID: 23434}, {entity: "student", studentID: 12233}]} 276 | ] 277 | */ 278 | ``` 279 | 280 | #### merge(data) 281 | This method is only available to an object of IteratorFlow and is used to merge a supported data structure as with Flow.from(data). Merging creates an Iterator and adds it to the current Iterator or Iterators. 282 | This function also returns an IteratorFlow so one can do multiple merging on the return value. 283 | 284 | ```javascript 285 | //let us merge the data for those who won the nobel prize for chemistry in 2016 286 | var chemistryWinners = [ 287 | { 288 | "id": "931", 289 | "firstname": "Jean-Pierre", 290 | "surname": "Sauvage", 291 | "motivation": "\"for the design and synthesis of molecular machines\"", 292 | "share": "3" 293 | }, 294 | { 295 | "id": "932", 296 | "firstname": "Sir J. 
Fraser", 297 | "surname": "Stoddart", 298 | "motivation": "\"for the design and synthesis of molecular machines\"", 299 | "share": "3" 300 | }, 301 | { 302 | "id": "933", 303 | "firstname": "Bernard L.", 304 | "surname": "Feringa", 305 | "motivation": "\"for the design and synthesis of molecular machines\"", 306 | "share": "3" 307 | } 308 | ]; 309 | 310 | var iteratorFlow = Flow.from(winners); //IteratorFlow is the first flow in the chain 311 | //merge both datasets and return the full names of all the winners 312 | var allWinners = iteratorFlow.merge(chemistryWinners).select(winner => winner.firstname + " " + winner.surname).collect(); 313 | //returns ["David J. Thouless", "F. Duncan M. Haldane", "J. Michael Kosterlitz", "Jean-Pierre Sauvage", "Sir J. Fraser Stoddart", "Bernard L. Feringa"] 314 | ``` 315 | 316 | #### discretize(span, spanLength\[, spawnFlows\]) 317 | This method is best understood in the context of data streams. It allows processing data in windows. 318 | *span* is the number of data streams to focus on in a window. 319 | *spanLength* can either be a Number or a function that tells when we get to the end of a window. 320 | *spawnFlows* is an optional boolean value that states if the output should be objects of DiscretizedFlow or simple arrays. spawnFlows defaults to true. 321 | 322 | This method is available to all Flow objects but the implementation differs for IteratorFlow. See the advanced section for usage. 323 | 324 | 325 | Flow Actions 326 | ------------ 327 | Flow actions are operations that yield results that are not themselves Flows. When an action is called on a Flow, the Flow engine begins operating on the data and pipes each produced item to the next layer until the condition for the action is met. The currently supported actions are listed below: 328 | 329 | #### count() 330 | Returns the total number of data items left after the last Flow method. 
331 | 332 | ```javascript 333 | //get the count of all the even numbers from the array 334 | var count = Flow.from([1,2,3,4,5,6,7,8,9]).where(num => num % 2 == 0).count(); 335 | //returns 4 336 | ``` 337 | 338 | #### findFirst() 339 | Returns the first data available in a Flow. 340 | 341 | ```javascript 342 | //get the first even number 343 | var first = Flow.from([1,2,3,4,5,6,7,8,9]).where(num => num % 2 == 0).findFirst(); 344 | //returns 2 345 | ``` 346 | 347 | #### findLast() 348 | Returns the last data available in a Flow. 349 | 350 | ```javascript 351 | //get the last even number 352 | var last = Flow.from([1,2,3,4,5,6,7,8,9]).where(num => num % 2 == 0).findLast(); 353 | //returns 8 354 | ``` 355 | 356 | #### findAny() 357 | This returns any data from the Flow. This currently does the same as findFirst(). This method is expected to work best in a parallel computing sense with ParallelFlow. 358 | 359 | ```javascript 360 | //get any even number from the array 361 | var any = Flow.from([1,2,3,4,5,6,7,8,9]).where(num => num % 2 == 0).findAny(); 362 | //returns 2 363 | ``` 364 | 365 | #### groupBy(function | String) 366 | This returns the data as a JS object partitioned into arrays (one per group), determined by the function. 367 | The argument can either be a function (that receives an item each time to generate the group that the item belongs to) or a key (from which the group will be determined using JS object syntax like `input[key]`). 
368 | 369 | ```javascript 370 | var array = [ 371 | {entity: "book", bookID: 12}, 372 | {entity: "student", studentID: 23434}, 373 | {entity: "student", studentID: 12233}, 374 | {entity: "book", bookID: 998} 375 | ]; 376 | 377 | //we want to group by entity so all entries with same entity value would be grouped together 378 | let groups = Flow.from(array).groupBy("entity"); 379 | //groups will contain: 380 | /* 381 | { 382 | "book": [{entity: "book", bookID: 12}, {entity: "book", bookID: 998}], 383 | "student": [{entity: "student", studentID: 23434}, {entity: "student", studentID: 12233}] 384 | } 385 | */ 386 | ``` 387 | 388 | #### collect(\[function\]) 389 | This returns the data as either an Array, Set or Map. The function argument is optional and default to returning an array. 390 | The function argument is a Flow internal function which can either be `Flow.toSet()`, `Flow.toArray()` or `Flow.toMap(keyFunc)`. It is also possible to ignore the parenthesis for the array and set as `Flow.toArray` and `Flow.toSet` respectively. 391 | `collect`ing with `Flow.toSet` returns a distinct dataset, `collect`ing with `Flow.toArray` returns all the data left after the last Flow method as an array while `collect`ing with `Flow.toMap(keyFunc)` returns a JS ES6 Map. The keyFunc in `Flow.toMap()` is same as the function supplied to groupBy. The only difference between calling collect with toMap(keyFunc) and calling groupBy(keyFunc) is that toMap returns an ES6 Map object while groupBy returns a plain JS object. 392 | 393 | ```javascript 394 | var array = [ 395 | {entity: "book", bookID: 12}, 396 | {entity: "student", studentID: 23434}, 397 | {entity: "student", studentID: 12233}, 398 | {entity: "book", bookID: 998} 399 | ]; 400 | 401 | //collecting to Array. 
Note that this is exactly the same without Flow.toArray 402 | var entities = Flow.from(array).select("entity").collect(Flow.toArray); 403 | //returns ["book", "student", "student", "book"] 404 | 405 | //collecting to Set 406 | var entitySet = Flow.from(array).select("entity").collect(Flow.toSet); 407 | //returns Set(2) {"book", "student"} 408 | 409 | //collecting all to Map 410 | var map = Flow.from(array).collect(Flow.toMap("entity")); 411 | /* 412 | returns: 413 | Map(2) { 414 | "book" => (2) [{entity: "book", bookID: 12}, {entity: "book", bookID: 998}], 415 | "student" => (2) [{entity: "student", studentID: 23434}, {entity: "student", studentID: 12233}] 416 | } 417 | */ 418 | ``` 419 | 420 | #### join([delimiter: String]) 421 | This function joins the outputs by a delimiter which is optional. The delimiter argument defaults to ",". 422 | 423 | ```javascript 424 | var joined = Flow.from([1,2,3,4,5]).map(num => num * 5).limit(3).join(" | "); 425 | //returns 5 | 10 | 15 426 | ``` 427 | 428 | #### forEach(function) \[alias: foreach\] 429 | This sends the remaining data from the last Flow in the chain to the custom function provided. The user may wish to operate on each data item outside the context of Flow. 430 | 431 | ```javascript 432 | //print all even numbers to the console 433 | Flow.from([1,2,3,4,5,6,7,8,9]).where(num => num % 2 == 0).foreach(console.log); 434 | ``` 435 | 436 | #### anyMatch(function) 437 | This returns a boolean indicating whether any of the remaining data matches the condition defined in the user-defined function. 438 | 439 | ```javascript 440 | //check if there is any number in the array that if we multiply with 5 yields 35 441 | var match = Flow.from([1,2,3,4,5,6,7,8,9]).anyMatch(num => num * 5 == 35); 442 | //returns true 443 | ``` 444 | 445 | #### allMatch(function) 446 | Similar to anyMatch, this checks that all the remaining data matches the condition defined in the function. 
447 | 448 | ```javascript 449 | //check if multiplying 5 with all numbers in the array yields 35 450 | var match = Flow.from([1,2,3,4,5,6,7,8,9]).allMatch(num => num * 5 == 35); 451 | //returns false 452 | ``` 453 | 454 | #### noneMatch(function) 455 | This may look like the inverse of allMatch but it is more closely related to anyMatch. This basically checks that no item matches the condition defined in the function argument. 456 | 457 | ```javascript 458 | //check that multiplying 5 with any number in the array DOES NOT yield 35 459 | var match = Flow.from([1,2,3,4,5,6,7,8,9]).noneMatch(num => num * 5 == 35); 460 | //returns false 461 | ``` 462 | 463 | #### reduce(initial, function) 464 | This allows a Flow to be reduced to a single value. It takes the initial value for the reduce operation and the function that defines how the reduce would be carried out. 465 | The function parameter takes two arguments (in the order: currentValue and newValue) and is expected to return a value which is further fed in as the currentValue for the next iteration. The function is called until all values are piped out of the Flow chain. 466 | 467 | ```javascript 468 | //let us implement getting the sum of numbers 469 | var sum = Flow.from([1,2,3,4,5]).reduce(0, (cv, nv) => cv + nv); 470 | //returns 15 471 | ``` 472 | 473 | #### sum() 474 | This is a reduce operation that returns the sum. It is expected that the values at the last Flow item in the chain are of Number type. 475 | 476 | ```javascript 477 | var sum = Flow.from([1,2,3,4,5]).sum(); 478 | //returns 15 479 | ``` 480 | 481 | #### average() 482 | This is also a reduce operation that returns the average. It is expected that the values at the last Flow item in the chain are of Number type. 483 | 484 | ```javascript 485 | var avg = Flow.from([1,2,3,4,5]).average(); 486 | //returns 3 487 | ``` 488 | 489 | #### max() 490 | This returns the maximum number. 
It is expected that the values at the last Flow item in the chain are of Number type. 491 | 492 | ```javascript 493 | var max = Flow.from([1,2,3,4,5]).max(); 494 | //returns 5 495 | ``` 496 | 497 | #### min() 498 | This returns the minimum number. It is expected that the values at the last Flow item in the chain are of Number type. 499 | 500 | ```javascript 501 | var min = Flow.from([1,2,3,4,5]).min(); 502 | //returns 1 503 | ``` 504 | 505 | 506 | Flow from FileSystem (for node.js) 507 | ---------------------------------- 508 | Flow does not currently work with the browser FileReader due to the way the FileReader is designed, which differs from the synchronous design of Flow. 509 | For working with files in node, the Flow.from() method accepts a string path to the file. However, the path needs to be prepended with "fs://". This prefix is used to distinguish file paths from plain strings. 510 | Files are processed by line. As an example: 511 | 512 | ```javascript 513 | //we have a file called names.txt in the same directory 514 | Flow.from("fs://./names.txt").range(0, 11).foreach(line => console.log(line)); 515 | ``` 516 | 517 | 518 | Advanced + Design Info 519 | ---------------------- 520 | ### Flow Groups 521 | There are 5 Flow groups namely: IteratorFlow, OutFlow, InFlow, DiscretizedFlow and Flow (the default Flow). They are grouped based on the type of operations that can be performed on them. 522 | 523 | i. IteratorFlow: This is mostly the first Flow in a Flow chain. When the Flow.from(…) method is called, an IteratorFlow is created. This flow extends the default Flow and provides a few more operations. 524 | 525 | ii. OutFlow: This Flow is responsible for processing and sending data across applications. More information on this later. 526 | 527 | iii. InFlow: This Flow is responsible for receiving data from another application. Also, more information on this later. 528 | 529 | iv. 
DiscretizedFlow: This Flow splits data streams into chunks/windows to allow for Flow methods that require finite data operations. Discretized Flows are discussed much later. 530 | 531 | v. Flow: This is the default Flow that has all the basic operations for data processing. 532 | 533 | ### Flow Chain Pipelining 534 | A Flow chain is a linked data structure of different Flow objects. Every Flow is aware of the previous Flow and the next Flow in the chain. A Flow chain is created when a Flow method is called on a Flow object. 535 | As an example: 536 | 537 | ```javascript 538 | var flow = Flow.from(array).skip(2).where((num) => num % 2 == 0); 539 | ``` 540 | 541 | From the example above, there are three Flow objects in the Flow chain. When an action is called on the final flow object, data is piped through the Flow chain till it gets to the last Flow in the chain, from which the action is computed. 542 | 543 | ### Flow Push & Pull Models 544 | Flow provides two modes of data pipelining: push and pull. The pull model is used to request that data be piped from the IteratorFlow (discussed later) through the chain. The data is generated from the Iterator when requested and sent through the chain. This mode is used by Flow actions to do a final computation on the dataset. For the push model, data is automatically piped through the Flow chain. The push model is used in Flow Streaming. 545 | 546 | ### Flow Streaming & The Streamer Class 547 | For continuous streams of data, Flow provides a data push model that can continuously pipe data through the Flow chain. This can be especially useful if computed data needs to be sent to another application for further processing. Each Flow pushes processed data to the next Flow in the chain or to a customizable terminal function (If the Flow is the last in the chain). The terminal function for a Flow can be set using the setTerminalFunction method. 
Flow streaming can be achieved when the Flow is created from either a Streamer or a function that generates continuous data like a JS Generator. An example of working with Streamer is shown below: 548 | 549 | ```javascript 550 | //import Flow and Streamer 551 | var Flow = RichFlow.Flow; 552 | var Streamer = RichFlow.Streamer; 553 | //create a new streamer 554 | var streamer = new Streamer(); 555 | 556 | //create a Flow from the streamer. Several streamers can be added via the merge method 557 | var flow = Flow.from(streamer).filter(num => num % 2 != 0); //filter for odd numbers 558 | //set the terminal function which will receive the data from the last Flow in the chain 559 | flow.setTerminalFunction(console.log); //print to the console 560 | //Inform the IteratorFlow to start listening for data from the streamer 561 | flow.startPush(); //This can be called from any Flow in the chain. 562 | 563 | setInterval(() => { 564 | streamer.send(parseInt(Math.random() * 10)); //send data to all listeners 565 | }, 500); 566 | ``` 567 | 568 | **NOTE**: If the `startPush` method is called after the Streamer starts generating data, some data may be lost at the initial stage. 569 | 570 | The Streamer class is bare-bones and does minimal work. It can be extended to do much more like working as a finite dataset. Data could be received from the OutFlow and cached or data it generates could be cached and reused as a finite dataset using the Flow pull mode. If you wish to use the Streamer in Flow pull mode, you will need to extend the class and provide implementation for the `size` and `get` methods. 571 | 572 | The Streamer class can act as a stream provider and a stream receiver as well. A function can be supplied to the constructor of the Streamer to receive stream data. More on this on the InFlow and OutFlow sections. 573 | 574 | ### IteratorFlow 575 | The IteratorFlow is a Flow that creates a unified means of retrieving data from different data structures. 
The IteratorFlow turns the data passed to Flow.from(…) into a JavaScript Iterable by wrapping the data with an iterator implementation that makes retrieving data as easy as calling a next() method on the iterator handle. More Iterators can be added via the merge method on an object of IteratorFlow. The merge method takes the same type of parameter as the Flow.from(…) method. 576 | 577 | This Flow is the Root Flow of the Flow chain and can be accessed from any Flow in the chain via the property rootFlow. As an example: 578 | 579 | ```javascript 580 | var flow = Flow.from(array).skip(2).where((num) => num % 2 == 0); 581 | var iteratorFlow = flow.rootFlow; //get access to the IteratorFlow 582 | ``` 583 | 584 | For data streaming in Flow, the IteratorFlow needs to listen for changes on the Streamer object(s) and retrieve new data when data is sent via the Streamer.send() method. The retrieved data is pushed through the Flow chain till it gets to an OutFlow or the terminal function of the last Flow object in the chain. To start data streaming in Flow, the startPush() method needs to be called on an object of the IteratorFlow. To stop the streaming at any time, the stopPush() method can be called on the IteratorFlow object. When the stopPush() method is called, the IteratorFlow disconnects from the Streamers and stops listening for incoming data on the connected streams. 585 | 586 | 587 | ### DiscretizerFlow & DiscretizedFlow 588 | DiscretizerFlow partitions streams of data flowing through the Flow chain into windows and each data window could be emitted as a DiscretizedFlow or an array. Actually, discretization can also occur for static/finite datasets like arrays or generators. DiscretizedFlows are IteratorFlows and could themselves be discretized and Flow actions can be called on them. Any Flow can be discretized (with the exception of OutFlow). However, the discretization implementation in IteratorFlow differs from the implementation on other Flows. 
589 | 590 | IteratorFlow handles the discretization process internally, while the DiscretizerFlow handles discretization for all other Flows. For IteratorFlow discretization, the data window can be created from a single iterator or multiple iterators (this could be a single datastream or multiple datastreams) while the discretization for other Flow groups is done on the input data. The discretize method takes three arguments, namely the window span, the span length and a boolean value indicating if data should be spawned as discretized flows or as arrays. The third argument is optional and defaults to true. 591 | 592 | For IteratorFlow discretization, the window span specifies how many iterators should be included in creating the window. Recall that an Iterator can be added via the IteratorFlow.merge(…) method. A block of data is a data structure that has one item from each iterator from the window. The span length is the number of data blocks that should constitute a discrete block. Span length can be a number, a function or an object having an ‘isDataEnd’ function. The function receives two arguments - the last data added and the current length of the window span - and should evaluate to a boolean. 593 | 594 | For other Flow groups, discretization is on the input. It is the responsibility of the programmer to ensure that the data received as input to the DiscretizerFlow is fit for discretization and it is assumed that each data item piped can be broken down in the way needed by the programmer. When DiscretizerFlow determines that it is not possible to discretize ‘perfectly’, the implementation respects the programmer's wish and fills the remaining slots in the data block with null values. The discretize method takes in the same arguments and the span length follows the same rules as that of IteratorFlow. The window span here specifies how many parts each input piped to the DiscretizerFlow can be broken down into. 
It is assumed that when each input is passed to Flow.from(…), it should be able to create an Iterator that will generate the amount of data required by the programmer. 595 | 596 | ```javascript 597 | //lazily create 4 streamers 598 | var streamers = Flow.of(4).map(a => new Streamer()).collect(); 599 | //we need to merge all the streams so we start by adding one 600 | var flow = Flow.from(streamers[0]); 601 | for( i = 1; i < streamers.length; i++ ) 602 | flow = flow.merge(streamers[i]); //merge the remainder 603 | //discretize with a span covering all streams and data length of 1 604 | var discretizerFlow = flow.discretize(streamers.length, 1); 605 | //set the terminal function 606 | discretizerFlow.setTerminalFunction(discretizedFlow => console.log(discretizedFlow.selectFlatten().collect())); 607 | discretizerFlow.startPush(); //start listening for data on the Streamers 608 | 609 | setInterval(() => { 610 | streamers.forEach(streamer => streamer.send(parseInt(Math.random() * 10))); 611 | }, 500); 612 | ``` 613 | 614 | ### OutFlow 615 | In [JAMScript](https://github.com/anrl/JAMScript-beta), OutFlow was built as a specialized Flow for the purpose of sending processed data to external applications. Here, the OutFlow has been stripped of that functionality. Though the concept is still part of it but that is now the responsibility of Streamer. When an OutFlow is created, a Flow object is supplied as a argument which the OutFlow links to, in order to receive pushed data. A Streamer object is also provided as the second argument in the constructor to which the OutFlow is expected to push the data and an identifier key which can optionally be supplied as a third argument (If none is supplied, one is auto generated). A Streamer could for instance generate stream data from sensors and write to a datastore such as Redis or send the computed data elsewhere. 
Let us see an example with Redis: 616 | 617 | ```javascript 618 | //require OutFlow, Flow and Streamer 619 | const {Flow, Streamer, OutFlow} = require('richflow'); //in node.js (See top for browser) 620 | var Redis = require('redis-fast-driver'); //require Redis 621 | var redis = new Redis({host: '127.0.0.1', port: 6379}); //establish connection 622 | 623 | //create 4 streamers. Could listen to sensors and obtain data 624 | var streamers = Flow.of(4).map(a => new Streamer()).collect(); 625 | 626 | //we need to merge all the streams so we start by adding one 627 | var flow = Flow.from(streamers[0]); 628 | for( i = 1; i < streamers.length; i++ ) 629 | flow = flow.merge(streamers[i]); //merge the remainder 630 | 631 | var outFlow = new OutFlow(flow.discretize(streamers.length, 1), 632 | new Streamer((dFlow, key) => { 633 | let avg = dFlow.selectFlatten().average(); 634 | let timestamp = new Date().getTime(); 635 | redis.rawCall(['ZADD', key, timestamp, avg + '']); 636 | }), "App1.Key"); 637 | outFlow.start(); //inform the IteratorFlow to begin pushing data 638 | 639 | //simulate sensor data 640 | setInterval(() => { 641 | streamers.forEach(streamer => streamer.send(parseInt(Math.random() * 10))); 642 | }, 500); 643 | ``` 644 | 645 | The start() method in OutFlow calls the startPush() method in the IteratorFlow (the first flow in the chain) and informs the IteratorFlow to start listening for push data from the data source. This data is continually pushed and may or may not get to the OutFlow based on the constraints within each Flow object in the Flow chain. As data arrives at the OutFlow, it is sent to the Streamer which further sends it to Redis. 646 | To stop listening to data streams, the programmer can call the OutFlow stop method on the object handle which will in turn call the stopPush() method on the IteratorFlow. For example: 647 | 648 | ```javascript 649 | outFlow.stop(); 650 | ``` 651 | 652 | ### InFlow 653 | This is also another specialized Flow. 
In [JAMScript](https://github.com/anrl/JAMScript-beta) it is solely responsible for retrieving data from an external application. The retrieved data can be taken through further processing along a Flow chain before being used. Here, the InFlow can listen for new data from the Streamer and push it onwards to any connected Flow. An example with Redis, following on from the OutFlow example: 654 | 655 | ```javascript 656 | //require InFlow, Flow and Streamer 657 | const {Flow, Streamer, InFlow} = require('richflow'); //in node.js (See top for browser) 658 | var Redis = require('redis-fast-driver'); //require Redis 659 | var redis = new Redis({host: '127.0.0.1', port: 6379}); //establish connection 660 | 661 | class MyStreamer extends Streamer{ 662 | constructor(){ 663 | super(); 664 | this.lastIndex = 0; 665 | //listen for new data on Redis 666 | redis.rawCall(['config', 'set', 'notify-keyspace-events', 'Ez']); 667 | redis.rawCall(['psubscribe', '__keyevent*'], this.notify); 668 | } 669 | notify(e, data){ 670 | if(data[0] == "pmessage" && data[3]){ //check if a message has arrived 671 | //get data from Redis 672 | var self = this; 673 | redis.rawCall(['ZRANGE', "App1.Key", this.lastIndex + 1, -1], function(err, resp){ 674 | if( err ) 675 | throw new Error(err); 676 | 677 | for (var i = 0; i < resp.length; i++) { 678 | self.lastIndex++; 679 | self.send(resp[i]); //send data to all listeners like InFlow 680 | } 681 | }); 682 | } 683 | } 684 | } 685 | 686 | var inflow = new InFlow(new MyStreamer()); 687 | var flow = inflow.where(avg => avg > 5); //filter for averages above 5 688 | flow.setTerminalFunction(console.log); //print to the console 689 | ``` 690 | 691 | ### Flow Caching 692 | 693 | This is an internal process that aims to speed up Flow reuse and works with static/finite data sets (does not work with Flow streaming). Flow attempts to get data from the Iterators each time an action is called on the Flow. 
However, for static/finite datasets, the iterators will produce the same data each time, so piping it through the Flow chain again wastes time. By caching processed data, whenever an action is called on a Flow (a second time), because it has already processed the data during the first round, it serves the processed data, saving processing time. Caching IteratorFlow data is trivial so it is never cached. Note that this caching is in memory. Currently, the cache stays on for as long as the Flow has not been garbage collected. 694 | 695 | 696 | Common Pitfalls 697 | --------------- 698 | The `null` value has a special meaning within Flow so your data should not contain it. Including it could cause Flow to report incorrect results. 699 | 700 | By default, Flow caches outputs for faster reuse. However, this can cause certain issues if the underlying data source changes. With caching, the changes will not be reflected when the constructed Flow is being reused. Let us see a simple example with Arrays: 701 | 702 | ```javascript 703 | //with caching 704 | var arr = [1,2,3,4,5,6,7,8,9]; 705 | var flow = Flow.from(arr).where(num => num % 2 == 0); 706 | console.log(flow.count()); //prints 4 707 | arr.push(0); 708 | console.log(flow.count()); //prints 4 709 | 710 | //without caching 711 | var arr = [1,2,3,4,5,6,7,8,9]; 712 | var flow = Flow.from(arr).where(num => num % 2 == 0); 713 | flow.rootFlow.shouldCache = false; 714 | console.log(flow.count()); //prints 4 715 | arr.push(0); 716 | console.log(flow.count()); //prints 5 717 | ``` 718 | 719 | To disable caching, after creating the Flow, on the IteratorFlow do the following: 720 | 721 | ```javascript 722 | var iteratorFlow = Flow.from(…); 723 | iteratorFlow.shouldCache = false; //needs to be done before any action is called 724 | ``` 725 | 726 | Another common pitfall is reusing Flows. Each Iterator in the IteratorFlow maintains a cursor on where the next data should be obtained from. 
Now, because the pipeline process ensures that the minimum amount of work is done to produce the desired result, it will sometimes be the case that an iterator does not get to the end. Reusing the Flow will then resume the cursor of the iterator from the last place it stopped and will yield unexpected results. Do not reuse Flows if you do not understand this concept. As an example: 727 | 728 | ```javascript 729 | var flow = Flow.fromRange(1, 10); //creates a Flow with numbers from 1 to 10 730 | console.log(flow.limit(5).collect()); //prints [1,2,3,4,5] 731 | flow.forEach(console.log); //prints 7 8 9 10 732 | ``` 733 | 734 | From the above code, you can notice that the call to forEach prints what is left as opposed to all the content from 1 to 10. This type of error can be fixed in most cases by flushing the contents of the flow before reusing. Sometimes, it can only be fixed with a combination of turning off caching and flushing and other times it may take more than that. **Flow reuse should be done with caution**. To fix the above error, we can do the following: 735 | 736 | ```javascript 737 | var flow = Flow.fromRange(1, 10); //creates a Flow with numbers from 1 to 10 738 | console.log(flow.limit(5).collect()); //prints [1,2,3,4,5] 739 | //flush the remaining contents. The iterators automatically reset for reuse when they get to the end 740 | flow.count(); 741 | flow.forEach(console.log); //prints 1 2 3 4 5 6 7 8 9 10 742 | ``` 743 | 744 | Roadmap 745 | ------- 746 | i. ParallelFlow: A truly parallel pipeline data processing library. 747 | 748 | ii. Flow Caching Offloading: An investigation needs to be made on when and which Flows to release memory, especially when the system is running low on RAM storage. There could be a listener that listens out for memory changes and probably informs Flows to either save processed data to disk or release the data. 
Based on the size of data held by the Flows in the middle of the chain, the runtime could decide which will be faster, saving to the disk and reloading from disk when needed or recomputing from the previous Flow in the chain. 749 | 750 | Contact 751 | ------- 752 | For questions or suggestions please send a message to david.echomgbe \[@\] gmail.com. Please prefix your email subject with "RichFlow -". 753 | -------------------------------------------------------------------------------- /richflow.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by Richboy on 30/05/17. 3 | */ 4 | 5 | "use strict"; 6 | 7 | (function(root, factory){ 8 | if (typeof exports === "object" && exports) { 9 | factory(exports); // CommonJS 10 | } 11 | else { 12 | var RichFlow = {}; 13 | factory(RichFlow); 14 | if (typeof define === "function" && define.amd) { 15 | define(RichFlow); // AMD 16 | } 17 | else { 18 | root.RichFlow = RichFlow; //