├── LICENSE
├── README.md
├── buffering.js
├── compose.js
├── content-addressable.js
├── duplex.js
├── package.json
├── pull.js
└── simple
    ├── csv.js
    ├── jsondl.js
    └── ls.js

/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2015 Dominic Tarr

Permission is hereby granted, free of charge,
to any person obtaining a copy of this software and
associated documentation files (the "Software"), to
deal in the Software without restriction, including
without limitation the rights to use, copy, modify,
merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom
the Software is furnished to do so,
subject to the following conditions:

The above copyright notice and this permission notice
shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# pull-stream-examples

This is a short introduction to pull-streams.

Start with [pull.js](./pull.js), which covers the basics.

[compose.js](./compose.js) explores the power of composing pull streams
together.

Also look at [buffering.js](./buffering.js) to learn how to create pull streams
that can return multiple values using buffering, like node streams.

Then see [duplex.js](./duplex.js) to turn node streams into pull streams and
create a very simple server.


## License

MIT
--------------------------------------------------------------------------------
/buffering.js:
--------------------------------------------------------------------------------
var pull = require('pull-stream')

/*
*** 1:1 read-callback ratio

A pull stream source (and thus transform) returns *exactly one value* per read.
This differs from node streams, which can use `this.push(value)` and an internal
buffer to create transforms that write many values from a single read value.
Pull streams don't come with their own buffering mechanism -- wisely so.

This means you need to think a bit more about returning more than one value from a
single `read`. Let's say you had a pull stream source that provides strings that
contain newlines, and want a transform to split them by newlines before passing
them on. Some strings may be split into several lines, so the transform will need
to buffer them before passing them on.
*/


var src = pull.values([
  'hello\nworld',
  'guten\ntag\nmeine\nfreunde'
])

// var trans = ???

var snk = pull.drain(console.log)

// pull(src, trans, snk)


/*
There are a few ways to do this:

1. use https://github.com/pull-stream/pull-through

This module wraps a pull stream transform to provide its own queuing
mechanism, so that subsequent reads empty the queue. Its node stream analogy
is https://github.com/dominictarr/through
*/

var through = require('pull-through')

var trans = through(function (data) {
  data.split('\n').forEach(this.queue.bind(this))
})


/*
2. return an array, then flatten it

pull.flatten (https://github.com/pull-stream/pull-stream/blob/2201ddda56ce5739266a7c0044e983ade47443ac/docs/throughs.md#flatten-)
returns a transform stream that performs the buffering by holding onto
arrays passed to it and draining those values to the reader until they're
all gone.
*/

var trans = pull(
  pull.map(function (value) {
    return value.split('\n')
  }),
  pull.flatten()
)


// finally, connect them all together
pull(src, trans, snk)

--------------------------------------------------------------------------------
/compose.js:
--------------------------------------------------------------------------------

/*

pull-streams are for making tiny modules.
sometimes, you might write a pull stream from scratch,
without any dependencies. but more often, you'll make a new
pull-stream by combining several other pull-streams.

This is called "composing" or "composition".
in pull-streams, you need a complete pipeline before data will flow.
that means: a source, zero or more throughs, and a sink.

but you can still call pull() on a _partial_ pipeline,
which is a great way to create a pull-stream module.

create a source modified by a through:

pull(source, through) => source

create a sink, but modify its input before it goes in:

pull(through, sink) => sink

create a through, by chaining several throughs:

pull(through1, through2) => through

these streams combine just like normal streams.

pull(
  pull(source, through),
  pull(through1, through2),
  pull(through, sink)
) => undefined

the complete pipeline returns undefined, because it cannot be piped
to anything else.
*/

var pull = require('pull-stream')

/*
create a simple csv parser,
using the split module to separate the input into lines,
and then each line into cells.
*/
var split = require('pull-split')
function parseCsv () {
  return pull(
    split(), //defaults to \n
    pull.map(function (line) {
      return line.split(/,\s*/)
    })
  )
}

/*
>This will parse simple csv files,
>for a more correct csv parser, see https://github.com/dominictarr/pull-csv

now, say we want to sum a column in a large csv.
we can take our parser, and a reduce function
*/

function sum (column, cb) {
  return pull.reduce(function (a, b) {
    //the cells are strings, so convert before adding
    return a + Number(b[column])
  }, 0, cb)
}

var File = require('pull-file')

//yourCsv is the path to the csv file you want to sum.
pull(File(yourCsv), parseCsv(), sum(0, console.log))

--------------------------------------------------------------------------------
/content-addressable.js:
--------------------------------------------------------------------------------

/*
Now here is a non-contrived example.

This is a content addressable store. A CA store
is like a key-value store, except you do not get to
choose the key: the key is always the hash of the value.

This is a really good idea for a bunch of reasons.
Your data becomes immutable (cannot change), which means
caching works perfectly (no cache invalidation).
Also, if you know the hash you want, you can verify
you have the correct data no matter who gives it to you.

These features make building a distributed system very
easy, which is why you see this pattern in git, bittorrent,
and bitcoin.
*/
// (try: ls .git/objects/*/* to look inside git's CA store)
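// a quick, concrete illustration (the value here is hypothetical, not part of
// the original file): in a sha256-based CA store, the key for the value
// 'hello world' is simply its hash:
//
//   require('crypto').createHash('sha256').update('hello world').digest('hex')
//   // => 'b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9'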

var fs = require('fs')
var path = require('path')
var createHash = require('crypto').createHash
var pull = require('pull-stream')
var toPull = require('stream-to-pull-stream')

//pass in the directory you want the CA store to be in.
module.exports = function (dir) {

  /*
  first we need a function that turns a hash into a path inside that directory.
  */
  //we'll use the hash of the file as its filename.
  //(note: this assumes the two-character subdirectory already exists)
  function toFile (hash) {
    return path.join(dir, hash.substring(0, 2), hash.substring(2))
  }

  //a minimal helper for a temporary filename to write into.
  function getTemp () {
    return path.join(dir, 'tmp_' + Date.now() + '_' + Math.random())
  }

  //a through stream that hashes everything that passes through it.
  function sha256 () {
    var hash = createHash('sha256'), stream
    return stream = pull.through(function (data) {
      hash.update(data)
    }, function () {
      stream.digest = hash.digest('hex')
    })
  }

  function read (hash) {
    return toPull.source(fs.createReadStream(toFile(hash)))
  }

  function write (expected, cb) {
    if('function' === typeof expected)
      cb = expected, expected = null

    cb = cb || function (err) { if(err) throw err }

    var tmpfile = getTemp(), hash = sha256()
    return pull(
      hash,
      toPull.sink(fs.createWriteStream(tmpfile), function (err) {
        //if there was an error, delete the file.
        if(err) fs.unlink(tmpfile, cb)
        //if we expected a particular hash but got something else, also delete it.
        else if(expected && expected != hash.digest)
          fs.unlink(tmpfile, function () {
            cb(new Error(
              'did not receive file:' + hash.digest
              + ' expected:' + expected
            ))
          })
        //otherwise, move the file to its content-addressed location.
        else fs.rename(tmpfile, toFile(hash.digest), cb)
      })
    )
  }

  return {
    read: read,
    write: write
  }
}

--------------------------------------------------------------------------------
/duplex.js:
--------------------------------------------------------------------------------
/*
Duplex streams are used to communicate with a remote service,
and they are a pair of source and sink streams: `{source, sink}`

in node, you use duplex streams to connect replication or rpc protocols.

client.pipe(server).pipe(client)
or
server.pipe(client).pipe(server)
both do the same thing.

the pull function we wrote before doesn't detect this,
but if you use the pull-stream module it will.
Then we can pipe duplex pull-streams like this:

var pull = require('pull-stream')
pull(client, server, client)

Also, sometimes you'll need to interact with a regular node stream.
there are two modules for this:

stream-to-pull-stream
and
pull-stream-to-stream

*/
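// to make the `{source, sink}` shape concrete, a duplex pull-stream is just an
// object holding both halves (a sketch, not part of the original file):
//
//   var duplex = {
//     source: function (abort, cb) { /* data going out to the other side */ },
//     sink: function (read) { /* reads data coming in from the other side */ }
//   }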

var net = require('net')
var toPull = require('stream-to-pull-stream')
var pull = require('pull-stream')

var server = net.createServer(function (stream) {
  //convert into a duplex pull-stream
  stream = toPull.duplex(stream)

  pull(
    stream,
    pull.map(function (b) {
      //take the input, and MAKE IT LOUD!!!
      return b.toString().toUpperCase() + '!!!'
    }),
    stream
  )

}).listen(9999, function () {

  var stream = toPull.duplex(net.connect(9999))

  pull(
    pull.values(['quiet stream']),
    stream,
    pull.drain(function (data) {
      console.log(data.toString())
    }, function (err) {
      if(err) throw err
      server.close()
    })
  )

})

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
{
  "name": "pull-stream-examples",
  "description": "a short introduction to pull-streams",
  "version": "0.0.0",
  "homepage": "https://github.com/dominictarr/pull-stream-examples",
  "repository": {
    "type": "git",
    "url": "git://github.com/dominictarr/pull-stream-examples.git"
  },
  "dependencies": {
    "pull-stream": "^2.27.0",
    "stream-to-pull-stream": "^1.6.1"
  },
  "devDependencies": {},
  "scripts": {
    "test": "set -e; for t in test/*.js; do node $t; done"
  },
  "author": "Dominic Tarr (http://dominictarr.com)",
  "license": "MIT"
}

--------------------------------------------------------------------------------
/pull.js:
--------------------------------------------------------------------------------
/*

create a simple source stream that reads from an array.

A pull stream is just an async stream that is called repeatedly.
note that when every item in the array has been called back,
it returns true in the error slot. This indicates the end of the stream.
both err and end mean the stream is over! but there are many possible
ways an error can occur (err && err !== true), and only one way a stream can correctly end (true)

in pull-streams I like to call streams that data comes out of "sources",
(in node they are usually called readables)

*/
function values (ary) {
  var i = 0
  return function read(abort, cb) {
    if(i === ary.length || abort) return cb(true)
    cb(null, ary[i++])
  }
}

/*

pull-streams don't really have a writable stream per se. "writable" implies that
the writer is the active partner, and the stream which is written to is passive.
(like you are when you watch TV. the TV writes its lies into your neocortex via your retinas)

instead of a writable, pull streams have a "sink", that is, a reader.
here the reader is the active party, actively consuming more data.
When you read a book, you are in control, and must actively turn the pages to get more information.

so, a sink is a function that you pass a source to,
which then reads from that function until it gets to the end or decides to stop.
*/

function sink (read) {
  read(null, function next (err, data) {
    if(err) return console.log(err)
    console.log(data)
    //recursively call read again!
    read(null, next)
  })
}

/*

we could now consume the source with just these two functions.

sink(values([1,2,3]))

so simple. we didn't use any libraries, yet we have streams with two-way back pressure.
since the pattern is async, the source can slow down by cb'ing slower,
and the sink can slow down by waiting longer before calling read again!
*/
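// for instance, here is a sink that waits 100ms between reads -- the source is
// then read no faster than that. (this sketch is an addition, not part of the
// original file.)
function slowSink (read) {
  read(null, function next (err, data) {
    if(err) return console.log(err)
    console.log(data)
    //wait a while before asking for the next value
    setTimeout(function () { read(null, next) }, 100)
  })
}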

/*
okay, to be useful, we also need a way to transform inputs into different outputs,
i.e. a transform stream.

in pull-streams a transform is implemented as a sink that returns a source.

*/

function map (mapper) {
  //a sink function: accept a source
  return function (read) {
    //but return another source!
    return function (abort, cb) {
      read(abort, function (err, data) {
        //if the stream has ended, pass that on.
        if(err) return cb(err)
        //apply a mapping to that data
        cb(null, mapper(data))
      })
    }
  }
}

var source = values([1,2,3])
var mapper = map(function (e) { return e*e })


/*
right now, we could combine these 3 streams by passing them to each other,
with plain function composition:

sink(mapper(source))

this would be equivalent to node's .pipe,
except with node streams it would look like

source.pipe(mapper).pipe(sink)

to be honest, it's easier to read if it goes left to right,
because the direction the data flows is the same as the direction you read.

let's write a quick function that allows us to compose pull streams left-to-right:

pull(source, mapper, sink)
*/

function pull () {
  var args = [].slice.call(arguments)
  var s = args.shift()
  while(args.length) s = args.shift()(s)
  return s
}

/*
that's it! just call the next thing with the previous thing until there are no things left.
since we return the last thing, we can even do this:

pull(pull(source, mapper), sink)

*/

/*
Infinite streams. here is a stream that never ends.
*/

function infinite () {
  var i = 0
  return function (abort, cb) {
    if(abort) return cb(abort)
    cb(null, i++)
  }
}

/*
Now, reading all of an infinite stream would take forever...
BUT! the cool thing about pull streams is that they are LAZY.
that means the source only gives us the next thing when we ask for it.

Also, you can ABORT a pull stream when you don't want any more.

here is a take(n) stream that reads n items from a source and then stops.
it's a transform stream like map, except it will stop early.
*/

function take (n) {
  return function (read) {
    return function (abort, cb) {
      //after n reads, tell the source to abort!
      if(!n--) return read(true, cb)
      read(abort, cb)
    }
  }
}

/*
now we can pipe the infinite stream through this,
and it will stop after 101 items!
*/

pull(infinite(), mapper, take(101), sink)

/*
That covers 3 types of pull streams: Source, Transform, & Sink.
There is one more important type, although it's not used as much:

Duplex streams

(see duplex.js!)
*/

--------------------------------------------------------------------------------
/simple/csv.js:
--------------------------------------------------------------------------------

// split a file into lines,
// and then map each line through a split function.

var pull = require('pull-stream')
var Split = require('pull-split')
var File = require('pull-file')

function CSV () {
  return pull(
    Split(), //defaults to '\n'
    pull.map(function (line) {
      return line.split(',')
    })
  )
}

//parse a file (pass the path on the command line)

var filename = process.argv[2]

pull(
  File(filename),
  CSV(),
  pull.drain(console.log)
)


// this parses simple CSV files, as long as they do not escape commas with quotes.
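// a quick illustration of that limitation (the input here is hypothetical, not
// from the original file): a quoted field containing a comma gets split into
// two cells by the naive approach above:
//
//   'widget,"1,000",red'.split(',')
//   // => [ 'widget', '"1', '000"', 'red' ]   -- 4 cells instead of 3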
// the module pull-csv is a more correct csv parser.

--------------------------------------------------------------------------------
/simple/jsondl.js:
--------------------------------------------------------------------------------

//newline delimited json.

var pull = require('pull-stream')
var Split = require('pull-split')
var File = require('pull-file')

function pullJSON () {
  return pull(
    Split(),
    pull.map(function (line) {
      return JSON.parse(line)
    })
  )
}

//parse a file (pass the path on the command line)

var filename = process.argv[2]

pull(
  File(filename),
  pullJSON(),
  pull.drain(console.log)
)

--------------------------------------------------------------------------------
/simple/ls.js:
--------------------------------------------------------------------------------

var fs = require('fs')
var path = require('path')

var pull = require('pull-stream')
var Defer = require('pull-defer')
var Paramap = require('pull-paramap')

//list the files in a directory.
//since fs.readdir is an async function,
//but we want to return a new stream immediately,
//we use pull-defer

function ls (dir) {

  var stream = Defer.source()

  fs.readdir(dir, function (err, ls) {
    //if readdir fails, resolve to a source that just errors
    stream.resolve(err ? pull.error(err) : pull.values(ls))
  })

  return stream

}

//list the files in a directory
/*
pull(
  ls(process.argv[2] || process.cwd()),
  pull.drain(console.log)
)
*/

// get stats for each file.
// we use paramap here, so that we can look up many files in parallel

function ls_long (dir) {
  return pull(
    ls(dir),
    Paramap(function (file, cb) {
      var filename = path.join(dir, file)
      fs.lstat(filename, function (err, stat) {
        if(err) return cb(err)
        stat.file = filename
        stat.dir = stat.isDirectory()
        cb(null, stat)
      })
    })
  )
}

/*
pull(
  ls_long(process.argv[2] || process.cwd()),
  pull.drain(console.log)
)
*/

// drill down into subdirectories.
// if an item is a directory, map that item to a stream.
// pull.flatten() turns a stream of streams (or arrays) into a stream of items

function ls_recursive (dir) {
  return pull(
    ls_long(dir),
    pull.map(function (e) {
      if(!e.dir) return [e.file]
      else return ls_recursive(e.file)
    }),
    pull.flatten()
  )
}

pull(
  ls_recursive(process.argv[2] || process.cwd()),
  pull.drain(console.log)
)
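// a small variation (an addition, not in the original file): instead of
// printing each path with pull.drain, you could gather the whole recursive
// listing into an array with pull.collect, which calls back once with
// everything when the source ends. it's commented out, like the earlier
// alternatives, so that only one pipeline runs when you execute this file.
/*
pull(
  ls_recursive(process.argv[2] || process.cwd()),
  pull.collect(function (err, files) {
    if(err) throw err
    console.log(files.length + ' entries found')
  })
)
*/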