├── LICENSE
├── README.md
├── buffering.js
├── compose.js
├── content-addressable.js
├── duplex.js
├── package.json
├── pull.js
└── simple
    ├── csv.js
    ├── jsondl.js
    └── ls.js

/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2015 Dominic Tarr

Permission is hereby granted, free of charge,
to any person obtaining a copy of this software and
associated documentation files (the "Software"), to
deal in the Software without restriction, including
without limitation the rights to use, copy, modify,
merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom
the Software is furnished to do so,
subject to the following conditions:

The above copyright notice and this permission notice
shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# pull-stream-examples

This is a short introduction to pull-streams.

Start with [pull.js](./pull.js), which covers the basics.

[compose.js](./compose.js) explores the power of composing pull streams
together.

Also look at [buffering.js](./buffering.js) to learn how to create pull streams
that can return multiple values using buffering, like node streams.

Then see [duplex.js](./duplex.js) to turn node streams into pull streams and
create a very simple server.


## License

MIT
--------------------------------------------------------------------------------
/buffering.js:
--------------------------------------------------------------------------------
var pull = require('pull-stream')

/*
*** 1:1 read-callback ratio

A pull stream source (and thus transform) returns *exactly one value* per read.
This differs from node streams, which can use `this.push(value)` and an internal
buffer to create transforms that write many values from a single read value.
Pull streams don't come with their own buffering mechanism -- wisely so.

This means you need to think a bit more about returning more than one value from a
single `read`. Let's say you had a pull stream source that provides strings that
contain newlines, and want a transform to split them by newlines before passing
them on. Some strings may be split into several lines, so the transform will need
to buffer them before passing them on.
*/


var src = pull.values([
  'hello\nworld',
  'guten\ntag\nmeine\nfreunde'
])

// var trans = ???

var snk = pull.drain(console.log)

// pull(src, trans, snk)


/*
There are a few ways to do this:

1. use https://github.com/pull-stream/pull-through

This module wraps a pull stream transform to provide its own queuing
mechanism, so that subsequent reads empty the queue. Its node stream analogy
is https://github.com/dominictarr/through
*/

var through = require('pull-through')

var trans = through(function (data) {
  data.split('\n').forEach(this.queue.bind(this))
})


/*
2. return an array, then flatten it

pull.flatten (https://github.com/pull-stream/pull-stream/blob/2201ddda56ce5739266a7c0044e983ade47443ac/docs/throughs.md#flatten-)
returns a transform stream that performs the buffering by holding onto
arrays passed to it and draining those values to the reader until they're
all gone.
*/

var trans = pull(
  pull.map(function (value) {
    return value.split('\n')
  }),
  pull.flatten()
)


// finally, connect them all together
pull(src, trans, snk)

--------------------------------------------------------------------------------
/compose.js:
--------------------------------------------------------------------------------

/*

pull-streams are for making tiny modules.
sometimes, you might write a pull stream from scratch,
without any dependencies. but more often, you'll make a new
pull-stream by combining several other pull-streams.

This is called "composing" or "composition".
in pull-streams, you need a complete pipeline before data will flow.
that means: a source, zero or more throughs, and a sink.

but you can still call pull() on a _partial_ pipeline,
which is a great way to create a pull-stream module.

create a source modified by a through:

pull(source, through) => source

create a sink, but modify its input before it goes in:

pull(through, sink) => sink

create a through, by chaining several throughs:

pull(through1, through2) => through

these streams combine just like normal streams.

pull(
  pull(source, through),
  pull(through1, through2),
  pull(through, sink)
) => undefined

the complete pipeline returns undefined, because it cannot be piped
to anything else.
*/

var pull = require('pull-stream')

/*
create a simple csv parser,
using the split module to separate the input into lines,
and then each line into cells.
*/
var split = require('pull-split')
function parseCsv () {
  return pull(
    split(), //defaults to \n
    pull.map(function (line) {
      return line.split(/,\s*/)
    })
  )
}

/*
>This will parse simple csv files,
>for a more correct csv parser, see https://github.com/dominictarr/pull-csv

now, say we want to sum a column in a large csv.
we can take our parser, and a reduce function
*/

function sum (column, cb) {
  return pull.reduce(function (a, b) {
    //the cells are strings, so convert before adding
    return a + Number(b[column])
  }, 0, cb)
}

var File = require('pull-file')

//yourCsv is the path to the csv file you want to sum.
pull(File(yourCsv), parseCsv(), sum(0, console.log))

--------------------------------------------------------------------------------
/content-addressable.js:
--------------------------------------------------------------------------------

/*
Now here is a non-contrived example.

This is a content addressable store. A CA store
is like a key-value store, except you do not get to
choose the key: the key is always the hash of the value.

This is a really good idea for a bunch of reasons.
Your data becomes immutable (cannot change), which means
caching works perfectly (no cache invalidation).
Also, if you know the hash you want, you can verify
you have the correct data no matter who gives it to you.

These features make building a distributed system very
easy, which is why you see this pattern in git, bittorrent,
and bitcoin.
*/
// (try: ls .git/objects/*/* to look inside git's CA store)
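// a quick, concrete illustration (the value here is hypothetical, not part of
// the original file): in a sha256-based CA store, the key for the value
// 'hello world' is simply its hash:
//
//   require('crypto').createHash('sha256').update('hello world').digest('hex')
//   // => 'b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9'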

var fs = require('fs')
var path = require('path')
var createHash = require('crypto').createHash
var pull = require('pull-stream')
var toPull = require('stream-to-pull-stream')

//pass in the directory you want the CA store to be in.
module.exports = function (dir) {

  /*
  first we need a function that turns a hash into a path inside that directory.
  */
  //we'll use the hash of the file as its filename.
  //(note: this assumes the two-character subdirectory already exists)
  function toFile (hash) {
    return path.join(dir, hash.substring(0, 2), hash.substring(2))
  }

  //a minimal helper for a temporary filename to write into.
  function getTemp () {
    return path.join(dir, 'tmp_' + Date.now() + '_' + Math.random())
  }

  //a through stream that hashes everything that passes through it.
  function sha256 () {
    var hash = createHash('sha256'), stream
    return stream = pull.through(function (data) {
      hash.update(data)
    }, function () {
      stream.digest = hash.digest('hex')
    })
  }

  function read (hash) {
    return toPull.source(fs.createReadStream(toFile(hash)))
  }

  function write (expected, cb) {
    if('function' === typeof expected)
      cb = expected, expected = null

    cb = cb || function (err) { if(err) throw err }

    var tmpfile = getTemp(), hash = sha256()
    return pull(
      hash,
      toPull.sink(fs.createWriteStream(tmpfile), function (err) {
        //if there was an error, delete the file.
        if(err) fs.unlink(tmpfile, cb)
        //if we expected a particular hash but got something else, also delete it.
        else if(expected && expected != hash.digest)
          fs.unlink(tmpfile, function () {
            cb(new Error(
              'did not receive file:' + hash.digest
              + ' expected:' + expected
            ))
          })
        //otherwise, move the file to its content-addressed location.
        else fs.rename(tmpfile, toFile(hash.digest), cb)
      })
    )
  }

  return {
    read: read,
    write: write
  }
}

--------------------------------------------------------------------------------
/duplex.js:
--------------------------------------------------------------------------------
/*
Duplex streams are used to communicate with a remote service,
and they are a pair of source and sink streams: `{source, sink}`

in node, you use duplex streams to connect replication or rpc protocols.

client.pipe(server).pipe(client)
or
server.pipe(client).pipe(server)
both do the same thing.

the pull function we wrote before doesn't detect this,
but if you use the pull-stream module it will.
Then we can pipe duplex pull-streams like this:

var pull = require('pull-stream')
pull(client, server, client)

Also, sometimes you'll need to interact with a regular node stream.
there are two modules for this:

stream-to-pull-stream
and
pull-stream-to-stream

*/
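// to make the `{source, sink}` shape concrete, a duplex pull-stream is just an
// object holding both halves (a sketch, not part of the original file):
//
//   var duplex = {
//     source: function (abort, cb) { /* data going out to the other side */ },
//     sink: function (read) { /* reads data coming in from the other side */ }
//   }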

var net = require('net')
var toPull = require('stream-to-pull-stream')
var pull = require('pull-stream')

var server = net.createServer(function (stream) {
  //convert into a duplex pull-stream
  stream = toPull.duplex(stream)

  pull(
    stream,
    pull.map(function (b) {
      //take the input, and MAKE IT LOUD!!!
      return b.toString().toUpperCase() + '!!!'
    }),
    stream
  )

}).listen(9999, function () {

  var stream = toPull.duplex(net.connect(9999))

  pull(
    pull.values(['quiet stream']),
    stream,
    pull.drain(function (data) {
      console.log(data.toString())
    }, function (err) {
      if(err) throw err
      server.close()
    })
  )

})

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
{
  "name": "pull-stream-examples",
  "description": "a short introduction to pull-streams",
  "version": "0.0.0",
  "homepage": "https://github.com/dominictarr/pull-stream-examples",
  "repository": {
    "type": "git",
    "url": "git://github.com/dominictarr/pull-stream-examples.git"
  },
  "dependencies": {
    "pull-stream": "^2.27.0",
    "stream-to-pull-stream": "^1.6.1"
  },
  "devDependencies": {},
  "scripts": {
    "test": "set -e; for t in test/*.js; do node $t; done"
  },
  "author": "Dominic Tarr (http://dominictarr.com)",
  "license": "MIT"
}

--------------------------------------------------------------------------------
/pull.js:
--------------------------------------------------------------------------------
/*

create a simple source stream that reads from an array.

A pull stream is just an async stream that is called repeatedly.
note that when every item in the array has been called back,
it returns true in the error slot. This indicates the end of the stream.
both err and end mean the stream is over! but there are many possible
ways an error can occur (err && err !== true), and only one way a stream can correctly end (true)

in pull-streams I like to call streams that data comes out of "sources",
(in node they are usually called readables)

*/
function values (ary) {
  var i = 0
  return function read(abort, cb) {
    if(i === ary.length || abort) return cb(true)
    cb(null, ary[i++])
  }
}

/*

pull-streams don't really have a writable stream per se. "writable" implies that
the writer is the active partner, and the stream which is written to is passive.
(like you are when you watch TV. the TV writes its lies into your neocortex via your retinas)

instead of a writable, pull streams have a "sink", that is, a reader.
here the reader is the active party, actively consuming more data.
When you read a book, you are in control, and must actively turn the pages to get more information.

so, a sink is a function that you pass a source to,
which then reads from that function until it gets to the end or decides to stop.
*/

function sink (read) {
  read(null, function next (err, data) {
    if(err) return console.log(err)
    console.log(data)
    //recursively call read again!
    read(null, next)
  })
}

/*

we could now consume the source with just these two functions.

sink(values([1,2,3]))

so simple. we didn't use any libraries, yet we have streams with two-way back pressure.
since the pattern is async, the source can slow down by cb'ing slower,
and the sink can slow down by waiting longer before calling read again!
*/
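// for instance, here is a sink that waits 100ms between reads -- the source is
// then read no faster than that. (this sketch is an addition, not part of the
// original file.)
function slowSink (read) {
  read(null, function next (err, data) {
    if(err) return console.log(err)
    console.log(data)
    //wait a while before asking for the next value
    setTimeout(function () { read(null, next) }, 100)
  })
}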

/*
okay, to be useful, we also need a way to transform inputs into different outputs,
i.e. a transform stream.

in pull-streams a transform is implemented as a sink that returns a source.

*/

function map (mapper) {
  //a sink function: accept a source
  return function (read) {
    //but return another source!
    return function (abort, cb) {
      read(abort, function (err, data) {
        //if the stream has ended, pass that on.
        if(err) return cb(err)
        //apply a mapping to that data
        cb(null, mapper(data))
      })
    }
  }
}

var source = values([1,2,3])
var mapper = map(function (e) { return e*e })


/*
right now, we could combine these 3 streams by passing them to each other,
with plain function composition:

sink(mapper(source))

this would be equivalent to node's .pipe,
except with node streams it would look like

source.pipe(mapper).pipe(sink)

to be honest, it's easier to read if it goes left to right,
because the direction the data flows is the same as the direction you read.

let's write a quick function that allows us to compose pull streams left-to-right:

pull(source, mapper, sink)
*/

function pull () {
  var args = [].slice.call(arguments)
  var s = args.shift()
  while(args.length) s = args.shift()(s)
  return s
}

/*
that's it! just call the next thing with the previous thing until there are no things left.
since we return the last thing, we can even do this:

pull(pull(source, mapper), sink)

*/

/*
Infinite streams. here is a stream that never ends.
*/

function infinite () {
  var i = 0
  return function (abort, cb) {
    if(abort) return cb(abort)
    cb(null, i++)
  }
}

/*
Now, reading all of an infinite stream would take forever...
BUT! the cool thing about pull streams is that they are LAZY.
that means the source only gives us the next thing when we ask for it.

Also, you can ABORT a pull stream when you don't want any more.

here is a take(n) stream that reads n items from a source and then stops.
it's a transform stream like map, except it will stop early.
*/

function take (n) {
  return function (read) {
    return function (abort, cb) {
      //after n reads, tell the source to abort!
      if(!n--) return read(true, cb)
      read(abort, cb)
    }
  }
}

/*
now we can pipe the infinite stream through this,
and it will stop after 101 items!
*/

pull(infinite(), mapper, take(101), sink)

/*
That covers 3 types of pull streams: Source, Transform, & Sink.
There is one more important type, although it's not used as much:

Duplex streams

(see duplex.js!)
*/

--------------------------------------------------------------------------------
/simple/csv.js:
--------------------------------------------------------------------------------

// split a file into lines,
// and then map each line through a split function.

var pull = require('pull-stream')
var Split = require('pull-split')
var File = require('pull-file')

function CSV () {
  return pull(
    Split(), //defaults to '\n'
    pull.map(function (line) {
      return line.split(',')
    })
  )
}

//parse a file (pass the path on the command line)

var filename = process.argv[2]

pull(
  File(filename),
  CSV(),
  pull.drain(console.log)
)


// this parses simple CSV files, as long as they do not escape commas with quotes.
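// a quick illustration of that limitation (the input here is hypothetical, not
// from the original file): a quoted field containing a comma gets split into
// two cells by the naive approach above:
//
//   'widget,"1,000",red'.split(',')
//   // => [ 'widget', '"1', '000"', 'red' ]   -- 4 cells instead of 3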
// the module pull-csv is a more correct csv parser.

--------------------------------------------------------------------------------
/simple/jsondl.js:
--------------------------------------------------------------------------------

//newline delimited json.

var pull = require('pull-stream')
var Split = require('pull-split')
var File = require('pull-file')

function pullJSON () {
  return pull(
    Split(),
    pull.map(function (line) {
      return JSON.parse(line)
    })
  )
}

//parse a file (pass the path on the command line)

var filename = process.argv[2]

pull(
  File(filename),
  pullJSON(),
  pull.drain(console.log)
)

--------------------------------------------------------------------------------
/simple/ls.js:
--------------------------------------------------------------------------------

var fs = require('fs')
var path = require('path')

var pull = require('pull-stream')
var Defer = require('pull-defer')
var Paramap = require('pull-paramap')

//list the files in a directory.
//since fs.readdir is an async function,
//but we want to return a new stream immediately,
//we use pull-defer

function ls (dir) {

  var stream = Defer.source()

  fs.readdir(dir, function (err, ls) {
    //if readdir fails, resolve to a source that just errors
    stream.resolve(err ? pull.error(err) : pull.values(ls))
  })

  return stream

}

//list the files in a directory
/*
pull(
  ls(process.argv[2] || process.cwd()),
  pull.drain(console.log)
)
*/

// get stats for each file.
// we use paramap here, so that we can look up many files in parallel

function ls_long (dir) {
  return pull(
    ls(dir),
    Paramap(function (file, cb) {
      var filename = path.join(dir, file)
      fs.lstat(filename, function (err, stat) {
        if(err) return cb(err)
        stat.file = filename
        stat.dir = stat.isDirectory()
        cb(null, stat)
      })
    })
  )
}

/*
pull(
  ls_long(process.argv[2] || process.cwd()),
  pull.drain(console.log)
)
*/

// drill down into subdirectories.
// if an item is a directory, map that item to a stream.
// pull.flatten() turns a stream of streams (or arrays) into a stream of items

function ls_recursive (dir) {
  return pull(
    ls_long(dir),
    pull.map(function (e) {
      if(!e.dir) return [e.file]
      else return ls_recursive(e.file)
    }),
    pull.flatten()
  )
}

pull(
  ls_recursive(process.argv[2] || process.cwd()),
  pull.drain(console.log)
)
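// a small variation (an addition, not in the original file): instead of
// printing each path with pull.drain, you could gather the whole recursive
// listing into an array with pull.collect, which calls back once with
// everything when the source ends. it's commented out, like the earlier
// alternatives, so that only one pipeline runs when you execute this file.
/*
pull(
  ls_recursive(process.argv[2] || process.cwd()),
  pull.collect(function (err, files) {
    if(err) throw err
    console.log(files.length + ' entries found')
  })
)
*/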