├── .travis.yml ├── .eslintrc ├── package.json ├── readme.md ├── index.js └── test └── index.test.js /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: node_js 3 | node_js: 4 | - '0.10' 5 | -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "rules": { 3 | "indent": [2, 2], 4 | "quotes": [2, "single"], 5 | "no-console": [0], 6 | "semi": [2, "always"] 7 | }, 8 | "env": { 9 | "node": true 10 | }, 11 | "globals": { 12 | "process": true, 13 | "module": true, 14 | "require": true 15 | }, 16 | "extends": "eslint:recommended" 17 | } 18 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "kinesis-readable", 3 | "version": "1.2.0", 4 | "description": "Simple readable stream client for AWS Kinesis", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "tape test/*.test.js" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "https://github.com/rclark/kinesis-readable.git" 12 | }, 13 | "keywords": [ 14 | "kinesis", 15 | "stream" 16 | ], 17 | "author": "Ryan Clark", 18 | "license": "ISC", 19 | "bugs": { 20 | "url": "https://github.com/rclark/kinesis-readable/issues" 21 | }, 22 | "homepage": "https://github.com/rclark/kinesis-readable", 23 | "devDependencies": { 24 | "eslint": "^1.10.3", 25 | "kinesalite": "^1.10.1", 26 | "queue-async": "^1.0.7", 27 | "tape": "^4.2.2" 28 | }, 29 | "dependencies": { 30 | "aws-sdk": "^2.2.22" 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # kinesis-readable 2 | 3 | [![Build 
Status](https://travis-ci.org/rclark/kinesis-readable.svg?branch=master)](https://travis-ci.org/rclark/kinesis-readable) 4 | 5 | Node.js stream interface for reading records from [AWS Kinesis](http://aws.amazon.com/kinesis/). 6 | 7 | ## Usage 8 | 9 | ```js 10 | var client = new AWS.Kinesis({ 11 | region: 'us-east-1', 12 | params: { StreamName: 'my-stream' } 13 | }); 14 | 15 | // see below for options 16 | var readable = require('kinesis-readable')(client, options); 17 | 18 | readable 19 | // 'data' events will trigger for a set of records in the stream 20 | .on('data', function(records) { 21 | console.log(records); 22 | }) 23 | // each time records are passed downstream, the 'checkpoint' event will provide 24 | // the last sequence number that has been read 25 | .on('checkpoint', function(sequenceNumber) { 26 | console.log(sequenceNumber); 27 | }) 28 | .on('error', function(err) { 29 | console.error(err); 30 | }) 31 | .on('end', function() { 32 | console.log('all done!'); 33 | }); 34 | 35 | // Calling .close() will finish all pending GetRecord requests before emitting 36 | // the 'end' event.
37 | // Because the kinesis stream persists, the readable stream will not 38 | // 'end' until you explicitly close it 39 | setTimeout(function() { 40 | readable.close(); 41 | }, 60 * 60 * 1000); 42 | ``` 43 | 44 | ## Options 45 | 46 | You can pass options to create the readable stream, all parameters are optional: 47 | 48 | ```js 49 | var options = { 50 | shardId: 'shard-identifier', // defaults to first shard in the stream 51 | iterator: 'LATEST', // defaults to TRIM_HORIZON 52 | startAfter: '12345678901234567890', // start reading after this sequence number 53 | startAt: '12345678901234567890', // start reading from this sequence number 54 | timestamp: '2016-04-04T19:58:46.480-00:00', // start reading from this timestamp 55 | limit: 100 // number of records per `data` event 56 | }; 57 | ``` 58 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | var stream = require('stream'); 2 | 3 | module.exports = KinesisReadable; 4 | 5 | /** 6 | * A factory to generate a {@link KinesisClient} that pulls records from a Kinesis stream 7 | * 8 | * @param {object} client - an AWS.Kinesis client capable of reading the desired stream 9 | * @param {string} [name] - the name of the stream to read. Not required if already 10 | * set by the provided AWS.Kinesis client. 11 | * @param {object} [options] - configuration details 12 | * @param {string} [options.shardId] - the shard id to read from. Each KinesisReadable 13 | * instance is only capable of reading a single shard. If unspecified, the instance 14 | * will read from the first shard returned by a DescribeStream request. 15 | * @param {string} [options.iterator] - the iterator type. One of `LATEST` or `TRIM_HORIZON`. 16 | * If unspecified, defaults to `TRIM_HORIZON` 17 | * @param {string} [options.startAt] - a sequence number to start reading from.
18 | * @param {string} [options.startAfter] - a sequence number to start reading after. 19 | * @param {number} [options.timestamp] - a timestamp to start reading after. 20 | * @param {number} [options.limit] - the maximum number of records that will 21 | * be passed to any single `data` event. 22 | * @param {number} [options.readInterval] - time in ms to wait between getRecords API calls 23 | * @returns {KinesisClient} a readable stream of kinesis records 24 | */ 25 | function KinesisReadable(client, name, options) { 26 | if (typeof name === 'object') { 27 | options = name; 28 | name = undefined; 29 | } 30 | 31 | if (!options) options = {}; 32 | 33 | if (options.iterator && options.iterator !== 'LATEST' && options.iterator !== 'TRIM_HORIZON') 34 | throw new Error('options.iterator must be one of LATEST or TRIM_HORIZON'); 35 | 36 | var readable = new stream.Readable({ 37 | objectMode: true, 38 | highWaterMark: 100 39 | }); 40 | 41 | var checkpoint = new stream.Transform({ 42 | objectMode: true, 43 | highWaterMark: 100 44 | }); 45 | 46 | var iterator, drain, ended, pending = 0; 47 | 48 | function describeStream(callback) { 49 | pending++; 50 | client.describeStream({ StreamName: name }, function(err, data) { 51 | pending--; 52 | if (err) return callback(err); 53 | 54 | var shardId = options.shardId ? 
55 | data.StreamDescription.Shards.filter(function(shard) { 56 | return shard.ShardId === options.shardId; 57 | }).map(function(shard) { 58 | return shard.ShardId; 59 | })[0] : data.StreamDescription.Shards[0].ShardId; 60 | 61 | if (!shardId) return callback(new Error('Shard ' + options.shardId + ' does not exist')); 62 | getShardIterator(shardId, callback); 63 | }); 64 | } 65 | 66 | function getShardIterator(shardId, callback) { 67 | var params = { 68 | ShardId: shardId, 69 | StreamName: name 70 | }; 71 | 72 | if (options.iterator) { 73 | params.ShardIteratorType = options.iterator; 74 | } else if (options.startAt) { 75 | params.ShardIteratorType = 'AT_SEQUENCE_NUMBER'; 76 | params.StartingSequenceNumber = options.startAt; 77 | } else if (options.startAfter) { 78 | params.ShardIteratorType = 'AFTER_SEQUENCE_NUMBER'; 79 | params.StartingSequenceNumber = options.startAfter; 80 | } else if (options.timestamp) { 81 | params.ShardIteratorType = 'AT_TIMESTAMP'; 82 | params.Timestamp = options.timestamp; 83 | } else { 84 | params.ShardIteratorType = 'TRIM_HORIZON'; 85 | } 86 | 87 | pending++; 88 | client.getShardIterator(params, function(err, data) { 89 | pending--; 90 | if (err) return callback(err); 91 | iterator = data.ShardIterator; 92 | callback(); 93 | }); 94 | } 95 | 96 | function read(callback) { 97 | if (drain && !pending) return callback(null, { Records: null }); 98 | if (drain && pending) return setImmediate(read, callback); 99 | 100 | pending++; 101 | client.getRecords({ 102 | ShardIterator: iterator, 103 | Limit: options.limit 104 | }, function(err, data) { 105 | pending--; 106 | if (err) return callback(err); 107 | 108 | iterator = data.NextShardIterator; 109 | 110 | if (!data.Records.length) { 111 | if (!drain) return setTimeout(read, options.readInterval || 500, callback); 112 | data.Records = null; 113 | } 114 | 115 | callback(null, data); 116 | }); 117 | } 118 | 119 | readable._read = function() { 120 | if (iterator) return read(gotRecords); 121 | 122 | 
describeStream(function(err) { 123 | if (err) return checkpoint.emit('error', err); 124 | read(gotRecords); 125 | }); 126 | 127 | function gotRecords(err, data) { 128 | if (err) return checkpoint.emit('error', err); 129 | setTimeout(readable.push.bind(readable), options.readInterval || 500, data.Records); 130 | } 131 | }; 132 | 133 | checkpoint._transform = function(data, enc, callback) { 134 | checkpoint.emit('checkpoint', data.slice(-1)[0].SequenceNumber); 135 | callback(null, data); 136 | }; 137 | 138 | checkpoint._flush = function(callback) { 139 | ended = true; 140 | callback(); 141 | }; 142 | 143 | /** 144 | * A kinesis stream persists beyond the duration of a readable stream. In order 145 | * to stop reading from the stream, call `.close()`. Then listen for the `end` 146 | * event to indicate that all data that has been read from Kinesis has been passed 147 | * downstream. 148 | * 149 | * @instance 150 | * @memberof KinesisClient 151 | * @returns {KinesisClient} 152 | */ 153 | checkpoint.close = function() { 154 | drain = true; 155 | if (!ended) readable._read(); 156 | return checkpoint; 157 | }; 158 | 159 | /** 160 | * A client that implements a node.js readable stream interface for reading kinesis 161 | * records. See node.js documentation for details. 162 | * 163 | * In addition to the normal events emitted by a readable stream, the KinesisClient 164 | * emits `checkpoint` events, which indicate the most recent sequence number that 165 | * has been read from Kinesis and passed downstream.
166 | * 167 | * @name KinesisClient 168 | */ 169 | return readable.pipe(checkpoint); 170 | } 171 | -------------------------------------------------------------------------------- /test/index.test.js: -------------------------------------------------------------------------------- 1 | var tape = require('tape'); 2 | var queue = require('queue-async'); 3 | var crypto = require('crypto'); 4 | 5 | var kinesalite = require('kinesalite')({ 6 | ssl: false, 7 | createStreamMs: 1, 8 | deleteStreamMs: 1 9 | }); 10 | 11 | var AWS = require('aws-sdk'); 12 | var kinesis = new AWS.Kinesis({ 13 | accessKeyId: '-', 14 | secretAccessKey: '-', 15 | endpoint: 'http://localhost:7654', 16 | region: '-' 17 | }); 18 | 19 | var testStreamName = 'test-stream'; 20 | 21 | function test(name, callback) { 22 | tape('start kinesalite', function(assert) { 23 | queue(1) 24 | .defer(kinesalite.listen.bind(kinesalite), 7654) 25 | .defer(kinesis.createStream.bind(kinesis), { 26 | StreamName: testStreamName, 27 | ShardCount: 1 28 | }) 29 | .await(function(err) { 30 | if (err) throw err; 31 | assert.end(); 32 | }); 33 | }); 34 | 35 | tape(name, callback); 36 | 37 | tape('stop kinesalite', function(assert) { 38 | queue(1) 39 | .defer(function(next) { 40 | kinesis.deleteStream({ 41 | StreamName: testStreamName 42 | }, function(err) { 43 | if (err) return next(err); 44 | setTimeout(next, 20); 45 | }); 46 | }) 47 | .defer(kinesalite.close.bind(kinesalite)) 48 | .await(function(err) { 49 | if (err) throw err; 50 | assert.end(); 51 | }); 52 | }); 53 | } 54 | 55 | test('reads records that already exist', function(assert) { 56 | var records = []; 57 | for (var i = 0; i < 20; i++) records.push({ 58 | Data: crypto.randomBytes(10), 59 | PartitionKey: 'key' 60 | }); 61 | 62 | kinesis.putRecords({ 63 | StreamName: testStreamName, 64 | Records: records 65 | }, function(err) { 66 | if (err) throw err; 67 | readRecords(); 68 | }); 69 | 70 | function readRecords() { 71 | var readable = require('..')(kinesis, 
testStreamName); 72 | 73 | var count = 0; 74 | 75 | readable 76 | .on('data', function(recordSet) { 77 | assert.equal(recordSet.length, 20, 'got records'); 78 | 79 | recordSet.forEach(function(record, i) { 80 | var expected = records[i].Data.toString('hex'); 81 | assert.equal(record.Data.toString('hex'), expected, 'anticipated data'); 82 | }); 83 | 84 | count += recordSet.length; 85 | if (count > records.length) assert.fail('should not read extra records'); 86 | if (count === records.length) readable.close(); 87 | }) 88 | .on('end', function() { 89 | assert.ok('fires end event after close'); 90 | assert.end(); 91 | }) 92 | .on('error', function(err) { 93 | assert.ifError(err, 'should not error'); 94 | }); 95 | } 96 | }); 97 | 98 | test('can use stream name predefined by the client', function(assert) { 99 | var records = []; 100 | for (var i = 0; i < 20; i++) records.push({ 101 | Data: crypto.randomBytes(10), 102 | PartitionKey: 'key' 103 | }); 104 | 105 | kinesis.putRecords({ 106 | StreamName: testStreamName, 107 | Records: records 108 | }, function(err) { 109 | if (err) throw err; 110 | readRecords(); 111 | }); 112 | 113 | function readRecords() { 114 | var client = new AWS.Kinesis({ 115 | params: { StreamName: testStreamName }, 116 | accessKeyId: '-', 117 | secretAccessKey: '-', 118 | endpoint: 'http://localhost:7654', 119 | region: '-' 120 | }); 121 | 122 | var readable = require('..')(client); 123 | 124 | var count = 0; 125 | 126 | readable 127 | .on('data', function(recordSet) { 128 | count += recordSet.length; 129 | if (count > records.length) assert.fail('should not read extra records'); 130 | if (count === records.length) readable.close(); 131 | }) 132 | .on('end', function() { 133 | assert.equal(count, 20, 'read 20 records'); 134 | assert.end(); 135 | }) 136 | .on('error', function(err) { 137 | assert.ifError(err, 'should not error'); 138 | }); 139 | } 140 | }); 141 | 142 | test('reads ongoing records', function(assert) { 143 | var records = []; 144 | for 
(var i = 0; i < 20; i++) records.push({ 145 | Data: crypto.randomBytes(10), 146 | PartitionKey: 'key' 147 | }); 148 | 149 | var readable = require('..')(kinesis, testStreamName); 150 | var count = 0; 151 | 152 | readable 153 | .on('data', function(recordSet) { 154 | count += recordSet.length; 155 | if (count > records.length) assert.fail('should not read extra records'); 156 | if (count === records.length) readable.close(); 157 | }) 158 | .on('end', function() { 159 | assert.equal(count, 20, 'read 20 records'); 160 | assert.end(); 161 | }) 162 | .on('error', function(err) { 163 | assert.ifError(err, 'should not error'); 164 | }); 165 | 166 | setTimeout(function() { 167 | kinesis.putRecords({ 168 | StreamName: testStreamName, 169 | Records: records 170 | }, function(err) { 171 | if (err) throw err; 172 | }); 173 | }, 500); 174 | }); 175 | 176 | test('reads latest records', function(assert) { 177 | var initialRecords = []; 178 | var subsequentRecords = []; 179 | for (var i = 0; i < 20; i++) { 180 | initialRecords.push({ 181 | Data: crypto.randomBytes(10), 182 | PartitionKey: 'key' 183 | }); 184 | subsequentRecords.push({ 185 | Data: crypto.randomBytes(10), 186 | PartitionKey: 'key' 187 | }); 188 | } 189 | 190 | function readRecords() { 191 | var readable = require('..')(kinesis, testStreamName, { iterator: 'LATEST' }); 192 | var count = 0; 193 | 194 | readable 195 | .on('data', function(recordSet) { 196 | recordSet.forEach(function(record, i) { 197 | var expected = subsequentRecords[i].Data.toString('hex'); 198 | assert.equal(record.Data.toString('hex'), expected, 'anticipated data'); 199 | }); 200 | 201 | count += recordSet.length; 202 | if (count > 20) assert.fail('should not read extra records'); 203 | if (count === 20) readable.close(); 204 | }) 205 | .on('end', function() { 206 | assert.equal(count, 20, 'read 20 records'); 207 | assert.end(); 208 | }) 209 | .on('error', function(err) { 210 | assert.ifError(err, 'should not error'); 211 | }); 212 | 213 | 
setTimeout(function() { 214 | kinesis.putRecords({ 215 | StreamName: testStreamName, 216 | Records: subsequentRecords 217 | }, function(err) { 218 | if (err) throw err; 219 | }); 220 | }, 500); 221 | } 222 | 223 | kinesis.putRecords({ 224 | StreamName: testStreamName, 225 | Records: initialRecords 226 | }, function(err) { 227 | if (err) throw err; 228 | readRecords(); 229 | }); 230 | }); 231 | 232 | test('emits checkpoints, obeys limits', function(assert) { 233 | var records = []; 234 | for (var i = 0; i < 20; i++) records.push({ 235 | Data: crypto.randomBytes(10), 236 | PartitionKey: 'key' 237 | }); 238 | 239 | var readable = require('..')(kinesis, testStreamName, { limit: 1 }); 240 | var count = 0; 241 | var checkpoints = 0; 242 | 243 | readable 244 | .on('data', function(recordSet) { 245 | assert.equal(recordSet.length, 1, 'obeys requested limit'); 246 | count += recordSet.length; 247 | if (count > records.length) assert.fail('should not read extra records'); 248 | if (count === records.length) readable.close(); 249 | }) 250 | .on('checkpoint', function(sequenceNum) { 251 | if (typeof sequenceNum !== 'string') assert.fail('invalid sequenceNum emitted'); 252 | checkpoints++; 253 | }) 254 | .on('end', function() { 255 | assert.equal(checkpoints, 20, 'emits on each read'); 256 | assert.end(); 257 | }) 258 | .on('error', function(err) { 259 | assert.ifError(err, 'should not error'); 260 | }); 261 | 262 | kinesis.putRecords({ 263 | StreamName: testStreamName, 264 | Records: records 265 | }, function(err) { 266 | if (err) throw err; 267 | }); 268 | }); 269 | 270 | test('reads after checkpoint', function(assert) { 271 | var records = []; 272 | for (var i = 0; i < 15; i++) records.push({ 273 | Data: crypto.randomBytes(10), 274 | PartitionKey: 'key' 275 | }); 276 | 277 | kinesis.putRecords({ 278 | StreamName: testStreamName, 279 | Records: records 280 | }, function(err, resp) { 281 | if (err) throw err; 282 | readRecords(resp.Records[9].SequenceNumber); 283 | }); 284 | 
285 | function readRecords(startAfter) { 286 | var readable = require('..')(kinesis, testStreamName, { limit: 1, startAfter: startAfter }); 287 | var count = 10; // should start at the 10th record and read 5 more 288 | 289 | readable 290 | .on('data', function(recordSet) { 291 | var record = recordSet[0]; 292 | var expected = records[count].Data.toString('hex'); 293 | assert.equal(record.Data.toString('hex'), expected, 'anticipated data'); 294 | count += recordSet.length; 295 | if (count > records.length) assert.fail('should not read extra records'); 296 | if (count === records.length) readable.close(); 297 | }) 298 | .on('end', function() { 299 | assert.end(); 300 | }) 301 | .on('error', function(err) { 302 | assert.ifError(err, 'should not error'); 303 | }); 304 | } 305 | }); 306 | 307 | test('reads from checkpoint', function(assert) { 308 | var records = []; 309 | for (var i = 0; i < 15; i++) records.push({ 310 | Data: crypto.randomBytes(10), 311 | PartitionKey: 'key' 312 | }); 313 | 314 | kinesis.putRecords({ 315 | StreamName: testStreamName, 316 | Records: records 317 | }, function(err, resp) { 318 | if (err) throw err; 319 | readRecords(resp.Records[9].SequenceNumber); 320 | }); 321 | 322 | function readRecords(startAt) { 323 | var readable = require('..')(kinesis, testStreamName, { limit: 1, startAt: startAt }); 324 | var count = 9; // should start at the 9th record and read 6 more 325 | 326 | readable 327 | .on('data', function(recordSet) { 328 | var record = recordSet[0]; 329 | var expected = records[count].Data.toString('hex'); 330 | assert.equal(record.Data.toString('hex'), expected, 'anticipated data'); 331 | count += recordSet.length; 332 | if (count > records.length) assert.fail('should not read extra records'); 333 | if (count === records.length) readable.close(); 334 | }) 335 | .on('end', function() { 336 | assert.end(); 337 | }) 338 | .on('error', function(err) { 339 | assert.ifError(err, 'should not error'); 340 | }); 341 | } 342 | }); 343 | 
--------------------------------------------------------------------------------