├── .gitignore
├── AmazonKinesisArchiver.png
├── dist
│   ├── amazon-kinesis-archiver-1.0.0.zip
│   ├── amazon-kinesis-archiver-1.0.1.zip
│   └── amazon-kinesis-archiver-1.0.2.zip
├── .github
│   └── PULL_REQUEST_TEMPLATE.md
├── deploy.sh
├── CODE_OF_CONDUCT.md
├── bin
│   ├── createDynamoTable.js
│   ├── queryArchive.js
│   ├── scanArchive.js
│   └── setup.sh
├── snippets.txt
├── package.json
├── lib
│   ├── constants.js
│   ├── tableSetup.js
│   ├── common.js
│   ├── kinesis-archiver.js
│   └── archive-access.js
├── test
│   ├── testReinject.js
│   ├── testQuery.js
│   ├── testScan.js
│   └── testArchive.js
├── index.js
├── deploy.yaml
├── CONTRIBUTING.md
├── LICENSE.txt
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | .idea
3 |
--------------------------------------------------------------------------------
/AmazonKinesisArchiver.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/awslabs/amazon-kinesis-archiver/HEAD/AmazonKinesisArchiver.png
--------------------------------------------------------------------------------
/dist/amazon-kinesis-archiver-1.0.0.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/awslabs/amazon-kinesis-archiver/HEAD/dist/amazon-kinesis-archiver-1.0.0.zip
--------------------------------------------------------------------------------
/dist/amazon-kinesis-archiver-1.0.1.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/awslabs/amazon-kinesis-archiver/HEAD/dist/amazon-kinesis-archiver-1.0.1.zip
--------------------------------------------------------------------------------
/dist/amazon-kinesis-archiver-1.0.2.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/awslabs/amazon-kinesis-archiver/HEAD/dist/amazon-kinesis-archiver-1.0.2.zip
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | *Issue #, if available:*
2 |
3 | *Description of changes:*
4 |
5 |
6 | By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
7 |
--------------------------------------------------------------------------------
/deploy.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #set -x
3 |
4 | ver=1.0.2
5 |
6 | for r in `aws ec2 describe-regions --query Regions[*].RegionName --output text`; do aws s3 cp dist/amazon-kinesis-archiver-$ver.zip s3://awslabs-code-$r/AmazonKinesisArchiver/amazon-kinesis-archiver-$ver.zip --acl public-read --region $r; done
7 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | ## Code of Conduct
2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
4 | opensource-codeofconduct@amazon.com with any additional questions or comments.
5 |
--------------------------------------------------------------------------------
/bin/createDynamoTable.js:
--------------------------------------------------------------------------------
1 | var tableSetup = require('../lib/tableSetup');
2 |
3 | var getArgSafe = function(index) {
4 | if (process.argv.length >= index + 1) {
5 | return process.argv[index];
6 | } else {
7 | return undefined;
8 | }
9 | }
10 |
11 | if (process.argv.length == 2) {
12 | tableSetup.runSetup();
13 | } else {
14 | // map command line arguments
15 | tableSetup.runSetup(getArgSafe(2), getArgSafe(3), getArgSafe(4), getArgSafe(5), getArgSafe(6), getArgSafe(7));
16 | }
--------------------------------------------------------------------------------
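
createDynamoTable.js runs fully interactively when given no arguments, or fully scripted; a sketch of the scripted form, where the stream name, region, and capacity values are illustrative:

    node bin/createDynamoTable.js MyStream eu-west-1 latest 10 10 86400

The six positional arguments map to runSetup's streamName, region, archiveMode, readIOPS, writeIOPS and ttlSeconds parameters; anything omitted is prompted for over readline.
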
/snippets.txt:
--------------------------------------------------------------------------------
1 | // publish deploy.yaml to regional buckets
2 | for r in `aws ec2 describe-regions --query Regions[*].RegionName --output text`; do aws s3 cp deploy.yaml s3://awslabs-code-$r/AmazonKinesisArchiver/deploy.yaml --acl public-read --region $r; done
3 |
4 | // publish lambda jar to regional buckets
5 | for r in `aws ec2 describe-regions --query Regions[*].RegionName --output text`; do aws s3 cp dist/amazon-kinesis-archiver-1.0.0.zip s3://awslabs-code-$r/AmazonKinesisArchiver/amazon-kinesis-archiver-1.0.0.zip --acl public-read --region $r; done
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "kinesis-stream-compressor",
3 | "description": "An AWS Lambda function that stores only the latest unique values of a Kinesis Stream, for the purposes of archive/replay",
4 | "version": "1.0.2",
5 | "dependencies": {
6 | "async": "2.6.4"
7 | },
8 | "keywords": [
9 | "amazon",
10 | "aws",
11 | "kinesis",
12 | "lambda"
13 | ],
14 | "author": {
15 | "name": "Ian Meyers",
16 | "email": "meyersi@amazon.com"
17 | },
18 | "main": "index.js",
19 | "license": "Apache-2.0",
20 | "files": [
21 | "index.js",
22 | "lib/constants.js",
23 | "lib/common.js",
24 | "lib/kinesis-archiver.js",
25 | "lib/archive-access.js",
26 | "package.json",
27 | "README.md",
28 | "LICENSE.txt"
29 | ]
30 | }
31 |
--------------------------------------------------------------------------------
/lib/constants.js:
--------------------------------------------------------------------------------
1 | RECOVERY_MODE_LATEST = 'latest';
2 | RECOVERY_MODE_ALL = 'all';
3 | RECOVERY_MODE_TAG_NAME = 'StreamArchiveMode';
4 | ARCHIVE_TAG_NAME = 'ArchiveTTL';
5 | ARCHIVE_ATTRIBUTE_NAME = 'expireAfter';
6 | ARCHIVE_TAG_TTL_SECONDS_NAME = 'expireSeconds';
7 | REGION_KEY = 'AWS_REGION';
8 | OK = 'OK';
9 | ERROR = 'ERROR';
10 | partitionKeyName = 'partitionKey';
11 | sortKeyName = "sequenceNumber";
12 | lastUpdateDateName = "lastUpdate";
13 | approximateArrivalName = "approximateArrivalTimestamp";
14 | conditionCheckFailed = 'ConditionalCheckFailedException';
15 | provisionedThroughputExceeded = 'ProvisionedThroughputExceededException';
16 | reValidateArchiveModeCacheSeconds = 180;
17 | intermediateEncoding = 'UTF8';
18 | defaultReadIOPS = 10;
19 | defaultWriteOPS = 10;
--------------------------------------------------------------------------------
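
constants.js deliberately assigns to undeclared identifiers, so a bare require() publishes each constant as a global for the rest of the codebase; a minimal sketch of the pattern as the other modules use it:

    // loaded purely for its side effect of defining globals
    require('./lib/constants');
    console.log(RECOVERY_MODE_TAG_NAME); // prints 'StreamArchiveMode'
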
/test/testReinject.js:
--------------------------------------------------------------------------------
1 | var q = require('../lib/archive-access')(process.env['AWS_REGION'] || 'us-east-1');
2 | var async = require('async');
3 |
4 | var sqn1 = '49550822123942288925422195661801699673398497972964035234';
5 | var sqn2 = '49550822123942288925422195661802908599218112602138741410';
6 | var sqn3 = '49550822123942288925422195661802908599218112602138741412';
7 | var approxArrival = 1428537600;
8 |
9 | var everything = function(callback) {
10 | q.reinject('EnergyPipelineSensors', undefined, undefined, undefined, undefined, undefined, true, "^", 2, function(
11 | err) {
12 | console.log("All Data Complete");
13 | if (err) {
14 | console.log(err);
15 | }
16 | callback(err);
17 | });
18 | };
19 |
20 | async.waterfall([ everything, ], function(err) {
21 | console.log("tests complete");
22 | process.exit(0);
23 | });
24 |
--------------------------------------------------------------------------------
/bin/queryArchive.js:
--------------------------------------------------------------------------------
1 | var OK = 0;
2 | var ERROR = -1;
3 |
4 | var getArgSafe = function(index) {
5 | if (process.argv.length >= index + 1) {
6 | return process.argv[index];
7 | } else {
8 | return undefined;
9 | }
10 | }
11 |
12 | var failArgSafe = function(index, label) {
13 | var v = getArgSafe(index);
14 | if (!v && v !== "") {
15 | console.error("You must provide a value for " + label);
16 | process.exit(ERROR);
17 | } else {
18 | return v;
19 | }
20 | }
21 |
22 | // mandatory arguments are region, stream, partition key and sequenceStart
23 | if (process.argv.length < 6) {
24 | console
25 | .error("You must provide the region name, stream name, partition key and sequence number to query the stream archive");
26 | process.exit(ERROR);
27 | } else {
28 | var regionName = failArgSafe(2, 'region');
29 | var streamName = failArgSafe(3, 'streamName');
30 | var partitionKey = failArgSafe(4, 'partitionKey');
31 | var sequence = failArgSafe(5, 'sequenceNumber');
32 |
33 | var sequenceEnd = getArgSafe(6);
34 | var recordLimit = getArgSafe(7);
35 |
36 | var q = require('../lib/archive-access')(regionName);
37 |
38 | q.queryToStdConsole(streamName, partitionKey, sequence, sequenceEnd, recordLimit, function(err) {
39 | if (err) {
40 | console.error(err);
41 | process.exit(ERROR);
42 | } else {
43 | process.exit(OK);
44 | }
45 | });
46 | }
--------------------------------------------------------------------------------
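
A sketch of a query invocation, reusing the stream, partition key, and sequence number values that appear in the bundled tests (substitute your own):

    node bin/queryArchive.js us-east-1 EnergyPipelineSensors 3388323060863249599 \
        49550822123942288925422195661802908599218112602138741410

Two further optional arguments supply an end sequence number and a record limit to bound the query.
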
/bin/scanArchive.js:
--------------------------------------------------------------------------------
1 | var OK = 0;
2 | var ERROR = -1;
3 |
4 | var getArgSafe = function(index) {
5 | if (process.argv.length >= index + 1) {
6 | return process.argv[index];
7 | } else {
8 | return undefined;
9 | }
10 | }
11 |
12 | var failArgSafe = function(index, label) {
13 | var v = getArgSafe(index);
14 | if (!v && v !== "") {
15 | console.error("You must provide a value for " + label);
16 | process.exit(ERROR);
17 | } else {
18 | return v;
19 | }
20 | }
21 |
22 | // mandatory arguments are region and stream name; scan start values are optional
23 | if (process.argv.length < 4) {
24 | console
25 | .error("You must provide the region name and stream name to scan the stream archive");
26 | process.exit(ERROR);
27 | } else {
28 | var regionName = failArgSafe(2, 'region');
29 | var streamName = failArgSafe(3, 'streamName');
30 |
31 | var sequenceStart = getArgSafe(4);
32 | var lastUpdateDateStart = getArgSafe(5);
33 | var approximateArrivalStart = getArgSafe(6);
34 | var recordLimit = getArgSafe(7);
35 |
36 | var q = require('../lib/archive-access')(regionName);
37 |
38 | q.scanToStdConsole(streamName, sequenceStart, lastUpdateDateStart, approximateArrivalStart, recordLimit, function(err) {
39 | if (err) {
40 | console.error(err);
41 | process.exit(ERROR);
42 | } else {
43 | process.exit(OK);
44 | }
45 | });
46 | }
--------------------------------------------------------------------------------
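
With the argument mapping above, a scan needs only the region and stream name; the sequence, last-update, and approximate-arrival start points and the record limit are optional filters:

    node bin/scanArchive.js us-east-1 EnergyPipelineSensors
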
/bin/setup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Amazon Kinesis Archiver
4 | #
5 | # Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
6 | #
7 | # Licensed under the Apache License, Version 2.0 (the "License");
8 | # you may not use this file except in compliance with the License.
9 | # You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 |
19 | function usage {
20 | echo "Please provide the Stream Name, the Archive Mode (all | latest), and the Region. You can also specify the TTL Interval in Seconds if data should automatically be deleted from DynamoDB."
21 | exit -1
22 | }
23 |
24 | function checkDep {
25 | which $1 > /dev/null 2>&1
26 |
27 | if [ $? != 0 ]; then
28 | echo "This utility requires the AWS Cli, which can be installed using instructions found at http://docs.aws.amazon.com/cli/latest/userguide/installing.html, as well as a node.js runtime"
29 | exit -2
30 | fi
31 | }
32 |
33 | if [ $# -lt 3 ]; then
34 | usage
35 | fi
36 |
37 | # accept at most four arguments: the stream name, archive mode,
38 | # region, and an optional TTL interval in seconds
39 | if [ $# -gt 4 ]; then
40 | usage
41 | fi
42 |
43 | checkDep aws
44 | checkDep node
45 |
46 | if [[ $2 = "all" || $2 = "latest" ]]; then
47 | echo "Adding configuration tags to Kinesis Stream $1"
48 | aws kinesis add-tags-to-stream --stream-name $1 --tags StreamArchiveMode=$2 --region $3
49 |
50 | if [[ "$4" != "" ]]; then
51 | aws kinesis add-tags-to-stream --stream-name $1 --tags ArchiveTTL=expireSeconds=$4 --region $3
52 | fi
53 |
54 | aws kinesis list-tags-for-stream --stream-name $1 --region $3
55 | else
56 | usage
57 | fi
58 |
59 | # now go create the dynamo db table
60 | node createDynamoTable.js $1 $3 $2
61 |
--------------------------------------------------------------------------------
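
An end-to-end configuration sketch, assuming a stream named MyStream in eu-west-1 with a 24-hour archive retention (all values illustrative). The script invokes createDynamoTable.js relative to the working directory, so it is run from within bin:

    cd bin && ./setup.sh MyStream latest eu-west-1 86400

This tags the stream with StreamArchiveMode=latest and ArchiveTTL=expireSeconds=86400, lists the tags back for confirmation, and then creates the DynamoDB archive table.
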
/index.js:
--------------------------------------------------------------------------------
1 | var debug = process.env['DEBUG'] || false;
2 | require("./lib/constants");
3 |
4 | var archiver;
5 |
6 | exports.handler = function (event, context) {
7 | var finish = function (err, data) {
8 | console.log("Processing Complete");
9 |
10 | // log the event if we've failed
11 | if (data && data.status && data.status !== OK) {
12 | if (data.message) {
13 | console.log(data.message);
14 | }
15 |
16 | // ensure that Lambda doesn't checkpoint to Kinesis
17 | context.done(data.status, JSON.stringify(data.message ? data.message : "Unknown Error"));
18 | } else {
19 | context.done(err);
20 | }
21 | };
22 |
23 | /** End Runtime Functions */
24 | if (debug) {
25 | console.log(JSON.stringify(event));
26 | }
27 |
28 | var noProcessReason;
29 |
30 | // validate that this is a well-formed Kinesis event before processing;
31 | // chained checks avoid dereferencing a missing Records array
32 | if (!event.Records || event.Records.length === 0) {
33 | noProcessReason = "Event contains no Data";
34 | } else if (event.Records[0].eventSource !== "aws:kinesis") {
35 | noProcessReason = "Invalid Event Source " + event.Records[0].eventSource;
36 | } else if (event.Records[0].kinesis.kinesisSchemaVersion !== "1.0") {
37 | noProcessReason = "Unsupported Event Schema Version " + event.Records[0].kinesis.kinesisSchemaVersion;
38 | }
39 |
40 | var current_region = process.env[REGION_KEY];
41 |
42 | // setup the stream archiver if it hasn't yet been initialised
43 | if (!archiver) {
44 | archiver = require('./lib/kinesis-archiver')(current_region);
45 | }
46 |
47 | if (noProcessReason) {
48 | // fail processing with the specified reason
49 | finish(null, { "event": event, "status": ERROR, "message": noProcessReason });
50 | } else {
51 | // resolve the stream name
52 | var eventSourceARNTokens = event.Records[0].eventSourceARN.split(":");
53 | var streamName = eventSourceARNTokens[eventSourceARNTokens.length - 1].split("/")[1];
54 |
55 | // setup the compressor
56 | archiver.init(streamName, undefined, function (err) {
57 | if (err) {
58 | finish(err);
59 | } else {
60 | // process the stream records
61 | archiver.processRecords(streamName, event, finish);
62 | }
63 | });
64 | }
65 | };
--------------------------------------------------------------------------------
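
The stream name resolution in the handler simply tokenises the event source ARN; for example, with the ARN used in the bundled test event:

    var arn = "arn:aws:kinesis:eu-west-1:887210671223:stream/EnergyPipelineSensors";
    var tokens = arn.split(":");
    // the final token is "stream/EnergyPipelineSensors", so take the part after "/"
    var streamName = tokens[tokens.length - 1].split("/")[1]; // "EnergyPipelineSensors"
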
/test/testQuery.js:
--------------------------------------------------------------------------------
1 | var q = require('../lib/archive-access')(process.env['AWS_REGION'] || 'us-east-1');
2 | var async = require('async');
3 |
4 | var sqn1 = '49550822123942288925422195661802908599218112602138741410';
5 | var sqn2 = '49550822123942288925422195661802908599218112602138741412';
6 |
7 | var by_item = function(callback) {
8 | var drained = false;
9 | var worker = function(task, wCallback) {
10 | console.log(task);
11 | wCallback();
12 | };
13 | var queue = async.queue(worker, 2);
14 | queue.drain = function() {
15 | drained = true;
16 | }
17 |
18 | q.queryArchive('EnergyPipelineSensors', '3388323060863249599', sqn1, sqn1, undefined, queue, function(err) {
19 | console.log("Query by Item Complete");
20 | async.until(function() {
21 | return drained;
22 | }, function(untilCallback) {
23 | setTimeout(function() {
24 | untilCallback();
25 | }, 500);
26 | }, function(err) {
27 | callback(err);
28 | });
29 | });
30 | };
31 |
32 | var by_range = function(callback) {
33 | var drained = false;
34 | var worker = function(task, wCallback) {
35 | console.log(task);
36 | wCallback();
37 | };
38 | var queue = async.queue(worker, 2);
39 | queue.drain = function() {
40 | drained = true;
41 | }
42 |
43 | q.queryArchive('EnergyPipelineSensors', '3388323060863249599', sqn1, sqn2, 10, queue, function(err) {
44 | console.log("Query by Range Complete");
45 | async.until(function() {
46 | return drained;
47 | }, function(untilCallback) {
48 | setTimeout(function() {
49 | untilCallback();
50 | }, 500);
51 | }, function(err) {
52 | callback(err);
53 | });
54 | });
55 | };
56 |
57 | var all_for_key = function(callback) {
58 | var drained = false;
59 | var worker = function(task, wCallback) {
60 | console.log(task);
61 | wCallback();
62 | };
63 | var queue = async.queue(worker, 2);
64 | queue.drain = function() {
65 | drained = true;
66 | }
67 |
68 | q.queryArchive('EnergyPipelineSensors', '3388323060863249599', undefined, undefined, undefined, queue,
69 | function(err) {
70 | console.log("Data for All Complete");
71 | async.until(function() {
72 | return drained;
73 | }, function(untilCallback) {
74 | setTimeout(function() {
75 | untilCallback();
76 | }, 500);
77 | }, function(err) {
78 | callback(err);
79 | });
80 | });
81 | };
82 |
83 | async.waterfall([ by_item, by_range, all_for_key ], function(err) {
84 | console.log("tests complete");
85 | process.exit(0);
86 | });
87 |
--------------------------------------------------------------------------------
/deploy.yaml:
--------------------------------------------------------------------------------
1 | AWSTemplateFormatVersion: 2010-09-09
2 | Transform: AWS::Serverless-2016-10-31
3 | Parameters:
4 | StreamArn:
5 | Default: arn:aws:kinesis:eu-west-1:999999999999:stream/MyStream
6 | Description: ARN of the Kinesis Stream to Archive
7 | Type: String
8 | AllowedPattern: arn:aws:kinesis:[A-Za-z-0-9]+:\d+:stream\/[A-Za-z-0-9]+
9 | StreamPosition:
10 | Default: TRIM_HORIZON
11 | Description: The point in the Stream to start archiving messages
12 | Type: String
13 | AllowedValues:
14 | - TRIM_HORIZON
15 | - LATEST
16 | Resources:
17 | StreamProcessor:
18 | Type: AWS::Serverless::Function
19 | Properties:
20 | Handler: index.handler
21 | Runtime: nodejs6.10
22 | CodeUri:
23 | Bucket: !Sub awslabs-code-${AWS::Region}
24 | Key: AmazonKinesisArchiver/amazon-kinesis-archiver-1.0.2.zip
25 | MemorySize: 192
26 | Timeout: 60
27 | Tags:
28 | Name: AmazonKinesisArchiver
29 | Role: !GetAtt ArchiverIAMRole.Arn
30 | Events:
31 | Stream:
32 | Type: Kinesis
33 | Properties:
34 | Stream: !Ref StreamArn
35 | StartingPosition: !Ref StreamPosition
36 | ArchiverIAMRole:
37 | Type: "AWS::IAM::Role"
38 | Properties:
39 | Path: "/"
40 | ManagedPolicyArns:
41 | - "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
42 | AssumeRolePolicyDocument:
43 | Version: "2012-10-17"
44 | Statement:
45 | -
46 | Sid: "AllowLambdaServiceToAssumeRole"
47 | Effect: "Allow"
48 | Action:
49 | - "sts:AssumeRole"
50 | Principal:
51 | Service:
52 | - "lambda.amazonaws.com"
53 | Policies:
54 | -
55 | PolicyName: "KinesisArchiverPermission"
56 | PolicyDocument:
57 | Version: "2012-10-17"
58 | Statement:
59 | -
60 | Effect: "Allow"
61 | Action:
62 | - "kinesis:DescribeStream"
63 | - "kinesis:ListStreams"
64 | - "kinesis:ListTagsForStream"
65 | - "kinesis:GetShardIterator"
66 | - "kinesis:GetRecords"
67 | - "kinesis:PutRecord"
68 | - "kinesis:PutRecords"
69 | - "dynamodb:PutItem"
70 | - "dynamodb:UpdateItem"
71 | - "dynamodb:Query"
72 | - "dynamodb:Scan"
73 | Resource: "*"
74 |
--------------------------------------------------------------------------------
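
One way to deploy the template, assuming the AWS CLI is configured; the stack name is arbitrary and the stream ARN shown is the template's illustrative default:

    aws cloudformation deploy \
        --template-file deploy.yaml \
        --stack-name amazon-kinesis-archiver \
        --capabilities CAPABILITY_IAM \
        --parameter-overrides StreamArn=arn:aws:kinesis:eu-west-1:999999999999:stream/MyStream
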
/test/testScan.js:
--------------------------------------------------------------------------------
1 | var q = require('../lib/archive-access')(process.env['AWS_REGION'] || 'us-east-1');
2 | var async = require('async');
3 |
4 | var sqn1 = '49550822123942288925422195661801699673398497972964035234';
5 | var sqn2 = '49550822123942288925422195661802908599218112602138741410';
6 | var sqn3 = '49550822123942288925422195661802908599218112602138741412';
7 | var approxArrival = 1428537600;
8 |
9 | var everything = function(callback) {
10 | var drained = false;
11 | var worker = function(task, wCallback) {
12 | console.log(task);
13 | wCallback();
14 | };
15 | var queue = async.queue(worker, 2);
16 | queue.drain = function() {
17 | drained = true;
18 | }
19 |
20 | q.scanArchive('EnergyPipelineSensors', undefined, undefined, undefined, undefined, queue, function(err) {
21 | console.log("All Data Test Completed - waiting for queue workers");
22 |
23 | async.until(function() {
24 | return drained;
25 | }, function(untilCallback) {
26 | setTimeout(function() {
27 | untilCallback();
28 | }, 500);
29 | }, function(err) {
30 | callback(err);
31 | });
32 | });
33 | };
34 |
35 | var by_seq = function(callback) {
36 | var drained = false;
37 | var worker = function(task, wCallback) {
38 | console.log(task);
39 | wCallback();
40 | };
41 | var queue = async.queue(worker, 2);
42 | queue.drain = function() {
43 | drained = true;
44 | }
45 |
46 | q.scanArchive('EnergyPipelineSensors', sqn2, undefined, undefined, undefined, queue, function(err) {
47 | console.log("Scan by Seq Complete");
48 | async.until(function() {
49 | return drained;
50 | }, function(untilCallback) {
51 | setTimeout(function() {
52 | untilCallback();
53 | }, 500);
54 | }, function(err) {
55 | callback(err);
56 | });
57 | });
58 | };
59 |
60 | var by_approx = function(callback) {
61 | var drained = false;
62 | var worker = function(task, wCallback) {
63 | console.log(task);
64 | wCallback();
65 | };
66 | var queue = async.queue(worker, 2);
67 | queue.drain = function() {
68 | drained = true;
69 | }
70 |
71 | q.scanArchive('EnergyPipelineSensors', sqn2, undefined, approxArrival, undefined, queue, function(err) {
72 | console.log("Scan by Approx Complete");
73 | async.until(function() {
74 | return drained;
75 | }, function(untilCallback) {
76 | setTimeout(function() {
77 | untilCallback();
78 | }, 500);
79 | }, function(err) {
80 | callback(err);
81 | });
82 | });
83 | };
84 |
85 | var console_support = function(callback) {
86 | q.scanToStdConsole('EnergyPipelineSensors', sqn1, undefined, undefined, 10, callback);
87 | };
88 |
89 | async.waterfall([ everything, by_seq, by_approx, console_support ], function(err) {
90 | console.log("tests complete");
91 | process.exit(0);
92 | });
93 |
--------------------------------------------------------------------------------
/test/testArchive.js:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2014-2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 | SPDX-License-Identifier: Apache-2.0
4 | */
5 |
6 | var lambda = require('../index');
7 |
8 | var event = {
9 | "Records" : [
10 | {
11 | "kinesis" : {
12 | "kinesisSchemaVersion" : "1.0",
13 | "approximateArrivalTimestamp" : 1428537600,
14 | "partitionKey" : "-48903309263388366",
15 | "sequenceNumber" : "49550822123942288925422195661801699673398497972964035234",
16 | "data" : "MzcxICgxZikgdHMtMTQzNTczODI4ODkxOSA1Ni4zNjM5MTkwNzg3ODk0NXgtMS42NDA1NjI4ODM3NDE1MjAzIDEwOS45NzkzOTQwMzc4NDA1NSBhdCAxNi4xMjMyNjMyOTY0NjM2MDUgVDoyLjIxMTY3MjU2ODE0NTYwNDQgYzogIDAuMDAxMTk0IGRlZyAgMC4wMDAwMDE="
17 | },
18 | "eventSource" : "aws:kinesis",
19 | "eventVersion" : "1.0",
20 | "eventID" : "shardId-000000000176:49550822123942288925422195661801699673398497972964035234",
21 | "eventName" : "aws:kinesis:record",
22 | "invokeIdentityArn" : "arn:aws:iam::887210671223:role/LambdaExecRole",
23 | "awsRegion" : "eu-west-1",
24 | "eventSourceARN" : "arn:aws:kinesis:eu-west-1:887210671223:stream/EnergyPipelineSensors"
25 | },
26 | {
27 | "kinesis" : {
28 | "kinesisSchemaVersion" : "1.0",
29 | "approximateArrivalTimestamp" : 1428537600,
30 | "partitionKey" : "3388323060863249599",
31 | "sequenceNumber" : "49550822123942288925422195661802908599218112602138741410",
32 | "data" : "NDQgKDYpIHRzLTE0MzU3MzgyOTEwNDYgNTIuMzcyNjA1NDcwOTMxMzc2eC0wLjM5NzEwMzQxMDY2MDkzMjYgMTEwLjkwNTU3MDk1MDcyNDE4IGF0IDE2LjE2ODI3MTY0NDI3MDI5NSBUOjEuNTU2MjY3Nzk3NTczOTAwNSBjOiAgMC4wMDA5MTkgZGVnICAwLjAwMDAwMQ=="
33 | },
34 | "eventSource" : "aws:kinesis",
35 | "eventVersion" : "1.0",
36 | "eventID" : "shardId-000000000176:49550822123942288925422195661802908599218112602138741410",
37 | "eventName" : "aws:kinesis:record",
38 | "invokeIdentityArn" : "arn:aws:iam::887210671223:role/LambdaExecRole",
39 | "awsRegion" : "eu-west-1",
40 | "eventSourceARN" : "arn:aws:kinesis:eu-west-1:887210671223:stream/EnergyPipelineSensors"
41 | },
42 | {
43 | "kinesis" : {
44 | "kinesisSchemaVersion" : "1.0",
45 | "approximateArrivalTimestamp" : 1428537600,
46 | "partitionKey" : "3388323060863249599",
47 | "sequenceNumber" : "49550822123942288925422195661802908599218112602138741411",
48 | "data" : "NDQgKDYpIHRzLTE0MzU3MzgyOTEwNDYgNTIuMzcyNjA1NDcwOTMxMzc2eC0wLjM5NzEwMzQxMDY2MDkzMjYgMTEwLjkwNTU3MDk1MDcyNDE4IGF0IDE2LjE2ODI3MTY0NDI3MDI5NSBUOjEuNTU2MjY3Nzk3NTczOTAwNSBjOiAgMC4wMDA5MTkgZGVnICAwLjAwMDAwMQ=="
49 | },
50 | "eventSource" : "aws:kinesis",
51 | "eventVersion" : "1.0",
52 | "eventID" : "shardId-000000000176:49550822123942288925422195661802908599218112602138741411",
53 | "eventName" : "aws:kinesis:record",
54 | "invokeIdentityArn" : "arn:aws:iam::887210671223:role/LambdaExecRole",
55 | "awsRegion" : "eu-west-1",
56 | "eventSourceARN" : "arn:aws:kinesis:eu-west-1:887210671223:stream/EnergyPipelineSensors"
57 | } ]
58 | };
59 |
60 | function context() {
61 | }
62 | context.done = function(status, message) {
63 | console.log("Context Closure Message: " + JSON.stringify(message));
64 |
65 | if (status && status !== null) {
66 | console.log('ERROR');
67 | process.exit(-1);
68 | } else {
69 | process.exit(0);
70 | }
71 | };
72 | context.success = function(message) {
73 | context.done(null, message);
74 | }
75 | context.fail = function(err) {
76 | context.done(err, err.message);
77 | }
78 |
79 | // run the lambda function
80 | lambda.handler(event, context);
--------------------------------------------------------------------------------
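
The test drives the Lambda entry point directly, so it exercises the archiver against live AWS resources; it assumes credentials and that the EnergyPipelineSensors stream and its archive table exist:

    node test/testArchive.js
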
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing Guidelines
2 |
3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
4 | documentation, we greatly value feedback and contributions from our community.
5 |
6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
7 | information to effectively respond to your bug report or contribution.
8 |
9 |
10 | ## Reporting Bugs/Feature Requests
11 |
12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features.
13 |
14 | When filing an issue, please check [existing open](https://github.com/awslabs/amazon-kinesis-archiver/issues), or [recently closed](https://github.com/awslabs/amazon-kinesis-archiver/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already
15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
16 |
17 | * A reproducible test case or series of steps
18 | * The version of our code being used
19 | * Any modifications you've made relevant to the bug
20 | * Anything unusual about your environment or deployment
21 |
22 |
23 | ## Contributing via Pull Requests
24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
25 |
26 | 1. You are working against the latest source on the *master* branch.
27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
29 |
30 | To send us a pull request, please:
31 |
32 | 1. Fork the repository.
33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
34 | 3. Ensure local tests pass.
35 | 4. Commit to your fork using clear commit messages.
36 | 5. Send us a pull request, answering any default questions in the pull request interface.
37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
38 |
39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
41 |
42 |
43 | ## Finding contributions to work on
44 | Looking at the existing issues is a great way to find something to work on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/awslabs/amazon-kinesis-archiver/labels/help%20wanted) issues is a great place to start.
45 |
46 |
47 | ## Code of Conduct
48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
50 | opensource-codeofconduct@amazon.com with any additional questions or comments.
51 |
52 |
53 | ## Security issue notifications
54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue.
55 |
56 |
57 | ## Licensing
58 |
59 | See the [LICENSE](https://github.com/awslabs/amazon-kinesis-archiver/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
60 |
61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes.
62 |
--------------------------------------------------------------------------------
/lib/tableSetup.js:
--------------------------------------------------------------------------------
1 | var readline = require('readline');
2 | var async = require('async');
3 | require("./constants");
4 | var common = require("./common");
5 | var aws = require('aws-sdk');
6 | var ec2 = new aws.EC2({
7 | apiVersion: '2016-11-15',
8 | region: "us-east-1"
9 | });
10 |
11 | /* configuration of question prompts and config assignment */
12 | var rl = readline.createInterface({
13 | input: process.stdin,
14 | output: process.stdout
15 | });
16 |
17 | var _streamName;
18 | var _archiveMode;
19 | var _setRegion;
20 | var _writeIOPS;
21 | var _readIOPS;
22 | var _ttlSeconds;
23 |
24 | q_region = function (callback) {
25 | rl.question('Enter the Region for the Archive Table > ', function (answer) {
26 | if (blank(answer) !== null) {
27 | ec2.describeRegions({}, function (err, data) {
28 | if (err) {
29 | callback(err);
30 | } else {
31 | var regions = [];
32 | data.Regions.map(function (item) {
33 | regions.push(item.RegionName);
34 | });
35 | validateArrayContains(regions, answer.toLowerCase(), function (err) {
36 | if (err) {
37 | callback(err);
38 | } else {
39 | _setRegion = answer.toLowerCase();
40 |
41 | callback(null);
42 | }
43 | });
44 | }
45 | });
46 | } else {
47 | callback("You must provide a region");
48 | }
49 | });
50 | };
51 |
52 | q_streamName = function (callback) {
53 | rl.question('Enter the Stream Name > ', function (answer) {
54 | if (blank(answer) !== null) {
55 | _streamName = answer;
56 |
57 | callback(null);
58 | } else {
59 | callback("You must provide a Stream Name");
60 | }
61 | });
62 | };
63 |
64 | q_archiveMode = function (callback) {
65 | rl.question('Enter the Archive Mode required (all | latest) > ', function (answer) {
66 | if (!answer || (answer !== RECOVERY_MODE_LATEST && answer !== RECOVERY_MODE_ALL)) {
67 | callback("Archive Mode must be one of " + RECOVERY_MODE_LATEST + " or " + RECOVERY_MODE_ALL);
68 | } else {
69 | _archiveMode = answer;
70 | callback(null);
71 | }
72 | });
73 | };
74 |
75 | q_setTTL = function (callback) {
76 | rl.question('Should data expire from the archive table? If yes enter the interval to retain in seconds > ',
77 | function (answer) {
78 | if (!answer) {
79 | callback(null);
80 | } else {
81 | _ttlSeconds = parseInt(answer);
82 | callback(null);
83 | }
84 | });
85 | };
86 |
87 | q_readIOPS = function (callback) {
88 | rl.question('How Many Read IOPS do you require? > ', function (answer) {
89 | if (blank(answer) !== null) {
90 | _readIOPS = getIntValue(answer);
91 |
92 | callback(null);
93 | } else {
94 | callback("You must provide the required amount of Read IOPS");
95 | }
96 | });
97 | };
98 |
99 | q_writeIOPS = function (callback) {
100 | rl.question('How Many Write IOPS do you require? > ', function (answer) {
101 | if (blank(answer) !== null) {
102 | _writeIOPS = getIntValue(answer);
103 |
104 | callback(null);
105 | } else {
106 | callback("You must provide the required amount of Write IOPS");
107 | }
108 | });
109 | };
110 |
111 | last = function (err, callback) {
112 | rl.close();
113 |
114 | if (err) {
115 | console.log(err);
116 | } else {
117 | common.createTables(_setRegion, _streamName, _archiveMode, _readIOPS, _writeIOPS, _ttlSeconds, function (err) {
118 | if (err) {
119 | console.log(err);
120 | process.exit(ERROR);
121 | } else {
122 | process.exit(OK);
123 | }
124 | });
125 | }
126 | };
127 |
128 | validateArrayContains = function (array, value, callback) {
129 | if (array.indexOf(value) == -1) {
130 | var err = 'Value must be one of ' + array.toString();
131 | callback(err);
132 | } else {
133 | callback();
134 | }
135 | };
136 |
137 | blank = function (value) {
138 | if (value === '') {
139 | return null;
140 | } else {
141 | return value;
142 | }
143 | };
144 |
145 | getIntValue = function (value) {
146 | if (!value || value === null) {
147 | rl.close();
148 | console.log('Null Value');
149 | process.exit(ERROR);
150 | } else {
151 | var num = parseInt(value);
152 |
153 | if (isNaN(num)) {
154 | rl.close();
155 | console.log('Value \'' + value + '\' is not a Number');
156 | process.exit(ERROR);
157 | } else {
158 | return num;
159 | }
160 | }
161 | };
162 |
163 | exports.runSetup = function (streamName, region, archiveMode, readIOPS, writeIOPS, ttlSeconds, callback) {
164 | qs = [];
165 |
166 | if (!streamName || streamName === "") {
167 | qs.push(q_streamName);
168 | } else {
169 | _streamName = streamName;
170 | }
171 | if (!region || region === "") {
172 | qs.push(q_region);
173 | } else {
174 | _setRegion = region;
175 | }
176 | if (!archiveMode || archiveMode === "") {
177 | qs.push(q_archiveMode);
178 | } else {
179 | _archiveMode = archiveMode;
180 | }
181 | if (!ttlSeconds || ttlSeconds === "") {
182 | qs.push(q_setTTL);
183 | } else {
184 | _ttlSeconds = ttlSeconds;
185 | }
186 | if (!readIOPS || readIOPS === "") {
187 | qs.push(q_readIOPS);
188 | } else {
189 | _readIOPS = readIOPS;
190 | }
191 | if (!writeIOPS || writeIOPS === "") {
192 | qs.push(q_writeIOPS);
193 | } else {
194 | _writeIOPS = writeIOPS;
195 | }
196 | async.waterfall(qs, last);
197 | };
--------------------------------------------------------------------------------
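
runSetup only prompts for values that were not passed in, so it can also be driven programmatically; a sketch with illustrative values:

    var tableSetup = require('./lib/tableSetup');
    // streamName, region, archiveMode, readIOPS, writeIOPS, ttlSeconds
    tableSetup.runSetup('MyStream', 'eu-west-1', 'latest', 10, 10, 86400);
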
/lib/common.js:
--------------------------------------------------------------------------------
1 | var debug = process.env['DEBUG'] || false;
2 |
3 | require("./constants");
4 | var aws = require('aws-sdk');
5 |
6 | function getTargetTablename(StreamName, archiveMode) {
7 | return StreamName + "-archive-" + archiveMode;
8 | };
9 | exports.getTargetTablename = getTargetTablename;
10 |
11 | getTtlValue = function (seconds) {
12 | return Math.round(now() + Number(seconds)); // seconds may arrive as a string from stream tag values
13 | };
14 | exports.getTtlValue = getTtlValue;
15 |
16 | getFormattedDate = function (date) {
17 | if (!date) {
18 | date = new Date();
19 | }
20 |
21 | var hour = date.getHours();
22 | hour = (hour < 10 ? "0" : "") + hour;
23 |
24 | var min = date.getMinutes();
25 | min = (min < 10 ? "0" : "") + min;
26 |
27 | var sec = date.getSeconds();
28 | sec = (sec < 10 ? "0" : "") + sec;
29 |
30 | var year = date.getFullYear();
31 |
32 | var month = date.getMonth() + 1;
33 | month = (month < 10 ? "0" : "") + month;
34 |
35 | var day = date.getDate();
36 | day = (day < 10 ? "0" : "") + day;
37 |
38 | return year + "-" + month + "-" + day + " " + hour + ":" + min + ":" + sec;
39 | };
40 |
41 | function now() {
42 | return new Date().getTime() / 1000;
43 | };
44 | exports.now = now;
45 |
46 | function randomInt(low, high) {
47 | return Math.floor(Math.random() * (high - low) + low);
48 | };
49 | exports.randomInt = randomInt;
50 |
51 | function createTables(regionName, streamName, archiveMode, readIOPS, writeIOPS, ttlSeconds, callback) {
52 | var dynamoDB = new aws.DynamoDB({
53 | apiVersion: '2012-08-10',
54 | region: regionName
55 | });
56 |
57 | if (!streamName || !archiveMode || !readIOPS || !writeIOPS) {
58 | callback("Unable to create DynamoDB Table without configured values");
59 | } else {
60 | var tableName = exports.getTargetTablename(streamName, archiveMode);
61 |
62 | // create the base specification for the dynamo db table
63 | var compressStreamSpec = {
64 | AttributeDefinitions: [{
65 | AttributeName: partitionKeyName,
66 | AttributeType: 'S'
67 | }],
68 | KeySchema: [{
69 | AttributeName: partitionKeyName,
70 | KeyType: 'HASH'
71 | }],
72 | TableName: tableName,
73 | BillingMode: 'PAY_PER_REQUEST'
74 | };
75 |
76 | // add a sort key on sequence number for MODE = all. The attribute
77 | // definition is only added in this mode, as DynamoDB rejects
78 | // definitions that are not used in the key schema
79 | if (archiveMode == RECOVERY_MODE_ALL) {
80 | compressStreamSpec.AttributeDefinitions.push({
81 | AttributeName: sortKeyName,
82 | AttributeType: 'S'
83 | });
84 | compressStreamSpec.KeySchema.push({
85 | AttributeName: sortKeyName,
86 | KeyType: 'RANGE'
87 | });
88 | }
89 |
90 | console.log("Creating Table " + tableName + " in Dynamo DB");
91 | dynamoDB.createTable(compressStreamSpec, function (err, data) {
92 | if (err && err.code !== 'ResourceInUseException') {
93 | console.log(err.toString());
94 | return callback(err.toString());
95 | } else if (err) {
96 | console.log("OK - table already exists");
97 | }
98 | if (!ttlSeconds) { callback(); } // without a TTL, table creation completes the setup
99 | });
100 | if (ttlSeconds) {
101 | var params = {
102 | TableName: tableName,
103 | TimeToLiveSpecification: {
104 | AttributeName: ARCHIVE_ATTRIBUTE_NAME,
105 | Enabled: true
106 | }
107 | };
108 |
109 | // wait until the table has been created
110 | dynamoDB.waitFor('tableExists', {
111 | TableName: tableName
112 | }, function (err, data) {
113 | if (err) {
114 | console.log(err, err.stack);
115 | callback(err);
116 | } else {
117 | // set ttl values
118 | dynamoDB.updateTimeToLive(params, function (err, data) {
119 | if (err) {
120 | console.log(err, err.stack);
121 | callback(err);
122 | } else {
123 | console.log("TTL Enabled");
124 | callback();
125 | }
126 | });
127 | }
128 | });
129 | }
130 | }
131 | };
132 | exports.createTables = createTables;
133 |
134 | function getArchiveSettingsForStream(streamName, streamModeCache, kinesisClient, forceInvalidate, callback) {
135 | if (!(streamName in streamModeCache) || forceInvalidate === true) {
136 | if (debug) {
137 | if (forceInvalidate) {
138 | console.log("Force resolution of Archive Mode for Stream " + streamName)
139 | } else {
140 | console.log("Building Archive Mode cache for Stream " + streamName);
141 | }
142 | }
143 |
144 | // revalidate the cached values every N seconds
145 | setTimeout(getArchiveSettingsForStream, reValidateArchiveModeCacheSeconds * 1000, streamName, streamModeCache,
146 | kinesisClient, true, undefined);
147 |
148 | // query the stream's tags and resolve settings from there
149 | kinesisClient.listTagsForStream({
150 | StreamName: streamName
151 | }, function (err, data) {
152 | if (err) {
153 | callback(err);
154 | } else {
155 | // cache entry
156 | var cacheEntry = {};
157 | // process the tags on the stream for the configuration info
158 | data.Tags.map(function (item) {
159 | if (item.Key === RECOVERY_MODE_TAG_NAME) {
160 | cacheEntry[RECOVERY_MODE_TAG_NAME] = item.Value;
161 | console.log("Setting Archive Mode " + item.Value);
162 | }
163 | if (item.Key === ARCHIVE_TAG_NAME) {
164 | var tag_values = item.Value.split("=");
165 | cacheEntry[ARCHIVE_TAG_TTL_SECONDS_NAME] = tag_values[1];
166 | }
167 | });
168 |
169 | // set the default archive mode if needed
170 | if (!cacheEntry[RECOVERY_MODE_TAG_NAME]) {
171 | console.log("Setting default Archive Mode 'latest'");
172 | cacheEntry[RECOVERY_MODE_TAG_NAME] = RECOVERY_MODE_LATEST;
173 | }
174 |
175 | cacheEntry.tableName = getTargetTablename(streamName, cacheEntry[RECOVERY_MODE_TAG_NAME]);
176 | streamModeCache[streamName] = cacheEntry;
177 |
178 | if (debug) {
179 | console.log("Created Cache Entry: " + JSON.stringify(cacheEntry));
180 | }
181 |
182 | // all set - lets go!
183 | if (callback) {
184 | callback(null);
185 | }
186 | }
187 | });
188 |
189 |
190 | } else {
191 | // cache already includes the stream recovery information, so lets
192 | // go!
193 | if (debug) {
194 | console.log("Found previously cached information for " + streamName);
195 | }
196 | if (callback) {
197 | callback();
198 | }
199 | }
200 | };
201 | exports.getArchiveSettingsForStream = getArchiveSettingsForStream;
--------------------------------------------------------------------------------
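
getTtlValue adds the retention interval to the current epoch time in seconds, producing the expireAfter attribute value that DynamoDB's TTL feature uses to delete aged items; a quick sketch assuming 24-hour retention:

    var common = require('./lib/common');
    // epoch seconds roughly one day from now
    var expireAfter = common.getTtlValue(86400);
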
/LICENSE.txt:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
--------------------------------------------------------------------------------
/lib/kinesis-archiver.js:
--------------------------------------------------------------------------------
1 | var debug = process.env['DEBUG'] || false;
2 | var region = process.env['AWS_REGION'];
3 |
4 | var pjson = require('../package.json');
5 | var async = require('async');
6 | require("./constants");
7 | var common = require("./common");
8 | var streamModeCache = {};
9 |
10 | var aws;
11 | var kinesisClient;
12 | var dynamoDB;
13 |
14 | module.exports = function (setRegion, kinesisClient, dynamoDB, overrideCache) {
15 | this.aws = require('aws-sdk');
16 | if (setRegion) {
17 | this.aws.config.update({
18 | region: setRegion
19 | });
20 | } else {
21 | if (!setRegion || setRegion === null || setRegion === "") {
22 | this.aws.config.update({
23 | region: 'us-east-1'
24 | });
25 |
26 | }
27 | }
28 |
29 | if (kinesisClient) {
30 | this.kinesisClient = kinesisClient;
31 | } else {
32 | // configure a new connection to kinesis streams, if one has not
33 | // been provided
34 | if (!this.kinesisClient) {
35 | if (debug) {
36 | console.log("Connecting to Amazon Kinesis Streams in " + this.aws.config.region);
37 | }
38 | this.kinesisClient = new this.aws.Kinesis({
39 | apiVersion: '2013-12-02',
40 | region: this.aws.config.region
41 | });
42 | }
43 | }
44 | if (dynamoDB) {
45 | this.dynamoDB = dynamoDB;
46 | } else {
47 | if (!this.dynamoDB) {
48 | if (debug) {
49 | console.log("Connecting to Amazon DynamoDB in " + this.aws.config.region);
50 | }
51 | this.dynamoDB = new this.aws.DynamoDB({
52 | apiVersion: '2012-08-10',
53 | region: this.aws.config.region
54 | });
55 | }
56 | }
57 | if (overrideCache) {
58 | console.log("Applying Cache Override");
59 |
60 | this.streamModeCache = overrideCache;
61 | }
62 |
63 | console.log("AWS Kinesis Stream Archiver online in " + this.aws.config.region);
64 |
65 | init = function (streamName, forceInvalidate, callback) {
66 | common.getArchiveSettingsForStream(streamName, streamModeCache, this.kinesisClient, forceInvalidate, callback);
67 | };
68 |
69 | processRecords = function (streamName, event, callback) {
70 | var processor = processKinesisRecord.bind(undefined, streamName);
71 |
72 | async.map(event.Records, processor, function (err, results) {
73 | if (debug) {
74 | console.log("Completed Archival of " + event.Records.length + " Event Records");
75 | }
76 |
77 | if (err) {
78 | console.log(err);
79 | callback(err);
80 | } else {
81 | var errors = [];
82 | var status = OK;
83 |
84 | // extract all errors from parallel results
85 | results.map(function (item) {
86 | if (item.status) {
87 | if (item.status !== OK) {
88 | status = status === OK ? item.status : status;
89 | errors.push(item.msg);
90 | }
91 | } else {
92 | status = ERROR;
93 | if (item.msg) {
94 | errors.push(item.msg);
95 | }
96 | }
97 | });
98 |
99 | var message;
100 | if (errors.length > 0) {
101 | message = JSON.stringify(errors);
102 | }
103 | if (debug && message) {
104 | console.log(message);
105 | }
106 | callback(null, {
107 | "event": event,
108 | "status": status,
109 | "message": message
110 | });
111 | }
112 | });
113 | };
114 |
115 | processKinesisRecord = function (streamName, record, completionCallback) {
116 | // lookup the stream vital details from the cache
117 | var cacheEntry = streamModeCache[streamName];
118 | var archiveMode = cacheEntry[RECOVERY_MODE_TAG_NAME];
119 | var tableName = cacheEntry.tableName;
120 |
121 | var ttlSeconds;
122 | if (cacheEntry.hasOwnProperty(ARCHIVE_TAG_TTL_SECONDS_NAME)) {
123 | ttlSeconds = cacheEntry[ARCHIVE_TAG_TTL_SECONDS_NAME];
124 | }
125 |
126 | var partitionKey = record.kinesis.partitionKey;
127 | var seq = record.kinesis.sequenceNumber;
128 | var data = record.kinesis.data;
129 | var shardId = record.eventID.split(":")[0];
130 |
131 | if (debug) {
132 | console.log("Resolved event detail:");
133 | console.log({
134 | "partitionKey": partitionKey,
135 | "seq": seq,
136 | "tableName": tableName,
137 | "shardID": shardId,
138 | "data": data
139 | });
140 | }
141 |
142 | // check that we can store the record into ddb
143 | if (data.length > 400 * 1024 - partitionKey.length - seq.length - 10 /* timestamp */) {
144 | return completionCallback("Message Length of " + data.length + " Exceeds Max DDB Item Size");
145 | }
146 |
147 | try {
148 | var proceed = false;
149 | var tryNumber = 0;
150 | var retryLimit = 20;
151 | var asyncError;
152 |
153 | // async whilst gives us a retry mechanism in case of provisioned
154 | // throughput errors or whatever
155 | async
156 | .whilst(
157 | function () {
158 | // return OK if the proceed flag has
159 | // been set, or if we've hit the
160 | // retry count
161 | return !proceed && tryNumber < retryLimit;
162 | },
163 | function (whilstCallback) {
164 | tryNumber++;
165 |
166 | // build the params for an atomic update of the
167 | // object body
168 | var shardMagic = shardId + "-" + common.randomInt(0, 99);
169 | var item = {
170 | Key: {},
171 | TableName: tableName,
172 | UpdateExpression: "set #lastUpdate = :updateTime, recordData = :data, shardId = :shardId, approximateArrivalTimestamp = :approxArrival",
173 | ExpressionAttributeNames: {
174 | "#lastUpdate": lastUpdateDateName
175 | },
176 | ExpressionAttributeValues: {
177 | ":updateTime": {
178 | S: getFormattedDate(),
179 | },
180 | ":data": {
181 | S: data
182 | },
183 | ":shardId": {
184 | S: shardId
185 | },
186 | ":approxArrival": {
187 | N: "" + record.kinesis.approximateArrivalTimestamp
188 | }
189 | }
190 | };
191 |
192 | if (debug) {
193 | item["ReturnConsumedCapacity"] = 'TOTAL';
194 | }
195 |
196 | // set the primary key
197 | item.Key[partitionKeyName] = {
198 | S: partitionKey
199 | };
200 |
201 | if (archiveMode === RECOVERY_MODE_LATEST) {
202 | /*
203 | * sequence number must be lower or not
204 | * found
205 | */
206 | item.ConditionExpression = "#seq < :sequence or attribute_not_exists(#seq)";
207 |
208 | // add the sequence as update expressions to
209 | // set the new
210 | // value
211 | item.UpdateExpression = item.UpdateExpression + ",#seq = :sequence";
212 |                                 // extend the existing attribute names map
213 |                                 // so that #lastUpdate is preserved
214 |                                 item.ExpressionAttributeNames["#seq"] = sortKeyName;
215 | item.ExpressionAttributeValues[":sequence"] = {
216 | S: '' + seq
217 | };
218 | } else {
219 | // RECOVERY_MODE_ALL uses the sequence as
220 | // the sort key
221 | item.Key[sortKeyName] = {
222 | S: seq
223 | };
224 | }
225 |
226 | // add ttl information
227 | if (ttlSeconds) {
228 | item.UpdateExpression = item.UpdateExpression + ", #ttlAttribute = :expireAfter";
229 |
230 | if (item.ExpressionAttributeNames) {
231 | item.ExpressionAttributeNames["#ttlAttribute"] = ARCHIVE_ATTRIBUTE_NAME;
232 | } else {
233 | item.ExpressionAttributeNames = {
234 | "#ttlAttribute": ARCHIVE_ATTRIBUTE_NAME
235 | };
236 | }
237 | item.ExpressionAttributeValues[":expireAfter"] = {
238 | N: "" + common.getTtlValue(ttlSeconds)
239 | };
240 | }
241 |
242 | if (debug) {
243 | console.log(JSON.stringify(item));
244 | }
245 |
246 | // update ddb
247 | this.dynamoDB.updateItem(item, function (err, data) {
248 | if (err) {
249 | if (err.code === conditionCheckFailed) {
250 | /*
251 | * no problem - something wrote a
252 | * later record so we're done
253 | */
254 | if (debug) {
255 | console.log("Duplicate record with archive mode " + archiveMode
256 | + ". Continuing...");
257 | }
258 | proceed = true;
259 | whilstCallback();
260 | } else if (err.code === provisionedThroughputExceeded) {
261 | console.log("Provisioned Throughput Exceeded - add Write IOPS!");
262 |
263 |                                     // exponential backoff with up to 10ms
264 |                                     // of jitter before the next attempt
265 |                                     setTimeout(whilstCallback,
266 |                                         (50 * Math.pow(2, tryNumber)) + common.randomInt(0, 10));
267 | } else {
268 | asyncError = err;
269 | proceed = true;
270 | whilstCallback(err);
271 | }
272 | } else {
273 | /*
274 | * no error - the body was updated on
275 | * the item
276 | */
277 | proceed = true;
278 | whilstCallback();
279 | if (debug) {
280 | console.log("OK - Consumption Information: " + JSON.stringify(data.ConsumedCapacity));
281 | }
282 | }
283 | });
284 | }, function (err) {
285 | // function called when the async retry
286 | // completes
287 | if (err) {
288 | completionCallback(err, null);
289 | } else {
290 | if (asyncError) {
291 | // throw errors which were encountered
292 | // during async
293 | // calls
294 | completionCallback(asyncError, null);
295 | } else {
296 | if (!proceed) {
297 | // we timed out while trying to
298 | // write the item to
299 | // ddb
300 | completionCallback(null, {
301 | status: ERROR,
302 | partitionKey: partitionKey,
303 | sequence: seq,
304 | msg: "Timeout while trying to update DDB"
305 | });
306 | } else {
307 | // done ok
308 | completionCallback(null, {
309 | status: OK,
310 | partitionKey: partitionKey,
311 | sequence: seq,
312 | msg: null
313 | });
314 | }
315 | }
316 | }
317 | });
318 | } catch (err) {
319 | if (debug) {
320 | console.log("Catch of core record processing");
321 | console.log(JSON.stringify(err));
322 | }
323 | completionCallback(err, {
324 | status: ERROR,
325 | partitionKey: partitionKey,
326 | sequence: seq,
327 | msg: null
328 | });
329 | }
330 | };
331 |
332 | return this;
333 | };
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Kinesis Stream Archiver
2 |
3 | Amazon Kinesis provides a family of services for working with streaming data at any scale. Kinesis Streams enables you to build custom applications that process or analyse streaming data for specialised needs. By default, Records of a Stream are accessible for up to 24 hours from the time they are added to the Stream. You can raise this limit to up to 7 days by enabling extended data retention, and you can also [send your Kinesis Streams data to Kinesis Firehose](http://docs.aws.amazon.com/firehose/latest/dev/create-name.html) for long term backup on Amazon S3.
4 |
5 | Some applications need to reprocess data that is significantly older than the Stream retention period, and would like to be able to 'replay' data into a Stream for subsequent processing. This type of feature allows you to use Kinesis Streams for a 'unified log' or 'log oriented' architecture. In this model, you can use a Stream to build a database of the changes carried on the Stream, and consume the sum total or final copies of log messages quickly and easily.
6 |
7 | This community-built and maintained module, which runs on AWS Lambda, gives you the ability to accomplish many of the above requirements without having to run additional server infrastructure. It consumes data from an Amazon Kinesis Stream and writes the event records to Amazon DynamoDB, a fully managed and highly durable NoSQL database. When it does this, you can choose whether it keeps all data received, or only the latest record by sequence number for the Stream's Partition Key. You can then use programmatic APIs in your software to query or replay data into the original or alternative Kinesis Streams.
8 |
9 | 
10 |
11 | ## Creating a Stream archive
12 |
13 | To get started with this module, simply deploy the function, and its mapping to a Kinesis Stream, via the [AWS SAM](https://github.com/awslabs/serverless-application-model) templates below:
14 |
15 | | Launch Template |
16 | | --------------------------|
17 | | [Launch Stack](https://console.aws.amazon.com/cloudformation/home?region=ap-south-1#/stacks/new?stackName=AmazonKinesisArchiver&templateURL=https://s3-ap-south-1.amazonaws.com/awslabs-code-ap-south-1/AmazonKinesisArchiver/deploy.yaml) in ap-south-1 |
18 | | [Launch Stack](https://console.aws.amazon.com/cloudformation/home?region=eu-west-2#/stacks/new?stackName=AmazonKinesisArchiver&templateURL=https://s3-eu-west-2.amazonaws.com/awslabs-code-eu-west-2/AmazonKinesisArchiver/deploy.yaml) in eu-west-2 |
19 | | [Launch Stack](https://console.aws.amazon.com/cloudformation/home?region=eu-west-1#/stacks/new?stackName=AmazonKinesisArchiver&templateURL=https://s3-eu-west-1.amazonaws.com/awslabs-code-eu-west-1/AmazonKinesisArchiver/deploy.yaml) in eu-west-1 |
20 | | [Launch Stack](https://console.aws.amazon.com/cloudformation/home?region=ap-northeast-2#/stacks/new?stackName=AmazonKinesisArchiver&templateURL=https://s3-ap-northeast-2.amazonaws.com/awslabs-code-ap-northeast-2/AmazonKinesisArchiver/deploy.yaml) in ap-northeast-2 |
21 | | [Launch Stack](https://console.aws.amazon.com/cloudformation/home?region=ap-northeast-1#/stacks/new?stackName=AmazonKinesisArchiver&templateURL=https://s3-ap-northeast-1.amazonaws.com/awslabs-code-ap-northeast-1/AmazonKinesisArchiver/deploy.yaml) in ap-northeast-1 |
22 | | [Launch Stack](https://console.aws.amazon.com/cloudformation/home?region=sa-east-1#/stacks/new?stackName=AmazonKinesisArchiver&templateURL=https://s3-sa-east-1.amazonaws.com/awslabs-code-sa-east-1/AmazonKinesisArchiver/deploy.yaml) in sa-east-1 |
23 | | [Launch Stack](https://console.aws.amazon.com/cloudformation/home?region=ca-central-1#/stacks/new?stackName=AmazonKinesisArchiver&templateURL=https://s3-ca-central-1.amazonaws.com/awslabs-code-ca-central-1/AmazonKinesisArchiver/deploy.yaml) in ca-central-1 |
24 | | [Launch Stack](https://console.aws.amazon.com/cloudformation/home?region=ap-southeast-1#/stacks/new?stackName=AmazonKinesisArchiver&templateURL=https://s3-ap-southeast-1.amazonaws.com/awslabs-code-ap-southeast-1/AmazonKinesisArchiver/deploy.yaml) in ap-southeast-1 |
25 | | [Launch Stack](https://console.aws.amazon.com/cloudformation/home?region=ap-southeast-2#/stacks/new?stackName=AmazonKinesisArchiver&templateURL=https://s3-ap-southeast-2.amazonaws.com/awslabs-code-ap-southeast-2/AmazonKinesisArchiver/deploy.yaml) in ap-southeast-2 |
26 | | [Launch Stack](https://console.aws.amazon.com/cloudformation/home?region=eu-central-1#/stacks/new?stackName=AmazonKinesisArchiver&templateURL=https://s3-eu-central-1.amazonaws.com/awslabs-code-eu-central-1/AmazonKinesisArchiver/deploy.yaml) in eu-central-1 |
27 | | [Launch Stack](https://console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/new?stackName=AmazonKinesisArchiver&templateURL=https://s3-us-east-1.amazonaws.com/awslabs-code-us-east-1/AmazonKinesisArchiver/deploy.yaml) in us-east-1 |
28 | | [Launch Stack](https://console.aws.amazon.com/cloudformation/home?region=us-east-2#/stacks/new?stackName=AmazonKinesisArchiver&templateURL=https://s3-us-east-2.amazonaws.com/awslabs-code-us-east-2/AmazonKinesisArchiver/deploy.yaml) in us-east-2 |
29 | | [Launch Stack](https://console.aws.amazon.com/cloudformation/home?region=us-west-1#/stacks/new?stackName=AmazonKinesisArchiver&templateURL=https://s3-us-west-1.amazonaws.com/awslabs-code-us-west-1/AmazonKinesisArchiver/deploy.yaml) in us-west-1 |
30 | | [Launch Stack](https://console.aws.amazon.com/cloudformation/home?region=us-west-2#/stacks/new?stackName=AmazonKinesisArchiver&templateURL=https://s3-us-west-2.amazonaws.com/awslabs-code-us-west-2/AmazonKinesisArchiver/deploy.yaml) in us-west-2 |
31 |
32 | When creating the Stack, you must supply a Stream ARN, which identifies the Kinesis Stream that should be archived, and the Stream Position, which can be one of:
33 |
34 | |Setting |Start Position |
35 | | ----|---|
36 | | `TRIM_HORIZON` | The system will start archiving from the earliest record available, based on the Stream's retention policy |
37 | | `LATEST` | The system will start archiving from the next record ingested after the Kinesis Event Source is created and the function deployed |
38 |
39 | Once done, you will see that you have a new Lambda function deployed, with name `<Stack Name>-StreamProcessor-<unique ID>`, and this function will have an Event Source created for the indicated Kinesis Stream.
40 |
41 | ## Configuring the Archive Mode
42 |
43 | Now that the function is set up, we need to tell it how data should be archived for each Stream. Unfortunately we can't yet do this through AWS SAM, so we'll use a script that's part of this project. The Kinesis Archiver decides how to archive data based on Tags that are placed on the source Kinesis Stream, which enables a single function to archive a virtually unlimited number of input Streams. To set the archive mode, simply run:
44 |
45 | `./bin/setup.sh <Stream Name> <Archive Mode> <Region>` with the following options (an example invocation follows):
46 |
47 | * `Stream Name` - the Name of the Kinesis Stream in the specified Region. Please note this is not the Stream ARN used previously
48 | * `Archive Mode` - one of `ALL` or `LATEST`. Archive Mode `ALL` will create a full record of all messages from the Stream. `LATEST` will only keep the last copy of a message on the basis of the supplied Stream Partition Key value
49 | * `Region` - the region where the Kinesis Stream is deployed
50 |
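For example, to keep a complete archive of a Stream called `MyKinesisStream` (an illustrative name) that is deployed in `eu-west-1`, you would run:

```
./bin/setup.sh MyKinesisStream ALL eu-west-1
```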
51 | Once done, you will be asked a series of questions about how the Archive should be stored in DynamoDB, including whether you want TTL expiration of archive data, and how many read and write IOPS to provision for the archive table.
52 |
53 | _Please note that this script requires that you have the [AWS Command Line Interface](https://aws.amazon.com/cli), and a node.js runtime installed on your system_.
54 |
55 | ## What happens now?
56 |
57 | Data from your Kinesis Stream will be routed to the Archiver Lambda function, and then saved into DynamoDB. For a Stream named `MyKinesisStream`, the DynamoDB table is called `MyKinesisStream-archive-<Archive Mode>`, where `<Archive Mode>` is one of `ALL` or `LATEST`.
58 |
59 | This table has the following structure:
60 |
61 | * `partitionKey` - String - this is the partition key specified on the Kinesis PUT event
62 | * `sequenceNumber` - String - this is the Kinesis Sequence Number of the last Record archived into the table for the partitionKey
63 | * `lastUpdate` - String - Timestamp that the last archived record was written to DynamoDB
64 | * `recordData` - String - Base64 encoded string value of the Kinesis record data
65 | * `approximateArrivalTimestamp` - Long - Timestamp expressed as epoch seconds when the message was received by Amazon Kinesis
66 | * `shardId` - String - the Shard ID from which the message was received
67 |
68 | If you specify that the archive mode is `ALL`, then the table has a Partition/Sort key of `partitionKey/sequenceNumber`. If you instead specify `LATEST`, then the table will just have a Partition key of `partitionKey`, and only the latest `sequenceNumber` will be stored (using a [DynamoDB Conditional Write](http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/WorkingWithItems.html#WorkingWithItems.ConditionalUpdate)). An illustrative item is shown below.
69 |
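A single archived item in an `ALL` mode table might look like the following when read through the low-level DynamoDB API (all values are invented for the example, including the `lastUpdate` timestamp format):

```javascript
{
    "partitionKey": { "S": "sensor-17" },
    "sequenceNumber": { "S": "49571539155174358102711915485373263761422270521548603394" },
    "lastUpdate": { "S": "2017-08-01 10:15:04" },
    "recordData": { "S": "SGVsbG9Xb3JsZA==" },
    "approximateArrivalTimestamp": { "N": "1501582504" },
    "shardId": { "S": "shardId-000000000000" }
}
```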
70 |
71 | ## Automatically expiring data
72 |
73 | The Kinesis Archiver has the ability to automatically remove data from the Stream Archive using the [DynamoDB Time To Live (TTL)](http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/TTL.html) feature, which is probably a good idea to use if you select archive mode `ALL`. When used, it will automatically delete data from DynamoDB based on the table attribute:
74 |
75 | * `expireAfter` - Long - the timestamp expressed as epoch seconds after which the entry in DynamoDB may be expired by the TTL management process
76 |
77 | When a TTL value is found in the Stream configuration, the Archiver will automatically add the `expireAfter` attribute, set to the configured number of seconds after the archival time.
78 |
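The calculation behind the attribute is simple epoch arithmetic. As a minimal sketch, assuming `ttlSeconds` holds the TTL value read from the Stream's configuration:

```javascript
// now, expressed as epoch seconds, plus the configured retention period -
// once this moment passes, DynamoDB may remove the item via TTL
var expireAfter = Math.floor(Date.now() / 1000) + ttlSeconds;
```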
79 | _If you were to change your mind and no longer want TTL applied, you can delete the `expireAfter` attribute from every item in the table_
80 |
81 | ## Querying data from an archive
82 |
83 | You may want to query data that is stored in the archive, which is easy to do from the command line or programmatically. To access data using simple console tools, you can just run the `bin/queryArchive.js` script (a worked example follows the argument list):
84 |
85 | ```javascript
86 | node queryArchive.js <region> <streamName> <partitionKey> <sequenceStart> <sequenceEnd> <recordLimit>
87 | ```
88 |
89 | With the provided arguments:
90 |
91 | * `region` - The AWS Region where the Archive table is stored
92 | * `streamName` - The name of the Kinesis Stream being archived
93 | * `partitionKey` - The unique partition key value to be queried
94 | * `sequenceNumber` (start | end) - The sequence number to extract or, if both sequence number values are provided, the bounds of the sequence number range to look for. You can provide an empty string "" to indicate no value. Providing a sequence value for one argument and an empty string for the other will result in a range query being executed
95 | * `recordLimit` - This will only query the specified number of records from the table
96 |
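For example, to fetch up to 10 records for partition key `sensor-17` from the archive of `MyKinesisStream` in `us-east-1` (the stream and key names are illustrative), with no sequence filtering:

```
node queryArchive.js us-east-1 MyKinesisStream sensor-17 "" "" 10
```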
97 | In some cases, you may not know the partition key you are looking for, or may want to issue a more general query. In this case, you'll want to use the `bin/scanArchive.js` script (again, a worked example follows the argument list), which is invoked by:
98 |
99 | ```javascript
100 | node scanArchive.js <region> <streamName> <sequenceStart> <lastUpdateStart> <approximateArrivalStart> <recordLimit>
101 | ```
102 |
103 | With the provided arguments:
104 |
105 | * `region` - The AWS Region where the Archive table is stored
106 | * `streamName` - The name of the Kinesis Stream being archived
107 | * `sequenceStart` - The starting sequence number to read records from
108 | * `lastUpdateStart` - The starting date that the record was archived into the archive table
109 | * `approximateArrivalStart` - The starting date based upon the timestamp assigned by Amazon Kinesis when the original record was received
110 | * `recordLimit` - This will only query the specified number of records from the table
111 |
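For example, to scan up to 100 records from the archive of `MyKinesisStream` that were received by Kinesis on or after a given epoch timestamp (all values are illustrative):

```
node scanArchive.js us-east-1 MyKinesisStream "" "" 1501545600 100
```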
112 | ## Reprocessing records from an archive
113 |
114 | The above query and scan APIs give you flexibility in how you replay data back into a Stream. When you push data back into a Stream, it is definitely best practice to consider how your applications will know that the data is not original data, and to deal with it as a 'replay' of previously seen data. To facilitate this requirement, the reinjection model used by this API allows you to request that the original message metadata be reinjected along with the original message, which then gives you a contract in your processors that can be used to determine whether a message is in the correct format and whether it is being replayed. When requested, the reinject APIs will add the following data to the payload before sending the data to the specified Kinesis Stream:
115 |
116 | ```javascript
117 | {
118 |     "originalApproximateArrivalTimestamp": <approximate arrival timestamp of the original record>,
119 |     "originalShardId": <shard ID from which the original record was read>,
120 |     "originalSequenceNumber": <sequence number of the original record>,
121 |     "originalStreamName": <source Stream name> (only added if you are routing to a different stream)
122 | }
123 | ```
124 |
125 | When using the reinject APIs, you must supply a boolean value indicating whether this metadata should be added to replayed messages, and also a 'metadata separator', which is a character you supply to delineate the message metadata from the original message contents. Data stored in the archive is Base64 encoded, but reinjection will decode the data prior to creating the message to be reinjected. For example, if you called a reinject method with `method(true,'|')` and the original data in Kinesis was `HelloWorld`, you would get a value on the target Stream of `|<metadata>|HelloWorld`. The metadata separator is also placed at the beginning of the message, so that serialisers within your application can 'peek' at the first byte (given you use a single byte separator!) and decide on the type of message received:
126 |
127 | ```javascript
128 | var mysep = "|";
129 |
130 | var metadata;
131 | var messagePayload;
132 |
133 | // check if the first character of the record data matches the separator ('data' is the decoded string payload of the received record)
134 | if (data.charAt(0) === mysep) {
135 | // split the record on the separator
136 | var elements = data.split(mysep);
137 | // the first field is empty due to the separator prefix
138 | metadata = JSON.parse(elements[1]);
139 | messagePayload = elements[2];
140 | } else {
141 | // message doesn't have the separator character as the first value, so we'll assume it's an original message
142 | messagePayload = data;
143 | }
144 | ```
145 |
146 | It is for this reason that you should choose a separator character that is a single byte, and that is not allowed to appear at the beginning of your messages on the Stream (for instance, an unprintable character).
147 |
148 | ## API Access
149 |
150 | The above methods are simplistic interfaces to view data on stdout in a console. For most typical use cases, you will integrate with the query, scan and reinject methods using a programmatic interface. All the base interfaces for the API are provided in the `lib/archive-access.js` module, and generally take the same arguments as listed above. However, there is one major difference to consider. Because the query and scan operations in node.js are asynchronous APIs, this module uses the node.js module [async](https://caolan.github.io/async) to provide data for processing, and a callback method to indicate that all records have been provided by the API. As such, each API takes an argument which is an [async.queue](https://caolan.github.io/async/docs.html#queue) which can be configured to meet your application's requirements around concurrency and processing speed.
151 |
152 | ```javascript
153 | queryArchive = function(streamName, partitionKey, sequenceStart, sequenceEnd, recordLimit, recordQueue, callback)
154 | ```
155 |
156 | In this processing model, you need to follow the workflow:
157 |
158 | 1. Create an async queue worker, which handles the records received from the archive table. This worker has the signature `function(record, callback)`; it is supplied a record from the given API, and should call the provided callback when processing is completed. Records will only be removed from the async queue once all callbacks have completed
159 | 2. Create a marker variable which indicates whether the given API has completed supplying data
160 | 3. Create a `queue drain()` function, which is called whenever the queue is emptied. This could be called multiple times during the lifecycle of the queue, given that all queued callbacks could complete while the query method is still running. In this queue drain method, check the marker variable: unless it has been set by the API's final callback, the API is still running and more records may yet arrive.
161 | 4. Call the API, supplying the configured queue. You must also supply a callback, which is invoked when the API has completed. It is recommended that this callback set the value of the variable declared in `2`, which can then be monitored by the queue drain function.
162 |
163 | The integration model for working with these APIs can be seen in the example method which services the console APIs, such as `queryToCustomConsole`:
164 |
165 | ```javascript
166 | queryToCustomConsole = function (streamName, partitionKey, sequenceStart, sequenceEnd, recordLimit, threads,
167 | customConsole, callback) {
168 | // create a worker method that will be used for the queue callback
169 | var worker = function (record, wCallback) {
170 | // decode the data stored in the table
171 | var data = new Buffer(record.recordData.S, 'Base64').toString(intermediateEncoding);
172 | // write to the supplied console
173 | customConsole.log(data);
174 | wCallback();
175 | };
176 | // create the async queue with the requested number of threads
177 | var queue = async.queue(worker, threads);
178 |
179 | // create a marker variable to indicate whether the query API has supplied all records into the provided queue
180 | var queryCompleted = false;
181 | var queryErrors;
182 |
183 | /* create a queue drain method which is signalled every time the queue is emptied. This method will check the
184 | status of the query completed variable, and only calls the provided callback after 500 milliseconds, which
185 | will allow the queue to refill with async records if there is a delay in processing */
186 | queue.drain = function () {
187 | async.until(function () {
188 | // we're only done when the queryArchive api calls the final callback. This callback sets the queryCompleted flag to true
189 | return queryCompleted;
190 | }, function (untilCallback) {
191 | // call the provided callback after 500 millis to ensure we allow the queue to refill in case of a race
192 | setTimeout(function () {
193 | untilCallback();
194 | }, 500);
195 | }, function (err) {
196 | callback(err || queryErrors);
197 | });
198 | };
199 |
200 | // query the stored archive using the supplied filters
201 | queryArchive(streamName, partitionKey, sequenceStart, sequenceEnd, recordLimit, queue, function (err) {
202 | /* once this method has been called, the queryArchive API has completed sending records to the provided
203 | queue. However, this does not mean that the query workers have finished doing their job with the provided
204 | records */
205 | queryErrors = err;
206 | queryCompleted = true;
207 | });
208 | }
209 | ```
210 |
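Wiring the example above to the standard console is then a single call (the stream and key names are illustrative); this prints up to 10 records for `sensor-17` to stdout using one queue worker:

```javascript
queryToCustomConsole('MyKinesisStream', 'sensor-17', '', '', 10, 1, console, function (err) {
    if (err) {
        console.error(err);
    }
});
```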
211 | We hope that the comments in the code are enough information to allow you to create robust applications that work with the asynchronous nature of the API.
212 |
213 | ## Performing message replay
214 |
215 | For message reinjection, the API provides a queue worker which adds the required metadata to the original archived message, and then puts the records into the Kinesis Stream using a supplied Kinesis API client. Its interface is:
216 |
217 | ```javascript
218 | getReinjectWorker = function (sourceStreamName, targetStreamName, includeReinjectMetadata, metadataSeparator, kinesisClient)
219 | ```
220 |
221 | This interface allows you to create query or scan based access methods for the Archive table, and use the worker to reinject data easily. For example, to replay data based on a scan operation, we can use the example method:
222 |
223 | ```javascript
224 | reinjectWithScan = function (sourceStreamName, targetStreamName, sequenceStart, lastUpdateDateStart, approximateArrivalStart, recordLimit, includeReinjectMetadata, metadataSeparator, threads, callback) {
225 | var scanErrors;
226 | var scanCompleted = false;
227 |
228 | // get a new reinject worker
229 | var queue = async.queue(getReinjectWorker(sourceStreamName, targetStreamName, includeReinjectMetadata, metadataSeparator, this.kinesisClient), threads);
230 |
231 | queue.drain = function () {
232 | async.until(function () {
233 | return scanCompleted;
234 | }, function (untilCallback) {
235 | setTimeout(function () {
236 | untilCallback();
237 | }, 500);
238 | }, function (err) {
239 | callback(err || scanErrors);
240 | });
241 | };
242 |
243 | // scan through the stored archive using the supplied filters
244 | scanArchive(sourceStreamName, sequenceStart, lastUpdateDateStart, approximateArrivalStart, recordLimit, queue,
245 | function (err) {
246 | scanErrors = err;
247 | scanCompleted = true;
248 | });
249 | };
250 | ```
251 |
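Invoking a replay is then a single call (all names and values are illustrative). This example replays up to 500 archived records from `MyKinesisStream` into `MyOtherStream` using 5 concurrent workers, prefixing each message with metadata delimited by `|`:

```javascript
reinjectWithScan('MyKinesisStream', 'MyOtherStream', '', '', '', 500, true, '|', 5, function (err) {
    if (err) {
        console.error(err);
    }
});
```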
252 | ## Support
253 |
254 | Please note that the Amazon Kinesis Archiver is a community-maintained AWSLabs project, and is not supported directly by Amazon Web Services Support. If you have any problems, questions, or feature requests, please raise an issue here on GitHub.
255 |
256 | ----
257 |
258 | Amazon Kinesis Archiver
259 |
260 | Copyright 2017-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
261 |
262 | This library is licensed under the Apache 2.0 License.
263 |
--------------------------------------------------------------------------------
/lib/archive-access.js:
--------------------------------------------------------------------------------
1 | var debug = process.env['DEBUG'] || false;
2 | require("./constants");
3 | var common = require("./common");
4 | var aws;
5 | var kinesisClient;
6 | var dynamoDB;
7 | var async = require('async');
8 |
9 | module.exports = function (setRegion, kinesisClient, dynamoDB) {
10 | this.aws = require('aws-sdk');
11 |     if (setRegion) {
12 |         this.aws.config.update({
13 |             region: setRegion
14 |         });
15 |     } else {
16 |         // no region was supplied, so fall back
17 |         // to a default of us-east-1
18 |         this.aws.config.update({
19 |             region: 'us-east-1'
20 |         });
21 |
22 |     }
23 |
24 | if (kinesisClient) {
25 | this.kinesisClient = kinesisClient;
26 | } else {
27 | // configure a new connection to kinesis streams, if one has not
28 | // been provided
29 | if (!this.kinesisClient) {
30 | this.kinesisClient = new this.aws.Kinesis({
31 | apiVersion: '2013-12-02',
32 | region: this.aws.config.region
33 | });
34 | if (debug) {
35 | console.log("Connected to Amazon Kinesis Streams in " + this.kinesisClient.config.region);
36 | }
37 | }
38 | }
39 |
40 | if (dynamoDB) {
41 | this.dynamoDB = dynamoDB;
42 | } else {
43 | if (!this.dynamoDB) {
44 | this.dynamoDB = new this.aws.DynamoDB({
45 | apiVersion: '2012-08-10',
46 | region: this.aws.config.region
47 | });
48 | if (debug) {
49 | console.log("Connected to Amazon DynamoDB in " + this.dynamoDB.config.region);
50 | }
51 | }
52 | }
53 |
54 | console.log("AWS Kinesis Stream Archive Query Engine online in " + this.aws.config.region);
55 |
56 | /* example method to perform a re-inject with a scan operation. The same method can be used for query etc */
57 | reinjectWithScan = function (sourceStreamName, targetStreamName, sequenceStart, lastUpdateDateStart, approximateArrivalStart, recordLimit, includeReinjectMetadata, metadataSeparator, threads, callback) {
58 | var scanErrors;
59 | var scanCompleted = false;
60 |
61 | // get a new reinject worker
62 | var queue = async.queue(getReinjectWorker(sourceStreamName, targetStreamName, includeReinjectMetadata, metadataSeparator, this.kinesisClient), threads);
63 |
64 | queue.drain = function () {
65 | async.until(function () {
66 | return scanCompleted;
67 | }, function (untilCallback) {
68 | setTimeout(function () {
69 | untilCallback();
70 | }, 500);
71 | }, function (err) {
72 | callback(err || scanErrors);
73 | });
74 | };
75 |
76 | // scan through the stored archive using the supplied filters
77 | scanArchive(sourceStreamName, sequenceStart, lastUpdateDateStart, approximateArrivalStart, recordLimit, queue,
78 | function (err) {
79 | scanErrors = err;
80 | scanCompleted = true;
81 | });
82 | };
83 |
84 | /**
85 | * function which acts as a reinjection queue, and which can be supplied to the query or scan API's
86 | */
87 | getReinjectWorker = function (sourceStreamName, targetStreamName, includeReinjectMetadata, metadataSeparator, kinesisClient) {
88 | var destinationStream = (targetStreamName ? targetStreamName : sourceStreamName);
89 |
90 | var worker = function (record, wCallback) {
91 | // decode the data stored in the table
92 | var data = new Buffer(record.recordData.S, 'Base64').toString(intermediateEncoding);
93 |
94 | // process whether we should add the original metadata to
95 | // the
96 | // message before emitting
97 | if (includeReinjectMetadata) {
98 | var metadata = {
99 | "originalApproximateArrivalTimestamp": record.approximateArrivalTimestamp.N,
100 | "originalShardId": record.shardId.S,
101 | "originalSequenceNumber": record.sequenceNumber.S
102 | };
103 |
104 |                 if (targetStreamName && sourceStreamName !== targetStreamName) {
105 | metadata['originalStreamName'] = sourceStreamName;
106 | }
107 |
108 | data = metadataSeparator + JSON.stringify(metadata) + metadataSeparator + data;
109 | }
110 |
111 | var params = {
112 | StreamName: destinationStream,
113 | PartitionKey: record.partitionKey.S,
114 | Data: new Buffer(data, intermediateEncoding)
115 | };
116 |
117 | if (debug) {
118 | console.log(params.Data.toString(intermediateEncoding));
119 | }
120 |
121 | kinesisClient.putRecord(params, function (err, data) {
122 | wCallback(err, data);
123 | });
124 | };
125 |
126 | return worker;
127 | }
128 |
129 | /**
130 | * Simple wrapper for a custom console using the default stdout/stderr
131 | * console
132 | */
133 | scanToStdConsole = function (streamName, sequenceStart, lastUpdateDateStart, approximateArrivalStart, recordLimit,
134 | callback) {
135 | // create default console
136 |         const c = require('console');
137 |
138 |         // wrap the custom console scan
139 | scanToCustomConsole(streamName, sequenceStart, lastUpdateDateStart, approximateArrivalStart, recordLimit, c, 1,
140 | callback);
141 | }
142 |
143 | /**
144 | * Method to perform a scan and write it to the specified console object.
145 | * Using this method should enable developers to create custom IO handlers
146 | */
147 | scanToCustomConsole = function (streamName, sequenceStart, lastUpdateDateStart, approximateArrivalStart,
148 | recordLimit, console, threads, callback) {
149 | var worker = function (record, wCallback) {
150 | // decode the data stored in the table
151 | var data = new Buffer(record.recordData.S, 'Base64').toString(intermediateEncoding);
152 | // write to the supplied console
153 | console.log(data);
154 | wCallback();
155 | };
156 | var queue = async.queue(worker, threads);
157 | var scanCompleted = false;
158 | var scanErrors;
159 | queue.drain = function () {
160 | async.until(function () {
161 | return scanCompleted;
162 | }, function (untilCallback) {
163 | setTimeout(function () {
164 | untilCallback();
165 | }, 500);
166 | }, function (err) {
167 | callback(err || scanErrors);
168 | });
169 | };
170 |
171 | // scan through the stored archive using the supplied filters
172 | scanArchive(streamName, sequenceStart, lastUpdateDateStart, approximateArrivalStart, recordLimit, queue,
173 | function (err) {
174 | scanErrors = err;
175 | scanCompleted = true;
176 | });
177 | }
178 | /**
179 | * function interface to scan through the archive sequentially with the
180 | * potential filters provided
181 | */
182 | scanArchive = function (streamName, sequenceStart, lastUpdateDateStart, approximateArrivalStart, recordLimit,
183 | recordQueue, callback) {
184 | if (!recordQueue) {
185 | callback("You must provide an Async.queue (https://caolan.github.io/async/docs.html#queue) to process records");
186 | } else {
187 | var streamModeCache = {};
188 | common.getArchiveSettingsForStream(streamName, streamModeCache, this.kinesisClient, true, function (err) {
189 | if (err) {
190 |                     callback(err);
191 | } else {
192 | var tableName = streamModeCache[streamName].tableName;
193 |
194 | // build up the filter conditions
195 | var params = {
196 | TableName: tableName
197 | };
198 | if (recordLimit) {
199 | params.Limit = recordLimit;
200 | }
201 | // function to build the filter expression information based
202 | // on what's supplied
203 |                     var t = function (alias, filter, type) {
204 |                         if (filter) {
205 |                             if (!params.FilterExpression) {
206 |                                 params.FilterExpression = "";
207 |                                 params.ExpressionAttributeNames = {};
208 |                                 params.ExpressionAttributeValues = {};
209 |                             } else {
210 |                                 params.FilterExpression += " and ";
211 |                             }
212 |                             params.FilterExpression += "#" + alias + " >= :" + alias;
213 |                             params.ExpressionAttributeNames["#" + alias] = alias;
214 |                             params.ExpressionAttributeValues[":" + alias] = {};
215 |                             params.ExpressionAttributeValues[":" + alias][type] = filter;
216 |                         }
217 |                     };
218 |
219 | // process each filter that might have been provided - all
220 | // are
221 | // supported concurrently
222 | t(sortKeyName, sequenceStart, 'S');
223 | t(lastUpdateDateName, lastUpdateDateStart, 'S');
224 | t(approximateArrivalName, approximateArrivalStart, 'N');
225 |
226 | // issue the scan operation
227 | var moreRecords = true;
228 | async.whilst(function () {
229 | return moreRecords;
230 | }, function (whilstCallback) {
231 | this.dynamoDB.scan(params, function (err, data) {
232 | if (err) {
233 | whilstCallback(err);
234 | } else {
235 | if (!data || !data.Items) {
236 | moreRecords = false;
237 | whilstCallback();
238 | } else {
239 | // process each record
240 | data.Items.map(function (item) {
241 | // invoke the provided per-record
242 | // callback
243 | recordQueue.push(item, function (err) {
244 | if (err) {
245 | console.log(err);
246 | }
247 | });
248 | });
249 |
250 | // process the output of the scan api so we
251 | // know
252 | // if we need to continue
253 | if (!data.LastEvaluatedKey) {
254 | // this is the last page of query
255 | // results,
256 | // so mark that we are done to the async
257 | // iterator
258 | moreRecords = false;
259 |                                 } else {
260 |                                     // more records to come - the
261 |                                     // LastEvaluatedKey returned by the
262 |                                     // scan is already a fully formed Key
263 |                                     // map, so bind it directly as the
264 |                                     // exclusive start key for the next
265 |                                     // page
266 |                                     params.ExclusiveStartKey =
267 |                                         data.LastEvaluatedKey;
268 |                                 }
269 |
270 | whilstCallback();
271 | }
272 | }
273 | });
274 | }, function (err) {
275 | if (callback) {
276 | callback(err);
277 | }
278 | });
279 | }
280 | });
281 | }
282 | };
283 |
284 | /**
285 | * Simple wrapper for a custom console using the default stdout/stderr
286 | * console
287 | */
288 | queryToStdConsole = function (streamName, partitionKey, sequenceStart, sequenceEnd, recordLimit, callback) {
289 |         // create default console
290 |         const c = require('console');
291 |
292 |         // wrap the custom console query
293 |         queryToCustomConsole(streamName, partitionKey, sequenceStart, sequenceEnd, recordLimit, 1, c, callback);
294 | }
295 |
296 | /**
297 | * Method to perform a query and write it to the specified console object.
298 | * Using this method should enable developers to create custom IO handlers
299 | */
300 | queryToCustomConsole = function (streamName, partitionKey, sequenceStart, sequenceEnd, recordLimit, threads,
301 | customConsole, callback) {
302 | // create a worker method that will be used for the queue callback
303 | var worker = function (record, wCallback) {
304 | // decode the data stored in the table
305 | var data = new Buffer(record.recordData.S, 'Base64').toString(intermediateEncoding);
306 | // write to the supplied console
307 | customConsole.log(data);
308 | wCallback();
309 | };
310 | // create the async queue with the requested number of threads
311 | var queue = async.queue(worker, threads);
312 |
313 | // create a marker variable to indicate whether the query API has supplied all records into the provided queue
314 | var queryCompleted = false;
315 | var queryErrors;
316 |
317 | /* create a queue drain method which is signalled every time the queue is emptied. This method will check the
318 | status of the query completed variable, and only calls the provided callback after 500 milliseconds, which
319 | will allow the queue to refill with async records if there is a delay in processing */
320 | queue.drain = function () {
321 | async.until(function () {
322 | // we're only done when the queryArchive api calls the final callback. This callback sets the queryCompleted flag to true
323 | return queryCompleted;
324 | }, function (untilCallback) {
325 | // call the provided callback after 500 millis to ensure we allow the queue to refill in case of a race
326 | setTimeout(function () {
327 | untilCallback();
328 | }, 500);
329 | }, function (err) {
330 | callback(err || queryErrors);
331 | });
332 | };
333 |
334 | // query the stored archive using the supplied filters
335 | queryArchive(streamName, partitionKey, sequenceStart, sequenceEnd, recordLimit, queue, function (err) {
336 | /* once this method has been called, the queryArchive API has completed sending records to the provided
337 | queue. However, this does not mean that the query workers have finished doing their job with the provided
338 | records */
339 | queryErrors = err;
340 | queryCompleted = true;
341 | });
342 | }
343 |
344 | /**
345 | * function to get or query the archive store with specific values
346 | */
347 | queryArchive = function (streamName, partitionKey, sequenceStart, sequenceEnd, recordLimit, recordQueue, callback) {
348 | var streamModeCache = {};
349 | common.getArchiveSettingsForStream(streamName, streamModeCache, this.kinesisClient, true, function (err) {
350 | if (err) {
351 | callback(err);
352 | } else {
353 | var tableName = streamModeCache[streamName].tableName;
354 |
355 | if (sequenceStart && sequenceEnd && sequenceStart == sequenceEnd) {
356 | var params = {
357 | TableName: tableName,
358 | Key: {}
359 | };
360 | params.Key[partitionKeyName] = {
361 | S: partitionKey
362 | };
363 | // we're just going to fetch the requested record from the
364 | // archive
365 | if (streamModeCache[streamName][RECOVERY_MODE_TAG_NAME] == RECOVERY_MODE_ALL) {
366 | // add the sort key for the supplied sequence numbers if
367 | // all data is captured
368 | params.Key[sortKeyName] = {
369 | S: sequenceStart
370 | };
371 | } else {
372 | console.log("WARN: Sequence information supplied but archive mode is " + RECOVERY_MODE_LATEST);
373 | }
374 |
375 | if (debug) {
376 | console.log("Query Parameters: " + JSON.stringify(params));
377 | }
378 |
379 | this.dynamoDB.getItem(params, function (err, data) {
380 | if (err) {
381 | callback(err);
382 | } else {
383 | // call the per-record callback with the supplied
384 | // final callback indicating we are done
385 | recordQueue.push(data.Item, function (err) {
386 | if (err) {
387 | console.error(err);
388 | }
389 | callback(err);
390 | });
391 | }
392 | });
393 | } else {
394 | // we'll implement a record query
395 | var params = {
396 | TableName: tableName,
397 | Select: 'ALL_ATTRIBUTES',
398 | KeyConditionExpression: "#partitionKeyName = :partitionKey"
399 | };
400 | params.ExpressionAttributeNames = {
401 | "#partitionKeyName": partitionKeyName
402 | };
403 | params.ExpressionAttributeValues = {
404 | ":partitionKey": {
405 | S: partitionKey
406 | }
407 | };
408 | if (recordLimit) {
409 | params.Limit = recordLimit;
410 | }
411 | if (sequenceStart && !sequenceEnd) {
412 | params.KeyConditionExpression += " and #sortKey = :sequenceStart";
413 | params.ExpressionAttributeNames["#sortKey"] = sortKeyName;
414 | params.ExpressionAttributeValues[":sequenceStart"] = {
415 | S: sequenceStart
416 | };
417 | } else if (sequenceStart && sequenceEnd) {
418 | params.KeyConditionExpression += " and #sortKey between :sequenceStart and :sequenceEnd";
419 | params.ExpressionAttributeNames["#sortKey"] = sortKeyName;
420 | params.ExpressionAttributeValues[":sequenceStart"] = {
421 | S: sequenceStart
422 | };
423 | params.ExpressionAttributeValues[":sequenceEnd"] = {
424 | S: sequenceEnd
425 | };
426 | } else {
427 | if (sequenceStart) {
428 | params.KeyConditionExpression += " and #sortKey >= :sequenceStart";
429 | params.ExpressionAttributeNames["#sortKey"] = sortKeyName;
430 | params.ExpressionAttributeValues[":sequenceStart"] = {
431 | S: sequenceStart
432 | };
433 | }
434 |
435 | if (sequenceEnd) {
436 | params.KeyConditionExpression += " and #sortKey <= :sequenceEnd";
437 | params.ExpressionAttributeNames["#sortKey"] = sortKeyName;
438 | params.ExpressionAttributeValues[":sequenceEnd"] = {
439 | S: sequenceEnd
440 | };
441 | }
442 | }
443 |
444 | if (debug) {
445 | console.log("Query Parameters: " + JSON.stringify(params));
446 | }
447 |
448 | var moreRecords = true;
449 | async.whilst(function () {
450 | return moreRecords;
451 | }, function (whilstCallback) {
452 | this.dynamoDB.query(params, function (err, data) {
453 | if (err) {
454 | whilstCallback(err);
455 | } else {
456 | // process each record
457 | data.Items.map(function (item) {
458 | recordQueue.push(item, function (err) {
459 | if (err) {
460 | console.log(err);
461 | }
462 | });
463 | });
464 |
465 | if (!data.LastEvaluatedKey) {
466 | // this is the last page of query results,
467 | // so
468 | // mark that we are done to the async
469 | // iterator
470 | moreRecords = false;
471 |                             } else {
472 |                                 // more records to come - the
473 |                                 // LastEvaluatedKey returned by the query
474 |                                 // is already a fully formed Key map, so
475 |                                 // bind it directly as the exclusive
476 |                                 // start key for the next page
477 |                                 params.ExclusiveStartKey =
478 |                                     data.LastEvaluatedKey;
479 |                             }
480 | whilstCallback();
481 | }
482 | });
483 | }, function (err) {
484 | if (callback) {
485 | callback(err);
486 | }
487 | });
488 | }
489 | }
490 | });
491 | };
492 |
493 | return this;
494 | };
--------------------------------------------------------------------------------