├── .gitignore ├── WebContent ├── META-INF │ └── MANIFEST.MF ├── images │ ├── drive.png │ ├── server.png │ ├── database.png │ └── README ├── .ebextensions │ └── packages.config ├── index.html ├── WEB-INF │ └── web.xml └── styles │ └── styles.css ├── src ├── resources │ ├── log4j.properties │ └── connector.properties └── com │ └── amazonaws │ └── services │ ├── kinesis │ └── connectors │ │ ├── impl │ │ ├── AllPassFilter.java │ │ ├── StringToByteArrayTransformer.java │ │ ├── StringToStringTransformer.java │ │ ├── JsonToByteArrayTransformer.java │ │ └── BasicMemoryBuffer.java │ │ ├── interfaces │ │ ├── IFilter.java │ │ ├── ITransformer.java │ │ ├── ICollectionTransformer.java │ │ ├── ITransformerBase.java │ │ ├── IKinesisConnectorPipeline.java │ │ ├── IEmitter.java │ │ └── IBuffer.java │ │ ├── redshift │ │ ├── RedshiftTransformer.java │ │ ├── RedshiftBasicEmitter.java │ │ └── RedshiftManifestEmitter.java │ │ ├── BasicJsonTransformer.java │ │ ├── KinesisConnectorRecordProcessorFactory.java │ │ ├── UnmodifiableBuffer.java │ │ ├── s3 │ │ ├── S3ManifestEmitter.java │ │ └── S3Emitter.java │ │ ├── KinesisConnectorExecutorBase.java │ │ ├── KinesisConnectorRecordProcessor.java │ │ └── KinesisConnectorConfiguration.java │ └── cognito │ └── streams │ └── connector │ ├── AmazonCognitoStreamsServletInitiator.java │ ├── AmazonCognitoStreamsRecordBean.java │ ├── AmazonCognitoStreamsRedshiftEmitter.java │ ├── AmazonCognitoStreamsConnectorPipeline.java │ ├── AmazonCognitoStreamsConnectorExecutor.java │ ├── AmazonCognitoStreamsEventBean.java │ ├── AmazonCognitoStreamsEnvironmentOptions.java │ └── AmazonCognitoStreamsEventBeanTransformer.java ├── LICENSE.txt ├── README.md ├── pom.xml └── CognitoStreamsSample.json /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | target 3 | .settings 4 | .project 5 | .classpath 6 | -------------------------------------------------------------------------------- /WebContent/META-INF/MANIFEST.MF: -------------------------------------------------------------------------------- 1 | Manifest-Version: 1.0 2 | Class-Path: 3 | 4 | -------------------------------------------------------------------------------- /WebContent/images/drive.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-archives/amazon-cognito-streams-sample/HEAD/WebContent/images/drive.png -------------------------------------------------------------------------------- /WebContent/images/server.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-archives/amazon-cognito-streams-sample/HEAD/WebContent/images/server.png -------------------------------------------------------------------------------- /WebContent/images/database.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amazon-archives/amazon-cognito-streams-sample/HEAD/WebContent/images/database.png -------------------------------------------------------------------------------- /WebContent/.ebextensions/packages.config: -------------------------------------------------------------------------------- 1 | container_commands: 2 | driver: 3 | command: cp WEB-INF/lib/postgresql-*.jar /usr/share/java/tomcat7/ 4 | -------------------------------------------------------------------------------- /WebContent/index.html: -------------------------------------------------------------------------------- 1 
| 2 | 3 | Amazon Cognito Streams Sample 4 | 5 | 6 | OK - Amazon Cognito Streams Sample 7 | 8 | 9 | -------------------------------------------------------------------------------- /WebContent/images/README: -------------------------------------------------------------------------------- 1 | Icons Copyright (c) Yusuke Kamiyamane. 2 | Licensed under a Creative Commons Attribution 3.0 license. 3 | 4 | http://p.yusukekamiyamane.com 5 | http://creativecommons.org/licenses/by/3.0/ 6 | -------------------------------------------------------------------------------- /src/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Root logger option 2 | log4j.rootLogger=INFO, stdout 3 | 4 | # Direct log messages to stdout 5 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 6 | log4j.appender.stdout.Target=System.out 7 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 8 | log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n -------------------------------------------------------------------------------- /WebContent/WEB-INF/web.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | AmazonCognitoStreamsSample 7 | 8 | index.html 9 | 10 | 11 | com.amazonaws.services.cognito.streams.connector.AmazonCognitoStreamsServletInitiator 12 | 13 | 14 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/kinesis/connectors/impl/AllPassFilter.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.kinesis.connectors.impl; 6 | 7 | import com.amazonaws.services.kinesis.connectors.interfaces.IFilter; 8 | 9 | /** 10 | * This class is a basic implementation of IFilter that returns true for all records. 11 | * 12 | * @param 13 | */ 14 | public class AllPassFilter implements IFilter { 15 | 16 | @Override 17 | public boolean keepRecord(T record) { 18 | return true; 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /src/resources/connector.properties: -------------------------------------------------------------------------------- 1 | # KinesisConnector Application Settings 2 | appName = AmazonCognitoStreamsSample 3 | 4 | # Leaving these values unset to allow defaults (which have been used since launch) for S3 5 | # to continue to operate 6 | # 1MB = 1024*1024 = 1048756 7 | bufferRecordCountLimit = 25 8 | bufferSizeByteLimit = 1048576 9 | bufferMillisecondsLimit = 60000 10 | # Due to maintenance redshift cluster becomes unavailable for around 10 minutes, 11 | # so retry for 20 minutes 12 | backoffInterval = 120000 13 | retryLimit = 10 14 | 15 | # set maxRecords to be the same as the buffer size above, otherwise it'll be 16 | # ignored 17 | maxRecords = 25 18 | 19 | # Redshift parameters for KinesisConnector 20 | redshiftDataTable = cognito_raw_data 21 | #redshiftEndpoint = 22 | #redshiftUsername = 23 | #redshiftPassword = 24 | #redshiftURL = 25 | redshiftDataDelimiter = | 26 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/kinesis/connectors/interfaces/IFilter.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. 
or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.kinesis.connectors.interfaces; 6 | 7 | /** 8 | * The IFilter is associated with an IBuffer. The IBuffer may use the result of calling the 9 | * keepRecord() method to decide whether to store a record or discard it. 10 | * 11 | * @param 12 | * the data type stored in the record 13 | */ 14 | public interface IFilter { 15 | 16 | /** 17 | * A method enabling the buffer to filter records. Return false if you don't want to hold on to 18 | * the record. 19 | * 20 | * @param record 21 | * @return true if the record should be added to the buffer. 22 | */ 23 | public boolean keepRecord(T record); 24 | 25 | } 26 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/kinesis/connectors/impl/StringToByteArrayTransformer.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.kinesis.connectors.impl; 6 | 7 | import com.amazonaws.services.kinesis.connectors.interfaces.ITransformer; 8 | import com.amazonaws.services.kinesis.model.Record; 9 | 10 | /** 11 | * This class is an implementation of the ITransformer to transform between raw Amazon Kinesis records and 12 | * strings. It assumes that the Record parameter of toClass() is a byte array representation of a 13 | * string. 14 | * 15 | */ 16 | public class StringToByteArrayTransformer implements ITransformer { 17 | 18 | @Override 19 | public String toClass(Record record) { 20 | return new String(record.getData().array()); 21 | } 22 | 23 | @Override 24 | public byte[] fromClass(String record) { 25 | return record.getBytes(); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/kinesis/connectors/impl/StringToStringTransformer.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 
4 | */ 5 | package com.amazonaws.services.kinesis.connectors.impl; 6 | 7 | import com.amazonaws.services.kinesis.connectors.interfaces.ITransformer; 8 | import com.amazonaws.services.kinesis.model.Record; 9 | 10 | /** 11 | * This class is an implementation of the ITransformer to transform between raw Amazon Kinesis records and 12 | * strings. It assumes that the Record parameter of toClass() is a byte array representation of a 13 | * string. This is useful for the RedshiftManifestEmitter to perform an Amazon Redshift copy on a file name 14 | * specified in a String. 15 | * 16 | */ 17 | public class StringToStringTransformer implements ITransformer { 18 | 19 | @Override 20 | public String toClass(Record record) { 21 | return new String(record.getData().array()); 22 | } 23 | 24 | @Override 25 | public String fromClass(String record) { 26 | return record; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/kinesis/connectors/interfaces/ITransformer.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.kinesis.connectors.interfaces; 6 | 7 | import java.io.IOException; 8 | 9 | import com.amazonaws.services.kinesis.model.Record; 10 | 11 | /** 12 | * ITransformer is used to transform data from a Record (byte array) to the data model class (T) for 13 | * processing in the application and from the data model class to the output type (U) for the 14 | * emitter. 15 | * 16 | * @param 17 | * the data type stored in the record 18 | * @param 19 | * the data type to emit 20 | */ 21 | public interface ITransformer extends ITransformerBase { 22 | /** 23 | * Transform record into an object of its original class. 24 | * 25 | * @param record 26 | * raw record from the Amazon Kinesis stream 27 | * @return data as its original class 28 | * @throws IOException 29 | * could not convert the record to a T 30 | */ 31 | public T toClass(Record record) throws IOException; 32 | } 33 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/kinesis/connectors/interfaces/ICollectionTransformer.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.kinesis.connectors.interfaces; 6 | 7 | import java.io.IOException; 8 | import java.util.Collection; 9 | 10 | import com.amazonaws.services.kinesis.model.Record; 11 | 12 | /** 13 | * ICollectionTransformer is used to transform data from an Amazon Kinesis Record (byte array) to 14 | * a collection of the data model class (T) for processing in the application and from the 15 | * data model class to the output type (U) for the emitter. 16 | * 17 | * @param 18 | * the data type stored in the record 19 | * @param 20 | * the data type to emit 21 | */ 22 | public interface ICollectionTransformer extends ITransformerBase { 23 | /** 24 | * Transform record into a collection of object of their original class. 
25 | * 26 | * @param record 27 | * raw record from the Amazon Kinesis stream 28 | * @return data as its original class 29 | * @throws IOException 30 | * could not convert the record to a Collection 31 | */ 32 | public Collection toClass(Record record) throws IOException; 33 | } 34 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/kinesis/connectors/interfaces/ITransformerBase.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.kinesis.connectors.interfaces; 6 | 7 | import java.io.IOException; 8 | 9 | /** 10 | * Base transformer class to provide backwards compatibility with ITransformer 11 | * while supporting the ICollectionTransformer. 12 | * 13 | * This class is not meant to be implemented. Instead, implement ITransformer 14 | * or ICollectionTransformer depending on the type of PutRecordRequests you are 15 | * doing for proper toClass transforms. 16 | * - Use ITransformer if each Amazon Kinesis Record contains one object of type T. 17 | * - Use ICollectionTransformer if each Amazon Kinesis Record contains a Collection 18 | * (batched PutRecordRequests). 19 | * 20 | * @param 21 | * the data type stored in the record 22 | * @param 23 | * the data type to emit 24 | */ 25 | public abstract interface ITransformerBase { 26 | 27 | /** 28 | * Transform record from its original class to final output class. 29 | * 30 | * @param record 31 | * data as its original class 32 | * @return U 33 | * the object as its final class 34 | */ 35 | public U fromClass(T record) throws IOException; 36 | } 37 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/cognito/streams/connector/AmazonCognitoStreamsServletInitiator.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.cognito.streams.connector; 6 | 7 | import javax.servlet.ServletContextEvent; 8 | import javax.servlet.ServletContextListener; 9 | 10 | import org.apache.commons.logging.Log; 11 | import org.apache.commons.logging.LogFactory; 12 | 13 | /** 14 | * Wrapper class to make tomcat run our worker even though we're not running real servlets. 
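 *
 * For context, this class is registered as a ServletContextListener in
 * WebContent/WEB-INF/web.xml. A minimal registration is sketched below; the element names are the
 * standard servlet deployment-descriptor ones, and the listener class is the one defined in this file:
 *
 *   <listener>
 *     <listener-class>com.amazonaws.services.cognito.streams.connector.AmazonCognitoStreamsServletInitiator</listener-class>
 *   </listener>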
15 | */ 16 | public class AmazonCognitoStreamsServletInitiator implements ServletContextListener { 17 | private static final Log LOG = LogFactory.getLog(AmazonCognitoStreamsServletInitiator.class); 18 | 19 | @Override 20 | public void contextDestroyed(ServletContextEvent arg0) { 21 | } 22 | 23 | @Override 24 | public void contextInitialized(ServletContextEvent arg0) { 25 | // Load the JDBC Driver 26 | try { 27 | LOG.info("Loading driver..."); 28 | Class.forName("org.postgresql.Driver"); 29 | LOG.info("Driver loaded!"); 30 | } catch (ClassNotFoundException e) { 31 | LOG.error(e); 32 | throw new RuntimeException("Cannot find the driver in the classpath!", e); 33 | } 34 | 35 | AmazonCognitoStreamsConnectorExecutor worker = new AmazonCognitoStreamsConnectorExecutor(); 36 | new Thread(worker).start(); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/kinesis/connectors/impl/JsonToByteArrayTransformer.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.kinesis.connectors.impl; 6 | 7 | import java.io.IOException; 8 | 9 | import org.apache.commons.logging.Log; 10 | import org.apache.commons.logging.LogFactory; 11 | 12 | import com.amazonaws.services.kinesis.connectors.BasicJsonTransformer; 13 | import com.fasterxml.jackson.core.JsonProcessingException; 14 | import com.fasterxml.jackson.databind.ObjectMapper; 15 | 16 | /** 17 | * The JsonToByteArrayTransformer defines a BasicJsonTransformer with byte array for its output 18 | * type. This allows for data to be sent to Amazon S3 or an Amazon Kinesis stream. 19 | */ 20 | public class JsonToByteArrayTransformer extends BasicJsonTransformer { 21 | private static final Log LOG = LogFactory.getLog(JsonToByteArrayTransformer.class); 22 | 23 | public JsonToByteArrayTransformer(Class inputClass) { 24 | super(inputClass); 25 | } 26 | 27 | @Override 28 | public byte[] fromClass(T record) throws IOException { 29 | try { 30 | return new ObjectMapper().writeValueAsString(record).getBytes(); 31 | } catch (JsonProcessingException e) { 32 | String message = "Error parsing record to JSON"; 33 | LOG.error(message, e); 34 | throw new IOException(message, e); 35 | } 36 | 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Amazon Cognito Streams connector for Amazon Redshift 2 | 3 | This example application is meant to demonstrate how to consume [Amazon Cognito streams](http://mobile.awsblog.com/post/Tx35782XTJZROY4) and model data in Amazon Redshift. 4 | 5 | ## Building the sample 6 | 7 | This sample is designed to be built with Maven. All requisite dependencies are captured in the `pom.xml`. Simply use `mvn package` to build the war and deploy to Elastic Beanstalk or directly to EC2. 8 | 9 | ## Configuring the sample 10 | 11 | The sample requires the following environment variables be set: 12 | 13 | * `JDBC_CONNECTION_STRING` - This JDBC connection string for your Amazon Redshift cluster. 14 | * `KINESIS_INPUT_STREAM` (or `PARAM1`) - The name of the Kinesis stream to look for Amazon Cognito updates. 15 | * `REDSHIFT_USER_NAME` (or `PARAM2`) - The master user name for your Amazon Redshift cluster. 
16 | * `REDSHIFT_PASSWORD` (or `PARAM3`) - The master user password for your Amazon Redshift cluster. 17 | * `S3_BUCKET_NAME` (or `PARAM4`) - The name of the S3 bucket to use for intermediate storage of data. This bucket should be configured to delete old data via S3 lifecycle configuration. 18 | * `REGION` (or `PARAM5`) - The region of all your resources. 19 | 20 | ## Deploying the sample 21 | 22 | We've included a [CloudFormation template](CognitoStreamsSample.json) for deploying a binary version of this sample. In addition to configuring the above environment variables, it will create the Amazon Redshift cluster and other associated resources. Consider using this to get started and for deploying future updates to this sample. 23 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/kinesis/connectors/redshift/RedshiftTransformer.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.kinesis.connectors.redshift; 6 | 7 | import com.amazonaws.services.kinesis.connectors.impl.JsonToByteArrayTransformer; 8 | 9 | /** 10 | * This class is an implementation of the ITransformer interface and an extension of the 11 | * BasicJsonTransformer class. The abstract method toDelimitedString() requires implementing classes 12 | * to output a delimited-string representation of the data model that is compatible with an 13 | * insertion into Amazon Redshift. 14 | * 15 | * @param 16 | */ 17 | public abstract class RedshiftTransformer extends JsonToByteArrayTransformer { 18 | 19 | public RedshiftTransformer(Class clazz) { 20 | super(clazz); 21 | } 22 | 23 | /** 24 | * This method requires implementing classes to output a string representation of the data model 25 | * that is compatible with Amazon Redshift. This string will be used to insert records into an Amazon Redshift 26 | * table, and should be in a delimited format. 27 | * 28 | * @param recordObject 29 | * the instance of the data model to convert to delimited string. 30 | * @return a delimited string representation of the data model that is compatible with Amazon Redshift 31 | */ 32 | public abstract String toDelimitedString(T recordObject); 33 | 34 | @Override 35 | public byte[] fromClass(T record) { 36 | return toDelimitedString(record).getBytes(); 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/kinesis/connectors/BasicJsonTransformer.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.kinesis.connectors; 6 | 7 | import java.io.IOException; 8 | 9 | import org.apache.commons.logging.Log; 10 | import org.apache.commons.logging.LogFactory; 11 | 12 | import com.amazonaws.services.kinesis.connectors.interfaces.ITransformer; 13 | import com.amazonaws.services.kinesis.model.Record; 14 | import com.fasterxml.jackson.databind.ObjectMapper; 15 | 16 | /** 17 | * This class implements the ITransformer interface and provides an implementation of the toClass() 18 | * method for deserializing and serializing JSON strings. The constructor takes the class to 19 | * transform to/from JSON. 
The Record parameter of the toClass() method is expected to contain a 20 | * byte representation of a JSON string. 21 | * 22 | * @param 23 | */ 24 | public abstract class BasicJsonTransformer implements ITransformer { 25 | private static final Log LOG = LogFactory.getLog(BasicJsonTransformer.class); 26 | protected Class inputClass; 27 | 28 | public BasicJsonTransformer(Class inputClass) { 29 | this.inputClass = inputClass; 30 | } 31 | 32 | @Override 33 | public T toClass(Record record) throws IOException { 34 | try { 35 | return new ObjectMapper().readValue(record.getData().array(), this.inputClass); 36 | } catch (IOException e) { 37 | String message = "Error parsing record from JSON: " + new String(record.getData().array()); 38 | LOG.error(message, e); 39 | throw new IOException(message, e); 40 | } 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/cognito/streams/connector/AmazonCognitoStreamsRecordBean.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.cognito.streams.connector; 6 | 7 | import java.util.Date; 8 | 9 | import com.fasterxml.jackson.annotation.JsonIgnoreProperties; 10 | 11 | /** 12 | * Bean that represents a single change in a dataset. 13 | */ 14 | @JsonIgnoreProperties(ignoreUnknown = true) 15 | public class AmazonCognitoStreamsRecordBean { 16 | private String key; 17 | 18 | private String value; 19 | 20 | private Long syncCount; 21 | 22 | private Date lastModifiedDate; 23 | 24 | private Date deviceLastModifiedDate; 25 | 26 | private String op; 27 | 28 | public String getKey() { 29 | return key; 30 | } 31 | 32 | public void setKey(String key) { 33 | this.key = key; 34 | } 35 | 36 | public String getValue() { 37 | return value; 38 | } 39 | 40 | public void setValue(String value) { 41 | this.value = value; 42 | } 43 | 44 | public Long getSyncCount() { 45 | return syncCount; 46 | } 47 | 48 | public void setSyncCount(Long syncCount) { 49 | this.syncCount = syncCount; 50 | } 51 | 52 | public Date getLastModifiedDate() { 53 | return lastModifiedDate; 54 | } 55 | 56 | public void setLastModifiedDate(Date lastModifiedDate) { 57 | this.lastModifiedDate = lastModifiedDate; 58 | } 59 | 60 | public Date getDeviceLastModifiedDate() { 61 | return deviceLastModifiedDate; 62 | } 63 | 64 | public void setDeviceLastModifiedDate(Date deviceLastModifiedDate) { 65 | this.deviceLastModifiedDate = deviceLastModifiedDate; 66 | } 67 | 68 | public String getOp() { 69 | return op; 70 | } 71 | 72 | public void setOp(String op) { 73 | this.op = op; 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/cognito/streams/connector/AmazonCognitoStreamsRedshiftEmitter.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 
4 | */ 5 | package com.amazonaws.services.cognito.streams.connector; 6 | 7 | import com.amazonaws.auth.AWSCredentialsProvider; 8 | import com.amazonaws.auth.BasicSessionCredentials; 9 | import com.amazonaws.services.kinesis.connectors.KinesisConnectorConfiguration; 10 | import com.amazonaws.services.kinesis.connectors.redshift.RedshiftBasicEmitter; 11 | 12 | /** 13 | * Extension of RedshiftBasicEmitter to support session based credentials. 14 | */ 15 | public class AmazonCognitoStreamsRedshiftEmitter extends RedshiftBasicEmitter { 16 | private final AWSCredentialsProvider credentialsProvider; 17 | private final String s3bucket; 18 | private final String redshiftTable; 19 | private final char redshiftDelimiter; 20 | 21 | public AmazonCognitoStreamsRedshiftEmitter(KinesisConnectorConfiguration configuration) { 22 | super(configuration); 23 | s3bucket = configuration.S3_BUCKET; 24 | redshiftTable = configuration.REDSHIFT_DATA_TABLE; 25 | redshiftDelimiter = configuration.REDSHIFT_DATA_DELIMITER; 26 | credentialsProvider = configuration.AWS_CREDENTIALS_PROVIDER; 27 | } 28 | 29 | @Override 30 | protected String generateCopyStatement(String s3File) { 31 | BasicSessionCredentials creds = (BasicSessionCredentials) credentialsProvider.getCredentials(); 32 | 33 | StringBuilder exec = new StringBuilder(); 34 | exec.append("COPY " + redshiftTable + " "); 35 | exec.append("FROM 's3://" + s3bucket + "/" + s3File + "' "); 36 | exec.append("CREDENTIALS 'aws_access_key_id=" + creds.getAWSAccessKeyId()); 37 | exec.append(";aws_secret_access_key=" + creds.getAWSSecretKey()); 38 | exec.append(";token=" + creds.getSessionToken() + "' "); 39 | exec.append("DELIMITER '" + redshiftDelimiter + "'"); 40 | exec.append(";"); 41 | return exec.toString(); 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/cognito/streams/connector/AmazonCognitoStreamsConnectorPipeline.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.cognito.streams.connector; 6 | 7 | import com.amazonaws.services.kinesis.connectors.KinesisConnectorConfiguration; 8 | import com.amazonaws.services.kinesis.connectors.impl.AllPassFilter; 9 | import com.amazonaws.services.kinesis.connectors.impl.BasicMemoryBuffer; 10 | import com.amazonaws.services.kinesis.connectors.interfaces.IBuffer; 11 | import com.amazonaws.services.kinesis.connectors.interfaces.IEmitter; 12 | import com.amazonaws.services.kinesis.connectors.interfaces.IFilter; 13 | import com.amazonaws.services.kinesis.connectors.interfaces.IKinesisConnectorPipeline; 14 | import com.amazonaws.services.kinesis.connectors.interfaces.ITransformer; 15 | 16 | /** 17 | * Connector pipeline for Amazon Cognito streams. 18 | * 19 | * Consumes all events from stream and uses custom Redshift emitter. 
20 | * 21 | */ 22 | public class AmazonCognitoStreamsConnectorPipeline implements 23 | IKinesisConnectorPipeline { 24 | 25 | @Override 26 | public IEmitter getEmitter(KinesisConnectorConfiguration configuration) { 27 | return new AmazonCognitoStreamsRedshiftEmitter(configuration); 28 | } 29 | 30 | @Override 31 | public IBuffer getBuffer( 32 | KinesisConnectorConfiguration configuration) { 33 | return new BasicMemoryBuffer(configuration); 34 | } 35 | 36 | @Override 37 | public ITransformer getTransformer( 38 | KinesisConnectorConfiguration configuration) { 39 | return new AmazonCognitoStreamsEventBeanTransformer(configuration); 40 | } 41 | 42 | @Override 43 | public IFilter getFilter( 44 | KinesisConnectorConfiguration configuration) { 45 | return new AllPassFilter(); 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/kinesis/connectors/interfaces/IKinesisConnectorPipeline.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.kinesis.connectors.interfaces; 6 | 7 | import com.amazonaws.services.kinesis.connectors.KinesisConnectorConfiguration; 8 | 9 | /** 10 | * This interface is used by the KinesisConnectorRecordProcessorFactory to obtain instances of the 11 | * user's implemented classes. Each method takes the applications configuration as an argument. The 12 | * user should implement this such that each method returns a configured implementation of each 13 | * interface. It has two parameter types, the data input type (T) and the data output type (U). 14 | * Records come in as a byte[] and are transformed to a T. Then they are buffered in T form. When 15 | * the buffer is full, T's are converted to U's and passed to the emitter. 16 | * 17 | */ 18 | public interface IKinesisConnectorPipeline { 19 | /** 20 | * Return an instance of the users implementation of IEmitter 21 | * 22 | * @param configuration 23 | * @return a configured instance of the IEmitter implementation. 24 | */ 25 | IEmitter getEmitter(KinesisConnectorConfiguration configuration); 26 | 27 | /** 28 | * Return an instance of the users implementation of IBuffer 29 | * 30 | * @param configuration 31 | * @return a configured instance of the IBuffer implementation. 32 | */ 33 | IBuffer getBuffer(KinesisConnectorConfiguration configuration); 34 | 35 | /** 36 | * Return an instance of the users implementation of ITransformer. 37 | * 38 | * @param configuration 39 | * @return a configured instance of the ITransformer implementation 40 | */ 41 | ITransformerBase getTransformer(KinesisConnectorConfiguration configuration); 42 | 43 | /** 44 | * Return an instance of the users implementation of IFilter. 45 | * 46 | * @param configuration 47 | * @return a configured instance of the IFilter implementation. 48 | */ 49 | IFilter getFilter(KinesisConnectorConfiguration configuration); 50 | } 51 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/kinesis/connectors/interfaces/IEmitter.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 
4 | */ 5 | package com.amazonaws.services.kinesis.connectors.interfaces; 6 | 7 | import java.io.IOException; 8 | import java.util.List; 9 | 10 | import com.amazonaws.services.kinesis.connectors.UnmodifiableBuffer; 11 | 12 | /** 13 | * IEmitter takes a full buffer and processes the stored records. The IEmitter is a member of the 14 | * IKinesisConnectorPipeline that "emits" the objects that have been deserialized by the 15 | * ITransformer. The emit() method is invoked when the buffer is full (possibly to persist the 16 | * records or send them to another Amazon Kinesis stream). After emitting the records, the IEmitter should 17 | * return a list of records that could not be processed. Implementations may choose to fail the 18 | * entire set of records in the buffer or to fail records individually. 19 | * 20 | * @param 21 | * the data type stored in the record 22 | */ 23 | public interface IEmitter { 24 | 25 | /** 26 | * Invoked when the buffer is full. This method emits the set of filtered records. It should 27 | * return a list of records that were not emitted successfully. Returning 28 | * Collections.emptyList() is considered a success. 29 | * 30 | * @param buffer 31 | * The full buffer of records 32 | * @throws IOException 33 | * A failure was reached that is not recoverable, no retry will occur and the fail 34 | * method will be called 35 | * @return A list of records that failed to emit to be retried 36 | */ 37 | List emit(UnmodifiableBuffer buffer) throws IOException; 38 | 39 | /** 40 | * This method defines how to handle a set of records that cannot successfully be emitted. 41 | * 42 | * @param records 43 | * a list of records that were not successfully emitted 44 | */ 45 | void fail(List records); 46 | 47 | /** 48 | * This method is called when the KinesisConnectorRecordProcessor is shutdown. It should close 49 | * any existing client connections. 50 | */ 51 | void shutdown(); 52 | } 53 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/kinesis/connectors/KinesisConnectorRecordProcessorFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.kinesis.connectors; 6 | 7 | import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorFactory; 8 | import com.amazonaws.services.kinesis.connectors.interfaces.IBuffer; 9 | import com.amazonaws.services.kinesis.connectors.interfaces.IEmitter; 10 | import com.amazonaws.services.kinesis.connectors.interfaces.IFilter; 11 | import com.amazonaws.services.kinesis.connectors.interfaces.IKinesisConnectorPipeline; 12 | import com.amazonaws.services.kinesis.connectors.interfaces.ITransformerBase; 13 | 14 | /** 15 | * This class is used to generate KinesisConnectorRecordProcessors that operate using the user's 16 | * implemented classes. The createProcessor() method sets the dependencies of the 17 | * KinesisConnectorRecordProcessor that are specified in the KinesisConnectorPipeline argument, 18 | * which accesses instances of the users implementations. 
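 *
 * The resulting factory is handed to an Amazon Kinesis Client Library Worker. A rough usage sketch,
 * in which pipeline, configuration, and kclConfig are assumed to be already-built instances of
 * IKinesisConnectorPipeline, KinesisConnectorConfiguration, and KinesisClientLibConfiguration:
 *
 *   IRecordProcessorFactory factory =
 *       new KinesisConnectorRecordProcessorFactory<String, byte[]>(pipeline, configuration);
 *   Worker worker = new Worker(factory, kclConfig);
 *   worker.run();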
19 | */ 20 | public class KinesisConnectorRecordProcessorFactory implements IRecordProcessorFactory { 21 | 22 | private IKinesisConnectorPipeline pipeline; 23 | private KinesisConnectorConfiguration configuration; 24 | 25 | public KinesisConnectorRecordProcessorFactory(IKinesisConnectorPipeline pipeline, 26 | KinesisConnectorConfiguration configuration) { 27 | this.configuration = configuration; 28 | this.pipeline = pipeline; 29 | } 30 | 31 | @Override 32 | public KinesisConnectorRecordProcessor createProcessor() { 33 | try { 34 | IBuffer buffer = pipeline.getBuffer(configuration); 35 | IEmitter emitter = pipeline.getEmitter(configuration); 36 | ITransformerBase transformer = pipeline.getTransformer(configuration); 37 | IFilter filter = pipeline.getFilter(configuration); 38 | KinesisConnectorRecordProcessor processor = 39 | new KinesisConnectorRecordProcessor(buffer, filter, emitter, transformer, configuration); 40 | return processor; 41 | } catch (Throwable t) { 42 | throw new RuntimeException(t); 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /WebContent/styles/styles.css: -------------------------------------------------------------------------------- 1 | /************************************* 2 | GENERAL 3 | *************************************/ 4 | body { 5 | margin: 0; 6 | padding: 0; 7 | font: 12px/1.4em "Lucida Grande", Verdana, sans-serif; 8 | color: #333; 9 | overflow-y: scroll; 10 | text-rendering: optimizeLegibility; 11 | background-color: #d5e9ed; 12 | } 13 | 14 | h2 { 15 | font-size: 1.3em; 16 | line-height: 1.5em; 17 | font-weight: bold; 18 | margin: 20px 0 0 0; 19 | padding: 0; 20 | border-bottom: 3px solid #eee; 21 | 22 | /* icon setup */ 23 | padding: 0.2em 1em 0.2em 30px; 24 | background-position: 0 50%; 25 | background-repeat: no-repeat; 26 | } 27 | 28 | 29 | /************************************* 30 | SECTIONS 31 | *************************************/ 32 | div#content { 33 | margin: 30px auto; 34 | padding: 0 30px 15px 30px; 35 | background-color: #fff; 36 | width: 940px; 37 | 38 | /* box-shadow */ 39 | -moz-box-shadow: 0 5px 10px #aaa; 40 | -webkit-box-shadow: 0 5px 10px #aaa; 41 | box-shadow: 0 5px 10px #aaa; 42 | 43 | /* bottom corners */ 44 | -webkit-border-bottom-right-radius: 7px; 45 | -webkit-border-bottom-left-radius: 7px; 46 | -moz-border-radius-bottomright: 7px; 47 | -moz-border-radius-bottomleft: 7px; 48 | border-bottom-right-radius: 7px; 49 | border-bottom-left-radius: 7px; 50 | } 51 | 52 | /*div#content div.section {}*/ 53 | 54 | div#content div.section ul { 55 | margin: 0; 56 | padding: 1em 0 0 2em; 57 | overflow: hidden; 58 | } 59 | 60 | div#content div.section ul li { 61 | list-style-type: square; 62 | white-space: nowrap; 63 | line-height: 1.5em; 64 | } 65 | 66 | /* Section titles */ 67 | div#content div.section.s3 h2 { 68 | background-image: url(../images/drive.png); 69 | } 70 | 71 | div#content div.section.ec2 h2 { 72 | background-image: url(../images/server.png); 73 | } 74 | 75 | div#content div.section.sdb h2 { 76 | background-image: url(../images/database.png); 77 | } 78 | 79 | 80 | /************************************* 81 | CONTAINERS 82 | *************************************/ 83 | .container { 84 | zoom: 1; 85 | } 86 | 87 | .container:after { 88 | content: "."; 89 | display: block; 90 | height: 0; 91 | clear: both; 92 | visibility: hidden; 93 | } 94 | 95 | 96 | /************************************* 97 | GRIDS 98 | *************************************/ 99 | .grid { float: left; 
margin-right: 20px; } 100 | .gridlast { margin-right: 0; } 101 | .grid5 { width: 300px; } 102 | .grid15 { width: 940px; } 103 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/cognito/streams/connector/AmazonCognitoStreamsConnectorExecutor.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.cognito.streams.connector; 6 | 7 | import java.io.IOException; 8 | import java.io.InputStream; 9 | import java.util.Properties; 10 | 11 | import org.apache.commons.logging.Log; 12 | import org.apache.commons.logging.LogFactory; 13 | 14 | import com.amazonaws.services.kinesis.connectors.KinesisConnectorConfiguration; 15 | import com.amazonaws.services.kinesis.connectors.KinesisConnectorExecutorBase; 16 | import com.amazonaws.services.kinesis.connectors.KinesisConnectorRecordProcessorFactory; 17 | 18 | import com.amazonaws.auth.InstanceProfileCredentialsProvider; 19 | 20 | /** 21 | * Implementation of a KinesisConnectorExecutor. 22 | * 23 | * Responsible for starting loading necessary configuration and starting up Worker instance. 24 | * 25 | */ 26 | public class AmazonCognitoStreamsConnectorExecutor extends KinesisConnectorExecutorBase { 27 | 28 | private static final Log LOG = LogFactory.getLog(AmazonCognitoStreamsConnectorExecutor.class); 29 | 30 | protected final KinesisConnectorConfiguration config; 31 | 32 | public AmazonCognitoStreamsConnectorExecutor() { 33 | 34 | // Load properties configured in the bundle 35 | InputStream configStream = getClass().getClassLoader().getResourceAsStream("connector.properties"); 36 | 37 | Properties properties = new Properties(); 38 | try { 39 | properties.load(configStream); 40 | configStream.close(); 41 | } catch (IOException e) { 42 | String msg = "Could not load properties file from classpath"; 43 | LOG.error(msg, e); 44 | throw new IllegalStateException(msg, e); 45 | } 46 | 47 | // Overlay properties set in environment 48 | AmazonCognitoStreamsEnvironmentOptions.bootstrapEnv(properties); 49 | 50 | // Always use Instance Profile credentials 51 | InstanceProfileCredentialsProvider credentialsProvider = new InstanceProfileCredentialsProvider(); 52 | 53 | this.config = new KinesisConnectorConfiguration(properties, credentialsProvider); 54 | 55 | super.initialize(this.config); 56 | } 57 | 58 | @Override 59 | public KinesisConnectorRecordProcessorFactory getKinesisConnectorRecordProcessorFactory() { 60 | return new KinesisConnectorRecordProcessorFactory<>(new AmazonCognitoStreamsConnectorPipeline(), config); 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/cognito/streams/connector/AmazonCognitoStreamsEventBean.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.cognito.streams.connector; 6 | 7 | import java.util.Date; 8 | import java.util.List; 9 | 10 | import com.fasterxml.jackson.annotation.JsonIgnoreProperties; 11 | 12 | /** 13 | * An instance of an event record written to the stream by Amazon Cognito. 14 | * 15 | * Contains either a list of Records or a URL that will contain the list of records. 
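 *
 * A purely hypothetical example of the JSON this bean could be deserialized from (field names are
 * taken from the properties below; every value is illustrative only):
 *
 *   {"identityPoolId": "us-east-1:example-pool-id", "identityId": "us-east-1:example-identity-id",
 *    "datasetName": "myDataset", "operation": "replace", "syncCount": 3,
 *    "kinesisSyncRecords": [{"key": "favoriteColor", "value": "blue", "syncCount": 3, "op": "replace"}]}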
16 | */ 17 | @JsonIgnoreProperties(ignoreUnknown = true) 18 | public class AmazonCognitoStreamsEventBean { 19 | 20 | private String identityPoolId; 21 | private String identityId; 22 | private String datasetName; 23 | private String operation; 24 | private List kinesisSyncRecords; 25 | private String kinesisSyncRecordsURL; 26 | private Date lastModifiedDate; 27 | private Integer syncCount; 28 | 29 | public String getIdentityPoolId() { 30 | return identityPoolId; 31 | } 32 | 33 | public void setIdentityPoolId(String identityPoolId) { 34 | this.identityPoolId = identityPoolId; 35 | } 36 | 37 | public String getIdentityId() { 38 | return identityId; 39 | } 40 | 41 | public void setIdentityId(String identityId) { 42 | this.identityId = identityId; 43 | } 44 | 45 | public String getDatasetName() { 46 | return datasetName; 47 | } 48 | 49 | public void setDatasetName(String datasetName) { 50 | this.datasetName = datasetName; 51 | } 52 | 53 | public String getOperation() { 54 | return operation; 55 | } 56 | 57 | public void setOperation(String operation) { 58 | this.operation = operation; 59 | } 60 | 61 | public List getKinesisSyncRecords() { 62 | return kinesisSyncRecords; 63 | } 64 | 65 | public void setKinesisSyncRecords(List kinesisSyncRecords) { 66 | this.kinesisSyncRecords = kinesisSyncRecords; 67 | } 68 | 69 | public String getKinesisSyncRecordsURL() { 70 | return kinesisSyncRecordsURL; 71 | } 72 | 73 | public void setKinesisSyncRecordsURL(String kinesisSyncRecordsURL) { 74 | this.kinesisSyncRecordsURL = kinesisSyncRecordsURL; 75 | } 76 | 77 | public Date getLastModifiedDate() { 78 | return lastModifiedDate; 79 | } 80 | 81 | public void setLastModifiedDate(Date lastModifiedDate) { 82 | this.lastModifiedDate = lastModifiedDate; 83 | } 84 | 85 | public Integer getSyncCount() { 86 | return syncCount; 87 | } 88 | 89 | public void setSyncCount(Integer syncCount) { 90 | this.syncCount = syncCount; 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/kinesis/connectors/UnmodifiableBuffer.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.kinesis.connectors; 6 | 7 | import java.util.Collections; 8 | import java.util.List; 9 | import java.util.Objects; 10 | 11 | import com.amazonaws.services.kinesis.connectors.interfaces.IBuffer; 12 | 13 | /** 14 | * This class is a wrapper on an IBuffer that limits the functionality of the buffer. This buffer 15 | * cannot be added to, and retrieving the list of records returns an unmodifiable list. Calling 16 | * consumeRecord() or clear() will cause an UnathorizedOperationException to be thrown. Calling 17 | * getRecords() returns the records wrapped in an UnmodifiableList. 
18 | * 19 | * @param 20 | */ 21 | public class UnmodifiableBuffer implements IBuffer { 22 | 23 | private final IBuffer buf; 24 | private final List records; 25 | 26 | public UnmodifiableBuffer(IBuffer buf) { 27 | this.buf = buf; 28 | this.records = buf.getRecords(); 29 | } 30 | 31 | public UnmodifiableBuffer(IBuffer buf, List records) { 32 | this.buf = buf; 33 | this.records = records; 34 | } 35 | 36 | @Override 37 | public long getBytesToBuffer() { 38 | return buf.getBytesToBuffer(); 39 | } 40 | 41 | @Override 42 | public long getNumRecordsToBuffer() { 43 | return buf.getNumRecordsToBuffer(); 44 | } 45 | 46 | @Override 47 | public long getMillisecondsToBuffer() { 48 | return buf.getMillisecondsToBuffer(); 49 | } 50 | 51 | @Override 52 | public boolean shouldFlush() { 53 | return buf.shouldFlush(); 54 | } 55 | 56 | @Override 57 | public void consumeRecord(T record, int recordBytes, String sequenceNumber) { 58 | throw new UnsupportedOperationException("This is an unmodifiable buffer"); 59 | } 60 | 61 | @Override 62 | public void clear() { 63 | throw new UnsupportedOperationException("This is an unmodifiable buffer"); 64 | } 65 | 66 | @Override 67 | public String getFirstSequenceNumber() { 68 | return buf.getFirstSequenceNumber(); 69 | } 70 | 71 | @Override 72 | public String getLastSequenceNumber() { 73 | return buf.getLastSequenceNumber(); 74 | } 75 | 76 | @Override 77 | public List getRecords() { 78 | return Collections.unmodifiableList(records); 79 | } 80 | 81 | @Override 82 | public int hashCode() { 83 | return Objects.hash(buf, records); 84 | } 85 | 86 | @Override 87 | public boolean equals(Object obj) { 88 | if (obj == this) { 89 | return true; 90 | } 91 | if (obj instanceof UnmodifiableBuffer) { 92 | UnmodifiableBuffer other = (UnmodifiableBuffer) obj; 93 | return Objects.equals(buf, other.buf) && Objects.equals(records, records); 94 | } 95 | return false; 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/kinesis/connectors/interfaces/IBuffer.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.kinesis.connectors.interfaces; 6 | 7 | import java.util.List; 8 | 9 | /** 10 | * IBuffer defines a buffer used to store records streamed through Amazon Kinesis. It is a part of the 11 | * IKinesisConnectorPipeline utilized by the KinesisConnectorRecordProcessor. Records are stored in 12 | * the buffer by calling the consumeRecord method. The buffer has two size limits defined: total 13 | * byte count and total number of records. The shouldFlush() method may indicate that the buffer is 14 | * full based on these limits. 
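 * Implementations may also flush on a time limit (see getMillisecondsToBuffer() below), as the
 * BasicMemoryBuffer implementation in this package does.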
15 | * 16 | * @param 17 | * the data type stored in the record 18 | */ 19 | public interface IBuffer { 20 | /** 21 | * Get the byte size limit of data stored in the buffer before the records are flushed to the 22 | * emitter 23 | * 24 | * @return byte size limit of buffer 25 | */ 26 | public long getBytesToBuffer(); 27 | 28 | /** 29 | * Get the record number limit of data stored in the buffer before the records are flushed to 30 | * the emitter 31 | * 32 | * @return record number limit of buffer 33 | */ 34 | public long getNumRecordsToBuffer(); 35 | 36 | /** 37 | * Get the time limit in milliseconds before the records are flushed to the emitter 38 | * 39 | * @return time limit in milleseconds 40 | */ 41 | public long getMillisecondsToBuffer(); 42 | 43 | /** 44 | * Returns true if the buffer is full and stored records should be sent to the emitter 45 | * 46 | * @return true if records should be sent to the emitter followed by clearing the buffer 47 | */ 48 | public boolean shouldFlush(); 49 | 50 | /** 51 | * Stores the record in the buffer 52 | * 53 | * @param record 54 | * record to be processed 55 | * @param recordBytes 56 | * size of the record data in bytes 57 | * @param sequenceNumber 58 | * Amazon Kinesis sequence identifier 59 | */ 60 | public void consumeRecord(T record, int recordBytes, String sequenceNumber); 61 | 62 | /** 63 | * Clears the buffer 64 | */ 65 | public void clear(); 66 | 67 | /** 68 | * Get the sequence number of the first record stored in the buffer. Used for bookkeeping and 69 | * uniquely identifying items in the buffer. 70 | * 71 | * @return the sequence number of the first record stored in the buffer 72 | */ 73 | public String getFirstSequenceNumber(); 74 | 75 | /** 76 | * Get the sequence number of the last record stored in the buffer. Used for bookkeeping and 77 | * uniquely identifying items in the buffer. 
78 | * 79 | * @return the sequence number of the last record stored in the buffer 80 | */ 81 | public String getLastSequenceNumber(); 82 | 83 | /** 84 | * Get the records stored in the buffer 85 | * 86 | * @return the records stored in the buffer 87 | */ 88 | public List getRecords(); 89 | } 90 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | com.amazonaws 5 | amazon-cognito-streams-sample 6 | 1.0.0 7 | war 8 | 9 | clean compile war:war 10 | src 11 | 12 | 13 | src/resources 14 | 15 | 16 | 17 | 18 | maven-compiler-plugin 19 | 3.1 20 | 21 | 1.7 22 | 1.7 23 | 24 | 25 | 26 | maven-war-plugin 27 | 2.3 28 | 29 | WebContent 30 | true 31 | AmazonCognitoStreamsSample 32 | 33 | 34 | 35 | 36 | 37 | 38 | com.amazonaws 39 | amazon-kinesis-client 40 | LATEST 41 | 42 | 43 | commons-logging 44 | commons-logging 45 | LATEST 46 | 47 | 48 | com.amazonaws 49 | aws-java-sdk 50 | LATEST 51 | 52 | 53 | javax.servlet 54 | javax.servlet-api 55 | 3.0.1 56 | provided 57 | 58 | 59 | commons-io 60 | commons-io 61 | LATEST 62 | 63 | 64 | commons-collections 65 | commons-collections 66 | LATEST 67 | 68 | 69 | org.javatuples 70 | javatuples 71 | LATEST 72 | 73 | 74 | joda-time 75 | joda-time 76 | 2.2 77 | 78 | 79 | postgresql 80 | postgresql 81 | 8.4-702.jdbc4 82 | 83 | 84 | com.google.guava 85 | guava 86 | 18.0 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/kinesis/connectors/s3/S3ManifestEmitter.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.kinesis.connectors.s3; 6 | 7 | import java.io.IOException; 8 | import java.nio.ByteBuffer; 9 | import java.util.Collections; 10 | import java.util.List; 11 | 12 | import org.apache.commons.logging.Log; 13 | import org.apache.commons.logging.LogFactory; 14 | 15 | import com.amazonaws.AmazonServiceException; 16 | import com.amazonaws.services.kinesis.AmazonKinesisClient; 17 | import com.amazonaws.services.kinesis.connectors.KinesisConnectorConfiguration; 18 | import com.amazonaws.services.kinesis.connectors.UnmodifiableBuffer; 19 | import com.amazonaws.services.kinesis.model.PutRecordRequest; 20 | 21 | /** 22 | * This implementaion of IEmitter inserts records into Amazon S3 and emits filenames into a separate 23 | * Amazon Kinesis stream. The separate Amazon Kinesis stream is to be used by another Amazon Kinesis enabled application 24 | * that utilizes RedshiftManifestEmitters to insert the records into Amazon Redshift via a manifest copy. 25 | * This class requires the configuration of an Amazon S3 bucket and endpoint, as well as Amazon Kinesis endpoint 26 | * and output stream. 27 | *
* 28 | * When the buffer is full, this Emitter: 29 | * 30 | * 1. Puts all records into a single file in S3 31 | * 2. Puts the single file name into the manifest stream 32 | * 33 |
34 | * NOTE: the Amazon S3 bucket and Amazon Redshift cluster must be in the same region. 35 | */ 36 | public class S3ManifestEmitter extends S3Emitter { 37 | private static final Log LOG = LogFactory.getLog(S3ManifestEmitter.class); 38 | private final AmazonKinesisClient kinesisClient; 39 | private final String manifestStream; 40 | 41 | public S3ManifestEmitter(KinesisConnectorConfiguration configuration) { 42 | super(configuration); 43 | manifestStream = configuration.KINESIS_OUTPUT_STREAM; 44 | kinesisClient = new AmazonKinesisClient(configuration.AWS_CREDENTIALS_PROVIDER); 45 | kinesisClient.setEndpoint(configuration.KINESIS_ENDPOINT); 46 | } 47 | 48 | @Override 49 | public List emit(final UnmodifiableBuffer buffer) throws IOException { 50 | // Store the contents of buffer.getRecords because superclass will 51 | // clear the buffer on success 52 | List failed = super.emit(buffer); 53 | // calls S3Emitter to write objects to Amazon S3 54 | if (!failed.isEmpty()) { 55 | return buffer.getRecords(); 56 | } 57 | String s3File = getS3FileName(buffer.getFirstSequenceNumber(), buffer.getLastSequenceNumber()); 58 | // wrap the name of the Amazon S3 file as the record data 59 | ByteBuffer data = ByteBuffer.wrap(s3File.getBytes()); 60 | // Put the list of file names to the manifest Amazon Kinesis stream 61 | PutRecordRequest putRecordRequest = new PutRecordRequest(); 62 | putRecordRequest.setData(data); 63 | putRecordRequest.setStreamName(manifestStream); 64 | // Use constant partition key to ensure file order 65 | putRecordRequest.setPartitionKey(manifestStream); 66 | try { 67 | kinesisClient.putRecord(putRecordRequest); 68 | LOG.info("S3ManifestEmitter emitted record downstream: " + s3File); 69 | return Collections.emptyList(); 70 | } catch (Exception e) { 71 | LOG.error(e); 72 | return buffer.getRecords(); 73 | } 74 | } 75 | 76 | @Override 77 | public void fail(List records) { 78 | super.fail(records); 79 | } 80 | 81 | @Override 82 | public void shutdown() { 83 | super.shutdown(); 84 | kinesisClient.shutdown(); 85 | } 86 | 87 | } 88 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/kinesis/connectors/s3/S3Emitter.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.kinesis.connectors.s3; 6 | 7 | import java.io.ByteArrayInputStream; 8 | import java.io.ByteArrayOutputStream; 9 | import java.io.IOException; 10 | import java.util.Arrays; 11 | import java.util.Collections; 12 | import java.util.List; 13 | 14 | import org.apache.commons.logging.Log; 15 | import org.apache.commons.logging.LogFactory; 16 | 17 | import com.amazonaws.services.kinesis.connectors.KinesisConnectorConfiguration; 18 | import com.amazonaws.services.kinesis.connectors.UnmodifiableBuffer; 19 | import com.amazonaws.services.kinesis.connectors.interfaces.IEmitter; 20 | import com.amazonaws.services.s3.AmazonS3Client; 21 | 22 | /** 23 | * This implementation of IEmitter is used to store files from an Amazon Kinesis stream in S3. The use of 24 | * this class requires the configuration of an Amazon S3 bucket/endpoint. When the buffer is full, this 25 | * class's emit method adds the contents of the buffer to Amazon S3 as one file. The filename is generated 26 | * from the first and last sequence numbers of the records contained in that file separated by a 27 | * dash. 
This class requires the configuration of an Amazon S3 bucket and endpoint. 28 | */ 29 | public class S3Emitter implements IEmitter { 30 | private static final Log LOG = LogFactory.getLog(S3Emitter.class); 31 | protected final String s3Bucket; 32 | protected final String s3Endpoint; 33 | 34 | protected final AmazonS3Client s3client; 35 | 36 | public S3Emitter(KinesisConnectorConfiguration configuration) { 37 | s3Bucket = configuration.S3_BUCKET; 38 | s3Endpoint = configuration.S3_ENDPOINT; 39 | s3client = new AmazonS3Client(configuration.AWS_CREDENTIALS_PROVIDER); 40 | if (s3Endpoint != null) { 41 | s3client.setEndpoint(s3Endpoint); 42 | } 43 | } 44 | 45 | protected String getS3FileName(String firstSeq, String lastSeq) { 46 | return firstSeq + "-" + lastSeq; 47 | } 48 | 49 | protected String getS3URI(String s3FileName) { 50 | return "s3://" + s3Bucket + "/" + s3FileName; 51 | } 52 | 53 | @Override 54 | public List emit(final UnmodifiableBuffer buffer) throws IOException { 55 | List records = buffer.getRecords(); 56 | // Write all of the records to a compressed output stream 57 | ByteArrayOutputStream baos = new ByteArrayOutputStream(); 58 | for (byte[] record : records) { 59 | try { 60 | baos.write(record); 61 | } catch (Exception e) { 62 | LOG.error("Error writing record to output stream. Failing this emit attempt. Record: " 63 | + Arrays.toString(record), 64 | e); 65 | return buffer.getRecords(); 66 | } 67 | } 68 | // Get the Amazon S3 filename 69 | String s3FileName = getS3FileName(buffer.getFirstSequenceNumber(), buffer.getLastSequenceNumber()); 70 | String s3URI = getS3URI(s3FileName); 71 | try { 72 | ByteArrayInputStream object = new ByteArrayInputStream(baos.toByteArray()); 73 | LOG.debug("Starting upload of file " + s3URI + " to Amazon S3 containing " + records.size() + " records."); 74 | s3client.putObject(s3Bucket, s3FileName, object, null); 75 | LOG.info("Successfully emitted " + buffer.getRecords().size() + " records to Amazon S3 in " + s3URI); 76 | return Collections.emptyList(); 77 | } catch (Exception e) { 78 | LOG.error("Caught exception when uploading file " + s3URI + "to Amazon S3. Failing this emit attempt.", e); 79 | return buffer.getRecords(); 80 | } 81 | } 82 | 83 | @Override 84 | public void fail(List records) { 85 | for (byte[] record : records) { 86 | LOG.error("Record failed: " + Arrays.toString(record)); 87 | } 88 | } 89 | 90 | @Override 91 | public void shutdown() { 92 | s3client.shutdown(); 93 | } 94 | 95 | } 96 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/kinesis/connectors/impl/BasicMemoryBuffer.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.kinesis.connectors.impl; 6 | 7 | import java.util.LinkedList; 8 | import java.util.List; 9 | import java.util.concurrent.atomic.AtomicLong; 10 | 11 | import com.amazonaws.services.kinesis.connectors.KinesisConnectorConfiguration; 12 | import com.amazonaws.services.kinesis.connectors.interfaces.IBuffer; 13 | 14 | /** 15 | * This class is a basic implementation of the IBuffer interface. It is a wrapper on a buffer of 16 | * records that are periodically flushed. It is configured with an implementation of IFilter that 17 | * decides whether a record will be added to the buffer to be emitted. 
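The "periodically flushed" part is governed by three limits taken from KinesisConnectorConfiguration (record count, byte count, and time since the last flush), checked together in shouldFlush() below. A compact sketch of that check, with hypothetical limits and running totals:

    import java.util.LinkedList;
    import java.util.List;

    public class FlushConditionSketch {
        public static void main(String[] args) {
            // Hypothetical limits; the real ones come from BUFFER_RECORD_COUNT_LIMIT,
            // BUFFER_BYTE_SIZE_LIMIT, and BUFFER_MILLISECONDS_LIMIT.
            long recordLimit = 1000, byteLimit = 1024 * 1024, millisLimit = 60_000;

            List<String> buffer = new LinkedList<>();
            buffer.add("record");
            long byteCount = 512 * 1024;            // hypothetical running byte total
            long millisSinceLastFlush = 75_000;     // hypothetical time since the last flush

            // The buffer flushes as soon as any one of the three limits is crossed.
            boolean shouldFlush = !buffer.isEmpty()
                    && (buffer.size() >= recordLimit
                        || byteCount >= byteLimit
                        || millisSinceLastFlush >= millisLimit);
            System.out.println("flush now? " + shouldFlush);   // true here: the time limit was exceeded
        }
    }
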
18 | * 19 | * @param 20 | */ 21 | public class BasicMemoryBuffer implements IBuffer { 22 | 23 | private final long bytesPerFlush; 24 | private final long numMessagesToBuffer; 25 | private final long millisecondsToBuffer; 26 | 27 | private final List buffer; 28 | private final AtomicLong byteCount; 29 | 30 | private String firstSequenceNumber; 31 | private String lastSequenceNumber; 32 | 33 | private long previousFlushTimeMillisecond; 34 | 35 | public BasicMemoryBuffer(KinesisConnectorConfiguration configuration, List buffer) { 36 | bytesPerFlush = configuration.BUFFER_BYTE_SIZE_LIMIT; 37 | numMessagesToBuffer = configuration.BUFFER_RECORD_COUNT_LIMIT; 38 | millisecondsToBuffer = configuration.BUFFER_MILLISECONDS_LIMIT; 39 | this.buffer = buffer; 40 | byteCount = new AtomicLong(); 41 | previousFlushTimeMillisecond = getCurrentTimeMilliseconds(); 42 | } 43 | 44 | public BasicMemoryBuffer(KinesisConnectorConfiguration configuration) { 45 | this(configuration, new LinkedList()); 46 | } 47 | 48 | @Override 49 | public long getBytesToBuffer() { 50 | return bytesPerFlush; 51 | } 52 | 53 | @Override 54 | public long getNumRecordsToBuffer() { 55 | return numMessagesToBuffer; 56 | } 57 | 58 | @Override 59 | public long getMillisecondsToBuffer() { 60 | return millisecondsToBuffer; 61 | } 62 | 63 | @Override 64 | public void consumeRecord(T record, int recordSize, String sequenceNumber) { 65 | if (buffer.isEmpty()) { 66 | firstSequenceNumber = sequenceNumber; 67 | } 68 | lastSequenceNumber = sequenceNumber; 69 | buffer.add(record); 70 | byteCount.addAndGet(recordSize); 71 | } 72 | 73 | @Override 74 | public void clear() { 75 | buffer.clear(); 76 | byteCount.set(0); 77 | previousFlushTimeMillisecond = getCurrentTimeMilliseconds(); 78 | } 79 | 80 | @Override 81 | public String getFirstSequenceNumber() { 82 | return firstSequenceNumber; 83 | } 84 | 85 | @Override 86 | public String getLastSequenceNumber() { 87 | return lastSequenceNumber; 88 | } 89 | 90 | /** 91 | * By default, we flush once we have exceeded the number of messages or maximum bytes to buffer. 92 | * However, subclasses can use their own means to determine if they should flush. 93 | * 94 | * @return true if either the number of records in the buffer exceeds max number of records or 95 | * the size of the buffer exceeds the max number of bytes in the buffer. 96 | */ 97 | @Override 98 | public boolean shouldFlush() { 99 | long timelapseMillisecond = getCurrentTimeMilliseconds() - previousFlushTimeMillisecond; 100 | return (!buffer.isEmpty()) 101 | && ((buffer.size() >= getNumRecordsToBuffer()) || (byteCount.get() >= getBytesToBuffer()) || (timelapseMillisecond >= getMillisecondsToBuffer())); 102 | } 103 | 104 | @Override 105 | public List getRecords() { 106 | return buffer; 107 | } 108 | 109 | // This method has protected access for unit testing purposes. 110 | protected long getCurrentTimeMilliseconds() { 111 | return System.currentTimeMillis(); 112 | } 113 | 114 | } 115 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/cognito/streams/connector/AmazonCognitoStreamsEnvironmentOptions.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 
4 | */ 5 | package com.amazonaws.services.cognito.streams.connector; 6 | 7 | import java.sql.Connection; 8 | import java.sql.DriverManager; 9 | import java.sql.SQLException; 10 | import java.sql.Statement; 11 | import java.util.Properties; 12 | 13 | import org.apache.commons.logging.Log; 14 | import org.apache.commons.logging.LogFactory; 15 | 16 | import com.amazonaws.services.kinesis.connectors.KinesisConnectorConfiguration; 17 | import com.google.common.base.MoreObjects; 18 | 19 | /** 20 | * Helper class to load environment variables into properties and ensure 21 | * table is available in Redshift. 22 | * 23 | */ 24 | public class AmazonCognitoStreamsEnvironmentOptions { 25 | private static final Log LOG = LogFactory.getLog(AmazonCognitoStreamsEnvironmentOptions.class); 26 | 27 | static String getJDBCConnection() { 28 | String variable = System.getProperty("JDBC_CONNECTION_STRING"); 29 | if (variable == null) { 30 | throw new RuntimeException("JDBC_CONNECTION_STRING not set"); 31 | } 32 | return variable; 33 | } 34 | 35 | static String getKinesisInputStream() { 36 | 37 | String variable = MoreObjects.firstNonNull(System.getProperty("KINESIS_INPUT_STREAM"), 38 | System.getProperty("PARAM1")); 39 | 40 | return variable; 41 | } 42 | 43 | static String getRedshiftUserName() { 44 | String variable = MoreObjects.firstNonNull(System.getProperty("REDSHIFT_USER_NAME"), 45 | System.getProperty("PARAM2")); 46 | 47 | return variable; 48 | } 49 | 50 | static String getRedshiftPassword() { 51 | String variable = MoreObjects.firstNonNull(System.getProperty("REDSHIFT_PASSWORD"), 52 | System.getProperty("PARAM3")); 53 | 54 | return variable; 55 | } 56 | 57 | static String getS3BucketName() { 58 | String variable = MoreObjects.firstNonNull(System.getProperty("S3_BUCKET_NAME"), 59 | System.getProperty("PARAM4")); 60 | 61 | return variable; 62 | } 63 | 64 | static String getRegion() { 65 | String variable = MoreObjects.firstNonNull(System.getProperty("REGION"), 66 | System.getProperty("PARAM5")); 67 | 68 | return variable; 69 | } 70 | 71 | static void createRedshiftTable(Properties properties) { 72 | // Ensure our data table exists 73 | Properties loginProperties = new Properties(); 74 | loginProperties.setProperty("user", getRedshiftUserName()); 75 | loginProperties.setProperty("password", getRedshiftPassword()); 76 | 77 | StringBuilder builder = new StringBuilder(); 78 | builder.append("CREATE TABLE IF NOT EXISTS ") 79 | .append(properties.getProperty(KinesisConnectorConfiguration.PROP_REDSHIFT_DATA_TABLE)) 80 | .append(" (") 81 | .append("identityPoolId varchar(128),") 82 | .append("identityId varchar(128),") 83 | .append("datasetName varchar(128),") 84 | .append("operation varchar(64),") 85 | .append("key varchar(1024),") 86 | .append("value varchar(4096),") 87 | .append("op varchar(64),") 88 | .append("syncCount int,") 89 | .append("deviceLastModifiedDate timestamp,") 90 | .append("lastModifiedDate timestamp") 91 | .append(")"); 92 | 93 | Connection conn = null; 94 | try { 95 | conn = DriverManager.getConnection(getJDBCConnection(), loginProperties); 96 | 97 | Statement stmt = conn.createStatement(); 98 | stmt.execute(builder.toString()); 99 | stmt.close(); 100 | } catch (SQLException e) { 101 | LOG.error("Failed to create table.", e); 102 | } finally { 103 | try { 104 | conn.close(); 105 | } catch (SQLException e) { 106 | LOG.error("Failed close connection.", e); 107 | } 108 | } 109 | } 110 | 111 | public static void bootstrapEnv(Properties properties) { 112 | 
properties.setProperty(KinesisConnectorConfiguration.PROP_REDSHIFT_URL, getJDBCConnection()); 113 | properties.setProperty(KinesisConnectorConfiguration.PROP_S3_BUCKET, getS3BucketName()); 114 | properties.setProperty(KinesisConnectorConfiguration.PROP_REDSHIFT_USERNAME, getRedshiftUserName()); 115 | properties.setProperty(KinesisConnectorConfiguration.PROP_REDSHIFT_PASSWORD, getRedshiftPassword()); 116 | properties.setProperty(KinesisConnectorConfiguration.PROP_KINESIS_INPUT_STREAM, getKinesisInputStream()); 117 | properties.setProperty(KinesisConnectorConfiguration.PROP_REGION_NAME, getRegion()); 118 | 119 | createRedshiftTable(properties); 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/cognito/streams/connector/AmazonCognitoStreamsEventBeanTransformer.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.cognito.streams.connector; 6 | 7 | import java.net.URL; 8 | import java.sql.Timestamp; 9 | import java.util.Date; 10 | import java.util.List; 11 | 12 | import org.apache.commons.logging.Log; 13 | import org.apache.commons.logging.LogFactory; 14 | 15 | import com.amazonaws.services.kinesis.connectors.KinesisConnectorConfiguration; 16 | import com.amazonaws.services.kinesis.connectors.redshift.RedshiftTransformer; 17 | import com.fasterxml.jackson.databind.ObjectMapper; 18 | import com.google.common.collect.ImmutableList; 19 | 20 | /** 21 | * Transformer for event bean. Each event bean can become multiple rows in Redshift. 22 | * 23 | * Transformer is also responsible for fetching the S3 URL (if supplied) 24 | */ 25 | public class AmazonCognitoStreamsEventBeanTransformer extends RedshiftTransformer { 26 | 27 | private static final Log LOG = LogFactory.getLog(AmazonCognitoStreamsEventBeanTransformer.class); 28 | 29 | private static final ObjectMapper om = new ObjectMapper(); 30 | 31 | private final char delim; 32 | 33 | public AmazonCognitoStreamsEventBeanTransformer(KinesisConnectorConfiguration config) { 34 | super(AmazonCognitoStreamsEventBean.class); 35 | delim = config.REDSHIFT_DATA_DELIMITER; 36 | } 37 | 38 | @Override 39 | public String toDelimitedString(AmazonCognitoStreamsEventBean dataObject) { 40 | StringBuilder builder = new StringBuilder(); 41 | StringBuilder dataBuilder = new StringBuilder(); 42 | 43 | dataBuilder.append(dataObject.getIdentityPoolId()).append(delim) 44 | .append(dataObject.getIdentityId()).append(delim) 45 | .append(truncate(sanitize(dataObject.getDatasetName()),128)).append(delim) 46 | .append(dataObject.getOperation()); 47 | 48 | String repeatingPart = dataBuilder.toString(); 49 | 50 | // If the data object has a URL, parse the records from the S3 file 51 | if (dataObject.getKinesisSyncRecordsURL() != null) { 52 | LOG.info("fetching records from " + dataObject.getKinesisSyncRecordsURL()); 53 | try { 54 | URL url = new URL(dataObject.getKinesisSyncRecordsURL()); 55 | List parsed = om.readValue(url.openStream(), 56 | om.getTypeFactory().constructCollectionType(List.class, AmazonCognitoStreamsRecordBean.class)); 57 | dataObject.setKinesisSyncRecords(parsed); 58 | } 59 | catch (Exception e) { 60 | LOG.error("Unable to parse S3 payload",e); 61 | throw new RuntimeException("Unable to parse S3 payload",e); 62 | } 63 | LOG.info("fetched " + dataObject.getKinesisSyncRecords().size() + " 
records from S3"); 64 | } 65 | 66 | // For some operations, neither records nor URL will be populated 67 | if (dataObject.getKinesisSyncRecords() == null) { 68 | AmazonCognitoStreamsRecordBean tempBean = new AmazonCognitoStreamsRecordBean(); 69 | tempBean.setDeviceLastModifiedDate(new Date()); 70 | tempBean.setLastModifiedDate(new Date()); 71 | dataObject.setKinesisSyncRecords(ImmutableList.of(tempBean)); 72 | } 73 | 74 | for (AmazonCognitoStreamsRecordBean recordObject: dataObject.getKinesisSyncRecords()) { 75 | 76 | builder.append(repeatingPart).append(delim) 77 | .append(truncate(sanitize(recordObject.getKey()),1024)).append(delim) 78 | .append(truncate(sanitize(recordObject.getValue()),4096)).append(delim) 79 | .append(recordObject.getOp()).append(delim) 80 | .append(recordObject.getSyncCount()).append(delim) 81 | .append(new Timestamp(recordObject.getDeviceLastModifiedDate().getTime())).append(delim) 82 | .append(new Timestamp(recordObject.getLastModifiedDate().getTime())) 83 | .append("\n"); 84 | } 85 | 86 | LOG.info("processed " + dataObject.getKinesisSyncRecords().size() + " records from Kinesis"); 87 | 88 | return builder.toString(); 89 | } 90 | 91 | 92 | /** 93 | * Remove characters known to cause issues in Redshift import 94 | * @param string 95 | * @return 96 | */ 97 | private String sanitize(String string) { 98 | if (string == null) { 99 | return null; 100 | } 101 | string = string.replace("\n", " "); 102 | string = string.replace(Character.toString(delim), " "); 103 | string = string.replaceAll("\\x00", "?"); 104 | return string; 105 | } 106 | 107 | /** 108 | * Truncate values to length. 109 | * @param string 110 | * @param maxLength 111 | * @return 112 | */ 113 | private String truncate(String string, int maxLength) { 114 | if (string == null) { 115 | return null; 116 | } 117 | if (string.length() > maxLength) { 118 | string = string.substring(0, maxLength); 119 | } 120 | return string; 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/kinesis/connectors/KinesisConnectorExecutorBase.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 
4 | */ 5 | package com.amazonaws.services.kinesis.connectors; 6 | 7 | import org.apache.commons.logging.Log; 8 | import org.apache.commons.logging.LogFactory; 9 | 10 | import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration; 11 | import com.amazonaws.services.kinesis.clientlibrary.lib.worker.Worker; 12 | import com.amazonaws.services.kinesis.connectors.interfaces.IKinesisConnectorPipeline; 13 | import com.amazonaws.services.kinesis.metrics.interfaces.IMetricsFactory; 14 | 15 | public abstract class KinesisConnectorExecutorBase implements Runnable { 16 | private static final Log LOG = LogFactory.getLog(KinesisConnectorExecutorBase.class); 17 | 18 | // Amazon Kinesis Client Library worker to process records 19 | protected Worker worker; 20 | 21 | /** 22 | * Initialize the Amazon Kinesis Client Library configuration and worker 23 | * 24 | * @param kinesisConnectorConfiguration Amazon Kinesis connector configuration 25 | */ 26 | protected void initialize(KinesisConnectorConfiguration kinesisConnectorConfiguration) { 27 | initialize(kinesisConnectorConfiguration, null); 28 | } 29 | 30 | /** 31 | * Initialize the Amazon Kinesis Client Library configuration and worker with metrics factory 32 | * 33 | * @param kinesisConnectorConfiguration Amazon Kinesis connector configuration 34 | * @param metricFactory would be used to emit metrics in Amazon Kinesis Client Library 35 | */ 36 | protected void 37 | initialize(KinesisConnectorConfiguration kinesisConnectorConfiguration, IMetricsFactory metricFactory) { 38 | KinesisClientLibConfiguration kinesisClientLibConfiguration = 39 | new KinesisClientLibConfiguration(kinesisConnectorConfiguration.APP_NAME, 40 | kinesisConnectorConfiguration.KINESIS_INPUT_STREAM, 41 | kinesisConnectorConfiguration.AWS_CREDENTIALS_PROVIDER, 42 | kinesisConnectorConfiguration.WORKER_ID).withKinesisEndpoint(kinesisConnectorConfiguration.KINESIS_ENDPOINT) 43 | .withFailoverTimeMillis(kinesisConnectorConfiguration.FAILOVER_TIME) 44 | .withMaxRecords(kinesisConnectorConfiguration.MAX_RECORDS) 45 | .withInitialPositionInStream(kinesisConnectorConfiguration.INITIAL_POSITION_IN_STREAM) 46 | .withIdleTimeBetweenReadsInMillis(kinesisConnectorConfiguration.IDLE_TIME_BETWEEN_READS) 47 | .withCallProcessRecordsEvenForEmptyRecordList(KinesisConnectorConfiguration.DEFAULT_CALL_PROCESS_RECORDS_EVEN_FOR_EMPTY_LIST) 48 | .withCleanupLeasesUponShardCompletion(kinesisConnectorConfiguration.CLEANUP_TERMINATED_SHARDS_BEFORE_EXPIRY) 49 | .withParentShardPollIntervalMillis(kinesisConnectorConfiguration.PARENT_SHARD_POLL_INTERVAL) 50 | .withShardSyncIntervalMillis(kinesisConnectorConfiguration.SHARD_SYNC_INTERVAL) 51 | .withTaskBackoffTimeMillis(kinesisConnectorConfiguration.BACKOFF_INTERVAL) 52 | .withMetricsBufferTimeMillis(kinesisConnectorConfiguration.CLOUDWATCH_BUFFER_TIME) 53 | .withMetricsMaxQueueSize(kinesisConnectorConfiguration.CLOUDWATCH_MAX_QUEUE_SIZE) 54 | .withUserAgent(kinesisConnectorConfiguration.APP_NAME + "," 55 | + kinesisConnectorConfiguration.CONNECTOR_DESTINATION + "," 56 | + KinesisConnectorConfiguration.KINESIS_CONNECTOR_USER_AGENT) 57 | .withRegionName(kinesisConnectorConfiguration.REGION_NAME); 58 | 59 | if (!kinesisConnectorConfiguration.CALL_PROCESS_RECORDS_EVEN_FOR_EMPTY_LIST) { 60 | LOG.warn("The false value of callProcessRecordsEvenForEmptyList will be ignored. 
It must be set to true for the bufferTimeMillisecondsLimit to work correctly."); 61 | } 62 | 63 | if (kinesisConnectorConfiguration.IDLE_TIME_BETWEEN_READS > kinesisConnectorConfiguration.BUFFER_MILLISECONDS_LIMIT) { 64 | LOG.warn("idleTimeBetweenReads is greater than bufferTimeMillisecondsLimit. For best results, ensure that bufferTimeMillisecondsLimit is more than or equal to idleTimeBetweenReads "); 65 | } 66 | 67 | // If a metrics factory was specified, use it. 68 | if (metricFactory != null) { 69 | worker = 70 | new Worker(getKinesisConnectorRecordProcessorFactory(), 71 | kinesisClientLibConfiguration, 72 | metricFactory); 73 | } else { 74 | worker = new Worker(getKinesisConnectorRecordProcessorFactory(), kinesisClientLibConfiguration); 75 | } 76 | LOG.info(getClass().getSimpleName() + " worker created"); 77 | } 78 | 79 | @Override 80 | public void run() { 81 | if (worker != null) { 82 | // Start Amazon Kinesis Client Library worker to process records 83 | LOG.info("Starting worker in " + getClass().getSimpleName()); 84 | try { 85 | worker.run(); 86 | } catch (Throwable t) { 87 | LOG.error(t); 88 | throw t; 89 | } finally { 90 | LOG.error("Worker " + getClass().getSimpleName() + " is not running."); 91 | } 92 | } else { 93 | throw new RuntimeException("Initialize must be called before run."); 94 | } 95 | } 96 | 97 | /** 98 | * This method returns a {@link KinesisConnectorRecordProcessorFactory} that contains the 99 | * appropriate {@link IKinesisConnectorPipeline} for the Amazon Kinesis Enabled Application 100 | * 101 | * @return a {@link KinesisConnectorRecordProcessorFactory} that contains the appropriate 102 | * {@link IKinesisConnectorPipeline} for the Amazon Kinesis Enabled Application 103 | */ 104 | public abstract KinesisConnectorRecordProcessorFactory getKinesisConnectorRecordProcessorFactory(); 105 | } 106 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/kinesis/connectors/redshift/RedshiftBasicEmitter.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.kinesis.connectors.redshift; 6 | 7 | import java.io.IOException; 8 | import java.sql.Connection; 9 | import java.sql.DriverManager; 10 | import java.sql.ResultSet; 11 | import java.sql.SQLException; 12 | import java.sql.Statement; 13 | import java.util.Collections; 14 | import java.util.List; 15 | import java.util.Properties; 16 | 17 | import org.apache.commons.logging.Log; 18 | import org.apache.commons.logging.LogFactory; 19 | 20 | import com.amazonaws.services.kinesis.connectors.KinesisConnectorConfiguration; 21 | import com.amazonaws.services.kinesis.connectors.UnmodifiableBuffer; 22 | import com.amazonaws.services.kinesis.connectors.s3.S3Emitter; 23 | 24 | /** 25 | * This class is an implementation of IEmitter that emits records into Amazon Redshift one by one. It 26 | * utilizes the Amazon Redshift copy command on each file by first inserting records into Amazon S3 and then 27 | * performing the Amazon Redshift copy command. Amazon S3 insertion is done by extending the Amazon S3 emitter. 28 | *

29 | * This class requires the configuration of an Amazon S3 bucket and endpoint, as well as the following Amazon Redshift
30 | * items:
31 | *
32 | *   • Redshift URL
33 | *   • username and password
34 | *   • data table and key column (data table stores items from the manifest copy)
35 | *   • file table and key column (file table is used to store file names to prevent duplicate entries)
36 | *   • the delimiter used for string parsing when inserting entries into Redshift
37 | *
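Concretely, each flushed Amazon S3 file is loaded with a single COPY statement built by generateCopyStatement() in the class body below. A condensed sketch of that statement, with a hypothetical table, bucket, file name, and placeholder credentials:

    public class CopyStatementSketch {
        static String generateCopyStatement(String table, String bucket, String s3File,
                                            String accessKey, String secretKey, char delimiter) {
            // Same shape as RedshiftBasicEmitter.generateCopyStatement: COPY straight
            // from the intermediate S3 object using the configured delimiter.
            return "COPY " + table + " "
                    + "FROM 's3://" + bucket + "/" + s3File + "' "
                    + "CREDENTIALS 'aws_access_key_id=" + accessKey
                    + ";aws_secret_access_key=" + secretKey + "' "
                    + "DELIMITER '" + delimiter + "';";
        }

        public static void main(String[] args) {
            // Hypothetical values; the real ones come from KinesisConnectorConfiguration.
            System.out.println(generateCopyStatement(
                    "cognito_raw_data", "my-intermediate-bucket",
                    "49545000000000-49545000000057", "AKIA...", "SECRET...", '|'));
        }
    }
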
    38 | * NOTE: The Amazon S3 bucket and the Amazon Redshift cluster need to be in the same region. 39 | */ 40 | public class RedshiftBasicEmitter extends S3Emitter { 41 | private static final Log LOG = LogFactory.getLog(RedshiftBasicEmitter.class); 42 | private final String s3bucket; 43 | private final String redshiftTable; 44 | private final String redshiftURL; 45 | private final char redshiftDelimiter; 46 | private final Properties loginProperties; 47 | private final String accessKey; 48 | private final String secretKey; 49 | 50 | public RedshiftBasicEmitter(KinesisConnectorConfiguration configuration) { 51 | super(configuration); 52 | s3bucket = configuration.S3_BUCKET; 53 | redshiftTable = configuration.REDSHIFT_DATA_TABLE; 54 | redshiftDelimiter = configuration.REDSHIFT_DATA_DELIMITER; 55 | redshiftURL = configuration.REDSHIFT_URL; 56 | loginProperties = new Properties(); 57 | loginProperties.setProperty("user", configuration.REDSHIFT_USERNAME); 58 | loginProperties.setProperty("password", configuration.REDSHIFT_PASSWORD); 59 | accessKey = configuration.AWS_CREDENTIALS_PROVIDER.getCredentials().getAWSAccessKeyId(); 60 | secretKey = configuration.AWS_CREDENTIALS_PROVIDER.getCredentials().getAWSSecretKey(); 61 | } 62 | 63 | @Override 64 | public List emit(final UnmodifiableBuffer buffer) throws IOException { 65 | List failed = super.emit(buffer); 66 | if (!failed.isEmpty()) { 67 | return buffer.getRecords(); 68 | } 69 | Connection conn = null; 70 | try { 71 | conn = DriverManager.getConnection(redshiftURL, loginProperties); 72 | String s3File = getS3FileName(buffer.getFirstSequenceNumber(), buffer.getLastSequenceNumber()); 73 | executeStatement(generateCopyStatement(s3File), conn); 74 | LOG.info("Successfully copied " + getNumberOfCopiedRecords(conn) 75 | + " records to Amazon Redshift from file s3://" + s3Bucket + "/" + s3File); 76 | closeConnection(conn); 77 | return Collections.emptyList(); 78 | } catch (IOException | SQLException e) { 79 | LOG.error(e); 80 | closeConnection(conn); 81 | return buffer.getRecords(); 82 | } 83 | } 84 | 85 | @Override 86 | public void fail(List records) { 87 | super.fail(records); 88 | } 89 | 90 | @Override 91 | public void shutdown() { 92 | super.shutdown(); 93 | } 94 | 95 | private void closeConnection(Connection conn) { 96 | try { 97 | conn.close(); 98 | } catch (Exception e) { 99 | LOG.error(e); 100 | } 101 | } 102 | 103 | protected String generateCopyStatement(String s3File) { 104 | StringBuilder exec = new StringBuilder(); 105 | exec.append("COPY " + redshiftTable + " "); 106 | exec.append("FROM 's3://" + s3bucket + "/" + s3File + "' "); 107 | exec.append("CREDENTIALS 'aws_access_key_id=" + accessKey); 108 | exec.append(";aws_secret_access_key=" + secretKey + "' "); 109 | exec.append("DELIMITER '" + redshiftDelimiter + "'"); 110 | exec.append(";"); 111 | return exec.toString(); 112 | } 113 | 114 | private void executeStatement(String statement, Connection conn) throws IOException { 115 | try { 116 | Statement stmt = conn.createStatement(); 117 | stmt.execute(statement); 118 | stmt.close(); 119 | return; 120 | } catch (SQLException e) { 121 | LOG.error(e); 122 | throw new IOException(e); 123 | } 124 | 125 | } 126 | 127 | private int getNumberOfCopiedRecords(Connection conn) throws IOException { 128 | String cmd = "select pg_last_copy_count();"; 129 | Statement stmt = null; 130 | ResultSet resultSet = null; 131 | try { 132 | stmt = conn.createStatement(); 133 | resultSet = stmt.executeQuery(cmd); 134 | resultSet.next(); 135 | int numCopiedRecords = 
resultSet.getInt(1); 136 | resultSet.close(); 137 | stmt.close(); 138 | return numCopiedRecords; 139 | } catch (SQLException e) { 140 | try { 141 | resultSet.close(); 142 | } catch (Exception e1) { 143 | } 144 | try { 145 | stmt.close(); 146 | } catch (Exception e1) { 147 | } 148 | throw new IOException(e); 149 | } 150 | 151 | } 152 | 153 | } 154 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/kinesis/connectors/KinesisConnectorRecordProcessor.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.kinesis.connectors; 6 | 7 | import java.io.IOException; 8 | import java.util.ArrayList; 9 | import java.util.Collection; 10 | import java.util.List; 11 | 12 | import org.apache.commons.logging.Log; 13 | import org.apache.commons.logging.LogFactory; 14 | 15 | import com.amazonaws.services.kinesis.clientlibrary.exceptions.InvalidStateException; 16 | import com.amazonaws.services.kinesis.clientlibrary.exceptions.KinesisClientLibDependencyException; 17 | import com.amazonaws.services.kinesis.clientlibrary.exceptions.ShutdownException; 18 | import com.amazonaws.services.kinesis.clientlibrary.exceptions.ThrottlingException; 19 | import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor; 20 | import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorCheckpointer; 21 | import com.amazonaws.services.kinesis.clientlibrary.types.ShutdownReason; 22 | import com.amazonaws.services.kinesis.connectors.interfaces.IBuffer; 23 | import com.amazonaws.services.kinesis.connectors.interfaces.IEmitter; 24 | import com.amazonaws.services.kinesis.connectors.interfaces.IFilter; 25 | import com.amazonaws.services.kinesis.connectors.interfaces.ICollectionTransformer; 26 | import com.amazonaws.services.kinesis.connectors.interfaces.ITransformer; 27 | import com.amazonaws.services.kinesis.connectors.interfaces.ITransformerBase; 28 | import com.amazonaws.services.kinesis.model.Record; 29 | 30 | /** 31 | * This is the base class for any KinesisConnector. It is configured by a constructor that takes in 32 | * as parameters implementations of the IBuffer, ITransformer, and IEmitter dependencies defined in 33 | * a IKinesisConnectorPipeline. It is typed to match the class that records are transformed into for 34 | * filtering and manipulation. This class is produced by a KinesisConnectorRecordProcessorFactory. 35 | *

36 | * When a Worker calls processRecords() on this class, the pipeline is used in the following way:
37 | *
38 | *   1. Records are transformed into the corresponding data model (parameter type T) via the ITransformer.
39 | *   2. Transformed records are passed to the IBuffer.consumeRecord() method, which may optionally filter based on the
40 | *      IFilter in the pipeline.
41 | *   3. When the buffer is full (IBuffer.shouldFlush() returns true), records are transformed with the ITransformer to
42 | *      the output type (parameter type U) and a call is made to IEmitter.emit(). IEmitter.emit() returning an empty list is
43 | *      considered a success, so the record processor will checkpoint and emit will not be retried. Non-empty return values
44 | *      will result in additional calls to emit with failed records as the unprocessed list until the retry limit is reached.
45 | *      Upon exceeding the retry limit or an exception being thrown, the IEmitter.fail() method will be called with the
46 | *      unprocessed records.
47 | *   4. When the shutdown() method of this class is invoked, a call is made to the IEmitter.shutdown() method which
48 | *      should close any existing client connections.
49 | *
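The retry-and-backoff behavior described in step 3 is implemented by the private emit() method further down in this class. Stripped of the KCL types and checkpointing, it behaves roughly like the following sketch, where the emitter, retry limit, and backoff are supplied by the caller:

    import java.util.ArrayList;
    import java.util.List;

    public class EmitRetrySketch {
        interface Emitter<U> {
            List<U> emit(List<U> items) throws Exception;   // returns the records that failed
            void fail(List<U> items);
        }

        static <U> void emitWithRetry(Emitter<U> emitter, List<U> items,
                                      int retryLimit, long backoffMillis) throws Exception {
            List<U> unprocessed = new ArrayList<>(items);
            for (int numTries = 0; numTries < retryLimit; numTries++) {
                unprocessed = emitter.emit(unprocessed);
                if (unprocessed.isEmpty()) {
                    break;                                   // success: everything was emitted
                }
                Thread.sleep(backoffMillis);                 // back off before retrying the leftovers
            }
            if (!unprocessed.isEmpty()) {
                emitter.fail(unprocessed);                   // give up on whatever is still unprocessed
            }
            // In the real processor the buffer is then cleared and the KCL checkpointer is
            // called, so successfully handled records are never re-read from the stream.
        }
    }
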
    50 | * 51 | */ 52 | public class KinesisConnectorRecordProcessor implements IRecordProcessor { 53 | 54 | private final IEmitter emitter; 55 | private final ITransformerBase transformer; 56 | private final IFilter filter; 57 | private final IBuffer buffer; 58 | private final int retryLimit; 59 | private final long backoffInterval; 60 | 61 | private static final Log LOG = LogFactory.getLog(KinesisConnectorRecordProcessor.class); 62 | 63 | private String shardId; 64 | 65 | public KinesisConnectorRecordProcessor(IBuffer buffer, 66 | IFilter filter, 67 | IEmitter emitter, 68 | ITransformerBase transformer, 69 | KinesisConnectorConfiguration configuration) { 70 | if (buffer == null || filter == null || emitter == null || transformer == null) { 71 | throw new IllegalArgumentException("buffer, filter, emitter, and transformer must not be null"); 72 | } 73 | this.buffer = buffer; 74 | this.filter = filter; 75 | this.emitter = emitter; 76 | this.transformer = transformer; 77 | // Limit must be greater than zero 78 | if (configuration.RETRY_LIMIT <= 0) { 79 | retryLimit = 1; 80 | } else { 81 | retryLimit = configuration.RETRY_LIMIT; 82 | } 83 | this.backoffInterval = configuration.BACKOFF_INTERVAL; 84 | } 85 | 86 | @Override 87 | public void initialize(String shardId) { 88 | this.shardId = shardId; 89 | } 90 | 91 | @Override 92 | public void processRecords(List records, IRecordProcessorCheckpointer checkpointer) { 93 | // Note: This method will be called even for empty record lists. This is needed for checking the buffer time 94 | // threshold. 95 | 96 | if (shardId == null) { 97 | throw new IllegalStateException("Record processor not initialized"); 98 | } 99 | 100 | // Transform each Amazon Kinesis Record and add the result to the buffer 101 | for (Record record : records) { 102 | try { 103 | if (transformer instanceof ITransformer) { 104 | ITransformer singleTransformer = (ITransformer) transformer; 105 | filterAndBufferRecord(singleTransformer.toClass(record), record); 106 | } else if (transformer instanceof ICollectionTransformer) { 107 | ICollectionTransformer listTransformer = (ICollectionTransformer) transformer; 108 | Collection transformedRecords = listTransformer.toClass(record); 109 | for (T transformedRecord : transformedRecords) { 110 | filterAndBufferRecord(transformedRecord, record); 111 | } 112 | } else { 113 | throw new RuntimeException("Transformer must implement ITransformer or ICollectionTransformer"); 114 | } 115 | } catch (IOException e) { 116 | LOG.error(e); 117 | } 118 | } 119 | 120 | if (buffer.shouldFlush()) { 121 | List emitItems = transformToOutput(buffer.getRecords()); 122 | emit(checkpointer, emitItems); 123 | } 124 | } 125 | 126 | private void filterAndBufferRecord(T transformedRecord, Record record) { 127 | if (filter.keepRecord(transformedRecord)) { 128 | buffer.consumeRecord(transformedRecord, record.getData().array().length, record.getSequenceNumber()); 129 | } 130 | } 131 | 132 | private List transformToOutput(List items) { 133 | List emitItems = new ArrayList(); 134 | for (T item : items) { 135 | try { 136 | emitItems.add(transformer.fromClass(item)); 137 | } catch (IOException e) { 138 | LOG.error("Failed to transform record " + item + " to output type", e); 139 | } 140 | } 141 | return emitItems; 142 | } 143 | 144 | private void emit(IRecordProcessorCheckpointer checkpointer, List emitItems) { 145 | List unprocessed = new ArrayList(emitItems); 146 | try { 147 | for (int numTries = 0; numTries < retryLimit; numTries++) { 148 | 149 | unprocessed = 
emitter.emit(new UnmodifiableBuffer(buffer, unprocessed)); 150 | if (unprocessed.isEmpty()) { 151 | break; 152 | } 153 | try { 154 | Thread.sleep(backoffInterval); 155 | } catch (InterruptedException e) { 156 | } 157 | } 158 | if (!unprocessed.isEmpty()) { 159 | emitter.fail(unprocessed); 160 | } 161 | buffer.clear(); 162 | // checkpoint once all the records have been consumed 163 | checkpointer.checkpoint(); 164 | } catch (IOException | KinesisClientLibDependencyException | InvalidStateException | ThrottlingException 165 | | ShutdownException e) { 166 | LOG.error(e); 167 | emitter.fail(unprocessed); 168 | } 169 | } 170 | 171 | @Override 172 | public void shutdown(IRecordProcessorCheckpointer checkpointer, ShutdownReason reason) { 173 | switch (reason) { 174 | case TERMINATE: 175 | emit(checkpointer, transformToOutput(buffer.getRecords())); 176 | break; 177 | case ZOMBIE: 178 | break; 179 | default: 180 | throw new IllegalStateException("invalid shutdown reason"); 181 | } 182 | LOG.info("shutting down record processor with shardId: " + shardId + " with reason " + reason); 183 | emitter.shutdown(); 184 | } 185 | 186 | } 187 | -------------------------------------------------------------------------------- /CognitoStreamsSample.json: -------------------------------------------------------------------------------- 1 | { 2 | "AWSTemplateFormatVersion": "2010-09-09", 3 | "Description": "Amazon Cognito Streams Sample", 4 | "Parameters": { 5 | "StreamName": { 6 | "Type": "String", 7 | "MinLength": "1", 8 | "MaxLength": "128", 9 | "Description": "Enter the kinesis stream that you associated with your identity pool.", 10 | "ConstraintDescription": "Enter in a valid string" 11 | }, 12 | "InboundTraffic": { 13 | "Description": "Allow inbound traffic to the cluster from this CIDR range.", 14 | "Type": "String", 15 | "MinLength": "9", 16 | "MaxLength": "18", 17 | "Default": "0.0.0.0/0", 18 | "AllowedPattern": "(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})/(\\d{1,2})", 19 | "ConstraintDescription": "must be a valid CIDR range of the form x.x.x.x/x." 20 | }, 21 | "PortNumber": { 22 | "Description": "The port number on which the cluster accepts incoming connections.", 23 | "Type": "Number", 24 | "Default": "5439" 25 | }, 26 | "MasterUserName": { 27 | "Description": "The username that is associated with the master user account for the cluster that is being created", 28 | "MinLength": "1", 29 | "MaxLength": "127", 30 | "Type": "String" 31 | }, 32 | "MasterUserPassword": { 33 | "Description": "The password that is associated with the master user account for the cluster that is being created (min 8 characters, contain at least one uppercase letter, one lowercase letter, and one number).", 34 | "Type": "String", 35 | "MinLength": "8", 36 | "MaxLength": "64", 37 | "ConstraintDescription": "Must meet Redshift password requirements (min 8 characters, contain at least one uppercase letter, one lowercase letter, and one number).", 38 | "NoEcho": "true" 39 | }, 40 | "AlarmEmail": { 41 | "Description": "Email address to notify if there are any operational issues", 42 | "Type": "String", 43 | "AllowedPattern": "([a-zA-Z0-9_.-]+)@([a-zA-Z0-9-]+\\.)+[a-zA-Z]{2,4}", 44 | "ConstraintDescription": "must be a valid email address." 
45 | }, 46 | "Delay": { 47 | "Type": "Number", 48 | "Description": "Enter the delay (in milliseconds) in consumption from stream after which alarm will be triggered.", 49 | "Default": "3600000" 50 | }, 51 | "KinesisRecordThreshold": { 52 | "Type": "Number", 53 | "Description": "Enter the incoming records in 5 mins in Kinesis after that you will be notified to provision more shards. Each shard can support up to 1000 records written per second.", 54 | "Default": "240000" 55 | } 56 | }, 57 | "Resources": { 58 | "AmazonCognitoStreamsSampleRole": { 59 | "Type": "AWS::IAM::Role", 60 | "Properties": { 61 | "AssumeRolePolicyDocument": { 62 | "Version": "2012-10-17", 63 | "Statement": [ 64 | { 65 | "Effect": "Allow", 66 | "Principal": { 67 | "Service": [ 68 | "ec2.amazonaws.com" 69 | ] 70 | }, 71 | "Action": [ 72 | "sts:AssumeRole" 73 | ] 74 | } 75 | ] 76 | }, 77 | "Path": "/", 78 | "Policies": [ 79 | { 80 | "PolicyName": "AmazonCognitoStreamsSamplePolicy", 81 | "PolicyDocument": { 82 | "Version": "2012-10-17", 83 | "Statement": [ 84 | { 85 | "Action": [ 86 | "dynamodb:*", 87 | "s3:*", 88 | "cloudwatch:*", 89 | "kinesis:*" 90 | ], 91 | "Effect": "Allow", 92 | "Resource": "*" 93 | } 94 | ] 95 | } 96 | } 97 | ] 98 | } 99 | }, 100 | "AmazonCognitoStreamsSampleInstanceProfile": { 101 | "Type": "AWS::IAM::InstanceProfile", 102 | "Properties": { 103 | "Path": "/", 104 | "Roles": [ 105 | { 106 | "Ref": "AmazonCognitoStreamsSampleRole" 107 | } 108 | ] 109 | } 110 | }, 111 | "AmazonCognitoStreamsSampleApplication": { 112 | "Type": "AWS::ElasticBeanstalk::Application", 113 | "Properties": { 114 | "Description": "Sample application for showcasing Amazon Cognito streams", 115 | "ApplicationVersions": [ 116 | { 117 | "VersionLabel": "Version1", 118 | "Description": "Release version", 119 | "SourceBundle": { 120 | "S3Bucket": { "Fn::Join": ["-", [ "amazon-cognito-samples", { "Ref": "AWS::Region" } ] ] }, 121 | "S3Key": "AmazonCognitoStreamsSample-1.0.war" 122 | } 123 | } 124 | ], 125 | "ConfigurationTemplates": [ 126 | { 127 | "TemplateName": "DefaultConfiguration", 128 | "Description": "64bit Amazon Linux running Tomcat 7", 129 | "SolutionStackName": "64bit Amazon Linux 2014.09 v1.1.0 running Tomcat 7 Java 7", 130 | "OptionSettings": [ 131 | { 132 | "Namespace": "aws:elasticbeanstalk:environment", 133 | "OptionName": "EnvironmentType", 134 | "Value": "SingleInstance" 135 | }, 136 | { 137 | "Namespace": "aws:autoscaling:launchconfiguration", 138 | "OptionName": "InstanceType", 139 | "Value": "t2.micro" 140 | }, 141 | { 142 | "Namespace": "aws:autoscaling:asg", 143 | "OptionName": "MaxSize", 144 | "Value": "1" 145 | }, 146 | { 147 | "Namespace": "aws:autoscaling:launchconfiguration", 148 | "OptionName": "IamInstanceProfile", 149 | "Value": { 150 | "Ref": "AmazonCognitoStreamsSampleInstanceProfile" 151 | } 152 | }, 153 | { 154 | "Namespace": "aws:ec2:vpc", 155 | "OptionName": "VPCId", 156 | "Value": { "Ref": "VPC" } 157 | }, 158 | { 159 | "Namespace": "aws:ec2:vpc", 160 | "OptionName": "Subnets", 161 | "Value": { "Ref" : "PublicSubnet" } 162 | }, 163 | { 164 | "Namespace": "aws:elasticbeanstalk:application:environment", 165 | "OptionName": "JDBC_CONNECTION_STRING", 166 | "Value": { 167 | "Fn::Join": [ 168 | "", 169 | [ 170 | "jdbc:postgresql://", 171 | { 172 | "Fn::GetAtt": [ 173 | "RedshiftCluster", 174 | "Endpoint.Address" 175 | ] 176 | }, 177 | ":", 178 | { 179 | "Fn::GetAtt": [ 180 | "RedshiftCluster", 181 | "Endpoint.Port" 182 | ] 183 | }, 184 | "/cognito?tcpKeepAlive=true" 185 | ] 186 | ] 187 | } 188 | }, 
189 | { 190 | "Namespace": "aws:elasticbeanstalk:application:environment", 191 | "OptionName": "PARAM1", 192 | "Value": { 193 | "Ref": "StreamName" 194 | } 195 | }, 196 | { 197 | "Namespace": "aws:elasticbeanstalk:application:environment", 198 | "OptionName": "PARAM2", 199 | "Value": { 200 | "Ref": "MasterUserName" 201 | } 202 | }, 203 | { 204 | "Namespace": "aws:elasticbeanstalk:application:environment", 205 | "OptionName": "PARAM3", 206 | "Value": { 207 | "Ref": "MasterUserPassword" 208 | } 209 | }, 210 | { 211 | "Namespace": "aws:elasticbeanstalk:application:environment", 212 | "OptionName": "PARAM4", 213 | "Value": { 214 | "Ref": "S3IntermediateBucket" 215 | } 216 | }, 217 | { 218 | "Namespace": "aws:elasticbeanstalk:application:environment", 219 | "OptionName": "PARAM5", 220 | "Value": { 221 | "Ref": "AWS::Region" 222 | } 223 | } 224 | ] 225 | } 226 | ] 227 | } 228 | }, 229 | "AmazonCognitoStreamsSampleEnvironment": { 230 | "Type": "AWS::ElasticBeanstalk::Environment", 231 | "Properties": { 232 | "ApplicationName": { 233 | "Ref": "AmazonCognitoStreamsSampleApplication" 234 | }, 235 | "Description": "AmazonCognitoStreamsSampleEnvironment", 236 | "TemplateName": "DefaultConfiguration", 237 | "VersionLabel": "Version1" 238 | } 239 | }, 240 | "S3IntermediateBucket": { 241 | "Type": "AWS::S3::Bucket", 242 | "DeletionPolicy": "Retain", 243 | "Properties": { 244 | "LifecycleConfiguration": { 245 | "Rules": [ 246 | { 247 | "ExpirationInDays": "10", 248 | "Id": "delete-old-files", 249 | "Status": "Enabled" 250 | } 251 | ] 252 | } 253 | } 254 | }, 255 | "RedshiftCluster": { 256 | "Type": "AWS::Redshift::Cluster", 257 | "DependsOn": "AttachGateway", 258 | "Properties": { 259 | "DBName": "cognito", 260 | "MasterUsername": { 261 | "Ref": "MasterUserName" 262 | }, 263 | "MasterUserPassword": { 264 | "Ref": "MasterUserPassword" 265 | }, 266 | "NodeType": "dw2.large", 267 | "ClusterType": "single-node", 268 | "ClusterParameterGroupName": { 269 | "Ref": "RedshiftClusterParameterGroup" 270 | }, 271 | "VpcSecurityGroupIds": [ 272 | { 273 | "Ref": "SecurityGroup" 274 | } 275 | ], 276 | "ClusterSubnetGroupName": { 277 | "Ref": "RedshiftClusterSubnetGroup" 278 | }, 279 | "PubliclyAccessible": "true", 280 | "Port": { 281 | "Ref": "PortNumber" 282 | } 283 | } 284 | }, 285 | "RedshiftClusterParameterGroup": { 286 | "Type": "AWS::Redshift::ClusterParameterGroup", 287 | "Properties": { 288 | "Description": "Cluster parameter group", 289 | "ParameterGroupFamily": "redshift-1.0", 290 | "Parameters": [ 291 | { 292 | "ParameterName": "enable_user_activity_logging", 293 | "ParameterValue": "true" 294 | } 295 | ] 296 | } 297 | }, 298 | "RedshiftClusterSubnetGroup": { 299 | "Type": "AWS::Redshift::ClusterSubnetGroup", 300 | "Properties": { 301 | "Description": "Cluster subnet group", 302 | "SubnetIds": [ 303 | { 304 | "Ref": "PublicSubnet" 305 | } 306 | ] 307 | } 308 | }, 309 | "VPC": { 310 | "Type": "AWS::EC2::VPC", 311 | "Properties": { 312 | "CidrBlock": "10.0.0.0/16" 313 | } 314 | }, 315 | "PublicSubnet": { 316 | "Type": "AWS::EC2::Subnet", 317 | "Properties": { 318 | "CidrBlock": "10.0.0.0/24", 319 | "VpcId": { 320 | "Ref": "VPC" 321 | } 322 | } 323 | }, 324 | "SecurityGroup": { 325 | "Type": "AWS::EC2::SecurityGroup", 326 | "Properties": { 327 | "GroupDescription": "Security group", 328 | "SecurityGroupIngress": [ 329 | { 330 | "CidrIp": { 331 | "Ref": "InboundTraffic" 332 | }, 333 | "FromPort": { 334 | "Ref": "PortNumber" 335 | }, 336 | "ToPort": { 337 | "Ref": "PortNumber" 338 | }, 339 | "IpProtocol": "tcp" 
340 | } 341 | ], 342 | "VpcId": { 343 | "Ref": "VPC" 344 | } 345 | } 346 | }, 347 | "myInternetGateway": { 348 | "Type": "AWS::EC2::InternetGateway" 349 | }, 350 | "AttachGateway": { 351 | "Type": "AWS::EC2::VPCGatewayAttachment", 352 | "Properties": { 353 | "VpcId": { 354 | "Ref": "VPC" 355 | }, 356 | "InternetGatewayId": { 357 | "Ref": "myInternetGateway" 358 | } 359 | } 360 | }, 361 | "PublicRouteTable": { 362 | "Type": "AWS::EC2::RouteTable", 363 | "Properties": { 364 | "VpcId": { 365 | "Ref": "VPC" 366 | } 367 | } 368 | }, 369 | "PublicRoute": { 370 | "Type": "AWS::EC2::Route", 371 | "DependsOn": "AttachGateway", 372 | "Properties": { 373 | "RouteTableId": { 374 | "Ref": "PublicRouteTable" 375 | }, 376 | "DestinationCidrBlock": "0.0.0.0/0", 377 | "GatewayId": { 378 | "Ref": "myInternetGateway" 379 | } 380 | } 381 | }, 382 | "PublicSubnetRouteTableAssociation": { 383 | "Type": "AWS::EC2::SubnetRouteTableAssociation", 384 | "Properties": { 385 | "SubnetId": { 386 | "Ref": "PublicSubnet" 387 | }, 388 | "RouteTableId": { 389 | "Ref": "PublicRouteTable" 390 | } 391 | } 392 | }, 393 | "DynamoDBTable": { 394 | "Type": "AWS::DynamoDB::Table", 395 | "Properties": { 396 | "AttributeDefinitions": [ 397 | { 398 | "AttributeName": "leaseKey", 399 | "AttributeType": "S" 400 | } 401 | ], 402 | "KeySchema": [ 403 | { 404 | "AttributeName": "leaseKey", 405 | "KeyType": "HASH" 406 | } 407 | ], 408 | "ProvisionedThroughput": { 409 | "ReadCapacityUnits": 10, 410 | "WriteCapacityUnits": 10 411 | }, 412 | "TableName": "AmazonCognitoStreamsSample" 413 | } 414 | }, 415 | "AlarmTopic": { 416 | "Type": "AWS::SNS::Topic", 417 | "Properties": { 418 | "Subscription": [ 419 | { 420 | "Endpoint": { 421 | "Ref": "AlarmEmail" 422 | }, 423 | "Protocol": "email" 424 | } 425 | ] 426 | } 427 | }, 428 | "KinesisIteratorAgeAlarm": { 429 | "Type": "AWS::CloudWatch::Alarm", 430 | "Properties": { 431 | "AlarmDescription": "Alarm if Kinesis iterator age goes above 1 hour", 432 | "Namespace": "AWS/Kinesis", 433 | "MetricName": "GetRecords.IteratorAge", 434 | "Dimensions": [ 435 | { 436 | "Name": "StreamName", 437 | "Value": { 438 | "Ref": "StreamName" 439 | } 440 | } 441 | ], 442 | "Statistic": "Average", 443 | "Period": "300", 444 | "EvaluationPeriods": "2", 445 | "Threshold": { 446 | "Ref": "Delay" 447 | }, 448 | "ComparisonOperator": "GreaterThanThreshold", 449 | "AlarmActions": [ 450 | { 451 | "Ref": "AlarmTopic" 452 | } 453 | ] 454 | } 455 | }, 456 | "KinesisNoDataAlarm": { 457 | "Type": "AWS::CloudWatch::Alarm", 458 | "Properties": { 459 | "AlarmDescription": "Alarm if there is no data in Kinesis for 15 mins", 460 | "Namespace": "AWS/Kinesis", 461 | "MetricName": "IncomingRecords", 462 | "Dimensions": [ 463 | { 464 | "Name": "StreamName", 465 | "Value": { 466 | "Ref": "StreamName" 467 | } 468 | } 469 | ], 470 | "Statistic": "Sum", 471 | "Period": "300", 472 | "EvaluationPeriods": "3", 473 | "Threshold": 1, 474 | "ComparisonOperator": "LessThanThreshold", 475 | "AlarmActions": [ 476 | { 477 | "Ref": "AlarmTopic" 478 | } 479 | ] 480 | } 481 | }, 482 | "KinesisHighIncomingRecords": { 483 | "Type": "AWS::CloudWatch::Alarm", 484 | "Properties": { 485 | "AlarmDescription": "Alarm if there incoming rate to Kinesis is high.", 486 | "Namespace": "AWS/Kinesis", 487 | "MetricName": "IncomingRecords", 488 | "Dimensions": [ 489 | { 490 | "Name": "StreamName", 491 | "Value": { 492 | "Ref": "StreamName" 493 | } 494 | } 495 | ], 496 | "Statistic": "Sum", 497 | "Period": "300", 498 | "EvaluationPeriods": "3", 499 | "Threshold": { 500 | 
"Ref": "KinesisRecordThreshold" 501 | }, 502 | "ComparisonOperator": "GreaterThanThreshold", 503 | "AlarmActions": [ 504 | { 505 | "Ref": "AlarmTopic" 506 | } 507 | ] 508 | } 509 | } 510 | }, 511 | "Outputs": { 512 | "ClusterEndpoint": { 513 | "Description": "Cluster JDBC Connection", 514 | "Value": { 515 | "Fn::Join": [ 516 | "", 517 | [ 518 | "jdbc:postgresql://", 519 | { 520 | "Fn::GetAtt": [ 521 | "RedshiftCluster", 522 | "Endpoint.Address" 523 | ] 524 | }, 525 | ":", 526 | { 527 | "Fn::GetAtt": [ 528 | "RedshiftCluster", 529 | "Endpoint.Port" 530 | ] 531 | }, 532 | "/cognito?tcpKeepAlive=true" 533 | ] 534 | ] 535 | } 536 | } 537 | } 538 | } 539 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/kinesis/connectors/redshift/RedshiftManifestEmitter.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.kinesis.connectors.redshift; 6 | 7 | import java.io.ByteArrayInputStream; 8 | import java.io.IOException; 9 | import java.sql.Connection; 10 | import java.sql.DriverManager; 11 | import java.sql.ResultSet; 12 | import java.sql.SQLException; 13 | import java.sql.Statement; 14 | import java.util.ArrayList; 15 | import java.util.Collection; 16 | import java.util.Collections; 17 | import java.util.List; 18 | import java.util.Properties; 19 | import java.util.SortedSet; 20 | import java.util.TreeSet; 21 | 22 | import org.apache.commons.logging.Log; 23 | import org.apache.commons.logging.LogFactory; 24 | 25 | import com.amazonaws.auth.AWSCredentials; 26 | import com.amazonaws.auth.AWSCredentialsProvider; 27 | import com.amazonaws.auth.AWSSessionCredentials; 28 | import com.amazonaws.services.kinesis.connectors.KinesisConnectorConfiguration; 29 | import com.amazonaws.services.kinesis.connectors.UnmodifiableBuffer; 30 | import com.amazonaws.services.kinesis.connectors.interfaces.IEmitter; 31 | import com.amazonaws.services.s3.AmazonS3Client; 32 | import com.amazonaws.services.s3.model.PutObjectRequest; 33 | 34 | /** 35 | * This implementation of IEmitter collects filenames from an Amazon Kinesis stream that has been started by 36 | * a S3ManifestEmitter. The RedshiftManifestEmitter processes the list of Amazon S3 file names, generates a 37 | * manifest file and performs an Amazon Redshift copy. The Amazon Redshift copy is done using transactions to 38 | * prevent duplication of objects in Amazon Redshift.
39 | * It follows the following procedure:
40 | *
41 | *   1. Write manifest file to Amazon S3
42 | *   2. Begin Amazon Redshift transaction
43 | *   3. If any files already exist in Amazon Redshift, return and checkpoint (this transaction already completed
44 | *      successfully so no need to duplicate)
45 | *   4. Write file names to Amazon Redshift file table
46 | *   5. Call Amazon Redshift copy
47 | *   6. Commit Amazon Redshift Transaction
48 | *
49 | *
50 | * This class requires the configuration of an Amazon S3 bucket and endpoint, as well as the following Amazon Redshift
51 | * items:
52 | *
53 | *   • Amazon Redshift URL
54 | *   • username and password
55 | *   • data table and key column (data table stores items from the manifest copy)
56 | *   • file table and key column (file table is used to store file names to prevent duplicate entries)
57 | *   • mandatory flag for Amazon Redshift copy
58 | *   • the delimiter used for string parsing when inserting entries into Amazon Redshift
59 | *
60 | *
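The procedure above boils down to one JDBC transaction per manifest, which is what makes the copy idempotent. A skeletal sketch of that transaction, using hypothetical table names, a placeholder cluster URL, and placeholder credentials, and omitting the duplicate-file check from step 3 for brevity:

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.SQLException;
    import java.sql.Statement;

    public class ManifestCopyTransactionSketch {
        public static void main(String[] args) throws SQLException {
            String redshiftURL = "jdbc:postgresql://example-cluster:5439/cognito";   // placeholder URL
            Connection conn = DriverManager.getConnection(redshiftURL, "user", "password");
            try {
                conn.setAutoCommit(false);                       // one transaction covers files + data
                Statement stmt = conn.createStatement();
                // Record the manifest's file names so a re-run can detect already-copied files.
                stmt.execute("INSERT INTO file_table VALUES ('file-a'),('file-b');");
                // Load the data itself with a manifest COPY.
                stmt.execute("COPY data_table FROM 's3://bucket/manifests/file-a-file-b' "
                        + "CREDENTIALS 'aws_access_key_id=AKIA...;aws_secret_access_key=...' "
                        + "DELIMITER '|' MANIFEST;");
                stmt.close();
                conn.commit();                                   // both steps succeed or neither does
            } catch (SQLException e) {
                conn.rollback();                                 // a failed copy leaves no file-table entries behind
                throw e;
            } finally {
                conn.close();
            }
        }
    }
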
    61 | * NOTE: Amazon S3 bucket and Amazon Redshift table must be in the same region for Manifest Copy. 62 | */ 63 | public class RedshiftManifestEmitter implements IEmitter { 64 | private static final Log LOG = LogFactory.getLog(RedshiftManifestEmitter.class); 65 | private final String s3Bucket; 66 | private final String dataTable; 67 | private final String fileTable; 68 | private final String fileKeyColumn; 69 | private final char dataDelimiter; 70 | private final AWSCredentialsProvider credentialsProvider; 71 | private final String s3Endpoint; 72 | private final AmazonS3Client s3Client; 73 | private final boolean copyMandatory; 74 | private final Properties loginProps; 75 | private final String redshiftURL; 76 | private static final String MANIFEST_PREFIX = "manifests/"; 77 | 78 | public RedshiftManifestEmitter(KinesisConnectorConfiguration configuration) { 79 | dataTable = configuration.REDSHIFT_DATA_TABLE; 80 | fileTable = configuration.REDSHIFT_FILE_TABLE; 81 | fileKeyColumn = configuration.REDSHIFT_FILE_KEY_COLUMN; 82 | dataDelimiter = configuration.REDSHIFT_DATA_DELIMITER; 83 | copyMandatory = configuration.REDSHIFT_COPY_MANDATORY; 84 | s3Bucket = configuration.S3_BUCKET; 85 | s3Endpoint = configuration.S3_ENDPOINT; 86 | s3Client = new AmazonS3Client(configuration.AWS_CREDENTIALS_PROVIDER); 87 | if (s3Endpoint != null) { 88 | s3Client.setEndpoint(s3Endpoint); 89 | } 90 | credentialsProvider = configuration.AWS_CREDENTIALS_PROVIDER; 91 | loginProps = new Properties(); 92 | loginProps.setProperty("user", configuration.REDSHIFT_USERNAME); 93 | loginProps.setProperty("password", configuration.REDSHIFT_PASSWORD); 94 | redshiftURL = configuration.REDSHIFT_URL; 95 | } 96 | 97 | @Override 98 | public List emit(final UnmodifiableBuffer buffer) throws IOException { 99 | List records = buffer.getRecords(); 100 | Connection conn = null; 101 | 102 | String manifestFileName = getManifestFile(records); 103 | // Copy to Amazon Redshift using manifest file 104 | try { 105 | conn = DriverManager.getConnection(redshiftURL, loginProps); 106 | conn.setAutoCommit(false); 107 | List deduplicatedRecords = checkForExistingFiles(conn, records); 108 | if (deduplicatedRecords.isEmpty()) { 109 | LOG.info("All the files in this set were already copied to Redshift."); 110 | // All of these files were already written 111 | rollbackAndCloseConnection(conn); 112 | records.clear(); 113 | return Collections.emptyList(); 114 | } 115 | 116 | if (deduplicatedRecords.size() != records.size()) { 117 | manifestFileName = getManifestFile(deduplicatedRecords); 118 | } 119 | // Write manifest file to Amazon S3 120 | try { 121 | writeManifestToS3(manifestFileName, records); 122 | } catch (Exception e) { 123 | LOG.error("Error writing file " + manifestFileName + " to S3. 
Failing this emit attempt.", e); 124 | return buffer.getRecords(); 125 | } 126 | 127 | LOG.info("Inserting " + deduplicatedRecords.size() + " rows into the files table."); 128 | insertRecords(conn, deduplicatedRecords); 129 | LOG.info("Initiating Amazon Redshift manifest copy of " + deduplicatedRecords.size() + " files."); 130 | redshiftCopy(conn, manifestFileName); 131 | conn.commit(); 132 | LOG.info("Successful Amazon Redshift manifest copy of " + getNumberOfCopiedRecords(conn) + " records from " 133 | + deduplicatedRecords.size() + " files using manifest s3://" + s3Bucket + "/" 134 | + getManifestFile(records)); 135 | closeConnection(conn); 136 | return Collections.emptyList(); 137 | } catch (SQLException | IOException e) { 138 | LOG.error("Error copying data from manifest file " + manifestFileName 139 | + " into Amazon Redshift. Failing this emit attempt.", e); 140 | rollbackAndCloseConnection(conn); 141 | return buffer.getRecords(); 142 | } catch (Exception e) { 143 | LOG.error("Error copying data from manifest file " + manifestFileName 144 | + " into Redshift. Failing this emit attempt.", e); 145 | rollbackAndCloseConnection(conn); 146 | return buffer.getRecords(); 147 | } 148 | } 149 | 150 | private void rollbackAndCloseConnection(Connection conn) { 151 | try { 152 | if ((conn != null) && (!conn.isClosed())) { 153 | conn.rollback(); 154 | } 155 | } catch (Exception e) { 156 | LOG.error("Unable to rollback Amazon Redshift transaction.", e); 157 | } 158 | closeConnection(conn); 159 | } 160 | 161 | private void closeConnection(Connection conn) { 162 | try { 163 | if ((conn != null) && (!conn.isClosed())) { 164 | conn.close(); 165 | } 166 | } catch (Exception e) { 167 | LOG.error("Unable to close Amazon Redshift connection.", e); 168 | } 169 | } 170 | 171 | @Override 172 | public void fail(List records) { 173 | for (String record : records) { 174 | LOG.error("Record failed: " + record); 175 | } 176 | } 177 | 178 | /** 179 | * Generates manifest file and writes it to Amazon S3 180 | * 181 | * @param fileName Name of manifest file (Amazon S3 key) 182 | * @param records Used to generate the manifest file 183 | * @throws IOException 184 | */ 185 | private String writeManifestToS3(String fileName, List records) throws IOException { 186 | String fileContents = generateManifestFile(records); 187 | // upload generated manifest file 188 | PutObjectRequest putObjectRequest = 189 | new PutObjectRequest(s3Bucket, fileName, new ByteArrayInputStream(fileContents.getBytes()), null); 190 | s3Client.putObject(putObjectRequest); 191 | return fileName; 192 | } 193 | 194 | /** 195 | * 196 | * Inserts the records to the fileTable using a SQL String in the format: INSERT INTO fileTable 197 | * VALUES ('f1'),('f2'),...; 198 | * 199 | * @param records 200 | * @throws IOException 201 | */ 202 | private void insertRecords(Connection conn, Collection records) throws IOException { 203 | String toInsert = getCollectionString(records, "(", "),(", ")"); 204 | StringBuilder insertSQL = new StringBuilder(); 205 | insertSQL.append("INSERT INTO "); 206 | insertSQL.append(fileTable); 207 | insertSQL.append(" VALUES "); 208 | insertSQL.append(toInsert); 209 | insertSQL.append(";"); 210 | executeStatement(conn, insertSQL.toString()); 211 | } 212 | 213 | /** 214 | * Selects the count of files that are already present in Amazon Redshift using a SQL Query in the 215 | * format: SELECT COUNT(*) FROM fileTable WHERE fileKeyColumn IN ('f1','f2',...); 216 | * 217 | * @param records 218 | * @return Deduplicated list of files 219 | 
220 |      */
221 | 
222 |     private List<String> checkForExistingFiles(Connection conn, List<String> records) throws IOException {
223 |         SortedSet<String> recordSet = new TreeSet<>(records);
224 |         String files = getCollectionString(recordSet, "(", ",", ")");
225 |         StringBuilder selectExisting = new StringBuilder();
226 |         selectExisting.append("SELECT " + fileKeyColumn + " FROM ");
227 |         selectExisting.append(fileTable);
228 |         selectExisting.append(" WHERE ");
229 |         selectExisting.append(fileKeyColumn);
230 |         selectExisting.append(" IN ");
231 |         selectExisting.append(files);
232 |         selectExisting.append(";");
233 |         Statement stmt = null;
234 |         ResultSet resultSet = null;
235 |         try {
236 |             stmt = conn.createStatement();
237 |             final String query = selectExisting.toString();
238 |             resultSet = stmt.executeQuery(query);
239 |             while (resultSet.next()) {
240 |                 String existingFile = resultSet.getString(1);
241 |                 LOG.info("File " + existingFile + " has already been copied. Leaving it out.");
242 |                 recordSet.remove(existingFile);
243 |             }
244 |             resultSet.close();
245 |             stmt.close();
246 |             return new ArrayList<String>(recordSet);
247 |         } catch (SQLException e) {
248 |             try {
249 |                 resultSet.close();
250 |             } catch (Exception e1) {
251 |             }
252 |             try {
253 |                 stmt.close();
254 |             } catch (Exception e1) {
255 |             }
256 |             throw new IOException(e);
257 |         }
258 |     }
259 | 
260 |     private int getNumberOfCopiedRecords(Connection conn) throws IOException {
261 |         String cmd = "select pg_last_copy_count();";
262 |         Statement stmt = null;
263 |         ResultSet resultSet = null;
264 |         try {
265 |             stmt = conn.createStatement();
266 |             resultSet = stmt.executeQuery(cmd);
267 |             resultSet.next();
268 |             int numCopiedRecords = resultSet.getInt(1);
269 |             resultSet.close();
270 |             stmt.close();
271 |             return numCopiedRecords;
272 |         } catch (SQLException e) {
273 |             try {
274 |                 resultSet.close();
275 |             } catch (Exception e1) {
276 |             }
277 |             try {
278 |                 stmt.close();
279 |             } catch (Exception e1) {
280 |             }
281 |             throw new IOException(e);
282 |         }
283 | 
284 |     }
285 | 
286 |     /**
287 |      * Executes an Amazon Redshift copy from Amazon S3 using a manifest file with a command in the format: COPY
288 |      * dataTable FROM 's3://s3Bucket/manifestFile' CREDENTIALS
289 |      * 'aws_access_key_id=accessKey;aws_secret_access_key=secretKey' DELIMITER dataDelimiter
290 |      * MANIFEST;
291 |      *
292 |      * @param manifestFile Name of manifest file
293 |      * @throws IOException
294 |      */
295 |     protected void redshiftCopy(Connection conn, String manifestFile) throws IOException {
296 |         AWSCredentials credentials = credentialsProvider.getCredentials();
297 |         StringBuilder redshiftCopy = new StringBuilder();
298 |         redshiftCopy.append("COPY " + dataTable + " ");
299 |         redshiftCopy.append("FROM 's3://" + s3Bucket + "/" + manifestFile + "' ");
300 |         redshiftCopy.append("CREDENTIALS '");
301 |         redshiftCopy.append("aws_access_key_id=" + credentials.getAWSAccessKeyId());
302 |         redshiftCopy.append(";");
303 |         redshiftCopy.append("aws_secret_access_key=" + credentials.getAWSSecretKey());
304 |         if (credentials instanceof AWSSessionCredentials) {
305 |             redshiftCopy.append(";");
306 |             redshiftCopy.append("token=" + ((AWSSessionCredentials) credentials).getSessionToken());
307 |         }
308 |         redshiftCopy.append("' ");
309 |         redshiftCopy.append("DELIMITER '" + dataDelimiter + "' ");
310 |         redshiftCopy.append("MANIFEST");
311 |         redshiftCopy.append(";");
312 |         executeStatement(conn, redshiftCopy.toString());
313 |     }
314 | 
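    // Illustrative sketch only -- the values below are hypothetical and not taken from this sample.
    // With dataTable "users", s3Bucket "my-export-bucket", dataDelimiter '|' and manifest key
    // "manifests/f1-f3", redshiftCopy(...) above would build and execute a statement equivalent to:
    //
    //   COPY users FROM 's3://my-export-bucket/manifests/f1-f3'
    //   CREDENTIALS 'aws_access_key_id=AKIA...;aws_secret_access_key=...'
    //   DELIMITER '|' MANIFEST;
    //
    // The session token clause is appended only when the provider returns AWSSessionCredentials.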
315 |     /**
316 |      * Helper function to execute a SQL statement with no results. Throws an IOException if the
317 |      * statement fails, so the surrounding emit attempt can be failed and retried.
318 |      *
319 |      * @param statement
320 |      * @throws IOException
321 |      */
322 |     private void executeStatement(Connection conn, String statement) throws IOException {
323 |         try {
324 |             Statement stmt = conn.createStatement();
325 |             stmt.execute(statement);
326 |             stmt.close();
327 |             return;
328 |         } catch (SQLException e) {
329 |             LOG.error("Amazon S3 endpoint set to: " + s3Endpoint);
330 |             LOG.error("Error executing statement: " + statement, e);
331 |             throw new IOException(e);
332 |         }
333 |     }
334 | 
335 |     /**
336 |      * Builds a String from the members of a Collection of String
337 |      *
338 |      * @param members
339 |      *            Collection of String, each member will be surrounded by single quotes
340 |      * @param prepend
341 |      *            beginning of String
342 |      * @param delimiter
343 |      *            between each member
344 |      * @param append
345 |      *            end of String
346 |      * @return String in format: {prepend}
347 |      *         '{member1}'{delimiter}'{member2}'{delimiter}...'{lastMember}'{append}
348 |      */
349 |     private String getCollectionString(Collection<String> members, String prepend, String delimiter, String append) {
350 |         StringBuilder s = new StringBuilder();
351 |         s.append(prepend);
352 |         for (String m : members) {
353 |             s.append("'");
354 |             s.append(m);
355 |             s.append("'");
356 |             s.append(delimiter);
357 |         }
358 |         s.replace(s.length() - delimiter.length(), s.length(), "");
359 |         s.append(append);
360 |         return s.toString();
361 |     }
362 | 
363 |     /**
364 |      * Manifest file is named in the format manifests/{firstFileName}-{lastFileName}
365 |      *
366 |      * @param records
367 |      * @return Manifest file name
368 |      */
369 |     private String getManifestFile(List<String> records) {
370 |         return MANIFEST_PREFIX + records.get(0) + "-" + records.get(records.size() - 1);
371 |     }
372 | 
373 |     /**
374 |      * Format for Amazon Redshift Manifest File:
375 |      *
376 |      *
    377 |      * {
    378 |      * 	"entries": [
    379 |      * 		{"url":"s3://s3Bucket/file1","mandatory":true},
    380 |      * 		{"url":"s3://s3Bucket/file2","mandatory":true},
    381 |      * 		{"url":"s3://s3Bucket/file3","mandatory":true}
    382 |      * 	]
    383 |      * }
    384 |      * 
    385 |      * 
    386 | * 387 | * 388 | * @param files 389 | * @return String representation of Amazon S3 manifest file 390 | */ 391 | private String generateManifestFile(List files) { 392 | StringBuilder s = new StringBuilder(); 393 | s.append("{\n"); 394 | s.append("\t\"entries\": [\n"); 395 | for (String file : files) { 396 | s.append("\t\t{"); 397 | s.append("\"url\":\"s3://"); 398 | s.append(s3Bucket); 399 | s.append("/"); 400 | s.append(file); 401 | s.append("\""); 402 | s.append(","); 403 | s.append("\"mandatory\":" + Boolean.toString(copyMandatory)); 404 | s.append("},\n"); 405 | } 406 | s.replace(s.length() - 2, s.length() - 1, ""); 407 | s.append("\t]\n"); 408 | s.append("}\n"); 409 | return s.toString(); 410 | } 411 | 412 | @Override 413 | public void shutdown() { 414 | s3Client.shutdown(); 415 | } 416 | 417 | } 418 | -------------------------------------------------------------------------------- /src/com/amazonaws/services/kinesis/connectors/KinesisConnectorConfiguration.java: -------------------------------------------------------------------------------- 1 | /* 2 | // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | // SPDX-License-Identifier: MIT-0. 4 | */ 5 | package com.amazonaws.services.kinesis.connectors; 6 | 7 | import java.rmi.dgc.VMID; 8 | import java.util.Properties; 9 | 10 | import org.apache.commons.logging.Log; 11 | import org.apache.commons.logging.LogFactory; 12 | 13 | import com.amazonaws.auth.AWSCredentialsProvider; 14 | import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream; 15 | 16 | /** 17 | * This class contains constants used to configure AWS Services in Amazon Kinesis Connectors. The user 18 | * should use System properties to set their proper configuration. An instance of 19 | * KinesisConnectorConfiguration is created with System properties and an AWSCredentialsProvider. 20 | * For example: 21 | * 22 | *
     23 |  * Properties prop = new Properties();
     24 |  * prop.put(KinesisConnectorConfiguration.PROP_APP_NAME, "MyKinesisConnector");
     25 |  * KinesisConnectorConfiguration config =
     26 |  *         new KinesisConnectorConfiguration(prop, new DefaultAWSCredentialsProviderChain());
     27 |  * 
    28 | * 29 | */ 30 | public class KinesisConnectorConfiguration { 31 | private static final Log LOG = LogFactory.getLog(KinesisConnectorConfiguration.class); 32 | public static final String KINESIS_CONNECTOR_USER_AGENT = "amazon-kinesis-connector-java-1.1.1"; 33 | 34 | // Connector App Property Keys 35 | public static final String PROP_APP_NAME = "appName"; 36 | public static final String PROP_CONNECTOR_DESTINATION = "connectorDestination"; 37 | public static final String PROP_RETRY_LIMIT = "retryLimit"; 38 | public static final String PROP_BACKOFF_INTERVAL = "backoffInterval"; 39 | public static final String PROP_KINESIS_ENDPOINT = "kinesisEndpoint"; 40 | public static final String PROP_KINESIS_INPUT_STREAM = "kinesisInputStream"; 41 | public static final String PROP_KINESIS_INPUT_STREAM_SHARD_COUNT = "kinesisInputStreamShardCount"; 42 | public static final String PROP_KINESIS_OUTPUT_STREAM = "kinesisOutputStream"; 43 | public static final String PROP_KINESIS_OUTPUT_STREAM_SHARD_COUNT = "kinesisOutpuStreamShardCount"; 44 | public static final String PROP_WORKER_ID = "workerID"; 45 | public static final String PROP_FAILOVER_TIME = "failoverTime"; 46 | public static final String PROP_MAX_RECORDS = "maxRecords"; 47 | public static final String PROP_INITIAL_POSITION_IN_STREAM = "initialPositionInStream"; 48 | public static final String PROP_IDLE_TIME_BETWEEN_READS = "idleTimeBetweenReads"; 49 | public static final String PROP_PARENT_SHARD_POLL_INTERVAL = "parentShardPollInterval"; 50 | public static final String PROP_SHARD_SYNC_INTERVAL = "shardSyncInterval"; 51 | public static final String PROP_CALL_PROCESS_RECORDS_EVEN_FOR_EMPTY_LIST = "callProcessRecordsEvenForEmptyList"; 52 | public static final String PROP_CLEANUP_TERMINATED_SHARDS_BEFORE_EXPIRY = "cleanupTerminatedShardsBeforeExpiry"; 53 | public static final String PROP_REGION_NAME = "regionName"; 54 | public static final String PROP_BATCH_RECORDS_IN_PUT_REQUEST = "batchRecordsInPutRequest"; 55 | public static final String PROP_S3_ENDPOINT = "s3Endpoint"; 56 | public static final String PROP_S3_BUCKET = "s3Bucket"; 57 | public static final String PROP_REDSHIFT_ENDPOINT = "redshiftEndpoint"; 58 | public static final String PROP_REDSHIFT_USERNAME = "redshiftUsername"; 59 | public static final String PROP_REDSHIFT_PASSWORD = "redshiftPassword"; 60 | public static final String PROP_REDSHIFT_URL = "redshiftURL"; 61 | public static final String PROP_REDSHIFT_DATA_TABLE = "redshiftDataTable"; 62 | public static final String PROP_REDSHIFT_FILE_TABLE = "redshiftFileTable"; 63 | public static final String PROP_REDSHIFT_FILE_KEY_COLUMN = "redshiftFileKeyColumn"; 64 | public static final String PROP_REDSHIFT_DATA_DELIMITER = "redshiftDataDelimiter"; 65 | public static final String PROP_REDSHIFT_COPY_MANDATORY = "redshiftCopyMandatory"; 66 | public static final String PROP_BUFFER_RECORD_COUNT_LIMIT = "bufferRecordCountLimit"; 67 | public static final String PROP_BUFFER_BYTE_SIZE_LIMIT = "bufferByteSizeLimit"; 68 | public static final String PROP_BUFFER_MILLISECONDS_LIMIT = "bufferMillisecondsLimit"; 69 | public static final String PROP_DYNAMODB_ENDPOINT = "dynamoDBEndpoint"; 70 | public static final String PROP_DYNAMODB_DATA_TABLE_NAME = "dynamoDBDataTableName"; 71 | public static final String PROP_CLOUDWATCH_NAMESPACE = "cloudWatchNamespace"; 72 | public static final String PROP_CLOUDWATCH_BUFFER_TIME = "cloudWatchBufferTime"; 73 | public static final String PROP_CLOUDWATCH_MAX_QUEUE_SIZE = "cloudWatchMaxQueueSize"; 74 | public static final 
String PROP_ELASTICSEARCH_CLUSTER_NAME = "elasticsearchClusterName"; 75 | public static final String PROP_ELASTICSEARCH_ENDPOINT = "elasticsearchEndpoint"; 76 | public static final String PROP_ELASTICSEARCH_PORT = "elasticsearchPort"; 77 | public static final String PROP_ELASTICSEARCH_TRANSPORT_SNIFF = "clientTransportSniff"; 78 | public static final String PROP_ELASTICSEARCH_IGNORE_CLUSTER_NAME = "clientTransportIgnoreClusterName"; 79 | public static final String PROP_ELASTICSEARCH_PING_TIMEOUT = "clientTransportPingTimeout"; 80 | public static final String PROP_ELASTICSEARCH_NODE_SAMPLER_INTERVAL = "clientTransportNodesSamplerInterval"; 81 | public static final String PROP_ELASTICSEARCH_DEFAULT_INDEX_NAME = "elasticsearchDefaultIndexName"; 82 | public static final String PROP_ELASTICSEARCH_DEFAULT_TYPE_NAME = "elasticsearchDefaultTypeName"; 83 | public static final String PROP_ELASTICSEARCH_CLOUDFORMATION_TEMPLATE_URL = 84 | "elasticsearchCloudFormationTemplateUrl"; 85 | public static final String PROP_ELASTICSEARCH_CLOUDFORMATION_STACK_NAME = "elasticsearchCloudFormationStackName"; 86 | public static final String PROP_ELASTICSEARCH_VERSION_NUMBER = "elasticsearchVersionNumber"; 87 | public static final String PROP_ELASTICSEARCH_CLOUDFORMATION_KEY_PAIR_NAME = 88 | "elasticsearchCloudFormationKeyPairName"; 89 | public static final String PROP_ELASTICSEARCH_CLOUDFORMATION_CLUSTER_INSTANCE_TYPE = 90 | "elasticsearchCloudFormationClusterInstanceType"; 91 | public static final String PROP_ELASTICSEARCH_CLOUDFORMATION_SSH_LOCATION = 92 | "elasticsearchCloudFormationSSHLocation"; 93 | public static final String PROP_ELASTICSEARCH_CLOUDFORMATION_CLUSTER_SIZE = 94 | "elasticsearchCloudFormationClusterSize"; 95 | 96 | // Default Connector App Constants 97 | public static final String DEFAULT_APP_NAME = "KinesisConnector"; 98 | public static final String DEFAULT_CONNECTOR_DESTINATION = "generic"; 99 | public static final int DEFAULT_RETRY_LIMIT = 3; 100 | public static final long DEFAULT_BACKOFF_INTERVAL = 1000L * 10; 101 | public static final long DEFAULT_BUFFER_RECORD_COUNT_LIMIT = 1000L; 102 | public static final long DEFAULT_BUFFER_BYTE_SIZE_LIMIT = 1024 * 1024L; 103 | public static final long DEFAULT_BUFFER_MILLISECONDS_LIMIT = Long.MAX_VALUE; 104 | public static final boolean DEFAULT_BATCH_RECORDS_IN_PUT_REQUEST = false; 105 | 106 | // Default Amazon Kinesis Constants 107 | public static final String DEFAULT_KINESIS_ENDPOINT = null; 108 | public static final String DEFAULT_KINESIS_INPUT_STREAM = "kinesisInputStream"; 109 | public static final String DEFAULT_KINESIS_OUTPUT_STREAM = "kinesisOutputStream"; 110 | public static final int DEFAULT_KINESIS_STREAM_SHARD_COUNT = 1; 111 | 112 | // Default Amazon Kinesis Client Library Constants 113 | public static final String DEFAULT_WORKER_ID = new VMID().toString(); 114 | public static final long DEFAULT_FAILOVER_TIME = 30000L; 115 | public static final int DEFAULT_MAX_RECORDS = 10000; 116 | public static final InitialPositionInStream DEFAULT_INITIAL_POSITION_IN_STREAM = 117 | InitialPositionInStream.TRIM_HORIZON; 118 | public static final long DEFAULT_IDLE_TIME_BETWEEN_READS = 1000L; 119 | public static final long DEFAULT_PARENT_SHARD_POLL_INTERVAL = 10000L; 120 | public static final long DEFAULT_SHARD_SYNC_INTERVAL = 60000L; 121 | // CALL_PROCESS_RECORDS_EVEN_FOR_EMPTY_LIST must be set to true for bufferMillisecondsLimit to work 122 | public static final boolean DEFAULT_CALL_PROCESS_RECORDS_EVEN_FOR_EMPTY_LIST = true; 123 | public static final 
boolean DEFAULT_CLEANUP_TERMINATED_SHARDS_BEFORE_EXPIRY = false; 124 | public static final String DEFAULT_REGION_NAME = "us-east-1"; 125 | 126 | // Default Amazon S3 Constants 127 | public static final String DEFAULT_S3_ENDPOINT = "https://s3.amazonaws.com"; 128 | public static final String DEFAULT_S3_BUCKET = "kinesis-bucket"; 129 | 130 | // Default Amazon Redshift Constants 131 | public static final String DEFAULT_REDSHIFT_ENDPOINT = "https://redshift.us-east-1.amazonaws.com"; 132 | public static final String DEFAULT_REDSHIFT_USERNAME = null; 133 | public static final String DEFAULT_REDSHIFT_PASSWORD = null; 134 | public static final String DEFAULT_REDSHIFT_URL = null; 135 | public static final String DEFAULT_REDSHIFT_DATA_TABLE = "users"; 136 | public static final String DEFAULT_REDSHIFT_FILE_TABLE = "files"; 137 | public static final String DEFAULT_REDSHIFT_FILE_KEY_COLUMN = "file"; 138 | public static final Character DEFAULT_REDSHIFT_DATA_DELIMITER = '|'; 139 | public static final boolean DEFAULT_REDSHIFT_COPY_MANDATORY = true; 140 | 141 | // Default Amazon DynamoDB Constants 142 | public static final String DEFAULT_DYNAMODB_ENDPOINT = "dynamodb.us-east-1.amazonaws.com"; 143 | public static final String DEFAULT_DYNAMODB_DATA_TABLE_NAME = "dynamodb_emitter_test"; 144 | 145 | // Default Amazon CloudWatch Constants 146 | public static final String DEFAULT_CLOUDWATCH_NAMESPACE = DEFAULT_APP_NAME; 147 | public static final long DEFAULT_CLOUDWATCH_BUFFER_TIME = 10 * 1000L; 148 | public static final int DEFAULT_CLOUDWATCH_MAX_QUEUE_SIZE = 10000; 149 | 150 | // Default Amazon Elasticsearch Constraints 151 | public static final String DEFAULT_ELASTICSEARCH_CLUSTER_NAME = "elasticsearch"; 152 | public static final String DEFAULT_ELASTICSEARCH_ENDPOINT = "localhost"; 153 | public static final int DEFAULT_ELASTICSEARCH_PORT = 9300; 154 | public static final boolean DEFAULT_ELASTICSEARCH_TRANSPORT_SNIFF = false; 155 | public static final boolean DEFAULT_ELASTICSEARCH_IGNORE_CLUSTER_NAME = false; 156 | public static final String DEFAULT_ELASTICSEARCH_PING_TIMEOUT = "5s"; 157 | public static final String DEFAULT_ELASTICSEARCH_NODE_SAMPLER_INTERVAL = "5s"; 158 | public static final String DEFAULT_ELASTICSEARCH_DEFAULT_INDEX_NAME = "index"; 159 | public static final String DEFAULT_ELASTICSEARCH_DEFAULT_TYPE_NAME = "type"; 160 | public static final String DEFAULT_ELASTICSEARCH_CLOUDFORMATION_TEMPLATE_URL = "Elasticsearch.template"; 161 | public static final String DEFAULT_ELASTICSEARCH_CLOUDFORMATION_STACK_NAME = "kinesisElasticsearchSample"; 162 | public static final String DEFAULT_ELASTICSEARCH_VERSION_NUMBER = "1.2.1"; 163 | public static final String DEFAULT_ELASTICSEARCH_CLOUDFORMATION_KEY_PAIR_NAME = ""; 164 | public static final String DEFAULT_ELASTICSEARCH_CLOUDFORMATION_CLUSTER_INSTANCE_TYPE = "m1.small"; 165 | public static final String DEFAULT_ELASTICSEARCH_CLOUDFORMATION_SSH_LOCATION = "0.0.0.0/0"; 166 | public static final String DEFAULT_ELASTICSEARCH_CLOUDFORMATION_CLUSTER_SIZE = "3"; 167 | 168 | private static final String CONNECTION_DESTINATION_PREFIX = "amazon-kinesis-connector-to-"; 169 | 170 | // Configurable program variables 171 | public final AWSCredentialsProvider AWS_CREDENTIALS_PROVIDER; 172 | public final String APP_NAME; 173 | public final String CONNECTOR_DESTINATION; 174 | public final long BACKOFF_INTERVAL; 175 | public final int RETRY_LIMIT; 176 | public final long BUFFER_RECORD_COUNT_LIMIT; 177 | public final long BUFFER_BYTE_SIZE_LIMIT; 178 | public final long 
BUFFER_MILLISECONDS_LIMIT; 179 | public final boolean BATCH_RECORDS_IN_PUT_REQUEST; 180 | 181 | public final String KINESIS_ENDPOINT; 182 | public final String KINESIS_INPUT_STREAM; 183 | public final int KINESIS_INPUT_STREAM_SHARD_COUNT; 184 | public final String KINESIS_OUTPUT_STREAM; 185 | public final int KINESIS_OUTPUT_STREAM_SHARD_COUNT; 186 | 187 | public final String WORKER_ID; 188 | public final long FAILOVER_TIME; 189 | public final int MAX_RECORDS; 190 | public final InitialPositionInStream INITIAL_POSITION_IN_STREAM; 191 | public final long IDLE_TIME_BETWEEN_READS; 192 | public final long PARENT_SHARD_POLL_INTERVAL; 193 | public final long SHARD_SYNC_INTERVAL; 194 | public final boolean CALL_PROCESS_RECORDS_EVEN_FOR_EMPTY_LIST; 195 | public final boolean CLEANUP_TERMINATED_SHARDS_BEFORE_EXPIRY; 196 | public final String REGION_NAME; 197 | public final String S3_ENDPOINT; 198 | public final String S3_BUCKET; 199 | public final String REDSHIFT_ENDPOINT; 200 | public final String REDSHIFT_USERNAME; 201 | public final String REDSHIFT_PASSWORD; 202 | public String REDSHIFT_URL; 203 | public final String REDSHIFT_DATA_TABLE; 204 | public final String REDSHIFT_FILE_TABLE; 205 | public final String REDSHIFT_FILE_KEY_COLUMN; 206 | public final Character REDSHIFT_DATA_DELIMITER; 207 | public final boolean REDSHIFT_COPY_MANDATORY; 208 | public final String DYNAMODB_ENDPOINT; 209 | public final String DYNAMODB_DATA_TABLE_NAME; 210 | public final String CLOUDWATCH_NAMESPACE; 211 | public final long CLOUDWATCH_BUFFER_TIME; 212 | public final int CLOUDWATCH_MAX_QUEUE_SIZE; 213 | public final String ELASTICSEARCH_CLUSTER_NAME; 214 | public String ELASTICSEARCH_ENDPOINT; 215 | public final int ELASTICSEARCH_PORT; 216 | public final boolean ELASTICSEARCH_TRANSPORT_SNIFF; 217 | public final boolean ELASTICSEARCH_IGNORE_CLUSTER_NAME; 218 | public final String ELASTICSEARCH_PING_TIMEOUT; 219 | public final String ELASTICSEARCH_NODE_SAMPLER_INTERVAL; 220 | public final String ELASTICSEARCH_DEFAULT_INDEX_NAME; 221 | public final String ELASTICSEARCH_DEFAULT_TYPE_NAME; 222 | public final String ELASTICSEARCH_CLOUDFORMATION_TEMPLATE_URL; 223 | public final String ELASTICSEARCH_CLOUDFORMATION_STACK_NAME; 224 | public final String ELASTICSEARCH_VERSION_NUMBER; 225 | public final String ELASTICSEARCH_CLOUDFORMATION_KEY_PAIR_NAME; 226 | public final String ELASTICSEARCH_CLOUDFORMATION_CLUSTER_INSTANCE_TYPE; 227 | public final String ELASTICSEARCH_CLOUDFORMATION_SSH_LOCATION; 228 | public final String ELASTICSEARCH_CLOUDFORMATION_CLUSTER_SIZE; 229 | 230 | /** 231 | * Configure the connector application with any set of properties that are unique to the application. Any 232 | * unspecified property will be set to a default value. 
233 | * 234 | * @param properties 235 | * the System properties that will be used to configure KinesisConnectors 236 | */ 237 | public KinesisConnectorConfiguration(Properties properties, AWSCredentialsProvider credentialsProvider) { 238 | AWS_CREDENTIALS_PROVIDER = credentialsProvider; 239 | 240 | // Connector configuration 241 | APP_NAME = properties.getProperty(PROP_APP_NAME, DEFAULT_APP_NAME); 242 | CONNECTOR_DESTINATION = 243 | CONNECTION_DESTINATION_PREFIX 244 | + properties.getProperty(PROP_CONNECTOR_DESTINATION, DEFAULT_CONNECTOR_DESTINATION); 245 | RETRY_LIMIT = getIntegerProperty(PROP_RETRY_LIMIT, DEFAULT_RETRY_LIMIT, properties); 246 | BACKOFF_INTERVAL = getLongProperty(PROP_BACKOFF_INTERVAL, DEFAULT_BACKOFF_INTERVAL, properties); 247 | BUFFER_RECORD_COUNT_LIMIT = 248 | getLongProperty(PROP_BUFFER_RECORD_COUNT_LIMIT, DEFAULT_BUFFER_RECORD_COUNT_LIMIT, properties); 249 | BUFFER_BYTE_SIZE_LIMIT = 250 | getLongProperty(PROP_BUFFER_BYTE_SIZE_LIMIT, DEFAULT_BUFFER_BYTE_SIZE_LIMIT, properties); 251 | BUFFER_MILLISECONDS_LIMIT = 252 | getLongProperty(PROP_BUFFER_MILLISECONDS_LIMIT, DEFAULT_BUFFER_MILLISECONDS_LIMIT, properties); 253 | BATCH_RECORDS_IN_PUT_REQUEST = 254 | getBooleanProperty(PROP_BATCH_RECORDS_IN_PUT_REQUEST, DEFAULT_BATCH_RECORDS_IN_PUT_REQUEST, properties); 255 | 256 | // Amazon Kinesis configuration 257 | KINESIS_ENDPOINT = properties.getProperty(PROP_KINESIS_ENDPOINT, DEFAULT_KINESIS_ENDPOINT); 258 | KINESIS_INPUT_STREAM = properties.getProperty(PROP_KINESIS_INPUT_STREAM, DEFAULT_KINESIS_INPUT_STREAM); 259 | KINESIS_INPUT_STREAM_SHARD_COUNT = 260 | getIntegerProperty(PROP_KINESIS_INPUT_STREAM_SHARD_COUNT, 261 | DEFAULT_KINESIS_STREAM_SHARD_COUNT, 262 | properties); 263 | KINESIS_OUTPUT_STREAM = properties.getProperty(PROP_KINESIS_OUTPUT_STREAM, DEFAULT_KINESIS_OUTPUT_STREAM); 264 | KINESIS_OUTPUT_STREAM_SHARD_COUNT = 265 | getIntegerProperty(PROP_KINESIS_OUTPUT_STREAM_SHARD_COUNT, 266 | DEFAULT_KINESIS_STREAM_SHARD_COUNT, 267 | properties); 268 | 269 | // Amazon S3 configuration 270 | S3_ENDPOINT = properties.getProperty(PROP_S3_ENDPOINT, DEFAULT_S3_ENDPOINT); 271 | S3_BUCKET = properties.getProperty(PROP_S3_BUCKET, DEFAULT_S3_BUCKET); 272 | 273 | // Amazon Redshift configuration 274 | REDSHIFT_ENDPOINT = properties.getProperty(PROP_REDSHIFT_ENDPOINT, DEFAULT_REDSHIFT_ENDPOINT); 275 | REDSHIFT_USERNAME = properties.getProperty(PROP_REDSHIFT_USERNAME, DEFAULT_REDSHIFT_USERNAME); 276 | REDSHIFT_PASSWORD = properties.getProperty(PROP_REDSHIFT_PASSWORD, DEFAULT_REDSHIFT_PASSWORD); 277 | REDSHIFT_URL = properties.getProperty(PROP_REDSHIFT_URL, DEFAULT_REDSHIFT_URL); 278 | REDSHIFT_DATA_TABLE = properties.getProperty(PROP_REDSHIFT_DATA_TABLE, DEFAULT_REDSHIFT_DATA_TABLE); 279 | REDSHIFT_FILE_TABLE = properties.getProperty(PROP_REDSHIFT_FILE_TABLE, DEFAULT_REDSHIFT_FILE_TABLE); 280 | REDSHIFT_FILE_KEY_COLUMN = 281 | properties.getProperty(PROP_REDSHIFT_FILE_KEY_COLUMN, DEFAULT_REDSHIFT_FILE_KEY_COLUMN); 282 | REDSHIFT_DATA_DELIMITER = 283 | getCharacterProperty(PROP_REDSHIFT_DATA_DELIMITER, DEFAULT_REDSHIFT_DATA_DELIMITER, properties); 284 | REDSHIFT_COPY_MANDATORY = 285 | getBooleanProperty(PROP_REDSHIFT_COPY_MANDATORY, DEFAULT_REDSHIFT_COPY_MANDATORY, properties); 286 | 287 | // Amazon DynamoDB configuration 288 | DYNAMODB_ENDPOINT = properties.getProperty(PROP_DYNAMODB_ENDPOINT, DEFAULT_DYNAMODB_ENDPOINT); 289 | DYNAMODB_DATA_TABLE_NAME = 290 | properties.getProperty(PROP_DYNAMODB_DATA_TABLE_NAME, DEFAULT_DYNAMODB_DATA_TABLE_NAME); 291 | 292 | // Amazon CloudWatch 
configuration 293 | CLOUDWATCH_NAMESPACE = properties.getProperty(PROP_CLOUDWATCH_NAMESPACE, DEFAULT_CLOUDWATCH_NAMESPACE); 294 | CLOUDWATCH_BUFFER_TIME = 295 | getLongProperty(PROP_CLOUDWATCH_BUFFER_TIME, DEFAULT_CLOUDWATCH_BUFFER_TIME, properties); 296 | CLOUDWATCH_MAX_QUEUE_SIZE = 297 | getIntegerProperty(PROP_CLOUDWATCH_MAX_QUEUE_SIZE, DEFAULT_CLOUDWATCH_MAX_QUEUE_SIZE, properties); 298 | 299 | // Elasticsearch configuration 300 | ELASTICSEARCH_CLUSTER_NAME = 301 | properties.getProperty(PROP_ELASTICSEARCH_CLUSTER_NAME, DEFAULT_ELASTICSEARCH_CLUSTER_NAME); 302 | ELASTICSEARCH_ENDPOINT = properties.getProperty(PROP_ELASTICSEARCH_ENDPOINT, DEFAULT_ELASTICSEARCH_ENDPOINT); 303 | ELASTICSEARCH_PORT = getIntegerProperty(PROP_ELASTICSEARCH_PORT, DEFAULT_ELASTICSEARCH_PORT, properties); 304 | ELASTICSEARCH_TRANSPORT_SNIFF = 305 | getBooleanProperty(PROP_ELASTICSEARCH_TRANSPORT_SNIFF, 306 | DEFAULT_ELASTICSEARCH_TRANSPORT_SNIFF, 307 | properties); 308 | 309 | ELASTICSEARCH_IGNORE_CLUSTER_NAME = 310 | getBooleanProperty(PROP_ELASTICSEARCH_IGNORE_CLUSTER_NAME, 311 | DEFAULT_ELASTICSEARCH_IGNORE_CLUSTER_NAME, 312 | properties); 313 | ELASTICSEARCH_PING_TIMEOUT = 314 | properties.getProperty(PROP_ELASTICSEARCH_PING_TIMEOUT, DEFAULT_ELASTICSEARCH_PING_TIMEOUT); 315 | ELASTICSEARCH_NODE_SAMPLER_INTERVAL = 316 | properties.getProperty(PROP_ELASTICSEARCH_NODE_SAMPLER_INTERVAL, 317 | DEFAULT_ELASTICSEARCH_NODE_SAMPLER_INTERVAL); 318 | ELASTICSEARCH_DEFAULT_INDEX_NAME = 319 | properties.getProperty(PROP_ELASTICSEARCH_DEFAULT_INDEX_NAME, DEFAULT_ELASTICSEARCH_DEFAULT_INDEX_NAME); 320 | ELASTICSEARCH_DEFAULT_TYPE_NAME = 321 | properties.getProperty(PROP_ELASTICSEARCH_DEFAULT_TYPE_NAME, DEFAULT_ELASTICSEARCH_DEFAULT_TYPE_NAME); 322 | ELASTICSEARCH_CLOUDFORMATION_TEMPLATE_URL = 323 | properties.getProperty(PROP_ELASTICSEARCH_CLOUDFORMATION_TEMPLATE_URL, 324 | DEFAULT_ELASTICSEARCH_CLOUDFORMATION_TEMPLATE_URL); 325 | ELASTICSEARCH_CLOUDFORMATION_STACK_NAME = 326 | properties.getProperty(PROP_ELASTICSEARCH_CLOUDFORMATION_STACK_NAME, 327 | DEFAULT_ELASTICSEARCH_CLOUDFORMATION_STACK_NAME); 328 | ELASTICSEARCH_VERSION_NUMBER = 329 | properties.getProperty(PROP_ELASTICSEARCH_VERSION_NUMBER, DEFAULT_ELASTICSEARCH_VERSION_NUMBER); 330 | ELASTICSEARCH_CLOUDFORMATION_KEY_PAIR_NAME = 331 | properties.getProperty(PROP_ELASTICSEARCH_CLOUDFORMATION_KEY_PAIR_NAME, 332 | DEFAULT_ELASTICSEARCH_CLOUDFORMATION_KEY_PAIR_NAME); 333 | ELASTICSEARCH_CLOUDFORMATION_CLUSTER_INSTANCE_TYPE = 334 | properties.getProperty(PROP_ELASTICSEARCH_CLOUDFORMATION_CLUSTER_INSTANCE_TYPE, 335 | DEFAULT_ELASTICSEARCH_CLOUDFORMATION_CLUSTER_INSTANCE_TYPE); 336 | ELASTICSEARCH_CLOUDFORMATION_SSH_LOCATION = 337 | properties.getProperty(PROP_ELASTICSEARCH_CLOUDFORMATION_SSH_LOCATION, 338 | DEFAULT_ELASTICSEARCH_CLOUDFORMATION_SSH_LOCATION); 339 | ELASTICSEARCH_CLOUDFORMATION_CLUSTER_SIZE = 340 | properties.getProperty(PROP_ELASTICSEARCH_CLOUDFORMATION_CLUSTER_SIZE, 341 | DEFAULT_ELASTICSEARCH_CLOUDFORMATION_CLUSTER_SIZE); 342 | 343 | // Amazon Kinesis Client Library configuration 344 | WORKER_ID = properties.getProperty(PROP_WORKER_ID, DEFAULT_WORKER_ID); 345 | FAILOVER_TIME = getLongProperty(PROP_FAILOVER_TIME, DEFAULT_FAILOVER_TIME, properties); 346 | MAX_RECORDS = getIntegerProperty(PROP_MAX_RECORDS, DEFAULT_MAX_RECORDS, properties); 347 | INITIAL_POSITION_IN_STREAM = 348 | getInitialPositionInStreamProperty(PROP_INITIAL_POSITION_IN_STREAM, 349 | DEFAULT_INITIAL_POSITION_IN_STREAM, 350 | properties); 351 | IDLE_TIME_BETWEEN_READS = 352 | 
getLongProperty(PROP_IDLE_TIME_BETWEEN_READS, DEFAULT_IDLE_TIME_BETWEEN_READS, properties); 353 | PARENT_SHARD_POLL_INTERVAL = 354 | getLongProperty(PROP_PARENT_SHARD_POLL_INTERVAL, DEFAULT_PARENT_SHARD_POLL_INTERVAL, properties); 355 | SHARD_SYNC_INTERVAL = getLongProperty(PROP_SHARD_SYNC_INTERVAL, DEFAULT_SHARD_SYNC_INTERVAL, properties); 356 | CALL_PROCESS_RECORDS_EVEN_FOR_EMPTY_LIST = 357 | getBooleanProperty(PROP_CALL_PROCESS_RECORDS_EVEN_FOR_EMPTY_LIST, 358 | DEFAULT_CALL_PROCESS_RECORDS_EVEN_FOR_EMPTY_LIST, 359 | properties); 360 | CLEANUP_TERMINATED_SHARDS_BEFORE_EXPIRY = 361 | getBooleanProperty(PROP_CLEANUP_TERMINATED_SHARDS_BEFORE_EXPIRY, 362 | DEFAULT_CLEANUP_TERMINATED_SHARDS_BEFORE_EXPIRY, 363 | properties); 364 | REGION_NAME = properties.getProperty(PROP_REGION_NAME, DEFAULT_REGION_NAME); 365 | } 366 | 367 | private boolean getBooleanProperty(String property, boolean defaultValue, Properties properties) { 368 | String propertyValue = properties.getProperty(property, Boolean.toString(defaultValue)); 369 | return Boolean.parseBoolean(propertyValue); 370 | } 371 | 372 | private long getLongProperty(String property, long defaultValue, Properties properties) { 373 | String propertyValue = properties.getProperty(property, Long.toString(defaultValue)); 374 | try { 375 | return Long.parseLong(propertyValue.trim()); 376 | } catch (NumberFormatException e) { 377 | LOG.error(e); 378 | return defaultValue; 379 | } 380 | } 381 | 382 | private int getIntegerProperty(String property, int defaultValue, Properties properties) { 383 | String propertyValue = properties.getProperty(property, Integer.toString(defaultValue)); 384 | try { 385 | return Integer.parseInt(propertyValue.trim()); 386 | } catch (NumberFormatException e) { 387 | LOG.error(e); 388 | return defaultValue; 389 | } 390 | } 391 | 392 | private char getCharacterProperty(String property, char defaultValue, Properties properties) { 393 | String propertyValue = properties.getProperty(property, Character.toString(defaultValue)); 394 | if (propertyValue.length() == 1) { 395 | return propertyValue.charAt(0); 396 | } 397 | return defaultValue; 398 | } 399 | 400 | private InitialPositionInStream getInitialPositionInStreamProperty(String property, 401 | InitialPositionInStream defaultInitialPositionInInputStream, 402 | Properties properties) { 403 | String propertyValue = properties.getProperty(property, defaultInitialPositionInInputStream.toString()); 404 | try { 405 | return InitialPositionInStream.valueOf(propertyValue); 406 | } catch (Exception e) { 407 | LOG.error(e); 408 | return defaultInitialPositionInInputStream; 409 | } 410 | } 411 | } 412 | --------------------------------------------------------------------------------
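A minimal sketch of how a caller might drive the configuration class above. The property-file name and the override values here are hypothetical and not taken from this repository; the constants (PROP_APP_NAME, PROP_S3_BUCKET) and public fields (APP_NAME, REGION_NAME) are the ones defined in KinesisConnectorConfiguration. Any key left out of the Properties object falls back to the corresponding DEFAULT_ value.

import java.io.FileInputStream;
import java.io.IOException;
import java.util.Properties;

import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
import com.amazonaws.services.kinesis.connectors.KinesisConnectorConfiguration;

public class ConfigurationSketch {
    public static void main(String[] args) throws IOException {
        Properties props = new Properties();
        // "connector.properties" is a placeholder path for this sketch.
        try (FileInputStream in = new FileInputStream("connector.properties")) {
            props.load(in);
        }
        // Override individual keys programmatically if needed; unspecified keys use the class defaults.
        props.setProperty(KinesisConnectorConfiguration.PROP_APP_NAME, "MyKinesisConnector");
        props.setProperty(KinesisConnectorConfiguration.PROP_S3_BUCKET, "my-export-bucket");

        KinesisConnectorConfiguration config =
                new KinesisConnectorConfiguration(props, new DefaultAWSCredentialsProviderChain());
        System.out.println("App name: " + config.APP_NAME + ", region: " + config.REGION_NAME);
    }
}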