├── project
│   ├── build.properties
│   ├── plugins.sbt
│   ├── KinesisTeeBuild.scala
│   ├── BuildSettings.scala
│   └── Dependencies.scala
├── vagrant
│   ├── .gitignore
│   ├── ansible.hosts
│   ├── up.guidance
│   ├── up.playbooks
│   ├── peru.yaml
│   └── up.bash
├── deploy
│   ├── gordon
│   │   └── kinesis-tee
│   │       ├── kinesis-tee-app
│   │       │   ├── kinesis-tee-code
│   │       │   │   └── .gitkeep
│   │       │   └── settings.yml
│   │       └── settings.yml
│   └── deploy.rb
├── CHANGELOG
├── .gitignore
├── src
│   ├── test
│   │   ├── resources
│   │   │   ├── sampleconfig.json
│   │   │   ├── sample_self_describing_config.json
│   │   │   └── sample_with_target_acct.json
│   │   └── scala
│   │       └── com
│   │           └── snowplowanalytics
│   │               └── kinesistee
│   │                   ├── routing
│   │                   │   └── PointToPointRouteSpec.scala
│   │                   ├── config
│   │                   │   ├── LambdaUtilsSpec.scala
│   │                   │   ├── KinesisTeeConfigSchemaSpec.scala
│   │                   │   ├── SelfDescribingDataSpec.scala
│   │                   │   └── ConfigurationBuilderSpec.scala
│   │                   ├── filters
│   │                   │   └── JavascriptFilterSpec.scala
│   │                   ├── KinesisTeeSpec.scala
│   │                   ├── StreamWriterSpec.scala
│   │                   ├── transformation
│   │                   │   └── SnowplowToJsonSpec.scala
│   │                   └── MainSpec.scala
│   └── main
│       ├── scala
│       │   └── com
│       │       └── snowplowanalytics
│       │           └── kinesistee
│       │               ├── models
│       │               │   ├── Content.scala
│       │               │   └── Stream.scala
│       │               ├── transformation
│       │               │   ├── TransformationStrategy.scala
│       │               │   └── SnowplowToJson.scala
│       │               ├── routing
│       │               │   ├── RoutingStrategy.scala
│       │               │   └── PointToPointRoute.scala
│       │               ├── filters
│       │               │   ├── FilterStrategy.scala
│       │               │   └── JavascriptFilter.scala
│       │               ├── Tee.scala
│       │               ├── config
│       │               │   ├── Builder.scala
│       │               │   ├── SelfDescribingData.scala
│       │               │   ├── ConfigurationBuilder.scala
│       │               │   └── lambdaUtils.scala
│       │               ├── KinesisTee.scala
│       │               ├── StreamWriter.scala
│       │               └── Main.scala
│       └── avro
│           └── com.snowplowanalytics.kinesistee.config
│               └── Configuration
│                   └── avro
│                       └── 1-0-0.avsc
├── Vagrantfile
├── .travis.yml
├── README.md
└── LICENSE-2.0.txt

/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version=0.13.11
2 | 
--------------------------------------------------------------------------------
/vagrant/.gitignore:
--------------------------------------------------------------------------------
1 | .peru
2 | oss-playbooks
3 | ansible
--------------------------------------------------------------------------------
/vagrant/ansible.hosts:
--------------------------------------------------------------------------------
1 | [vagrant]
2 | 127.0.0.1:2222
--------------------------------------------------------------------------------
/deploy/gordon/kinesis-tee/kinesis-tee-app/kinesis-tee-code/.gitkeep:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/vagrant/up.guidance:
--------------------------------------------------------------------------------
1 | To get started:
2 | 
3 | cd /vagrant
4 | sbt assembly
--------------------------------------------------------------------------------
/CHANGELOG:
--------------------------------------------------------------------------------
1 | Release 0.1.0 (2016-10-03)
2 | --------------------------
3 | Initial release
--------------------------------------------------------------------------------
/vagrant/up.playbooks:
--------------------------------------------------------------------------------
1 | oss-playbooks/java8.yml
2 | oss-playbooks/scala.yml
3 | oss-playbooks/sbt.yml
--------------------------------------------------------------------------------
/.gitignore:
-------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | 4 | # sbt specific 5 | dist/* 6 | target/ 7 | lib_managed/ 8 | src_managed/ 9 | project/boot/ 10 | project/plugins/project/ 11 | 12 | # Vagrant 13 | .vagrant 14 | VERSION 15 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.11.2") 2 | 3 | addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.3.5") 4 | 5 | addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.8.2") 6 | 7 | addSbtPlugin("com.julianpeeters" % "sbt-avrohugger" % "0.11.0") -------------------------------------------------------------------------------- /src/test/resources/sampleconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "My Kinesis Tee example", 3 | "targetStream": { 4 | "name": "my-target-stream", 5 | "targetAccount": null 6 | }, 7 | "transformer": { 8 | "com.snowplowanalytics.kinesistee.config.Transformer": { 9 | "builtIn": "SNOWPLOW_TO_NESTED_JSON" 10 | } 11 | }, 12 | "filter": null 13 | } -------------------------------------------------------------------------------- /deploy/gordon/kinesis-tee/settings.yml: -------------------------------------------------------------------------------- 1 | --- 2 | project: kinesis-tee 3 | default-region: eu-west-1 4 | code-bucket: *** YOUR BUCKET NAME *** 5 | apps: 6 | - gordon.contrib.lambdas 7 | - gordon.contrib.helpers 8 | - kinesis-tee-app 9 | kinesis: 10 | ingest_stream_configuration: 11 | lambda: kinesis-tee-app.kinesis-tee-code 12 | stream: *** YOUR STREAM NAME *** 13 | batch_size: 100 14 | starting_position: LATEST 15 | -------------------------------------------------------------------------------- /src/test/resources/sample_self_describing_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "schema": "iglu:com.snowplowanalytics.kinesistee.tbd", 3 | "data": { 4 | "name": "My Kinesis Tee example", 5 | "targetStream": { 6 | "name": "my-target-stream", 7 | "targetAccount": null 8 | }, 9 | "transformer": { 10 | "com.snowplowanalytics.kinesistee.config.Transformer": { 11 | "builtIn": "SNOWPLOW_TO_NESTED_JSON" 12 | } 13 | }, 14 | "filter": null 15 | } 16 | } -------------------------------------------------------------------------------- /src/test/resources/sample_with_target_acct.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "My Kinesis Tee example", 3 | "targetStream": { 4 | "name": "my-target-stream", 5 | "targetAccount": { 6 | "com.snowplowanalytics.kinesistee.config.TargetAccount": { 7 | "awsAccessKey": "*", 8 | "awsSecretAccessKey": "*", 9 | "region": "eu-west-1" 10 | } 11 | } 12 | }, 13 | "transformer": { 14 | "com.snowplowanalytics.kinesistee.config.Transformer": { 15 | "builtIn": "SNOWPLOW_TO_NESTED_JSON" 16 | } 17 | }, 18 | "filter": null 19 | } -------------------------------------------------------------------------------- /vagrant/peru.yaml: -------------------------------------------------------------------------------- 1 | imports: 2 | ansible: ansible 3 | ansible_playbooks: oss-playbooks 4 | 5 | curl module ansible: 6 | # Equivalent of git cloning tags/v1.6.6 but much, much faster 7 | url: https://codeload.github.com/ansible/ansible/zip/69d85c22c7475ccf8169b6ec9dee3ee28c92a314 8 | unpack: zip 9 | 
export: ansible-69d85c22c7475ccf8169b6ec9dee3ee28c92a314 10 | 11 | git module ansible_playbooks: 12 | url: https://github.com/snowplow/ansible-playbooks.git 13 | # Comment out to fetch a specific rev instead of master: 14 | # rev: xxx 15 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | Vagrant.configure("2") do |config| 2 | 3 | config.vm.box = "ubuntu/trusty64" 4 | config.vm.hostname = "snowplow-kinesis-tee" 5 | config.ssh.forward_agent = true 6 | 7 | config.vm.provider :virtualbox do |vb| 8 | vb.name = Dir.pwd().split("/")[-1] + "-" + Time.now.to_f.to_i.to_s 9 | vb.customize ["modifyvm", :id, "--natdnshostresolver1", "on"] 10 | vb.customize [ "guestproperty", "set", :id, "--timesync-threshold", 10000 ] 11 | # Scala is memory-hungry 12 | vb.memory = 5120 13 | end 14 | 15 | config.vm.provision :shell do |sh| 16 | sh.path = "vagrant/up.bash" 17 | end 18 | 19 | end 20 | -------------------------------------------------------------------------------- /src/main/scala/com/snowplowanalytics/kinesistee/models/Content.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 12 | */ 13 | package com.snowplowanalytics.kinesistee.models 14 | 15 | /** 16 | * Usually a Kinesis record 17 | * @param row the data contained by the record 18 | * @param partitionKey the partition key in the record 19 | */ 20 | case class Content(row: String, partitionKey: String) 21 | -------------------------------------------------------------------------------- /src/main/scala/com/snowplowanalytics/kinesistee/models/Stream.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
12 | */ 13 | package com.snowplowanalytics.kinesistee.models 14 | 15 | import com.amazonaws.regions.Region 16 | 17 | /** 18 | * representation of a kinesis stream 19 | * @param name name (not arn) of a stream 20 | * @param region region the stream is in 21 | */ 22 | case class Stream(name: String, region: Region) 23 | -------------------------------------------------------------------------------- /src/main/scala/com/snowplowanalytics/kinesistee/transformation/TransformationStrategy.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 12 | */ 13 | package com.snowplowanalytics.kinesistee.transformation 14 | 15 | import com.snowplowanalytics.kinesistee.models.Content 16 | 17 | import scalaz.ValidationNel 18 | 19 | 20 | trait TransformationStrategy { 21 | 22 | def transform(content: Content): ValidationNel[Throwable, Content] 23 | 24 | } 25 | -------------------------------------------------------------------------------- /src/main/scala/com/snowplowanalytics/kinesistee/routing/RoutingStrategy.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 12 | */ 13 | 14 | package com.snowplowanalytics.kinesistee.routing 15 | 16 | import com.snowplowanalytics.kinesistee.StreamWriter 17 | 18 | import scalaz.ValidationNel 19 | import com.snowplowanalytics.kinesistee.models.Stream 20 | 21 | trait RoutingStrategy { 22 | 23 | def route(): ValidationNel[String, StreamWriter] 24 | 25 | } 26 | -------------------------------------------------------------------------------- /src/main/scala/com/snowplowanalytics/kinesistee/filters/FilterStrategy.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 
7 |  *
8 |  * Unless required by applicable law or agreed to in writing,
9 |  * software distributed under the Apache License Version 2.0 is distributed on an
10 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 |  * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
12 |  */
13 | package com.snowplowanalytics.kinesistee.filters
14 | 
15 | import com.snowplowanalytics.kinesistee.models.{ Stream, Content }
16 | import scalaz.ValidationNel
17 | 
18 | /**
19 |  * This trait defines a "strategy" for filtering data; returning true means the record is passed through to the routing strategy
20 |  */
21 | trait FilterStrategy {
22 | 
23 |   def filter(content: Content): ValidationNel[Throwable, Boolean]
24 | 
25 | }
26 | 
27 | 
--------------------------------------------------------------------------------
/deploy/gordon/kinesis-tee/kinesis-tee-app/settings.yml:
--------------------------------------------------------------------------------
1 | lambdas:
2 |   kinesis-tee-code:
3 |     code: kinesis-tee-code
4 |     runtime: java8
5 |     handler: com.snowplowanalytics.kinesistee.Main::kinesisEventHandler
6 |     description: dynamodb:eu-west-1/tee-config-gordon
7 |     memory: 512
8 |     timeout: 60
9 | 
10 |     build:
11 |       - cp -R ./* {target}
12 |     policies:
13 |       read_kinesis:
14 |         Version: "2012-10-17"
15 |         Statement:
16 |           -
17 |             Action:
18 |               - "kinesis:DescribeStream"
19 |               - "kinesis:ListStreams"
20 |               - "kinesis:GetShardIterator"
21 |               - "kinesis:GetRecords"
22 |             Resource: "*"
23 |             Effect: "Allow"
24 |       read_dynamo:
25 |         Version: "2012-10-17"
26 |         Statement:
27 |           -
28 |             Action:
29 |               - "dynamodb:DescribeTable"
30 |               - "dynamodb:GetItem"
31 |               - "dynamodb:Query"
32 |               - "dynamodb:Scan"
33 |             Resource: "*"
34 |             Effect: "Allow"
35 |       read_lambda_desc:
36 |         Version: "2012-10-17"
37 |         Statement:
38 |           -
39 |             Action:
40 |               - "lambda:GetFunctionConfiguration"
41 |             Resource: "*"
42 |             Effect: "Allow"
43 | 
--------------------------------------------------------------------------------
/src/main/scala/com/snowplowanalytics/kinesistee/Tee.scala:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved.
3 |  *
4 |  * This program is licensed to you under the Apache License Version 2.0,
5 |  * and you may not use this file except in compliance with the Apache License Version 2.0.
6 |  * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
7 |  *
8 |  * Unless required by applicable law or agreed to in writing,
9 |  * software distributed under the Apache License Version 2.0 is distributed on an
10 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 |  * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
12 | */ 13 | package com.snowplowanalytics.kinesistee 14 | 15 | import com.snowplowanalytics.kinesistee.filters.FilterStrategy 16 | import com.snowplowanalytics.kinesistee.models.{Content, Stream} 17 | import com.snowplowanalytics.kinesistee.routing.RoutingStrategy 18 | import com.snowplowanalytics.kinesistee.transformation.TransformationStrategy 19 | 20 | /** 21 | * This trait allows us to stub out tee behaviours 22 | */ 23 | trait Tee { 24 | def tee(routingStrategy: RoutingStrategy, 25 | transformationStrategy: Option[TransformationStrategy], 26 | filterStrategy: Option[FilterStrategy], 27 | content: Seq[Content]): Unit 28 | } 29 | -------------------------------------------------------------------------------- /src/main/scala/com/snowplowanalytics/kinesistee/config/Builder.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 12 | */ 13 | 14 | package com.snowplowanalytics.kinesistee.config 15 | 16 | import awscala.dynamodbv2.DynamoDB 17 | 18 | /** 19 | * This trait allows us to override configuration building 20 | */ 21 | trait Builder { 22 | /** 23 | * Build a Configuration object, collecting the configuration from dynamodb 24 | * @param tableName the name of the dynamodb table the configuration is stored in 25 | * @param functionName the name of the AWS lambda function (the 'id' key field in the dynamodb table) 26 | * @param dynamoDB the dynamodb instance to use for fetching data 27 | * @return a configuration object 28 | */ 29 | def build(tableName: String, functionName: String)(implicit dynamoDB: DynamoDB): Configuration 30 | } 31 | -------------------------------------------------------------------------------- /vagrant/up.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | vagrant_dir=/vagrant/vagrant 5 | bashrc=/home/vagrant/.bashrc 6 | 7 | echo "========================================" 8 | echo "INSTALLING PERU AND ANSIBLE DEPENDENCIES" 9 | echo "----------------------------------------" 10 | apt-get update 11 | apt-get install -y language-pack-en git unzip libyaml-dev python3-pip python-yaml python-paramiko python-jinja2 12 | 13 | echo "===============" 14 | echo "INSTALLING PERU" 15 | echo "---------------" 16 | sudo pip3 install peru 17 | 18 | echo "=======================================" 19 | echo "CLONING ANSIBLE AND PLAYBOOKS WITH PERU" 20 | echo "---------------------------------------" 21 | cd ${vagrant_dir} && peru sync -v 22 | echo "... 
done" 23 | 24 | env_setup=${vagrant_dir}/ansible/hacking/env-setup 25 | hosts=${vagrant_dir}/ansible.hosts 26 | 27 | echo "===================" 28 | echo "CONFIGURING ANSIBLE" 29 | echo "-------------------" 30 | touch ${bashrc} 31 | echo "source ${env_setup}" >> ${bashrc} 32 | echo "export ANSIBLE_HOSTS=${hosts}" >> ${bashrc} 33 | echo "... done" 34 | 35 | echo "==========================================" 36 | echo "RUNNING PLAYBOOKS WITH ANSIBLE*" 37 | echo "* no output while each playbook is running" 38 | echo "------------------------------------------" 39 | while read pb; do 40 | su - -c "source ${env_setup} && ${vagrant_dir}/ansible/bin/ansible-playbook ${vagrant_dir}/${pb} --connection=local --inventory-file=${hosts}" vagrant 41 | done <${vagrant_dir}/up.playbooks 42 | 43 | guidance=${vagrant_dir}/up.guidance 44 | 45 | if [ -f ${guidance} ]; then 46 | echo "===========" 47 | echo "PLEASE READ" 48 | echo "-----------" 49 | cat $guidance 50 | fi 51 | -------------------------------------------------------------------------------- /src/main/scala/com/snowplowanalytics/kinesistee/routing/PointToPointRoute.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
12 | */ 13 | 14 | package com.snowplowanalytics.kinesistee.routing 15 | 16 | import com.snowplowanalytics.kinesistee.StreamWriter 17 | 18 | import scalaz._ 19 | import scalaz.syntax.validation._ 20 | import com.snowplowanalytics.kinesistee.models.Stream 21 | 22 | /** 23 | * This routing strategy passes all traffic through to the destination 24 | * @param destination the endpoint to route all data to 25 | */ 26 | class PointToPointRoute(destination: StreamWriter) extends RoutingStrategy { 27 | 28 | /** 29 | * Routing strategy that sends all traffic to the given destination 30 | * @return all traffic sent to the given destination 31 | */ 32 | override def route: ValidationNel[String, StreamWriter] = { 33 | destination.success 34 | } 35 | 36 | override def toString:String = { 37 | s"Stream to stream route: stream `source` -> stream ${destination.toString}" 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: scala 2 | scala: 3 | - 2.11.8 4 | jdk: 5 | - oraclejdk8 6 | addons: 7 | apt: 8 | packages: 9 | - p7zip-full 10 | before_install: 11 | - gem install httparty 12 | deploy: 13 | skip_cleanup: true 14 | provider: script 15 | script: 16 | - sbt assembly && ./deploy/deploy.rb $TRAVIS_TAG 17 | on: 18 | tags: true 19 | env: 20 | global: 21 | - secure: F4fQ43tHDVfY6DCy8ys631eFdzg4Oqvm/e4y2I6MHdbE1yFjiHXpINUC9c5NPYVMkU/900o6CCVjAFuLu3nQyxHzPEAYajX4VC69cySH457SuzI6AGcy1W1jr3Ao238dfMZLRdxp7ObdwUAGxa3tEt6qLIT1hVI+m4lXCu1Re3sDeMWfVT/f6u0bZ4/2znsYYDxdiRsV7SwZHq67X6nvcGjdd0QZTHzO83oaKgaHw9BNZafh0KITqp8FKjHBP778tCTt7qLzrwbGYkCUrcilx+vbMDmsU3qrcBVVdcRN5ogYgFLoPdQaCD1Ns/FD6wyC0bwCcKf6bv0ILy6aPmj9h6IjK4Bl6yC4PZ80+/mbC7ut5+xrIV1/FrrjjaheK3NFclWn5xvq3M92i7oEG8oIXeaR68rOdrlf7UH6CCYdv2G8YDgyMMyIoaa5bL678HTP25SlhA3B+AeNksCKH203nTcfK46BKRYW8DkM3a+G5EKfRIKO/Z9EX63BIPmfZ8rJXmkH6G7vCiOSMuR/ijM+h3v2+xG5DblXD+B2wqfRBi+mbFy7iG7MwpT1yfelouQFSezAt993Q3VyA8I4zasWw0rc9KXihjskK+oGNL0D/mPV0ZQP3G3tlo9IY135w/80vMmyPgCxDNtqZ9gbW/U1fd2cbMLKQ6EKNaC6p8Uqxik= 22 | - secure: dEn48W86SSH2fFLQfNf9FdS+S+9c4hCMIoMWKJdAm4uoYbLjt25P6a4tFRBXsqJCW3dXuCRgUfUHq2Q+Da+6lLdnyoLpW4XbRFjLi6rIaHl3mdXx12H3x2vjDiGOTZn4WIU25YC6N6i0uk5I6AqZ6wfi/lVZ0jZQaWs9k8sqdhnMYaHyxvc5LNcuLiyrnbphz2BhtTKABi/GXPsfmzRY94h3pG1e91LI45FqO11nbRLQAtL0e46QVfPE4z+KUA1DvwdX5HvbRbOfHVoc4f4443KtG35uv57rcBctr8+PXqZf+sq1f82PuZZpU/Y5nNO53ObSje++kMgCJTGswJZyVzYQljqRdfnUGDWxTM0v0FHngRPcAP1TptZTrc9+jIUIlGj6kKCSlQVvIDVan7bYlKxvyxhGeiWYzVMbVUK1ZOJzad5Itgq7B8Jti03NvLO9Fp/Sg5O8/1bhEyAvf8b9aZA0V24dxf70MAsG2bhJF//vwN2/4Eq1630bpK9aFKLKuFdmrvf+l3dHyNXueOBUb+/nACWFuOb6BFTtF5nH0jP4pyxaeu3GjYOEUCUmem+iIUOPp9zbmyDPGeyoBLeGVkCfu/PhmVrHz4a8OcE4Zkxf6RFcZKYL8t5Rh1VN7fhjcJLO4cpOeuoeENRvwDJvIGYRUaPLFMUnnBLUdiLej2s= 23 | -------------------------------------------------------------------------------- /src/main/scala/com/snowplowanalytics/kinesistee/transformation/SnowplowToJson.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 
7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 12 | */ 13 | package com.snowplowanalytics.kinesistee.transformation 14 | import com.snowplowanalytics.kinesistee.models.Content 15 | import com.snowplowanalytics.snowplow.analytics.scalasdk.json.EventTransformer 16 | 17 | import scalaz.{Failure, Success, ValidationNel} 18 | import scalaz.syntax.validation._ 19 | 20 | /** 21 | * Transformation strategy - uses the Snowplow Scala analytics SDK to convert enriched events to nested JSON 22 | */ 23 | class SnowplowToJson extends TransformationStrategy { 24 | 25 | /** 26 | * Use the Snowplow Scala analytics SDK to turn an enriched event into nested JSON 27 | * @param content the record to transform 28 | * @return a nested json representation of the enriched event, or failure if it could not be converted 29 | */ 30 | override def transform(content: Content): ValidationNel[Throwable, Content] = { 31 | EventTransformer.transform(content.row) match { 32 | case Success(s) => Content(s, content.partitionKey).success 33 | case Failure(f) => new IllegalArgumentException(f.head.toString).failureNel 34 | } 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /src/main/scala/com/snowplowanalytics/kinesistee/config/SelfDescribingData.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
12 | */ 13 | 14 | package com.snowplowanalytics.kinesistee.config 15 | 16 | import org.json4s._ 17 | import org.json4s.jackson.JsonMethods._ 18 | import scala.util.{Failure, Success, Try} 19 | 20 | 21 | /** 22 | * A wrapper around self describing json 23 | * allowing quick extraction of the data/schema parts 24 | * @param json the self describing json as a string 25 | */ 26 | class SelfDescribingData(json: String) { 27 | 28 | implicit val defaultFormats = org.json4s.DefaultFormats 29 | 30 | private val parsed = Try(parse(json)) match { 31 | case Failure(f) => throw new IllegalArgumentException(s"Invalid self describing schema: invalid JSON Avro", f) 32 | case Success(s) => s 33 | } 34 | 35 | val schema: String = (parsed \\ "schema").extractOrElse[String](throw new IllegalArgumentException("Invalid self describing schema: missing the `schema` field")) 36 | val data: String = parsed \ "data" match { 37 | case some:JObject => pretty(some) 38 | case _ => throw new IllegalArgumentException("Invalid self describing schema: missing the `data` field (or it is empty)") 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /project/KinesisTeeBuild.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 12 | */ 13 | 14 | import sbt.Keys._ 15 | import sbt._ 16 | // avroghugger 17 | import sbtavrohugger.SbtAvrohugger._ 18 | 19 | object KinesisTeeBuild extends Build { 20 | 21 | import BuildSettings._ 22 | import Dependencies._ 23 | 24 | // Configure prompt to show current project. 25 | override lazy val settings = super.settings :+ { 26 | shellPrompt := { s => Project.extract(s).currentProject.id + " > " } 27 | } 28 | 29 | // Define our project, with basic project information and library 30 | // dependencies. 31 | lazy val project = Project("kinesis-tee", file(".")) 32 | .settings(buildSettings: _*) 33 | .settings( 34 | libraryDependencies ++= Seq( 35 | Libraries.scalaz7, 36 | Libraries.specs2, 37 | Libraries.scalazSpecs2, 38 | Libraries.json4s, 39 | Libraries.json4sExt, 40 | Libraries.analyticsSdk, 41 | Libraries.avro4s, 42 | Libraries.awsSdkCore, 43 | Libraries.awsSdk, 44 | Libraries.awsLambdaSdk, 45 | Libraries.awsLambda, 46 | Libraries.awsLambdaEvents, 47 | Libraries.awsKinesisSdk, 48 | Libraries.awsscala, 49 | Libraries.slf4jSimple 50 | ) 51 | ) 52 | 53 | } 54 | -------------------------------------------------------------------------------- /project/BuildSettings.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 
6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 12 | */ 13 | 14 | import sbt.Keys._ 15 | import sbt._ 16 | import sbtavrohugger.SbtAvrohugger._ 17 | 18 | object BuildSettings { 19 | 20 | lazy val basicSettings = Seq[Setting[_]]( 21 | organization := "com.snowplowanalytics", 22 | version := "0.1.0", 23 | retrieveManaged := true, 24 | description := "Kinesis Tee", 25 | scalaVersion := "2.11.8", 26 | scalacOptions ++= Seq("-feature", "-target:jvm-1.8"), 27 | resolvers ++= Seq(Dependencies.snowplowRepo), 28 | 29 | initialize := { 30 | val _ = initialize.value 31 | if (sys.props("java.specification.version") != "1.8") 32 | sys.error("Java 8 is required for this project.") 33 | } 34 | ) 35 | 36 | import sbtassembly.Plugin._ 37 | import AssemblyKeys._ 38 | 39 | lazy val sbtAssemblySettings = assemblySettings ++ Seq( 40 | jarName in assembly := { 41 | name.value + "-" + version.value + ".jar" 42 | }, 43 | 44 | // META-INF discarding 45 | mergeStrategy in assembly := { 46 | case PathList("META-INF", xs@_*) => MergeStrategy.discard 47 | case x => MergeStrategy.first 48 | } 49 | ) 50 | 51 | lazy val avroSettings = sbtavrohugger.SbtAvrohugger.avroSettings 52 | 53 | lazy val buildSettings = basicSettings ++ sbtAssemblySettings ++ avroSettings 54 | 55 | 56 | } 57 | -------------------------------------------------------------------------------- /src/test/scala/com/snowplowanalytics/kinesistee/routing/PointToPointRouteSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
12 | */ 13 | package com.snowplowanalytics.kinesistee.routing 14 | 15 | import com.amazonaws.regions.{Region, Regions} 16 | import com.amazonaws.services.kinesis.AmazonKinesisClient 17 | import com.snowplowanalytics.kinesistee.StreamWriter 18 | import org.specs2.mutable.Specification 19 | import com.snowplowanalytics.kinesistee.models.Stream 20 | import org.specs2.mock.Mockito 21 | import org.specs2.scalaz.ValidationMatchers 22 | 23 | import scalaz.NonEmptyList 24 | 25 | class PointToPointRouteSpec extends Specification with ValidationMatchers with Mockito { 26 | 27 | def buildStream(str:String): Stream = Stream(name = str, Region.getRegion(Regions.US_EAST_1)) 28 | def buildTarget(str:String): StreamWriter = mock[StreamWriter] 29 | 30 | "point to point routing" should { 31 | 32 | "direct traffic from the given origin to the given destination" in { 33 | val target = buildTarget("destination") 34 | val route = new PointToPointRoute(target) 35 | route.route must beSuccessful(target) 36 | } 37 | 38 | } 39 | 40 | "rendering as a string" should { 41 | 42 | "display the destination" in { 43 | val dest = new StreamWriter(Stream("destination", Region.getRegion(Regions.US_EAST_1)), None, mock[AmazonKinesisClient]) 44 | val sample = new PointToPointRoute(dest) 45 | sample.toString mustEqual s"Stream to stream route: stream `source` -> stream ${dest.toString}" 46 | } 47 | 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /src/main/scala/com/snowplowanalytics/kinesistee/filters/JavascriptFilter.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
12 | */ 13 | package com.snowplowanalytics.kinesistee.filters 14 | 15 | import java.io.StringReader 16 | 17 | import com.snowplowanalytics.kinesistee.models.{Content, Stream} 18 | import javax.script.Invocable 19 | import javax.script.ScriptEngineManager 20 | import javax.script.ScriptException 21 | 22 | import scalaz.syntax.validation._ 23 | import scalaz.ValidationNel 24 | 25 | class JavascriptFilter(js: String) extends FilterStrategy { 26 | 27 | // beware: 28 | // https://github.com/sbt/sbt/issues/1214 29 | val engine = new ScriptEngineManager(null).getEngineByName("nashorn") 30 | if (engine==null) { throw new IllegalStateException("Nashorn script engine not available") } 31 | val in: Invocable = engine.asInstanceOf[Invocable] 32 | 33 | engine.eval(new StringReader(js)) 34 | 35 | /** 36 | * JavaScript filter - invokes nashorn on the given js script 37 | * The script must contain a 'filter' function that accepts the record as an argument 38 | * @param content the record to filter 39 | * @return true if the value is to pass through the stream, false if not 40 | */ 41 | override def filter(content: Content): ValidationNel[Throwable, Boolean] = { 42 | try { 43 | val retVal = in.invokeFunction("filter", content.row) 44 | retVal match { 45 | case bool:java.lang.Boolean => bool.booleanValue().success 46 | case e => new RuntimeException(s"'$e' returned by your js function cannot be converted to boolean").failureNel 47 | } 48 | } catch { 49 | case e @ (_: ScriptException | _: NoSuchMethodException ) => e.failureNel 50 | } 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/main/avro/com.snowplowanalytics.kinesistee.config/Configuration/avro/1-0-0.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "namespace":"com.snowplowanalytics.kinesistee.config", 3 | "name":"Configuration", 4 | "type":"record", 5 | "fields":[ 6 | { 7 | "name":"name", 8 | "type":"string" 9 | }, 10 | { 11 | "name":"targetStream", 12 | "type":{ 13 | "name":"TargetStream", 14 | "type":"record", 15 | "fields":[ 16 | { 17 | "name":"name", 18 | "type":"string" 19 | }, 20 | { 21 | "name":"targetAccount", 22 | "type":[ 23 | "null", 24 | { 25 | "name":"TargetAccount", 26 | "type":"record", 27 | "fields":[ 28 | { 29 | "name":"awsAccessKey", 30 | "type":"string" 31 | }, 32 | { 33 | "name":"awsSecretAccessKey", 34 | "type":"string" 35 | }, 36 | { 37 | "name":"region", 38 | "type":"string" 39 | } 40 | ] 41 | } 42 | ] 43 | } 44 | ] 45 | } 46 | }, 47 | { 48 | "name":"transformer", 49 | "type":[ 50 | "null", 51 | { 52 | "name":"Transformer", 53 | "type":"record", 54 | "fields":[ 55 | { 56 | "name":"builtIn", 57 | "type": { "name": "BuiltIn", "type": "enum", "symbols": ["SNOWPLOW_TO_NESTED_JSON"] } 58 | } 59 | ] 60 | } 61 | ] 62 | }, 63 | { 64 | "name":"filter", 65 | "type":[ 66 | "null", 67 | { 68 | "name":"Filter", 69 | "type":"record", 70 | "fields":[ 71 | { 72 | "name":"javascript", 73 | "type":"string" 74 | } 75 | ] 76 | } 77 | ] 78 | } 79 | ] 80 | } -------------------------------------------------------------------------------- /src/test/scala/com/snowplowanalytics/kinesistee/config/LambdaUtilsSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 
6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 12 | */ 13 | package com.snowplowanalytics.kinesistee.config 14 | 15 | import com.amazonaws.regions.Regions 16 | import org.specs2.mutable.Specification 17 | import org.specs2.scalaz.ValidationMatchers 18 | 19 | import scalaz.NonEmptyList 20 | 21 | class LambdaUtilsSpec extends Specification with ValidationMatchers { 22 | 23 | "getting a region from an ARN" should { 24 | 25 | "get the correct region from a valid arn" in { 26 | val actual = LambdaUtils.getRegionFromArn("arn:aws:elasticbeanstalk:us-east-1:123456789012:environment/My App/MyEnvironment") 27 | actual must beSuccessful("us-east-1") 28 | } 29 | 30 | "an empty string must be rejected" in { 31 | val actual = LambdaUtils.getRegionFromArn(" ") 32 | actual must beFailing(NonEmptyList("Cannot extract region from an empty ARN")) 33 | } 34 | 35 | "an invalid string must be rejected with a message" in { 36 | val actual = LambdaUtils.getRegionFromArn("notvalid") 37 | actual must beFailing(NonEmptyList("Cannot extract region from ARN 'notvalid': invalid format")) 38 | } 39 | 40 | } 41 | 42 | "inflating a config from a URI" should { 43 | 44 | "inflate a valid URI" in { 45 | val uri = "dynamodb:eu-west-1/config-table" 46 | LambdaUtils.configLocationFromLambdaDesc(uri) must beSuccessful(Regions.fromName("eu-west-1"), "config-table") 47 | } 48 | 49 | "fail to inflate a valid URI with a strange resource type" in { 50 | val uri = "ddb:eu-west-1/config-table" 51 | LambdaUtils.configLocationFromLambdaDesc(uri) must beFailing(NonEmptyList("'ddb:eu-west-1/config-table' is not a valid configuration location - expected the format 'dynamodb:eu-west-1/config-table-name'")) 52 | } 53 | 54 | "fail to inflate a valid URI with an invalid region name" in { 55 | val uri = "dynamodb:notreal/config-table" 56 | LambdaUtils.configLocationFromLambdaDesc(uri) must beFailing(NonEmptyList("'notreal' is not a valid AWS region: Cannot create enum from notreal value!")) 57 | } 58 | 59 | } 60 | 61 | } -------------------------------------------------------------------------------- /src/test/scala/com/snowplowanalytics/kinesistee/config/KinesisTeeConfigSchemaSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
12 |  */
13 | 
14 | package com.snowplowanalytics.kinesistee.config
15 | 
16 | import java.io._
17 | 
18 | import org.specs2.mutable.Specification
19 | import com.sksamuel.avro4s.AvroInputStream
20 | 
21 | 
22 | class KinesisTeeConfigSchemaSpec extends Specification {
23 | 
24 |   def stackTrace(e: Throwable) = {
25 |     val sw = new StringWriter()
26 |     val pw = new PrintWriter(sw)
27 |     e.printStackTrace(pw)
28 |     sw.toString()
29 |   }
30 | 
31 |   val sampleGoodConfig = scala.io.Source.fromURL(getClass.getResource("/sampleconfig.json")).mkString
32 |   val sampleGoodConfigWithTarget = scala.io.Source.fromURL(getClass.getResource("/sample_with_target_acct.json")).mkString
33 | 
34 |   "load a sample configuration" in {
35 |     val in = new ByteArrayInputStream(sampleGoodConfig.getBytes("UTF-8"))
36 |     val input = AvroInputStream.json[Configuration](in)
37 |     val result = input.singleEntity
38 | 
39 |     result match {
40 |       case scala.util.Failure(f) => ko(stackTrace(f))
41 |       case scala.util.Success(s) => s mustEqual Configuration(name = "My Kinesis Tee example",
42 |                                                               targetStream = TargetStream("my-target-stream", None),
43 |                                                               transformer = Some(Transformer(BuiltIn.SNOWPLOW_TO_NESTED_JSON)),
44 |                                                               filter = None)
45 |     }
46 |   }
47 | 
48 |   "load a valid sample configuration with a target account" in {
49 |     val in = new ByteArrayInputStream(sampleGoodConfigWithTarget.getBytes("UTF-8"))
50 |     val input = AvroInputStream.json[Configuration](in)
51 |     val result = input.singleEntity
52 | 
53 |     result match {
54 |       case scala.util.Failure(f) => ko(stackTrace(f))
55 |       case scala.util.Success(s) => s mustEqual Configuration(name = "My Kinesis Tee example",
56 |                                                               targetStream = TargetStream("my-target-stream", Some(TargetAccount("*", "*", "eu-west-1"))),
57 |                                                               transformer = Some(Transformer(BuiltIn.SNOWPLOW_TO_NESTED_JSON)),
58 |                                                               filter = None)
59 |     }
60 |   }
61 | 
62 | }
63 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Kinesis Tee
2 | 
3 | [ ![Build Status][travis-image]][travis]
4 | [ ![Release][release-image]][releases]
5 | [ ![License][license-image]][license]
6 | 
7 | ## Overview
8 | 
9 | Kinesis Tee is like **[Unix tee][tee]**, but for Kinesis streams. Use it to:
10 | 
11 | 1. Transform the format of a Kinesis stream
12 | 2. Filter records from a Kinesis stream based on rules
13 | 3. Write a Kinesis stream to another Kinesis stream
14 | 
15 | Rules to apply to your Kinesis stream (e.g. for filtering) are written in JavaScript.
16 | 
17 | ## How it works
18 | 
19 | You configure Kinesis Tee with a self-describing Avro configuration file containing:
20 | 
21 | 1. A single **source stream** to read records from
22 | 2. A single **sink stream** to write records to
23 | 3. An optional **stream transformer** to convert the records to another supported format
24 | 4. An optional **stream filter** to determine whether to write the records to the sink stream
25 | 
26 | Here is an example:
27 | 
28 | ```json
29 | {
30 |   "schema": "iglu:com.snowplowanalytics.kinesistee.config/Configuration/avro/1-0-0",
31 |   "data": {
32 |     "name": "My Kinesis Tee example",
33 |     "targetStream": {
34 |       "name": "my-target-stream"
35 |     },
36 |     "transformer": "SNOWPLOW_TO_NESTED_JSON", // Or "NONE"
37 |     "filter": { // Or null
38 |       "javascript": "BASE64 ENCODED STRING"
39 |     }
40 |   }
41 | }
42 | ```
43 | 
44 | Avro schema for configuration: **[com.snowplowanalytics.kinesistee.config/Configuration/avro/1-0-0][config-file]**
45 | 
46 | ## Find out more
47 | 
48 | | **[Devops Guide][devops-guide]** | **[Developers Guide][developers-guide]** |
49 | |:--------------------------------------:|:---------------------------------------------:|
50 | 
51 | ## Copyright and license
52 | 
53 | Kinesis Tee is copyright 2015-2016 Snowplow Analytics Ltd.
54 | 
55 | Licensed under the **[Apache License, Version 2.0][license]** (the "License");
56 | you may not use this software except in compliance with the License.
57 | 
58 | Unless required by applicable law or agreed to in writing, software
59 | distributed under the License is distributed on an "AS IS" BASIS,
60 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
61 | See the License for the specific language governing permissions and
62 | limitations under the License.
63 | 
64 | [developers-guide]: https://github.com/snowplow/kinesis-tee/wiki/Guide-for-developers
65 | [devops-guide]: https://github.com/snowplow/kinesis-tee/wiki/Guide-for-devops-users
66 | 
67 | [travis-image]: https://travis-ci.org/snowplow/kinesis-tee.png?branch=master
68 | [travis]: http://travis-ci.org/snowplow/kinesis-tee
69 | 
70 | [release-image]: http://img.shields.io/badge/release-0.1.0-blue.svg?style=flat
71 | [releases]: https://github.com/snowplow/kinesis-tee/releases
72 | 
73 | [license-image]: http://img.shields.io/badge/license-Apache--2-blue.svg?style=flat
74 | [license]: http://www.apache.org/licenses/LICENSE-2.0
75 | 
76 | [tee]: https://en.wikipedia.org/wiki/Tee_%28command%29
77 | 
78 | [config-file]: http://iglucentral.com/schemas/com.snowplowanalytics.kinesistee.config/Configuration/avro/1-0-0
79 | 
--------------------------------------------------------------------------------
/project/Dependencies.scala:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2015 Snowplow Analytics Ltd. All rights reserved.
3 |  *
4 |  * This program is licensed to you under the Apache License Version 2.0,
5 |  * and you may not use this file except in compliance with the Apache License Version 2.0.
6 |  * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
7 |  *
8 |  * Unless required by applicable law or agreed to in writing,
9 |  * software distributed under the Apache License Version 2.0 is distributed on an
10 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 |  * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
12 | */ 13 | 14 | import sbt._ 15 | 16 | object Dependencies { 17 | 18 | val snowplowRepo = "Snowplow Analytics" at "http://maven.snplow.com/releases/" 19 | 20 | object V { 21 | val awsLambdaCore = "1.1.0" 22 | val awsLambdaEvents = "1.3.0" 23 | val awsSdk = "1.11.7" 24 | val awsLambdaSdk = "1.11.7" 25 | val awsKinesisClient = "1.7.0" 26 | val awscala = "0.5.+" 27 | val json4s = "3.2.10" 28 | val analyticsSdk = "0.1.0" 29 | val scalaz7 = "7.0.6" 30 | val specs2 = "2.3.13" 31 | val scalazSpecs2 = "0.2" 32 | val avro4s = "1.5.4" 33 | val slf4jSimple = "1.7.+" 34 | } 35 | 36 | object Libraries { 37 | 38 | val awsLambda = "com.amazonaws" % "aws-lambda-java-core" % V.awsLambdaCore 39 | val awsLambdaEvents = "com.amazonaws" % "aws-lambda-java-events" % V.awsLambdaEvents 40 | val awsLambdaSdk = "com.amazonaws" % "aws-java-sdk-lambda" % V.awsLambdaSdk 41 | val awsDynamoDbSdk = "com.amazonaws" % "aws-java-sdk-dynamodb" % V.awsSdk 42 | val awsKinesisSdk = "com.amazonaws" % "amazon-kinesis-client" % V.awsKinesisClient 43 | val awsSdk = "com.amazonaws" % "aws-java-sdk" % V.awsSdk % "provided" 44 | val awsSdkCore = "com.amazonaws" % "aws-java-sdk-core" % V.awsSdk % "provided" 45 | val awsscala = "com.github.seratch" %% "awscala" % V.awscala 46 | val slf4jSimple = "org.slf4j" % "slf4j-simple" % V.slf4jSimple 47 | 48 | val scalaz7 = "org.scalaz" %% "scalaz-core" % V.scalaz7 49 | val specs2 = "org.specs2" %% "specs2" % V.specs2 % "test" 50 | val scalazSpecs2 = "org.typelevel" %% "scalaz-specs2" % V.scalazSpecs2 % "test" 51 | val json4s = "org.json4s" %% "json4s-jackson" % V.json4s 52 | val json4sExt = "org.json4s" %% "json4s-ext" % V.json4s 53 | val analyticsSdk = "com.snowplowanalytics" %% "snowplow-scala-analytics-sdk" % V.analyticsSdk 54 | val avro4s = "com.sksamuel.avro4s" %% "avro4s-core" % V.avro4s 55 | 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /src/test/scala/com/snowplowanalytics/kinesistee/config/SelfDescribingDataSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
12 | */ 13 | 14 | package com.snowplowanalytics.kinesistee.config 15 | 16 | import org.specs2.mutable.Specification 17 | import org.json4s._ 18 | import org.json4s.jackson.JsonMethods._ 19 | 20 | import scala.util.{Failure, Success, Try} 21 | 22 | class SelfDescribingDataSpec extends Specification { 23 | 24 | "with valid self describing JSON" should { 25 | 26 | "give the right schema" in { 27 | val sdd = new SelfDescribingData( 28 | """ 29 | |{ 30 | | "schema":"iglu:com.acme.banana", 31 | | "data": { } 32 | |} 33 | """.stripMargin) 34 | 35 | sdd.schema mustEqual "iglu:com.acme.banana" 36 | } 37 | 38 | "give correctly formed json data payload" in { 39 | val sdd = new SelfDescribingData( 40 | """ 41 | |{ 42 | | "schema":"iglu:com.acme.banana", 43 | | "data": { 44 | | "foo":"bar" 45 | | } 46 | |} 47 | """.stripMargin) 48 | 49 | val expected = pretty(parse( 50 | """ 51 | | { "foo":"bar" } 52 | """.stripMargin 53 | )) 54 | 55 | sdd.data mustEqual expected 56 | } 57 | 58 | 59 | } 60 | 61 | "with invalid self describing JSON" should { 62 | 63 | "error if the `schema` field is missing" in { 64 | Try(new SelfDescribingData( 65 | """ 66 | | { 67 | | "data" : { "nope": "nope" } 68 | | } 69 | """.stripMargin)) match { 70 | case Success(_) => ko("self describing data created without a `schema` field") 71 | case Failure(f) => f.getMessage mustEqual "Invalid self describing schema: missing the `schema` field" 72 | } 73 | } 74 | 75 | "error if the `data` field is missing" in { 76 | Try(new SelfDescribingData( 77 | """ 78 | | { 79 | | "schema": "iglu:com.acme.thing" 80 | | } 81 | """.stripMargin)) match { 82 | case Success(_) => ko("self describing data created without a `data` field") 83 | case Failure(f) => f.getMessage mustEqual "Invalid self describing schema: missing the `data` field (or it is empty)" 84 | } 85 | } 86 | } 87 | 88 | "with invalid json" should { 89 | 90 | "return a validation exception" in { 91 | Try(new SelfDescribingData("{")) match { 92 | case Success(_) => ko("Invalid JSON parsed successfully") 93 | case Failure(f) => f.getMessage mustEqual "Invalid self describing schema: invalid JSON Avro" 94 | } 95 | } 96 | 97 | } 98 | 99 | } 100 | -------------------------------------------------------------------------------- /src/main/scala/com/snowplowanalytics/kinesistee/KinesisTee.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
12 |  */
13 | 
14 | package com.snowplowanalytics.kinesistee
15 | 
16 | import com.snowplowanalytics.kinesistee.filters.FilterStrategy
17 | import com.snowplowanalytics.kinesistee.models.Content
18 | import com.snowplowanalytics.kinesistee.transformation.TransformationStrategy
19 | import com.snowplowanalytics.kinesistee.models.Stream
20 | import com.snowplowanalytics.kinesistee.routing.RoutingStrategy
21 | 
22 | import scalaz.{Failure, Success}
23 | 
24 | /**
25 |  * This object is responsible for gluing together the routing, transformation and filtering steps
26 |  */
27 | object KinesisTee extends Tee {
28 | 
29 |   /**
30 |    * Send incoming content to the given stream
31 |    * using the given routingStrategy. Transform the data using the given transformation strategy
32 |    * first, and then use the given filter strategy to filter the transformed data
33 |    * @param routingStrategy routing strategy to use
34 |    * @param transformationStrategy transformation strategy to use
35 |    * @param filterStrategy filtering strategy to use
36 |    * @param content list of records/content to tee on
37 |    */
38 |   def tee(routingStrategy: RoutingStrategy,
39 |           transformationStrategy: Option[TransformationStrategy],
40 |           filterStrategy: Option[FilterStrategy],
41 |           content: Seq[Content]): Unit = {
42 | 
43 |     // transform first
44 |     // then filter
45 |     // then push to stream via StreamWriter
46 | 
47 |     def transform(content: Content) = {
48 |       transformationStrategy match {
49 |         case Some(strategy) => {
50 |           strategy.transform(content) match {
51 |             case Success(s) => s
52 |             case Failure(f) => {
53 |               // see https://github.com/snowplow/kinesis-tee/issues/11
54 |               System.err.println(s"Error transforming item: '$content'\n\nReason: ${f.head.getMessage}")
55 |               content
56 |             }
57 |           }
58 |         }
59 |         case None => content
60 |       }
61 |     }
62 | 
63 |     def filter(content: Content) = {
64 |       filterStrategy match {
65 |         case Some(strategy) => {
66 |           strategy.filter(content) match {
67 |             case Success(s) => s
68 |             case Failure(f) => {
69 |               // see https://github.com/snowplow/kinesis-tee/issues/11
70 |               System.err.println(s"Error filtering item '$content'\n\nReason: ${f.head.getMessage}")
71 |               false
72 |             }
73 |           }
74 |         }
75 |         case None => true
76 |       }
77 |     }
78 | 
79 |     def route = {
80 |       routingStrategy.route() match {
81 |         case Success(s) => s
82 |         case Failure(f) => throw new IllegalStateException(s"Error routing item '$content': ${f.head}")
83 |       }
84 |     }
85 | 
86 |     content
87 |       .map(transform)
88 |       .filter(filter)
89 |       .foreach(route.write)
90 |   }
91 | 
92 | 
93 | }
94 | 
--------------------------------------------------------------------------------
/src/main/scala/com/snowplowanalytics/kinesistee/config/ConfigurationBuilder.scala:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved.
3 |  *
4 |  * This program is licensed to you under the Apache License Version 2.0,
5 |  * and you may not use this file except in compliance with the Apache License Version 2.0.
6 |  * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
7 |  *
8 |  * Unless required by applicable law or agreed to in writing,
9 |  * software distributed under the Apache License Version 2.0 is distributed on an
10 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 |  * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
/src/main/scala/com/snowplowanalytics/kinesistee/config/ConfigurationBuilder.scala:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved.
 3 |  *
 4 |  * This program is licensed to you under the Apache License Version 2.0,
 5 |  * and you may not use this file except in compliance with the Apache License Version 2.0.
 6 |  * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
 7 |  *
 8 |  * Unless required by applicable law or agreed to in writing,
 9 |  * software distributed under the Apache License Version 2.0 is distributed on an
10 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 |  * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
12 | */
13 |
14 | package com.snowplowanalytics.kinesistee.config
15 |
16 | import java.io.ByteArrayInputStream
17 |
18 | import com.sksamuel.avro4s.AvroInputStream
19 | import awscala.dynamodbv2.DynamoDB
20 | import com.amazonaws.services.dynamodbv2.model.{AttributeValue, QueryRequest}
21 | import scala.collection.JavaConverters._
22 | import scalaz._
23 | import scalaz.syntax.validation._
24 |
25 | /**
26 |  * Object that collects configuration from DynamoDB and inflates it into a Configuration object
27 |  */
28 | object ConfigurationBuilder extends Builder {
29 |
30 |   /**
31 |    * Fetch a configuration from the specified DynamoDB instance, using the lambda function name as the key
32 |    *
33 |    * @param tableName the name of the table to look for config in
34 |    * @param functionName the name of the lambda function (a configuration must exist for this!)
35 |    * @param dynamoDB a DynamoDB instance to connect with
36 |    * @return the configuration stored for the lambda function name
37 |    */
38 |   def build(tableName: String, functionName: String)(implicit dynamoDB: DynamoDB): Configuration = {
39 |     fetchConfigString(tableName, functionName)(dynamoDB) match {
40 |       case Success(config) => build(config)
41 |       case Failure(f) => throw new IllegalStateException(f.head)
42 |     }
43 |   }
44 |
45 |   private def fetchConfigString(tableName: String, functionName: String)(dynamoDB: DynamoDB): ValidationNel[String, String] = {
46 |     val request = new QueryRequest()
47 |       .withTableName(tableName)
48 |       .withKeyConditionExpression("id = :id")
49 |       .withExpressionAttributeValues(Map(":id" -> new AttributeValue(functionName)).asJava)
50 |
51 |     val response = scala.util.Try(dynamoDB.query(request)) match {
52 |       case scala.util.Success(resp) => resp
53 |       case scala.util.Failure(f) => return f.getMessage.failureNel
54 |     }
55 |
56 |     val row = response.getItems.asScala.headOption
57 |     row match {
58 |       case Some(data) => {
59 |         if (data.containsKey("configuration")) {
60 |           data.get("configuration").getS.success
61 |         } else {
62 |           s"Config table '${tableName}' for lambda '$functionName' is missing a 'configuration' field!".failureNel
63 |         }
64 |       }
65 |       case None => s"No configuration in table '$tableName' for lambda '$functionName'!".failureNel
66 |     }
67 |   }
68 |
69 |   /**
70 |    * Build the configuration from a string, without using DynamoDB
71 |    * @param json the Avro/JSON configuration as a self-describing JSON string
72 |    * @return a configuration object inflated from the Avro/JSON input
73 |    */
74 |   def build(json: String): Configuration = {
75 |     val data = new SelfDescribingData(json).data
76 |     val input = AvroInputStream.json[Configuration](new ByteArrayInputStream(data.getBytes("UTF-8")))
77 |
78 |     input.singleEntity match {
79 |       case scala.util.Success(configuration) => configuration
80 |       case scala.util.Failure(f) => throw new IllegalArgumentException("Invalid configuration", f)
81 |     }
82 |   }
83 |
84 | }
--------------------------------------------------------------------------------
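
In DynamoDB the configuration is stored one row per lambda: the `id` attribute holds the function name and the `configuration` attribute holds the self-describing JSON as a plain string (see fetchConfigString above). The string-based build can also be exercised directly; a rough sketch, where the schema URI and all field values are placeholders:

    // Hypothetical config string for illustration only
    val json =
      """
        |{
        |  "schema": "iglu:com.snowplowanalytics.kinesistee.tbd",
        |  "data": {
        |    "name": "example tee",
        |    "targetStream": { "name": "my-target-stream", "targetAccount": null },
        |    "transformer": null,
        |    "filter": null
        |  }
        |}
      """.stripMargin

    val config: Configuration = ConfigurationBuilder.build(json)
    // throws IllegalArgumentException if the data block does not decode against the Avro schema
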
/src/main/scala/com/snowplowanalytics/kinesistee/config/lambdaUtils.scala:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved.
 3 |  *
 4 |  * This program is licensed to you under the Apache License Version 2.0,
 5 |  * and you may not use this file except in compliance with the Apache License Version 2.0.
 6 |  * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
 7 |  *
 8 |  * Unless required by applicable law or agreed to in writing,
 9 |  * software distributed under the Apache License Version 2.0 is distributed on an
10 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 |  * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
12 | */
13 |
14 | package com.snowplowanalytics.kinesistee.config
15 |
16 | import com.amazonaws.regions.Regions
17 | import com.amazonaws.services.lambda.AWSLambdaClient
18 | import com.amazonaws.services.lambda.model.GetFunctionConfigurationRequest
19 |
20 | import scalaz.Scalaz._
21 | import scalaz._
22 |
23 | /**
24 |  * A set of utilities for interacting with AWS Lambda
25 |  */
26 | object LambdaUtils extends AwsLambdaUtils {
27 |
28 |   // matches a config locator of the form dynamodb:<region>/<table-name>
29 |   val configLocation = "^dynamodb:([^/]*)/([^/]*)/?$".r
30 |
31 |   /**
32 |    * Extract the region from an ARN
33 |    *
34 |    * @param arn the ARN to extract the region from
35 |    * @return the region as it appears in the ARN, for example eu-west-1
36 |    */
37 |   def getRegionFromArn(arn: String): ValidationNel[String, String] = {
38 |     if (arn.trim.isEmpty) {
39 |       "Cannot extract region from an empty ARN".failureNel
40 |     } else {
41 |       scala.util.Try(arn.split(":")(3)) match {
42 |         case scala.util.Success(v) => v.success
43 |         case scala.util.Failure(_) => "Cannot extract region from ARN '%s': invalid format".format(arn).failureNel
44 |       }
45 |     }
46 |   }
47 |
48 |   /**
49 |    * Uses the AWS Lambda SDK to collect the description of a given lambda
50 |    *
51 |    * @param lambdaFunction the name of the AWS lambda to query
52 |    * @param region the region the lambda to query is in
53 |    * @return the description of the AWS lambda given
54 |    */
55 |   def getLambdaDescription(lambdaFunction: String, region: String): ValidationNel[Exception, String] = {
56 |     try {
57 |       val request = new GetFunctionConfigurationRequest().withFunctionName(lambdaFunction)
58 |       val client: AWSLambdaClient = new AWSLambdaClient().withRegion(Regions.fromName(region))
59 |       val response = client.getFunctionConfiguration(request)
60 |
61 |       response.getDescription.success
62 |     } catch {
63 |       case e: java.lang.Exception => e.failureNel
64 |     }
65 |   }
66 |
67 |   /**
68 |    * Converts a specially crafted description (in the format dynamodb:region/table-name) into its constituent parts
69 |    *
70 |    * @param description the given lambda description
71 |    * @return the AWS region and table name
72 |    */
73 |   def configLocationFromLambdaDesc(description: String): ValidationNel[String, (Regions, String)] = {
74 |     description match {
75 |       case configLocation(region, table) => {
76 |         scala.util.Try(Regions.fromName(region)) match {
77 |           case scala.util.Success(r) => (r, table).success
78 |           case scala.util.Failure(f) => s"'$region' is not a valid AWS region: ${f.getMessage}".failureNel
79 |         }
80 |       }
81 |       case _ => s"'$description' is not a valid configuration location - expected the format 'dynamodb:eu-west-1/config-table-name'".failureNel
82 |     }
83 |   }
84 |
85 | }
86 |
87 | trait AwsLambdaUtils {
88 |   def getRegionFromArn(arn: String): ValidationNel[String, String]
89 |   def getLambdaDescription(lambdaFunction: String, region: String): ValidationNel[Exception, String]
90 |   def configLocationFromLambdaDesc(description: String): ValidationNel[String, (Regions, String)]
91 | }
--------------------------------------------------------------------------------
/src/main/scala/com/snowplowanalytics/kinesistee/StreamWriter.scala:
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 12 | */ 13 | package com.snowplowanalytics.kinesistee 14 | 15 | import java.io.{PrintWriter, StringWriter} 16 | import java.nio.ByteBuffer 17 | 18 | import com.amazonaws.auth.{AWSCredentials, AWSCredentialsProvider, BasicAWSCredentials, DefaultAWSCredentialsProviderChain} 19 | import com.amazonaws.regions.{Region, Regions} 20 | import com.amazonaws.services.kinesis.AmazonKinesisClient 21 | import com.snowplowanalytics.kinesistee.config.TargetAccount 22 | import com.snowplowanalytics.kinesistee.models.{Content, Stream} 23 | 24 | /** 25 | * Write a record to a predefined stream 26 | * @param stream the stream to write to 27 | * @param targetAccount the target account details, if not using Lambda IAM perms 28 | * @param producer the kinesis client to use 29 | */ 30 | class StreamWriter(stream: Stream, targetAccount: Option[TargetAccount], producer: AmazonKinesisClient) { 31 | 32 | /** 33 | * push the given record to the requested stream 34 | * @param content the record to push 35 | */ 36 | def write(content: Content): Unit = { 37 | producer.putRecord(stream.name, ByteBuffer.wrap(content.row.getBytes("UTF-8")), content.partitionKey) 38 | } 39 | 40 | def flush: Unit = { 41 | } 42 | 43 | def close: Unit = { 44 | flush 45 | } 46 | 47 | override def toString: String = { 48 | s"`${stream.name}`, using separate account details: " + (if (targetAccount.isDefined) { "TRUE" } else { "FALSE" }) 49 | } 50 | 51 | } 52 | 53 | object StreamWriter { 54 | 55 | /** 56 | * Write a stacktrace as a string 57 | * @param t a Throwable to generate the string stacktrace for 58 | * @return a string representation of the given Throwable 59 | */ 60 | def stacktrace(t:Throwable): String = { 61 | val sw = new StringWriter() 62 | t.printStackTrace(new PrintWriter(sw)) 63 | sw.toString 64 | } 65 | 66 | /** 67 | * Build the kinesis client we need to push data to 68 | * @param region the region the target stream is in 69 | * @param targetCreds the credentials needed to push to the target stream 70 | * @return a kinesis client 71 | */ 72 | def buildClientConfig(region: Region, targetCreds: Option[(String, String)]) = { 73 | 74 | val credentialsProvider = if (targetCreds.isDefined) { 75 | val (acctKey, secretKey) = targetCreds.get 76 | new AWSCredentialsProvider() { 77 | override def refresh(): Unit = {} 78 | override def getCredentials: AWSCredentials = new BasicAWSCredentials(acctKey, secretKey) 79 | } 80 | } else { 81 | new DefaultAWSCredentialsProviderChain() 82 | } 83 | 84 | val client = new AmazonKinesisClient(credentialsProvider) 85 | 86 | client.setRegion(region) 87 | 88 | client 89 | } 90 | 91 | /** 92 | * Build the client using the supplied credentials 93 | * @param region the AWS region the stream is located 94 | * 
@param targetAccount alternate account details (if None, the Lambda's own IAM permissions are used)
 95 |    * @return a Kinesis client
 96 |    */
 97 |   def buildClient(region: Region, targetAccount: Option[TargetAccount]) = {
 98 |
 99 |     val creds = targetAccount match {
100 |       case Some(t) => Some((t.awsAccessKey, t.awsSecretAccessKey))
101 |       case _ => None
102 |     }
103 |
104 |     buildClientConfig(region, creds)
105 |   }
106 |
107 | }
--------------------------------------------------------------------------------
/src/main/scala/com/snowplowanalytics/kinesistee/Main.scala:
--------------------------------------------------------------------------------
 1 | package com.snowplowanalytics.kinesistee
 2 |
 3 | import awscala.{Region => AWSScalaRegion}
 4 | import awscala.dynamodbv2.DynamoDB
 5 | import com.amazonaws.regions.{Region, Regions}
 6 | import com.amazonaws.services.kinesis.AmazonKinesisClient
 7 | import com.amazonaws.services.lambda.runtime.{Context => LambdaContext}
 8 | import com.amazonaws.services.lambda.runtime.events.KinesisEvent
 9 | import com.amazonaws.services.lambda.runtime.events.KinesisEvent.KinesisEventRecord
10 | import com.snowplowanalytics.kinesistee.config._
11 | import com.snowplowanalytics.kinesistee.filters.JavascriptFilter
12 |
13 | import scala.collection.JavaConversions._
14 | import scalaz._
15 | import com.snowplowanalytics.kinesistee.models.{Content, Stream}
16 | import com.snowplowanalytics.kinesistee.routing.PointToPointRoute
17 | import com.snowplowanalytics.kinesistee.transformation.SnowplowToJson
18 |
19 | class Main {
20 |
21 |   val kinesisTee: Tee = KinesisTee
22 |   val lambdaUtils: AwsLambdaUtils = LambdaUtils
23 |   val configurationBuilder: Builder = ConfigurationBuilder
24 |   val getKinesisConnector: (Region, Option[TargetAccount]) => AmazonKinesisClient = StreamWriter.buildClient
25 |   val ddb: (AWSScalaRegion) => DynamoDB = DynamoDB.at
26 |
27 |   /**
28 |    * AWS Lambda entry point
29 |    *
30 |    * @param event an amazon kinesis event
31 |    * @param context the context our lambda is in
32 |    */
33 |   def kinesisEventHandler(event: KinesisEvent, context: LambdaContext): Unit = {
34 |
35 |     val conf = getConfiguration(context)
36 |     val data = for { rec: KinesisEventRecord <- event.getRecords
37 |                      row = new String(rec.getKinesis.getData.array(), "UTF-8")
38 |                      partitionKey = rec.getKinesis.getPartitionKey
39 |                      content = Content(row, partitionKey)
40 |     } yield content
41 |
42 |     val myRegion = lambdaUtils.getRegionFromArn(context.getInvokedFunctionArn) match {
43 |       case Success(r) => Region.getRegion(Regions.fromName(r))
44 |       case Failure(f) => throw new IllegalStateException(f.toString())
45 |     }
46 |
47 |     val transformation = conf.transformer match {
48 |       case Some(Transformer(BuiltIn.SNOWPLOW_TO_NESTED_JSON)) => Some(new SnowplowToJson)
49 |       case _ => None
50 |     }
51 |
52 |     val filter = conf.filter match {
53 |       case Some(f) => Some(new JavascriptFilter(new String(java.util.Base64.getDecoder.decode(f.javascript), "UTF-8")))
54 |       case _ => None
55 |     }
56 |
57 |     val targetAccount = conf.targetStream.targetAccount
58 |     val targetStream = targetAccount match {
59 |       case Some(t) => Stream(conf.targetStream.name, Region.getRegion(Regions.fromName(t.region)))
60 |       case None => Stream(conf.targetStream.name, myRegion)
61 |     }
62 |     val streamWriter = new StreamWriter(targetStream, targetAccount, getKinesisConnector(targetStream.region, targetAccount))
63 |     val route = new PointToPointRoute(streamWriter)
64 |
65 |     kinesisTee.tee(route,
66 |                    transformation,
67 |                    filter,
68 |                    data)
69 |
70 |     streamWriter.flush
71 |     streamWriter.close
72 |   }
73 |
74 |   def getConfiguration(context: LambdaContext): Configuration = {
75 |     val region = lambdaUtils.getRegionFromArn(context.getInvokedFunctionArn) match {
76 |       case Success(r) => r
77 |       case Failure(f) => throw new IllegalStateException(f.toString())
78 |     }
79 |
80 |     val (confRegion, confTable) = lambdaUtils.getLambdaDescription(context.getFunctionName, region) match {
81 |       case Success(desc) => {
82 |         lambdaUtils.configLocationFromLambdaDesc(desc) match {
83 |           case Success((region, table)) => (region, table)
84 |           case Failure(f) => throw new IllegalStateException(f.toString())
85 |         }
86 |       }
87 |       case Failure(f) => throw new IllegalStateException(f.toString(), f.head)
88 |     }
89 |
90 |     scala.util.Try(configurationBuilder.build(confTable, context.getFunctionName)(ddb(Region.getRegion(confRegion)))) match {
91 |       case scala.util.Success(c) => c
92 |       case scala.util.Failure(f) => throw new IllegalStateException("Couldn't build configuration", f)
93 |     }
94 |   }
95 |
96 | }
--------------------------------------------------------------------------------
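
A note on wiring: getConfiguration above relies on the Lambda's description field doubling as a configuration locator of the form dynamodb:<region>/<table-name>. A quick sketch of the resolution step (the description and table name here are invented for illustration):

    import com.snowplowanalytics.kinesistee.config.LambdaUtils
    import scalaz.{Success, Failure}

    // e.g. a lambda whose description reads "dynamodb:eu-west-1/kinesis-tee-config"
    LambdaUtils.configLocationFromLambdaDesc("dynamodb:eu-west-1/kinesis-tee-config") match {
      case Success((region, table)) => println(s"configuration lives in '$table' in $region")
      case Failure(errs)            => System.err.println(errs.head)
    }
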
/src/test/scala/com/snowplowanalytics/kinesistee/filters/JavascriptFilterSpec.scala:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved.
 3 |  *
 4 |  * This program is licensed to you under the Apache License Version 2.0,
 5 |  * and you may not use this file except in compliance with the Apache License Version 2.0.
 6 |  * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
 7 |  *
 8 |  * Unless required by applicable law or agreed to in writing,
 9 |  * software distributed under the Apache License Version 2.0 is distributed on an
10 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 |  * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
12 | */
13 | package com.snowplowanalytics.kinesistee.filters
14 |
15 | import com.snowplowanalytics.kinesistee.models.Content
16 | import org.specs2.mutable.Specification
17 | import org.specs2.scalaz.ValidationMatchers
18 |
19 | import scalaz.{Failure, Success}
20 |
21 | class JavascriptFilterSpec extends Specification with ValidationMatchers {
22 |
23 |   "A valid JS filter" should {
24 |
25 |     val jsTrue =
26 |       """
27 |         | function filter(row) {
28 |         |   return true;
29 |         | }
30 |       """.stripMargin
31 |
32 |     val jsFalse =
33 |       """
34 |         | function filter(row) {
35 |         |   return false;
36 |         | }
37 |       """.stripMargin
38 |
39 |     val jsHelloWorldOnly =
40 |       """
41 |         | function filter(row) {
42 |         |   return row != "hello world";
43 |         | }
44 |       """.stripMargin
45 |
46 |
47 |     "with a js function that only returns true, return true" in {
48 |       val strategy = new JavascriptFilter(jsTrue)
49 |       strategy.filter(Content("hello world", "p")) must beSuccessful(true)
50 |     }
51 |
52 |     "with a js function that only returns false, return false" in {
53 |       val strategy = new JavascriptFilter(jsFalse)
54 |       strategy.filter(Content("hello world", "p")) must beSuccessful(false)
55 |     }
56 |
57 |     "with a function that filters out `hello world`, return true for non hello world" in {
58 |       val strategy = new JavascriptFilter(jsHelloWorldOnly)
59 |       strategy.filter(Content("banana", "p")) must beSuccessful(true)
60 |     }
61 |
62 |     "with a function that filters out `hello world`, return false if content is `hello world`" in {
63 |       val strategy = new JavascriptFilter(jsHelloWorldOnly)
64 |       strategy.filter(Content("hello world", "p")) must beSuccessful(false)
65 |     }
66 |
67 |   }
68 |
69 |   "An invalid js filter" should {
70 |
71 |     "fail if js is not well formed" in {
72 |       val badlyFormedJs =
73 |         """
74 |           | function filter(row) {
75 |         """.stripMargin // missing closing brace
76 |
77 |       val expectedError =
78 |         """:3:8 Expected } but found eof
79 |           |
80 |           |        ^ in  at line number 3 at column number 8""".stripMargin
81 |
82 |       scala.util.Try(new JavascriptFilter(badlyFormedJs)) match {
83 |         case scala.util.Success(_) => ko("Badly formed JS did not generate exception")
84 |         case scala.util.Failure(f) => f.getMessage.replaceAll("\\s", "") mustEqual expectedError.replaceAll("\\s", "")
85 |       }
86 |     }
87 |
88 |     "fail if the js is missing a 'filter' function" in {
89 |       val missingfunc =
90 |         """
91 |           |function banana() {
92 |           |  return false;
93 |           |}
94 |         """.stripMargin
95 |
96 |       val strategy = new JavascriptFilter(missingfunc)
97 |       strategy.filter(Content("abc", "p")) match {
98 |         case Success(_) => ko("Filter cannot succeed without a 'filter' function")
99 |         case Failure(f) => f.toString() mustEqual "NonEmptyList(java.lang.NoSuchMethodException: No such function filter)"
100 |       }
101 |     }
102 |
103 |     "fail if the js has a runtime error" in {
104 |       val runtimeBloop =
105 |         """
106 |           |function filter(org) { return 1/0; }
107 |         """.stripMargin
108 |
109 |       val strategy = new JavascriptFilter(runtimeBloop)
110 |       strategy.filter(Content("abc", "p")) must beFailing
111 |     }
112 |
113 |
114 |   }
115 | }
116 |
--------------------------------------------------------------------------------
/deploy/deploy.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 |
 3 | require 'shellwords'
 4 | require 'json'
 5 | require 'httparty'
 6 |
 7 | def perror(msg)
 8 |   puts "Error: #{msg}"
 9 |   exit 1
10 | end
11 |
12 | def get_version(str)
13 | 
str.match(/(\d+\.\d+\.\d+)/)[1] 14 | end 15 | 16 | def get_project_version(base_dir) 17 | ver_output = "" 18 | Dir.chdir(base_dir){ 19 | ver_output = %x[sbt version] 20 | } 21 | 22 | get_version(ver_output) 23 | end 24 | 25 | def exec(args) 26 | safe_args = args.map { |arg| Shellwords.escape arg }.join " " 27 | res = system "#{safe_args} > /dev/null" 28 | if !res then 29 | perror "Failed to execute '#{safe_args}' process exited abnormally" 30 | end 31 | end 32 | 33 | def unzip_file(file, destination) 34 | exec ["7z", "x", "#{file}", "-aoa", "-o#{destination}"] 35 | end 36 | 37 | def zip_dir(directory, archive_name) 38 | exec ["7z", "a", "-tzip", "-mx=9", "#{archive_name}", "#{directory}"] 39 | end 40 | 41 | def create_bintray_version(org, user, key, version, repo, package_name) 42 | 43 | bintray_repo = "#{org}/#{repo}" 44 | 45 | request = { :name => "#{version}", :desc => "Release of '#{package_name}'" }.to_json 46 | auth = {:username => user, :password => key} 47 | 48 | url = "https://api.bintray.com/packages/#{bintray_repo}/#{package_name}/versions" 49 | 50 | response = HTTParty.post(url, 51 | { 52 | :body => request, 53 | :basic_auth => auth, 54 | :headers => { 'Content-Type' => 'application/json', 'Accept' => 'application/json' } 55 | }) 56 | 57 | case response.code 58 | when 200..300 ## ok 59 | puts "Version #{version} of #{package_name} created" 60 | when 409 61 | puts "Version #{version} of #{package_name} exists, skipping..." 62 | when 500..600 63 | perror "Failed to create version #{version} of #{package_name} (error code: #{response.code})" 64 | else 65 | perror "Unknown error creating version #{version} of #{package_name} (error code: #{response.code})" 66 | end 67 | end 68 | 69 | def publish_to_bintray(org, user, key, file, repo, package, version) 70 | 71 | bintray_repo = "#{org}/#{repo}" 72 | 73 | puts "Publishing #{file}" 74 | 75 | url = "https://api.bintray.com/content/#{bintray_repo}/#{package}/#{version}/#{file}?publish=1&override=1" 76 | 77 | upload_file = File.new(file, 'rb').read 78 | def upload_file.bytesize; self.size; end 79 | auth = {:username => user, :password => key} 80 | 81 | response = HTTParty.put(url, 82 | { 83 | :headers => {"Content-Type"=>"application/octet-stream"}, 84 | :body => upload_file, 85 | :basic_auth => auth 86 | }) 87 | 88 | case response.code 89 | when 200..300 90 | puts "File uploaded!" 91 | else 92 | perror "File failed to upload (error code: #{response.code})" 93 | end 94 | end 95 | 96 | def upload_to_bintray(org, user, key, file, version, repo, package_name) 97 | 98 | puts "Uploading '#{file}' to Bintray" 99 | 100 | create_bintray_version org, user, key, version, repo, package_name 101 | 102 | publish_to_bintray org, user, key, file, repo, package_name, version 103 | 104 | end 105 | 106 | # check versions 107 | 108 | base_dir = ENV['TRAVIS_BUILD_DIR'] 109 | target_version = ARGV[0] 110 | 111 | if base_dir.nil? 
112 |   perror "TRAVIS_BUILD_DIR not set"
113 | end
114 |
115 | actual_version = get_project_version(base_dir)
116 |
117 | if actual_version != get_version(target_version)
118 |   perror "Tag version '#{target_version}' doesn't match project version '#{actual_version}'"
119 | end
120 |
121 | assembled_jar = "#{base_dir}/target/scala-2.11/kinesis-tee-#{get_version(target_version)}.jar"
122 | target_dir = "#{base_dir}/deploy/gordon/kinesis-tee/kinesis-tee-app/kinesis-tee-code"
123 |
124 | if !File.exist?(assembled_jar)
125 |   perror "Cannot find build artifact in '#{assembled_jar}'"
126 | end
127 |
128 | if !Dir.exist?(target_dir)
129 |   perror "Gordon target directory '#{target_dir}' doesn't exist"
130 | end
131 |
132 | bintray_user = ENV['BINTRAY_SNOWPLOW_GENERIC_USER']
133 | bintray_api_key = ENV['BINTRAY_SNOWPLOW_GENERIC_API_KEY']
134 |
135 | if bintray_user.nil?
136 |   perror "Cannot find required environment variable: BINTRAY_SNOWPLOW_GENERIC_USER"
137 | end
138 |
139 | if bintray_api_key.nil?
140 |   perror "Cannot find required environment variable: BINTRAY_SNOWPLOW_GENERIC_API_KEY"
141 | end
142 |
143 | # unzip the assembled jar
144 | # and copy it into deploy/gordon/kinesis-tee/kinesis-tee-app/kinesis-tee-code
145 |
146 | unzip_file(assembled_jar, target_dir)
147 |
148 | zip_target_dir = base_dir + "/deploy/gordon/*"
149 | archive_name = "kinesis_tee_#{target_version.gsub(/-/, '_')}.zip"
150 |
151 | # zip everything up
152 | zip_dir(zip_target_dir, archive_name)
153 |
154 | # ship to Bintray
155 |
156 | upload_to_bintray "snowplow", bintray_user, bintray_api_key, archive_name, target_version, "snowplow-generic", "kinesis-tee"
--------------------------------------------------------------------------------
/src/test/scala/com/snowplowanalytics/kinesistee/config/ConfigurationBuilderSpec.scala:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved.
 3 |  *
 4 |  * This program is licensed to you under the Apache License Version 2.0,
 5 |  * and you may not use this file except in compliance with the Apache License Version 2.0.
 6 |  * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
 7 |  *
 8 |  * Unless required by applicable law or agreed to in writing,
 9 |  * software distributed under the Apache License Version 2.0 is distributed on an
10 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 |  * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
12 | */ 13 | 14 | package com.snowplowanalytics.kinesistee.config 15 | 16 | import java.util 17 | import awscala.dynamodbv2.{AttributeValue, DynamoDB} 18 | import com.amazonaws.services.dynamodbv2.model.{QueryRequest, QueryResult} 19 | import org.specs2.mock.Mockito 20 | import org.specs2.mutable.Specification 21 | 22 | class ConfigurationBuilderSpec extends Specification with Mockito { 23 | 24 | 25 | val sampleGoodConfig = scala.io.Source.fromURL(getClass.getResource("/sample_self_describing_config.json")).mkString 26 | val sampleConfig = Configuration(name = "My Kinesis Tee example", 27 | targetStream = TargetStream("my-target-stream", None), 28 | transformer = Some(Transformer(BuiltIn.SNOWPLOW_TO_NESTED_JSON)), 29 | filter = None) 30 | 31 | "A valid configuration" should { 32 | 33 | "generate the correct case class" in { 34 | ConfigurationBuilder.build(sampleGoodConfig) mustEqual sampleConfig 35 | } 36 | 37 | } 38 | 39 | "An invalid JSON configuration" should { 40 | 41 | "throw an exception" in { 42 | ConfigurationBuilder.build("banana") must throwA[IllegalArgumentException] 43 | } 44 | 45 | } 46 | 47 | "A configuration that doesn't match the given schema" should { 48 | 49 | "throw an exception" in { 50 | ConfigurationBuilder.build( 51 | """ 52 | |{ 53 | | "schema": "com.thing", 54 | | "data": { "foo":"bar" } 55 | |} 56 | """.stripMargin) must throwA(new IllegalArgumentException("Invalid configuration")) 57 | } 58 | 59 | } 60 | 61 | "Loading from DynamoDB" should { 62 | 63 | val sampleConfigTableName = "config-table-sample-name" 64 | 65 | "load a configuration using dynamodb and the specified table name" in { 66 | implicit val dynamoDB = mock[DynamoDB] 67 | val res = mock[QueryResult] 68 | val items:util.List[java.util.Map[java.lang.String,com.amazonaws.services.dynamodbv2.model.AttributeValue]] = new util.ArrayList() 69 | 70 | val one:util.Map[String,com.amazonaws.services.dynamodbv2.model.AttributeValue] = new util.HashMap() 71 | one.put("id", new AttributeValue(Some("with-id"))) 72 | one.put("configuration", new AttributeValue(Some(sampleGoodConfig))) 73 | items.add(one) 74 | 75 | res.getItems returns items 76 | dynamoDB.query(any[QueryRequest]) returns res 77 | 78 | ConfigurationBuilder.build(sampleConfigTableName, "with-id") mustEqual sampleConfig 79 | } 80 | 81 | "give a good error if the table doesn't have a matching entry" in { 82 | implicit val dynamoDB = mock[DynamoDB] 83 | val res = mock[QueryResult] 84 | val items:util.List[java.util.Map[java.lang.String,com.amazonaws.services.dynamodbv2.model.AttributeValue]] = new util.ArrayList() 85 | 86 | res.getItems returns items 87 | dynamoDB.query(any[QueryRequest]) returns res 88 | 89 | ConfigurationBuilder.build(sampleConfigTableName, "with-id") must throwA(new IllegalStateException(s"No configuration in table '$sampleConfigTableName' for lambda 'with-id'!")) 90 | } 91 | 92 | "give a good error if the table doesn't have the right keys (id and configuration)" in { 93 | implicit val dynamoDB = mock[DynamoDB] 94 | val res = mock[QueryResult] 95 | val items:util.List[java.util.Map[java.lang.String,com.amazonaws.services.dynamodbv2.model.AttributeValue]] = new util.ArrayList() 96 | 97 | val one:util.Map[String,com.amazonaws.services.dynamodbv2.model.AttributeValue] = new util.HashMap() 98 | one.put("id", new AttributeValue(Some("with-id"))) 99 | one.put("this-is-not-config", new AttributeValue(Some("abc"))) 100 | 101 | items.add(one) 102 | res.getItems returns items 103 | dynamoDB.query(any[QueryRequest]) returns res 104 | 105 | 
ConfigurationBuilder.build(sampleConfigTableName, "with-id") must throwA(new IllegalStateException(s"Config table '${sampleConfigTableName}' for lambda 'with-id' is missing a 'configuration' field!"))
106 |     }
107 |
108 |     "do something reasonable if ddb errors" in {
109 |       implicit val dynamoDB = mock[DynamoDB]
110 |       val exception = new IllegalArgumentException("Query exploded")
111 |       dynamoDB.query(any[QueryRequest]) throws exception
112 |
113 |       // NB IllegalArgumentException is rethrown as IllegalStateException
114 |       ConfigurationBuilder.build(sampleConfigTableName, "with-id") must throwA[IllegalStateException](message = "Query exploded")
115 |     }
116 |
117 |
118 |   }
119 |
120 | }
121 |
--------------------------------------------------------------------------------
/src/test/scala/com/snowplowanalytics/kinesistee/KinesisTeeSpec.scala:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved.
 3 |  *
 4 |  * This program is licensed to you under the Apache License Version 2.0,
 5 |  * and you may not use this file except in compliance with the Apache License Version 2.0.
 6 |  * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
 7 |  *
 8 |  * Unless required by applicable law or agreed to in writing,
 9 |  * software distributed under the Apache License Version 2.0 is distributed on an
10 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 |  * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
12 | */
13 | package com.snowplowanalytics.kinesistee
14 |
15 | import com.amazonaws.regions.{Region, Regions}
16 | import org.specs2.mock.Mockito
17 | import org.specs2.mutable.Specification
18 | import com.snowplowanalytics.kinesistee.filters.FilterStrategy
19 | import com.snowplowanalytics.kinesistee.models.Content
20 | import com.snowplowanalytics.kinesistee.models.Stream
21 | import com.snowplowanalytics.kinesistee.routing.RoutingStrategy
22 | import com.snowplowanalytics.kinesistee.transformation.TransformationStrategy
23 | import org.mockito.Matchers.{eq => eqTo}
24 |
25 | import scala.language.reflectiveCalls
26 | import scalaz.syntax.validation._
27 | import scalaz.ValidationNel
28 |
29 | class KinesisTeeSpec extends Specification with Mockito {
30 |
31 |   def sampleStream(streamName: String) = Stream(streamName, Region.getRegion(Regions.US_EAST_1))
32 |
33 |   "the tee function" should {
34 |
35 |     def mockRoute = new RoutingStrategy {
36 |       val mockStreamWriter = mock[StreamWriter]
37 |       override def route(): ValidationNel[String, StreamWriter] = mockStreamWriter.success
38 |     }
39 |
40 |     "write everything to the StreamWriter if no filter strategy is in use" in {
41 |       val sampleContent = Seq(Content("a", "p"), Content("a", "p"), Content("a", "p"))
42 |       val route = mockRoute
43 |       KinesisTee.tee(route, None, None, sampleContent)
44 |       there was three (route.mockStreamWriter).write(eqTo(Content("a", "p")))
45 |     }
46 |
47 |     "write nothing to the stream writer if the filter function returns false" in {
48 |       val sampleContent = Seq(Content("a", "p"), Content("a", "p"), Content("a", "p"))
49 |
50 |       class FilterEverything extends FilterStrategy {
51 |         override def filter(content: Content): ValidationNel[Throwable, Boolean] = {
52 |           false.success
53 |         }
54 |       }
55 |
56 |       val routeMock = mockRoute
57 |       KinesisTee.tee(routingStrategy = routeMock,
58 |                      transformationStrategy = None,
59 |                      filterStrategy = Some(new FilterEverything),
60 |                      content = sampleContent)
61 |       there was no (routeMock.mockStreamWriter).write(any[Content])
62 |     }
63 |
64 |     "transform stream content using the given transformation strategy" in {
65 |       val sampleContent = Seq(Content("a", "p"), Content("a", "p"), Content("a", "p"))
66 |
67 |       class MakeEverythingB extends TransformationStrategy {
68 |         override def transform(content: Content): ValidationNel[Throwable, Content] = {
69 |           Content("b", "p").success
70 |         }
71 |       }
72 |
73 |       val routeMock = mockRoute
74 |       KinesisTee.tee(routeMock, Some(new MakeEverythingB), None, sampleContent)
75 |
76 |       there was three (routeMock.mockStreamWriter).write(eqTo(Content("b", "p")))
77 |     }
78 |
79 |     "run the transformation strategy prior to the filter strategy" in {
80 |       val sampleContent = Seq(Content("a", "p"), Content("a", "p"), Content("a", "p"))
81 |
82 |       class MakeEverythingB extends TransformationStrategy {
83 |         override def transform(content: Content): ValidationNel[Throwable, Content] = {
84 |           Content("b", "p").success
85 |         }
86 |       }
87 |
88 |       class FilterNotB extends FilterStrategy {
89 |         override def filter(content: Content): ValidationNel[Throwable, Boolean] = {
90 |           content match {
91 |             case Content("b", "p") => true.success
92 |             case _ => false.success
93 |           }
94 |         }
95 |       }
96 |
97 |       val routeMock = mockRoute
98 |       KinesisTee.tee(routeMock, Some(new MakeEverythingB), Some(new FilterNotB), sampleContent)
99 |
100 |       there was three (routeMock.mockStreamWriter).write(eqTo(Content("b", "p")))
101 |     }
102 |
103 |     "swallow failures in the filter strategy before pushing anything to the stream writer" in {
104 |       class FailureFilter extends FilterStrategy {
105 |         override def filter(content: Content): ValidationNel[Throwable, Boolean] = new IllegalArgumentException("something").failureNel
106 |       }
107 |
108 |       val routeMock = mockRoute
109 |       KinesisTee.tee(routeMock, None, Some(new FailureFilter), Seq(Content("a", "p")))
110 |       there was no (routeMock.mockStreamWriter).write(any[Content])
111 |     }
112 |
113 |     "swallow failures in the transformation strategy and pass the original content through" in {
114 |       class FailureTransform extends TransformationStrategy {
115 |         override def transform(content: Content): ValidationNel[Throwable, Content] = new IllegalStateException("something").failureNel
116 |       }
117 |
118 |       val routeMock = mockRoute
119 |       KinesisTee.tee(routeMock, Some(new FailureTransform), None, Seq(Content("b", "p")))
120 |       there was one (routeMock.mockStreamWriter).write(any[Content])
121 |     }
122 |
123 |   }
124 | }
--------------------------------------------------------------------------------
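
The failure cases pinned down above (transform failures fall back to the original record, filter failures drop the record, route failures throw) are easy to exercise from a custom strategy. For instance, a filter that reports malformed rows as failures rather than silently dropping them might look like this, a hypothetical illustration rather than anything in the codebase:

    import com.snowplowanalytics.kinesistee.models.Content
    import com.snowplowanalytics.kinesistee.filters.FilterStrategy
    import scalaz.ValidationNel
    import scalaz.syntax.validation._

    // Keeps rows that parse as integers; anything else becomes a Failure,
    // which KinesisTee.tee logs to stderr and drops.
    class IntegerRowsOnly extends FilterStrategy {
      override def filter(content: Content): ValidationNel[Throwable, Boolean] =
        try { content.row.trim.toInt; true.success }
        catch { case e: NumberFormatException => e.failureNel }
    }
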
/src/test/scala/com/snowplowanalytics/kinesistee/StreamWriterSpec.scala:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved.
 3 |  *
 4 |  * This program is licensed to you under the Apache License Version 2.0,
 5 |  * and you may not use this file except in compliance with the Apache License Version 2.0.
 6 |  * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
 7 |  *
 8 |  * Unless required by applicable law or agreed to in writing,
 9 |  * software distributed under the Apache License Version 2.0 is distributed on an
10 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 |  * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
12 | */
13 | package com.snowplowanalytics.kinesistee
14 |
15 | import java.io.{PrintWriter, StringWriter}
16 | import java.nio.ByteBuffer
17 | import java.util.concurrent.{Executor, TimeUnit}
18 |
19 | import com.amazonaws.auth.{BasicAWSCredentials, DefaultAWSCredentialsProviderChain}
20 | import com.amazonaws.regions.{Region, Regions}
21 | import com.google.common.util.concurrent.ListenableFuture
22 | import com.snowplowanalytics.kinesistee.config.TargetAccount
23 | import org.specs2.mock.Mockito
24 | import org.specs2.mutable.Specification
25 | import com.snowplowanalytics.kinesistee.models._
26 |
27 | class StreamWriterSpec extends Specification with Mockito {
28 |
29 |   val sampleTarget = new TargetAccount("access_key", "secret_access_key", "eu-west-1")
30 |   val sampleContent = Content("row", "p")
31 |
32 |   "building a kinesis producer configuration" should {
33 |
34 |     // "set the region to that specified in the target account" in {
35 |     //   val c = StreamWriter.buildClientConfig(Some(sampleTarget))
36 |     //   c.getEndpointPrefix mustEqual "eu-west-1"
37 |     // }
38 |
39 |     // "set the region to null if the target account is not given" in {
40 |     //   val c = StreamWriter.buildClientConfig(None)
41 |     //   c.getRegion mustEqual ""
42 |     // }
43 |     //
44 |     // "set the max connections to one" in {
45 |     //   val c = StreamWriter.buildClientConfig(None)
46 |     //   c.getMaxConnections mustEqual 1
47 |     // }
48 |     //
49 |     // "set the max buffered time according to the constant value" in {
50 |     //   val c = StreamWriter.buildClientConfig(None)
51 |     //   c.getRecordMaxBufferedTime mustEqual StreamWriter.MaxBufferedTime
52 |     // }
53 |     //
54 |     // "set the max timeout time according to the constant value" in {
55 |     //   val c = StreamWriter.buildClientConfig(None)
56 |     //   c.getRequestTimeout mustEqual StreamWriter.RequestTimeout
57 |     // }
58 |     //
59 |     // "use the account details for the target account, if one is given" in {
60 |     //   val c = StreamWriter.buildClientConfig(Some(sampleTarget))
61 |     //   val creds = c.getCredentialsProvider.getCredentials
62 |     //   val (key, secret) = (creds.getAWSAccessKeyId, creds.getAWSSecretKey)
63 |     //   (key,secret) mustEqual ("access_key", "secret_access_key")
64 |     // }
65 |     //
66 |     // "use the default credentials provider chain if no target account is given" in {
67 |     //   val c = StreamWriter.buildClientConfig(None)
68 |     //   c.getCredentialsProvider.isInstanceOf[DefaultAWSCredentialsProviderChain] mustEqual true
69 |     // }
70 |
71 |   }
72 |
73 |   "getting a kinesis client" should {
74 |     "return a Kinesis client" in { StreamWriter.buildClientConfig(Region.getRegion(Regions.US_EAST_1), None) must not beNull }
75 |   }
76 |
77 |   "formatting a stacktrace" should {
78 |
79 |     "generate a sensible stacktrace" in {
80 |       val sampleException = new IllegalStateException("hello")
81 |       val sw = new StringWriter()
82 |       sampleException.printStackTrace(new PrintWriter(sw))
83 |       val expected = sw.toString
84 |
85 |       StreamWriter.stacktrace(sampleException) mustEqual expected
86 |     }
87 |
88 |   }
89 |
90 |   // "flushing" should {
91 |   //
92 |   //   "synchronously flush the kinesis producer" in {
93 |   //     val producer = mock[KinesisProducer]
94 |   //     new StreamWriter(Stream("sample"), None, producer).flush
95 |   //     there was one (producer).flushSync()
96 |   //   }
97 |   //
98 |   // }
99 |
100 |   // "closing a streamwriter" should {
101 |   //
102 |   //   "flush the kinesis producer" in {
103 |   //     val producer = mock[KinesisProducer]
104 |   //     new StreamWriter(Stream("sample"), None, producer).close
105 |   //     there was one 
(producer).flushSync() 106 | // } 107 | // 108 | // "destroy the kinesis producer" in { 109 | // val producer = mock[KinesisProducer] 110 | // new StreamWriter(Stream("sample"), None, producer).close 111 | // there was one (producer).destroy() 112 | // } 113 | // 114 | // } 115 | 116 | // "writing using a streamwriter" should { 117 | // 118 | // "add a record to the stream using the given producer" in { 119 | // val producer = mock[KinesisProducer] 120 | // 121 | // producer.addUserRecord(any[String], any[String], any[ByteBuffer]) returns new ListenableFuture[UserRecordResult] { 122 | // override def addListener(listener: Runnable, executor: Executor): Unit = {} 123 | // 124 | // override def isCancelled: Boolean = false 125 | // 126 | // override def get(): UserRecordResult = mock[UserRecordResult] 127 | // 128 | // override def get(timeout: Long, unit: TimeUnit): UserRecordResult = mock[UserRecordResult] 129 | // 130 | // override def cancel(mayInterruptIfRunning: Boolean): Boolean = false 131 | // 132 | // override def isDone: Boolean = true 133 | // } 134 | // 135 | // new StreamWriter(Stream("sample"), None, producer).write(sampleContent) 136 | // there was one (producer).addUserRecord("sample", sampleContent.partitionKey, ByteBuffer.wrap(sampleContent.row.getBytes("UTF-8"))) 137 | // } 138 | // 139 | // } 140 | 141 | } 142 | -------------------------------------------------------------------------------- /src/test/scala/com/snowplowanalytics/kinesistee/transformation/SnowplowToJsonSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
12 | */ 13 | package com.snowplowanalytics.kinesistee.transformation 14 | 15 | import com.snowplowanalytics.kinesistee.models.Content 16 | import org.json4s.JValue 17 | import org.specs2.mutable.Specification 18 | import org.specs2.scalaz.ValidationMatchers 19 | import org.json4s.JsonDSL._ 20 | import org.json4s.jackson.JsonMethods._ 21 | 22 | import scalaz.{Failure, Success} 23 | 24 | class SnowplowToJsonSpec extends Specification with ValidationMatchers { 25 | 26 | val unstructJson = 27 | """{ 28 | "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", 29 | "data": { 30 | "schema": "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1", 31 | "data": { 32 | "targetUrl": "http://www.example.com", 33 | "elementClasses": ["foreground"], 34 | "elementId": "exampleLink" 35 | } 36 | } 37 | }""" 38 | 39 | val contextsJson = 40 | """{ 41 | "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", 42 | "data": [ 43 | { 44 | "schema": "iglu:org.schema/WebPage/jsonschema/1-0-0", 45 | "data": { 46 | "genre": "blog", 47 | "inLanguage": "en-US", 48 | "datePublished": "2014-11-06T00:00:00Z", 49 | "author": "Fred Blundun", 50 | "breadcrumb": [ 51 | "blog", 52 | "releases" 53 | ], 54 | "keywords": [ 55 | "snowplow", 56 | "javascript", 57 | "tracker", 58 | "event" 59 | ] 60 | } 61 | }, 62 | { 63 | "schema": "iglu:org.w3/PerformanceTiming/jsonschema/1-0-0", 64 | "data": { 65 | "navigationStart": 1415358089861, 66 | "unloadEventStart": 1415358090270, 67 | "unloadEventEnd": 1415358090287, 68 | "redirectStart": 0, 69 | "redirectEnd": 0, 70 | "fetchStart": 1415358089870, 71 | "domainLookupStart": 1415358090102, 72 | "domainLookupEnd": 1415358090102, 73 | "connectStart": 1415358090103, 74 | "connectEnd": 1415358090183, 75 | "requestStart": 1415358090183, 76 | "responseStart": 1415358090265, 77 | "responseEnd": 1415358090265, 78 | "domLoading": 1415358090270, 79 | "domInteractive": 1415358090886, 80 | "domContentLoadedEventStart": 1415358090968, 81 | "domContentLoadedEventEnd": 1415358091309, 82 | "domComplete": 0, 83 | "loadEventStart": 0, 84 | "loadEventEnd": 0 85 | } 86 | } 87 | ] 88 | }""" 89 | 90 | val derivedContextsJson = 91 | """{ 92 | "schema": "iglu:com.snowplowanalytics.snowplow\/contexts\/jsonschema\/1-0-1", 93 | "data": [ 94 | { 95 | "schema": "iglu:com.snowplowanalytics.snowplow\/ua_parser_context\/jsonschema\/1-0-0", 96 | "data": { 97 | "useragentFamily": "IE", 98 | "useragentMajor": "7", 99 | "useragentMinor": "0", 100 | "useragentPatch": null, 101 | "useragentVersion": "IE 7.0", 102 | "osFamily": "Windows XP", 103 | "osMajor": null, 104 | "osMinor": null, 105 | "osPatch": null, 106 | "osPatchMinor": null, 107 | "osVersion": "Windows XP", 108 | "deviceFamily": "Other" 109 | } 110 | } 111 | ] 112 | }""" 113 | 114 | val nvPairs = List( 115 | "app_id" -> "angry-birds", 116 | "platform" -> "web", 117 | "etl_tstamp" -> "2017-01-26 00:01:25.292", 118 | "collector_tstamp" -> "2013-11-26 00:02:05", 119 | "dvce_created_tstamp" -> "2013-11-26 00:03:57.885", 120 | "event" -> "page_view", 121 | "event_id" -> "c6ef3124-b53a-4b13-a233-0088f79dcbcb", 122 | "txn_id" -> "41828", 123 | "name_tracker" -> "cloudfront-1", 124 | "v_tracker" -> "js-2.1.0", 125 | "v_collector" -> "clj-tomcat-0.1.0", 126 | "v_etl" -> "serde-0.5.2", 127 | "user_id" -> "jon.doe@email.com", 128 | "user_ipaddress" -> "92.231.54.234", 129 | "user_fingerprint" -> "2161814971", 130 | "domain_userid" -> "bc2e92ec6c204a14", 131 | "domain_sessionidx" -> "3", 132 | "network_userid" -> 
"ecdff4d0-9175-40ac-a8bb-325c49733607", 133 | "geo_country" -> "US", 134 | "geo_region" -> "TX", 135 | "geo_city" -> "New York", 136 | "geo_zipcode" -> "94109", 137 | "geo_latitude" -> "37.443604", 138 | "geo_longitude" -> "-122.4124", 139 | "geo_region_name" -> "Florida", 140 | "ip_isp" -> "FDN Communications", 141 | "ip_organization" -> "Bouygues Telecom", 142 | "ip_domain" -> "nuvox.net", 143 | "ip_netspeed" -> "Cable/DSL", 144 | "page_url" -> "http://www.snowplowanalytics.com", 145 | "page_title" -> "On Analytics", 146 | "page_referrer" -> "", 147 | "page_urlscheme" -> "http", 148 | "page_urlhost" -> "www.snowplowanalytics.com", 149 | "page_urlport" -> "80", 150 | "page_urlpath" -> "/product/index.html", 151 | "page_urlquery" -> "id=GTM-DLRG", 152 | "page_urlfragment" -> "4-conclusion", 153 | "refr_urlscheme" -> "", 154 | "refr_urlhost" -> "", 155 | "refr_urlport" -> "", 156 | "refr_urlpath" -> "", 157 | "refr_urlquery" -> "", 158 | "refr_urlfragment" -> "", 159 | "refr_medium" -> "", 160 | "refr_source" -> "", 161 | "refr_term" -> "", 162 | "mkt_medium" -> "", 163 | "mkt_source" -> "", 164 | "mkt_term" -> "", 165 | "mkt_content" -> "", 166 | "mkt_campaign" -> "", 167 | "contexts" -> contextsJson, 168 | "se_category" -> "", 169 | "se_action" -> "", 170 | "se_label" -> "", 171 | "se_property" -> "", 172 | "se_value" -> "", 173 | "unstruct_event" -> unstructJson, 174 | "tr_orderid" -> "", 175 | "tr_affiliation" -> "", 176 | "tr_total" -> "", 177 | "tr_tax" -> "", 178 | "tr_shipping" -> "", 179 | "tr_city" -> "", 180 | "tr_state" -> "", 181 | "tr_country" -> "", 182 | "ti_orderid" -> "", 183 | "ti_sku" -> "", 184 | "ti_name" -> "", 185 | "ti_category" -> "", 186 | "ti_price" -> "", 187 | "ti_quantity" -> "", 188 | "pp_xoffset_min" -> "", 189 | "pp_xoffset_max" -> "", 190 | "pp_yoffset_min" -> "", 191 | "pp_yoffset_max" -> "", 192 | "useragent" -> "", 193 | "br_name" -> "", 194 | "br_family" -> "", 195 | "br_version" -> "", 196 | "br_type" -> "", 197 | "br_renderengine" -> "", 198 | "br_lang" -> "", 199 | "br_features_pdf" -> "1", 200 | "br_features_flash" -> "0", 201 | "br_features_java" -> "", 202 | "br_features_director" -> "", 203 | "br_features_quicktime" -> "", 204 | "br_features_realplayer" -> "", 205 | "br_features_windowsmedia" -> "", 206 | "br_features_gears" -> "", 207 | "br_features_silverlight" -> "", 208 | "br_cookies" -> "", 209 | "br_colordepth" -> "", 210 | "br_viewwidth" -> "", 211 | "br_viewheight" -> "", 212 | "os_name" -> "", 213 | "os_family" -> "", 214 | "os_manufacturer" -> "", 215 | "os_timezone" -> "", 216 | "dvce_type" -> "", 217 | "dvce_ismobile" -> "", 218 | "dvce_screenwidth" -> "", 219 | "dvce_screenheight" -> "", 220 | "doc_charset" -> "", 221 | "doc_width" -> "", 222 | "doc_height" -> "", 223 | "tr_currency" -> "", 224 | "tr_total_base" -> "", 225 | "tr_tax_base" -> "", 226 | "tr_shipping_base" -> "", 227 | "ti_currency" -> "", 228 | "ti_price_base" -> "", 229 | "base_currency" -> "", 230 | "geo_timezone" -> "", 231 | "mkt_clickid" -> "", 232 | "mkt_network" -> "", 233 | "etl_tags" -> "", 234 | "dvce_sent_tstamp" -> "", 235 | "refr_domain_userid" -> "", 236 | "refr_device_tstamp" -> "", 237 | "derived_contexts" -> derivedContextsJson, 238 | "domain_sessionid" -> "2b15e5c8-d3b1-11e4-b9d6-1681e6b88ec1", 239 | "derived_tstamp" -> "2013-11-26 00:03:57.886", 240 | "event_vendor" -> "com.snowplowanalytics.snowplow", 241 | "event_name" -> "link_click", 242 | "event_format" -> "jsonschema", 243 | "event_version" -> "1-0-0", 244 | "event_fingerprint" -> 
"e3dbfa9cca0412c3d4052863cefb547f", 245 | "true_tstamp" -> "2013-11-26 00:03:57.886" 246 | ) 247 | 248 | val eventValues = nvPairs.unzip._2.mkString("\t") 249 | 250 | "converting a Snowplow enriched event to JSON" should { 251 | 252 | "convert a valid snowplow event to JSON" in { 253 | new SnowplowToJson().transform(Content(eventValues, "p")) must beSuccessful 254 | } 255 | 256 | "give the snowplow analytics sdk error message on failure" in { 257 | val expectedMsg = """NonEmptyList(java.lang.IllegalArgumentException: Expected 131 fields, received 1 fields. This may be caused by attempting to use this SDK version on an older or newer version of Snowplow enriched events.)""" 258 | 259 | new SnowplowToJson().transform(Content("", "p")) match { 260 | case Success(s) => ko("have failed, it should") 261 | case Failure(f) => f.toString mustEqual expectedMsg 262 | } 263 | } 264 | 265 | } 266 | 267 | } 268 | -------------------------------------------------------------------------------- /src/test/scala/com/snowplowanalytics/kinesistee/MainSpec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 Snowplow Analytics Ltd. All rights reserved. 3 | * 4 | * This program is licensed to you under the Apache License Version 2.0, 5 | * and you may not use this file except in compliance with the Apache License Version 2.0. 6 | * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 7 | * 8 | * Unless required by applicable law or agreed to in writing, 9 | * software distributed under the Apache License Version 2.0 is distributed on an 10 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
12 | */ 13 | package com.snowplowanalytics.kinesistee 14 | 15 | import java.nio.ByteBuffer 16 | 17 | import awscala.dynamodbv2.DynamoDB 18 | import com.amazonaws.regions.{Region, Regions} 19 | import com.amazonaws.services.lambda.runtime.events.KinesisEvent 20 | import com.amazonaws.services.lambda.runtime.events.KinesisEvent.KinesisEventRecord 21 | import org.specs2.mock.Mockito 22 | import org.specs2.mutable.Specification 23 | import com.snowplowanalytics.kinesistee.config.{TargetStream, Transformer, _} 24 | import com.amazonaws.services.lambda.runtime.{Context => LambdaContext} 25 | import com.snowplowanalytics.kinesistee.filters.FilterStrategy 26 | import com.snowplowanalytics.kinesistee.models.{Content, Stream} 27 | import com.snowplowanalytics.kinesistee.routing.{PointToPointRoute, RoutingStrategy} 28 | import com.snowplowanalytics.kinesistee.transformation.{SnowplowToJson, TransformationStrategy} 29 | import org.mockito.Matchers.{eq => eqTo} 30 | 31 | import scalaz.{Success, ValidationNel} 32 | import scalaz.syntax.validation._ 33 | import scala.collection.JavaConversions._ 34 | import scala.language.reflectiveCalls 35 | import java.nio.charset.StandardCharsets 36 | 37 | import com.amazonaws.services.kinesis.AmazonKinesisClient 38 | 39 | class MainSpec extends Specification with Mockito { 40 | 41 | val sampleConfig = Configuration(name = "My Kinesis Tee example", 42 | targetStream = TargetStream("my-target-stream", None), 43 | transformer = Some(Transformer(BuiltIn.SNOWPLOW_TO_NESTED_JSON)), 44 | filter = None) 45 | 46 | class MockMain extends Main { 47 | override val kinesisTee:Tee = mock[Tee] 48 | 49 | override val lambdaUtils:AwsLambdaUtils = { 50 | val util = mock[AwsLambdaUtils] 51 | util.getLambdaDescription(any[String], any[String]) returns "dynamodb:us-east-1/config".success 52 | util.getRegionFromArn(any[String]) returns "us-east-1".success 53 | util.configLocationFromLambdaDesc(any[String]) returns (Regions.US_EAST_1, "table-name").success 54 | util 55 | } 56 | 57 | override val configurationBuilder:Builder = { 58 | val builder = mock[Builder] 59 | builder.build(any[String], any[String])(any[DynamoDB]) returns sampleConfig 60 | builder 61 | } 62 | 63 | override val getKinesisConnector = (_:Region, _:Option[TargetAccount]) => mock[AmazonKinesisClient] 64 | 65 | override val ddb = (_:Region) => mock[DynamoDB] 66 | } 67 | 68 | val sampleArn = "arn:aws:elasticbeanstalk:us-east-1:123456789012:environment/My App/MyEnvironment" 69 | val sampleFunctionName = "fun" 70 | 71 | def sampleContext = { 72 | val context = mock[LambdaContext] 73 | context.getInvokedFunctionArn returns sampleArn 74 | context.getFunctionName returns sampleFunctionName 75 | context 76 | } 77 | 78 | def sampleKinesisEvent = { 79 | val event = mock[KinesisEvent] 80 | val record = mock[KinesisEventRecord] 81 | val kinesisRecord = mock[KinesisEvent.Record] 82 | 83 | kinesisRecord.getPartitionKey returns "p" 84 | kinesisRecord.getData returns ByteBuffer.wrap("hello world".getBytes("UTF-8")) 85 | record.getKinesis returns kinesisRecord 86 | 87 | event.getRecords returns List(record) 88 | event 89 | } 90 | 91 | "getting configuration" should { 92 | 93 | "use the lambda utils to grab the ARN" in { 94 | val main = new MockMain 95 | main.getConfiguration(sampleContext) 96 | there was one (main.lambdaUtils).getRegionFromArn(eqTo(sampleArn)) 97 | } 98 | 99 | "throw an exception if the ARN cannot be ascertained" in { 100 | val main = new MockMain { 101 | override val lambdaUtils:AwsLambdaUtils = { 102 | val util = 
mock[AwsLambdaUtils]
103 |           util.getRegionFromArn(any[String]) returns "Cannot handle it".failureNel
104 |           util
105 |         }
106 |       }
107 |       main.getConfiguration(sampleContext) must throwA[IllegalStateException](message = "Cannot handle it")
108 |     }
109 |
110 |     "use the given arn/function name to fetch lambda description" in {
111 |       val mockMain = new MockMain
112 |       mockMain.getConfiguration(sampleContext)
113 |       there was one (mockMain.lambdaUtils).getLambdaDescription(eqTo(sampleFunctionName), eqTo("us-east-1"))
114 |     }
115 |
116 |     "throw an exception if the lambda description cannot be ascertained" in {
117 |       val mockMain = new MockMain {
118 |         override val lambdaUtils: AwsLambdaUtils = {
119 |           val util = mock[AwsLambdaUtils]
120 |           util.getLambdaDescription(any[String], any[String]) returns new RuntimeException("failed?").failureNel
121 |           util.getRegionFromArn(any[String]) returns "us-east-1".success
122 |           util
123 |         }
124 |       }
125 |       mockMain.getConfiguration(sampleContext) must throwA[IllegalStateException](message = "failed?")
126 |     }
127 |
128 |     "throw an exception if the lambda config location cannot be ascertained" in {
129 |       val mockMain = new MockMain {
130 |         override val lambdaUtils: AwsLambdaUtils = {
131 |           val util = mock[AwsLambdaUtils]
132 |           util.getLambdaDescription(any[String], any[String]) returns "dynamodb:sample/sample".success
133 |           util.getRegionFromArn(any[String]) returns "us-east-1".success
134 |           util.configLocationFromLambdaDesc(eqTo("dynamodb:sample/sample")) returns "oops".failureNel
135 |           util
136 |         }
137 |       }
138 |       mockMain.getConfiguration(sampleContext) must throwA[IllegalStateException](message = "oops")
139 |     }
140 |
141 |     "use the lambda description to build config from" in {
142 |       val main = new MockMain
143 |       main.getConfiguration(sampleContext)
144 |       there was one (main.configurationBuilder).build(eqTo("table-name"), eqTo(sampleFunctionName))(any[DynamoDB])
145 |     }
146 |
147 |     "throw an exception if the configuration fails to build" in {
148 |       val main = new MockMain {
149 |         override val configurationBuilder: Builder = {
150 |           val cb = mock[Builder]
151 |           cb.build(any[String], any[String])(any[DynamoDB]) throws new RuntimeException("broken")
152 |           cb
153 |         }
154 |       }
155 |       main.getConfiguration(sampleContext) must throwA[IllegalStateException](message = "Couldn't build configuration")
156 |     }
157 |
158 |   }
159 |
160 |   "the kinesis tee lambda entry point" should {
161 |
162 |     "tee with the given records" in {
163 |       val main = new MockMain
164 |       main.kinesisEventHandler(sampleKinesisEvent, sampleContext)
165 |       there was one (main.kinesisTee).tee(any[RoutingStrategy],
166 |                                           any[Option[TransformationStrategy]],
167 |                                           any[Option[FilterStrategy]],
168 |                                           eqTo(Seq(Content("hello world", "p"))))
169 |
170 |     }
171 |
172 |     "tee using the routing strategy point-to-point" in {
173 |       val main = new MockMain {
174 |         override val kinesisTee = new Tee {
175 |
176 |           var lastRoutingStrategy: Option[PointToPointRoute] = None
177 |
178 |           override def tee(routingStrategy: RoutingStrategy,
179 |                            transformationStrategy: Option[TransformationStrategy],
180 |                            filterStrategy: Option[FilterStrategy],
181 |                            content: Seq[Content]): Unit = {
182 |             lastRoutingStrategy = Some(routingStrategy.asInstanceOf[PointToPointRoute])
183 |           }
184 |         }
185 |       }
186 |       main.kinesisEventHandler(sampleKinesisEvent, sampleContext)
187 |       val expectedRouter = new PointToPointRoute(new StreamWriter(Stream(sampleConfig.targetStream.name, Region.getRegion(Regions.US_EAST_1)),
188 |                                                                  sampleConfig.targetStream.targetAccount,
189 | 
189 |                                                                  mock[AmazonKinesisClient]))
190 | 
191 |       val lastRoutingStrategy: PointToPointRoute = main.kinesisTee.lastRoutingStrategy.get
192 |       lastRoutingStrategy.toString mustEqual expectedRouter.toString
193 |     }
194 | 
195 |     "tee using the filter strategy defined in the configuration (base64 encoded js)" in {
196 | 
197 |       val sampleFilterJs =
198 |         """
199 |           | function filter(data) {
200 |           |   if (data=="good") { return true; }
201 |           |   else { return false; }
202 |           | }
203 |         """.stripMargin
204 | 
205 |       val base64Js = java.util.Base64.getEncoder.encodeToString(sampleFilterJs.getBytes(StandardCharsets.UTF_8))
206 | 
207 |       val main = new MockMain {
208 |         override val configurationBuilder: Builder = {
209 |           val builder = mock[Builder]
210 |           builder.build(any[String], any[String])(any[DynamoDB]) returns sampleConfig.copy(filter = Some(new Filter(javascript = base64Js)))
211 |           builder
212 |         }
213 |         override val kinesisTee = new Tee {
214 |           var lastFilterStrategy: Option[FilterStrategy] = None
215 | 
216 |           override def tee(routingStrategy: RoutingStrategy,
217 |                            transformationStrategy: Option[TransformationStrategy],
218 |                            filterStrategy: Option[FilterStrategy],
219 |                            content: Seq[Content]): Unit = {
220 |             lastFilterStrategy = filterStrategy
221 |           }
222 |         }
223 |       }
224 | 
225 |       main.kinesisEventHandler(sampleKinesisEvent, sampleContext)
226 |       val lastFilter = main.kinesisTee.lastFilterStrategy.get
227 |       val passing = lastFilter.filter(Content("good", "p"))
228 |       val failing = lastFilter.filter(Content("something else", "p"))
229 | 
230 |       (passing, failing) match {
231 |         case (Success(p), Success(f)) => (p, f) mustEqual (true, false)
232 |         case _ => ko("The test filter failed to execute; this is unexpected")
233 |       }
234 |     }
235 | 
236 |     "tee with a transformer given in the configuration (set to None)" in {
237 |       val main = new MockMain {
238 |         override val configurationBuilder: Builder = {
239 |           val builder = mock[Builder]
240 |           builder.build(any[String], any[String])(any[DynamoDB]) returns sampleConfig.copy(transformer = None)
241 |           builder
242 |         }
243 |       }
244 |       main.kinesisEventHandler(sampleKinesisEvent, sampleContext)
245 |       there was one (main.kinesisTee).tee(any[RoutingStrategy],
246 |                                           eqTo(None),
247 |                                           any[Option[FilterStrategy]],
248 |                                           any[Seq[Content]])
249 |     }
250 |   }
251 | 
252 | }
253 | 
--------------------------------------------------------------------------------
/LICENSE-2.0.txt:
--------------------------------------------------------------------------------
1 | 
2 |                                  Apache License
3 |                            Version 2.0, January 2004
4 |                         http://www.apache.org/licenses/
5 | 
6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 | 
8 |    1. Definitions.
9 | 
10 |       "License" shall mean the terms and conditions for use, reproduction,
11 |       and distribution as defined by Sections 1 through 9 of this document.
12 | 
13 |       "Licensor" shall mean the copyright owner or entity authorized by
14 |       the copyright owner that is granting the License.
15 | 
16 |       "Legal Entity" shall mean the union of the acting entity and all
17 |       other entities that control, are controlled by, or are under common
18 |       control with that entity. For the purposes of this definition,
19 |       "control" means (i) the power, direct or indirect, to cause the
20 |       direction or management of such entity, whether by contract or
21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 |       outstanding shares, or (iii) beneficial ownership of such entity.
23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. --------------------------------------------------------------------------------