├── Gemfile ├── LICENSE.txt ├── NOTICE.txt ├── README.md ├── Rakefile ├── lib ├── logstash-input-dynamodb_jars.rb └── logstash │ └── inputs │ ├── DynamoDBLogParser.rb │ ├── LogStashRecordProcessor.rb │ ├── LogStashRecordProcessorFactory.rb │ └── dynamodb.rb ├── logstash-input-dynamodb.gemspec └── spec ├── inputs └── dynamodb_spec.rb ├── log_parser_spec.rb ├── record_processor_and_factory_spec.rb └── spec_helper.rb /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | gemspec 3 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [2015] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | Logstash-input-DynamoDB Plugin 2 | Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Logstash Plugin for Amazon DynamoDB 2 | 3 | The Logstash plugin for Amazon DynamoDB gives you a nearly real-time view of the data in your DynamoDB table. The Logstash plugin for DynamoDB uses DynamoDB Streams to parse and output data as it is added to a DynamoDB table. After you install and activate the Logstash plugin for DynamoDB, it scans the data in the specified table, and then it starts consuming your updates using Streams and then outputs them to Elasticsearch, or a Logstash output of your choice. 4 | 5 | Logstash is a data pipeline service that processes data, parses data, and then outputs it to a selected location in a selected format. Elasticsearch is a distributed, full-text search server. For more information about Logstash and Elasticsearch, go to https://www.elastic.co/products/elasticsearch. 6 | 7 | ## The following sections walk you through the process to: 8 | 9 | 1. Create a DynamoDB table and enable a new stream on the table. 10 | 2. Download, build, and install the Logstash plugin for DynamoDB. 11 | 3. Configure Logstash to output to Elasticsearch and the command line. 12 | 4. Run the Logstash plugin for DynamoDB. 13 | 5. Test Logstash by adding DynamoDB items to the table. 14 | 15 | When this process is finished, you can search your data in the Elasticsearch cluster. 16 | 17 | ### Prerequisites 18 | 19 | **The following items are required to use the Logstash plugin for Amazon DynamoDB:** 20 | 21 | 1. Amazon Web Services (AWS) account with DynamoDB 22 | 2. A running Elasticsearch cluster—To download Elasticsearch, go to https://www.elastic.co/products/elasticsearch. 23 | 3. Logstash—To download Logstash, go to https://github.com/awslabs/logstash-input-dynamodb. 24 | 4. JRuby—To download JRuby, go to http://jruby.org/download. 25 | 5. 
Git—To download Git, go to http://git-scm.com/downloads 26 | 6. Apache Maven—To get Apache Maven, go to http://maven.apache.org/. 27 | 28 | ### Before You Begin: Create a Source Table 29 | 30 | In this step, you will create a DynamoDB table with DynamoDB Streams enabled. This will be the source table and writes to this table will be processed by the Logstash plugin for DynamoDB. 31 | 32 | **To create the source table** 33 | 34 | 1. Open the DynamoDB console at https://console.aws.amazon.com/dynamodb/. 35 | 2. Choose **Create Table**. 36 | 3. On the **Create Table** page, enter the following settings: 37 | 1. **Table Name** — SourceTable 38 | 2. **Primary Key Type** — Hash 39 | 3. **Hash attribute data type** — Number 40 | 4. **Hash Attribute Name** — Id 41 | 5. Choose **Continue**. 42 | 4. On the **Add Indexes** page, choose **Continue**. You will not need any indexes for this exercise. 43 | 5. On the **Provisioned Throughput** page, choose **Continue**. 44 | 6. On the **Additional Options** page, do the following: 45 | 1. Select **Enable Streams**, and then set the **View Type** to **New and Old Images**. 46 | 2. Clear **Use Basic Alarms**. You will not need alarms for this exercise. 47 | 3. When you are ready, choose **Continue**. 48 | 7. On the **Summary** page, choose **Create**. 49 | 50 | The source table will be created within a few minutes. 51 | 52 | ### Setting Up the Logstash Plugin for Amazon DynamoDB 53 | 54 | To use the Logstash plugin for DynamoDB, you need to build, install, run the plugin, and then you can test it. 55 | 56 | **IMPORTANT: in order to successfully build and install Logstash, you must have previously installed ```MAVEN``` to satisfy jar dependencies, and ```JRUBY``` to build and run the logstash gem.** 57 | 58 | **To build the Logstash plugin for DynamoDB** 59 | 60 | At the command prompt, change to the directory where you want to install the Logstash plugin for DynamoDB and demo project. 
61 | 62 | In the directory where you want the Git project, clone the Git project: 63 | 64 | ``` 65 | git clone https://github.com/awslabs/logstash-input-dynamodb.git 66 | ``` 67 | 68 | **Install the Bundler gem by typing the following:** 69 | 70 | ``` 71 | jruby -S gem install bundler 72 | ``` 73 | 74 | **NOTE: The ```jruby -S``` syntax ensures that our gem is installed with ```jruby``` and not ```ruby```** 75 | 76 | The Bundler gem checks dependencies for Ruby gems and installs them for you. 77 | 78 | To install the dependencies for the Logstash plugin for DynamoDB, type the following command: 79 | 80 | ``` 81 | jruby -S bundle install 82 | ``` 83 | 84 | To build the gem, type the following command: 85 | 86 | ``` 87 | jruby -S gem build logstash-input-dynamodb.gemspec 88 | ``` 89 | 90 | To install the gem, in the logstash-input-dynamodb folder type: 91 | 92 | ``` 93 | jruby -S gem install --local logstash-input-dynamodb-1.0.0-java.gem 94 | ``` 95 | 96 | ### To install the Logstash plugin for DynamoDB 97 | 98 | Now that you have built the plugin gem, you can install it. 99 | 100 | Change directories to your local Logstash directory. 101 | 102 | In the Logstash directory, open the Gemfile file in a text editor and add the following line. 103 | 104 | ``` 105 | gem "logstash-input-dynamodb" 106 | ``` 107 | 108 | To install the plugin, in your logstash folder type the command: 109 | 110 | ``` 111 | bin/plugin install --no-verify logstash-input-dynamodb 112 | ``` 113 | 114 | To list all the installed plugins type the following command: 115 | 116 | ``` 117 | bin/plugin list 118 | ``` 119 | 120 | If the logstash-output-elasticsearch or logstash-output-stdout plugins are not listed you need to install them. For instructions on installing plugins, go to the Working with Plugins page in the Logstash documentation. 
121 | 122 | ### Running the Logstash Plugin for Amazon DynamoDB 123 | 124 | **NOTE: First, make sure you have *Enabled Streams* (see above) for your DynamoDB table(s) before running logstash. Logstash for DynamoDB requires that each table you are logging from have streams enabled to work.** 125 | 126 | In the local Logstash directory create a ```logstash-dynamodb.conf``` file with the following contents: 127 | 128 | ``` 129 | input { 130 | dynamodb{ 131 | endpoint => "dynamodb.us-east-1.amazonaws.com" 132 | streams_endpoint => "streams.dynamodb.us-east-1.amazonaws.com" 133 | view_type => "new_and_old_images" 134 | aws_access_key_id => "" 135 | aws_secret_access_key => "" 136 | table_name => "SourceTable" 137 | } 138 | } 139 | output { 140 | elasticsearch { 141 | host => localhost 142 | } 143 | stdout { } 144 | } 145 | ``` 146 | 147 | **Important** 148 | 149 | This is an example configuration. You must replace the empty ```aws_access_key_id``` and ```aws_secret_access_key``` values with your access key and secret key. If you have credentials saved in a credentials file, you can omit these configuration values. 150 | 151 | To run logstash type: 152 | 153 | ``` 154 | bin/logstash -f logstash-dynamodb.conf 155 | ``` 156 | 157 | Logstash should successfully start and begin indexing the records from your DynamoDB table. 158 | 159 | You can also change the other configuration options to match your particular use case. 160 | 161 | You can also configure the plugin to index multiple tables by adding additional ```dynamodb { }``` sections to the ```input``` section. 162 | 163 | **The following table shows the configuration values.** 164 | 165 | ### Setting Description 166 | 167 | Settings Id | Description 168 | ------- | -------- 169 | table_name | The name of the table to index. This table must exist. 170 | endpoint | The DynamoDB endpoint to use. If you are running DynamoDB on your computer, use http://localhost:8000 as the endpoint. 171 | streams_endpoint | The DynamoDB Streams endpoint to use (for example, streams.dynamodb.us-east-1.amazonaws.com).
172 | view_type | The view type of the DynamoDB stream. ("new_and_old_images", "new_image", "old_image", "keys_only" Note: these must match the settings for your table's stream configured in the DynamoDB console.) 173 | aws_access_key_id | Your AWS access key ID. This is optional if you have credentials saved in a credentials file. Note: If you are running DynamoDB on your computer, this ID must match the access key ID that you used to create the table. If it does not match, the Logstash plugin will fail because DynamoDB partitions data by access key ID and region. 174 | aws_secret_access_key | Your AWS secret access key. This is optional if you have credentials saved in a credentials file. 175 | perform_scan | A boolean flag to indicate whether or not Logstash should scan the entire table before streaming new records. Note: Set this option to false if you are restarting the Logstash plugin. 176 | checkpointer | A string that uniquely identifies the KCL checkpointer name and CloudWatch metrics name. This is used when one worker leaves a shard so that another worker knows where to start again. 177 | publish_metrics | Boolean option to publish metrics to CloudWatch using the checkpointer name. 178 | perform_stream | Boolean option to not automatically stream new data into Logstash from DynamoDB streams. 179 | read_ops | Number of read operations per second to perform when scanning the specified table. 180 | number_of_scan_threads | Number of threads to use when scanning the specified table. 181 | number_of_write_threads | Number of threads to write to the Logstash queue when scanning the table. 182 | log_format | Log transfer format. "plain" - Returns the object as a DynamoDB object. "json_drop_binary" - Translates the item format to JSON and drops any binary attributes. 
"json_binary_as_text" - Translates the item format to JSON and represents any binary attributes as Base64-encoded binary strings. For more information, see the JSON Data Format topic in the DynamoDB documentation. 183 | 184 | ### Testing the Logstash Plugin for Amazon DynamoDB 185 | 186 | The Logstash plugin for DynamoDB starts scanning the DynamoDB table and indexing the table data when you run it. As you insert new records into the DynamoDB table, the Logstash plugin consumes the new records from DynamoDB streams to continue indexing. 187 | 188 | To test this, you can add items to the DynamoDB table in the AWS console, and view the output (stdout) in the command prompt window. The items are also inserted into Elasticsearch and indexed for searching. 189 | 190 | **To test the Logstash plugin for DynamoDB** 191 | 192 | Open the DynamoDB console at https://console.aws.amazon.com/dynamodb/. 193 | 194 | In the list of tables, open (double-click) **SourceTable**. 195 | 196 | Choose **New Item**, add the following data, and then choose **PutItem**: 197 | 198 | Id—1 199 | Message—First item 200 | 201 | Repeat the previous step to add the following data items: 202 | 203 | Id—2 and Message—Second item 204 | Id—3 and Message—Third item 205 | 206 | Return to the command-prompt window and verify the Logstash output (it should have dumped the logstash output for each item you added to the console). 207 | 208 | **(Optional) Go back to the SourceTable in us-east-1 and do the following:** 209 | 210 | Update item 2. Set the Message to Hello world! 211 | Delete item 3. 212 | 213 | Go to the command-prompt window and verify the data output. 214 | 215 | You can now search the DynamoDB items in Elasticsearch. 216 | 217 | For information about accessing and searching data in Elasticsearch, see the Elasticsearch documentation. 
218 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | @files=[] 2 | 3 | task :default do 4 | system("rake -T") 5 | end 6 | 7 | require "logstash/devutils/rake" 8 | -------------------------------------------------------------------------------- /lib/logstash-input-dynamodb_jars.rb: -------------------------------------------------------------------------------- 1 | # this is a generated file, to avoid over-writing it just delete this comment 2 | require 'jar_dependencies' 3 | 4 | require_jar( 'com.amazonaws', 'aws-java-sdk-elasticbeanstalk', '1.10.11' ) 5 | require_jar( 'com.amazonaws', 'aws-java-sdk-ses', '1.10.11' ) 6 | require_jar( 'log4j', 'log4j', '1.2.17' ) 7 | require_jar( 'com.amazonaws', 'aws-java-sdk-opsworks', '1.10.11' ) 8 | require_jar( 'com.amazonaws', 'dynamodb-streams-kinesis-adapter', '1.0.0' ) 9 | require_jar( 'com.amazonaws', 'aws-java-sdk-sqs', '1.10.11' ) 10 | require_jar( 'com.amazonaws', 'aws-java-sdk-emr', '1.10.11' ) 11 | require_jar( 'com.amazonaws', 'aws-java-sdk-cloudformation', '1.10.11' ) 12 | require_jar( 'com.beust', 'jcommander', '1.48' ) 13 | require_jar( 'com.amazonaws', 'aws-java-sdk-redshift', '1.10.11' ) 14 | require_jar( 'com.amazonaws', 'aws-java-sdk-iam', '1.10.11' ) 15 | require_jar( 'com.amazonaws', 'aws-java-sdk-codedeploy', '1.10.11' ) 16 | require_jar( 'com.amazonaws', 'aws-java-sdk-dynamodb', '1.10.10' ) 17 | require_jar( 'com.amazonaws', 'aws-java-sdk-directconnect', '1.10.11' ) 18 | require_jar( 'org.apache.httpcomponents', 'httpclient', '4.3.6' ) 19 | require_jar( 'com.amazonaws', 'aws-java-sdk-sns', '1.10.11' ) 20 | require_jar( 'com.amazonaws', 'aws-java-sdk-directory', '1.10.11' ) 21 | require_jar( 'com.google.protobuf', 'protobuf-java', '2.6.1' ) 22 | require_jar( 'com.amazonaws', 'aws-java-sdk-cloudfront', '1.10.11' ) 23 | require_jar( 'com.amazonaws', 'aws-java-sdk-kinesis', '1.10.8' 
) 24 | require_jar( 'com.amazonaws', 'aws-java-sdk-workspaces', '1.10.11' ) 25 | require_jar( 'com.amazonaws', 'aws-java-sdk-swf-libraries', '1.10.11' ) 26 | require_jar( 'com.amazonaws', 'aws-java-sdk-cloudhsm', '1.10.11' ) 27 | require_jar( 'com.amazonaws', 'aws-java-sdk-simpledb', '1.10.11' ) 28 | require_jar( 'com.amazonaws', 'aws-java-sdk-codepipeline', '1.10.11' ) 29 | require_jar( 'com.amazonaws', 'aws-java-sdk-s3', '1.10.10' ) 30 | require_jar( 'com.amazonaws', 'aws-java-sdk-cognitoidentity', '1.10.11' ) 31 | require_jar( 'com.amazonaws', 'aws-java-sdk-machinelearning', '1.10.11' ) 32 | require_jar( 'com.amazonaws', 'aws-java-sdk-logs', '1.10.11' ) 33 | require_jar( 'org.apache.commons', 'commons-lang3', '3.3.2' ) 34 | require_jar( 'commons-codec', 'commons-codec', '1.6' ) 35 | require_jar( 'com.fasterxml.jackson.core', 'jackson-annotations', '2.5.0' ) 36 | require_jar( 'com.amazonaws', 'aws-java-sdk-sts', '1.10.11' ) 37 | require_jar( 'com.amazonaws', 'aws-java-sdk-route53', '1.10.11' ) 38 | require_jar( 'com.amazonaws', 'aws-java-sdk-elasticloadbalancing', '1.10.11' ) 39 | require_jar( 'com.amazonaws', 'aws-java-sdk-storagegateway', '1.10.11' ) 40 | require_jar( 'org.apache.httpcomponents', 'httpcore', '4.3.3' ) 41 | require_jar( 'com.amazonaws', 'aws-java-sdk-efs', '1.10.11' ) 42 | require_jar( 'com.amazonaws', 'aws-java-sdk-ec2', '1.10.11' ) 43 | require_jar( 'com.amazonaws', 'aws-java-sdk-ssm', '1.10.11' ) 44 | require_jar( 'com.amazonaws', 'aws-java-sdk-core', '1.10.10' ) 45 | require_jar( 'com.amazonaws', 'dynamodb-import-export-tool', '1.0.0' ) 46 | require_jar( 'commons-lang', 'commons-lang', '2.6' ) 47 | require_jar( 'com.amazonaws', 'aws-java-sdk-config', '1.10.11' ) 48 | require_jar( 'com.amazonaws', 'aws-java-sdk-cloudtrail', '1.10.11' ) 49 | require_jar( 'com.amazonaws', 'aws-java-sdk-elastictranscoder', '1.10.11' ) 50 | require_jar( 'com.amazonaws', 'aws-java-sdk-codecommit', '1.10.11' ) 51 | require_jar( 'joda-time', 'joda-time', '2.5' ) 52 
| require_jar( 'com.amazonaws', 'aws-java-sdk-importexport', '1.10.11' ) 53 | require_jar( 'com.fasterxml.jackson.core', 'jackson-databind', '2.5.3' ) 54 | require_jar( 'com.amazonaws', 'aws-java-sdk-cloudsearch', '1.10.11' ) 55 | require_jar( 'com.amazonaws', 'aws-java-sdk', '1.10.11' ) 56 | require_jar( 'com.amazonaws', 'amazon-kinesis-client', '1.6.0' ) 57 | require_jar( 'com.google.guava', 'guava', '15.0' ) 58 | require_jar( 'com.fasterxml.jackson.core', 'jackson-core', '2.5.3' ) 59 | require_jar( 'com.amazonaws', 'aws-java-sdk-rds', '1.10.11' ) 60 | require_jar( 'com.amazonaws', 'aws-java-sdk-cognitosync', '1.10.11' ) 61 | require_jar( 'com.amazonaws', 'aws-java-sdk-datapipeline', '1.10.11' ) 62 | require_jar( 'com.amazonaws', 'aws-java-sdk-support', '1.10.11' ) 63 | require_jar( 'commons-logging', 'commons-logging', '1.1.3' ) 64 | require_jar( 'com.amazonaws', 'aws-java-sdk-cloudwatchmetrics', '1.10.11' ) 65 | require_jar( 'com.amazonaws', 'aws-java-sdk-glacier', '1.10.11' ) 66 | require_jar( 'com.amazonaws', 'aws-java-sdk-elasticache', '1.10.11' ) 67 | require_jar( 'com.amazonaws', 'aws-java-sdk-simpleworkflow', '1.10.11' ) 68 | require_jar( 'com.amazonaws', 'aws-java-sdk-lambda', '1.10.11' ) 69 | require_jar( 'com.amazonaws', 'aws-java-sdk-autoscaling', '1.10.11' ) 70 | require_jar( 'com.amazonaws', 'aws-java-sdk-ecs', '1.10.11' ) 71 | require_jar( 'com.amazonaws', 'aws-java-sdk-devicefarm', '1.10.11' ) 72 | require_jar( 'com.amazonaws', 'aws-java-sdk-kms', '1.10.10' ) 73 | require_jar( 'com.amazonaws', 'aws-java-sdk-cloudwatch', '1.10.8' ) 74 | -------------------------------------------------------------------------------- /lib/logstash/inputs/DynamoDBLogParser.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # 3 | #Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
4 | # 5 | #Licensed under the Apache License, Version 2.0 (the "License"); 6 | #you may not use this file except in compliance with the License. 7 | #You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | #Unless required by applicable law or agreed to in writing, software 12 | #distributed under the License is distributed on an "AS IS" BASIS, 13 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | #See the License for the specific language governing permissions and 15 | #limitations under the License. 16 | # 17 | require 'java' 18 | require 'json' 19 | require 'bigdecimal' 20 | require 'activesupport/json_encoder' 21 | require 'base64' 22 | 23 | require "logstash-input-dynamodb_jars" 24 | java_import "com.fasterxml.jackson.databind.ObjectMapper" 25 | java_import "com.amazonaws.services.dynamodbv2.model.AttributeValue" 26 | java_import "com.amazonaws.dynamodb.bootstrap.AttributeValueMixIn" 27 | 28 | module Logstash 29 | module Inputs 30 | module DynamoDB 31 | class DynamoDBLogParser 32 | 33 | MAX_NUMBER_OF_BYTES_FOR_NUMBER = 21; 34 | 35 | def initialize(view_type, log_format, key_schema, region) 36 | @view_type = view_type 37 | @log_format = log_format 38 | @mapper ||= ObjectMapper.new() 39 | @mapper.setSerializationInclusion(JsonInclude::Include::NON_NULL) 40 | @mapper.addMixInAnnotations(AttributeValue, AttributeValueMixIn); 41 | @key_schema = key_schema 42 | ActiveSupport.encode_big_decimal_as_string = false 43 | @hash_template = Hash.new 44 | @hash_template["eventID"] = "0" 45 | @hash_template["eventName"] = "INSERT" 46 | @hash_template["eventVersion"] = "1.0" 47 | @hash_template["eventSource"] = "aws:dynamodb" 48 | @hash_template["awsRegion"] = region 49 | end 50 | 51 | public 52 | def parse_scan(log, new_image_size) 53 | data_hash = JSON.parse(@mapper.writeValueAsString(log)) 54 | 55 | @hash_template["dynamodb"] = Hash.new 56 | @hash_template["dynamodb"]["keys"] = Hash.new 57 | 
size_bytes = calculate_key_size_in_bytes(log) 58 | @key_schema.each { |x| 59 | @hash_template["dynamodb"]["keys"][x] = data_hash[x] 60 | } 61 | unless @view_type == "keys_only" 62 | size_bytes += new_image_size 63 | @hash_template["dynamodb"]["newImage"] = data_hash 64 | end 65 | @hash_template["dynamodb"]["sequenceNumber"] = "0" 66 | @hash_template["dynamodb"]["sizeBytes"] = size_bytes 67 | @hash_template["dynamodb"]["streamViewType"] = @view_type.upcase 68 | 69 | return parse_view_type(@hash_template) 70 | end 71 | 72 | public 73 | def parse_stream(log) 74 | return parse_view_type(JSON.parse(@mapper.writeValueAsString(log))["internalObject"]) 75 | end 76 | 77 | private 78 | def calculate_key_size_in_bytes(record) 79 | key_size = 0 80 | @key_schema.each { |x| 81 | key_size += x.length 82 | value = record.get(x) 83 | if !(value.getB().nil?) 84 | b = value.getB(); 85 | key_size += Base64.decode64(b).length 86 | elsif !(value.getS().nil?) 87 | s = value.getS(); 88 | key_size += s.length; 89 | elsif !(value.getN().nil?) 90 | key_size += MAX_NUMBER_OF_BYTES_FOR_NUMBER; 91 | end 92 | } 93 | return key_size 94 | end 95 | 96 | private 97 | def parse_view_type(hash) 98 | if @log_format == LogStash::Inputs::DynamoDB::LF_PLAIN 99 | return hash.to_json 100 | end 101 | case @view_type 102 | when LogStash::Inputs::DynamoDB::VT_KEYS_ONLY 103 | return parse_format(hash["dynamodb"]["keys"]) 104 | when LogStash::Inputs::DynamoDB::VT_OLD_IMAGE 105 | return parse_format(hash["dynamodb"]["oldImage"]) 106 | when LogStash::Inputs::DynamoDB::VT_NEW_IMAGE 107 | return parse_format(hash["dynamodb"]["newImage"]) #check new and old, dynamodb. 
108 | end 109 | end 110 | 111 | private 112 | def parse_format(hash) 113 | if @log_format == LogStash::Inputs::DynamoDB::LF_DYNAMODB 114 | return hash.to_json 115 | else 116 | return dynamodb_to_json(hash) 117 | end 118 | end 119 | 120 | private 121 | def dynamodb_to_json(hash) 122 | return formatAttributeValueMap(hash).to_json 123 | end 124 | 125 | private 126 | def formatAttributeValueMap(hash) 127 | keys_to_delete = [] 128 | hash.each do |k, v| 129 | dynamodb_key = v.keys.first 130 | dynamodb_value = v.values.first 131 | if @log_format == LogStash::Inputs::DynamoDB::LF_JSON_NO_BIN and (dynamodb_key == "BS" or dynamodb_key == "B") 132 | keys_to_delete.push(k) # remove binary values and binary sets 133 | next 134 | end 135 | hash[k] = formatAttributeValue(v.keys.first, v.values.first) 136 | end 137 | keys_to_delete.each {|key| hash.delete(key)} 138 | return hash 139 | end 140 | 141 | private 142 | def formatAttributeValue(key, value) 143 | case key 144 | when "M" 145 | formatAttributeValueMap(value) 146 | when "L" 147 | value.map! do |v| 148 | v = formatAttributeValue(v.keys.first, v.values.first) 149 | end 150 | when "NS","SS","BS" 151 | value.map! do |v| 152 | v = formatAttributeValue(key[0], v) 153 | end 154 | when "N" 155 | BigDecimal.new(value) 156 | when "NULL" 157 | nil 158 | else 159 | value 160 | end 161 | end 162 | 163 | end 164 | end 165 | end 166 | end 167 | -------------------------------------------------------------------------------- /lib/logstash/inputs/LogStashRecordProcessor.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # 3 | #Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | #Licensed under the Apache License, Version 2.0 (the "License"); 6 | #you may not use this file except in compliance with the License. 
7 | #You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | #Unless required by applicable law or agreed to in writing, software 12 | #distributed under the License is distributed on an "AS IS" BASIS, 13 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | #See the License for the specific language governing permissions and 15 | #limitations under the License. 16 | # 17 | require "java" 18 | 19 | require "logstash-input-dynamodb_jars" 20 | java_import "com.amazonaws.services.kinesis.clientlibrary.types.ShutdownReason" 21 | java_import "java.lang.IllegalStateException" 22 | java_import "org.apache.log4j.LogManager" 23 | 24 | module Logstash 25 | module Inputs 26 | module DynamoDB 27 | class LogStashRecordProcessor 28 | include com.amazonaws.services.kinesis.clientlibrary.interfaces::IRecordProcessor 29 | 30 | attr_accessor :queue, :shard_id 31 | 32 | def initialize(queue) 33 | # Workaround for IRecordProcessor.initialize(String shardId) interfering with constructor. 34 | # No good way to overload methods in JRuby, so deciding which was supposed to be called here. 35 | if (queue.is_a? String) 36 | @shard_id = queue 37 | return 38 | else 39 | @queue ||= queue 40 | @logger ||= LogStash::Inputs::DynamoDB.logger 41 | end 42 | end 43 | 44 | def process_records(records, checkpointer) 45 | @logger.debug("Processing batch of " + records.size().to_s + " records") 46 | records.each do |record| 47 | @queue.push(record) 48 | end 49 | #checkpoint once all of the records have been consumed 50 | checkpointer.checkpoint() 51 | end 52 | 53 | def shutdown(checkpointer, reason) 54 | case reason 55 | when ShutdownReason::TERMINATE 56 | checkpointer.checkpoint() 57 | when ShutdownReason::ZOMBIE 58 | else 59 | raise RuntimeError, "Invalid shutdown reason." 60 | end 61 | unless @shard_id.nil? 
62 | @logger.info("shutting down record processor with shardId: " + @shard_id + " with reason " + reason.to_s) 63 | end 64 | end 65 | end 66 | end 67 | end 68 | end 69 | -------------------------------------------------------------------------------- /lib/logstash/inputs/LogStashRecordProcessorFactory.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # 3 | #Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | #Licensed under the Apache License, Version 2.0 (the "License"); 6 | #you may not use this file except in compliance with the License. 7 | #You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | #Unless required by applicable law or agreed to in writing, software 12 | #distributed under the License is distributed on an "AS IS" BASIS, 13 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | #See the License for the specific language governing permissions and 15 | #limitations under the License. 16 | # 17 | require 'java' 18 | require_relative "LogStashRecordProcessor" 19 | 20 | require "logstash-input-dynamodb_jars" 21 | 22 | module KCL 23 | include_package "com.amazonaws.services.kinesis.clientlibrary.interfaces" 24 | end 25 | 26 | module Logstash 27 | module Inputs 28 | module DynamoDB 29 | class LogStashRecordProcessorFactory 30 | include KCL::IRecordProcessorFactory 31 | 32 | def initialize(queue) 33 | @queue ||= queue 34 | end 35 | 36 | def create_processor 37 | return Logstash::Inputs::DynamoDB::LogStashRecordProcessor.new(@queue) 38 | end 39 | 40 | end 41 | end 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /lib/logstash/inputs/dynamodb.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # 3 | #Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
4 | # 5 | #Licensed under the Apache License, Version 2.0 (the "License"); 6 | #you may not use this file except in compliance with the License. 7 | #You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | #Unless required by applicable law or agreed to in writing, software 12 | #distributed under the License is distributed on an "AS IS" BASIS, 13 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | #See the License for the specific language governing permissions and 15 | #limitations under the License. 16 | # 17 | require "logstash/inputs/base" 18 | require "logstash/namespace" 19 | require "securerandom" 20 | require "thread" 21 | require "socket" 22 | require_relative "LogStashRecordProcessorFactory" 23 | require_relative "DynamoDBLogParser" 24 | 25 | require "logstash-input-dynamodb_jars" 26 | 27 | require 'java' 28 | java_import "com.amazonaws.AmazonClientException" 29 | java_import "org.apache.log4j.LogManager" 30 | java_import "org.apache.log4j.Level" 31 | java_import "com.fasterxml.jackson.annotation.JsonInclude" 32 | java_import "com.amazonaws.regions.RegionUtils" 33 | 34 | module AmazonDynamoDB 35 | include_package "com.amazonaws" 36 | include_package "com.amazonaws.services.dynamodbv2" 37 | include_package "com.amazonaws.services.dynamodbv2.streamsadapter" 38 | include_package "com.amazonaws.services.dynamodbv2.model" 39 | end 40 | module AmazonCredentials 41 | include_package "com.amazonaws.auth" 42 | include_package "com.amazonaws.internal" 43 | end 44 | 45 | module DynamoDBBootstrap 46 | include_package "com.amazonaws.dynamodb.bootstrap" 47 | end 48 | 49 | module CloudWatch 50 | include_package "com.amazonaws.services.cloudwatch" 51 | end 52 | 53 | module KCL 54 | include_package "com.amazonaws.services.kinesis.clientlibrary.lib.worker" 55 | end 56 | 57 | #DynamoDBStreams plugin that will first scan the DynamoDB table 58 | #and then consume streams and push those records 
into Logstash 59 | class LogStash::Inputs::DynamoDB < LogStash::Inputs::Base 60 | config_name "dynamodb" 61 | 62 | USER_AGENT = " logstash-input-dynamodb/1.0.0".freeze 63 | 64 | LF_DYNAMODB = "dymamodb".freeze 65 | LF_JSON_NO_BIN = "json_drop_binary".freeze 66 | LF_PLAIN = "plain".freeze 67 | LF_JSON_BIN_AS_TEXT = "json_binary_as_text".freeze 68 | VT_KEYS_ONLY = "keys_only".freeze 69 | VT_OLD_IMAGE = "old_image".freeze 70 | VT_NEW_IMAGE = "new_image".freeze 71 | VT_ALL_IMAGES = "new_and_old_images".freeze 72 | 73 | default :codec, 'json' 74 | 75 | # The name of the table to copy and stream through Logstash 76 | config :table_name, :validate => :string, :required => true 77 | 78 | # Configuration for what information from the scan and streams to include in the log. 79 | # keys_only will return the hash and range keys along with the values for each entry 80 | # new_image will return the entire new entry and keys 81 | # old_image will return the entire entry before modification and keys (NOTE: Cannot perform scan when using this option) 82 | # new_and_old_images will return the old entry before modification along with the new entry and keys 83 | config :view_type, :validate => [VT_KEYS_ONLY, VT_OLD_IMAGE, VT_NEW_IMAGE, VT_ALL_IMAGES], :required => true 84 | 85 | # Endpoint from which the table is located. Example: dynamodb.us-east-1.amazonaws.com 86 | config :endpoint, :validate => :string, :required => true 87 | 88 | # Endpoint from which streams should read. Example: streams.dynamodb.us-east-1.amazonaws.com 89 | config :streams_endpoint, :validate => :string 90 | 91 | # AWS credentials access key. 92 | config :aws_access_key_id, :validate => :string, :default => "" 93 | 94 | # AWS credentials secret access key. 95 | config :aws_secret_access_key, :validate => :string, :default => "" 96 | 97 | # A flag to indicate whether or not the plugin should scan the entire table before streaming new records. 
98 | # Streams will only push records that are less than 24 hours old, so in order to get the entire table 99 | # an initial scan must be done. 100 | config :perform_scan, :validate => :boolean, :default => true 101 | 102 | # A string that uniquely identifies the KCL checkpointer name and cloudwatch metrics name. 103 | # This is used when one worker leaves a shard so that another worker knows where to start again. 104 | config :checkpointer, :validate => :string, :default => "logstash_input_dynamodb_cptr" 105 | 106 | # Option to publish metrics to Cloudwatch using the checkpointer name. 107 | config :publish_metrics, :validate => :boolean, :default => false 108 | 109 | # Option to not automatically stream new data into logstash from DynamoDB streams. 110 | config :perform_stream, :validate => :boolean, :default => true 111 | 112 | # Number of read operations per second to perform when scanning the specified table. 113 | config :read_ops, :validate => :number, :default => 1 114 | 115 | # Number of threads to use when scanning the specified table 116 | config :number_of_scan_threads, :validate => :number, :default => 1 117 | 118 | # Number of threads to write to the logstash queue when scanning the table 119 | config :number_of_write_threads, :validate => :number, :default => 1 120 | 121 | # Configuation for how the logs will be transferred. 122 | # plain is simply pass the message along without editing it. 123 | # dynamodb will return just the data specified in the view_format in dynamodb format. 124 | # For more information see: docs.aws.amazon.com/amazondynamodb/latest/developerguide/DataFormat.html 125 | # json_drop_binary will return just the data specified in the view_format in JSON while not including any binary values that were present. 126 | # json_binary_as_text will return just the data specified in the view_format in JSON while including binary values as base64-encoded text. 
127 | config :log_format, :validate => [LF_PLAIN, LF_DYNAMODB, LF_JSON_NO_BIN, LF_JSON_BIN_AS_TEXT], :default => "plain" 128 | 129 | public 130 | def build_credentials 131 | if !@aws_access_key_id.to_s.empty? and !@aws_secret_access_key.to_s.empty? 132 | @logger.info("Using static credentials: " + @aws_access_key_id + ", " + @aws_secret_access_key) 133 | basic = AmazonCredentials::BasicAWSCredentials.new(@aws_access_key_id, @aws_secret_access_key) 134 | return AmazonCredentials::StaticCredentialsProvider.new(basic) 135 | else 136 | @logger.info("Using default provider chain") 137 | return AmazonCredentials::DefaultAWSCredentialsProviderChain.new() 138 | end # if neither aws access keys 139 | end # def build_credentials 140 | 141 | public 142 | def register 143 | LogStash::Logger.setup_log4j(@logger) 144 | 145 | @host = Socket.gethostname 146 | @logger.info("Tablename: " + @table_name) 147 | @queue = SizedQueue.new(20) 148 | @credentials = build_credentials() 149 | @logger.info("Checkpointer: " + @checkpointer) 150 | 151 | if @perform_scan and @view_type == VT_OLD_IMAGE 152 | raise(LogStash::ConfigurationError, "Cannot perform scan with view type: " + @view_type + " configuration") 153 | end 154 | if @view_type == VT_ALL_IMAGES and !(@log_format == LF_PLAIN) 155 | raise(LogStash::ConfigurationError, "Cannot show view_type: " + @view_type + ", with log_format: " + @log_format) 156 | end 157 | 158 | #Create DynamoDB Client 159 | @client_configuration = AmazonDynamoDB::ClientConfiguration.new() 160 | @client_configuration.setUserAgent(@client_configuration.getUserAgent() + USER_AGENT) 161 | @dynamodb_client = AmazonDynamoDB::AmazonDynamoDBClient.new(@credentials, @client_configuration) 162 | 163 | @logger.info(@dynamodb_client.to_s) 164 | 165 | @dynamodb_client.setEndpoint(@endpoint) 166 | @logger.info("DynamoDB endpoint: " + @endpoint) 167 | 168 | @key_schema = Array.new 169 | @table_description = @dynamodb_client.describeTable(@table_name).getTable() 170 | 
key_iterator = @table_description.getKeySchema().iterator() 171 | while(key_iterator.hasNext()) 172 | @key_schema.push(key_iterator.next().getAttributeName().to_s) 173 | end 174 | region = RegionUtils.getRegionByEndpoint(@endpoint) 175 | 176 | @parser ||= Logstash::Inputs::DynamoDB::DynamoDBLogParser.new(@view_type, @log_format, @key_schema, region) 177 | 178 | if @perform_stream 179 | setup_stream 180 | end # unless @perform_stream 181 | end # def register 182 | 183 | public 184 | def run(logstash_queue) 185 | begin 186 | run_with_catch(logstash_queue) 187 | rescue LogStash::ShutdownSignal 188 | exit_threads 189 | until @queue.empty? 190 | @logger.info("Flushing rest of events in logstash queue") 191 | event = @queue.pop() 192 | queue_event(@parser.parse_stream(event), logstash_queue, @host) 193 | end # until !@queue.empty? 194 | end # begin 195 | end # def run(logstash_queue) 196 | 197 | # Starts KCL app in a background thread 198 | # Starts parallel scan if need be in a background thread 199 | private 200 | def run_with_catch(logstash_queue) 201 | if @perform_scan 202 | scan(logstash_queue) 203 | end # if @perform_scan 204 | 205 | # Once scan is finished, start kcl thread to read from streams 206 | if @perform_stream 207 | stream(logstash_queue) 208 | end # unless @perform_stream 209 | end # def run 210 | 211 | private 212 | def setup_stream 213 | worker_id = SecureRandom.uuid() 214 | @logger.info("WorkerId: " + worker_id) 215 | 216 | dynamodb_streams_client = AmazonDynamoDB::AmazonDynamoDBStreamsClient.new(@credentials, @client_configuration) 217 | adapter = Java::ComAmazonawsServicesDynamodbv2Streamsadapter::AmazonDynamoDBStreamsAdapterClient.new(@credentials) 218 | if !@streams_endpoint.nil? 
219 | adapter.setEndpoint(@streams_endpoint) 220 | dynamodb_streams_client.setEndpoint(@streams_endpoint) 221 | @logger.info("DynamoDB Streams endpoint: " + @streams_endpoint) 222 | else 223 | raise(LogStash::ConfigurationError, "Cannot stream without a configured streams endpoint") 224 | end # if not @streams_endpoint.to_s.empty? 225 | 226 | stream_arn = nil 227 | begin 228 | stream_arn = @table_description.getLatestStreamArn() 229 | stream_description = dynamodb_streams_client.describeStream(AmazonDynamoDB::DescribeStreamRequest.new() \ 230 | .withStreamArn(stream_arn)).getStreamDescription() 231 | 232 | stream_status = stream_description.getStreamStatus() 233 | 234 | stream_view_type = stream_description.getStreamViewType().to_s.downcase 235 | unless (stream_view_type == @view_type or @view_type == VT_KEYS_ONLY or stream_view_type == VT_ALL_IMAGES) 236 | raise(LogStash::ConfigurationError, "Cannot stream " + @view_type + " when stream is setup for " + stream_view_type) 237 | end 238 | 239 | while stream_status == "ENABLING" 240 | if(stream_status == "ENABLING") 241 | @logger.info("Sleeping until stream is enabled") 242 | sleep(1) 243 | end # if stream_status == "ENABLING" 244 | stream_description = dynamodb_streams_client.describeStream(AmazonDynamoDB::DescribeStreamRequest.new() \ 245 | .withStreamArn(stream_arn)).getStreamDescription() 246 | stream_status = stream_description.getStreamStatus() 247 | end # while not active 248 | 249 | if !(stream_status == "ENABLED") 250 | raise(LogStash::PluginLoadingError, "No streams are enabled") 251 | end # if not active 252 | @logger.info("Stream Id: " + stream_arn) 253 | rescue AmazonDynamoDB::ResourceNotFoundException => rnfe 254 | raise(LogStash::PluginLoadingError, rnfe.message) 255 | rescue AmazonClientException => ace 256 | raise(LogStash::ConfigurationError, "AWS credentials invalid or not found in the provider chain\n" + ace.message) 257 | end # begin 258 | 259 | kcl_config = 
KCL::KinesisClientLibConfiguration.new(@checkpointer, stream_arn, @credentials, worker_id) \ 260 | .withInitialPositionInStream(KCL::InitialPositionInStream::TRIM_HORIZON) 261 | cloudwatch_client = nil 262 | if @publish_metrics 263 | cloudwatch_client = CloudWatch::AmazonCloudWatchClient.new(@credentials) 264 | else 265 | kclMetricsLogger = LogManager.getLogger("com.amazonaws.services.kinesis.metrics") 266 | kclMetricsLogger.setAdditivity(false) 267 | kclMetricsLogger.setLevel(Level::OFF) 268 | end # if @publish_metrics 269 | @worker = KCL::Worker.new(Logstash::Inputs::DynamoDB::LogStashRecordProcessorFactory.new(@queue), kcl_config, adapter, @dynamodb_client, cloudwatch_client) 270 | end # def setup_stream 271 | 272 | private 273 | def scan(logstash_queue) 274 | @logger.info("Starting scan...") 275 | @logstash_writer = DynamoDBBootstrap::BlockingQueueConsumer.new(@number_of_write_threads) 276 | 277 | @connector = DynamoDBBootstrap::DynamoDBBootstrapWorker.new(@dynamodb_client, @read_ops, @table_name, @number_of_scan_threads) 278 | start_table_copy_thread 279 | 280 | scan_queue = @logstash_writer.getQueue() 281 | while true 282 | event = scan_queue.take() 283 | if event.getEntry().nil? and event.getSize() == -1 284 | break 285 | end # if event.isEmpty() 286 | queue_event(@parser.parse_scan(event.getEntry(), event.getSize()), logstash_queue, @host) 287 | end # while true 288 | end 289 | 290 | private 291 | def stream(logstash_queue) 292 | @logger.info("Starting stream...") 293 | start_kcl_thread 294 | 295 | while true 296 | event = @queue.pop() 297 | queue_event(@parser.parse_stream(event), logstash_queue, @host) 298 | end # while true 299 | end 300 | 301 | private 302 | def exit_threads 303 | unless @dynamodb_scan_thread.nil? 304 | @dynamodb_scan_thread.exit 305 | end # unless @dynamodb_scan_thread.nil? 306 | 307 | unless @kcl_thread.nil? 308 | @kcl_thread.exit 309 | end # unless @kcl_thread.nil? 
310 | end # def exit_threads 311 | 312 | public 313 | def queue_event(event, logstash_queue, event_host) 314 | logstash_event = LogStash::Event.new("message" => event, "host" => event_host) 315 | decorate(logstash_event) 316 | logstash_queue << logstash_event 317 | end # def queue_event 318 | 319 | private 320 | def start_table_copy_thread 321 | @dynamodb_scan_thread = Thread.new(@connector, @logstash_writer) { 322 | begin 323 | @connector.pipe(@logstash_writer) 324 | rescue Exception => e 325 | abort("Scanning the table caused an error.\n" + e.message) 326 | end # begin 327 | } 328 | end # def start_table_copy_thread() 329 | 330 | private 331 | def start_kcl_thread 332 | @kcl_thread = Thread.new(@worker) { 333 | begin 334 | @worker.run() 335 | rescue Exception => e 336 | abort("KCL worker encountered an error.\n" + e.message) 337 | end # begin 338 | } 339 | end # def start_kcl_thread 340 | 341 | end # class Logstash::Inputs::DynamoDB 342 | -------------------------------------------------------------------------------- /logstash-input-dynamodb.gemspec: -------------------------------------------------------------------------------- 1 | Gem::Specification.new do |s| 2 | s.name = 'logstash-input-dynamodb' 3 | s.version = '1.0.0' 4 | s.licenses = ['Apache License (2.0)'] 5 | s.summary = "This input plugin scans a specified DynamoDB table and then reads changes to a DynamoDB table from the associated DynamoDB Stream." 6 | s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. 
This gem is not a stand-alone program" 7 | s.authors = ["Amazon"] 8 | s.email = 'dynamodb-interest@amazon.com' 9 | s.homepage = "https://github.com/logstash-plugins/logstash-input-dynamodb" 10 | s.require_paths = ["lib"] 11 | s.platform = 'java' 12 | 13 | # Files 14 | s.files = `git ls-files`.split($\) 15 | # Tests 16 | s.test_files = s.files.grep(%r{^(test|spec|features)/}) 17 | 18 | # Special flag to let us know this is actually a logstash plugin 19 | s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" } 20 | 21 | # Gem dependencies 22 | s.add_runtime_dependency "logstash-core", '>= 1.4.0', '< 2.0.0' 23 | s.add_runtime_dependency "logstash-codec-json" 24 | s.add_runtime_dependency "activesupport-json_encoder" 25 | # Jar dependencies 26 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-elasticbeanstalk', '1.10.11'" 27 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-ses', '1.10.11' " 28 | s.requirements << "jar 'log4j:log4j', '1.2.17'" 29 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-opsworks', '1.10.11'" 30 | s.requirements << "jar 'com.amazonaws:dynamodb-streams-kinesis-adapter', '1.0.0'" 31 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-sqs', '1.10.11'" 32 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-emr', '1.10.11'" 33 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-cloudformation', '1.10.11'" 34 | s.requirements << "jar 'com.beust:jcommander', '1.48'" 35 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-redshift', '1.10.11'" 36 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-iam', '1.10.11'" 37 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-codedeploy', '1.10.11'" 38 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-dynamodb', '1.10.10'" 39 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-directconnect', '1.10.11'" 40 | s.requirements << "jar 'org.apache.httpcomponents:httpclient', '4.3.6'" 41 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-sns', '1.10.11'" 42 | s.requirements << 
"jar 'com.amazonaws:aws-java-sdk-directory', '1.10.11'" 43 | s.requirements << "jar 'com.google.protobuf:protobuf-java', '2.6.1'" 44 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-cloudfront', '1.10.11'" 45 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-kinesis', '1.10.8'" 46 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-workspaces', '1.10.11'" 47 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-swf-libraries', '1.10.11'" 48 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-cloudhsm', '1.10.11'" 49 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-simpledb', '1.10.11'" 50 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-codepipeline', '1.10.11'" 51 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-s3', '1.10.10'" 52 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-cognitoidentity', '1.10.11'" 53 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-machinelearning', '1.10.11'" 54 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-logs', '1.10.11'" 55 | s.requirements << "jar 'org.apache.commons:commons-lang3', '3.3.2'" 56 | s.requirements << "jar 'commons-codec:commons-codec', '1.6'" 57 | s.requirements << "jar 'com.fasterxml.jackson.core:jackson-annotations', '2.5.0'" 58 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-sts', '1.10.11'" 59 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-route53', '1.10.11'" 60 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-elasticloadbalancing', '1.10.11'" 61 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-storagegateway', '1.10.11'" 62 | s.requirements << "jar 'org.apache.httpcomponents:httpcore', '4.3.3'" 63 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-efs', '1.10.11'" 64 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-ec2', '1.10.11'" 65 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-ssm', '1.10.11'" 66 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-core', '1.10.10'" 67 | s.requirements << "jar 'com.amazonaws:dynamodb-import-export-tool', 
'1.0.0'" 68 | s.requirements << "jar 'commons-lang:commons-lang', '2.6'" 69 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-config', '1.10.11'" 70 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-cloudtrail', '1.10.11'" 71 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-elastictranscoder', '1.10.11'" 72 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-codecommit', '1.10.11'" 73 | s.requirements << "jar 'joda-time:joda-time', '2.5'" 74 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-importexport', '1.10.11'" 75 | s.requirements << "jar 'com.fasterxml.jackson.core:jackson-databind', '2.5.3'" 76 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-cloudsearch', '1.10.11'" 77 | s.requirements << "jar 'com.amazonaws:aws-java-sdk', '1.10.11'" 78 | s.requirements << "jar 'com.amazonaws:amazon-kinesis-client', '1.6.0'" 79 | s.requirements << "jar 'com.google.guava:guava', '15.0'" 80 | s.requirements << "jar 'com.fasterxml.jackson.core:jackson-core', '2.5.3'" 81 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-rds', '1.10.11'" 82 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-cognitosync', '1.10.11'" 83 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-datapipeline', '1.10.11'" 84 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-support', '1.10.11'" 85 | s.requirements << "jar 'commons-logging:commons-logging', '1.1.3'" 86 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-cloudwatchmetrics', '1.10.11'" 87 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-glacier', '1.10.11'" 88 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-elasticache', '1.10.11'" 89 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-simpleworkflow', '1.10.11'" 90 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-lambda', '1.10.11'" 91 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-autoscaling', '1.10.11'" 92 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-ecs', '1.10.11'" 93 | s.requirements << "jar 
'com.amazonaws:aws-java-sdk-devicefarm', '1.10.11'" 94 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-kms', '1.10.10'" 95 | s.requirements << "jar 'com.amazonaws:aws-java-sdk-cloudwatch', '1.10.8'" 96 | s.add_runtime_dependency 'jar-dependencies' 97 | # Development dependencies 98 | s.add_development_dependency "logstash-devutils" 99 | s.add_development_dependency "mocha" 100 | end 101 | -------------------------------------------------------------------------------- /spec/inputs/dynamodb_spec.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # 3 | #Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | #Licensed under the Apache License, Version 2.0 (the "License"); 6 | #you may not use this file except in compliance with the License. 7 | #You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | #Unless required by applicable law or agreed to in writing, software 12 | #distributed under the License is distributed on an "AS IS" BASIS, 13 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | #See the License for the specific language governing permissions and 15 | #limitations under the License. 16 | # 17 | require "spec/spec_helper" 18 | 19 | class LogStash::Inputs::TestDynamoDB < LogStash::Inputs::DynamoDB 20 | default :codec, 'json' 21 | 22 | private 23 | def shutdown_count 24 | @shutdown_count ||= 0 25 | end 26 | 27 | def queue_event(event, logstash_queue, host) 28 | super(event, logstash_queue, host) 29 | # Add additional item to plugin's queue to ensure run() flushes queue before shutting down. 
30 | # Queue the event and then shutdown, otherwise the threads would run forever 31 | if shutdown_count == 0 32 | @shutdown_count += 1 33 | @queue << "additional event stuck in queue during shutdown" 34 | raise LogStash::ShutdownSignal 35 | end 36 | end 37 | 38 | def start_kcl_thread() 39 | @queue << "some message from kcl thread calling process" 40 | end 41 | end 42 | 43 | class TestParser 44 | 45 | def parse_scan(msg) 46 | return msg 47 | end 48 | 49 | def parse_stream(msg) 50 | return msg 51 | end 52 | 53 | end 54 | 55 | describe 'inputs/dynamodb' do 56 | let (:dynamodb_client) {mock("AmazonDynamoDB::AmazonDynamoDBClient")} 57 | let (:dynamodb_streams_client) {mock("AmazonDynamoDB::AmazonDynamoDBStreamsClient")} 58 | let (:adapter) {mock("AmazonDynamoDB::AmazonDynamoDBStreamsAdapterClient")} 59 | let (:parser) {mock("DynamoDBLogParser")} 60 | let (:region_utils) {mock("RegionUtils")} 61 | 62 | def allow_invalid_credentials(stream_status = "ENABLED", error_to_raise = nil) 63 | AmazonDynamoDB::AmazonDynamoDBClient.expects(:new).returns(dynamodb_client) 64 | AmazonDynamoDB::AmazonDynamoDBStreamsClient.expects(:new).returns(dynamodb_streams_client) 65 | AmazonDynamoDB::AmazonDynamoDBStreamsAdapterClient.expects(:new).returns(adapter) 66 | Logstash::Inputs::DynamoDB::DynamoDBLogParser.expects(:new).returns(TestParser.new()) 67 | RegionUtils.expects(:getRegionByEndpoint).with("some endpoint").returns("some region") 68 | 69 | mock_table_description = stub 70 | mock_table = stub 71 | mock_key_schema = stub 72 | mock_iterator = stub 73 | mock_describe_stream = stub 74 | mock_stream_description = stub 75 | unless error_to_raise.nil? 
76 | dynamodb_client.expects(:describeTable).raises(error_to_raise) 77 | return 78 | end 79 | 80 | adapter.expects(:setEndpoint).with("some streams endpoint") 81 | dynamodb_streams_client.expects(:setEndpoint).with("some streams endpoint") 82 | dynamodb_streams_client.expects(:describeStream).returns(mock_describe_stream) 83 | mock_describe_stream.expects(:getStreamDescription).returns(mock_stream_description) 84 | mock_stream_description.expects(:getStreamStatus).returns(stream_status) 85 | mock_stream_description.expects(:getStreamViewType).returns("new_and_old_images") 86 | mock_table.expects(:getLatestStreamArn).returns("test streamId") 87 | dynamodb_client.expects(:setEndpoint) 88 | dynamodb_client.expects(:describeTable).returns(mock_table_description) 89 | mock_table_description.expects(:getTable).returns(mock_table) 90 | mock_table.expects(:getKeySchema).returns(mock_key_schema) 91 | mock_key_schema.expects(:iterator).returns(mock_iterator) 92 | mock_iterator.expects(:hasNext).returns(false) 93 | 94 | end 95 | 96 | it "should not allow empty config" do 97 | expect {LogStash::Plugin.lookup("input", "dynamodb").new(empty_config)}.to raise_error(LogStash::ConfigurationError) 98 | end 99 | 100 | it "should need endpoint" do 101 | config = tablename 102 | config.delete("endpoint") 103 | expect {LogStash::Plugin.lookup("input", "dynamodb").new(config)}.to raise_error(LogStash::ConfigurationError) 104 | end 105 | 106 | it "should need table_name config" do 107 | config = tablename 108 | config.delete("table_name") 109 | expect {LogStash::Plugin.lookup("input", "dynamodb").new(config)}.to raise_error(LogStash::ConfigurationError) 110 | end 111 | 112 | it "should need view_type config" do 113 | config = tablename 114 | config.delete("view_type") 115 | expect {LogStash::Plugin.lookup("input", "dynamodb").new(config)}.to raise_error(LogStash::ConfigurationError) 116 | end 117 | 118 | it "should use default AWS credentials " do 119 | input = 
LogStash::Plugin.lookup("input", "dynamodb").new(tablename) 120 | expect(input.build_credentials()).to be_an_instance_of(Java::ComAmazonawsAuth::DefaultAWSCredentialsProviderChain) 121 | end 122 | 123 | it "should register correctly" do 124 | input = LogStash::Plugin.lookup("input", "dynamodb").new(invalid_aws_credentials_config) 125 | allow_invalid_credentials() 126 | expect {input.register}.not_to raise_error 127 | end 128 | 129 | it "should create new logstash event with metadata and add to queue" do 130 | input = LogStash::Plugin.lookup("input", "dynamodb").new(invalid_aws_credentials_config) 131 | queue = SizedQueue.new(20) 132 | input.queue_event("some message", queue, "some host") 133 | expect(queue.size()).to eq(1) 134 | event = queue.pop() 135 | expect(event["message"]).to eq("some message") 136 | expect(event["host"]).to eq("some host") 137 | end 138 | 139 | it "should start mock kcl worker thread and receive event from it, then flush additional events stuck in queue before shutting down" do 140 | input = LogStash::Inputs::TestDynamoDB.new(invalid_aws_credentials_config.merge({'perform_scan' => false})) 141 | allow_invalid_credentials() 142 | input.register 143 | queue = SizedQueue.new(20) 144 | input.run queue 145 | expect(queue.size()).to eq(2) 146 | event = queue.pop() 147 | expect(event["message"]).to eq("some message from kcl thread calling process") 148 | event = queue.pop() 149 | expect(event["message"]).to eq("additional event stuck in queue during shutdown") 150 | end 151 | 152 | it "should raise error since no active streams" do 153 | input = LogStash::Plugin.lookup("input", "dynamodb").new(invalid_aws_credentials_config) 154 | allow_invalid_credentials(stream_status="DISABLED") 155 | expect {input.register}.to raise_error(LogStash::PluginLoadingError, "No streams are enabled") 156 | end 157 | 158 | it "should handle error for nonexistent table" do 159 | input = LogStash::Plugin.lookup("input", "dynamodb").new(invalid_aws_credentials_config) 160 
| allow_invalid_credentials(error_to_raise=Java::ComAmazonawsServicesDynamodbv2Model::ResourceNotFoundException.new("table does not exist")) 161 | expect {input.register}.to raise_error(LogStash::PluginLoadingError) 162 | end 163 | 164 | it "should allow cloudwatch metrics when specified by user" do 165 | input = LogStash::Inputs::TestDynamoDB.new(invalid_aws_credentials_config.merge({"publish_metrics" => true})) 166 | allow_invalid_credentials() 167 | cloudwatch_mock = mock("Java::ComAmazonawsServicesCloudwatch::AmazonCloudWatchClient") 168 | Java::ComAmazonawsServicesCloudwatch::AmazonCloudWatchClient.expects(:new).returns(cloudwatch_mock) 169 | 170 | input.register 171 | end 172 | 173 | it "should throw error trying to perform scan with old images" do 174 | input = LogStash::Inputs::TestDynamoDB.new(invalid_aws_credentials_config.merge({"view_type" => LogStash::Inputs::DynamoDB::VT_OLD_IMAGE, \ 175 | "perform_scan" => true})) 176 | expect {input.register}.to raise_error(LogStash::ConfigurationError) 177 | end 178 | 179 | it "should throw error when view type all images and dynamodb format" do 180 | input = LogStash::Inputs::TestDynamoDB.new(invalid_aws_credentials_config.merge({"view_type" => LogStash::Inputs::DynamoDB::VT_ALL_IMAGES, \ 181 | "log_format" => LogStash::Inputs::DynamoDB::LF_DYNAMODB})) 182 | expect {input.register}.to raise_error(LogStash::ConfigurationError) 183 | end 184 | 185 | it "should throw error when view type all images and json_drop_binary format" do 186 | input = LogStash::Inputs::TestDynamoDB.new(invalid_aws_credentials_config.merge({"view_type" => LogStash::Inputs::DynamoDB::VT_ALL_IMAGES, \ 187 | "log_format" => LogStash::Inputs::DynamoDB::LF_JSON_NO_BIN})) 188 | expect {input.register}.to raise_error(LogStash::ConfigurationError) 189 | end 190 | 191 | it "should throw error when view type all images and json_binary_as_text format" do 192 | input = LogStash::Inputs::TestDynamoDB.new(invalid_aws_credentials_config.merge({"view_type" 
=> LogStash::Inputs::DynamoDB::VT_ALL_IMAGES, \ 193 | "log_format" => LogStash::Inputs::DynamoDB::LF_JSON_BIN_AS_TEXT})) 194 | expect {input.register}.to raise_error(LogStash::ConfigurationError) 195 | end 196 | 197 | 198 | end 199 | -------------------------------------------------------------------------------- /spec/log_parser_spec.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # 3 | #Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | #Licensed under the Apache License, Version 2.0 (the "License"); 6 | #you may not use this file except in compliance with the License. 7 | #You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | #Unless required by applicable law or agreed to in writing, software 12 | #distributed under the License is distributed on an "AS IS" BASIS, 13 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | #See the License for the specific language governing permissions and 15 | #limitations under the License. 
#
require "spec/spec_helper"

# Test subclass that pins the key-size calculation to a constant so specs can
# drive parse_scan with a known size instead of exercising the real math.
class Logstash::Inputs::DynamoDB::DynamoDBLogParserTest < Logstash::Inputs::DynamoDB::DynamoDBLogParser

  private

  def calculate_key_size_in_bytes(record)
    10
  end
end

describe "inputs/LogParser" do
  let(:object_mapper) { mock("ObjectMapper") }

  before(:each) do
    Java::comFasterxmlJacksonDatabind::ObjectMapper.expects(:new).returns(object_mapper)
    object_mapper.expects(:setSerializationInclusion)
    object_mapper.expects(:addMixInAnnotations)
  end

  # Jackson serialization and JSON.parse are stubbed to echo the fixture back,
  # so assertions see exactly what the parser hands to the serializer.
  def expect_parse_stream
    object_mapper.expects(:writeValueAsString).with(sample_stream_result).returns(sample_stream_result)
    JSON.expects(:parse).with(sample_stream_result).returns(sample_stream_result)
  end

  def expect_parse_scan
    object_mapper.expects(:writeValueAsString).with(sample_scan_result).returns(sample_scan_result)
    JSON.expects(:parse).with(sample_scan_result).returns(sample_scan_result)
  end

  it "should parse a scan and parse a stream the same way" do
    expect_parse_stream
    expect_parse_scan
    parser = Logstash::Inputs::DynamoDB::DynamoDBLogParserTest.new(LogStash::Inputs::DynamoDB::VT_ALL_IMAGES, LogStash::Inputs::DynamoDB::LF_PLAIN, key_schema, "us-west-1")
    parsed_scan = parser.parse_scan(sample_scan_result, 38)
    parsed_stream = parser.parse_stream(sample_stream_result)
    expect(parsed_scan).to eq(parsed_stream)
  end

  it "should drop binary values when parsing into a json with the correct configuration" do
    expect_parse_scan
    parser = Logstash::Inputs::DynamoDB::DynamoDBLogParserTest.new(LogStash::Inputs::DynamoDB::VT_NEW_IMAGE, LogStash::Inputs::DynamoDB::LF_JSON_NO_BIN, key_schema, "us-west-1")
    parsed = parser.parse_scan(sample_scan_result, 38)
    expect(parsed).to eq({"TBCZDPHPXUTOTYGP" => "sampleString"}.to_json)
  end

end
-------------------------------------------------------------------------------- /spec/record_processor_and_factory_spec.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # 3 | #Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | #Licensed under the Apache License, Version 2.0 (the "License"); 6 | #you may not use this file except in compliance with the License. 7 | #You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | #Unless required by applicable law or agreed to in writing, software 12 | #distributed under the License is distributed on an "AS IS" BASIS, 13 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | #See the License for the specific language governing permissions and 15 | #limitations under the License. 16 | # 17 | require "spec/spec_helper" 18 | 19 | describe 'inputs/LogStashRecordProcessor' do 20 | before(:each) do 21 | @queue = SizedQueue.new(20) 22 | @processor = Logstash::Inputs::DynamoDB::LogStashRecordProcessor.new(@queue) 23 | end 24 | 25 | it "should call setShardId when being called with a String" do 26 | processor_with_shard = Logstash::Inputs::DynamoDB::LogStashRecordProcessor.new("test shardId") 27 | expect(processor_with_shard.shard_id).to eq("test shardId") 28 | end 29 | 30 | it "should not call setShardId when being called with a queue" do 31 | expect(@processor.queue).to eq(@queue) 32 | expect(@processor.shard_id).to be_nil 33 | end 34 | 35 | it "should checkpoint when shutdown is called with reason TERMINATE" do 36 | checkpointer = mock("checkpointer") 37 | checkpointer.expects(:checkpoint).once 38 | @processor.shutdown(checkpointer, ShutdownReason::TERMINATE) 39 | end 40 | 41 | it "should not checkpoint when shutdown is called with reason ZOMBIE" do 42 | checkpointer = mock("checkpointer") 43 | checkpointer.expects(:checkpoint).never 44 | 
@processor.shutdown(checkpointer, ShutdownReason::ZOMBIE) 45 | end 46 | 47 | it "should raise error when shutdown is called with unknown reason" do 48 | expect {@processor.shutdown("some checkpointer", "unknown reason")}.to raise_error(RuntimeError) 49 | end 50 | 51 | it "should translate each record into String, push them onto queue, and then checkpoint when process_records is called" do 52 | checkpointer = mock("checkpointer") 53 | checkpointer.expects(:checkpoint).once 54 | 55 | records = [{"a records data" => "a records value"}, {"another records data" => "another records value"}] 56 | @processor.process_records(records, checkpointer) 57 | end 58 | 59 | end 60 | 61 | describe 'inputs/LogStashRecordProcessorFactory' do 62 | 63 | it "should create a new factory correctly and create a new LogStashRecordProcessor when called upon" do 64 | queue = SizedQueue.new(20) 65 | factory = Logstash::Inputs::DynamoDB::LogStashRecordProcessorFactory.new(queue) 66 | processor = factory.create_processor 67 | expect(processor).to be_an_instance_of(Logstash::Inputs::DynamoDB::LogStashRecordProcessor) 68 | end 69 | 70 | end 71 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | # This file was generated by the `rspec --init` command. Conventionally, all 2 | # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`. 3 | # The generated `.rspec` file contains `--require spec_helper` which will cause 4 | # this file to always be loaded, without a need to explicitly require it in any 5 | # files. 6 | # 7 | # Given that it is always loaded, you are encouraged to keep this file as 8 | # light-weight as possible. Requiring heavyweight dependencies from this file 9 | # will add to the boot time of your test suite on EVERY test run, even for an 10 | # individual file that may not need all of that loaded. 
Instead, consider making 11 | # a separate helper file that requires the additional dependencies and performs 12 | # the additional setup, and require it from the spec files that actually need 13 | # it. 14 | # 15 | # The `.rspec` file also contains a few flags that are not defaults but that 16 | # users commonly want. 17 | # 18 | # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration 19 | RSpec.configure do |config| 20 | # rspec-expectations config goes here. You can use an alternate 21 | # assertion/expectation library such as wrong or the stdlib/minitest 22 | # assertions if you prefer. 23 | config.expect_with :rspec do |expectations| 24 | # This option will default to `true` in RSpec 4. It makes the `description` 25 | # and `failure_message` of custom matchers include text for helper methods 26 | # defined using `chain`, e.g.: 27 | # be_bigger_than(2).and_smaller_than(4).description 28 | # # => "be bigger than 2 and smaller than 4" 29 | # ...rather than: 30 | # # => "be bigger than 2" 31 | expectations.include_chain_clauses_in_custom_matcher_descriptions = true 32 | end 33 | 34 | # rspec-mocks config goes here. You can use an alternate test double 35 | # library (such as bogus or mocha) by changing the `mock_with` option here. 36 | config.mock_with :rspec do |mocks| 37 | # Prevents you from mocking or stubbing a method that does not exist on 38 | # a real object. This is generally recommended, and will default to 39 | # `true` in RSpec 4. 40 | mocks.verify_partial_doubles = true 41 | end 42 | 43 | # The settings below are suggested to provide a good initial experience 44 | # with RSpec, but feel free to customize to your heart's content. 45 | =begin 46 | # These two settings work together to allow you to limit a spec run 47 | # to individual examples or groups you care about by tagging them with 48 | # `:focus` metadata. When nothing is tagged with `:focus`, all examples 49 | # get run. 
50 | config.filter_run :focus 51 | config.run_all_when_everything_filtered = true 52 | 53 | # Allows RSpec to persist some state between runs in order to support 54 | # the `--only-failures` and `--next-failure` CLI options. We recommend 55 | # you configure your source control system to ignore this file. 56 | config.example_status_persistence_file_path = "spec/examples.txt" 57 | 58 | # Limits the available syntax to the non-monkey patched syntax that is 59 | # recommended. For more details, see: 60 | # - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax 61 | # - http://www.teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/ 62 | # - http://myronmars.to/n/dev-blog/2014/05/notable-changes-in-rspec-3#new__config_option_to_disable_rspeccore_monkey_patching 63 | config.disable_monkey_patching! 64 | 65 | # This setting enables warnings. It's recommended, but in some cases may 66 | # be too noisy due to issues in dependencies. 67 | config.warnings = true 68 | 69 | # Many RSpec users commonly either run the entire suite or an individual 70 | # file, and it's useful to allow more verbose output when running an 71 | # individual spec file. 72 | if config.files_to_run.one? 73 | # Use the documentation formatter for detailed output, 74 | # unless a formatter has already been configured 75 | # (e.g. via a command-line flag). 76 | config.default_formatter = 'doc' 77 | end 78 | 79 | # Print the 10 slowest examples and example groups at the 80 | # end of the spec run, to help surface which specs are running 81 | # particularly slow. 82 | config.profile_examples = 10 83 | 84 | # Run specs in random order to surface order dependencies. If you find an 85 | # order dependency and want to debug it, you can fix the order by providing 86 | # the seed, which is printed after each run. 87 | # --seed 1234 88 | config.order = :random 89 | 90 | # Seed global randomization in this process using the `--seed` CLI option. 
91 | # Setting this allows you to use `--seed` to deterministically reproduce 92 | # test failures related to randomization by passing the same `--seed` value 93 | # as the one that triggered the failure. 94 | Kernel.srand config.seed 95 | =end 96 | end 97 | 98 | require "logstash/devutils/rspec/spec_helper" 99 | require "logstash/inputs/DynamoDBLogParser" 100 | require "logstash/inputs/dynamodb" 101 | require "rspec/expectations" 102 | require "rspec/mocks" 103 | require "mocha" 104 | require "java" 105 | 106 | RSpec.configure do |config| 107 | config.mock_with :mocha 108 | end 109 | 110 | def empty_config 111 | {} 112 | end 113 | 114 | def tablename 115 | {'table_name' => 'test tablename', 'view_type' => "new_and_old_images", "endpoint" => "some endpoint"} 116 | end 117 | def invalid_aws_credentials_config 118 | {'table_name' => 'test tablename', "endpoint" => "some endpoint", 'aws_access_key_id' => 'invalid', 'aws_secret_access_key' => 'invalid_also', 'view_type' => "new_and_old_images", "streams_endpoint" => "some streams endpoint"} 119 | end 120 | def invalid_aws_credentials_config_no_endpoints 121 | {'table_name' => 'test tablename', 'aws_access_key_id' => 'invalid', 'aws_secret_access_key' => 'invalid_also', 'view_type' => "new_and_old_images"} 122 | end 123 | def key_schema 124 | ["TBCZDPHPXUTOTYGP", "some bin key"] 125 | end 126 | def sample_scan_result 127 | {"TBCZDPHPXUTOTYGP" => {"S" => "sampleString"}, "some bin key" => {"B" => "actualbinval"}} 128 | end 129 | def sample_stream_result 130 | {"internalObject" => {"eventID" => "0","eventName" => "INSERT","eventVersion" => "1.0", \ 131 | "eventSource" => "aws:dynamodb","awsRegion" => "us-west-1","dynamodb" => {"keys" => {"TBCZDPHPXUTOTYGP" => {"S" => "sampleString"}, \ 132 | "some bin key" => {"B" => "actualbinval"}}, "newImage" => {"TBCZDPHPXUTOTYGP" => {"S" => "sampleString"}, \ 133 | "some bin key" => {"B" => "actualbinval"}},"sequenceNumber" => "0","sizeBytes" => 48,"streamViewType" => 
LogStash::Inputs::DynamoDB::VT_ALL_IMAGES.upcase}}} 134 | end 135 | --------------------------------------------------------------------------------