├── .gitignore ├── LICENSE ├── README.rst ├── pom.xml ├── sink-quickstart.properties ├── source-quickstart.properties └── src ├── assembly ├── development.xml ├── package.xml └── standalone.xml ├── main └── java │ └── dynamok │ ├── Version.java │ ├── sink │ ├── AttributeValueConverter.java │ ├── ConnectorConfig.java │ ├── DynamoDbSinkConnector.java │ ├── DynamoDbSinkTask.java │ ├── KafkaCoordinateNames.java │ └── UnprocessedItemsException.java │ └── source │ ├── ConnectorConfig.java │ ├── DynamoDbSourceConnector.java │ ├── DynamoDbSourceTask.java │ ├── RecordMapper.java │ └── TaskConfig.java └── test └── java └── dynamok ├── sink └── AttributeValueConverterTest.java └── source └── RecordMapperTest.java /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | target/ 3 | kafka-connect-dynamodb.iml 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | **kafka-connect-dynamodb** is a `Kafka Connector `_ for loading data to and from Amazon DynamoDB. 2 | 3 | It is implemented using the AWS Java SDK for DynamoDB. 4 | For authentication, the `DefaultAWSCredentialsProviderChain `_ is used. 
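The sink and source tasks resolve AWS credentials along the following lines, a condensed sketch of the logic in ``DynamoDbSinkTask.start()`` and ``DynamoDbSourceConnector.start()`` (both included later in this repository); the empty key strings and the region value are illustrative::

    import com.amazonaws.auth.BasicAWSCredentials;
    import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
    import com.amazonaws.regions.Regions;
    import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient;

    public class CredentialsSketch {
        public static void main(String[] args) {
            String accessKeyId = ""; // access.key.id connector setting
            String secretKey = "";   // secret.key connector setting

            final AmazonDynamoDBClient client;
            if (accessKeyId.isEmpty() || secretKey.isEmpty()) {
                // No explicit keys configured: fall back to the default provider chain
                // (environment variables, system properties, profiles, instance roles, ...).
                client = new AmazonDynamoDBClient(DefaultAWSCredentialsProviderChain.getInstance());
            } else {
                client = new AmazonDynamoDBClient(new BasicAWSCredentials(accessKeyId, secretKey));
            }
            client.configureRegion(Regions.US_WEST_2);
            client.shutdown();
        }
    }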
5 | 6 | Building 7 | ======== 8 | 9 | Run:: 10 | 11 | $ mvn clean package 12 | 13 | Then you will find this connector and the JARs it depends on in ``target/kafka-connect-dynamodb-$version-SNAPSHOT-package/share/java/kafka-connect-dynamodb/*``. 14 | 15 | To create an uber JAR:: 16 | 17 | $ mvn -P standalone clean package 18 | 19 | The uber JAR will be created at ``target/kafka-connect-dynamodb-$version-SNAPSHOT-standalone.jar``. 20 | 21 | Sink Connector 22 | ============== 23 | 24 | Example configuration 25 | --------------------- 26 | 27 | Ingest the ``orders`` topic to a DynamoDB table of the same name in the specified region:: 28 | 29 | name=dynamodb-sink-test 30 | topics=orders 31 | connector.class=dynamok.sink.DynamoDbSinkConnector 32 | region=us-west-2 33 | ignore.record.key=true 34 | 35 | Record conversion 36 | ----------------- 37 | 38 | Refer to `DynamoDB Data Types `_. 39 | 40 | At the top level, the converted value must either be of the DynamoDB ``Map`` data type, 41 | or the ``top.key.attribute`` or ``top.value.attribute`` configuration option must be set for the Kafka record key or value as applicable, 42 | so that the converted value can be hoisted into the DynamoDB record. 43 | 44 | Schema present 45 | ^^^^^^^^^^^^^^ 46 | 47 | ================================================================================ ============= 48 | **Connect Schema Type** **DynamoDB** 49 | -------------------------------------------------------------------------------- ------------- 50 | ``INT8``, ``INT16``, ``INT32``, ``INT64``, ``FLOAT32``, ``FLOAT64``, ``Decimal`` ``Number`` 51 | ``BOOL`` ``Boolean`` 52 | ``BYTES`` ``Binary`` 53 | ``STRING`` ``String`` 54 | ``ARRAY`` ``List`` 55 | ``MAP`` [#]_, ``STRUCT`` ``Map`` 56 | ================================================================================ ============= 57 | 58 | .. [#] Map keys must be primitive types, and cannot be optional. 59 | 60 | ``null`` values for optional schemas are translated to the ``Null`` type. 61 | 62 | Schemaless 63 | ^^^^^^^^^^ 64 | 65 | ======================================================================================= ============== 66 | **Java** **DynamoDB** 67 | --------------------------------------------------------------------------------------- -------------- 68 | ``null`` ``Null`` 69 | ``Number`` [#]_ ``Number`` 70 | ``Boolean`` ``Boolean`` 71 | ``byte[]``, ``ByteBuffer`` ``Binary`` 72 | ``String`` ``String`` 73 | ``List`` ``List`` 74 | Empty ``Set`` [#]_ ``Null`` 75 | ``Set<String>`` ``String Set`` 76 | ``Set<Number>`` ``Number Set`` 77 | ``Set<byte[]>``, ``Set<ByteBuffer>`` ``Binary Set`` 78 | ``Map`` [#]_ ``Map`` 79 | ======================================================================================= ============== 80 | 81 | Any other data type will cause the connector to fail. 82 | 83 | .. [#] i.e. ``Byte``, ``Short``, ``Integer``, ``Long``, ``Float``, ``Double``, ``BigInteger``, ``BigDecimal`` 84 | 85 | .. [#] It is not possible to determine the element type of an empty set. 86 | 87 | .. [#] Map keys must be primitive types, and cannot be optional. 88 |
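As a concrete illustration, the following is a minimal sketch of the schemaless conversion using this project's ``AttributeValueConverter`` (shown later in this repository); the field names and values are purely illustrative, and the connector classes are assumed to be on the classpath::

    import com.amazonaws.services.dynamodbv2.model.AttributeValue;
    import dynamok.sink.AttributeValueConverter;

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.Map;

    public class SchemalessConversionExample {
        public static void main(String[] args) {
            // A schemaless Kafka record value, e.g. from the JSON converter with schemas disabled.
            Map<String, Object> value = new HashMap<>();
            value.put("orderId", 42L);                            // -> Number
            value.put("customer", "alice");                       // -> String
            value.put("expedited", true);                         // -> Boolean
            value.put("items", Arrays.asList("widget", "gizmo")); // -> List

            // A top-level Map converts to the DynamoDB Map type, so it can be hoisted directly
            // into the item; otherwise top.key.attribute / top.value.attribute must name an
            // envelope attribute to hold the converted value.
            AttributeValue item = AttributeValueConverter.toAttributeValueSchemaless(value);
            System.out.println(item.getM()); // prints the four converted attributes
        }
    }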
89 | Configuration options 90 | --------------------- 91 | 92 | ``region`` 93 | AWS region for DynamoDB. 94 | 95 | * Type: string 96 | * Default: "" 97 | * Importance: high 98 | 99 | ``access.key.id`` 100 | Explicit AWS access key ID. Leave empty to utilize the default credential provider chain. 101 | 102 | * Type: password 103 | * Default: [hidden] 104 | * Importance: low 105 | 106 | ``secret.key`` 107 | Explicit AWS secret access key. Leave empty to utilize the default credential provider chain. 108 | 109 | * Type: password 110 | * Default: [hidden] 111 | * Importance: low 112 | 113 | ``batch.size`` 114 | Batch size between 1 (dedicated ``PutItemRequest`` for each record) and 25 (which is the maximum number of items in a ``BatchWriteItemRequest``). 115 | 116 | * Type: int 117 | * Default: 1 118 | * Importance: high 119 | 120 | ``kafka.attributes`` 121 | Trio of ``topic,partition,offset`` attribute names to include in records; set to empty to omit these attributes. 122 | 123 | * Type: list 124 | * Default: [kafka_topic, kafka_partition, kafka_offset] 125 | * Importance: high 126 | 127 | ``table.format`` 128 | Format string for the destination DynamoDB table name; use ``${topic}`` as a placeholder for the source topic. 129 | 130 | * Type: string 131 | * Default: "${topic}" 132 | * Importance: high 133 | 134 | ``ignore.record.key`` 135 | Whether to ignore Kafka record keys in preparing the DynamoDB record. 136 | 137 | * Type: boolean 138 | * Default: false 139 | * Importance: medium 140 | 141 | ``ignore.record.value`` 142 | Whether to ignore Kafka record values in preparing the DynamoDB record. 143 | 144 | * Type: boolean 145 | * Default: false 146 | * Importance: medium 147 | 148 | ``top.key.attribute`` 149 | DynamoDB attribute name to use for the record key. Leave empty if no top-level envelope attribute is desired. 150 | 151 | * Type: string 152 | * Default: "" 153 | * Importance: medium 154 | 155 | ``top.value.attribute`` 156 | DynamoDB attribute name to use for the record value. Leave empty if no top-level envelope attribute is desired. 157 | 158 | * Type: string 159 | * Default: "" 160 | * Importance: medium 161 | 162 | ``max.retries`` 163 | The maximum number of times to retry on errors before failing the task. 164 | 165 | * Type: int 166 | * Default: 10 167 | * Importance: medium 168 | 169 | ``retry.backoff.ms`` 170 | The time in milliseconds to wait following an error before a retry attempt is made. 171 | 172 | * Type: int 173 | * Default: 3000 174 | * Importance: medium 175 | 176 | Source Connector 177 | ================ 178 | 179 | Example configuration 180 | --------------------- 181 | 182 | Ingest all DynamoDB tables in the specified region to Kafka topics with the same name as the source table:: 183 | 184 | name=dynamodb-source-test 185 | connector.class=dynamok.source.DynamoDbSourceConnector 186 | region=us-west-2 187 | 188 | Record conversion 189 | ----------------- 190 | 191 | *TODO describe conversion scheme* 192 | 193 | Limitations 194 | ^^^^^^^^^^^ 195 | 196 | DynamoDB records containing heterogeneous lists (``L``) or maps (``M``) are not currently supported; these fields will be silently dropped. 197 | It will be possible to add support for them with the implementation of `KAFKA-3910 `_. 198 | 199 | Configuration options 200 | --------------------- 201 | 202 | ``region`` 203 | AWS region for DynamoDB. 204 | 205 | * Type: string 206 | * Default: "" 207 | * Importance: high 208 | 209 | ``access.key.id`` 210 | Explicit AWS access key ID. Leave empty to utilize the default credential provider chain. 211 | 212 | * Type: password 213 | * Default: [hidden] 214 | * Importance: low 215 | 216 | ``secret.key`` 217 | Explicit AWS secret access key. Leave empty to utilize the default credential provider chain. 218 | 219 | * Type: password 220 | * Default: [hidden] 221 | * Importance: low 222 | 223 | ``topic.format`` 224 | Format string for the destination Kafka topic; use ``${table}`` as a placeholder for the source table name.
225 | 226 | * Type: string 227 | * Default: "${table}" 228 | * Importance: high 229 | 230 | ``tables.prefix`` 231 | Prefix for DynamoDB tables to source from. 232 | 233 | * Type: string 234 | * Default: "" 235 | * Importance: medium 236 | 237 | ``tables.whitelist`` 238 | Whitelist for DynamoDB tables to source from. 239 | 240 | * Type: list 241 | * Default: "" 242 | * Importance: medium 243 | 244 | ``tables.blacklist`` 245 | Blacklist for DynamoDB tables to source from. 246 | 247 | * Type: list 248 | * Default: "" 249 | * Importance: medium 250 | 251 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 4.0.0 7 | 8 | dynamok 9 | kafka-connect-dynamodb 10 | jar 11 | 0.3.0-SNAPSHOT 12 | kafka-connect-dynamodb 13 | 14 | A Kafka Connect DynamoDB connector for copying data between DynamoDB and Kafka. 15 | 16 | 17 | 18 | 19 | Apache License 2.0 20 | http://www.apache.org/licenses/LICENSE-2.0.html 21 | repo 22 | 23 | 24 | 25 | 26 | scm:git:git://github.com/shikhar/kafka-connect-dynamodb.git 27 | scm:git:git@github.com:shikhar/kafka-connect-dynamodb.git 28 | https://github.com/shikhar/kafka-connect-dynamodb 29 | HEAD 30 | 31 | 32 | 33 | 0.10.1.0 34 | 4.12 35 | 1.7.5 36 | UTF-8 37 | 38 | 39 | 40 | 41 | org.apache.kafka 42 | connect-api 43 | ${kafka.version} 44 | provided 45 | 46 | 47 | com.amazonaws 48 | aws-java-sdk-dynamodb 49 | 1.11.65 50 | 51 | 52 | org.slf4j 53 | slf4j-simple 54 | ${slf4j.version} 55 | test 56 | 57 | 58 | junit 59 | junit 60 | ${junit.version} 61 | test 62 | 63 | 64 | com.google.guava 65 | guava 66 | 19.0 67 | test 68 | 69 | 70 | 71 | 72 | 73 | 74 | org.apache.maven.plugins 75 | maven-compiler-plugin 76 | 2.5.1 77 | true 78 | 79 | 1.8 80 | 1.8 81 | 82 | 83 | 84 | maven-assembly-plugin 85 | 2.5.3 86 | 87 | 88 | src/assembly/development.xml 89 | src/assembly/package.xml 90 | 91 | 92 | 93 | 94 | make-assembly 95 | package 96 | 97 | single 98 | 99 | 100 | 101 | 102 | 103 | org.apache.maven.plugins 104 | maven-surefire-plugin 105 | 2.18.1 106 | 107 | -Djava.awt.headless=true 108 | 109 | 110 | 111 | 112 | 113 | src/main/resources 114 | true 115 | 116 | 117 | 118 | 119 | 120 | 121 | standalone 122 | 123 | 124 | 125 | maven-assembly-plugin 126 | 127 | 128 | src/assembly/standalone.xml 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | -------------------------------------------------------------------------------- /sink-quickstart.properties: -------------------------------------------------------------------------------- 1 | name=dynamodb-sink-test 2 | topics=orders 3 | connector.class=dynamok.sink.DynamoDbSinkConnector 4 | region=us-west-2 5 | ignore.record.key=true 6 | -------------------------------------------------------------------------------- /source-quickstart.properties: -------------------------------------------------------------------------------- 1 | name=dynamodb-source-test 2 | connector.class=dynamok.source.DynamoDbSourceConnector 3 | region=us-west-2 4 | -------------------------------------------------------------------------------- /src/assembly/development.xml: -------------------------------------------------------------------------------- 1 | 5 | 7 | development 8 | 9 | dir 10 | 11 | false 12 | 13 | 14 | share/java/kafka-connect-dynamodb/ 15 | 16 | 17 | -------------------------------------------------------------------------------- /src/assembly/package.xml: 
-------------------------------------------------------------------------------- 1 | 5 | 6 | package 7 | 8 | dir 9 | 10 | false 11 | 12 | 13 | ${project.basedir} 14 | share/doc/kafka-connect-dynamodb/ 15 | 16 | version.txt 17 | README* 18 | LICENSE* 19 | NOTICE* 20 | licenses/ 21 | 22 | 23 | 24 | ${project.basedir}/config 25 | etc/kafka-connect-dynamodb 26 | 27 | * 28 | 29 | 30 | 31 | 32 | 33 | share/java/kafka-connect-dynamodb 34 | true 35 | true 36 | 37 | org.apache.kafka:connect-api 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /src/assembly/standalone.xml: -------------------------------------------------------------------------------- 1 | 5 | 7 | standalone 8 | 9 | jar 10 | 11 | false 12 | 13 | 14 | ${project.basedir} 15 | / 16 | 17 | README* 18 | LICENSE* 19 | NOTICE* 20 | 21 | 22 | 23 | 24 | 25 | / 26 | true 27 | true 28 | runtime 29 | 30 | 31 | -------------------------------------------------------------------------------- /src/main/java/dynamok/Version.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 Shikhar Bhushan 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package dynamok; 18 | 19 | public class Version { 20 | 21 | // TODO pull this in from a packaged resource controlled by the build 22 | private static final String VERSION = "0.1.0-SNAPSHOT"; 23 | 24 | public static String get() { 25 | return VERSION; 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/dynamok/sink/AttributeValueConverter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 Shikhar Bhushan 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package dynamok.sink; 18 | 19 | import com.amazonaws.services.dynamodbv2.model.AttributeValue; 20 | import org.apache.kafka.connect.data.Decimal; 21 | import org.apache.kafka.connect.data.Field; 22 | import org.apache.kafka.connect.data.Schema; 23 | import org.apache.kafka.connect.data.Struct; 24 | import org.apache.kafka.connect.errors.DataException; 25 | 26 | import java.nio.ByteBuffer; 27 | import java.util.Base64; 28 | import java.util.HashMap; 29 | import java.util.Iterator; 30 | import java.util.List; 31 | import java.util.Map; 32 | import java.util.Set; 33 | import java.util.stream.Collectors; 34 | 35 | public class AttributeValueConverter { 36 | 37 | public static final AttributeValue NULL_VALUE = new AttributeValue().withNULL(true); 38 | 39 | public static AttributeValue toAttributeValue(Schema schema, Object value) { 40 | if (value == null) { 41 | if (schema.defaultValue() != null) { 42 | value = schema.defaultValue(); 43 | } else if (schema.isOptional()) { 44 | return NULL_VALUE; 45 | } else { 46 | throw new DataException("null value for non-optional schema with no default value"); 47 | } 48 | } 49 | 50 | if (schema.name() != null && schema.name().equals(Decimal.LOGICAL_NAME)) { 51 | return new AttributeValue().withN(value.toString()); 52 | } 53 | 54 | switch (schema.type()) { 55 | case INT8: 56 | case INT16: 57 | case INT32: 58 | case INT64: 59 | case FLOAT32: 60 | case FLOAT64: 61 | return new AttributeValue().withN(value.toString()); 62 | case BOOLEAN: 63 | return new AttributeValue().withBOOL((boolean) value); 64 | case STRING: 65 | return new AttributeValue().withS((String) value); 66 | case BYTES: 67 | return new AttributeValue().withB(toByteBuffer(value)); 68 | case ARRAY: { 69 | return new AttributeValue().withL( 70 | ((List) value).stream() 71 | .map(item -> toAttributeValue(schema.valueSchema(), item)) 72 | .collect(Collectors.toList()) 73 | ); 74 | } 75 | case MAP: { 76 | if (schema.keySchema().isOptional()) { 77 | throw new DataException("MAP key schema must not be optional"); 78 | } 79 | if (!schema.keySchema().type().isPrimitive()) { 80 | throw new DataException("MAP key schema must be of primitive type"); 81 | } 82 | final Map sourceMap = (Map) value; 83 | final Map attributesMap = new HashMap<>(sourceMap.size()); 84 | for (Map.Entry e : sourceMap.entrySet()) { 85 | attributesMap.put( 86 | primitiveAsString(nullFallback(e.getKey(), schema.keySchema().defaultValue())), 87 | toAttributeValue(schema.valueSchema(), e.getValue()) 88 | ); 89 | } 90 | return new AttributeValue().withM(attributesMap); 91 | } 92 | case STRUCT: { 93 | final Struct struct = (Struct) value; 94 | final List fields = schema.fields(); 95 | final Map attributesMap = new HashMap<>(fields.size()); 96 | for (Field field : fields) { 97 | attributesMap.put(field.name(), toAttributeValue(field.schema(), struct.get(field))); 98 | } 99 | return new AttributeValue().withM(attributesMap); 100 | } 101 | default: 102 | throw new DataException("Unknown Schema.Type: " + schema.type()); 103 | } 104 | } 105 | 106 | public static AttributeValue toAttributeValueSchemaless(Object value) { 107 | if (value == null) { 108 | return NULL_VALUE; 109 | } 110 | if (value instanceof Number) { 111 | return new AttributeValue().withN(value.toString()); 112 | } 113 | if (value instanceof Boolean) { 114 | return new AttributeValue().withBOOL((Boolean) value); 115 | } 116 | if (value instanceof String) { 117 | return new AttributeValue().withS((String) value); 118 | } 119 | if (value instanceof byte[] || value 
instanceof ByteBuffer) { 120 | return new AttributeValue().withB(toByteBuffer(value)); 121 | } 122 | if (value instanceof List) { 123 | // We could have treated it as NS/BS/SS if the list is homogeneous and a compatible type, but can't know for ane empty list 124 | return new AttributeValue().withL( 125 | ((List) value).stream() 126 | .map(AttributeValueConverter::toAttributeValueSchemaless) 127 | .collect(Collectors.toList()) 128 | ); 129 | } 130 | if (value instanceof Set) { 131 | final Set set = (Set) value; 132 | if (set.isEmpty()) { 133 | return NULL_VALUE; 134 | } 135 | final Object firstItem = ((Iterator) set.iterator()).next(); 136 | if (firstItem instanceof String) { 137 | return new AttributeValue().withSS((Set) set); 138 | } 139 | if (firstItem instanceof Number) { 140 | return new AttributeValue().withNS(set.stream().map(Object::toString).collect(Collectors.toSet())); 141 | } 142 | if (firstItem instanceof byte[] || firstItem instanceof ByteBuffer) { 143 | return new AttributeValue().withBS(set.stream().map(AttributeValueConverter::toByteBuffer).collect(Collectors.toSet())); 144 | } 145 | throw new DataException("Unsupported Set element type: " + firstItem.getClass()); 146 | } 147 | if (value instanceof Map) { 148 | final Map sourceMap = (Map) value; 149 | final Map attributesMap = new HashMap<>(sourceMap.size()); 150 | for (Map.Entry e : sourceMap.entrySet()) { 151 | attributesMap.put( 152 | primitiveAsString(e.getKey()), 153 | toAttributeValueSchemaless(e.getValue()) 154 | ); 155 | } 156 | return new AttributeValue().withM(attributesMap); 157 | } 158 | throw new DataException("Unsupported value type: " + value.getClass()); 159 | } 160 | 161 | private static Object nullFallback(Object x, Object fallback) { 162 | return x != null ? x : fallback; 163 | } 164 | 165 | private static String primitiveAsString(Object value) { 166 | if (value instanceof Number || value instanceof Boolean || value instanceof String) { 167 | return value.toString(); 168 | } 169 | if (value instanceof byte[]) { 170 | return Base64.getEncoder().encodeToString((byte[]) value); 171 | } else if (value instanceof ByteBuffer) { 172 | return Base64.getEncoder().encode((ByteBuffer) value).asCharBuffer().toString(); 173 | } 174 | throw new DataException("Not a primitive: " + value.getClass()); 175 | } 176 | 177 | private static ByteBuffer toByteBuffer(Object bytesValue) { 178 | if (bytesValue instanceof byte[]) { 179 | return ByteBuffer.wrap((byte[]) bytesValue); 180 | } else if (bytesValue instanceof ByteBuffer) { 181 | return ((ByteBuffer) bytesValue); 182 | } else { 183 | throw new DataException("Invalid bytes value of type: " + bytesValue.getClass()); 184 | } 185 | } 186 | 187 | } 188 | -------------------------------------------------------------------------------- /src/main/java/dynamok/sink/ConnectorConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 Shikhar Bhushan 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package dynamok.sink; 18 | 19 | import com.amazonaws.regions.Regions; 20 | import org.apache.kafka.common.config.AbstractConfig; 21 | import org.apache.kafka.common.config.ConfigDef; 22 | import org.apache.kafka.common.config.ConfigException; 23 | import org.apache.kafka.common.config.types.Password; 24 | 25 | import java.util.Arrays; 26 | import java.util.Iterator; 27 | import java.util.List; 28 | import java.util.Map; 29 | 30 | class ConnectorConfig extends AbstractConfig { 31 | 32 | private enum Keys { 33 | ; 34 | static final String REGION = "region"; 35 | static final String ACCESS_KEY_ID = "access.key.id"; 36 | static final String SECRET_KEY = "secret.key"; 37 | static final String TABLE_FORMAT = "table.format"; 38 | static final String BATCH_SIZE = "batch.size"; 39 | static final String KAFKA_ATTRIBUTES = "kafka.attributes"; 40 | static final String IGNORE_RECORD_KEY = "ignore.record.key"; 41 | static final String IGNORE_RECORD_VALUE = "ignore.record.value"; 42 | static final String TOP_KEY_ATTRIBUTE = "top.key.attribute"; 43 | static final String TOP_VALUE_ATTRIBUTE = "top.value.attribute"; 44 | static final String MAX_RETRIES = "max.retries"; 45 | static final String RETRY_BACKOFF_MS = "retry.backoff.ms"; 46 | } 47 | 48 | static final ConfigDef CONFIG_DEF = new ConfigDef() 49 | .define(Keys.REGION, ConfigDef.Type.STRING, ConfigDef.NO_DEFAULT_VALUE, (key, regionName) -> { 50 | if (Arrays.stream(Regions.values()).noneMatch(x -> x.getName().equals(regionName))) { 51 | throw new ConfigException("Invalid AWS region: " + regionName); 52 | } 53 | }, ConfigDef.Importance.HIGH, "AWS region for DynamoDB.") 54 | .define(Keys.ACCESS_KEY_ID, ConfigDef.Type.PASSWORD, "", 55 | ConfigDef.Importance.LOW, "Explicit AWS access key ID. " + 56 | "Leave empty to utilize the default credential provider chain.") 57 | .define(Keys.SECRET_KEY, ConfigDef.Type.PASSWORD, "", 58 | ConfigDef.Importance.LOW, "Explicit AWS secret access key. 
" + 59 | "Leave empty to utilize the default credential provider chain.") 60 | .define(Keys.TABLE_FORMAT, ConfigDef.Type.STRING, "${topic}", 61 | ConfigDef.Importance.HIGH, "Format string for destination DynamoDB table name, use ``${topic}`` as placeholder for source topic.") 62 | .define(Keys.BATCH_SIZE, ConfigDef.Type.INT, 1, ConfigDef.Range.between(1, 25), 63 | ConfigDef.Importance.HIGH, "Batch size between 1 (dedicated ``PutItemRequest`` for each record) and 25 (which is the maximum number of items in a ``BatchWriteItemRequest``)") 64 | .define(Keys.KAFKA_ATTRIBUTES, ConfigDef.Type.LIST, "kafka_topic,kafka_partition,kafka_offset", (key, names) -> { 65 | final List namesList = (List) names; 66 | if (!namesList.isEmpty() && namesList.size() != 3) 67 | throw new ConfigException(Keys.KAFKA_ATTRIBUTES, 68 | "Must be empty or contain exactly 3 attribute names mapping to the topic, partition and offset, but was: " + namesList); 69 | }, ConfigDef.Importance.HIGH, "Trio of ``topic,partition,offset`` attribute names to include in records, set to empty to omit these attributes.") 70 | .define(Keys.IGNORE_RECORD_KEY, ConfigDef.Type.BOOLEAN, false, 71 | ConfigDef.Importance.MEDIUM, "Whether to ignore Kafka record keys in preparing the DynamoDB record.") 72 | .define(Keys.IGNORE_RECORD_VALUE, ConfigDef.Type.BOOLEAN, false, 73 | ConfigDef.Importance.MEDIUM, "Whether to ignore Kafka record value in preparing the DynamoDB record.") 74 | .define(Keys.TOP_KEY_ATTRIBUTE, ConfigDef.Type.STRING, "", 75 | ConfigDef.Importance.MEDIUM, "DynamoDB attribute name to use for the record key. " + 76 | "Leave empty if no top-level envelope attribute is desired.") 77 | .define(Keys.TOP_VALUE_ATTRIBUTE, ConfigDef.Type.STRING, "", 78 | ConfigDef.Importance.MEDIUM, "DynamoDB attribute name to use for the record value. 
" + 79 | "Leave empty if no top-level envelope attribute is desired.") 80 | .define(Keys.MAX_RETRIES, ConfigDef.Type.INT, 10, 81 | ConfigDef.Importance.MEDIUM, "The maximum number of times to retry on errors before failing the task.") 82 | .define(Keys.RETRY_BACKOFF_MS, ConfigDef.Type.INT, 3000, 83 | ConfigDef.Importance.MEDIUM, "The time in milliseconds to wait following an error before a retry attempt is made."); 84 | 85 | final Regions region; 86 | final Password accessKeyId; 87 | final Password secretKey; 88 | final String tableFormat; 89 | final int batchSize; 90 | final KafkaCoordinateNames kafkaCoordinateNames; 91 | final boolean ignoreRecordKey; 92 | final boolean ignoreRecordValue; 93 | final String topKeyAttribute; 94 | final String topValueAttribute; 95 | final int maxRetries; 96 | final int retryBackoffMs; 97 | 98 | ConnectorConfig(ConfigDef config, Map parsedConfig) { 99 | super(config, parsedConfig); 100 | region = Regions.fromName(getString(Keys.REGION)); 101 | accessKeyId = getPassword(Keys.ACCESS_KEY_ID); 102 | secretKey = getPassword(Keys.SECRET_KEY); 103 | tableFormat = getString(Keys.TABLE_FORMAT); 104 | batchSize = getInt(Keys.BATCH_SIZE); 105 | kafkaCoordinateNames = kafkaCoordinateNamesFromConfig(getList(Keys.KAFKA_ATTRIBUTES)); 106 | ignoreRecordKey = getBoolean(Keys.IGNORE_RECORD_KEY); 107 | ignoreRecordValue = getBoolean(Keys.IGNORE_RECORD_VALUE); 108 | topKeyAttribute = getString(Keys.TOP_KEY_ATTRIBUTE); 109 | topValueAttribute = getString(Keys.TOP_VALUE_ATTRIBUTE); 110 | maxRetries = getInt(Keys.MAX_RETRIES); 111 | retryBackoffMs = getInt(Keys.RETRY_BACKOFF_MS); 112 | } 113 | 114 | ConnectorConfig(Map props) { 115 | this(CONFIG_DEF, props); 116 | } 117 | 118 | private static KafkaCoordinateNames kafkaCoordinateNamesFromConfig(List names) { 119 | if (names.isEmpty()) return null; 120 | final Iterator it = names.iterator(); 121 | return new KafkaCoordinateNames(it.next(), it.next(), it.next()); 122 | } 123 | 124 | public static void main(String... args) { 125 | System.out.println(CONFIG_DEF.toRst()); 126 | } 127 | 128 | } 129 | -------------------------------------------------------------------------------- /src/main/java/dynamok/sink/DynamoDbSinkConnector.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 Shikhar Bhushan 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package dynamok.sink; 18 | 19 | import dynamok.Version; 20 | import org.apache.kafka.common.config.ConfigDef; 21 | import org.apache.kafka.connect.connector.Task; 22 | import org.apache.kafka.connect.sink.SinkConnector; 23 | 24 | import java.util.Collections; 25 | import java.util.List; 26 | import java.util.Map; 27 | 28 | public class DynamoDbSinkConnector extends SinkConnector { 29 | 30 | private Map props; 31 | 32 | @Override 33 | public Class taskClass() { 34 | return DynamoDbSinkTask.class; 35 | } 36 | 37 | @Override 38 | public void start(Map props) { 39 | this.props = props; 40 | } 41 | 42 | @Override 43 | public List> taskConfigs(int maxTasks) { 44 | return Collections.nCopies(maxTasks, props); 45 | } 46 | 47 | @Override 48 | public void stop() { 49 | } 50 | 51 | @Override 52 | public ConfigDef config() { 53 | return ConnectorConfig.CONFIG_DEF; 54 | } 55 | 56 | @Override 57 | public String version() { 58 | return Version.get(); 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /src/main/java/dynamok/sink/DynamoDbSinkTask.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 Shikhar Bhushan 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package dynamok.sink; 18 | 19 | import com.amazonaws.auth.BasicAWSCredentials; 20 | import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; 21 | import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient; 22 | import com.amazonaws.services.dynamodbv2.model.AmazonDynamoDBException; 23 | import com.amazonaws.services.dynamodbv2.model.AttributeValue; 24 | import com.amazonaws.services.dynamodbv2.model.BatchWriteItemRequest; 25 | import com.amazonaws.services.dynamodbv2.model.BatchWriteItemResult; 26 | import com.amazonaws.services.dynamodbv2.model.LimitExceededException; 27 | import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputExceededException; 28 | import com.amazonaws.services.dynamodbv2.model.PutRequest; 29 | import com.amazonaws.services.dynamodbv2.model.WriteRequest; 30 | import dynamok.Version; 31 | import org.apache.kafka.clients.consumer.OffsetAndMetadata; 32 | import org.apache.kafka.common.TopicPartition; 33 | import org.apache.kafka.connect.data.Schema; 34 | import org.apache.kafka.connect.errors.ConnectException; 35 | import org.apache.kafka.connect.errors.DataException; 36 | import org.apache.kafka.connect.errors.RetriableException; 37 | import org.apache.kafka.connect.sink.SinkRecord; 38 | import org.apache.kafka.connect.sink.SinkTask; 39 | import org.slf4j.Logger; 40 | import org.slf4j.LoggerFactory; 41 | 42 | import java.util.ArrayList; 43 | import java.util.Collection; 44 | import java.util.HashMap; 45 | import java.util.Iterator; 46 | import java.util.List; 47 | import java.util.Map; 48 | 49 | 50 | public class DynamoDbSinkTask extends SinkTask { 51 | 52 | private enum ValueSource { 53 | RECORD_KEY { 54 | @Override 55 | String topAttributeName(ConnectorConfig config) { 56 | return config.topKeyAttribute; 57 | } 58 | }, 59 | RECORD_VALUE { 60 | @Override 61 | String topAttributeName(ConnectorConfig config) { 62 | return config.topValueAttribute; 63 | } 64 | }; 65 | 66 | abstract String topAttributeName(ConnectorConfig config); 67 | } 68 | 69 | private final Logger log = LoggerFactory.getLogger(DynamoDbSinkTask.class); 70 | 71 | private ConnectorConfig config; 72 | private AmazonDynamoDBClient client; 73 | private int remainingRetries; 74 | 75 | @Override 76 | public void start(Map props) { 77 | config = new ConnectorConfig(props); 78 | 79 | if (config.accessKeyId.value().isEmpty() || config.secretKey.value().isEmpty()) { 80 | client = new AmazonDynamoDBClient(DefaultAWSCredentialsProviderChain.getInstance()); 81 | log.debug("AmazonDynamoDBStreamsClient created with DefaultAWSCredentialsProviderChain"); 82 | } else { 83 | final BasicAWSCredentials awsCreds = new BasicAWSCredentials(config.accessKeyId.value(), config.secretKey.value()); 84 | client = new AmazonDynamoDBClient(awsCreds); 85 | log.debug("AmazonDynamoDBClient created with AWS credentials from connector configuration"); 86 | } 87 | 88 | client.configureRegion(config.region); 89 | remainingRetries = config.maxRetries; 90 | } 91 | 92 | @Override 93 | public void put(Collection records) { 94 | if (records.isEmpty()) return; 95 | 96 | try { 97 | if (records.size() == 1 || config.batchSize == 1) { 98 | for (final SinkRecord record : records) { 99 | client.putItem(tableName(record), toPutRequest(record).getItem()); 100 | } 101 | } else { 102 | final Iterator recordIterator = records.iterator(); 103 | while (recordIterator.hasNext()) { 104 | final Map> writesByTable = toWritesByTable(recordIterator); 105 | final BatchWriteItemResult batchWriteResponse = client.batchWriteItem(new 
BatchWriteItemRequest(writesByTable)); 106 | if (!batchWriteResponse.getUnprocessedItems().isEmpty()) { 107 | throw new UnprocessedItemsException(batchWriteResponse.getUnprocessedItems()); 108 | } 109 | } 110 | } 111 | } catch (LimitExceededException | ProvisionedThroughputExceededException e) { 112 | log.debug("Write failed with Limit/Throughput Exceeded exception; backing off"); 113 | context.timeout(config.retryBackoffMs); 114 | throw new RetriableException(e); 115 | } catch (AmazonDynamoDBException | UnprocessedItemsException e) { 116 | log.warn("Write failed, remainingRetries={}", remainingRetries, e); 117 | if (remainingRetries == 0) { 118 | throw new ConnectException(e); 119 | } else { 120 | remainingRetries--; 121 | context.timeout(config.retryBackoffMs); 122 | throw new RetriableException(e); 123 | } 124 | } 125 | 126 | remainingRetries = config.maxRetries; 127 | } 128 | 129 | private Map<String, List<WriteRequest>> toWritesByTable(Iterator<SinkRecord> recordIterator) { 130 | final Map<String, List<WriteRequest>> writesByTable = new HashMap<>(); 131 | for (int count = 0; recordIterator.hasNext() && count < config.batchSize; count++) { 132 | final SinkRecord record = recordIterator.next(); 133 | final WriteRequest writeRequest = new WriteRequest(toPutRequest(record)); 134 | writesByTable.computeIfAbsent(tableName(record), k -> new ArrayList<>(config.batchSize)).add(writeRequest); 135 | } 136 | return writesByTable; 137 | } 138 | 139 | private PutRequest toPutRequest(SinkRecord record) { 140 | final PutRequest put = new PutRequest(); 141 | if (!config.ignoreRecordValue) { 142 | insert(ValueSource.RECORD_VALUE, record.valueSchema(), record.value(), put); 143 | } 144 | if (!config.ignoreRecordKey) { 145 | insert(ValueSource.RECORD_KEY, record.keySchema(), record.key(), put); 146 | } 147 | if (config.kafkaCoordinateNames != null) { 148 | put.addItemEntry(config.kafkaCoordinateNames.topic, new AttributeValue().withS(record.topic())); 149 | put.addItemEntry(config.kafkaCoordinateNames.partition, new AttributeValue().withN(String.valueOf(record.kafkaPartition()))); 150 | put.addItemEntry(config.kafkaCoordinateNames.offset, new AttributeValue().withN(String.valueOf(record.kafkaOffset()))); 151 | } 152 | return put; 153 | } 154 | 155 | private void insert(ValueSource valueSource, Schema schema, Object value, PutRequest put) { 156 | final AttributeValue attributeValue; 157 | try { 158 | attributeValue = schema == null 159 | ?
AttributeValueConverter.toAttributeValueSchemaless(value) 160 | : AttributeValueConverter.toAttributeValue(schema, value); 161 | } catch (DataException e) { 162 | log.error("Failed to convert record with schema={} value={}", schema, value, e); 163 | throw e; 164 | } 165 | 166 | final String topAttributeName = valueSource.topAttributeName(config); 167 | if (!topAttributeName.isEmpty()) { 168 | put.addItemEntry(topAttributeName, attributeValue); 169 | } else if (attributeValue.getM() != null) { 170 | put.setItem(attributeValue.getM()); 171 | } else { 172 | throw new ConnectException("No top attribute name configured for " + valueSource + ", and it could not be converted to Map: " + attributeValue); 173 | } 174 | } 175 | 176 | private String tableName(SinkRecord record) { 177 | return config.tableFormat.replace("${topic}", record.topic()); 178 | } 179 | 180 | @Override 181 | public void flush(Map offsets) { 182 | } 183 | 184 | @Override 185 | public void stop() { 186 | if (client != null) { 187 | client.shutdown(); 188 | client = null; 189 | } 190 | } 191 | 192 | @Override 193 | public String version() { 194 | return Version.get(); 195 | } 196 | 197 | } 198 | -------------------------------------------------------------------------------- /src/main/java/dynamok/sink/KafkaCoordinateNames.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 Shikhar Bhushan 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package dynamok.sink; 18 | 19 | public class KafkaCoordinateNames { 20 | 21 | public final String topic; 22 | public final String partition; 23 | public final String offset; 24 | 25 | public KafkaCoordinateNames(String topic, String partition, String offset) { 26 | this.topic = topic; 27 | this.partition = partition; 28 | this.offset = offset; 29 | } 30 | 31 | @Override 32 | public String toString() { 33 | return "KafkaCoordinateNames{" + 34 | "topic='" + topic + '\'' + 35 | ", partition='" + partition + '\'' + 36 | ", offset='" + offset + '\'' + 37 | '}'; 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/dynamok/sink/UnprocessedItemsException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 Shikhar Bhushan 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package dynamok.sink; 18 | 19 | import com.amazonaws.services.dynamodbv2.model.WriteRequest; 20 | 21 | import java.util.List; 22 | import java.util.Map; 23 | 24 | public class UnprocessedItemsException extends Exception { 25 | 26 | public final Map> unprocessedItems; 27 | 28 | public UnprocessedItemsException(Map> unprocessedItems) { 29 | super(makeMessage(unprocessedItems)); 30 | this.unprocessedItems = unprocessedItems; 31 | } 32 | 33 | private static String makeMessage(Map> unprocessedItems) { 34 | final StringBuilder msg = new StringBuilder("Unprocessed writes: {"); 35 | for (Map.Entry> e : unprocessedItems.entrySet()) { 36 | msg.append(" ").append(e.getKey()).append("(").append(e.getValue().size()).append(")").append(" "); 37 | } 38 | msg.append("}"); 39 | return msg.toString(); 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/dynamok/source/ConnectorConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 Shikhar Bhushan 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package dynamok.source; 18 | 19 | import com.amazonaws.regions.Regions; 20 | import org.apache.kafka.common.config.AbstractConfig; 21 | import org.apache.kafka.common.config.ConfigDef; 22 | import org.apache.kafka.common.config.ConfigException; 23 | import org.apache.kafka.common.config.types.Password; 24 | 25 | import java.util.Arrays; 26 | import java.util.Collections; 27 | import java.util.List; 28 | import java.util.Map; 29 | 30 | class ConnectorConfig extends AbstractConfig { 31 | 32 | private enum Keys { 33 | ; 34 | static final String REGION = "region"; 35 | static final String ACCESS_KEY_ID = "access.key.id"; 36 | static final String SECRET_KEY = "secret.key"; 37 | static final String TABLES_PREFIX = "tables.prefix"; 38 | static final String TABLES_WHITELIST = "tables.whitelist"; 39 | static final String TABLES_BLACKLIST = "tables.blacklist"; 40 | static final String TOPIC_FORMAT = "topic.format"; 41 | } 42 | 43 | static final ConfigDef CONFIG_DEF = new ConfigDef() 44 | .define(Keys.REGION, ConfigDef.Type.STRING, ConfigDef.NO_DEFAULT_VALUE, (key, regionName) -> { 45 | if (Arrays.stream(Regions.values()).noneMatch(x -> x.getName().equals(regionName))) { 46 | throw new ConfigException("Invalid AWS region: " + regionName); 47 | } 48 | }, ConfigDef.Importance.HIGH, "AWS region for DynamoDB.") 49 | .define(Keys.ACCESS_KEY_ID, ConfigDef.Type.PASSWORD, "", 50 | ConfigDef.Importance.LOW, "Explicit AWS access key ID. " + 51 | "Leave empty to utilize the default credential provider chain.") 52 | .define(Keys.SECRET_KEY, ConfigDef.Type.PASSWORD, "", 53 | ConfigDef.Importance.LOW, "Explicit AWS secret access key. 
" + 54 | "Leave empty to utilize the default credential provider chain.") 55 | .define(Keys.TABLES_PREFIX, ConfigDef.Type.STRING, "", 56 | ConfigDef.Importance.MEDIUM, "Prefix for DynamoDB tables to source from.") 57 | .define(Keys.TABLES_WHITELIST, ConfigDef.Type.LIST, Collections.emptyList(), 58 | ConfigDef.Importance.MEDIUM, "Whitelist for DynamoDB tables to source from.") 59 | .define(Keys.TABLES_BLACKLIST, ConfigDef.Type.LIST, Collections.emptyList(), 60 | ConfigDef.Importance.MEDIUM, "Blacklist for DynamoDB tables to source from.") 61 | .define(Keys.TOPIC_FORMAT, ConfigDef.Type.STRING, "${table}", 62 | ConfigDef.Importance.HIGH, "Format string for destination Kafka topic, use ``${table}`` as placeholder for source table name."); 63 | 64 | final Regions region; 65 | final Password accessKeyId; 66 | final Password secretKey; 67 | final String topicFormat; 68 | final String tablesPrefix; 69 | final List tablesWhitelist; 70 | final List tablesBlacklist; 71 | 72 | ConnectorConfig(Map props) { 73 | super(CONFIG_DEF, props); 74 | region = Regions.fromName(getString(Keys.REGION)); 75 | accessKeyId = getPassword(Keys.ACCESS_KEY_ID); 76 | secretKey = getPassword(Keys.SECRET_KEY); 77 | tablesPrefix = getString(Keys.TABLES_PREFIX); 78 | tablesWhitelist = getList(Keys.TABLES_WHITELIST); 79 | tablesBlacklist = getList(Keys.TABLES_BLACKLIST); 80 | topicFormat = getString(Keys.TOPIC_FORMAT); 81 | } 82 | 83 | public static void main(String... args) { 84 | System.out.println(CONFIG_DEF.toRst()); 85 | } 86 | 87 | } 88 | -------------------------------------------------------------------------------- /src/main/java/dynamok/source/DynamoDbSourceConnector.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 Shikhar Bhushan 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
--------------------------------------------------------------------------------
/src/main/java/dynamok/source/ConnectorConfig.java:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright 2016 Shikhar Bhushan
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package dynamok.source;
18 | 
19 | import com.amazonaws.regions.Regions;
20 | import org.apache.kafka.common.config.AbstractConfig;
21 | import org.apache.kafka.common.config.ConfigDef;
22 | import org.apache.kafka.common.config.ConfigException;
23 | import org.apache.kafka.common.config.types.Password;
24 | 
25 | import java.util.Arrays;
26 | import java.util.Collections;
27 | import java.util.List;
28 | import java.util.Map;
29 | 
30 | class ConnectorConfig extends AbstractConfig {
31 | 
32 |     private enum Keys {
33 |         ;
34 |         static final String REGION = "region";
35 |         static final String ACCESS_KEY_ID = "access.key.id";
36 |         static final String SECRET_KEY = "secret.key";
37 |         static final String TABLES_PREFIX = "tables.prefix";
38 |         static final String TABLES_WHITELIST = "tables.whitelist";
39 |         static final String TABLES_BLACKLIST = "tables.blacklist";
40 |         static final String TOPIC_FORMAT = "topic.format";
41 |     }
42 | 
43 |     static final ConfigDef CONFIG_DEF = new ConfigDef()
44 |             .define(Keys.REGION, ConfigDef.Type.STRING, ConfigDef.NO_DEFAULT_VALUE, (key, regionName) -> {
45 |                 if (Arrays.stream(Regions.values()).noneMatch(x -> x.getName().equals(regionName))) {
46 |                     throw new ConfigException("Invalid AWS region: " + regionName);
47 |                 }
48 |             }, ConfigDef.Importance.HIGH, "AWS region for DynamoDB.")
49 |             .define(Keys.ACCESS_KEY_ID, ConfigDef.Type.PASSWORD, "",
50 |                     ConfigDef.Importance.LOW, "Explicit AWS access key ID. " +
51 |                             "Leave empty to utilize the default credential provider chain.")
52 |             .define(Keys.SECRET_KEY, ConfigDef.Type.PASSWORD, "",
53 |                     ConfigDef.Importance.LOW, "Explicit AWS secret access key. " +
54 |                             "Leave empty to utilize the default credential provider chain.")
55 |             .define(Keys.TABLES_PREFIX, ConfigDef.Type.STRING, "",
56 |                     ConfigDef.Importance.MEDIUM, "Prefix for DynamoDB tables to source from.")
57 |             .define(Keys.TABLES_WHITELIST, ConfigDef.Type.LIST, Collections.emptyList(),
58 |                     ConfigDef.Importance.MEDIUM, "Whitelist for DynamoDB tables to source from.")
59 |             .define(Keys.TABLES_BLACKLIST, ConfigDef.Type.LIST, Collections.emptyList(),
60 |                     ConfigDef.Importance.MEDIUM, "Blacklist for DynamoDB tables to source from.")
61 |             .define(Keys.TOPIC_FORMAT, ConfigDef.Type.STRING, "${table}",
62 |                     ConfigDef.Importance.HIGH, "Format string for destination Kafka topic, use ``${table}`` as placeholder for source table name.");
63 | 
64 |     final Regions region;
65 |     final Password accessKeyId;
66 |     final Password secretKey;
67 |     final String topicFormat;
68 |     final String tablesPrefix;
69 |     final List<String> tablesWhitelist;
70 |     final List<String> tablesBlacklist;
71 | 
72 |     ConnectorConfig(Map<String, String> props) {
73 |         super(CONFIG_DEF, props);
74 |         region = Regions.fromName(getString(Keys.REGION));
75 |         accessKeyId = getPassword(Keys.ACCESS_KEY_ID);
76 |         secretKey = getPassword(Keys.SECRET_KEY);
77 |         tablesPrefix = getString(Keys.TABLES_PREFIX);
78 |         tablesWhitelist = getList(Keys.TABLES_WHITELIST);
79 |         tablesBlacklist = getList(Keys.TABLES_BLACKLIST);
80 |         topicFormat = getString(Keys.TOPIC_FORMAT);
81 |     }
82 | 
83 |     public static void main(String... args) {
84 |         System.out.println(CONFIG_DEF.toRst());
85 |     }
86 | 
87 | }
88 | 
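For reference, a minimal sketch of filling in the source connector keys defined above; the region, prefix, table names, and topic format are placeholder values chosen for illustration, not defaults shipped with this project.

    // Illustration only: every property value below is a placeholder.
    package dynamok.source;

    import java.util.HashMap;
    import java.util.Map;

    class SourceConfigSketch {
        static ConnectorConfig exampleConfig() {
            final Map<String, String> props = new HashMap<>();
            props.put("region", "us-east-1");               // must be a valid com.amazonaws.regions.Regions name
            props.put("tables.prefix", "prod-");            // only tables with this prefix are considered
            props.put("tables.whitelist", "prod-orders");   // an empty whitelist means "all tables passing the prefix"
            props.put("tables.blacklist", "");              // blacklisted tables are always skipped
            props.put("topic.format", "dynamodb-${table}"); // ${table} is replaced with the source table name
            // access.key.id / secret.key left unset: the default AWS credential provider chain is used.
            return new ConnectorConfig(props);
        }
    }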
--------------------------------------------------------------------------------
/src/main/java/dynamok/source/DynamoDbSourceConnector.java:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright 2016 Shikhar Bhushan
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package dynamok.source;
18 | 
19 | import com.amazonaws.auth.BasicAWSCredentials;
20 | import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
21 | import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient;
22 | import com.amazonaws.services.dynamodbv2.AmazonDynamoDBStreamsClient;
23 | import com.amazonaws.services.dynamodbv2.model.DescribeStreamRequest;
24 | import com.amazonaws.services.dynamodbv2.model.DescribeStreamResult;
25 | import com.amazonaws.services.dynamodbv2.model.ListTablesResult;
26 | import com.amazonaws.services.dynamodbv2.model.Shard;
27 | import com.amazonaws.services.dynamodbv2.model.StreamSpecification;
28 | import com.amazonaws.services.dynamodbv2.model.StreamViewType;
29 | import com.amazonaws.services.dynamodbv2.model.TableDescription;
30 | import dynamok.Version;
31 | import org.apache.kafka.common.config.ConfigDef;
32 | import org.apache.kafka.connect.connector.Task;
33 | import org.apache.kafka.connect.errors.ConnectException;
34 | import org.apache.kafka.connect.source.SourceConnector;
35 | import org.apache.kafka.connect.util.ConnectorUtils;
36 | import org.slf4j.Logger;
37 | import org.slf4j.LoggerFactory;
38 | 
39 | import java.util.ArrayList;
40 | import java.util.HashMap;
41 | import java.util.HashSet;
42 | import java.util.List;
43 | import java.util.Map;
44 | import java.util.Set;
45 | import java.util.stream.Collectors;
46 | 
47 | public class DynamoDbSourceConnector extends SourceConnector {
48 | 
49 |     private final Logger log = LoggerFactory.getLogger(getClass());
50 | 
51 |     private ConnectorConfig config;
52 |     private Map<Shard, TableDescription> streamShards;
53 | 
54 |     @Override
55 |     public Class<? extends Task> taskClass() {
56 |         return DynamoDbSourceTask.class;
57 |     }
58 | 
59 |     @Override
60 |     public List<Map<String, String>> taskConfigs(int maxTasks) {
61 |         return ConnectorUtils.groupPartitions(new ArrayList<>(streamShards.keySet()), maxTasks).stream().map(taskShards -> {
62 |             final Map<String, String> taskConfig = new HashMap<>();
63 |             taskConfig.put(TaskConfig.Keys.REGION, config.region.getName());
64 |             taskConfig.put(TaskConfig.Keys.TOPIC_FORMAT, config.topicFormat);
65 |             taskConfig.put(TaskConfig.Keys.SHARDS, taskShards.stream().map(Shard::getShardId).collect(Collectors.joining(",")));
66 |             taskShards.forEach(shard -> {
67 |                 final TableDescription tableDesc = streamShards.get(shard);
68 |                 taskConfig.put(shard.getShardId() + "." + TaskConfig.Keys.TABLE, tableDesc.getTableName());
69 |                 taskConfig.put(shard.getShardId() + "." + TaskConfig.Keys.STREAM_ARN, tableDesc.getLatestStreamArn());
70 |             });
71 |             return taskConfig;
72 |         }).collect(Collectors.toList());
73 |     }
74 | 
75 |     @Override
76 |     public void start(Map<String, String> props) {
77 |         config = new ConnectorConfig(props);
78 |         streamShards = new HashMap<>();
79 | 
80 |         final AmazonDynamoDBClient client;
81 |         final AmazonDynamoDBStreamsClient streamsClient;
82 | 
83 |         if (config.accessKeyId.value().isEmpty() || config.secretKey.value().isEmpty()) {
84 |             client = new AmazonDynamoDBClient(DefaultAWSCredentialsProviderChain.getInstance());
85 |             streamsClient = new AmazonDynamoDBStreamsClient(DefaultAWSCredentialsProviderChain.getInstance());
86 |             log.debug("AmazonDynamoDBStreamsClient created with DefaultAWSCredentialsProviderChain");
87 |         } else {
88 |             final BasicAWSCredentials awsCreds = new BasicAWSCredentials(config.accessKeyId.value(), config.secretKey.value());
89 |             client = new AmazonDynamoDBClient(awsCreds);
90 |             streamsClient = new AmazonDynamoDBStreamsClient(awsCreds);
91 |             log.debug("AmazonDynamoDB clients created with AWS credentials from connector configuration");
92 |         }
93 | 
94 |         client.configureRegion(config.region);
95 |         streamsClient.configureRegion(config.region);
96 | 
97 |         final Set<String> ignoredTables = new HashSet<>();
98 |         final Set<String> consumeTables = new HashSet<>();
99 | 
100 |         String lastEvaluatedTableName = null;
101 |         do {
102 |             final ListTablesResult listResult = client.listTables(lastEvaluatedTableName);
103 | 
104 |             for (String tableName : listResult.getTableNames()) {
105 |                 if (!acceptTable(tableName)) {
106 |                     ignoredTables.add(tableName);
107 |                     continue;
108 |                 }
109 | 
110 |                 final TableDescription tableDesc = client.describeTable(tableName).getTable();
111 | 
112 |                 final StreamSpecification streamSpec = tableDesc.getStreamSpecification();
113 | 
114 |                 if (streamSpec == null || !streamSpec.isStreamEnabled()) {
115 |                     throw new ConnectException(String.format("DynamoDB table `%s` does not have streams enabled", tableName));
116 |                 }
117 | 
118 |                 final String streamViewType = streamSpec.getStreamViewType();
119 |                 if (!streamViewType.equals(StreamViewType.NEW_IMAGE.name()) && !streamViewType.equals(StreamViewType.NEW_AND_OLD_IMAGES.name())) {
120 |                     throw new ConnectException(String.format("DynamoDB stream view type for table `%s` is %s", tableName, streamViewType));
121 |                 }
122 | 
123 |                 final DescribeStreamResult describeStreamResult =
124 |                         streamsClient.describeStream(new DescribeStreamRequest().withStreamArn(tableDesc.getLatestStreamArn()));
125 | 
126 |                 for (Shard shard : describeStreamResult.getStreamDescription().getShards()) {
127 |                     streamShards.put(shard, tableDesc);
128 |                 }
129 | 
130 |                 consumeTables.add(tableName);
131 |             }
132 | 
133 |             lastEvaluatedTableName = listResult.getLastEvaluatedTableName();
134 |         } while (lastEvaluatedTableName != null);
135 | 
136 |         log.info("Tables to ignore: {}", ignoredTables);
137 |         log.info("Tables to ingest: {}", consumeTables);
138 | 
139 |         client.shutdown();
140 |         streamsClient.shutdown();
141 |     }
142 | 
143 |     private boolean acceptTable(String tableName) {
144 |         return tableName.startsWith(config.tablesPrefix)
145 |                 && (config.tablesWhitelist.isEmpty() || config.tablesWhitelist.contains(tableName))
146 |                 && !config.tablesBlacklist.contains(tableName);
147 |     }
148 | 
149 |     @Override
150 |     public void stop() {
151 |     }
152 | 
153 |     @Override
154 |     public ConfigDef config() {
155 |         return ConnectorConfig.CONFIG_DEF;
156 |     }
157 | 
158 |     @Override
159 |     public String version() {
160 |         return Version.get();
161 |     }
162 | 
163 | }
164 | 
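start() above rejects any matching table whose stream is disabled or whose view type is neither NEW_IMAGE nor NEW_AND_OLD_IMAGES. A hedged sketch of enabling a compliant stream with the AWS SDK v1; the table name is a placeholder.

    import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient;
    import com.amazonaws.services.dynamodbv2.model.StreamSpecification;
    import com.amazonaws.services.dynamodbv2.model.StreamViewType;
    import com.amazonaws.services.dynamodbv2.model.UpdateTableRequest;

    class EnableStreamSketch {
        // Turns on a stream that satisfies the connector's view-type check.
        static void enableStream(AmazonDynamoDBClient client, String tableName) {
            client.updateTable(new UpdateTableRequest()
                    .withTableName(tableName) // e.g. "prod-orders" (placeholder)
                    .withStreamSpecification(new StreamSpecification()
                            .withStreamEnabled(true)
                            .withStreamViewType(StreamViewType.NEW_AND_OLD_IMAGES)));
        }
    }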
--------------------------------------------------------------------------------
/src/main/java/dynamok/source/DynamoDbSourceTask.java:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright 2016 Shikhar Bhushan
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package dynamok.source;
18 | 
19 | import com.amazonaws.auth.BasicAWSCredentials;
20 | import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
21 | import com.amazonaws.services.dynamodbv2.AmazonDynamoDBStreamsClient;
22 | import com.amazonaws.services.dynamodbv2.model.GetRecordsRequest;
23 | import com.amazonaws.services.dynamodbv2.model.GetRecordsResult;
24 | import com.amazonaws.services.dynamodbv2.model.GetShardIteratorRequest;
25 | import com.amazonaws.services.dynamodbv2.model.ShardIteratorType;
26 | import com.amazonaws.services.dynamodbv2.model.StreamRecord;
27 | import dynamok.Version;
28 | import org.apache.kafka.connect.errors.ConnectException;
29 | import org.apache.kafka.connect.source.SourceRecord;
30 | import org.apache.kafka.connect.source.SourceTask;
31 | import org.slf4j.Logger;
32 | import org.slf4j.LoggerFactory;
33 | 
34 | import java.util.ArrayList;
35 | import java.util.Collections;
36 | import java.util.HashMap;
37 | import java.util.List;
38 | import java.util.Map;
39 | import java.util.stream.Collectors;
40 | 
41 | public class DynamoDbSourceTask extends SourceTask {
42 | 
43 |     private enum Keys {
44 |         ;
45 | 
46 |         static final String SHARD = "shard";
47 |         static final String SEQNUM = "seqNum";
48 |     }
49 | 
50 |     private final Logger log = LoggerFactory.getLogger(getClass());
51 | 
52 |     private TaskConfig config;
53 |     private AmazonDynamoDBStreamsClient streamsClient;
54 | 
55 |     private List<String> assignedShards;
56 |     private Map<String, String> shardIterators;
57 |     private int currentShardIdx;
58 | 
59 |     @Override
60 |     public void start(Map<String, String> props) {
61 |         config = new TaskConfig(props);
62 | 
63 |         if (config.accessKeyId.isEmpty() || config.secretKey.isEmpty()) {
64 |             streamsClient = new AmazonDynamoDBStreamsClient(DefaultAWSCredentialsProviderChain.getInstance());
65 |             log.debug("AmazonDynamoDBStreamsClient created with DefaultAWSCredentialsProviderChain");
66 |         } else {
67 |             final BasicAWSCredentials awsCreds = new BasicAWSCredentials(config.accessKeyId, config.secretKey);
68 |             streamsClient = new AmazonDynamoDBStreamsClient(awsCreds);
69 |             log.debug("AmazonDynamoDBStreamsClient created with AWS credentials from connector configuration");
70 |         }
71 | 
72 |         streamsClient.configureRegion(config.region);
73 | 
74 |         assignedShards = new ArrayList<>(config.shards);
75 |         shardIterators = new HashMap<>(assignedShards.size());
76 |         currentShardIdx = 0;
77 |     }
78 | 
79 |     @Override
80 |     public List<SourceRecord> poll() throws InterruptedException {
81 |         // TODO rate limiting?
82 | 
83 |         if (assignedShards.isEmpty()) {
84 |             throw new ConnectException("No remaining source shards");
85 |         }
86 | 
87 |         final String shardId = assignedShards.get(currentShardIdx);
88 | 
89 |         final GetRecordsRequest req = new GetRecordsRequest();
90 |         req.setShardIterator(shardIterator(shardId));
91 |         req.setLimit(100); // TODO configurable
92 | 
93 |         final GetRecordsResult rsp = streamsClient.getRecords(req);
94 |         if (rsp.getNextShardIterator() == null) {
95 |             log.info("Shard ID `{}` for table `{}` has been closed, it will no longer be polled", shardId, config.tableForShard(shardId));
96 |             shardIterators.remove(shardId);
97 |             assignedShards.remove(shardId);
98 |         } else {
99 |             log.debug("Retrieved {} records from shard ID `{}`", rsp.getRecords().size(), shardId);
100 |             shardIterators.put(shardId, rsp.getNextShardIterator());
101 |         }
102 | 
103 |         currentShardIdx = (currentShardIdx + 1) % assignedShards.size();
104 | 
105 |         final String tableName = config.tableForShard(shardId);
106 |         final String topic = config.topicFormat.replace("${table}", tableName);
107 |         final Map<String, String> sourcePartition = sourcePartition(shardId);
108 | 
109 |         return rsp.getRecords().stream()
110 |                 .map(dynamoRecord -> toSourceRecord(sourcePartition, topic, dynamoRecord.getDynamodb()))
111 |                 .collect(Collectors.toList());
112 |     }
113 | 
114 |     private SourceRecord toSourceRecord(Map<String, String> sourcePartition, String topic, StreamRecord dynamoRecord) {
115 |         return new SourceRecord(
116 |                 sourcePartition,
117 |                 Collections.singletonMap(Keys.SEQNUM, dynamoRecord.getSequenceNumber()),
118 |                 topic, null,
119 |                 RecordMapper.attributesSchema(), RecordMapper.toConnect(dynamoRecord.getKeys()),
120 |                 RecordMapper.attributesSchema(), RecordMapper.toConnect(dynamoRecord.getNewImage()),
121 |                 dynamoRecord.getApproximateCreationDateTime().getTime()
122 |         );
123 |     }
124 | 
125 |     private String shardIterator(String shardId) {
126 |         String iterator = shardIterators.get(shardId);
127 |         if (iterator == null) {
128 |             final GetShardIteratorRequest req = getShardIteratorRequest(
129 |                     shardId,
130 |                     config.streamArnForShard(shardId),
131 |                     storedSequenceNumber(sourcePartition(shardId))
132 |             );
133 |             iterator = streamsClient.getShardIterator(req).getShardIterator();
134 |             shardIterators.put(shardId, iterator);
135 |         }
136 |         return iterator;
137 |     }
138 | 
139 |     private Map<String, String> sourcePartition(String shardId) {
140 |         return Collections.singletonMap(Keys.SHARD, shardId);
141 |     }
142 | 
143 |     private String storedSequenceNumber(Map<String, String> partition) {
144 |         final Map<String, Object> offsetMap = context.offsetStorageReader().offset(partition);
145 |         return offsetMap != null ? (String) offsetMap.get(Keys.SEQNUM) : null;
146 |     }
147 | 
148 |     private GetShardIteratorRequest getShardIteratorRequest(
149 |             String shardId,
150 |             String streamArn,
151 |             String seqNum
152 |     ) {
153 |         final GetShardIteratorRequest req = new GetShardIteratorRequest();
154 |         req.setShardId(shardId);
155 |         req.setStreamArn(streamArn);
156 |         if (seqNum == null) {
157 |             req.setShardIteratorType(ShardIteratorType.TRIM_HORIZON);
158 |         } else {
159 |             req.setShardIteratorType(ShardIteratorType.AFTER_SEQUENCE_NUMBER);
160 |             req.setSequenceNumber(seqNum);
161 |         }
162 |         return req;
163 |     }
164 | 
165 |     @Override
166 |     public void stop() {
167 |         if (streamsClient != null) {
168 |             streamsClient.shutdown();
169 |             streamsClient = null;
170 |         }
171 |     }
172 | 
173 |     @Override
174 |     public String version() {
175 |         return Version.get();
176 |     }
177 | 
178 | }
179 | 
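The partition and offset maps that poll() and storedSequenceNumber() exchange with the Connect framework are single-entry maps keyed by the constants above; the shard ID and sequence number in this sketch are made-up values.

    import java.util.Collections;
    import java.util.Map;

    class OffsetBookkeepingSketch {
        // Source partition: which stream shard a record came from.
        static final Map<String, String> PARTITION =
                Collections.singletonMap("shard", "shardId-00000001234567890123-abcdef12"); // made-up shard ID
        // Source offset: last sequence number emitted for that shard. On restart the task asks for an
        // AFTER_SEQUENCE_NUMBER iterator from this value, or TRIM_HORIZON when no offset is stored yet.
        static final Map<String, String> OFFSET =
                Collections.singletonMap("seqNum", "300000000012345678901"); // made-up sequence number
    }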
--------------------------------------------------------------------------------
/src/main/java/dynamok/source/RecordMapper.java:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright 2016 Shikhar Bhushan
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package dynamok.source;
18 | 
19 | import com.amazonaws.services.dynamodbv2.model.AttributeValue;
20 | import org.apache.kafka.connect.data.Schema;
21 | import org.apache.kafka.connect.data.SchemaBuilder;
22 | import org.apache.kafka.connect.data.Struct;
23 | 
24 | import java.util.HashMap;
25 | import java.util.Map;
26 | 
27 | public enum RecordMapper {
28 |     ;
29 | 
30 |     private static final Schema AV_SCHEMA =
31 |             SchemaBuilder.struct()
32 |                     .name("DynamoDB.AttributeValue")
33 |                     .field("S", Schema.OPTIONAL_STRING_SCHEMA)
34 |                     .field("N", Schema.OPTIONAL_STRING_SCHEMA)
35 |                     .field("B", Schema.OPTIONAL_BYTES_SCHEMA)
36 |                     .field("SS", SchemaBuilder.array(Schema.STRING_SCHEMA).optional().build())
37 |                     .field("NS", SchemaBuilder.array(Schema.STRING_SCHEMA).optional().build())
38 |                     .field("BS", SchemaBuilder.array(Schema.BYTES_SCHEMA).optional().build())
39 |                     .field("NULL", Schema.OPTIONAL_BOOLEAN_SCHEMA)
40 |                     .field("BOOL", Schema.OPTIONAL_BOOLEAN_SCHEMA)
41 |                     // .field("L", "DynamoDB.AttributeValue") -- FIXME https://issues.apache.org/jira/browse/KAFKA-3910
42 |                     // .field("M", "DynamoDB.AttributeValue") -- FIXME https://issues.apache.org/jira/browse/KAFKA-3910
43 |                     .version(1)
44 |                     .build();
45 | 
46 |     private static final Schema DYNAMODB_ATTRIBUTES_SCHEMA =
47 |             SchemaBuilder.map(Schema.STRING_SCHEMA, AV_SCHEMA)
48 |                     .name("DynamoDB.Attributes")
49 |                     .version(1)
50 |                     .build();
51 | 
52 |     public static Schema attributesSchema() {
53 |         return DYNAMODB_ATTRIBUTES_SCHEMA;
54 |     }
55 | 
56 |     public static Map<String, Struct> toConnect(Map<String, AttributeValue> attributes) {
57 |         Map<String, Struct> connectAttributes = new HashMap<>(attributes.size());
58 |         for (Map.Entry<String, AttributeValue> attribute : attributes.entrySet()) {
59 |             final String attributeName = attribute.getKey();
60 |             final AttributeValue attributeValue = attribute.getValue();
61 |             final Struct attributeValueStruct = new Struct(AV_SCHEMA);
62 |             if (attributeValue.getS() != null) {
63 |                 attributeValueStruct.put("S", attributeValue.getS());
64 |             } else if (attributeValue.getN() != null) {
65 |                 attributeValueStruct.put("N", attributeValue.getN());
66 |             } else if (attributeValue.getB() != null) {
67 |                 attributeValueStruct.put("B", attributeValue.getB());
68 |             } else if (attributeValue.getSS() != null) {
69 |                 attributeValueStruct.put("SS", attributeValue.getSS());
70 |             } else if (attributeValue.getNS() != null) {
71 |                 attributeValueStruct.put("NS", attributeValue.getNS());
72 |             } else if (attributeValue.getBS() != null) {
73 |                 attributeValueStruct.put("BS", attributeValue.getBS());
74 |             } else if (attributeValue.getNULL() != null) {
75 |                 attributeValueStruct.put("NULL", attributeValue.getNULL());
76 |             } else if (attributeValue.getBOOL() != null) {
77 |                 attributeValueStruct.put("BOOL", attributeValue.getBOOL());
78 |             }
79 |             connectAttributes.put(attributeName, attributeValueStruct);
80 |         }
81 |         return connectAttributes;
82 |     }
83 | 
84 | }
85 | 
--------------------------------------------------------------------------------
/src/main/java/dynamok/source/TaskConfig.java:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright 2016 Shikhar Bhushan
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package dynamok.source;
18 | 
19 | import com.amazonaws.regions.Regions;
20 | import org.apache.kafka.common.config.ConfigException;
21 | 
22 | import java.util.Arrays;
23 | import java.util.List;
24 | import java.util.Map;
25 | import java.util.stream.Collectors;
26 | 
27 | class TaskConfig {
28 | 
29 |     enum Keys {
30 |         ;
31 | 
32 |         static String REGION = "region";
33 |         static String ACCESS_KEY_ID = "access.key.id";
34 |         static String SECRET_KEY = "secret.key";
35 |         static String TOPIC_FORMAT = "topic.format";
36 |         static String SHARDS = "shards";
37 |         static String TABLE = "table";
38 |         static String STREAM_ARN = "stream.arn";
39 |     }
40 | 
41 |     private final Map<String, String> props;
42 | 
43 |     final Regions region;
44 |     final String accessKeyId;
45 |     final String secretKey;
46 |     final String topicFormat;
47 |     final List<String> shards;
48 | 
49 |     TaskConfig(Map<String, String> props) {
50 |         this.props = props;
51 | 
52 |         region = Regions.fromName(getValue(Keys.REGION));
53 |         accessKeyId = getValue(Keys.ACCESS_KEY_ID, "");
54 |         secretKey = getValue(Keys.SECRET_KEY, "");
55 |         topicFormat = getValue(Keys.TOPIC_FORMAT);
56 |         shards = Arrays.stream(getValue(Keys.SHARDS).split(",")).filter(shardId -> !shardId.isEmpty()).collect(Collectors.toList());
57 |     }
58 | 
59 |     String tableForShard(String shardId) {
60 |         return getValue(shardId + "." + Keys.TABLE);
61 |     }
62 | 
63 |     String streamArnForShard(String shardId) {
64 |         return getValue(shardId + "." + Keys.STREAM_ARN);
65 |     }
66 | 
67 |     private String getValue(String key) {
68 |         final String value = props.get(key);
69 |         if (value == null) {
70 |             throw new ConfigException(key, "Missing task configuration");
71 |         }
72 |         return value;
73 |     }
74 | 
75 |     private String getValue(String key, String defaultValue) {
76 |         return props.getOrDefault(key, defaultValue);
77 |     }
78 | 
79 | }
80 | 
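A hedged sketch of the flat per-task properties that DynamoDbSourceConnector.taskConfigs() builds and this class parses; the shard IDs, table name, and stream ARN are placeholders.

    // Illustration only: keys mirror TaskConfig.Keys, values are placeholders.
    package dynamok.source;

    import java.util.HashMap;
    import java.util.Map;

    class TaskConfigSketch {
        static TaskConfig exampleTaskConfig() {
            final Map<String, String> props = new HashMap<>();
            props.put("region", "us-east-1");
            props.put("topic.format", "dynamodb-${table}");
            props.put("shards", "shardId-000001,shardId-000002");
            // Per-shard keys are "<shardId>.table" and "<shardId>.stream.arn".
            props.put("shardId-000001.table", "prod-orders");
            props.put("shardId-000001.stream.arn", "arn:aws:dynamodb:us-east-1:111122223333:table/prod-orders/stream/2016-06-01T00:00:00.000");
            props.put("shardId-000002.table", "prod-orders");
            props.put("shardId-000002.stream.arn", "arn:aws:dynamodb:us-east-1:111122223333:table/prod-orders/stream/2016-06-01T00:00:00.000");
            return new TaskConfig(props);
        }
    }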
--------------------------------------------------------------------------------
/src/test/java/dynamok/sink/AttributeValueConverterTest.java:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright 2016 Shikhar Bhushan
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package dynamok.sink;
18 | 
19 | import com.amazonaws.services.dynamodbv2.model.AttributeValue;
20 | import com.google.common.collect.ImmutableMap;
21 | import com.google.common.collect.ImmutableSet;
22 | import org.apache.kafka.connect.data.Decimal;
23 | import org.apache.kafka.connect.data.Schema;
24 | import org.apache.kafka.connect.data.SchemaBuilder;
25 | import org.apache.kafka.connect.data.Struct;
26 | import org.junit.Test;
27 | 
28 | import java.math.BigDecimal;
29 | import java.nio.ByteBuffer;
30 | import java.util.Arrays;
31 | import java.util.Map;
32 | 
33 | import static org.junit.Assert.assertEquals;
34 | import static org.junit.Assert.assertTrue;
35 | 
36 | public class AttributeValueConverterTest {
37 | 
38 |     @Test
39 |     public void schemalessConversion() {
40 |         final Map<String, AttributeValue> attributeMap =
41 |                 AttributeValueConverter.toAttributeValueSchemaless(
42 |                         ImmutableMap.<String, Object>builder()
43 |                                 .put("byte", (byte) 1)
44 |                                 .put("short", (short) 2)
45 |                                 .put("int", 3)
46 |                                 .put("long", 4L)
47 |                                 .put("float", 5.1f)
48 |                                 .put("double", 6.2d)
49 |                                 .put("decimal", new BigDecimal("7.3"))
50 |                                 .put("bool", true)
51 |                                 .put("string", "test")
52 |                                 .put("byte_array", new byte[]{42})
53 |                                 .put("byte_buffer", ByteBuffer.wrap(new byte[]{42}))
54 |                                 .put("list", Arrays.asList(1, 2, 3))
55 |                                 .put("empty_set", ImmutableSet.of())
56 |                                 .put("string_set", ImmutableSet.of("a", "b", "c"))
57 |                                 .put("number_set", ImmutableSet.of(1, 2, 3))
58 |                                 .put("bytes_set", ImmutableSet.of(new byte[]{42}))
59 |                                 .put("map", ImmutableMap.of("key", "value"))
60 |                                 .build()
61 |                 ).getM();
62 |         assertEquals("1", attributeMap.get("byte").getN());
63 |         assertEquals("2", attributeMap.get("short").getN());
64 |         assertEquals("3", attributeMap.get("int").getN());
65 |         assertEquals("4", attributeMap.get("long").getN());
66 |         assertEquals("5.1", attributeMap.get("float").getN());
67 |         assertEquals("6.2", attributeMap.get("double").getN());
68 |         assertEquals("7.3", attributeMap.get("decimal").getN());
69 |         assertTrue(attributeMap.get("bool").getBOOL());
70 |         assertEquals("test", attributeMap.get("string").getS());
71 |         assertEquals(ByteBuffer.wrap(new byte[]{42}), attributeMap.get("byte_array").getB());
72 |         assertEquals(
73 |                 Arrays.asList(new AttributeValue().withN("1"), new AttributeValue().withN("2"), new AttributeValue().withN("3")),
74 |                 attributeMap.get("list").getL()
75 |         );
76 |         assertTrue(attributeMap.get("empty_set").getNULL());
77 |         assertEquals(Arrays.asList("a", "b", "c"), attributeMap.get("string_set").getSS());
78 |         assertEquals(Arrays.asList("1", "2", "3"), attributeMap.get("number_set").getNS());
79 |         assertEquals(Arrays.asList(ByteBuffer.wrap(new byte[]{42})), attributeMap.get("bytes_set").getBS());
80 |         assertEquals(ImmutableMap.of("key", new AttributeValue().withS("value")), attributeMap.get("map").getM());
81 |     }
82 | 
83 |     @Test
84 |     public void schemaedConversion() {
85 |         Schema nestedStructSchema = SchemaBuilder.struct().field("x", SchemaBuilder.STRING_SCHEMA).build();
86 |         Schema schema = SchemaBuilder.struct()
87 |                 .field("int8", SchemaBuilder.INT8_SCHEMA)
88 |                 .field("int16", SchemaBuilder.INT16_SCHEMA)
89 |                 .field("int32", SchemaBuilder.INT32_SCHEMA)
90 |                 .field("int64", SchemaBuilder.INT64_SCHEMA)
91 |                 .field("float32", SchemaBuilder.FLOAT32_SCHEMA)
92 |                 .field("float64", SchemaBuilder.FLOAT64_SCHEMA)
93 |                 .field("decimal", Decimal.schema(1))
94 |                 .field("bool", SchemaBuilder.BOOLEAN_SCHEMA)
95 |                 .field("string", SchemaBuilder.STRING_SCHEMA)
96 |                 .field("bytes_a", SchemaBuilder.BYTES_SCHEMA)
97 |                 .field("bytes_b", SchemaBuilder.BYTES_SCHEMA)
98 |                 .field("array", SchemaBuilder.array(SchemaBuilder.INT32_SCHEMA).build())
99 |                 .field("map", SchemaBuilder.map(SchemaBuilder.STRING_SCHEMA, SchemaBuilder.STRING_SCHEMA))
100 |                 .field("inner_struct", nestedStructSchema)
101 |                 .field("optional_string", SchemaBuilder.OPTIONAL_STRING_SCHEMA)
102 |                 .build();
103 | 
104 |         final Struct struct = new Struct(schema)
105 |                 .put("int8", (byte) 1)
106 |                 .put("int16", (short) 2)
107 |                 .put("int32", 3)
108 |                 .put("int64", 4L)
109 |                 .put("float32", 5.1f)
110 |                 .put("float64", 6.2d)
111 |                 .put("decimal", new BigDecimal("7.3"))
112 |                 .put("bool", true)
113 |                 .put("string", "test")
114 |                 .put("bytes_a", new byte[]{42})
115 |                 .put("bytes_b", ByteBuffer.wrap(new byte[]{42}))
116 |                 .put("array", Arrays.asList(1, 2, 3))
117 |                 .put("map", ImmutableMap.of("key", "value"))
118 |                 .put("inner_struct", new Struct(nestedStructSchema).put("x", "y"));
119 | 
120 |         final Map<String, AttributeValue> attributeMap = AttributeValueConverter.toAttributeValue(schema, struct).getM();
121 |         assertEquals("1", attributeMap.get("int8").getN());
122 |         assertEquals("2", attributeMap.get("int16").getN());
123 |         assertEquals("3", attributeMap.get("int32").getN());
124 |         assertEquals("4", attributeMap.get("int64").getN());
125 |         assertEquals("5.1", attributeMap.get("float32").getN());
126 |         assertEquals("6.2", attributeMap.get("float64").getN());
127 |         assertEquals("7.3", attributeMap.get("decimal").getN());
128 |         assertTrue(attributeMap.get("bool").getBOOL());
129 |         assertEquals("test", attributeMap.get("string").getS());
130 |         assertEquals(ByteBuffer.wrap(new byte[]{42}), attributeMap.get("bytes_a").getB());
131 |         assertEquals(ByteBuffer.wrap(new byte[]{42}), attributeMap.get("bytes_b").getB());
132 |         assertEquals(
133 |                 Arrays.asList(new AttributeValue().withN("1"), new AttributeValue().withN("2"), new AttributeValue().withN("3")),
134 |                 attributeMap.get("array").getL()
135 |         );
136 |         assertEquals(ImmutableMap.of("key", new AttributeValue().withS("value")), attributeMap.get("map").getM());
137 |         assertEquals(ImmutableMap.of("x", new AttributeValue().withS("y")), attributeMap.get("inner_struct").getM());
138 |         assertTrue(attributeMap.get("optional_string").getNULL());
139 |     }
140 | 
141 | }
142 | 
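The converter exercised above produces the AttributeValue maps that DynamoDB's BatchWriteItem API consumes. A minimal sketch, not the repository's DynamoDbSinkTask code, of how unprocessed writes from that API would surface as the sink's UnprocessedItemsException:

    package dynamok.sink;

    import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient;
    import com.amazonaws.services.dynamodbv2.model.AttributeValue;
    import com.amazonaws.services.dynamodbv2.model.PutRequest;
    import com.amazonaws.services.dynamodbv2.model.WriteRequest;

    import java.util.Collections;
    import java.util.List;
    import java.util.Map;

    class BatchWriteSketch {
        // Wraps one converted item in a put request and raises UnprocessedItemsException for any leftovers,
        // so the caller can retry just the writes DynamoDB did not accept.
        static void putItem(AmazonDynamoDBClient client, String table, Map<String, AttributeValue> item)
                throws UnprocessedItemsException {
            final List<WriteRequest> writes =
                    Collections.singletonList(new WriteRequest().withPutRequest(new PutRequest().withItem(item)));
            final Map<String, List<WriteRequest>> unprocessed =
                    client.batchWriteItem(Collections.singletonMap(table, writes)).getUnprocessedItems();
            if (!unprocessed.isEmpty()) {
                throw new UnprocessedItemsException(unprocessed);
            }
        }
    }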
--------------------------------------------------------------------------------
/src/test/java/dynamok/source/RecordMapperTest.java:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright 2016 Shikhar Bhushan
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package dynamok.source;
18 | 
19 | import com.amazonaws.services.dynamodbv2.model.AttributeValue;
20 | import com.google.common.collect.ImmutableMap;
21 | import org.apache.kafka.connect.data.Struct;
22 | import org.junit.Test;
23 | 
24 | import java.nio.ByteBuffer;
25 | import java.util.Collections;
26 | import java.util.Map;
27 | 
28 | import static org.junit.Assert.assertEquals;
29 | 
30 | public class RecordMapperTest {
31 | 
32 |     @Test
33 |     public void conversions() {
34 |         final String string = "test";
35 |         final String number = "42";
36 |         final ByteBuffer bytes = ByteBuffer.wrap(new byte[]{42});
37 |         final boolean bool = true;
38 |         final boolean nullValue = true;
39 |         final Map<String, Struct> record = RecordMapper.toConnect(
40 |                 ImmutableMap.<String, AttributeValue>builder()
41 |                         .put("thestring", new AttributeValue().withS(string))
42 |                         .put("thenumber", new AttributeValue().withN(number))
43 |                         .put("thebytes", new AttributeValue().withB(bytes))
44 |                         .put("thestrings", new AttributeValue().withSS(string))
45 |                         .put("thenumbers", new AttributeValue().withNS(number))
46 |                         .put("thebyteslist", new AttributeValue().withBS(bytes))
47 |                         .put("thenull", new AttributeValue().withNULL(true))
48 |                         .put("thebool", new AttributeValue().withBOOL(bool))
49 |                         .build()
50 |         );
51 |         assertEquals(string, record.get("thestring").get("S"));
52 |         assertEquals(number, record.get("thenumber").get("N"));
53 |         assertEquals(bytes, record.get("thebytes").get("B"));
54 |         assertEquals(Collections.singletonList(string), record.get("thestrings").get("SS"));
55 |         assertEquals(Collections.singletonList(number), record.get("thenumbers").get("NS"));
56 |         assertEquals(Collections.singletonList(bytes), record.get("thebyteslist").get("BS"));
57 |         assertEquals(nullValue, record.get("thenull").get("NULL"));
58 |         assertEquals(bool, record.get("thebool").get("BOOL"));
59 |     }
60 | 
61 | }
62 | 
--------------------------------------------------------------------------------