├── .circleci └── config.yml ├── .github └── FUNDING.yml ├── .gitignore ├── LICENSE ├── README.md ├── assets └── dariobalinzo.png ├── doc ├── LICENSE └── README.md ├── etc └── quickstart-elasticsearch-source.properties ├── manifest.json ├── pom.xml └── src ├── main └── java │ └── com │ └── github │ └── dariobalinzo │ ├── ElasticSourceConnector.java │ ├── ElasticSourceConnectorConfig.java │ ├── Version.java │ ├── elastic │ ├── CursorField.java │ ├── ElasticConnection.java │ ├── ElasticConnectionBuilder.java │ ├── ElasticIndexMonitorThread.java │ ├── ElasticJsonNaming.java │ ├── ElasticRepository.java │ ├── SslContextException.java │ └── response │ │ ├── Cursor.java │ │ └── PageResult.java │ ├── filter │ ├── BlacklistFilter.java │ ├── DocumentFilter.java │ ├── JsonCastFilter.java │ ├── JsonElementFilter.java │ ├── JsonFilterVisitor.java │ └── WhitelistFilter.java │ ├── schema │ ├── AvroName.java │ ├── FieldNameConverter.java │ ├── NopNameConverter.java │ ├── SchemaConverter.java │ └── StructConverter.java │ └── task │ ├── ElasticSourceTask.java │ ├── ElasticSourceTaskConfig.java │ └── OffsetSerializer.java └── test ├── java └── com │ └── github │ └── dariobalinzo │ ├── ElasticIndexMonitorThreadTest.java │ ├── FooTest.java │ ├── TestContainersContext.java │ ├── elastic │ ├── ElasticRepositoryTest.java │ └── ElasticSourceConnectorTest.java │ ├── filter │ ├── BlacklistFilterTest.java │ ├── JsonCastFilterTest.java │ └── WhitelistFilterTest.java │ ├── foo.json │ ├── schema │ ├── AvroNameTest.java │ └── SchemaConverterTest.java │ └── task │ ├── ElasticSourceTaskTest.java │ └── MockOffsetFactory.java └── resources └── com └── github └── dariobalinzo ├── filter └── document.json ├── logback.xml └── schema └── complexDocument.json /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | # Check https://circleci.com/docs/2.0/language-java/ for more details 2 | # 3 | version: 2 4 | jobs: 5 | build: 6 | machine: ubuntu-2004:202008-01 7 | steps: 8 | - checkout 9 | 10 | - run: sudo apt-get update && sudo apt-get install openjdk-8-jdk 11 | - run: export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 && mvn -B clean install -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | custom: [ "https://www.paypal.me/coffeeDarioBalinzo" ] 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /elastic-source-connect.iml 2 | /target/ 3 | /.idea/ 4 | /lib/ 5 | /.vscode/ 6 | /.settings/ 7 | .factorypath 8 | .classpath 9 | .project 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at
194 | 
195 | http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Kafka-connect-elasticsearch-source
2 | 
3 | [![YourActionName Actions Status](https://github.com/DarioBalinzo/kafka-connect-elasticsearch-source/workflows/Java%20CI%20with%20Maven/badge.svg)](https://github.com/DarioBalinzo/kafka-connect-elasticsearch-source/actions)
4 | 
5 | Kafka Connect Elasticsearch Source: fetches data from Elasticsearch and sends it to Kafka. The connector fetches only new
6 | data using a strictly incremental / temporal field (such as a timestamp or an incrementing id). It supports dynamic schemas
7 | and nested objects/arrays.
8 | 
9 | ## Requirements:
10 | 
11 | - Elasticsearch 6.x and 7.x
12 | - Java >= 8
13 | - Maven
14 | 
15 | ## Output data serialization format:
16 | 
17 | The connector uses Kafka Connect schemas and structs, which are agnostic to the serialization method in use (e.g. it
18 | might be Avro, JSON, etc.).
19 | 
20 | ## Bugs or new Ideas?
21 | 
22 | - Issues tracker: https://github.com/DarioBalinzo/kafka-connect-elasticsearch-source/issues
23 | - Feel free to open an issue to discuss new ideas (or propose new solutions with a PR).
24 | 
25 | ## Installation:
26 | 
27 | Compile the project with:
28 | 
29 | ```bash
30 | mvn clean package -DskipTests
31 | ```
32 | 
33 | You can also compile and run both unit and integration tests (Docker is required) with:
34 | 
35 | ```bash
36 | mvn clean package
37 | ```
38 | 
39 | Copy the jar with dependencies from the target folder into the Connect classpath
40 | (e.g. ``/usr/share/java/kafka-connect-elasticsearch``) or set the ``plugin.path`` parameter appropriately.
41 | 
42 | ## Example
43 | 
44 | Using Kafka Connect in distributed mode, here is a sample config to fetch ``my_awesome_index*`` indices and to produce
45 | output topics with the ``es_`` prefix:
46 | 
47 | ```json
48 | {
49 |   "name": "elastic-source",
50 |   "config": {
51 |     "connector.class":"com.github.dariobalinzo.ElasticSourceConnector",
52 |     "tasks.max": "1",
53 |     "es.host" : "localhost",
54 |     "es.port" : "9200",
55 |     "index.prefix" : "my_awesome_index",
56 |     "topic.prefix" : "es_",
57 |     "incrementing.field.name" : "@timestamp"
58 |   }
59 | }
60 | ```
61 | 
62 | To start the connector with curl:
63 | 
64 | ```bash
65 | curl -X POST -H "Content-Type: application/json" --data @config.json http://localhost:8083/connectors | jq
66 | ```
67 | 
68 | To check the status:
69 | 
70 | ```bash
71 | curl localhost:8083/connectors/elastic-source/status | jq
72 | ```
73 | 
74 | To stop the connector:
75 | 
76 | ```bash
77 | curl -X DELETE localhost:8083/connectors/elastic-source | jq
78 | ```
79 | 
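The connector can also be tried without a distributed Connect cluster by running a standalone worker against the sample properties file shipped in ``etc/``. A minimal sketch, assuming a plain Apache Kafka installation with its default ``connect-standalone`` worker config (paths may differ in your setup):

```bash
# Standalone worker: pass the worker config first, then the connector properties
bin/connect-standalone.sh config/connect-standalone.properties \
    etc/quickstart-elasticsearch-source.properties
```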
80 | ## Documentation
81 | 
82 | ### Elasticsearch Configuration
83 | 
84 | ``es.host``
85 | Elasticsearch host. Optionally, it is possible to specify many hosts using ``;`` as separator (``host1;host2;host3``); a combined connection example is shown at the end of this section.
86 | 
87 | * Type: string
88 | * Importance: high
89 | * Dependents: ``index.prefix``
90 | 
91 | ``es.port``
92 | Elasticsearch port
93 | 
94 | * Type: string
95 | * Importance: high
96 | * Dependents: ``index.prefix``
97 | 
98 | ``es.scheme``
99 | Elasticsearch scheme (http/https)
100 | 
101 | * Type: string
102 | * Importance: medium
103 | * Default: ``http``
104 | 
105 | ``es.user``
106 | Elasticsearch username
107 | 
108 | * Type: string
109 | * Default: null
110 | * Importance: high
111 | 
112 | ``es.password``
113 | Elasticsearch password
114 | 
115 | * Type: password
116 | * Default: null
117 | * Importance: high
118 | 
119 | 
120 | ``incrementing.field.name``
121 | The name of the strictly incrementing field to use to detect new records.
122 | 
123 | * Type: any
124 | * Importance: high
125 | 
126 | ``incrementing.secondary.field.name``
127 | In case the main incrementing field may have duplicates,
128 | this secondary field is used as a secondary sort field in order
129 | to avoid data loss when paginating (available since version 1.4).
130 | 
131 | * Type: any
132 | * Importance: low
133 | 
134 | 
135 | ``es.tls.truststore.location``
136 | Elasticsearch SSL truststore location
137 | 
138 | * Type: string
139 | * Importance: medium
140 | 
141 | ``es.tls.truststore.password``
142 | Elasticsearch SSL truststore password
143 | 
144 | * Type: string
145 | * Default: ""
146 | * Importance: medium
147 | 
148 | ``es.tls.keystore.location``
149 | Elasticsearch keystore location
150 | 
151 | * Type: string
152 | * Importance: medium
153 | 
154 | ``es.tls.keystore.password``
155 | Elasticsearch keystore password
156 | 
157 | * Type: string
158 | * Default: ""
159 | * Importance: medium
160 | 
161 | ``connection.attempts``
162 | Maximum number of attempts to retrieve a valid Elasticsearch connection.
163 | 
164 | * Type: int
165 | * Default: 3
166 | * Importance: low
167 | 
168 | ``connection.backoff.ms``
169 | Backoff time in milliseconds between connection attempts.
170 | 
171 | * Type: long
172 | * Default: 10000
173 | * Importance: low
174 | 
175 | ``index.prefix``
176 | Index prefix to include in copying.
177 | Periodically, new indices matching the prefix are discovered.
178 | 
179 | * Type: string
180 | * Default: ""
181 | * Importance: medium
182 | 
183 | ``index.names``
184 | Comma-separated list of Elasticsearch indices: `es1,es2,es3`
185 | 
186 | * Type: string
187 | * Default: null
188 | * Importance: medium
189 | 
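Putting the connection options above together, here is a sketch of the connection-related fragment of a connector ``config`` block for a password-protected cluster behind TLS. The hostnames, credentials and truststore path are placeholders, not defaults:

```json
{
  "connector.class": "com.github.dariobalinzo.ElasticSourceConnector",
  "es.host": "es-node1;es-node2;es-node3",
  "es.port": "9200",
  "es.scheme": "https",
  "es.user": "elastic",
  "es.password": "my-secret-password",
  "es.tls.truststore.location": "/etc/pki/elastic-truststore.jks",
  "es.tls.truststore.password": "changeit",
  "connection.attempts": "3",
  "connection.backoff.ms": "10000"
}
```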
190 | ### Connector Configuration
191 | 
192 | ``poll.interval.ms``
193 | Frequency in ms to poll for new data in each index.
194 | 
195 | * Type: int
196 | * Default: 5000
197 | * Importance: high
198 | 
199 | ``batch.max.rows``
200 | Maximum number of documents to include in a single batch when polling for new data.
201 | 
202 | * Type: int
203 | * Default: 10000
204 | * Importance: low
205 | 
206 | ``topic.prefix``
207 | Prefix to prepend to index names to generate the name of the Kafka topic to publish data to.
208 | 
209 | * Type: string
210 | * Importance: high
211 | 
212 | ``filters.whitelist``
213 | Whitelist filter for extracting a subset of fields from Elasticsearch JSON documents. The whitelist filter supports
214 | nested fields. To provide multiple fields use `;` as separator
215 | (e.g. `customer;order.qty;order.price`).
216 | 
217 | * Type: string
218 | * Importance: medium
219 | * Default: null
220 | 
221 | ``filters.blacklist``
222 | Blacklist filter for extracting a subset of fields from Elasticsearch JSON documents. The blacklist filter supports
223 | nested fields. To provide multiple fields use `;` as separator
224 | (e.g. `customer;order.qty;order.price`).
225 | 
226 | * Type: string
227 | * Importance: medium
228 | * Default: null
229 | 
230 | ``filters.json_cast``
231 | This filter casts nested fields to a JSON string, avoiding recursive parsing into Kafka Connect schemas. The json-cast
232 | filter supports nested fields. To provide multiple fields use `;` as separator
233 | (e.g. `customer;order.qty;order.price`).
234 | 
235 | * Type: string
236 | * Importance: medium
237 | * Default: null
238 | 
239 | ``fieldname_converter``
240 | Configures which field name converter is used (allowed values: `avro` or `nop`). By default, the Avro field name
241 | converter renames JSON fields that do not respect the Avro
242 | specification (https://avro.apache.org/docs/current/spec.html#names)
243 | so that they can be serialized correctly. To disable field name conversion, set this parameter to `nop`.
244 | 
245 | * Type: string
246 | * Importance: medium
247 | * Default: avro
248 | 
--------------------------------------------------------------------------------
/assets/dariobalinzo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DarioBalinzo/kafka-connect-elasticsearch-source/bc9d2632ec596fa88b2328cfc7658533dcb6baa0/assets/dariobalinzo.png
--------------------------------------------------------------------------------
/doc/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 | 
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 | 
7 | 1. Definitions.
8 | 
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 | 
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 | 
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 | 
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 | 
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 | 
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 | 
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 | 
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 | 
176 | END OF TERMS AND CONDITIONS
177 | 
178 | APPENDIX: How to apply the Apache License to your work.
179 | 
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 | 
189 | Copyright [yyyy] [name of copyright owner]
190 | 
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 | 
195 | http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | 
--------------------------------------------------------------------------------
/doc/README.md:
--------------------------------------------------------------------------------
1 | # Kafka-connect-elasticsearch-source
2 | 
3 | [![YourActionName Actions Status](https://github.com/DarioBalinzo/kafka-connect-elasticsearch-source/workflows/Java%20CI%20with%20Maven/badge.svg)](https://github.com/DarioBalinzo/kafka-connect-elasticsearch-source/actions)
4 | 
5 | Kafka Connect Elasticsearch Source: fetches data from Elasticsearch and sends it to Kafka. The connector fetches only new
6 | data using a strictly incremental / temporal field (such as a timestamp or an incrementing id). It supports dynamic schemas
7 | and nested objects/arrays.
8 | 
9 | ## Requirements:
10 | 
11 | - Elasticsearch 6.x and 7.x
12 | - Java >= 8
13 | - Maven
14 | 
15 | ## Output data serialization format:
16 | 
17 | The connector uses Kafka Connect schemas and structs, which are agnostic to the serialization method in use (e.g. it
18 | might be Avro, JSON, etc.).
19 | 
20 | ## Bugs or new Ideas?
21 | 
22 | - Issues tracker: https://github.com/DarioBalinzo/kafka-connect-elasticsearch-source/issues
23 | - Feel free to open an issue to discuss new ideas (or propose new solutions with a PR).
24 | 
25 | ## Installation:
26 | 
27 | Compile the project with:
28 | 
29 | ```bash
30 | mvn clean package -DskipTests
31 | ```
32 | 
33 | You can also compile and run both unit and integration tests (Docker is required) with:
34 | 
35 | ```bash
36 | mvn clean package
37 | ```
38 | 
39 | Copy the jar with dependencies from the target folder into the Connect classpath
40 | (e.g. ``/usr/share/java/kafka-connect-elasticsearch``) or set the ``plugin.path`` parameter appropriately.
41 | 
42 | ## Example
43 | 
44 | Using Kafka Connect in distributed mode, here is a sample config to fetch ``my_awesome_index*`` indices and to produce
45 | output topics with the ``es_`` prefix:
46 | 
47 | ```json
48 | {
49 |   "name": "elastic-source",
50 |   "config": {
51 |     "connector.class":"com.github.dariobalinzo.ElasticSourceConnector",
52 |     "tasks.max": "1",
53 |     "es.host" : "localhost",
54 |     "es.port" : "9200",
55 |     "index.prefix" : "my_awesome_index",
56 |     "topic.prefix" : "es_",
57 |     "incrementing.field.name" : "@timestamp"
58 |   }
59 | }
60 | ```
61 | 
62 | To start the connector with curl:
63 | 
64 | ```bash
65 | curl -X POST -H "Content-Type: application/json" --data @config.json http://localhost:8083/connectors | jq
66 | ```
67 | 
68 | To check the status:
69 | 
70 | ```bash
71 | curl localhost:8083/connectors/elastic-source/status | jq
72 | ```
73 | 
74 | To stop the connector:
75 | 
76 | ```bash
77 | curl -X DELETE localhost:8083/connectors/elastic-source | jq
78 | ```
79 | 
80 | ## Documentation
81 | 
82 | ### Elasticsearch Configuration
83 | 
84 | ``es.host``
85 | Elasticsearch host. Optionally, it is possible to specify many hosts using ``;`` as separator (``host1;host2;host3``)
86 | 
87 | * Type: string
88 | * Importance: high
89 | * Dependents: ``index.prefix``
90 | 
91 | ``es.port``
92 | Elasticsearch port
93 | 
94 | * Type: string
95 | * Importance: high
96 | * Dependents: ``index.prefix``
97 | 
98 | ``es.scheme``
99 | Elasticsearch scheme (http/https)
100 | 
101 | * Type: string
102 | * Importance: medium
103 | * Default: ``http``
104 | 
105 | ``es.user``
106 | Elasticsearch username
107 | 
108 | * Type: string
109 | * Default: null
110 | * Importance: high
111 | 
112 | ``es.password``
113 | Elasticsearch password
114 | 
115 | * Type: password
116 | * Default: null
117 | * Importance: high
118 | 
119 | 
120 | ``incrementing.field.name``
121 | The name of the strictly incrementing field to use to detect new records.
122 | 
123 | * Type: any
124 | * Importance: high
125 | 
126 | ``incrementing.secondary.field.name``
127 | In case the main incrementing field may have duplicates,
128 | this secondary field is used as a secondary sort field in order
129 | to avoid data loss when paginating (available since version 1.4); see the sketch below.
130 | 
131 | * Type: any
132 | * Importance: low
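As referenced above, a sketch of the cursor-related fragment of a connector ``config`` block for an index whose main ``@timestamp`` field may contain duplicates. The secondary field name ``id`` is purely illustrative; the idea is that the pair of fields together should order documents unambiguously:

```json
{
  "incrementing.field.name": "@timestamp",
  "incrementing.secondary.field.name": "id"
}
```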
133 | 
134 | 
135 | ``es.tls.truststore.location``
136 | Elasticsearch SSL truststore location
137 | 
138 | * Type: string
139 | * Importance: medium
140 | 
141 | ``es.tls.truststore.password``
142 | Elasticsearch SSL truststore password
143 | 
144 | * Type: string
145 | * Default: ""
146 | * Importance: medium
147 | 
148 | ``es.tls.keystore.location``
149 | Elasticsearch keystore location
150 | 
151 | * Type: string
152 | * Importance: medium
153 | 
154 | ``es.tls.keystore.password``
155 | Elasticsearch keystore password
156 | 
157 | * Type: string
158 | * Default: ""
159 | * Importance: medium
160 | 
161 | ``connection.attempts``
162 | Maximum number of attempts to retrieve a valid Elasticsearch connection.
163 | 
164 | * Type: int
165 | * Default: 3
166 | * Importance: low
167 | 
168 | ``connection.backoff.ms``
169 | Backoff time in milliseconds between connection attempts.
170 | 
171 | * Type: long
172 | * Default: 10000
173 | * Importance: low
174 | 
175 | ``index.prefix``
176 | Index prefix to include in copying.
177 | Periodically, new indices matching the prefix are discovered.
178 | 
179 | * Type: string
180 | * Default: ""
181 | * Importance: medium
182 | 
183 | ``index.names``
184 | Comma-separated list of Elasticsearch indices: `es1,es2,es3`
185 | 
186 | * Type: string
187 | * Default: null
188 | * Importance: medium
189 | 
190 | ### Connector Configuration
191 | 
192 | ``poll.interval.ms``
193 | Frequency in ms to poll for new data in each index.
194 | 
195 | * Type: int
196 | * Default: 5000
197 | * Importance: high
198 | 
199 | ``batch.max.rows``
200 | Maximum number of documents to include in a single batch when polling for new data.
201 | 
202 | * Type: int
203 | * Default: 10000
204 | * Importance: low
205 | 
206 | ``topic.prefix``
207 | Prefix to prepend to index names to generate the name of the Kafka topic to publish data to.
208 | 
209 | * Type: string
210 | * Importance: high
211 | 
212 | ``filters.whitelist``
213 | Whitelist filter for extracting a subset of fields from Elasticsearch JSON documents. The whitelist filter supports
214 | nested fields. To provide multiple fields use `;` as separator
215 | (e.g. `customer;order.qty;order.price`); a combined example of the filters is shown at the end of this section.
216 | 
217 | * Type: string
218 | * Importance: medium
219 | * Default: null
220 | 
221 | ``filters.blacklist``
222 | Blacklist filter for extracting a subset of fields from Elasticsearch JSON documents. The blacklist filter supports
223 | nested fields. To provide multiple fields use `;` as separator
224 | (e.g. `customer;order.qty;order.price`).
225 | 
226 | * Type: string
227 | * Importance: medium
228 | * Default: null
229 | 
230 | ``filters.json_cast``
231 | This filter casts nested fields to a JSON string, avoiding recursive parsing into Kafka Connect schemas. The json-cast
232 | filter supports nested fields. To provide multiple fields use `;` as separator
233 | (e.g. `customer;order.qty;order.price`).
234 | 
235 | * Type: string
236 | * Importance: medium
237 | * Default: null
238 | 
239 | ``fieldname_converter``
240 | Configures which field name converter is used (allowed values: `avro` or `nop`). By default, the Avro field name
241 | converter renames JSON fields that do not respect the Avro
242 | specification (https://avro.apache.org/docs/current/spec.html#names)
243 | so that they can be serialized correctly. To disable field name conversion, set this parameter to `nop`.
244 | 
245 | * Type: string
246 | * Importance: medium
247 | * Default: avro
248 | 
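To illustrate the filters and the name converter described in this section, here is a hedged fragment of a connector ``config`` block; the field paths are examples only, and the three filters are combined here just to show the syntax:

```json
{
  "filters.whitelist": "customer;order.qty;order.price",
  "filters.blacklist": "customer.email",
  "filters.json_cast": "order.details",
  "fieldname_converter": "avro"
}
```

Under the semantics described above, the whitelist keeps only the listed subtrees, the blacklist drops `customer.email`, `order.details` is emitted as a plain JSON string instead of a nested struct, and `fieldname_converter` is left at its default.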
--------------------------------------------------------------------------------
/etc/quickstart-elasticsearch-source.properties:
--------------------------------------------------------------------------------
1 | connector.class=com.github.dariobalinzo.ElasticSourceConnector
2 | tasks.max=1
3 | es.host=localhost
4 | es.port=9200
5 | index.prefix=my_awesome_index
6 | topic.prefix=es_
7 | incrementing.field.name=@timestamp
8 | 
--------------------------------------------------------------------------------
/manifest.json:
--------------------------------------------------------------------------------
1 | {
2 |   "component_types": [
3 |     "source"
4 |   ],
5 |   "description": "This is a connector for getting data out of Elasticsearch into Apache Kafka.",
6 |   "documentation_url": "https://github.com/DarioBalinzo/kafka-connect-elasticsearch-source",
7 |   "features": {
8 |     "confluent_control_center_integration": true,
9 |     "delivery_guarantee": [
10 |       "at_least_once"
11 |     ],
12 |     "kafka_connect_api": true,
13 |     "single_message_transforms": true,
14 |     "supported_encodings": [
15 |       "any"
16 |     ]
17 |   },
18 |   "license": [
19 |     {
20 |       "name": "Apache License, Version 2.0",
21 |       "url": "http://www.apache.org/licenses/LICENSE-2.0",
22 |       "logo": "assets/apache_logo.gif"
23 |     }
24 |   ],
25 |   "name": "kafka-connect-elasticsearch-source",
26 |   "owner": {
27 |     "logo": "assets/dariobalinzo.png",
28 |     "name": "Dario Balinzo",
29 |     "type": "user",
30 |     "url": "https://github.com/DarioBalinzo",
31 |     "username": "dariobalinzo"
32 |   },
33 |   "requirements": [
34 |     "Elasticsearch >= 6.x"
35 |   ],
36 |   "support": {
37 |     "logo": "assets/dariobalinzo.png",
38 |     "provider_name": "Dario Balinzo",
39 |     "summary": "For support please open an issue in the github repository",
40 |     "url": "https://github.com/DarioBalinzo/kafka-connect-elasticsearch-source/issues"
41 |   },
42 |   "tags": [
43 |     "source",
44 |     "elasticsearch",
45 |     "elastic"
46 |   ],
47 |   "title": "Kafka Connect Elasticsearch Source",
48 |   "version": "1.5.0"
49 | }
50 | 
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | 
17 | 
18 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
19 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
20 |     <modelVersion>4.0.0</modelVersion>
21 | 
22 |     <properties>
23 |         <maven.compiler.source>1.8</maven.compiler.source>
24 |         <maven.compiler.target>1.8</maven.compiler.target>
25 |         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
26 |     </properties>
27 | 
28 |     <groupId>com.github.dariobalinzo</groupId>
29 |     <artifactId>elastic-source-connect</artifactId>
30 |     <version>1.5.5</version>
31 | 
32 |     <licenses>
33 |         <license>
34 |             <name>Apache License 2.0</name>
35 |             <url>http://www.apache.org/licenses/LICENSE-2.0.html</url>
36 |             <distribution>repo</distribution>
37 |         </license>
38 |     </licenses>
39 | 
40 |     <dependencies>
41 |         <dependency>
42 |             <groupId>org.apache.logging.log4j</groupId>
43 |             <artifactId>log4j-api</artifactId>
44 |             <version>2.19.0</version>
45 |         </dependency>
46 |         <dependency>
47 |             <groupId>org.apache.kafka</groupId>
48 |             <artifactId>connect-api</artifactId>
49 |             <version>2.8.0</version>
50 |         </dependency>
51 |         <dependency>
52 |             <groupId>org.elasticsearch.client</groupId>
53 |             <artifactId>elasticsearch-rest-high-level-client</artifactId>
54 |             <version>7.16.1</version>
55 |         </dependency>
56 |         <dependency>
57 |             <groupId>org.elasticsearch.client</groupId>
58 |             <artifactId>elasticsearch-rest-client</artifactId>
59 |             <version>7.16.1</version>
60 |         </dependency>
61 |         <dependency>
62 |             <groupId>junit</groupId>
63 |             <artifactId>junit</artifactId>
64 |             <version>4.13.2</version>
65 |             <scope>test</scope>
66 |         </dependency>
67 |         <dependency>
68 |             <groupId>org.testcontainers</groupId>
69 |             <artifactId>testcontainers</artifactId>
70 |             <version>1.16.2</version>
71 |             <scope>test</scope>
72 |         </dependency>
73 |         <dependency>
74 |             <groupId>org.testcontainers</groupId>
75 |             <artifactId>elasticsearch</artifactId>
76 |             <version>1.16.2</version>
77 |             <scope>test</scope>
78 |         </dependency>
79 |         <dependency>
80 |             <groupId>ch.qos.logback</groupId>
81 |             <artifactId>logback-core</artifactId>
82 |             <version>1.2.9</version>
83 |             <scope>test</scope>
84 |         </dependency>
85 |         <dependency>
86 |             <groupId>ch.qos.logback</groupId>
87 |             <artifactId>logback-classic</artifactId>
88 |             <version>1.2.8</version>
89 |             <scope>test</scope>
90 |         </dependency>
91 | 
92 |         <dependency>
93 |             <groupId>org.mockito</groupId>
94 |             <artifactId>mockito-core</artifactId>
95 |             <version>2.28.2</version>
96 |             <scope>test</scope>
97 |         </dependency>
98 |         <dependency>
99 |             <groupId>com.fasterxml.jackson.core</groupId>
100 |             <artifactId>jackson-databind</artifactId>
101 |             <version>2.12.7.1</version>
102 |         </dependency>
103 | 
104 |         <dependency>
105 |             <groupId>com.fasterxml.jackson.dataformat</groupId>
106 |             <artifactId>jackson-dataformat-cbor</artifactId>
107 |             <version>2.14.2</version>
108 |         </dependency>
109 |         <dependency>
110 |             <groupId>org.yaml</groupId>
111 |             <artifactId>snakeyaml</artifactId>
112 |             <version>2.0</version>
113 |         </dependency>
114 |         <dependency>
115 |             <groupId>org.apache.httpcomponents</groupId>
116 |             <artifactId>httpclient</artifactId>
117 |             <version>4.5.13</version>
118 |         </dependency>
119 |     </dependencies>
120 | 
121 | 
122 |     <build>
123 |         <plugins>
124 |             <plugin>
125 |                 <artifactId>maven-assembly-plugin</artifactId>
126 |                 <configuration>
127 |                     <descriptorRefs>
128 |                         <descriptorRef>jar-with-dependencies</descriptorRef>
129 |                     </descriptorRefs>
130 |                 </configuration>
131 |                 <executions>
132 |                     <execution>
133 |                         <id>make-assembly</id>
134 |                         <phase>package</phase>
135 |                         <goals>
136 |                             <goal>single</goal>
137 |                         </goals>
138 |                     </execution>
139 |                 </executions>
140 |             </plugin>
141 |         </plugins>
142 |     </build>
143 | 
144 | </project>
145 | 
146 | 
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/ElasticSourceConnector.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 |  *

4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *

8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *

10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package com.github.dariobalinzo;
18 | 
19 | import com.github.dariobalinzo.elastic.ElasticConnection;
20 | import com.github.dariobalinzo.elastic.ElasticConnectionBuilder;
21 | import com.github.dariobalinzo.elastic.ElasticRepository;
22 | import com.github.dariobalinzo.elastic.ElasticIndexMonitorThread;
23 | import com.github.dariobalinzo.task.ElasticSourceTask;
24 | import org.apache.kafka.common.config.ConfigDef;
25 | import org.apache.kafka.common.config.ConfigException;
26 | import org.apache.kafka.connect.connector.Task;
27 | import org.apache.kafka.connect.errors.ConnectException;
28 | import org.apache.kafka.connect.source.SourceConnector;
29 | import org.slf4j.Logger;
30 | import org.slf4j.LoggerFactory;
31 | 
32 | import java.util.*;
33 | 
34 | public class ElasticSourceConnector extends SourceConnector {
35 |     private static final Logger logger = LoggerFactory.getLogger(ElasticSourceConnector.class);
36 |     private static final long MAX_TIMEOUT = 10000L;
37 |     private static final long POLL_MILLISECONDS = 5000L;
38 | 
39 |     private ElasticSourceConnectorConfig config;
40 |     private ElasticConnection elasticConnection;
41 |     private ElasticRepository elasticRepository;
42 |     private Map<String, String> configProperties;
43 |     private ElasticIndexMonitorThread indexMonitorThread;
44 | 
45 |     @Override
46 |     public String version() {
47 |         return Version.VERSION;
48 |     }
49 | 
50 |     @Override
51 |     public void start(Map<String, String> props) {
52 |         try {
53 |             configProperties = props;
54 |             config = new ElasticSourceConnectorConfig(props);
55 |         } catch (ConfigException e) {
56 |             throw new ConnectException("Couldn't start ElasticSourceConnector due to configuration "
57 |                     + "error", e);
58 |         }
59 | 
60 |         String esScheme = config.getString(ElasticSourceConnectorConfig.ES_SCHEME_CONF);
61 |         String esHost = config.getString(ElasticSourceConnectorConfig.ES_HOST_CONF);
62 | 
63 |         //using rest config all the parameters are strings
64 |         int esPort = Integer.parseInt(config.getString(ElasticSourceConnectorConfig.ES_PORT_CONF));
65 | 
66 |         String esUser = config.getString(ElasticSourceConnectorConfig.ES_USER_CONF);
67 |         String esPwd = config.getString(ElasticSourceConnectorConfig.ES_PWD_CONF);
68 | 
69 |         int maxConnectionAttempts = Integer.parseInt(config.getString(
70 |                 ElasticSourceConnectorConfig.CONNECTION_ATTEMPTS_CONFIG
71 |         ));
72 |         long connectionRetryBackoff = Long.parseLong(config.getString(
73 |                 ElasticSourceConnectorConfig.CONNECTION_BACKOFF_CONFIG
74 |         ));
75 | 
76 |         ElasticConnectionBuilder connectionBuilder = new ElasticConnectionBuilder(esHost, esPort)
77 |                 .withProtocol(esScheme)
78 |                 .withMaxAttempts(maxConnectionAttempts)
79 |                 .withBackoff(connectionRetryBackoff);
80 | 
81 |         String truststore = config.getString(ElasticSourceConnectorConfig.ES_TRUSTSTORE_CONF);
82 |         String truststorePass = config.getString(ElasticSourceConnectorConfig.ES_TRUSTSTORE_PWD_CONF);
83 |         String keystore = config.getString(ElasticSourceConnectorConfig.ES_KEYSTORE_CONF);
84 |         String keystorePass = config.getString(ElasticSourceConnectorConfig.ES_KEYSTORE_PWD_CONF);
85 | 
86 |         if (truststore != null) {
87 |             connectionBuilder.withTrustStore(truststore, truststorePass);
88 |         }
89 | 
90 |         if (keystore != null) {
91 |             connectionBuilder.withKeyStore(keystore, keystorePass);
92 |         }
93 | 
94 |         if (esUser == null || esUser.isEmpty()) {
95 |             elasticConnection = connectionBuilder.build();
96 |         } else {
97 |             elasticConnection = connectionBuilder.withUser(esUser)
98 |                     .withPassword(esPwd)
99 |                     .build();
100 |         }
101 | 
102 |         elasticRepository = new ElasticRepository(elasticConnection);
103 | 
104 |         indexMonitorThread = new ElasticIndexMonitorThread(context, POLL_MILLISECONDS, elasticRepository, config.getString(ElasticSourceConnectorConfig.INDEX_PREFIX_CONFIG));
105 |         indexMonitorThread.start();
106 |     }
107 | 
108 |     @Override
109 |     public Class<? extends Task> taskClass() {
110 |         return ElasticSourceTask.class;
111 |     }
112 | 
113 | 
114 |     @Override
115 |     public List<Map<String, String>> taskConfigs(int maxTasks) {
116 |         if (configProperties.containsKey(ElasticSourceConnectorConfig.INDEX_NAMES_CONFIG)) { // fixed index list takes precedence over prefix discovery
117 |             String indicesNames = configProperties.get(ElasticSourceConnectorConfig.INDEX_NAMES_CONFIG);
118 |             String[] indicesList = indicesNames.split(",");
119 |             return generateTaskFromFixedList(Arrays.asList(indicesList), maxTasks);
120 |         } else {
121 |             return findTaskFromIndexPrefix(maxTasks);
122 |         }
123 |     }
124 | 
125 |     private List<Map<String, String>> generateTaskFromFixedList(List<String> indicesList, int maxTasks) {
126 | 
127 |         return groupIndicesToTasksConfig(maxTasks, indicesList);
128 |     }
129 | 
130 |     private List<Map<String, String>> findTaskFromIndexPrefix(int maxTasks) {
131 |         List<String> currentIndexes = indexMonitorThread.indexes();
132 |         return groupIndicesToTasksConfig(maxTasks, currentIndexes);
133 |     }
134 | 
135 |     private List<Map<String, String>> groupIndicesToTasksConfig(int maxTasks, List<String> currentIndexes) {
136 |         int numGroups = Math.min(currentIndexes.size(), maxTasks);
137 |         List<List<String>> indexGrouped = groupPartitions(currentIndexes, numGroups);
138 |         List<Map<String, String>> taskConfigs = new ArrayList<>(indexGrouped.size());
139 |         for (List<String> taskIndices : indexGrouped) {
140 |             Map<String, String> taskProps = new HashMap<>(configProperties);
141 |             taskProps.put(ElasticSourceConnectorConfig.INDICES_CONFIG,
142 |                     String.join(",", taskIndices));
143 |             taskConfigs.add(taskProps);
144 |         }
145 |         return taskConfigs;
146 |     }
147 | 
148 |     @Override
149 |     public void stop() {
150 |         logger.info("stopping elastic source");
151 |         indexMonitorThread.shutdown();
152 |         try {
153 |             indexMonitorThread.join(MAX_TIMEOUT);
154 |         } catch (InterruptedException e) {
155 |             // Ignore, shouldn't be interrupted
156 |         }
157 |         elasticConnection.closeQuietly();
158 |     }
159 | 
160 |     @Override
161 |     public ConfigDef config() {
162 |         return ElasticSourceConnectorConfig.CONFIG_DEF;
163 |     }
164 | 
165 | 
166 |     private List<List<String>> groupPartitions(List<String> currentIndices, int numGroups) {
167 |         List<List<String>> result = new ArrayList<>(numGroups);
168 |         for (int i = 0; i < numGroups; ++i) {
169 |             result.add(new ArrayList<>());
170 |         }
171 | 
172 |         for (int i = 0; i < currentIndices.size(); ++i) {
173 |             result.get(i % numGroups).add(currentIndices.get(i)); // round-robin: index i goes to group i % numGroups
174 |         }
175 | 
176 |         return result;
177 |     }
178 | }
179 | 
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/ElasticSourceConnectorConfig.java:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 |  *

4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *

8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *

10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.github.dariobalinzo; 18 | 19 | import org.apache.kafka.common.config.AbstractConfig; 20 | import org.apache.kafka.common.config.ConfigDef; 21 | import org.apache.kafka.common.config.ConfigDef.Importance; 22 | import org.apache.kafka.common.config.ConfigDef.Type; 23 | import org.apache.kafka.common.config.ConfigDef.Width; 24 | 25 | import java.util.Collections; 26 | import java.util.Map; 27 | 28 | public class ElasticSourceConnectorConfig extends AbstractConfig { 29 | 30 | public final static String ES_HOST_CONF = "es.host"; 31 | private final static String ES_HOST_DOC = "ElasticSearch host. " + 32 | "Optionally it is possible to specify many hosts " + 33 | "using ; as separator (host1;host2;host3)"; 34 | private final static String ES_HOST_DISPLAY = "Elastic host"; 35 | 36 | public final static String ES_SCHEME_CONF = "es.scheme"; 37 | private final static String ES_SCHEME_DOC = "Elasticsearch scheme (default: http)"; 38 | private final static String ES_SCHEME_DISPLAY = "Elasticsearch scheme"; 39 | private static final String ES_SCHEME_DEFAULT = "http"; 40 | 41 | public final static String ES_PORT_CONF = "es.port"; 42 | private final static String ES_PORT_DOC = "ElasticSearch port"; 43 | private final static String ES_PORT_DISPLAY = "ElasticSearch port"; 44 | 45 | public final static String ES_USER_CONF = "es.user"; 46 | private final static String ES_USER_DOC = "Elasticsearch username"; 47 | private final static String ES_USER_DISPLAY = "Elasticsearch username"; 48 | 49 | public final static String ES_PWD_CONF = "es.password"; 50 | private final static String ES_PWD_DOC = "Elasticsearch password"; 51 | private final static String ES_PWD_DISPLAY = "Elasticsearch password"; 52 | 53 | public final static String ES_KEYSTORE_CONF = "es.tls.keystore.location"; 54 | private final static String ES_KEYSTORE_DOC = "Elasticsearch keystore location"; 55 | 56 | public final static String ES_KEYSTORE_PWD_CONF = "es.tls.keystore.password"; 57 | private final static String ES_KEYSTORE_PWD_DOC = "Elasticsearch keystore password"; 58 | 59 | public final static String ES_TRUSTSTORE_CONF = "es.tls.truststore.location"; 60 | private final static String ES_TRUSTSTORE_DOC = "Elasticsearch truststore location"; 61 | 62 | public final static String ES_TRUSTSTORE_PWD_CONF = "es.tls.truststore.password"; 63 | private final static String ES_TRUSTSTORE_PWD_DOC = "Elasticsearch truststore password"; 64 | 65 | public static final String CONNECTION_ATTEMPTS_CONFIG = "connection.attempts"; 66 | private static final String CONNECTION_ATTEMPTS_DOC 67 | = "Maximum number of attempts to retrieve a valid Elasticsearch connection."; 68 | private static final String CONNECTION_ATTEMPTS_DISPLAY = "Elasticsearch connection attempts"; 69 | private static final String CONNECTION_ATTEMPTS_DEFAULT = "3"; 70 | 71 | public static final String CONNECTION_BACKOFF_CONFIG = "connection.backoff.ms"; 72 | private static final String CONNECTION_BACKOFF_DOC 73 | = "Backoff time in milliseconds between connection attempts."; 74 | private static final String CONNECTION_BACKOFF_DISPLAY 75 | = "Elastic connection backoff in milliseconds"; 76 | private static final String 
CONNECTION_BACKOFF_DEFAULT = "10000"; 77 | 78 | public static final String POLL_INTERVAL_MS_CONFIG = "poll.interval.ms"; 79 | private static final String POLL_INTERVAL_MS_DOC = "Frequency in ms to poll for new data in " 80 | + "each index."; 81 | private static final String POLL_INTERVAL_MS_DEFAULT = "5000"; 82 | private static final String POLL_INTERVAL_MS_DISPLAY = "Poll Interval (ms)"; 83 | 84 | public static final String BATCH_MAX_ROWS_CONFIG = "batch.max.rows"; 85 | private static final String BATCH_MAX_ROWS_DOC = 86 | "Maximum number of documents to include in a single batch when polling for new data."; 87 | private static final String BATCH_MAX_ROWS_DEFAULT = "10000"; 88 | private static final String BATCH_MAX_ROWS_DISPLAY = "Max Documents Per Batch"; 89 | 90 | private static final String MODE_UNSPECIFIED = ""; 91 | private static final String MODE_BULK = "bulk"; 92 | private static final String MODE_TIMESTAMP = "timestamp"; 93 | private static final String MODE_INCREMENTING = "incrementing"; 94 | private static final String MODE_TIMESTAMP_INCREMENTING = "timestamp+incrementing"; 95 | 96 | public static final String INCREMENTING_FIELD_NAME_CONFIG = "incrementing.field.name"; 97 | private static final String INCREMENTING_FIELD_NAME_DOC = 98 | "The name of the strictly incrementing field to use to detect new records."; 99 | private static final String INCREMENTING_FIELD_NAME_DEFAULT = ""; 100 | private static final String INCREMENTING_FIELD_NAME_DISPLAY = "Incrementing Field Name"; 101 | 102 | public static final String SECONDARY_INCREMENTING_FIELD_NAME_CONFIG = "incrementing.secondary.field.name"; 103 | private static final String SECONDARY_INCREMENTING_FIELD_NAME_DOC = 104 | "In case the main incrementing field may have duplicates, this secondary field is used as a secondary sort field" + 105 | " in order to avoid data losses when paginating"; 106 | private static final String SECONDARY_INCREMENTING_FIELD_NAME_DISPLAY = "Secondary Incrementing Field Name"; 107 | 108 | public static final String INDEX_PREFIX_CONFIG = "index.prefix"; 109 | private static final String INDEX_PREFIX_DOC = "List of indices to include in copying."; 110 | private static final String INDEX_PREFIX_DEFAULT = ""; 111 | private static final String INDEX_PREFIX_DISPLAY = "Indices prefix Whitelist"; 112 | 113 | public static final String INDEX_NAMES_CONFIG = "index.names"; 114 | private static final String INDEX_NAMES_DOC = "List of elasticsearch indices (es1,es2,es3)"; 115 | private static final String INDEX_NAMES_DEFAULT = null; 116 | private static final String INDEX_NAMES_DISPLAY = "List of elasticsearch indices (es1,es2,es3)"; 117 | 118 | 119 | public static final String TOPIC_PREFIX_CONFIG = "topic.prefix"; 120 | private static final String TOPIC_PREFIX_DOC = 121 | "Prefix to prepend to index names to generate the name of the Kafka topic to publish data"; 122 | private static final String TOPIC_PREFIX_DISPLAY = "Topic Prefix"; 123 | 124 | private static final String DATABASE_GROUP = "Elasticsearch"; 125 | private static final String MODE_GROUP = "Mode"; 126 | private static final String CONNECTOR_GROUP = "Connector"; 127 | 128 | private static final String MODE_CONFIG = "mode"; 129 | private static final String MODE_DOC = ""; 130 | private static final String MODE_DISPLAY = "Index Incrementing field"; 131 | 132 | public static final String INDICES_CONFIG = "es.indices"; 133 | 134 | public static final String FIELDS_WHITELIST_CONFIG = "filters.whitelist"; 135 | private static final String FIELDS_WHITELIST_DOC 
= "Whitelist filter for fields (semicolon-separated, e.g. order.qty;order.price;status)"; 136 | private static final String FIELDS_WHITELIST_DISPLAY = "Fields Whitelist"; 137 | 138 | public static final String FIELDS_BLACKLIST_CONFIG = "filters.blacklist"; 139 | private static final String FIELDS_BLACKLIST_DOC = "Blacklist filter for fields (semicolon-separated, e.g. order.qty;order.price;status)"; 140 | private static final String FIELDS_BLACKLIST_DISPLAY = "Fields Blacklist"; 141 | 142 | public static final String FIELDS_JSON_CAST_CONFIG = "filters.json_cast"; 143 | private static final String FIELDS_JSON_CAST_DOC = "Cast these fields to a JSON string instead of parsing them as nested objects (semicolon-separated, e.g. order.qty;order.price;status)"; 144 | private static final String FIELDS_JSON_CAST_DISPLAY = "Cast to JSON String"; 145 | 146 | public static final String CONNECTOR_FIELDNAME_CONVERTER_CONFIG = "fieldname_converter"; 147 | public static final String CONNECTOR_FIELDNAME_CONVERTER_DOC = "Determines which name converter is used for document fields (avro is the default)."; 148 | public static final String CONNECTOR_FIELDNAME_CONVERTER_DISPLAY = "Field Name Converter (avro, nop)"; 149 | 150 | public static final String NOP_FIELDNAME_CONVERTER = "nop"; 151 | public static final String AVRO_FIELDNAME_CONVERTER = "avro"; 152 | 153 | public static final ConfigDef CONFIG_DEF = baseConfigDef(); 154 | 155 | protected static ConfigDef baseConfigDef() { 156 | ConfigDef config = new ConfigDef(); 157 | addDatabaseOptions(config); 158 | addModeOptions(config); 159 | addConnectorOptions(config); 160 | return config; 161 | } 162 | 163 | private static void addDatabaseOptions(ConfigDef config) { 164 | int orderInGroup = 0; 165 | config.define( 166 | ES_HOST_CONF, 167 | Type.STRING, 168 | Importance.HIGH, 169 | ES_HOST_DOC, 170 | DATABASE_GROUP, 171 | ++orderInGroup, 172 | Width.LONG, 173 | ES_HOST_DISPLAY, 174 | Collections.singletonList(INDEX_PREFIX_CONFIG) 175 | ).define( 176 | ES_SCHEME_CONF, 177 | Type.STRING, 178 | ES_SCHEME_DEFAULT, 179 | Importance.MEDIUM, 180 | ES_SCHEME_DOC, 181 | DATABASE_GROUP, 182 | ++orderInGroup, 183 | Width.LONG, 184 | ES_SCHEME_DISPLAY 185 | ).define( 186 | ES_PORT_CONF, 187 | Type.STRING, 188 | Importance.HIGH, 189 | ES_PORT_DOC, 190 | DATABASE_GROUP, 191 | ++orderInGroup, 192 | Width.LONG, 193 | ES_PORT_DISPLAY, 194 | Collections.singletonList(INDEX_PREFIX_CONFIG) 195 | ).define( 196 | ES_USER_CONF, 197 | Type.STRING, 198 | null, 199 | Importance.HIGH, 200 | ES_USER_DOC, 201 | DATABASE_GROUP, 202 | ++orderInGroup, 203 | Width.LONG, 204 | ES_USER_DISPLAY 205 | ).define( 206 | ES_PWD_CONF, 207 | Type.STRING, 208 | null, 209 | Importance.HIGH, 210 | ES_PWD_DOC, 211 | DATABASE_GROUP, 212 | ++orderInGroup, 213 | Width.SHORT, 214 | ES_PWD_DISPLAY 215 | ).define( 216 | ES_KEYSTORE_CONF, 217 | Type.STRING, 218 | null, 219 | Importance.MEDIUM, 220 | ES_KEYSTORE_DOC, 221 | DATABASE_GROUP, 222 | ++orderInGroup, 223 | Width.SHORT, 224 | ES_KEYSTORE_DOC 225 | ).define( 226 | ES_KEYSTORE_PWD_CONF, 227 | Type.STRING, 228 | "", 229 | Importance.MEDIUM, 230 | ES_KEYSTORE_PWD_DOC, 231 | DATABASE_GROUP, 232 | ++orderInGroup, 233 | Width.SHORT, 234 | ES_KEYSTORE_PWD_DOC 235 | ).define( 236 | ES_TRUSTSTORE_CONF, 237 | Type.STRING, 238 | null, 239 | Importance.MEDIUM, 240 | ES_TRUSTSTORE_DOC, 241 | DATABASE_GROUP, 242 | ++orderInGroup, 243 | Width.SHORT, 244 | ES_TRUSTSTORE_DOC 245 | ).define( 246 | ES_TRUSTSTORE_PWD_CONF, 247 | Type.STRING, 248 | "", 249 | Importance.MEDIUM, 250 | ES_TRUSTSTORE_PWD_DOC, 251 | DATABASE_GROUP, 252 | ++orderInGroup,
253 | Width.SHORT, 254 | ES_TRUSTSTORE_PWD_DOC 255 | ).define( 256 | CONNECTION_ATTEMPTS_CONFIG, 257 | Type.STRING, 258 | CONNECTION_ATTEMPTS_DEFAULT, 259 | Importance.LOW, 260 | CONNECTION_ATTEMPTS_DOC, 261 | DATABASE_GROUP, 262 | ++orderInGroup, 263 | ConfigDef.Width.SHORT, 264 | CONNECTION_ATTEMPTS_DISPLAY 265 | ).define( 266 | CONNECTION_BACKOFF_CONFIG, 267 | Type.STRING, 268 | CONNECTION_BACKOFF_DEFAULT, 269 | Importance.LOW, 270 | CONNECTION_BACKOFF_DOC, 271 | DATABASE_GROUP, 272 | ++orderInGroup, 273 | Width.SHORT, 274 | CONNECTION_BACKOFF_DISPLAY 275 | ).define( 276 | INDEX_PREFIX_CONFIG, 277 | Type.STRING, 278 | INDEX_PREFIX_DEFAULT, 279 | Importance.MEDIUM, 280 | INDEX_PREFIX_DOC, 281 | DATABASE_GROUP, 282 | ++orderInGroup, 283 | Width.LONG, 284 | INDEX_PREFIX_DISPLAY 285 | ).define( 286 | INDEX_NAMES_CONFIG, 287 | Type.STRING, 288 | INDEX_NAMES_DEFAULT, 289 | Importance.MEDIUM, 290 | INDEX_NAMES_DOC, 291 | DATABASE_GROUP, 292 | ++orderInGroup, 293 | Width.LONG, 294 | INDEX_NAMES_DISPLAY 295 | ).define( 296 | FIELDS_WHITELIST_CONFIG, 297 | Type.STRING, 298 | null, 299 | Importance.MEDIUM, 300 | FIELDS_WHITELIST_DOC, 301 | CONNECTOR_GROUP, 302 | ++orderInGroup, 303 | Width.MEDIUM, 304 | FIELDS_WHITELIST_DISPLAY 305 | ).define( 306 | FIELDS_BLACKLIST_CONFIG, 307 | Type.STRING, 308 | null, 309 | Importance.MEDIUM, 310 | FIELDS_BLACKLIST_DOC, 311 | CONNECTOR_GROUP, 312 | ++orderInGroup, 313 | Width.MEDIUM, 314 | FIELDS_BLACKLIST_DISPLAY 315 | ).define( 316 | FIELDS_JSON_CAST_CONFIG, 317 | Type.STRING, 318 | null, 319 | Importance.MEDIUM, 320 | FIELDS_JSON_CAST_DOC, 321 | CONNECTOR_GROUP, 322 | ++orderInGroup, 323 | Width.MEDIUM, 324 | FIELDS_JSON_CAST_DISPLAY 325 | ); 326 | } 327 | 328 | private static void addModeOptions(ConfigDef config) { 329 | int orderInGroup = 0; 330 | config.define( 331 | MODE_CONFIG, 332 | Type.STRING, 333 | MODE_UNSPECIFIED, 334 | ConfigDef.ValidString.in( 335 | MODE_UNSPECIFIED, 336 | MODE_BULK, 337 | MODE_TIMESTAMP, 338 | MODE_INCREMENTING, 339 | MODE_TIMESTAMP_INCREMENTING 340 | ), 341 | Importance.HIGH, 342 | MODE_DOC, 343 | MODE_GROUP, 344 | ++orderInGroup, 345 | Width.MEDIUM, 346 | MODE_DISPLAY, 347 | Collections.singletonList( 348 | INCREMENTING_FIELD_NAME_CONFIG 349 | ) 350 | ).define( 351 | INCREMENTING_FIELD_NAME_CONFIG, 352 | Type.STRING, 353 | INCREMENTING_FIELD_NAME_DEFAULT, 354 | Importance.MEDIUM, 355 | INCREMENTING_FIELD_NAME_DOC, 356 | MODE_GROUP, 357 | ++orderInGroup, 358 | Width.MEDIUM, 359 | INCREMENTING_FIELD_NAME_DISPLAY 360 | ).define( 361 | SECONDARY_INCREMENTING_FIELD_NAME_CONFIG, 362 | Type.STRING, 363 | null, 364 | Importance.LOW, 365 | SECONDARY_INCREMENTING_FIELD_NAME_DOC, 366 | MODE_GROUP, 367 | ++orderInGroup, 368 | Width.MEDIUM, 369 | SECONDARY_INCREMENTING_FIELD_NAME_DISPLAY 370 | ); 371 | } 372 | 373 | private static void addConnectorOptions(ConfigDef config) { 374 | int orderInGroup = 0; 375 | config.define( 376 | POLL_INTERVAL_MS_CONFIG, 377 | Type.STRING, 378 | POLL_INTERVAL_MS_DEFAULT, 379 | Importance.HIGH, 380 | POLL_INTERVAL_MS_DOC, 381 | CONNECTOR_GROUP, 382 | ++orderInGroup, 383 | Width.SHORT, 384 | POLL_INTERVAL_MS_DISPLAY 385 | ).define( 386 | BATCH_MAX_ROWS_CONFIG, 387 | Type.STRING, 388 | BATCH_MAX_ROWS_DEFAULT, 389 | Importance.LOW, 390 | BATCH_MAX_ROWS_DOC, 391 | CONNECTOR_GROUP, 392 | ++orderInGroup, 393 | Width.SHORT, 394 | BATCH_MAX_ROWS_DISPLAY 395 | ).define( 396 | TOPIC_PREFIX_CONFIG, 397 | Type.STRING, 398 | Importance.HIGH, 399 | TOPIC_PREFIX_DOC, 400 | CONNECTOR_GROUP, 401 | ++orderInGroup, 402 |
Width.MEDIUM, 403 | TOPIC_PREFIX_DISPLAY 404 | ).define( 405 | CONNECTOR_FIELDNAME_CONVERTER_CONFIG, 406 | Type.STRING, 407 | AVRO_FIELDNAME_CONVERTER, 408 | Importance.MEDIUM, 409 | CONNECTOR_FIELDNAME_CONVERTER_DOC, 410 | CONNECTOR_GROUP, 411 | ++orderInGroup, 412 | Width.MEDIUM, 413 | CONNECTOR_FIELDNAME_CONVERTER_DISPLAY 414 | ); 415 | } 416 | 417 | public ElasticSourceConnectorConfig(Map<String, String> properties) { 418 | super(CONFIG_DEF, properties); 419 | } 420 | 421 | protected ElasticSourceConnectorConfig(ConfigDef subclassConfigDef, Map<String, String> props) { 422 | super(subclassConfigDef, props); 423 | } 424 | 425 | } 426 | -------------------------------------------------------------------------------- /src/main/java/com/github/dariobalinzo/Version.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
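A minimal sketch of how the settings above can be assembled programmatically, for example in a test. All values are illustrative placeholders; ES_HOST_CONF and ES_PORT_CONF are assumed to be the public constants declared earlier in this class, and the "mode" key is written as a literal because MODE_CONFIG is private.

    import java.util.HashMap;
    import java.util.Map;

    class ConfigExample {                                      // hypothetical demo class
        static ElasticSourceConnectorConfig quickstart() {
            Map<String, String> props = new HashMap<>();
            props.put(ElasticSourceConnectorConfig.ES_HOST_CONF, "localhost");  // placeholder host
            props.put(ElasticSourceConnectorConfig.ES_PORT_CONF, "9200");       // placeholder port
            props.put("mode", "incrementing");                 // literal key: MODE_CONFIG is private
            props.put(ElasticSourceConnectorConfig.INCREMENTING_FIELD_NAME_CONFIG, "@timestamp");
            props.put(ElasticSourceConnectorConfig.INDEX_PREFIX_CONFIG, "metrics-");
            props.put(ElasticSourceConnectorConfig.TOPIC_PREFIX_CONFIG, "es_");
            return new ElasticSourceConnectorConfig(props);    // validated against CONFIG_DEF
        }
    }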
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.github.dariobalinzo; 18 | 19 | public class Version { 20 | public static final String VERSION = "1.5.5"; 21 | } 22 | -------------------------------------------------------------------------------- /src/main/java/com/github/dariobalinzo/elastic/CursorField.java: -------------------------------------------------------------------------------- 1 | package com.github.dariobalinzo.elastic; 2 | 3 | import java.util.Map; 4 | 5 | import static com.github.dariobalinzo.elastic.ElasticJsonNaming.removeKeywordSuffix; 6 | 7 | public class CursorField { 8 | private final String cursor; 9 | 10 | public CursorField(String cursor) { 11 | this.cursor = removeKeywordSuffix(cursor); 12 | } 13 | 14 | public String read(Map<String, Object> document) { 15 | return read(document, cursor); 16 | } 17 | 18 | private String read(Map<String, Object> document, String field) { 19 | int firstDot = field.indexOf('.'); 20 | 21 | Object value = null; 22 | if (document.containsKey(field)) { 23 | value = document.get(field); 24 | } else if (firstDot > 0 && firstDot < field.length() - 1) { 25 | String parent = field.substring(0, firstDot); 26 | Object nested = document.get(parent); 27 | if (nested instanceof Map) { 28 | return read((Map<String, Object>) document.get(parent), 29 | field.substring(firstDot + 1)); 30 | } 31 | } 32 | 33 | return value == null ? null : value.toString(); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/com/github/dariobalinzo/elastic/ElasticConnection.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
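A small usage sketch for CursorField (names and values are made up): the constructor strips a trailing ".keyword", and read() walks dot-separated paths into nested sub-documents, returning the value as a String.

    import java.util.HashMap;
    import java.util.Map;

    class CursorFieldDemo {                                    // hypothetical demo class
        static void demo() {
            Map<String, Object> order = new HashMap<>();
            order.put("id", 42);
            Map<String, Object> document = new HashMap<>();
            document.put("order", order);

            CursorField field = new CursorField("order.id.keyword");  // ".keyword" is removed
            String value = field.read(document);               // "42": the nested value, stringified
        }
    }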
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.github.dariobalinzo.elastic; 18 | 19 | import org.apache.http.HttpHost; 20 | import org.apache.http.auth.AuthScope; 21 | import org.apache.http.auth.UsernamePasswordCredentials; 22 | import org.apache.http.client.CredentialsProvider; 23 | import org.apache.http.impl.client.BasicCredentialsProvider; 24 | import org.apache.http.ssl.SSLContextBuilder; 25 | import org.apache.http.ssl.SSLContexts; 26 | import org.elasticsearch.client.RestClient; 27 | import org.elasticsearch.client.RestHighLevelClient; 28 | import org.slf4j.Logger; 29 | import org.slf4j.LoggerFactory; 30 | 31 | import javax.net.ssl.SSLContext; 32 | import java.io.IOException; 33 | import java.io.InputStream; 34 | import java.nio.file.Files; 35 | import java.nio.file.Path; 36 | import java.nio.file.Paths; 37 | import java.security.KeyStore; 38 | import java.util.Arrays; 39 | import java.util.Objects; 40 | 41 | public class ElasticConnection { 42 | public final static Logger logger = LoggerFactory.getLogger(ElasticConnection.class); 43 | 44 | private RestHighLevelClient client; 45 | private final long connectionRetryBackoff; 46 | private final int maxConnectionAttempts; 47 | private final String hosts; 48 | private final String protocol; 49 | private final int port; 50 | private final SSLContext sslContext; 51 | private final CredentialsProvider credentialsProvider; 52 | 53 | ElasticConnection(ElasticConnectionBuilder builder) { 54 | hosts = builder.hosts; 55 | protocol = builder.protocol; 56 | port = builder.port; 57 | 58 | String user = builder.user; 59 | String pwd = builder.pwd; 60 | if (user != null) { 61 | credentialsProvider = new BasicCredentialsProvider(); 62 | credentialsProvider.setCredentials(AuthScope.ANY, 63 | new UsernamePasswordCredentials(user, pwd)); 64 | } else { 65 | credentialsProvider = null; 66 | } 67 | 68 | sslContext = builder.trustStorePath == null ? 
null : 69 | getSslContext( 70 | builder.trustStorePath, 71 | builder.trustStorePassword, 72 | builder.keyStorePath, 73 | builder.keyStorePassword 74 | ); 75 | 76 | createConnection(); 77 | 78 | this.maxConnectionAttempts = builder.maxConnectionAttempts; 79 | this.connectionRetryBackoff = builder.connectionRetryBackoff; 80 | } 81 | 82 | private void createConnection() { 83 | HttpHost[] hostList = parseHosts(hosts, protocol, port); 84 | 85 | client = new RestHighLevelClient( 86 | RestClient.builder(hostList) 87 | .setHttpClientConfigCallback( 88 | httpClientBuilder -> { 89 | if (credentialsProvider != null) { 90 | httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider); 91 | } 92 | if (sslContext != null) { 93 | httpClientBuilder.setSSLContext(sslContext); 94 | } 95 | return httpClientBuilder; 96 | } 97 | ) 98 | ); 99 | } 100 | 101 | private SSLContext getSslContext(String trustStoreConf, String trustStorePass, 102 | String keyStoreConf, String keyStorePass) { 103 | 104 | Objects.requireNonNull(trustStoreConf, "truststore location is required"); 105 | Objects.requireNonNull(trustStorePass, "truststore password is required"); 106 | 107 | try { 108 | Path trustStorePath = Paths.get(trustStoreConf); 109 | KeyStore truststore = KeyStore.getInstance("pkcs12"); 110 | try (InputStream is = Files.newInputStream(trustStorePath)) { 111 | truststore.load(is, trustStorePass.toCharArray()); 112 | } 113 | SSLContextBuilder sslBuilder = SSLContexts.custom() 114 | .loadTrustMaterial(truststore, null); 115 | 116 | if (keyStoreConf != null) { 117 | Objects.requireNonNull(keyStorePass, "keystore password is required"); 118 | Path keyStorePath = Paths.get(keyStoreConf); 119 | KeyStore keyStore = KeyStore.getInstance("pkcs12"); 120 | try (InputStream is = Files.newInputStream(keyStorePath)) { 121 | keyStore.load(is, keyStorePass.toCharArray()); 122 | } 123 | sslBuilder.loadKeyMaterial(keyStore, keyStorePass.toCharArray()); 124 | } 125 | 126 | return sslBuilder.build(); 127 | } catch (Exception e) { 128 | throw new SslContextException(e); 129 | } 130 | } 131 | 132 | private HttpHost[] parseHosts(String hosts, String protocol, int port) { 133 | return Arrays.stream(hosts.split(";")) 134 | .map(host -> new HttpHost(host, port, protocol)) 135 | .toArray(HttpHost[]::new); 136 | } 137 | 138 | public RestHighLevelClient getClient() { 139 | return client; 140 | } 141 | 142 | public long getConnectionRetryBackoff() { 143 | return connectionRetryBackoff; 144 | } 145 | 146 | public int getMaxConnectionAttempts() { 147 | return maxConnectionAttempts; 148 | } 149 | 150 | public void closeQuietly() { 151 | try { 152 | client.close(); 153 | } catch (IOException e) { 154 | logger.error("error in close", e); 155 | } 156 | } 157 | 158 | } 159 | -------------------------------------------------------------------------------- /src/main/java/com/github/dariobalinzo/elastic/ElasticConnectionBuilder.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.github.dariobalinzo.elastic; 18 | 19 | public class ElasticConnectionBuilder { 20 | final String hosts; 21 | final int port; 22 | 23 | String protocol = "http"; 24 | int maxConnectionAttempts = 3; 25 | long connectionRetryBackoff = 1_000; 26 | String user; 27 | String pwd; 28 | 29 | String trustStorePath; 30 | String trustStorePassword; 31 | String keyStorePath; 32 | String keyStorePassword; 33 | 34 | public ElasticConnectionBuilder(String hosts, int port) { 35 | this.hosts = hosts; 36 | this.port = port; 37 | } 38 | 39 | public ElasticConnectionBuilder withProtocol(String protocol) { 40 | this.protocol = protocol; 41 | return this; 42 | } 43 | 44 | public ElasticConnectionBuilder withUser(String user) { 45 | this.user = user; 46 | return this; 47 | } 48 | 49 | public ElasticConnectionBuilder withPassword(String password) { 50 | this.pwd = password; 51 | return this; 52 | } 53 | 54 | public ElasticConnectionBuilder withMaxAttempts(int maxConnectionAttempts) { 55 | this.maxConnectionAttempts = maxConnectionAttempts; 56 | return this; 57 | } 58 | 59 | public ElasticConnectionBuilder withBackoff(long connectionRetryBackoff) { 60 | this.connectionRetryBackoff = connectionRetryBackoff; 61 | return this; 62 | } 63 | 64 | public ElasticConnectionBuilder withTrustStore(String path, String password) { 65 | this.trustStorePath = path; 66 | this.trustStorePassword = password; 67 | return this; 68 | } 69 | 70 | public ElasticConnectionBuilder withKeyStore(String path, String password) { 71 | this.keyStorePath = path; 72 | this.keyStorePassword = password; 73 | return this; 74 | } 75 | 76 | public ElasticConnection build() { 77 | return new ElasticConnection(this); 78 | } 79 | 80 | } 81 | -------------------------------------------------------------------------------- /src/main/java/com/github/dariobalinzo/elastic/ElasticIndexMonitorThread.java: -------------------------------------------------------------------------------- 1 | package com.github.dariobalinzo.elastic; 2 | 3 | import org.apache.kafka.connect.connector.ConnectorContext; 4 | import org.apache.kafka.connect.errors.ConnectException; 5 | import org.slf4j.Logger; 6 | import org.slf4j.LoggerFactory; 7 | 8 | import java.util.ArrayList; 9 | import java.util.List; 10 | import java.util.concurrent.CountDownLatch; 11 | import java.util.concurrent.TimeUnit; 12 | 13 | 14 | /** 15 | * Thread that monitors Elastic for changes to the set of topics. 
16 | */ 17 | public class ElasticIndexMonitorThread extends Thread { 18 | private static final Logger log = LoggerFactory.getLogger(ElasticIndexMonitorThread.class); 19 | private static final long TIMEOUT = 10_000L; 20 | 21 | private final ConnectorContext context; 22 | private final CountDownLatch shutdownLatch; 23 | private final long pollMs; 24 | private final ElasticRepository elasticRepository; 25 | private final String prefix; 26 | private List indexes; 27 | 28 | public ElasticIndexMonitorThread(ConnectorContext context, long pollMs, ElasticRepository elasticRepository, String prefix) { 29 | this.context = context; 30 | this.shutdownLatch = new CountDownLatch(1); 31 | this.pollMs = pollMs; 32 | this.elasticRepository = elasticRepository; 33 | this.prefix = prefix; 34 | this.indexes = new ArrayList<>(); 35 | } 36 | 37 | public static long getTimeout() { 38 | return TIMEOUT; 39 | } 40 | 41 | @Override 42 | public void run() { 43 | while (shutdownLatch.getCount() > 0) { 44 | try { 45 | if (updateIndexes()) { 46 | context.requestTaskReconfiguration(); 47 | } 48 | } catch (Exception e) { 49 | context.raiseError(e); 50 | throw e; 51 | } 52 | 53 | try { 54 | boolean shuttingDown = shutdownLatch.await(pollMs, TimeUnit.MILLISECONDS); 55 | if (shuttingDown) { 56 | return; 57 | } 58 | } catch (InterruptedException e) { 59 | log.error("Unexpected InterruptedException, ignoring: ", e); 60 | } 61 | } 62 | } 63 | 64 | public synchronized List indexes() { 65 | 66 | long started = System.currentTimeMillis(); 67 | long now = started; 68 | while (indexes.isEmpty() && now - started < TIMEOUT) { 69 | try { 70 | wait(TIMEOUT - (now - started)); 71 | } catch (InterruptedException e) { 72 | // Ignore 73 | } 74 | now = System.currentTimeMillis(); 75 | } 76 | if (indexes.isEmpty()) { 77 | throw new ConnectException("Cannot find any elasticsearch index"); 78 | } 79 | return indexes; 80 | } 81 | 82 | public void shutdown() { 83 | shutdownLatch.countDown(); 84 | } 85 | 86 | private synchronized boolean updateIndexes() { 87 | final List indexes; 88 | try { 89 | indexes = elasticRepository.catIndices(this.prefix); 90 | log.debug("Got the following topics: {}", indexes); 91 | } catch (RuntimeException e) { 92 | log.error("Error while trying to get updated topics list, ignoring and waiting for next table poll interval", e); 93 | return false; 94 | } 95 | 96 | if (!indexes.equals(this.indexes)) { 97 | log.debug("After filtering we got topics: {}", indexes); 98 | List previousIndexes = this.indexes; 99 | this.indexes = indexes; 100 | notifyAll(); 101 | // Only return true if the table list wasn't previously null, i.e. if this was not the 102 | // first table lookup 103 | return !previousIndexes.isEmpty(); 104 | } 105 | return false; 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /src/main/java/com/github/dariobalinzo/elastic/ElasticJsonNaming.java: -------------------------------------------------------------------------------- 1 | package com.github.dariobalinzo.elastic; 2 | 3 | public class ElasticJsonNaming { 4 | public static String removeKeywordSuffix(String fieldName) { 5 | return fieldName == null ? 
null : fieldName.replace(".keyword", ""); 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /src/main/java/com/github/dariobalinzo/elastic/ElasticRepository.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
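Putting the connection classes above together, a sketch of how a client might be built; every value below (hosts, credentials, paths) is a placeholder. Hosts are separated by ';' because parseHosts splits on that character, and the stores are loaded as PKCS12 files per getSslContext.

    class ConnectionDemo {                                     // hypothetical demo class
        static ElasticConnection connect() {
            return new ElasticConnectionBuilder("es-node-1;es-node-2", 9200)
                    .withProtocol("https")
                    .withUser("elastic")                       // placeholder credentials
                    .withPassword("changeme")
                    .withTrustStore("/etc/ssl/truststore.p12", "storePass")
                    .withMaxAttempts(5)                        // retry budget used by executeSearch below
                    .withBackoff(2_000)                        // ms to sleep between failed attempts
                    .build();
        }
    }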
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.github.dariobalinzo.elastic; 18 | 19 | import com.github.dariobalinzo.elastic.response.Cursor; 20 | import com.github.dariobalinzo.elastic.response.PageResult; 21 | import org.elasticsearch.action.search.SearchRequest; 22 | import org.elasticsearch.action.search.SearchResponse; 23 | import org.elasticsearch.client.Request; 24 | import org.elasticsearch.client.RequestOptions; 25 | import org.elasticsearch.client.Response; 26 | import org.elasticsearch.index.query.QueryBuilder; 27 | import org.elasticsearch.search.builder.SearchSourceBuilder; 28 | import org.elasticsearch.search.sort.SortOrder; 29 | import org.slf4j.Logger; 30 | import org.slf4j.LoggerFactory; 31 | 32 | import java.io.BufferedReader; 33 | import java.io.IOException; 34 | import java.io.InputStreamReader; 35 | import java.util.*; 36 | import java.util.stream.Collectors; 37 | 38 | import static com.github.dariobalinzo.elastic.ElasticJsonNaming.removeKeywordSuffix; 39 | import static org.elasticsearch.index.query.QueryBuilders.*; 40 | 41 | public final class ElasticRepository { 42 | private final static Logger logger = LoggerFactory.getLogger(ElasticRepository.class); 43 | 44 | private final ElasticConnection elasticConnection; 45 | 46 | private final String cursorSearchField; 47 | private final String secondaryCursorSearchField; 48 | private final CursorField cursorField; 49 | private final CursorField secondaryCursorField; 50 | 51 | private int pageSize = 5000; 52 | 53 | public ElasticRepository(ElasticConnection elasticConnection) { 54 | this(elasticConnection, "_id"); 55 | } 56 | 57 | public ElasticRepository(ElasticConnection elasticConnection, String cursorField) { 58 | this(elasticConnection, cursorField, null); 59 | } 60 | 61 | public ElasticRepository(ElasticConnection elasticConnection, String cursorSearchField, String secondaryCursorSearchField) { 62 | this.elasticConnection = elasticConnection; 63 | this.cursorSearchField = cursorSearchField; 64 | this.cursorField = new CursorField(cursorSearchField); 65 | this.secondaryCursorSearchField = secondaryCursorSearchField; 66 | this.secondaryCursorField = secondaryCursorSearchField == null ? null : new CursorField(secondaryCursorSearchField); 67 | } 68 | 69 | public PageResult searchAfter(String index, Cursor cursor) throws IOException, InterruptedException { 70 | QueryBuilder queryBuilder = cursor.getPrimaryCursor() == null ? 
71 | matchAllQuery() : 72 | buildGreaterThen(cursorSearchField, cursor.getPrimaryCursor()); 73 | 74 | SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder() 75 | .query(queryBuilder) 76 | .size(pageSize) 77 | .sort(cursorSearchField, SortOrder.ASC); 78 | 79 | SearchRequest searchRequest = new SearchRequest(index) 80 | .source(searchSourceBuilder); 81 | 82 | SearchResponse response = executeSearch(searchRequest); 83 | 84 | List> documents = extractDocuments(response); 85 | 86 | Cursor lastCursor; 87 | if (documents.isEmpty()) { 88 | lastCursor = Cursor.empty(); 89 | } else { 90 | Map lastDocument = documents.get(documents.size() - 1); 91 | lastCursor = new Cursor(cursorField.read(lastDocument)); 92 | } 93 | return new PageResult(index, documents, lastCursor); 94 | } 95 | 96 | private List> extractDocuments(SearchResponse response) { 97 | return Arrays.stream(response.getHits().getHits()) 98 | .map(hit -> { 99 | Map sourceMap = hit.getSourceAsMap(); 100 | sourceMap.put("es-id", hit.getId()); 101 | sourceMap.put("es-index", hit.getIndex()); 102 | return sourceMap; 103 | }).collect(Collectors.toList()); 104 | } 105 | 106 | public PageResult searchAfterWithSecondarySort(String index, Cursor cursor) throws IOException, InterruptedException { 107 | Objects.requireNonNull(secondaryCursorField); 108 | String primaryCursor = cursor.getPrimaryCursor(); 109 | String secondaryCursor = cursor.getSecondaryCursor(); 110 | boolean noPrevCursor = primaryCursor == null && secondaryCursor == null; 111 | 112 | QueryBuilder queryBuilder = noPrevCursor ? matchAllQuery() : 113 | getSecondarySortFieldQuery(primaryCursor, secondaryCursor); 114 | 115 | SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder() 116 | .query(queryBuilder) 117 | .size(pageSize) 118 | .sort(cursorSearchField, SortOrder.ASC) 119 | .sort(secondaryCursorSearchField, SortOrder.ASC); 120 | 121 | SearchRequest searchRequest = new SearchRequest(index) 122 | .source(searchSourceBuilder); 123 | 124 | SearchResponse response = executeSearch(searchRequest); 125 | 126 | List> documents = extractDocuments(response); 127 | 128 | Cursor lastCursor; 129 | if (documents.isEmpty()) { 130 | lastCursor = Cursor.empty(); 131 | } else { 132 | Map lastDocument = documents.get(documents.size() - 1); 133 | String primaryCursorValue = cursorField.read(lastDocument); 134 | String secondaryCursorValue = secondaryCursorField.read(lastDocument); 135 | lastCursor = new Cursor(primaryCursorValue, secondaryCursorValue); 136 | } 137 | return new PageResult(index, documents, lastCursor); 138 | } 139 | 140 | private QueryBuilder buildGreaterThen(String cursorField, String cursorValue) { 141 | return rangeQuery(cursorField).from(cursorValue, false); 142 | } 143 | 144 | private QueryBuilder getSecondarySortFieldQuery(String primaryCursor, String secondaryCursor) { 145 | if (secondaryCursor == null) { 146 | return buildGreaterThen(cursorSearchField, primaryCursor); 147 | } 148 | return boolQuery() 149 | .minimumShouldMatch(1) 150 | .should(buildGreaterThen(cursorSearchField, primaryCursor)) 151 | .should( 152 | boolQuery() 153 | .filter(matchQuery(cursorSearchField, primaryCursor)) 154 | .filter(buildGreaterThen(secondaryCursorSearchField, secondaryCursor)) 155 | ); 156 | } 157 | 158 | private SearchResponse executeSearch(SearchRequest searchRequest) throws IOException, InterruptedException { 159 | int maxTrials = elasticConnection.getMaxConnectionAttempts(); 160 | if (maxTrials <= 0) { 161 | throw new IllegalArgumentException("MaxConnectionAttempts 
should be > 0"); 162 | } 163 | IOException lastError = null; 164 | for (int i = 0; i < maxTrials; ++i) { 165 | try { 166 | return elasticConnection.getClient() 167 | .search(searchRequest, RequestOptions.DEFAULT); 168 | } catch (IOException e) { 169 | lastError = e; 170 | Thread.sleep(elasticConnection.getConnectionRetryBackoff()); 171 | } 172 | } 173 | throw lastError; 174 | } 175 | 176 | public List catIndices(String prefix) { 177 | Response resp; 178 | try { 179 | resp = elasticConnection.getClient() 180 | .getLowLevelClient() 181 | .performRequest(new Request("GET", "/_cat/indices")); 182 | } catch (IOException e) { 183 | logger.error("error in searching index names"); 184 | throw new RuntimeException(e); 185 | } 186 | 187 | List result = new ArrayList<>(); 188 | try (BufferedReader reader = new BufferedReader(new InputStreamReader(resp.getEntity().getContent()))) { 189 | String line; 190 | 191 | while ((line = reader.readLine()) != null) { 192 | String index = line.split("\\s+")[2]; 193 | if (index.startsWith(prefix)) { 194 | result.add(index); 195 | } 196 | } 197 | } catch (IOException e) { 198 | logger.error("error while getting indices", e); 199 | } 200 | 201 | Collections.sort(result); 202 | 203 | return result; 204 | } 205 | 206 | public void refreshIndex(String index) { 207 | try { 208 | elasticConnection.getClient() 209 | .getLowLevelClient() 210 | .performRequest(new Request("POST", "/" + index + "/_refresh")); 211 | } catch (IOException e) { 212 | logger.error("error in refreshing index " + index); 213 | throw new RuntimeException(e); 214 | } 215 | } 216 | 217 | public void setPageSize(int pageSize) { 218 | this.pageSize = pageSize; 219 | } 220 | } 221 | -------------------------------------------------------------------------------- /src/main/java/com/github/dariobalinzo/elastic/SslContextException.java: -------------------------------------------------------------------------------- 1 | package com.github.dariobalinzo.elastic; 2 | 3 | public class SslContextException extends RuntimeException { 4 | public SslContextException(Exception e) { 5 | super(e); 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /src/main/java/com/github/dariobalinzo/elastic/response/Cursor.java: -------------------------------------------------------------------------------- 1 | package com.github.dariobalinzo.elastic.response; 2 | 3 | public class Cursor { 4 | private final String primaryCursor; 5 | private final String secondaryCursor; 6 | 7 | public Cursor(String primaryCursor, String secondaryCursor) { 8 | this.primaryCursor = primaryCursor; 9 | this.secondaryCursor = secondaryCursor; 10 | } 11 | 12 | public Cursor(String primaryCursor) { 13 | this.primaryCursor = primaryCursor; 14 | this.secondaryCursor = null; 15 | } 16 | 17 | public String getPrimaryCursor() { 18 | return primaryCursor; 19 | } 20 | 21 | public String getSecondaryCursor() { 22 | return secondaryCursor; 23 | } 24 | 25 | public static Cursor empty() { 26 | return new Cursor(null, null); 27 | } 28 | 29 | @Override 30 | public String toString() { 31 | return "Cursor{" + 32 | "primaryCursor='" + primaryCursor + '\'' + 33 | ", secondaryCursor='" + secondaryCursor + '\'' + 34 | '}'; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/com/github/dariobalinzo/elastic/response/PageResult.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2018 Dario Balinzo 
(dariobalinzo@gmail.com) 3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.github.dariobalinzo.elastic.response; 18 | 19 | import java.util.List; 20 | import java.util.Map; 21 | 22 | public class PageResult { 23 | private final String index; 24 | private final List<Map<String, Object>> documents; 25 | private final Cursor lastCursor; 26 | 27 | public PageResult(String index, List<Map<String, Object>> documents, Cursor cursor) { 28 | this.index = index; 29 | this.documents = documents; 30 | this.lastCursor = cursor; 31 | } 32 | 33 | public List<Map<String, Object>> getDocuments() { 34 | return documents; 35 | } 36 | 37 | public Cursor getLastCursor() { 38 | return lastCursor; 39 | } 40 | 41 | public String getIndex() { 42 | return index; 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/com/github/dariobalinzo/filter/BlacklistFilter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
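A sketch of the search-after pagination loop that ElasticRepository, Cursor, and PageResult support together; "my-index" is a placeholder, and the enclosing method declares the checked exceptions thrown by searchAfter.

    import java.io.IOException;
    import java.util.Map;

    class PagingDemo {                                         // hypothetical demo class
        static void drain(ElasticRepository repo) throws IOException, InterruptedException {
            Cursor cursor = Cursor.empty();                    // empty cursor -> match-all on the first page
            PageResult page;
            do {
                page = repo.searchAfter("my-index", cursor);   // placeholder index name
                for (Map<String, Object> doc : page.getDocuments()) {
                    // each hit also carries the "es-id" and "es-index" keys added in extractDocuments
                }
                cursor = page.getLastCursor();                 // resume strictly after the last document seen
            } while (!page.getDocuments().isEmpty());
        }
    }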
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.dariobalinzo.filter; 17 | 18 | import java.util.List; 19 | import java.util.Map; 20 | import java.util.Set; 21 | 22 | public class BlacklistFilter implements DocumentFilter { 23 | private final JsonFilterVisitor visitor; 24 | private final Set<String> fieldsToRemove; 25 | 26 | public BlacklistFilter(Set<String> fieldsToRemove) { 27 | this.fieldsToRemove = fieldsToRemove; 28 | visitor = new JsonFilterVisitor(this::filterBlacklistItem); 29 | } 30 | 31 | private Object filterBlacklistItem(String key, Object value) { 32 | if (value instanceof Map || value instanceof List) { 33 | boolean shouldVisitNestedObj = fieldsToRemove.stream() 34 | .anyMatch(jsonPath -> jsonPath.startsWith(key)); 35 | return shouldVisitNestedObj ? value : null; 36 | } 37 | return fieldsToRemove.contains(key) ? null : value; 38 | } 39 | 40 | @Override 41 | public void filter(Map<String, Object> document) { 42 | visitor.visit(document); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/com/github/dariobalinzo/filter/DocumentFilter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
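An illustrative use of BlacklistFilter: paths use the same dot notation the visitor builds, and matching fields are removed from the document in place. The sample data is made up.

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.Map;

    class BlacklistDemo {                                      // hypothetical demo class
        static void demo() {
            Map<String, Object> customer = new HashMap<>();
            customer.put("name", "Jane");
            customer.put("creditCard", "4111111111111111");
            Map<String, Object> doc = new HashMap<>();
            doc.put("status", "shipped");
            doc.put("customer", customer);

            DocumentFilter filter = new BlacklistFilter(new HashSet<>(Arrays.asList("customer.creditCard")));
            filter.filter(doc);                                // doc is now {status=shipped, customer={name=Jane}}
        }
    }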
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.github.dariobalinzo.filter; 18 | 19 | import java.util.Map; 20 | 21 | public interface DocumentFilter { 22 | 23 | void filter(Map<String, Object> document); 24 | 25 | } 26 | -------------------------------------------------------------------------------- /src/main/java/com/github/dariobalinzo/filter/JsonCastFilter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
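DocumentFilter is the seam for plugging custom in-place transformations into the pipeline; a minimal hypothetical implementation could look like this.

    import java.util.Map;

    // Hypothetical example: stamps every document before schema conversion.
    class TaggingFilter implements DocumentFilter {
        @Override
        public void filter(Map<String, Object> document) {
            document.put("ingestedBy", "elastic-source-connector");  // illustrative extra field
        }
    }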
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.dariobalinzo.filter; 17 | 18 | import com.fasterxml.jackson.core.JsonProcessingException; 19 | import com.fasterxml.jackson.databind.ObjectMapper; 20 | 21 | import java.util.Map; 22 | import java.util.Set; 23 | 24 | public class JsonCastFilter implements DocumentFilter { 25 | private final Set<String> fieldsToCast; 26 | private final JsonFilterVisitor visitor; 27 | private final ObjectMapper objectMapper = new ObjectMapper(); 28 | 29 | public JsonCastFilter(Set<String> fieldsToCast) { 30 | this.fieldsToCast = fieldsToCast; 31 | visitor = new JsonFilterVisitor(this::checkIfJsonCastNeeded); 32 | } 33 | 34 | @Override 35 | public void filter(Map<String, Object> document) { 36 | visitor.visit(document); 37 | } 38 | 39 | private Object checkIfJsonCastNeeded(String key, Object value) { 40 | if (fieldsToCast.contains(key)) { 41 | return castToJson(value); 42 | } else { 43 | return value; 44 | } 45 | } 46 | 47 | private String castToJson(Object value) { 48 | try { 49 | return objectMapper.writeValueAsString(value); 50 | } catch (JsonProcessingException e) { 51 | throw new RuntimeException(e); 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/com/github/dariobalinzo/filter/JsonElementFilter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
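A sketch of JsonCastFilter in action: the listed field is serialized to a JSON string with Jackson instead of being traversed as a nested object, which is useful when a nested payload has no stable schema. Sample data is made up.

    import java.util.Collections;
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.Map;

    class JsonCastDemo {                                       // hypothetical demo class
        static void demo() {
            Map<String, Object> payload = new HashMap<>();
            payload.put("qty", 2);
            Map<String, Object> doc = new HashMap<>();
            doc.put("payload", payload);

            DocumentFilter filter = new JsonCastFilter(new HashSet<>(Collections.singletonList("payload")));
            filter.filter(doc);                                // doc.get("payload") is now the String {"qty":2}
        }
    }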
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.dariobalinzo.filter; 17 | 18 | @FunctionalInterface 19 | public interface JsonElementFilter { 20 | Object filterElement(String fieldPath, Object value); 21 | } 22 | -------------------------------------------------------------------------------- /src/main/java/com/github/dariobalinzo/filter/JsonFilterVisitor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.dariobalinzo.filter; 17 | 18 | import java.util.Iterator; 19 | import java.util.List; 20 | import java.util.Map; 21 | 22 | public class JsonFilterVisitor { 23 | private final JsonElementFilter businessLogic; 24 | 25 | public JsonFilterVisitor(JsonElementFilter businessLogic) { 26 | this.businessLogic = businessLogic; 27 | } 28 | 29 | public void visit(Map document) { 30 | visitJsonDocument("", document); 31 | } 32 | 33 | @SuppressWarnings("unchecked") 34 | private void visitJsonDocument(String prefixPathName, Map document) { 35 | Iterator> iterator = document.entrySet().iterator(); 36 | while (iterator.hasNext()) { 37 | Map.Entry entry = iterator.next(); 38 | String fullPathKey = prefixPathName + entry.getKey(); 39 | Object element = businessLogic.filterElement(fullPathKey, entry.getValue()); 40 | if (element == null) { 41 | iterator.remove(); 42 | } else { 43 | entry.setValue(element); 44 | } 45 | 46 | if (entry.getValue() instanceof List) { 47 | List nestedList = (List) entry.getValue(); 48 | visitNestedList(fullPathKey + ".", nestedList); 49 | } else if (entry.getValue() instanceof Map) { 50 | String nestedObjectPath = prefixPathName + entry.getKey() + "."; 51 | visitJsonDocument(nestedObjectPath, (Map) entry.getValue()); 52 | } 53 | } 54 | } 55 | 56 | private void visitNestedList(String prefixPathName, List nestedList) { 57 | nestedList.forEach(item -> visitNestedMap(prefixPathName, item)); 58 | } 59 | 60 | @SuppressWarnings("unchecked") 61 | private void visitNestedMap(String prefixPathName, Object item) { 62 | if (item instanceof Map) { 63 | visitJsonDocument(prefixPathName, (Map) item); 64 | } 65 | } 66 | 67 | } 68 | -------------------------------------------------------------------------------- /src/main/java/com/github/dariobalinzo/filter/WhitelistFilter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
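Because JsonElementFilter is a functional interface, the visitor above can also be driven directly with a lambda: returning null removes a field, returning a different object replaces it, and the path argument is the full dot-separated location of the field. A hypothetical redaction pass:

    import java.util.Map;

    class RedactionDemo {                                      // hypothetical demo class
        static void redact(Map<String, Object> document) {
            JsonFilterVisitor visitor = new JsonFilterVisitor(
                    (path, value) -> path.endsWith("password") ? "***" : value);
            visitor.visit(document);                           // rewrites matches at any nesting depth, in place
        }
    }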
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.dariobalinzo.filter; 17 | 18 | import java.util.List; 19 | import java.util.Map; 20 | import java.util.Set; 21 | 22 | public class WhitelistFilter implements DocumentFilter { 23 | private final JsonFilterVisitor visitor; 24 | private final Set<String> allowedValues; 25 | 26 | public WhitelistFilter(Set<String> allowedValues) { 27 | this.allowedValues = allowedValues; 28 | visitor = new JsonFilterVisitor(this::filterWhitelistItem); 29 | } 30 | 31 | private Object filterWhitelistItem(String key, Object value) { 32 | if (value instanceof Map || value instanceof List) { 33 | boolean shouldVisitNestedObj = allowedValues.stream() 34 | .anyMatch(jsonPath -> jsonPath.startsWith(key)); 35 | return shouldVisitNestedObj ? value : null; 36 | } 37 | return allowedValues.contains(key) ? value : null; 38 | } 39 | 40 | @Override 41 | public void filter(Map<String, Object> document) { 42 | visitor.visit(document); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/com/github/dariobalinzo/schema/AvroName.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
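WhitelistFilter mirrors the blacklist but keeps only the listed paths; because parent objects match via startsWith, listing "order.qty" is enough to keep the enclosing "order" map alive while pruning its other fields. Sample data is made up.

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.Map;

    class WhitelistDemo {                                      // hypothetical demo class
        static void demo() {
            Map<String, Object> order = new HashMap<>();
            order.put("qty", 2);
            order.put("price", 9.5);
            Map<String, Object> doc = new HashMap<>();
            doc.put("order", order);
            doc.put("debug", "trace-123");

            DocumentFilter filter = new WhitelistFilter(new HashSet<>(Arrays.asList("order.qty")));
            filter.filter(doc);                                // doc is now {order={qty=2}}
        }
    }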
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.github.dariobalinzo.schema; 18 | 19 | public class AvroName implements FieldNameConverter { 20 | 21 | public String from(String elasticName) { 22 | return elasticName == null ? null : filterInvalidCharacters(elasticName); 23 | } 24 | 25 | public String from(String prefix, String elasticName) { 26 | return elasticName == null ? prefix : prefix + filterInvalidCharacters(elasticName); 27 | } 28 | 29 | private String filterInvalidCharacters(String elasticName) { 30 | boolean alphabetic = Character.isLetter(elasticName.charAt(0)); 31 | if (!alphabetic) { 32 | elasticName = "avro" + elasticName; 33 | } 34 | return elasticName.replaceAll("[^a-zA-Z0-9]", ""); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/com/github/dariobalinzo/schema/FieldNameConverter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
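The effect of AvroName on a few representative inputs (the inputs are made up): invalid characters are stripped, and a non-letter first character triggers the "avro" prefix.

    class AvroNameDemo {                                       // hypothetical demo class
        static void demo() {
            FieldNameConverter names = new AvroName();
            String a = names.from("user-name");                // "username"
            String b = names.from("1st_field");                // "avro1stfield"
            String c = names.from("es-id");                    // "esid" (the id field added by ElasticRepository)
        }
    }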
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.github.dariobalinzo.schema; 18 | 19 | public interface FieldNameConverter { 20 | 21 | String from(String elasticName); 22 | 23 | String from(String prefix, String elasticName); 24 | 25 | } 26 | -------------------------------------------------------------------------------- /src/main/java/com/github/dariobalinzo/schema/NopNameConverter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.github.dariobalinzo.schema; 18 | 19 | public class NopNameConverter implements FieldNameConverter { 20 | 21 | public String from(String elasticName) { 22 | return elasticName; 23 | } 24 | 25 | public String from(String prefix, String elasticName) { 26 | return elasticName == null ? prefix : prefix + elasticName; 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /src/main/java/com/github/dariobalinzo/schema/SchemaConverter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.github.dariobalinzo.schema; 18 | 19 | import org.apache.kafka.connect.data.Field; 20 | import org.apache.kafka.connect.data.Schema; 21 | import org.apache.kafka.connect.data.SchemaBuilder; 22 | 23 | import java.util.LinkedHashMap; 24 | import java.util.List; 25 | import java.util.Map; 26 | import java.util.Set; 27 | import java.util.function.Consumer; 28 | import java.util.stream.Collectors; 29 | 30 | import static org.apache.kafka.connect.data.Schema.OPTIONAL_BOOLEAN_SCHEMA; 31 | import static org.apache.kafka.connect.data.Schema.OPTIONAL_FLOAT64_SCHEMA; 32 | import static org.apache.kafka.connect.data.Schema.OPTIONAL_INT64_SCHEMA; 33 | import static org.apache.kafka.connect.data.Schema.OPTIONAL_STRING_SCHEMA; 34 | import static org.apache.kafka.connect.data.Schema.Type.ARRAY; 35 | import static org.apache.kafka.connect.data.Schema.Type.FLOAT64; 36 | import static org.apache.kafka.connect.data.Schema.Type.INT64; 37 | import static org.apache.kafka.connect.data.Schema.Type.STRUCT; 38 | import static org.apache.kafka.connect.data.SchemaBuilder.array; 39 | import static org.apache.kafka.connect.data.SchemaBuilder.struct; 40 | 41 | public class SchemaConverter { 42 | 43 | private final FieldNameConverter converter; 44 | 45 | public SchemaConverter(FieldNameConverter converter) { 46 | this.converter = converter; 47 | } 48 | 49 | public Schema convert(Map elasticDocument, String schemaName) { 50 | String validSchemaName = converter.from("", schemaName); 51 | SchemaBuilder schemaBuilder = struct().name(validSchemaName); 52 | convertDocumentSchema("", elasticDocument, schemaBuilder); 53 | return schemaBuilder.build(); 54 | } 55 | 56 | @SuppressWarnings("unchecked") 57 | private void convertDocumentSchema(String prefixName, Map doc, SchemaBuilder schemaBuilder) { 58 | for (Map.Entry entry : doc.entrySet()) { 59 | String key = entry.getKey(); 60 | Object value = entry.getValue(); 61 | String validKeyName = converter.from(key); 62 | if (value instanceof String) { 63 | schemaBuilder.field(validKeyName, OPTIONAL_STRING_SCHEMA); 64 | } else if (value instanceof Boolean) { 65 | schemaBuilder.field(validKeyName, OPTIONAL_BOOLEAN_SCHEMA); 66 | } else if (value instanceof Integer) { 67 | schemaBuilder.field(validKeyName, OPTIONAL_INT64_SCHEMA); 68 | } else if (value instanceof Long) { 69 | schemaBuilder.field(validKeyName, OPTIONAL_INT64_SCHEMA); 70 | } else if (value instanceof Float) { 71 | schemaBuilder.field(validKeyName, OPTIONAL_FLOAT64_SCHEMA); 72 | } else if (value instanceof Double) { 73 | schemaBuilder.field(validKeyName, OPTIONAL_FLOAT64_SCHEMA); 74 | } else if (value instanceof List) { 75 | if (!((List) value).isEmpty()) { 76 | Object head = ((List) value).get(0); 77 | if (head instanceof Map) { 78 | convertListOfObject(prefixName, schemaBuilder, key, (List>) value); 79 | } else { 80 | convertListSchema(prefixName, schemaBuilder, key, (List)value); 81 | } 82 | } 83 | } else if (value instanceof Map) { 84 | convertMapSchema(prefixName, schemaBuilder, entry); 85 | } else { 86 | if (value != null) { 87 | throw new RuntimeException("type not supported " + key); 88 | } 89 | } 90 | } 91 | } 92 | 93 | @SuppressWarnings("unchecked") 94 | 
private void convertMapSchema(String prefixName, SchemaBuilder schemaBuilder, Map.Entry entry) { 95 | String key = entry.getKey(); 96 | Map value = (Map) entry.getValue(); 97 | String validKeyName = converter.from(prefixName, key); 98 | SchemaBuilder nestedSchema = struct().name(validKeyName).optional(); 99 | convertDocumentSchema(validKeyName + ".", value, nestedSchema); 100 | schemaBuilder.field(converter.from(key), nestedSchema.build()); 101 | } 102 | 103 | @SuppressWarnings("unchecked") 104 | private void convertListSchema(String prefixName, SchemaBuilder schemaBuilder, String k, List items) { 105 | String validKeyName = converter.from(k); 106 | 107 | Set schemas = items.stream().filter(i -> i != null).map(this::convertListSchema).collect(Collectors.toSet()); 108 | Schema itemSchema; 109 | if(schemas.isEmpty()) { 110 | itemSchema = OPTIONAL_STRING_SCHEMA; 111 | } else if(schemas.size() == 1) { 112 | itemSchema = schemas.iterator().next(); 113 | } else if(!schemas.contains(OPTIONAL_STRING_SCHEMA) && !schemas.contains(OPTIONAL_BOOLEAN_SCHEMA)) { 114 | itemSchema = OPTIONAL_FLOAT64_SCHEMA; 115 | } else { 116 | throw new IllegalArgumentException("list " + validKeyName + " contains items of different schemas: " + schemas); 117 | } 118 | 119 | schemaBuilder.field( 120 | validKeyName, 121 | array(itemSchema).optional().build() 122 | ).build(); 123 | } 124 | 125 | private Schema convertListSchema(Object item) { 126 | if (item instanceof String) { 127 | return OPTIONAL_STRING_SCHEMA; 128 | } else if (item instanceof Boolean) { 129 | return OPTIONAL_BOOLEAN_SCHEMA; 130 | } else if (item instanceof Integer) { 131 | return OPTIONAL_INT64_SCHEMA; 132 | } else if (item instanceof Long) { 133 | return OPTIONAL_INT64_SCHEMA; 134 | } else if (item instanceof Float) { 135 | return OPTIONAL_FLOAT64_SCHEMA; 136 | } else if (item instanceof Double) { 137 | return OPTIONAL_FLOAT64_SCHEMA; 138 | } else { 139 | throw new RuntimeException("error in converting list: type not supported " + item.getClass()); 140 | } 141 | } 142 | 143 | 144 | private void convertListOfObject(String prefixName, SchemaBuilder schemaBuilder, String k, 145 | List> list) { 146 | String validKeyName = converter.from(k); 147 | String keyWithPrefix = converter.from(prefixName, k); 148 | Schema current = null; 149 | for (Map obj : list) { 150 | SchemaBuilder nestedSchema = struct().name(keyWithPrefix).optional(); 151 | convertDocumentSchema(keyWithPrefix + ".", obj, nestedSchema); 152 | 153 | if(current == null) { 154 | current = nestedSchema; 155 | } else { 156 | current = merge(current, nestedSchema); 157 | } 158 | } 159 | schemaBuilder.field(validKeyName, array(current)); 160 | } 161 | 162 | private Schema merge(Schema a, Schema b) { 163 | if (!(a.type() == STRUCT && b.type() == STRUCT)) { 164 | if(a.type() == INT64 && b.type() == FLOAT64) { 165 | return b; 166 | } else if(a.type() == FLOAT64 && b.type() == INT64) { 167 | return a; 168 | } else if (a.type() == ARRAY && b.type() == ARRAY) { 169 | SchemaBuilder builder = SchemaBuilder.array(merge(a.valueSchema(), b.valueSchema())); 170 | copyInto(builder, a); 171 | return builder; 172 | } else { 173 | // when we reach this cases we were not able to correctly merge the two schemas 174 | // we return the first and hope that it somehow works out 175 | return a; 176 | } 177 | } 178 | 179 | Map fieldsUnion = new LinkedHashMap<>(); 180 | Consumer collector = f -> { 181 | fieldsUnion.computeIfPresent(f.name(), (key, old) -> merge(old.schema(), f.schema())); 182 | 
fieldsUnion.putIfAbsent(f.name(), f.schema()); 183 | }; 184 | a.fields().forEach(collector); 185 | b.fields().forEach(collector); 186 | 187 | SchemaBuilder union = struct().name(a.name()).optional(); 188 | for (Map.Entry field : fieldsUnion.entrySet()) { 189 | union.field(field.getKey(), from(field.getValue()).optional().build()); 190 | } 191 | return union; 192 | } 193 | 194 | private SchemaBuilder from(Schema schema) { 195 | if(schema instanceof SchemaBuilder) { 196 | return (SchemaBuilder) schema; 197 | } else { 198 | SchemaBuilder builder; 199 | switch (schema.type()) { 200 | case STRUCT: { 201 | builder = struct(); 202 | for (Field field : schema.fields()) { 203 | builder.field(field.name(), field.schema()); 204 | } 205 | break; 206 | } 207 | case MAP: { 208 | builder = SchemaBuilder.map(schema.keySchema(), schema.valueSchema()); 209 | break; 210 | } 211 | case ARRAY: { 212 | builder = SchemaBuilder.array(schema.valueSchema()); 213 | break; 214 | } 215 | default: { 216 | builder = new SchemaBuilder(schema.type()); 217 | break; 218 | } 219 | } 220 | copyInto(builder, schema); 221 | return builder; 222 | } 223 | } 224 | 225 | private void copyInto(SchemaBuilder builder, Schema from) { 226 | if(from.isOptional()) { 227 | builder.optional(); 228 | } 229 | builder.name(from.name()); 230 | if(from.defaultValue() != null) { 231 | builder.defaultValue(from.defaultValue()); 232 | } 233 | builder.doc(from.doc()); 234 | if(from.parameters() != null) { 235 | builder.parameters(from.parameters()); 236 | } 237 | builder.version(from.version()); 238 | } 239 | } 240 | -------------------------------------------------------------------------------- /src/main/java/com/github/dariobalinzo/schema/StructConverter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
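A sketch of SchemaConverter on a small made-up document: scalars map to optional Connect types (integers widen to int64, floats to float64), and nested maps become optional sub-structs named by their field path.

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.kafka.connect.data.Schema;

    class SchemaDemo {                                         // hypothetical demo class
        static Schema demo() {
            Map<String, Object> customer = new HashMap<>();
            customer.put("name", "Jane");
            Map<String, Object> doc = new HashMap<>();
            doc.put("status", "open");
            doc.put("qty", 3);
            doc.put("customer", customer);

            SchemaConverter schemaConverter = new SchemaConverter(new AvroName());
            // -> STRUCT{status: optional string, qty: optional int64,
            //           customer: optional STRUCT{name: optional string}}
            return schemaConverter.convert(doc, "myIndex");
        }
    }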
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.github.dariobalinzo.schema; 18 | 19 | import org.apache.kafka.connect.data.Schema; 20 | import org.apache.kafka.connect.data.Struct; 21 | 22 | import java.util.ArrayList; 23 | import java.util.List; 24 | import java.util.Map; 25 | import java.util.stream.Collectors; 26 | 27 | import static org.apache.kafka.connect.data.Schema.Type.FLOAT64; 28 | 29 | public class StructConverter { 30 | 31 | private final FieldNameConverter converter; 32 | 33 | public StructConverter(FieldNameConverter converter) { 34 | this.converter = converter; 35 | } 36 | 37 | public Struct convert(Map<String, Object> doc, Schema schema) { 38 | Struct struct = new Struct(schema); 39 | convertDocumentStruct("", doc, struct, schema); 40 | return struct; 41 | } 42 | 43 | private void convertDocumentStruct(String prefixName, Map<String, Object> doc, Struct struct, Schema schema) { 44 | for (Map.Entry<String, Object> entry : doc.entrySet()) { 45 | String key = entry.getKey(); 46 | Object value = entry.getValue(); 47 | 48 | if (isScalar(value)) { 49 | String field = converter.from(key); 50 | boolean isFloat = struct.schema().field(field).schema().type() == FLOAT64; 51 | if (isFloat && value instanceof Number) { 52 | value = ((Number) value).doubleValue(); 53 | } else { 54 | value = handleNumericPrecision(value); 55 | } 56 | struct.put(field, value); 57 | } else if (value instanceof List) { 58 | convertListToAvroArray(prefixName, struct, schema, entry); 59 | } else if (value instanceof Map) { 60 | convertMapToAvroStruct(prefixName, struct, schema, entry); 61 | } else { 62 | if (value != null) { 63 | throw new RuntimeException("type not supported " + key); 64 | } 65 | } 66 | } 67 | } 68 | 69 | private boolean isScalar(Object value) { 70 | return value instanceof String 71 | || value instanceof Boolean 72 | || value instanceof Integer 73 | || value instanceof Long 74 | || value instanceof Double 75 | || value instanceof Float; 76 | } 77 | 78 | private Object handleNumericPrecision(Object value) { 79 | if (value instanceof Integer) { 80 | value = ((Integer) value).longValue(); 81 | } else if (value instanceof Float) { 82 | value = ((Float) value).doubleValue(); 83 | } 84 | return value; 85 | } 86 | 87 | @SuppressWarnings("unchecked") 88 | private void convertListToAvroArray(String prefixName, Struct struct, Schema schema, Map.Entry<String, Object> entry) { 89 | String key = entry.getKey(); 90 | List<Object> value = (List<Object>) entry.getValue(); 91 | 92 | if (!value.isEmpty()) { 93 | // assuming that every item of the list has the same schema 94 | Object head = value.stream().filter(i -> i != null).findFirst().orElse(null); 95 | if (head == null) { 96 | struct.put(converter.from(key), value); 97 | } else if (isScalar(head)) { 98 | boolean isFloat64 = struct.schema().field(converter.from(key)).schema().valueSchema().type().equals(FLOAT64); 99 | List<Object> scalars = value.stream() 100 | .map(s -> isFloat64 ?
((Number) s).doubleValue() : handleNumericPrecision(s)) 101 | .collect(Collectors.toList()); 102 | struct.put(converter.from(key), scalars); 103 | } else if (head instanceof Map) { 104 | List<Struct> array = value 105 | .stream() 106 | .map(doc -> convertListOfObject(prefixName, schema, key, (Map) doc)) 107 | .collect(Collectors.toCollection(ArrayList::new)); 108 | struct.put(converter.from(key), array); 109 | } else { 110 | throw new RuntimeException("error in converting list: type not supported"); 111 | } 112 | 113 | } 114 | } 115 | 116 | @SuppressWarnings("unchecked") 117 | private void convertMapToAvroStruct(String prefixName, Struct struct, Schema schema, Map.Entry<String, Object> entry) { 118 | String k = entry.getKey(); 119 | Map<String, Object> value = (Map<String, Object>) entry.getValue(); 120 | Struct nestedStruct = new Struct(schema.field(converter.from(k)).schema()); 121 | convertDocumentStruct( 122 | converter.from(prefixName, k) + ".", 123 | value, 124 | nestedStruct, 125 | schema.field(converter.from(k)).schema() 126 | ); 127 | struct.put(converter.from(k), nestedStruct); 128 | } 129 | 130 | private Struct convertListOfObject(String prefixName, Schema schema, String key, Map<String, Object> doc) { 131 | String validKey = converter.from(key); 132 | String validKeyPrefix = converter.from(prefixName, key) + "."; 133 | Struct nestedStruct = new Struct( 134 | schema.field(validKey) 135 | .schema() 136 | .valueSchema() 137 | ); 138 | 139 | convertDocumentStruct( 140 | validKeyPrefix, 141 | doc, 142 | nestedStruct, 143 | schema.field(validKey) 144 | .schema() 145 | .valueSchema() 146 | ); 147 | return nestedStruct; 148 | } 149 | 150 | 151 | } 152 | -------------------------------------------------------------------------------- /src/main/java/com/github/dariobalinzo/task/ElasticSourceTask.java: 1 | /* 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
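handleNumericPrecision above is the struct-side mirror of the schema rules: SchemaConverter maps Integer to int64 and Float to float64, so the raw parsed value has to be widened before Struct.put, which validates against the field schema. A minimal sketch (hypothetical class; only the standard Connect data API is assumed):

import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;

public class NumericWideningSketch {
    public static void main(String[] args) {
        Schema schema = SchemaBuilder.struct().name("doc")
                .field("version", Schema.OPTIONAL_INT64_SCHEMA)  // Integer in the JSON, int64 in the schema
                .field("score", Schema.OPTIONAL_FLOAT64_SCHEMA)  // Float in the JSON, float64 in the schema
                .build();

        Object version = 7;   // suppose the parser produced an Integer
        Object score = 0.5f;  // ...and a Float

        // put() validates the value against the field schema, so widening is required:
        Struct struct = new Struct(schema)
                .put("version", ((Number) version).longValue())
                .put("score", ((Number) score).doubleValue());
        System.out.println(struct);
    }
}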
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.github.dariobalinzo.task; 18 | 19 | import com.github.dariobalinzo.ElasticSourceConnectorConfig; 20 | import com.github.dariobalinzo.Version; 21 | import com.github.dariobalinzo.elastic.CursorField; 22 | import com.github.dariobalinzo.elastic.ElasticConnection; 23 | import com.github.dariobalinzo.elastic.ElasticConnectionBuilder; 24 | import com.github.dariobalinzo.elastic.ElasticRepository; 25 | import com.github.dariobalinzo.elastic.response.Cursor; 26 | import com.github.dariobalinzo.elastic.response.PageResult; 27 | import com.github.dariobalinzo.filter.BlacklistFilter; 28 | import com.github.dariobalinzo.filter.DocumentFilter; 29 | import com.github.dariobalinzo.filter.JsonCastFilter; 30 | import com.github.dariobalinzo.filter.WhitelistFilter; 31 | import com.github.dariobalinzo.schema.*; 32 | import org.apache.kafka.common.config.ConfigException; 33 | import org.apache.kafka.connect.data.Schema; 34 | import org.apache.kafka.connect.data.Struct; 35 | import org.apache.kafka.connect.errors.ConnectException; 36 | import org.apache.kafka.connect.source.SourceRecord; 37 | import org.apache.kafka.connect.source.SourceTask; 38 | import org.slf4j.Logger; 39 | import org.slf4j.LoggerFactory; 40 | 41 | import java.util.*; 42 | import java.util.concurrent.atomic.AtomicBoolean; 43 | 44 | import static com.github.dariobalinzo.elastic.ElasticJsonNaming.removeKeywordSuffix; 45 | 46 | public class ElasticSourceTask extends SourceTask { 47 | 48 | private static final Logger logger = LoggerFactory.getLogger(ElasticSourceTask.class); 49 | private static final String INDEX = "index"; 50 | static final String POSITION = "position"; 51 | static final String POSITION_SECONDARY = "position_secondary"; 52 | 53 | 54 | private final OffsetSerializer offsetSerializer = new OffsetSerializer(); 55 | private SchemaConverter schemaConverter; 56 | private StructConverter structConverter; 57 | 58 | private ElasticSourceTaskConfig config; 59 | private ElasticConnection es; 60 | 61 | private final AtomicBoolean stopping = new AtomicBoolean(false); 62 | private List indices; 63 | private String topic; 64 | private String cursorSearchField; 65 | private CursorField cursorField; 66 | private String secondaryCursorSearchField; 67 | private CursorField secondaryCursorField; 68 | private int pollingMs; 69 | private final Map lastCursor = new HashMap<>(); 70 | private final Map sent = new HashMap<>(); 71 | private ElasticRepository elasticRepository; 72 | 73 | private final List documentFilters = new ArrayList<>(); 74 | 75 | @Override 76 | public String version() { 77 | return Version.VERSION; 78 | } 79 | 80 | @Override 81 | public void start(Map properties) { 82 | try { 83 | config = new ElasticSourceTaskConfig(properties); 84 | } catch (ConfigException e) { 85 | throw new ConnectException("Couldn't start ElasticSourceTask due to configuration error", e); 86 | } 87 | 88 | indices = Arrays.asList(config.getString(ElasticSourceTaskConfig.INDICES_CONFIG).split(",")); 89 | if (indices.isEmpty()) { 90 | throw new ConnectException("Invalid configuration: each ElasticSourceTask must have at " 91 | + "least one index assigned to it"); 92 | } 93 | 94 | topic = 
config.getString(ElasticSourceConnectorConfig.TOPIC_PREFIX_CONFIG); 95 | cursorSearchField = config.getString(ElasticSourceConnectorConfig.INCREMENTING_FIELD_NAME_CONFIG); 96 | Objects.requireNonNull(cursorSearchField, ElasticSourceConnectorConfig.INCREMENTING_FIELD_NAME_CONFIG 97 | + " conf is mandatory"); 98 | cursorField = new CursorField(cursorSearchField); 99 | secondaryCursorSearchField = config.getString(ElasticSourceConnectorConfig.SECONDARY_INCREMENTING_FIELD_NAME_CONFIG); 100 | secondaryCursorField = secondaryCursorSearchField == null ? null : new CursorField(secondaryCursorSearchField); 101 | pollingMs = Integer.parseInt(config.getString(ElasticSourceConnectorConfig.POLL_INTERVAL_MS_CONFIG)); 102 | 103 | initConnectorFilters(); 104 | initConnectorFieldConverter(); 105 | initEsConnection(); 106 | } 107 | 108 | private void initConnectorFilters() { 109 | String whiteFilters = config.getString(ElasticSourceConnectorConfig.FIELDS_WHITELIST_CONFIG); 110 | if (whiteFilters != null) { 111 | String[] whiteFiltersArray = whiteFilters.split(";"); 112 | Set whiteFiltersSet = new HashSet<>(Arrays.asList(whiteFiltersArray)); 113 | documentFilters.add(new WhitelistFilter(whiteFiltersSet)); 114 | } 115 | 116 | String blackFilters = config.getString(ElasticSourceConnectorConfig.FIELDS_BLACKLIST_CONFIG); 117 | if (blackFilters != null) { 118 | String[] blackFiltersArray = blackFilters.split(";"); 119 | Set blackFiltersSet = new HashSet<>(Arrays.asList(blackFiltersArray)); 120 | documentFilters.add(new BlacklistFilter(blackFiltersSet)); 121 | } 122 | 123 | String jsonCastFilters = config.getString(ElasticSourceConnectorConfig.FIELDS_JSON_CAST_CONFIG); 124 | if (jsonCastFilters != null) { 125 | String[] jsonCastFiltersArray = jsonCastFilters.split(";"); 126 | Set whiteFiltersSet = new HashSet<>(Arrays.asList(jsonCastFiltersArray)); 127 | documentFilters.add(new JsonCastFilter(whiteFiltersSet)); 128 | } 129 | } 130 | 131 | private void initConnectorFieldConverter() { 132 | String nameConverterConfig = config.getString(ElasticSourceConnectorConfig.CONNECTOR_FIELDNAME_CONVERTER_CONFIG); 133 | 134 | FieldNameConverter fieldNameConverter; 135 | switch (nameConverterConfig) { 136 | case ElasticSourceConnectorConfig.NOP_FIELDNAME_CONVERTER: 137 | fieldNameConverter = new NopNameConverter(); 138 | break; 139 | case ElasticSourceConnectorConfig.AVRO_FIELDNAME_CONVERTER: 140 | default: 141 | fieldNameConverter = new AvroName(); 142 | break; 143 | } 144 | this.schemaConverter = new SchemaConverter(fieldNameConverter); 145 | this.structConverter = new StructConverter(fieldNameConverter); 146 | } 147 | 148 | private void initEsConnection() { 149 | String esScheme = config.getString(ElasticSourceConnectorConfig.ES_SCHEME_CONF); 150 | String esHost = config.getString(ElasticSourceConnectorConfig.ES_HOST_CONF); 151 | int esPort = Integer.parseInt(config.getString(ElasticSourceConnectorConfig.ES_PORT_CONF)); 152 | 153 | String esUser = config.getString(ElasticSourceConnectorConfig.ES_USER_CONF); 154 | String esPwd = config.getString(ElasticSourceConnectorConfig.ES_PWD_CONF); 155 | 156 | int batchSize = Integer.parseInt(config.getString(ElasticSourceConnectorConfig.BATCH_MAX_ROWS_CONFIG)); 157 | 158 | int maxConnectionAttempts = Integer.parseInt(config.getString( 159 | ElasticSourceConnectorConfig.CONNECTION_ATTEMPTS_CONFIG 160 | )); 161 | long connectionRetryBackoff = Long.parseLong(config.getString( 162 | ElasticSourceConnectorConfig.CONNECTION_BACKOFF_CONFIG 163 | )); 164 | ElasticConnectionBuilder 
connectionBuilder = new ElasticConnectionBuilder(esHost, esPort) 165 | .withProtocol(esScheme) 166 | .withMaxAttempts(maxConnectionAttempts) 167 | .withBackoff(connectionRetryBackoff); 168 | 169 | String truststore = config.getString(ElasticSourceConnectorConfig.ES_TRUSTSTORE_CONF); 170 | String truststorePass = config.getString(ElasticSourceConnectorConfig.ES_TRUSTSTORE_PWD_CONF); 171 | String keystore = config.getString(ElasticSourceConnectorConfig.ES_KEYSTORE_CONF); 172 | String keystorePass = config.getString(ElasticSourceConnectorConfig.ES_KEYSTORE_PWD_CONF); 173 | 174 | if (truststore != null) { 175 | connectionBuilder.withTrustStore(truststore, truststorePass); 176 | } 177 | 178 | if (keystore != null) { 179 | connectionBuilder.withKeyStore(keystore, keystorePass); 180 | } 181 | 182 | if (esUser == null || esUser.isEmpty()) { 183 | es = connectionBuilder.build(); 184 | } else { 185 | es = connectionBuilder.withUser(esUser) 186 | .withPassword(esPwd) 187 | .build(); 188 | } 189 | 190 | elasticRepository = new ElasticRepository(es, cursorSearchField, secondaryCursorSearchField); 191 | elasticRepository.setPageSize(batchSize); 192 | } 193 | 194 | 195 | //will be called by connect with a different thread than the stop thread 196 | @Override 197 | public List poll() { 198 | List results = new ArrayList<>(); 199 | try { 200 | for (String index : indices) { 201 | if (!stopping.get()) { 202 | logger.info("fetching from {}", index); 203 | Cursor lastValue = fetchLastOffset(index); 204 | logger.info("found last value {}", lastValue); 205 | PageResult pageResult = secondaryCursorSearchField == null ? 206 | elasticRepository.searchAfter(index, lastValue) : 207 | elasticRepository.searchAfterWithSecondarySort(index, lastValue); 208 | parseResult(pageResult, results); 209 | logger.info("index {} total messages: {} ", index, sent.get(index)); 210 | } 211 | } 212 | if (results.isEmpty()) { 213 | logger.info("no data found, sleeping for {} ms", pollingMs); 214 | Thread.sleep(pollingMs); 215 | } 216 | 217 | } catch (Exception e) { 218 | logger.error("error", e); 219 | } 220 | return results; 221 | } 222 | 223 | private Cursor fetchLastOffset(String index) { 224 | //first we check in cache memory the last value 225 | if (lastCursor.get(index) != null) { 226 | return lastCursor.get(index); 227 | } 228 | 229 | //if cache is empty we check the framework 230 | Map offset = context.offsetStorageReader().offset(Collections.singletonMap(INDEX, index)); 231 | if (offset != null) { 232 | String primaryCursor = (String) offset.get(POSITION); 233 | String secondaryCursor = (String) offset.get(POSITION_SECONDARY); 234 | return new Cursor(primaryCursor, secondaryCursor); 235 | } else { 236 | return Cursor.empty(); 237 | } 238 | } 239 | 240 | private void parseResult(PageResult pageResult, List results) { 241 | String index = pageResult.getIndex(); 242 | for (Map elasticDocument : pageResult.getDocuments()) { 243 | Map sourcePartition = Collections.singletonMap(INDEX, index); 244 | Map sourceOffset = offsetSerializer.toMapOffset( 245 | cursorField, 246 | secondaryCursorField, 247 | elasticDocument 248 | ); 249 | String key = offsetSerializer.toStringOffset( 250 | cursorField, 251 | secondaryCursorField, 252 | index, 253 | elasticDocument 254 | ); 255 | 256 | lastCursor.put(index, pageResult.getLastCursor()); 257 | sent.merge(index, 1, Integer::sum); 258 | 259 | documentFilters.forEach(jsonFilter -> jsonFilter.filter(elasticDocument)); 260 | 261 | Schema schema = schemaConverter.convert(elasticDocument, index); 262 
| Struct struct = structConverter.convert(elasticDocument, schema); 263 | 264 | SourceRecord sourceRecord = new SourceRecord( 265 | sourcePartition, 266 | sourceOffset, 267 | topic + index, 268 | //KEY 269 | Schema.STRING_SCHEMA, 270 | key, 271 | //VALUE 272 | schema, 273 | struct); 274 | results.add(sourceRecord); 275 | } 276 | } 277 | 278 | // will be called by Kafka Connect on a different thread than the poll thread 279 | public void stop() { 280 | stopping.set(true); 281 | if (es != null) { 282 | es.closeQuietly(); 283 | } 284 | } 285 | } 286 | -------------------------------------------------------------------------------- /src/main/java/com/github/dariobalinzo/task/ElasticSourceTaskConfig.java: 1 | /* 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
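For reference, each record emitted by parseResult above couples a partition map naming the index with an offset map carrying the cursor, and derives the topic and key from the configured prefix and the cursor value. A sketch of that shape with hypothetical values (the real task passes the converted schema and struct as the value):

import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.source.SourceRecord;

import java.util.Collections;
import java.util.Map;

public class RecordShapeSketch {
    public static void main(String[] args) {
        Map<String, String> partition = Collections.singletonMap("index", "source_index");
        Map<String, String> offset = Collections.singletonMap("position", "112");

        SourceRecord record = new SourceRecord(
                partition, offset,
                "topic_source_index",                       // topic prefix + index name, as in poll()
                Schema.STRING_SCHEMA, "source_index_112",   // key: index and cursor joined with '_'
                Schema.STRING_SCHEMA, "value-placeholder"); // stand-in for the converted struct
        System.out.println(record);
    }
}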
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.github.dariobalinzo.task; 18 | 19 | 20 | import com.github.dariobalinzo.ElasticSourceConnectorConfig; 21 | import org.apache.kafka.common.config.ConfigDef; 22 | import org.apache.kafka.common.config.ConfigDef.Importance; 23 | import org.apache.kafka.common.config.ConfigDef.Type; 24 | 25 | import java.util.Map; 26 | 27 | /** 28 | * Configuration options for a single ElasticSourceTask. These are processed after all 29 | * Connector-level configs have been parsed. 30 | */ 31 | public class ElasticSourceTaskConfig extends ElasticSourceConnectorConfig { 32 | 33 | static ConfigDef config = baseConfigDef() 34 | .define(INDICES_CONFIG, Type.STRING, Importance.HIGH, INDICES_CONFIG); 35 | 36 | public ElasticSourceTaskConfig(Map props) { 37 | super(config, props); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/com/github/dariobalinzo/task/OffsetSerializer.java: -------------------------------------------------------------------------------- 1 | package com.github.dariobalinzo.task; 2 | 3 | import com.github.dariobalinzo.elastic.CursorField; 4 | 5 | import java.util.HashMap; 6 | import java.util.Map; 7 | 8 | import static com.github.dariobalinzo.task.ElasticSourceTask.POSITION; 9 | import static com.github.dariobalinzo.task.ElasticSourceTask.POSITION_SECONDARY; 10 | 11 | public class OffsetSerializer { 12 | 13 | public Map toMapOffset(CursorField primaryCursor, CursorField secondaryCursor, Map document) { 14 | Map result = new HashMap<>(); 15 | result.put(POSITION, primaryCursor.read(document)); 16 | if (secondaryCursor != null) { 17 | result.put(POSITION_SECONDARY, secondaryCursor.read(document)); 18 | } 19 | return result; 20 | } 21 | 22 | public String toStringOffset(CursorField cursor, CursorField secondaryCursor, String index, Map document) { 23 | String cursorValue = cursor.read(document); 24 | if (secondaryCursor == null) { 25 | return String.join("_", index, cursorValue); 26 | } else { 27 | return String.join("_", index, cursorValue, secondaryCursor.read(document)); 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/test/java/com/github/dariobalinzo/ElasticIndexMonitorThreadTest.java: -------------------------------------------------------------------------------- 1 | package com.github.dariobalinzo; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | import static org.mockito.Mockito.atLeast; 5 | 6 | import java.io.IOException; 7 | 8 | import com.github.dariobalinzo.elastic.ElasticIndexMonitorThread; 9 | 10 | import org.apache.kafka.connect.connector.ConnectorContext; 11 | import org.junit.Before; 12 | import org.junit.Test; 13 | import org.mockito.Mock; 14 | import org.mockito.Mockito; 15 | import org.mockito.MockitoAnnotations; 16 | 17 | 18 | public class ElasticIndexMonitorThreadTest extends TestContainersContext { 19 | 20 | @Mock 21 | private ConnectorContext context; 22 | 23 | @Before 24 | public void init() { 25 | MockitoAnnotations.initMocks(this); 26 | } 27 | 28 | @Test 29 | public void shouldRefreshIndexesList() throws InterruptedException, IOException { 30 | //given 31 | long 
pollInterval = 1000L; 32 | deleteTestIndex(); 33 | 34 | insertMockData(10, TEST_INDEX); 35 | refreshIndex(); 36 | 37 | ElasticIndexMonitorThread indexMonitorThread = new ElasticIndexMonitorThread(context, pollInterval, repository, TEST_INDEX); 38 | indexMonitorThread.start(); 39 | 40 | assertEquals(1, indexMonitorThread.indexes().size()); 41 | 42 | //when another index is created in Elastic 43 | insertMockData(10, TEST_INDEX + '2'); 44 | refreshIndex(); 45 | 46 | long waitRefresh = pollInterval + (long)(Math.random() * 1000); 47 | Thread.sleep(waitRefresh); 48 | 49 | //then 50 | Mockito.verify(context, atLeast(1)).requestTaskReconfiguration(); 51 | assertEquals(2, indexMonitorThread.indexes().size()); 52 | 53 | indexMonitorThread.shutdown(); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/test/java/com/github/dariobalinzo/FooTest.java: -------------------------------------------------------------------------------- 1 | package com.github.dariobalinzo; 2 | 3 | import com.github.dariobalinzo.schema.AvroName; 4 | import com.github.dariobalinzo.schema.FieldNameConverter; 5 | import com.github.dariobalinzo.schema.NopNameConverter; 6 | import com.github.dariobalinzo.schema.SchemaConverter; 7 | import com.github.dariobalinzo.schema.StructConverter; 8 | import org.apache.kafka.connect.data.Schema; 9 | import org.apache.kafka.connect.data.Struct; 10 | import org.junit.Before; 11 | import org.junit.Test; 12 | import org.testcontainers.shaded.com.fasterxml.jackson.databind.ObjectMapper; 13 | 14 | import java.io.IOException; 15 | import java.nio.file.Files; 16 | import java.nio.file.Paths; 17 | import java.util.Map; 18 | 19 | public class FooTest { 20 | 21 | private SchemaConverter schemaConverter; 22 | private StructConverter structConverter; 23 | 24 | Map elasticDocument; 25 | 26 | @Before 27 | public void setup() throws IOException { 28 | FieldNameConverter fieldNameConverter = new NopNameConverter(); 29 | this.schemaConverter = new SchemaConverter(fieldNameConverter); 30 | this.structConverter = new StructConverter(fieldNameConverter); 31 | 32 | String doc = new String(Files.readAllBytes(Paths.get("src/test/java/com/github/dariobalinzo/foo.json"))); 33 | elasticDocument = new ObjectMapper().readValue(doc, Map.class); 34 | } 35 | 36 | @Test 37 | public void foo() { 38 | 39 | Schema schema = schemaConverter.convert(elasticDocument, "foo"); 40 | Struct struct = structConverter.convert(elasticDocument, schema); 41 | 42 | System.out.println(struct); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/test/java/com/github/dariobalinzo/TestContainersContext.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
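OffsetSerializer above produces two encodings of the same cursor: a map for Connect's offset storage and a string used as the record key. A usage sketch with hypothetical field names (the printed values assume CursorField.read renders document values as strings):

import com.github.dariobalinzo.elastic.CursorField;
import com.github.dariobalinzo.task.OffsetSerializer;

import java.util.HashMap;
import java.util.Map;

public class OffsetSerializerDemo {
    public static void main(String[] args) {
        Map<String, Object> document = new HashMap<>();
        document.put("ts", 112);
        document.put("customer", "customerB");

        OffsetSerializer serializer = new OffsetSerializer();
        CursorField primary = new CursorField("ts");
        CursorField secondary = new CursorField("customer");

        // expected: {position=112, position_secondary=customerB}
        System.out.println(serializer.toMapOffset(primary, secondary, document));
        // expected: my_index_112_customerB
        System.out.println(serializer.toStringOffset(primary, secondary, "my_index", document));
    }
}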
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.github.dariobalinzo; 18 | 19 | import com.github.dariobalinzo.elastic.ElasticConnection; 20 | import com.github.dariobalinzo.elastic.ElasticConnectionBuilder; 21 | import com.github.dariobalinzo.elastic.ElasticRepository; 22 | import com.github.dariobalinzo.task.ElasticSourceTaskConfig; 23 | import org.apache.http.HttpHost; 24 | import org.elasticsearch.action.DocWriteResponse; 25 | import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest; 26 | import org.elasticsearch.action.index.IndexRequest; 27 | import org.elasticsearch.action.index.IndexResponse; 28 | import org.elasticsearch.client.RequestOptions; 29 | import org.elasticsearch.xcontent.XContentBuilder; 30 | import org.elasticsearch.xcontent.XContentFactory; 31 | import org.junit.AfterClass; 32 | import org.junit.BeforeClass; 33 | import org.testcontainers.elasticsearch.ElasticsearchContainer; 34 | 35 | import java.io.IOException; 36 | import java.util.HashMap; 37 | import java.util.Map; 38 | 39 | import static org.junit.Assert.assertEquals; 40 | 41 | public class TestContainersContext { 42 | protected static final int TEST_PAGE_SIZE = 3; 43 | protected static final int MAX_TRIALS = 2; 44 | protected static final int RETRY_WAIT_MS = 1_000; 45 | 46 | protected static final String TEST_INDEX = "source_index"; 47 | protected static final String CURSOR_FIELD = "ts"; 48 | protected static final String NESTED_OBJECT = "nested"; 49 | protected static final String NESTED_CURSOR_FIELD = NESTED_OBJECT + "." + CURSOR_FIELD; 50 | protected static final String SECONDARY_CURSOR_FIELD = "fullName.keyword"; 51 | 52 | protected static final String ELASTICSEARCH_IMAGE = "docker.elastic.co/elasticsearch/elasticsearch:7.11.1"; 53 | 54 | protected static ElasticsearchContainer container; 55 | protected static ElasticConnection connection; 56 | protected static ElasticRepository repository; 57 | protected static ElasticRepository nestedRepository; 58 | protected static ElasticRepository secondarySortRepo; 59 | 60 | @BeforeClass 61 | public static void setupElastic() { 62 | // Create the elasticsearch container. 
63 | container = new ElasticsearchContainer(ELASTICSEARCH_IMAGE); 64 | container.addEnv("ES_JAVA_OPTS", "-Xms512m -Xmx512m"); 65 | container.start(); 66 | 67 | HttpHost httpHost = HttpHost.create(container.getHttpHostAddress()); 68 | connection = new ElasticConnectionBuilder(httpHost.getHostName(), httpHost.getPort()) 69 | .withMaxAttempts(MAX_TRIALS) 70 | .withBackoff(RETRY_WAIT_MS) 71 | .build(); 72 | 73 | repository = new ElasticRepository(connection, CURSOR_FIELD); 74 | repository.setPageSize(TEST_PAGE_SIZE); 75 | 76 | nestedRepository = new ElasticRepository(connection, NESTED_CURSOR_FIELD); 77 | nestedRepository.setPageSize(TEST_PAGE_SIZE); 78 | 79 | secondarySortRepo = new ElasticRepository(connection, CURSOR_FIELD, SECONDARY_CURSOR_FIELD); 80 | secondarySortRepo.setPageSize(TEST_PAGE_SIZE); 81 | } 82 | 83 | 84 | protected void deleteTestIndex() { 85 | try { 86 | connection.getClient().indices().delete(new DeleteIndexRequest(TEST_INDEX), RequestOptions.DEFAULT); 87 | } catch (Exception ignored) { 88 | 89 | } 90 | } 91 | 92 | protected void refreshIndex() throws IOException, InterruptedException { 93 | repository.refreshIndex(TEST_INDEX); 94 | } 95 | 96 | protected void insertMockData(int tsStart) throws IOException { 97 | insertMockData(tsStart, TEST_INDEX); 98 | } 99 | 100 | protected void insertMockData(int tsStart, String index) throws IOException { 101 | insertMockData(tsStart, "Test", index); 102 | } 103 | 104 | protected void insertMockData(int tsStart, String fullName, String index) throws IOException { 105 | XContentBuilder builder = XContentFactory.jsonBuilder() 106 | .startObject() 107 | .field("fullName", fullName) 108 | .field(CURSOR_FIELD, tsStart) 109 | .field("age", 10) 110 | .field("non-avro-field", "non-avro-field") 111 | .field("avroField", "avro-field") 112 | .object(NESTED_OBJECT, b -> b.field(CURSOR_FIELD, tsStart)) 113 | .endObject(); 114 | 115 | IndexRequest indexRequest = new IndexRequest(index); 116 | indexRequest.type("_doc"); 117 | indexRequest.source(builder); 118 | 119 | IndexResponse response = connection.getClient().index(indexRequest, RequestOptions.DEFAULT); 120 | assertEquals(DocWriteResponse.Result.CREATED, response.getResult()); 121 | } 122 | 123 | protected Map getConf() { 124 | HttpHost httpHost = HttpHost.create(container.getHttpHostAddress()); 125 | Map conf = new HashMap<>(); 126 | conf.put(ElasticSourceTaskConfig.INDICES_CONFIG, TEST_INDEX); 127 | conf.put(ElasticSourceConnectorConfig.TOPIC_PREFIX_CONFIG, "topic"); 128 | conf.put(ElasticSourceConnectorConfig.INCREMENTING_FIELD_NAME_CONFIG, CURSOR_FIELD); 129 | conf.put(ElasticSourceConnectorConfig.POLL_INTERVAL_MS_CONFIG, String.valueOf(10)); 130 | conf.put(ElasticSourceConnectorConfig.ES_HOST_CONF, httpHost.getHostName()); 131 | conf.put(ElasticSourceConnectorConfig.ES_PORT_CONF, String.valueOf(httpHost.getPort())); 132 | conf.put(ElasticSourceConnectorConfig.BATCH_MAX_ROWS_CONFIG, String.valueOf(2)); 133 | conf.put(ElasticSourceConnectorConfig.CONNECTION_ATTEMPTS_CONFIG, String.valueOf(MAX_TRIALS)); 134 | conf.put(ElasticSourceConnectorConfig.CONNECTION_BACKOFF_CONFIG, String.valueOf(RETRY_WAIT_MS)); 135 | return conf; 136 | } 137 | 138 | 139 | @AfterClass 140 | public static void stopElastic() { 141 | if (container != null) { 142 | container.close(); 143 | } 144 | } 145 | 146 | } 147 | -------------------------------------------------------------------------------- /src/test/java/com/github/dariobalinzo/elastic/ElasticRepositoryTest.java: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.github.dariobalinzo.elastic; 18 | 19 | import com.github.dariobalinzo.TestContainersContext; 20 | import com.github.dariobalinzo.elastic.response.Cursor; 21 | import com.github.dariobalinzo.elastic.response.PageResult; 22 | import org.junit.Test; 23 | 24 | import java.io.IOException; 25 | import java.util.Collections; 26 | 27 | import static org.junit.Assert.*; 28 | 29 | public class ElasticRepositoryTest extends TestContainersContext { 30 | 31 | 32 | @Test 33 | public void shouldFetchDataFromElastic() throws IOException, InterruptedException { 34 | deleteTestIndex(); 35 | 36 | insertMockData(111); 37 | insertMockData(112); 38 | insertMockData(113); 39 | insertMockData(114); 40 | refreshIndex(); 41 | 42 | PageResult firstPage = repository.searchAfter(TEST_INDEX, Cursor.empty()); 43 | assertEquals(3, firstPage.getDocuments().size()); 44 | 45 | PageResult secondPage = repository.searchAfter(TEST_INDEX, firstPage.getLastCursor()); 46 | assertEquals(1, secondPage.getDocuments().size()); 47 | 48 | PageResult emptyPage = repository.searchAfter(TEST_INDEX, secondPage.getLastCursor()); 49 | assertEquals(0, emptyPage.getDocuments().size()); 50 | assertNull(emptyPage.getLastCursor().getPrimaryCursor()); 51 | 52 | assertEquals(Collections.singletonList(TEST_INDEX), repository.catIndices("source")); 53 | assertEquals(Collections.emptyList(), repository.catIndices("non-existing")); 54 | } 55 | @Test 56 | public void shouldFetchDataFromElasticWithNestedCursor() throws IOException, InterruptedException { 57 | deleteTestIndex(); 58 | 59 | insertMockData(111); 60 | insertMockData(112); 61 | insertMockData(113); 62 | insertMockData(114); 63 | refreshIndex(); 64 | 65 | PageResult firstPage = nestedRepository.searchAfter(TEST_INDEX, Cursor.empty()); 66 | assertEquals(3, firstPage.getDocuments().size()); 67 | 68 | PageResult secondPage = nestedRepository.searchAfter(TEST_INDEX, firstPage.getLastCursor()); 69 | assertEquals(1, secondPage.getDocuments().size()); 70 | 71 | PageResult emptyPage = nestedRepository.searchAfter(TEST_INDEX, secondPage.getLastCursor()); 72 | assertEquals(0, emptyPage.getDocuments().size()); 73 | assertNull(emptyPage.getLastCursor().getPrimaryCursor()); 74 | 75 | assertEquals(Collections.singletonList(TEST_INDEX), nestedRepository.catIndices("source")); 76 | assertEquals(Collections.emptyList(), nestedRepository.catIndices("non-existing")); 77 | } 78 | 79 | @Test 80 | public void shouldListExistingIndices() throws IOException, InterruptedException { 81 | deleteTestIndex(); 82 | insertMockData(111); 83 | refreshIndex(); 84 | 85 | assertEquals(Collections.singletonList(TEST_INDEX), repository.catIndices("source")); 86 | assertEquals(Collections.emptyList(), repository.catIndices("non-existing")); 87 | } 88 | 89 | @Test 90 | public void shouldFetchDataUsingSecondarySortField() throws IOException, InterruptedException { 91 | deleteTestIndex(); 92 | 93 | insertMockData(111, "customerA", TEST_INDEX); 94 | insertMockData(111, "customerB", TEST_INDEX); 95 | insertMockData(111, "customerC", TEST_INDEX); 96 | insertMockData(111, "customerD", TEST_INDEX); 97 | insertMockData(112, "customerA", TEST_INDEX); 98 | insertMockData(113, 
"customerB", TEST_INDEX); 99 | insertMockData(113, "customerC", TEST_INDEX); 100 | insertMockData(113, "customerD", TEST_INDEX); 101 | 102 | refreshIndex(); 103 | 104 | PageResult firstPage = secondarySortRepo.searchAfterWithSecondarySort(TEST_INDEX, Cursor.empty()); 105 | assertEquals(3, firstPage.getDocuments().size()); 106 | 107 | PageResult secondPage = secondarySortRepo.searchAfterWithSecondarySort(TEST_INDEX, firstPage.getLastCursor()); 108 | assertEquals(3, secondPage.getDocuments().size()); 109 | 110 | PageResult thirdPage = secondarySortRepo.searchAfterWithSecondarySort(TEST_INDEX, secondPage.getLastCursor()); 111 | assertEquals(2, thirdPage.getDocuments().size()); 112 | 113 | PageResult emptyPage = secondarySortRepo.searchAfterWithSecondarySort(TEST_INDEX, thirdPage.getLastCursor()); 114 | assertEquals(0, emptyPage.getDocuments().size()); 115 | assertNull(emptyPage.getLastCursor().getPrimaryCursor()); 116 | assertNull(emptyPage.getLastCursor().getSecondaryCursor()); 117 | } 118 | 119 | @Test 120 | public void shouldFetchDataWithAdditionalField() throws IOException, InterruptedException { 121 | deleteTestIndex(); 122 | 123 | insertMockData(110, "customerA", TEST_INDEX); 124 | insertMockData(111, "customerB", TEST_INDEX); 125 | refreshIndex(); 126 | 127 | PageResult firstPage = repository.searchAfter(TEST_INDEX, Cursor.empty()); 128 | firstPage.getDocuments().forEach(item -> { 129 | assertNotNull(item.get((String) "es-index")); 130 | assertNotNull(item.get((String) "es-id")); 131 | }); 132 | } 133 | 134 | } 135 | -------------------------------------------------------------------------------- /src/test/java/com/github/dariobalinzo/elastic/ElasticSourceConnectorTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.github.dariobalinzo.elastic; 18 | 19 | 20 | import com.github.dariobalinzo.ElasticSourceConnector; 21 | import com.github.dariobalinzo.TestContainersContext; 22 | import com.github.dariobalinzo.task.ElasticSourceTaskConfig; 23 | 24 | import org.junit.Test; 25 | 26 | import java.io.IOException; 27 | import java.util.List; 28 | import java.util.Map; 29 | 30 | import static org.junit.Assert.assertEquals; 31 | import static org.junit.Assert.assertNotNull; 32 | 33 | public class ElasticSourceConnectorTest extends TestContainersContext { 34 | 35 | @Test 36 | public void shouldGetAListOfTasks() throws IOException { 37 | //given 38 | ElasticSourceConnector connector = new ElasticSourceConnector(); 39 | connector.start(getConf()); 40 | insertMockData(1, TEST_INDEX + 1); 41 | insertMockData(2, TEST_INDEX + 2); 42 | insertMockData(3, TEST_INDEX + 3); 43 | insertMockData(4, TEST_INDEX + 4); 44 | 45 | try { 46 | Thread.sleep(1000); 47 | } catch (InterruptedException ignored) { 48 | } 49 | 50 | //when 51 | int maxTasks = 3; 52 | List<Map<String, String>> taskList = connector.taskConfigs(maxTasks); 53 | 54 | //then 55 | assertEquals(maxTasks, taskList.size()); 56 | assertNotNull(connector.version()); 57 | connector.stop(); 58 | } 59 | 60 | @Test 61 | public void shouldGetTaskFromFixedList() { 62 | //given 63 | ElasticSourceConnector connector = new ElasticSourceConnector(); 64 | Map<String, String> conf = getConf(); 65 | conf.remove(ElasticSourceTaskConfig.INDEX_PREFIX_CONFIG); 66 | conf.put(ElasticSourceTaskConfig.INDEX_NAMES_CONFIG, "index1,index2,index3"); 67 | connector.start(conf); 68 | 69 | //when 70 | int maxTasks = 3; 71 | List<Map<String, String>> taskList = connector.taskConfigs(maxTasks); 72 | 73 | //then 74 | assertEquals(maxTasks, taskList.size()); 75 | connector.stop(); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/test/java/com/github/dariobalinzo/filter/BlacklistFilterTest.java: 1 | /* 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
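The assertions above only pin the number of task configs. For intuition, Kafka Connect source connectors conventionally split their work units (index names here) across tasks with ConnectorUtils.groupPartitions; whether ElasticSourceConnector uses this exact utility is not shown in this section, so treat the sketch as illustrative:

import org.apache.kafka.connect.util.ConnectorUtils;

import java.util.Arrays;
import java.util.List;

public class TaskSplitSketch {
    public static void main(String[] args) {
        List<String> indices = Arrays.asList("index1", "index2", "index3", "index4");
        // contiguous groups, with leftovers going to the first groups
        List<List<String>> groups = ConnectorUtils.groupPartitions(indices, 3);
        System.out.println(groups); // expected: [[index1, index2], [index3], [index4]]
    }
}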
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.github.dariobalinzo.filter; 18 | 19 | 20 | import com.fasterxml.jackson.databind.ObjectMapper; 21 | import org.junit.Assert; 22 | import org.junit.Test; 23 | 24 | import java.io.IOException; 25 | import java.nio.file.Files; 26 | import java.nio.file.Paths; 27 | import java.util.HashSet; 28 | import java.util.LinkedHashMap; 29 | import java.util.Map; 30 | import java.util.Set; 31 | import java.util.stream.Collectors; 32 | import java.util.stream.Stream; 33 | 34 | import static junit.framework.TestCase.assertEquals; 35 | 36 | public class BlacklistFilterTest { 37 | private final ObjectMapper objectMapper = new ObjectMapper(); 38 | 39 | @Test 40 | public void shouldConvertSimpleSchema() { 41 | //given 42 | Map<String, Object> elasticDocument = new LinkedHashMap<>(); 43 | elasticDocument.put("name", "elastic"); 44 | elasticDocument.put("surname", "search"); 45 | elasticDocument.put("version", 7); 46 | elasticDocument.put("enabled", true); 47 | 48 | //when 49 | Set<String> filterValues = Stream.of( 50 | "name", 51 | "surname", 52 | "version" 53 | ).collect(Collectors.toCollection(HashSet::new)); 54 | BlacklistFilter blacklistFilter = new BlacklistFilter(filterValues); 55 | blacklistFilter.filter(elasticDocument); 56 | 57 | //then 58 | Assert.assertEquals("{enabled=true}", elasticDocument.toString()); 59 | } 60 | 61 | @SuppressWarnings("unchecked") 62 | @Test 63 | public void shouldConvertNestedDocument() throws IOException { 64 | //given 65 | String file = this.getClass().getClassLoader() 66 | .getResource("com/github/dariobalinzo/filter/document.json") 67 | .getFile(); 68 | String jsonDocument = new String(Files.readAllBytes(Paths.get(file))); 69 | 70 | Map<String, Object> elasticDocument = objectMapper.readValue(jsonDocument, Map.class); 71 | 72 | //when 73 | Set<String> blacklist = Stream.of( 74 | "name", 75 | "obj.details.qty", 76 | "order_list.details.qty" 77 | ).collect(Collectors.toSet()); 78 | BlacklistFilter blacklistFilter = new BlacklistFilter(blacklist); 79 | blacklistFilter.filter(elasticDocument); 80 | 81 | //then 82 | assertEquals( 83 | "{age=7, order_list=[{id=1, details={nested_det=test nested inside list}}, {id=2, details={nested_det=test nested inside list}}], obj={key=55, details={nested_det=test nested inside list}}}", 84 | elasticDocument.toString()); 85 | } 86 | 87 | } 88 | -------------------------------------------------------------------------------- /src/test/java/com/github/dariobalinzo/filter/JsonCastFilterTest.java: 1 | /* 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
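The same filter is what initConnectorFilters in ElasticSourceTask builds from configuration: the configured value is split on ';' and the resulting set is wrapped in a BlacklistFilter. A wiring sketch (the config value shown is hypothetical):

import com.github.dariobalinzo.filter.BlacklistFilter;
import com.github.dariobalinzo.filter.DocumentFilter;

import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;

public class BlacklistWiringSketch {
    public static void main(String[] args) {
        String configured = "surname;version"; // hypothetical value of the blacklist config
        DocumentFilter filter = new BlacklistFilter(
                new HashSet<>(Arrays.asList(configured.split(";"))));

        Map<String, Object> doc = new LinkedHashMap<>();
        doc.put("name", "elastic");
        doc.put("surname", "search");
        doc.put("version", 7);

        filter.filter(doc);      // removes the blacklisted paths in place
        System.out.println(doc); // expected: {name=elastic}
    }
}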
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.github.dariobalinzo.filter; 18 | 19 | 20 | import com.fasterxml.jackson.databind.ObjectMapper; 21 | import org.junit.Test; 22 | 23 | import java.io.IOException; 24 | import java.nio.file.Files; 25 | import java.nio.file.Paths; 26 | import java.util.List; 27 | import java.util.Map; 28 | import java.util.Set; 29 | import java.util.stream.Collectors; 30 | import java.util.stream.Stream; 31 | 32 | import static junit.framework.TestCase.assertEquals; 33 | 34 | public class JsonCastFilterTest { 35 | 36 | private final ObjectMapper objectMapper = new ObjectMapper(); 37 | 38 | @SuppressWarnings("unchecked") 39 | @Test 40 | public void shouldConvertSimpleDocument() throws IOException { 41 | //given 42 | String file = this.getClass().getClassLoader() 43 | .getResource("com/github/dariobalinzo/filter/document.json") 44 | .getFile(); 45 | String jsonDocument = new String(Files.readAllBytes(Paths.get(file))); 46 | 47 | Map elasticDocument = objectMapper.readValue(jsonDocument, Map.class); 48 | 49 | //when 50 | Set toCast = Stream.of( 51 | "name", 52 | "obj.details", 53 | "order_list.details" 54 | ).collect(Collectors.toSet()); 55 | JsonCastFilter jsonCastFilter = new JsonCastFilter(toCast); 56 | jsonCastFilter.filter(elasticDocument); 57 | 58 | //then 59 | assertEquals(5, elasticDocument.keySet().size()); 60 | assertEquals("\"elastic\"", elasticDocument.get("name")); 61 | Map obj = (Map) elasticDocument.get("obj"); 62 | assertEquals("{\"nested_det\":\"test nested inside list\",\"qty\":2}", obj.get("details")); 63 | 64 | List nestedList = (List) elasticDocument.get("order_list"); 65 | Map nestedInsideList1 = (Map) nestedList.get(0); 66 | Map nestedInsideList2 = (Map) nestedList.get(1); 67 | 68 | assertEquals("{\"nested_det\":\"test nested inside list\",\"qty\":1}", nestedInsideList1.get("details")); 69 | assertEquals("{\"nested_det\":\"test nested inside list\",\"qty\":2}", nestedInsideList2.get("details")); 70 | } 71 | 72 | } 73 | -------------------------------------------------------------------------------- /src/test/java/com/github/dariobalinzo/filter/WhitelistFilterTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
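What "JSON casting" buys: a nested object whose shape varies between documents becomes one stable string field, instead of forcing the schema merge above to reconcile incompatible structs. The core of the transformation is plain Jackson serialization (conceptual sketch, not the filter's actual implementation):

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;

import java.util.LinkedHashMap;
import java.util.Map;

public class JsonCastIdeaSketch {
    public static void main(String[] args) throws JsonProcessingException {
        Map<String, Object> details = new LinkedHashMap<>();
        details.put("nested_det", "test nested inside list");
        details.put("qty", 2);

        // the nested map is replaced by its JSON text, a plain string value
        String cast = new ObjectMapper().writeValueAsString(details);
        System.out.println(cast); // {"nested_det":"test nested inside list","qty":2}
    }
}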
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.github.dariobalinzo.filter; 18 | 19 | 20 | import com.fasterxml.jackson.databind.ObjectMapper; 21 | import org.junit.Assert; 22 | import org.junit.Test; 23 | 24 | import java.io.IOException; 25 | import java.nio.file.Files; 26 | import java.nio.file.Paths; 27 | import java.util.HashSet; 28 | import java.util.LinkedHashMap; 29 | import java.util.Map; 30 | import java.util.Set; 31 | import java.util.stream.Collectors; 32 | import java.util.stream.Stream; 33 | 34 | import static junit.framework.TestCase.assertEquals; 35 | 36 | public class WhitelistFilterTest { 37 | private final ObjectMapper objectMapper = new ObjectMapper(); 38 | 39 | @Test 40 | public void shouldConvertSimpleSchema() { 41 | //given 42 | Map elasticDocument = new LinkedHashMap<>(); 43 | elasticDocument.put("name", "elastic"); 44 | elasticDocument.put("surname", "search"); 45 | elasticDocument.put("version", 7); 46 | elasticDocument.put("enabled", true); 47 | 48 | //when 49 | Set filterValues = Stream.of( 50 | "name", 51 | "surname", 52 | "version" 53 | ).collect(Collectors.toCollection(HashSet::new)); 54 | WhitelistFilter whitelistFilter = new WhitelistFilter(filterValues); 55 | whitelistFilter.filter(elasticDocument); 56 | 57 | //then 58 | Assert.assertEquals("{name=elastic, surname=search, version=7}", elasticDocument.toString()); 59 | } 60 | 61 | @SuppressWarnings("unchecked") 62 | @Test 63 | public void shouldConvertNestedDocument() throws IOException { 64 | //given 65 | String file = this.getClass().getClassLoader() 66 | .getResource("com/github/dariobalinzo/filter/document.json") 67 | .getFile(); 68 | String jsonDocument = new String(Files.readAllBytes(Paths.get(file))); 69 | 70 | Map elasticDocument = objectMapper.readValue(jsonDocument, Map.class); 71 | 72 | //when 73 | Set whitelist = Stream.of( 74 | "name", 75 | "obj.details.qty", 76 | "order_list.details.qty" 77 | ).collect(Collectors.toSet()); 78 | WhitelistFilter whitelistFilter = new WhitelistFilter(whitelist); 79 | whitelistFilter.filter(elasticDocument); 80 | 81 | //then 82 | assertEquals( 83 | "{name=elastic, order_list=[{details={qty=1}}, {details={qty=2}}], obj={details={qty=2}}}", 84 | elasticDocument.toString()); 85 | } 86 | 87 | } 88 | -------------------------------------------------------------------------------- /src/test/java/com/github/dariobalinzo/foo.json: -------------------------------------------------------------------------------- 1 | { 2 | "equipments": [ 3 | { 4 | "prices": { 5 | "listPrice": { 6 | "netPrice": { 7 | "amount": 420.17, 8 | "currency": "EUR" 9 | }, 10 | "grossPrice": { 11 | "amount": 500, 12 | "currency": "EUR" 13 | }, 14 | "updatedAt": "2023-05-17T04:08:18.046399" 15 | }, 16 | "salesPrice": { 17 | "netPrice": { 18 | "amount": 0, 19 | "currency": "EUR" 20 | }, 21 | "grossPrice": { 22 | "amount": 0, 23 | "currency": "EUR" 24 | }, 25 | "updatedAt": "2023-05-17T04:08:17.844466" 26 | } 27 | } 28 | }, 29 | { 30 | "prices": { 31 | "listPrice": { 32 | "netPrice": { 33 | "amount": 0, 34 | "currency": "EUR" 35 | }, 36 | "grossPrice": { 37 | "amount": 0, 38 | "currency": "EUR" 39 | }, 40 | "updatedAt": "2023-05-17T04:08:18.046401" 41 | }, 42 | "salesPrice": { 43 | 
"netPrice": { 44 | "amount": 0, 45 | "currency": "EUR" 46 | }, 47 | "grossPrice": { 48 | "amount": 0, 49 | "currency": "EUR" 50 | }, 51 | "updatedAt": "2023-05-17T04:08:17.844525" 52 | } 53 | } 54 | }, 55 | { 56 | "prices": { 57 | "listPrice": { 58 | "netPrice": { 59 | "amount": 3403.36, 60 | "currency": "EUR" 61 | }, 62 | "grossPrice": { 63 | "amount": 4050, 64 | "currency": "EUR" 65 | }, 66 | "updatedAt": "2023-05-17T04:08:18.0464" 67 | }, 68 | "salesPrice": { 69 | "netPrice": { 70 | "amount": 0, 71 | "currency": "EUR" 72 | }, 73 | "grossPrice": { 74 | "amount": 0, 75 | "currency": "EUR" 76 | }, 77 | "updatedAt": "2023-05-17T04:08:17.844519" 78 | } 79 | } 80 | }, 81 | { 82 | "prices": { 83 | "listPrice": { 84 | "netPrice": { 85 | "amount": 0, 86 | "currency": "EUR" 87 | }, 88 | "grossPrice": { 89 | "amount": 0, 90 | "currency": "EUR" 91 | }, 92 | "updatedAt": "2023-05-17T04:08:18.046397" 93 | }, 94 | "salesPrice": { 95 | "netPrice": { 96 | "amount": 0, 97 | "currency": "EUR" 98 | }, 99 | "grossPrice": { 100 | "amount": 0, 101 | "currency": "EUR" 102 | }, 103 | "updatedAt": "2023-05-17T04:08:17.844406" 104 | } 105 | } 106 | } 107 | ] 108 | } 109 | -------------------------------------------------------------------------------- /src/test/java/com/github/dariobalinzo/schema/AvroNameTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.github.dariobalinzo.schema; 18 | 19 | 20 | import org.junit.Assert; 21 | import org.junit.Test; 22 | 23 | public class AvroNameTest { 24 | 25 | @Test 26 | public void shouldCreateValidAvroNames() { 27 | //given 28 | String invalidName = "foo.bar"; 29 | FieldNameConverter converter = new AvroName(); 30 | 31 | //when 32 | String validName = converter.from(invalidName); 33 | String validNamePrefix = converter.from("prefix", invalidName); 34 | String startByNumber = converter.from("1invalid"); 35 | 36 | //then 37 | Assert.assertEquals("foobar", validName); 38 | Assert.assertEquals("prefixfoobar", validNamePrefix); 39 | Assert.assertEquals("avro1invalid", startByNumber); 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /src/test/java/com/github/dariobalinzo/task/ElasticSourceTaskTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
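The three assertions above fully pin down the sanitization contract. A plausible reconstruction of the rule (hypothetical; not necessarily AvroName's actual implementation): drop characters that are illegal in Avro names, then prefix a leading digit with "avro":

public class AvroNameRuleSketch {
    static String sanitize(String name) {
        // keep only characters that are legal in Avro names
        String cleaned = name.replaceAll("[^A-Za-z0-9_]", "");
        if (cleaned.isEmpty() || !Character.isDigit(cleaned.charAt(0))) {
            return cleaned;
        }
        return "avro" + cleaned; // Avro names must not start with a digit
    }

    public static void main(String[] args) {
        System.out.println(sanitize("foo.bar"));  // foobar
        System.out.println(sanitize("1invalid")); // avro1invalid
    }
}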
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.github.dariobalinzo.task; 18 | 19 | import com.github.dariobalinzo.ElasticSourceConnectorConfig; 20 | import com.github.dariobalinzo.TestContainersContext; 21 | import org.apache.kafka.connect.data.Field; 22 | import org.apache.kafka.connect.data.Struct; 23 | import org.apache.kafka.connect.source.SourceRecord; 24 | import org.apache.kafka.connect.source.SourceTaskContext; 25 | import org.junit.Before; 26 | import org.junit.Test; 27 | import org.mockito.Mock; 28 | import org.mockito.Mockito; 29 | import org.mockito.MockitoAnnotations; 30 | 31 | import java.io.IOException; 32 | import java.util.List; 33 | import java.util.Map; 34 | 35 | import static com.github.dariobalinzo.ElasticSourceConnectorConfig.SECONDARY_INCREMENTING_FIELD_NAME_CONFIG; 36 | import static org.junit.Assert.*; 37 | 38 | public class ElasticSourceTaskTest extends TestContainersContext { 39 | 40 | @Mock 41 | private SourceTaskContext context; 42 | 43 | @Before 44 | public void init() { 45 | MockitoAnnotations.initMocks(this); 46 | } 47 | 48 | @Test 49 | public void shouldRunSourceTaskWithoutInitialOffset() throws IOException, InterruptedException { 50 | //given 51 | deleteTestIndex(); 52 | 53 | insertMockData(111); 54 | insertMockData(112); 55 | insertMockData(113); 56 | insertMockData(114); 57 | refreshIndex(); 58 | 59 | ElasticSourceTask task = new ElasticSourceTask(); 60 | Mockito.when(context.offsetStorageReader()).thenReturn(MockOffsetFactory.empty()); 61 | task.initialize(context); 62 | 63 | //when (fetching first page) 64 | task.start(getConf()); 65 | List poll1 = task.poll(); 66 | assertEquals(2, poll1.size()); 67 | assertEquals(111L, 68 | ((Struct) poll1.get(0).value()).get("ts") 69 | ); 70 | assertEquals("{position=111}", poll1.get(0).sourceOffset().toString()); 71 | assertEquals( 72 | 112L, 73 | ((Struct) poll1.get(1).value()).get("ts") 74 | ); 75 | assertEquals("{position=112}", poll1.get(1).sourceOffset().toString()); 76 | 77 | //when fetching (second page) 78 | List poll2 = task.poll(); 79 | assertEquals(2, poll2.size()); 80 | assertEquals( 81 | 113L, 82 | ((Struct) poll2.get(0).value()).get("ts") 83 | ); 84 | assertEquals("{position=113}", poll2.get(0).sourceOffset().toString()); 85 | assertEquals( 86 | 114L, 87 | ((Struct) poll2.get(1).value()).get("ts") 88 | ); 89 | assertEquals("{position=114}", poll2.get(1).sourceOffset().toString()); 90 | 91 | //then 92 | List empty = task.poll(); 93 | assertTrue(empty.isEmpty()); 94 | 95 | task.stop(); 96 | } 97 | 98 | @Test 99 | public void shouldRunTask_WithSecondarySort_WithoutInitialOffset() throws IOException, InterruptedException { 100 | //given 101 | deleteTestIndex(); 102 | 103 | insertMockData(111, "customerA", TEST_INDEX); 104 | insertMockData(111, "customerB", TEST_INDEX); 105 | insertMockData(111, "customerC", TEST_INDEX); 106 | insertMockData(111, "customerD", TEST_INDEX); 107 | insertMockData(112, "customerA", TEST_INDEX); 108 | refreshIndex(); 109 | 110 | ElasticSourceTask task = new ElasticSourceTask(); 111 | Mockito.when(context.offsetStorageReader()).thenReturn(MockOffsetFactory.empty()); 112 | task.initialize(context); 113 | 114 | //when (fetching first page) 115 | Map conf = 
getConf(); 116 | conf.put(SECONDARY_INCREMENTING_FIELD_NAME_CONFIG, SECONDARY_CURSOR_FIELD); 117 | task.start(conf); 118 | List<SourceRecord> poll1 = task.poll(); 119 | assertEquals(2, poll1.size()); 120 | assertEquals( 121 | "customerA", 122 | ((Struct) poll1.get(0).value()).get("fullName") 123 | ); 124 | assertEquals( 125 | 111L, 126 | ((Struct) poll1.get(0).value()).get("ts") 127 | ); 128 | assertEquals("{position_secondary=customerA, position=111}", poll1.get(0).sourceOffset().toString()); 129 | assertEquals( 130 | "customerB", 131 | ((Struct) poll1.get(1).value()).get("fullName") 132 | ); 133 | assertEquals( 134 | 111L, 135 | ((Struct) poll1.get(1).value()).get("ts") 136 | ); 137 | assertEquals("{position_secondary=customerB, position=111}", poll1.get(1).sourceOffset().toString()); 138 | 139 | //when (fetching second page) 140 | List<SourceRecord> poll2 = task.poll(); 141 | assertEquals(2, poll2.size()); 142 | assertEquals( 143 | "customerC", 144 | ((Struct) poll2.get(0).value()).get("fullName") 145 | ); 146 | assertEquals( 147 | 111L, 148 | ((Struct) poll2.get(0).value()).get("ts") 149 | ); 150 | assertEquals("{position_secondary=customerC, position=111}", poll2.get(0).sourceOffset().toString()); 151 | assertEquals( 152 | "customerD", 153 | ((Struct) poll2.get(1).value()).get("fullName") 154 | ); 155 | assertEquals( 156 | 111L, 157 | ((Struct) poll2.get(1).value()).get("ts") 158 | ); 159 | assertEquals("{position_secondary=customerD, position=111}", poll2.get(1).sourceOffset().toString()); 160 | 161 | //then 162 | List<SourceRecord> last = task.poll(); 163 | assertEquals(1, last.size()); 164 | assertEquals( 165 | "customerA", 166 | ((Struct) last.get(0).value()).get("fullName") 167 | ); 168 | assertEquals( 169 | 112L, 170 | ((Struct) last.get(0).value()).get("ts") 171 | ); 172 | assertEquals("{position_secondary=customerA, position=112}", last.get(0).sourceOffset().toString()); 173 | 174 | List<SourceRecord> empty = task.poll(); 175 | assertTrue(empty.isEmpty()); 176 | 177 | task.stop(); 178 | } 179 |
180 | @Test 181 | public void shouldRunSourceTaskWithInitialOffset() throws IOException, InterruptedException { 182 | //given 183 | deleteTestIndex(); 184 | 185 | insertMockData(111); 186 | insertMockData(112); 187 | insertMockData(113); 188 | insertMockData(114); 189 | refreshIndex(); 190 | 191 | ElasticSourceTask task = new ElasticSourceTask(); 192 | Mockito.when(context.offsetStorageReader()).thenReturn(MockOffsetFactory.from(String.valueOf(111))); 193 | task.initialize(context); 194 | 195 | //when (fetching first page) 196 | task.start(getConf()); 197 | List<SourceRecord> poll1 = task.poll(); 198 | 199 | assertEquals(2, poll1.size()); 200 | 201 | assertEquals( 202 | "Test", 203 | ((Struct) poll1.get(0).value()).get("fullName") 204 | ); 205 | assertEquals( 206 | 112L, 207 | ((Struct) poll1.get(0).value()).get("ts") 208 | ); 209 | assertEquals("{position=112}", poll1.get(0).sourceOffset().toString()); 210 | assertEquals( 211 | "Test", 212 | ((Struct) poll1.get(1).value()).get("fullName") 213 | ); 214 | assertEquals( 215 | 113L, 216 | ((Struct) poll1.get(1).value()).get("ts") 217 | ); 218 | assertEquals("{position=113}", poll1.get(1).sourceOffset().toString()); 219 | 220 | //when (fetching second page) 221 | List<SourceRecord> poll2 = task.poll(); 222 | assertEquals(1, poll2.size()); 223 | assertEquals( 224 | "Test", 225 | ((Struct) poll2.get(0).value()).get("fullName") 226 | ); 227 | assertEquals( 228 | 114L, 229 | ((Struct) poll2.get(0).value()).get("ts") 230 | ); 231 | assertEquals("{position=114}", poll2.get(0).sourceOffset().toString()); 232 | 233 | //then 234 | List<SourceRecord> empty = task.poll(); 235 | assertTrue(empty.isEmpty()); 236 | 237 | task.stop(); 238 | } 239 |
240 | @Test 241 | public void shouldRunTask_WithSecondarySort_WithOnlyPrimaryInitialOffset() throws IOException, InterruptedException { 242 | //given 243 | deleteTestIndex(); 244 | 245 | insertMockData(110, "customerA", TEST_INDEX); //already seen... 246 | insertMockData(111, "customerA", TEST_INDEX); 247 | insertMockData(111, "customerB", TEST_INDEX); 248 | insertMockData(111, "customerC", TEST_INDEX); 249 | insertMockData(111, "customerD", TEST_INDEX); 250 | insertMockData(112, "customerA", TEST_INDEX); 251 | refreshIndex(); 252 | 253 | ElasticSourceTask task = new ElasticSourceTask(); 254 | Mockito.when(context.offsetStorageReader()).thenReturn(MockOffsetFactory.from(String.valueOf(110))); 255 | task.initialize(context); 256 | 257 | //when (fetching first page) 258 | Map<String, String> conf = getConf(); 259 | conf.put(SECONDARY_INCREMENTING_FIELD_NAME_CONFIG, SECONDARY_CURSOR_FIELD); 260 | task.start(conf); 261 | List<SourceRecord> poll1 = task.poll(); 262 | assertEquals(2, poll1.size()); 263 | assertEquals( 264 | "customerA", 265 | ((Struct) poll1.get(0).value()).get("fullName") 266 | ); 267 | assertEquals( 268 | 111L, 269 | ((Struct) poll1.get(0).value()).get("ts") 270 | ); 271 | assertEquals( 272 | "customerB", 273 | ((Struct) poll1.get(1).value()).get("fullName") 274 | ); 275 | assertEquals( 276 | 111L, 277 | ((Struct) poll1.get(1).value()).get("ts") 278 | ); 279 | 280 | //when (fetching second page) 281 | List<SourceRecord> poll2 = task.poll(); 282 | assertEquals( 283 | "customerC", 284 | ((Struct) poll2.get(0).value()).get("fullName") 285 | ); 286 | assertEquals( 287 | 111L, 288 | ((Struct) poll2.get(0).value()).get("ts") 289 | ); 290 | assertEquals("{position_secondary=customerC, position=111}", poll2.get(0).sourceOffset().toString()); 291 | assertEquals( 292 | "customerD", 293 | ((Struct) poll2.get(1).value()).get("fullName") 294 | ); 295 | assertEquals( 296 | 111L, 297 | ((Struct) poll2.get(1).value()).get("ts") 298 | ); 299 | assertEquals("{position_secondary=customerD, position=111}", poll2.get(1).sourceOffset().toString()); 300 | assertEquals(2, poll2.size()); 301 | 302 | //then 303 | List<SourceRecord> last = task.poll(); 304 | assertEquals(1, last.size()); 305 | assertEquals( 306 | "customerA", 307 | ((Struct) last.get(0).value()).get("fullName") 308 | ); 309 | assertEquals( 310 | 112L, 311 | ((Struct) last.get(0).value()).get("ts") 312 | ); 313 | assertEquals("{position_secondary=customerA, position=112}", last.get(0).sourceOffset().toString()); 314 | List<SourceRecord> empty = task.poll(); 315 | assertTrue(empty.isEmpty()); 316 | 317 | task.stop(); 318 | } 319 |
320 | @Test 321 | public void shouldRunTask_WithSecondarySort_WithInitialOffset() throws IOException, InterruptedException { 322 | //given 323 | deleteTestIndex(); 324 | 325 | insertMockData(110, "customerA", TEST_INDEX); //already seen 326 | insertMockData(111, "customerA", TEST_INDEX); //already seen 327 | insertMockData(111, "customerB", TEST_INDEX); 328 | insertMockData(111, "customerC", TEST_INDEX); 329 | insertMockData(111, "customerD", TEST_INDEX); 330 | insertMockData(112, "customerA", TEST_INDEX); 331 | refreshIndex(); 332 | 333 | ElasticSourceTask task = new ElasticSourceTask(); 334 | Mockito.when(context.offsetStorageReader()).thenReturn(MockOffsetFactory.from(String.valueOf(111), "customerA")); 335 | task.initialize(context); 336 | 337 | //when (fetching first page) 338 | Map<String, String> conf = getConf(); 339 | conf.put(SECONDARY_INCREMENTING_FIELD_NAME_CONFIG, SECONDARY_CURSOR_FIELD); 340 | task.start(conf); 341 | List<SourceRecord> poll1 = task.poll(); 342 | assertEquals(2, poll1.size()); 343 | assertEquals( 344 | "customerB", 345 | ((Struct) poll1.get(0).value()).get("fullName") 346 | ); 347 | assertEquals("{position_secondary=customerB, position=111}", poll1.get(0).sourceOffset().toString()); 348 | assertEquals( 349 | "customerC", 350 | ((Struct) poll1.get(1).value()).get("fullName") 351 | ); 352 | 353 | //when (fetching second page) 354 | List<SourceRecord> poll2 = task.poll(); 355 | assertEquals(2, poll2.size()); 356 | assertEquals( 357 | "customerD", 358 | ((Struct) poll2.get(0).value()).get("fullName") 359 | ); 360 | assertEquals("{position_secondary=customerD, position=111}", poll2.get(0).sourceOffset().toString()); 361 | assertEquals( 362 | "customerA", 363 | ((Struct) poll2.get(1).value()).get("fullName") 364 | ); 365 | assertEquals("{position_secondary=customerA, position=112}", poll2.get(1).sourceOffset().toString()); 366 | 367 | //then 368 | List<SourceRecord> empty = task.poll(); 369 | assertTrue(empty.isEmpty()); 370 | 371 | task.stop(); 372 | } 373 | 374 |
375 | @Test 376 | public void shouldRunSourceTaskWhitelist() throws IOException, InterruptedException { 377 | //given 378 | deleteTestIndex(); 379 | 380 | insertMockData(111); 381 | insertMockData(112); 382 | insertMockData(113); 383 | insertMockData(114); 384 | refreshIndex(); 385 | 386 | ElasticSourceTask task = new ElasticSourceTask(); 387 | Mockito.when(context.offsetStorageReader()).thenReturn(MockOffsetFactory.empty()); 388 | task.initialize(context); 389 | Map<String, String> conf = getConf(); 390 | conf.put(ElasticSourceConnectorConfig.FIELDS_WHITELIST_CONFIG, "fullName"); 391 | 392 | //when (fetching first page) 393 | task.start(conf); 394 | List<SourceRecord> poll1 = task.poll(); 395 | //Check that the struct contains only one field, "fullName" = "Test" 396 | assertEquals(1, ((Struct) poll1.get(0).value()).schema().fields().size()); 397 | assertEquals("Test", ((Struct) poll1.get(0).value()).get("fullName")); 398 | task.stop(); 399 | } 400 |
401 | @Test 402 | public void shouldRunSourceTaskBlacklist() throws IOException, InterruptedException { 403 | //given 404 | deleteTestIndex(); 405 | 406 | insertMockData(111); 407 | insertMockData(112); 408 | insertMockData(113); 409 | insertMockData(114); 410 | refreshIndex(); 411 | 412 | ElasticSourceTask task = new ElasticSourceTask(); 413 | Mockito.when(context.offsetStorageReader()).thenReturn(MockOffsetFactory.empty()); 414 | task.initialize(context); 415 | Map<String, String> conf = getConf(); 416 | conf.put(ElasticSourceConnectorConfig.FIELDS_BLACKLIST_CONFIG, "fullName"); 417 | 418 | //when (fetching first page) 419 | task.start(conf); 420 | List<SourceRecord> poll1 = task.poll(); 421 | 422 | //Then 423 | List<Field> fields = ((Struct) poll1.get(0).value()).schema().fields(); 424 | assertEquals(6, fields.size()); 425 | task.stop(); 426 | } 427 |
428 | @Test 429 | public void shouldRunSourceTaskWithJsonCastFilter() throws IOException, InterruptedException { 430 | //given 431 | deleteTestIndex(); 432 | 433 | insertMockData(111); 434 | insertMockData(112); 435 | insertMockData(113); 436 | insertMockData(114); 437 | refreshIndex(); 438 | 439 | ElasticSourceTask task = new ElasticSourceTask(); 440 | Mockito.when(context.offsetStorageReader()).thenReturn(MockOffsetFactory.empty()); 441 | task.initialize(context); 442 | Map<String, String> conf = getConf(); 443 | conf.put(ElasticSourceConnectorConfig.FIELDS_JSON_CAST_CONFIG, "fullName"); 444 | 445 | //when (fetching first page) 446 | task.start(conf); 447 | List<SourceRecord> poll1 = task.poll(); 448 | Struct structValue = (Struct) poll1.get(0).value(); 449 | assertEquals("\"Test\"", structValue.get("fullName")); 450 | assertEquals("avro-field", structValue.get("avroField")); 451 | assertEquals("non-avro-field", structValue.get("nonavrofield")); 452 | task.stop(); 453 | } 454 |
455 | @Test 456 | public void shouldRunSourceTaskWithAvroNameConverter() throws IOException, InterruptedException { 457 | //given 458 | deleteTestIndex(); 459 | 460 | insertMockData(111); 461 | insertMockData(112); 462 | insertMockData(113); 463 | insertMockData(114); 464 | refreshIndex(); 465 | 466 | ElasticSourceTask task = new ElasticSourceTask(); 467 | Mockito.when(context.offsetStorageReader()).thenReturn(MockOffsetFactory.empty()); 468 | task.initialize(context); 469 | Map<String, String> conf = getConf(); 470 | 471 | //when (fetching first page) 472 | task.start(conf); 473 | List<SourceRecord> poll1 = task.poll(); 474 | Struct structValue = (Struct) poll1.get(0).value(); 475 | assertEquals("avro-field", structValue.get("avroField")); 476 | assertEquals("non-avro-field", structValue.get("nonavrofield")); 477 | task.stop(); 478 | } 479 |
480 | @Test 481 | public void shouldRunSourceTaskWithNopNameConverter() throws IOException, InterruptedException { 482 | //given 483 | deleteTestIndex(); 484 | 485 | insertMockData(111); 486 | insertMockData(112); 487 | insertMockData(113); 488 | insertMockData(114); 489 | refreshIndex(); 490 | 491 | ElasticSourceTask task = new ElasticSourceTask(); 492 | Mockito.when(context.offsetStorageReader()).thenReturn(MockOffsetFactory.empty()); 493 | task.initialize(context); 494 | Map<String, String> conf = getConf(); 495 | conf.put(ElasticSourceConnectorConfig.CONNECTOR_FIELDNAME_CONVERTER_CONFIG, 496 | ElasticSourceConnectorConfig.NOP_FIELDNAME_CONVERTER); 497 | 498 | //when (fetching first page) 499 | task.start(conf); 500 | List<SourceRecord> poll1 = task.poll(); 501 | Struct structValue = (Struct) poll1.get(0).value(); 502 | assertEquals("avro-field", structValue.get("avroField")); 503 | assertEquals("non-avro-field", structValue.get("non-avro-field")); 504 | task.stop(); 505 | } 506 | 507 | 508 | } 509 |
-------------------------------------------------------------------------------- /src/test/java/com/github/dariobalinzo/task/MockOffsetFactory.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com) 3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | *
10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.github.dariobalinzo.task; 18 | 19 | import org.apache.kafka.connect.storage.OffsetStorageReader; 20 | 21 | import java.util.Collection; 22 | import java.util.HashMap; 23 | import java.util.Map; 24 | 25 | public class MockOffsetFactory { 26 | 27 | static OffsetStorageReader empty() { //simulates a connector with no previously committed offset 28 | return emptyOffset; 29 | } 30 | 31 | static OffsetStorageReader from(String initialCursor) { 32 | return from(initialCursor, null); 33 | } 34 | 35 | static OffsetStorageReader from(String initialCursor, String secondaryCursor) { //simulates a connector restarting from the given cursor position(s) 36 | Map<String, Object> state = new HashMap<>(); 37 | state.put(ElasticSourceTask.POSITION, initialCursor); 38 | if (secondaryCursor != null) { 39 | state.put(ElasticSourceTask.POSITION_SECONDARY, secondaryCursor); 40 | } 41 | 42 | return new OffsetStorageReader() { 43 | @Override 44 | public <T> Map<String, Object> offset(Map<String, T> map) { 45 | return state; 46 | } 47 | 48 | @Override 49 | public <T> Map<Map<String, T>, Map<String, Object>> offsets(Collection<Map<String, T>> collection) { 50 | return null; 51 | } 52 | }; 53 | } 54 | 55 | private static OffsetStorageReader emptyOffset = new OffsetStorageReader() { 56 | @Override 57 | public <T> Map<String, Object> offset(Map<String, T> map) { 58 | return new HashMap<>(); 59 | } 60 | 61 | @Override 62 | public <T> Map<Map<String, T>, Map<String, Object>> offsets(Collection<Map<String, T>> collection) { 63 | return null; 64 | } 65 | }; 66 | 67 | } 68 |
-------------------------------------------------------------------------------- /src/test/resources/com/github/dariobalinzo/filter/document.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "elastic", 3 | "age": 7, 4 | "order_list": [ 5 | { 6 | "id": 1, 7 | "details": { 8 | "nested_det": "test nested inside list", 9 | "qty": 1 10 | } 11 | }, 12 | { 13 | "id": 2, 14 | "details": { 15 | "nested_det": "test nested inside list", 16 | "qty": 2 17 | } 18 | } 19 | ], 20 | "obj": { 21 | "key": 55, 22 | "details": { 23 | "nested_det": "test nested inside list", 24 | "qty": 2 25 | } 26 | }, 27 | "other-obj": { 28 | "foo": "bar" 29 | } 30 | }
-------------------------------------------------------------------------------- /src/test/resources/com/github/dariobalinzo/logback.xml: -------------------------------------------------------------------------------- 1 | <configuration> 2 | <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender"> 3 | <encoder> 4 | <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern> 5 | </encoder> 6 | </appender> 7 | 8 | <root> 9 | <appender-ref ref="STDOUT"/> 10 | </root> 11 | 12 | </configuration>
-------------------------------------------------------------------------------- /src/test/resources/com/github/dariobalinzo/schema/complexDocument.json: -------------------------------------------------------------------------------- 1 | { 2 | "current-time": "2020-12-11T07:24:44Z", 3 | "ip": "192.168.1.111", 4 | "xxx-api-endpoint": "https://192.168.1.111:5001/api", 5 | "parent": "xxx/d0cee1fb-8fc8-427a-88de-ab4f7b2f8b8b", 6 | "architecture": "armv7l", 7 | "updated": "2020-12-11T07:24:45.198Z", 8 | "last-boot": "2020-12-09T21:45:27Z", 9 | "xxx-engine-version": "1.13.0", 10 | "docker-server-version": "19.03.14", 11 | "created": "2020-12-10T08:06:38.652Z", 12 | "hostname": "xyz", 13 | "updated-by": "xxx/d0cee1fb-8fc8-427a-88de-ab4f7b2f8b8b", 14 | "gpio-pins": [ 15 | { 16 | "pin": 1, 17 | "name": "3.3v" 18 | }, 19 | { 20 | "pin": 2, 21 | "name": "5v" 22 | }, 23 | { 24 | "pin": 3, 25 |
"bcm": 2, 26 | "name": "SDA.1", 27 | "mode": "IN", 28 | "voltage": 1 29 | }, 30 | { 31 | "pin": 4, 32 | "name": "5v" 33 | }, 34 | { 35 | "pin": 5, 36 | "bcm": 3, 37 | "name": "SCL.1", 38 | "mode": "IN", 39 | "voltage": 1 40 | }, 41 | { 42 | "pin": 6, 43 | "name": "0v" 44 | }, 45 | { 46 | "pin": 7, 47 | "bcm": 4, 48 | "name": "GPIO. 7", 49 | "mode": "IN", 50 | "voltage": 1 51 | }, 52 | { 53 | "pin": 8, 54 | "name": "TxD", 55 | "mode": "IN", 56 | "voltage": 1 57 | }, 58 | { 59 | "pin": 9, 60 | "name": "0v" 61 | }, 62 | { 63 | "pin": 10, 64 | "name": "RxD", 65 | "mode": "IN", 66 | "voltage": 1 67 | }, 68 | { 69 | "pin": 11, 70 | "bcm": 17, 71 | "name": "GPIO. 0", 72 | "mode": "IN", 73 | "voltage": 0 74 | }, 75 | { 76 | "pin": 12, 77 | "name": "GPIO. 1", 78 | "mode": "IN", 79 | "voltage": 0 80 | }, 81 | { 82 | "pin": 13, 83 | "bcm": 27, 84 | "name": "GPIO. 2", 85 | "mode": "IN", 86 | "voltage": 0 87 | }, 88 | { 89 | "pin": 14, 90 | "name": "0v" 91 | }, 92 | { 93 | "pin": 15, 94 | "bcm": 22, 95 | "name": "GPIO. 3", 96 | "mode": "IN", 97 | "voltage": 0 98 | }, 99 | { 100 | "pin": 16, 101 | "name": "GPIO. 4", 102 | "mode": "IN", 103 | "voltage": 0 104 | }, 105 | { 106 | "pin": 17, 107 | "name": "3.3v" 108 | }, 109 | { 110 | "pin": 18, 111 | "name": "GPIO. 5", 112 | "mode": "IN", 113 | "voltage": 0 114 | }, 115 | { 116 | "pin": 19, 117 | "bcm": 10, 118 | "name": "MOSI", 119 | "mode": "IN", 120 | "voltage": 0 121 | }, 122 | { 123 | "pin": 20, 124 | "name": "0v" 125 | }, 126 | { 127 | "pin": 21, 128 | "bcm": 9, 129 | "name": "MISO", 130 | "mode": "IN", 131 | "voltage": 0 132 | }, 133 | { 134 | "pin": 22, 135 | "name": "GPIO. 6", 136 | "mode": "IN", 137 | "voltage": 0 138 | }, 139 | { 140 | "pin": 23, 141 | "bcm": 11, 142 | "name": "SCLK", 143 | "mode": "IN", 144 | "voltage": 0 145 | }, 146 | { 147 | "pin": 24, 148 | "name": "CE0", 149 | "mode": "IN", 150 | "voltage": 1 151 | }, 152 | { 153 | "pin": 25, 154 | "name": "0v" 155 | }, 156 | { 157 | "pin": 26, 158 | "name": "CE1", 159 | "mode": "IN", 160 | "voltage": 1 161 | }, 162 | { 163 | "pin": 27, 164 | "bcm": 0, 165 | "name": "SDA.0", 166 | "mode": "IN", 167 | "voltage": 1 168 | }, 169 | { 170 | "pin": 28, 171 | "name": "SCL.0", 172 | "mode": "IN", 173 | "voltage": 1 174 | }, 175 | { 176 | "pin": 29, 177 | "bcm": 5, 178 | "name": "GPIO.21", 179 | "mode": "IN", 180 | "voltage": 1 181 | }, 182 | { 183 | "pin": 30, 184 | "name": "0v" 185 | }, 186 | { 187 | "pin": 31, 188 | "bcm": 6, 189 | "name": "GPIO.22", 190 | "mode": "IN", 191 | "voltage": 1 192 | }, 193 | { 194 | "pin": 32, 195 | "name": "GPIO.26", 196 | "mode": "IN", 197 | "voltage": 0 198 | }, 199 | { 200 | "pin": 33, 201 | "bcm": 13, 202 | "name": "GPIO.23", 203 | "mode": "IN", 204 | "voltage": 0 205 | }, 206 | { 207 | "pin": 34, 208 | "name": "0v" 209 | }, 210 | { 211 | "pin": 35, 212 | "bcm": 19, 213 | "name": "GPIO.24", 214 | "mode": "IN", 215 | "voltage": 0 216 | }, 217 | { 218 | "pin": 36, 219 | "name": "GPIO.27", 220 | "mode": "IN", 221 | "voltage": 0 222 | }, 223 | { 224 | "pin": 37, 225 | "bcm": 26, 226 | "name": "GPIO.25", 227 | "mode": "IN", 228 | "voltage": 0 229 | }, 230 | { 231 | "pin": 38, 232 | "name": "GPIO.28", 233 | "mode": "IN", 234 | "voltage": 0 235 | }, 236 | { 237 | "pin": 39, 238 | "name": "0v" 239 | }, 240 | { 241 | "pin": 40, 242 | "name": "GPIO.29", 243 | "mode": "IN", 244 | "voltage": 0 245 | } 246 | ], 247 | "created-by": "internal", 248 | "status": "OPERATIONAL", 249 | "id": "xxx-status/b5054ecf-9f18-4b86-bc95-30933fe05581", 250 | "operating-system": "Raspbian 
GNU/Linux 10 (buster)", 251 | "resource-type": "xxx-status", 252 | "acl": { 253 | "view-acl": [ 254 | "user/80454ed0-65eb-4b77-864e-2dc525627e38" 255 | ], 256 | "view-meta": [ 257 | "xxx/d0cee1fb-8fc8-427a-88de-ab4f7b2f8b8b", 258 | "user/80454ed0-65eb-4b77-864e-2dc525627e38" 259 | ], 260 | "view-data": [ 261 | "xxx/d0cee1fb-8fc8-427a-88de-ab4f7b2f8b8b", 262 | "user/80454ed0-65eb-4b77-864e-2dc525627e38" 263 | ], 264 | "edit-data": [ 265 | "xxx/d0cee1fb-8fc8-427a-88de-ab4f7b2f8b8b" 266 | ], 267 | "edit-meta": [ 268 | "xxx/d0cee1fb-8fc8-427a-88de-ab4f7b2f8b8b" 269 | ], 270 | "owners": [ 271 | "group/nuvla-admin" 272 | ] 273 | }, 274 | "next-heartbeat": "2020-12-11T07:25:15.209Z", 275 | "version": 1, 276 | "resources": { 277 | "cpu": { 278 | "topic": "cpu", 279 | "raw-sample": "{\"capacity\": 4, \"load\": 0.64}", 280 | "capacity": 4, 281 | "load": 0.64 282 | }, 283 | "ram": { 284 | "topic": "ram", 285 | "raw-sample": "{\"capacity\": 3828, \"used\": 1235}", 286 | "capacity": 3828, 287 | "used": 1235 288 | }, 289 | "disks": [ 290 | { 291 | "device": "overlay", 292 | "capacity": 28, 293 | "used": 4, 294 | "topic": "disks", 295 | "raw-sample": "{\"device\": \"overlay\", \"capacity\": 28, \"used\": 4}" 296 | } 297 | ], 298 | "net-stats": [ 299 | { 300 | "interface": "docker_gwbridge", 301 | "bytes-transmitted": 1810018, 302 | "bytes-received": 633 303 | }, 304 | { 305 | "interface": "lo", 306 | "bytes-transmitted": 153116745, 307 | "bytes-received": 153116745 308 | }, 309 | { 310 | "interface": "veth53b9858", 311 | "bytes-transmitted": 3865916, 312 | "bytes-received": 1275 313 | }, 314 | { 315 | "interface": "vetha95aba6", 316 | "bytes-transmitted": 4349209, 317 | "bytes-received": 0 318 | }, 319 | { 320 | "interface": "docker0", 321 | "bytes-transmitted": 58162393, 322 | "bytes-received": 1347447 323 | }, 324 | { 325 | "interface": "veth2d9e5be", 326 | "bytes-transmitted": 20942074, 327 | "bytes-received": 12350057 328 | }, 329 | { 330 | "interface": "vethe4e283e", 331 | "bytes-transmitted": 723871, 332 | "bytes-received": 352184 333 | }, 334 | { 335 | "interface": "veth5207da0", 336 | "bytes-transmitted": 23136462, 337 | "bytes-received": 61398287 338 | }, 339 | { 340 | "interface": "vethef962b3", 341 | "bytes-transmitted": 3858289, 342 | "bytes-received": 689 343 | }, 344 | { 345 | "interface": "vetha49fdcb", 346 | "bytes-transmitted": 3936275, 347 | "bytes-received": 7957 348 | }, 349 | { 350 | "interface": "br-193effb5470e", 351 | "bytes-transmitted": 145658655, 352 | "bytes-received": 147435494 353 | }, 354 | { 355 | "interface": "wlan0", 356 | "bytes-transmitted": 91616660, 357 | "bytes-received": 307622918 358 | }, 359 | { 360 | "interface": "veth3d6d8ed", 361 | "bytes-transmitted": 25273385, 362 | "bytes-received": 66929714 363 | }, 364 | { 365 | "interface": "eth0", 366 | "bytes-transmitted": 0, 367 | "bytes-received": 0 368 | } 369 | ] 370 | }, 371 | "inferred-location": [ 372 | 6.0826, 373 | 46.1443 374 | ], 375 | "vulnerabilities": { 376 | "summary": { 377 | "total": 1, 378 | "affected-products": [ 379 | "OpenSSH 7.9p1 Raspbian 10+deb10u2" 380 | ], 381 | "average-score": 8.1 382 | }, 383 | "items": [ 384 | { 385 | "product": "OpenSSH 7.9p1 Raspbian 10+deb10u2", 386 | "vulnerability-id": "CVE-2019-7639", 387 | "vulnerability-score": 8.1 388 | } 389 | ] 390 | } 391 | } --------------------------------------------------------------------------------