├── .circleci
│   └── config.yml
├── .github
│   └── FUNDING.yml
├── .gitignore
├── LICENSE
├── README.md
├── assets
│   └── dariobalinzo.png
├── doc
│   ├── LICENSE
│   └── README.md
├── etc
│   └── quickstart-elasticsearch-source.properties
├── manifest.json
├── pom.xml
└── src
    ├── main
    │   └── java
    │       └── com
    │           └── github
    │               └── dariobalinzo
    │                   ├── ElasticSourceConnector.java
    │                   ├── ElasticSourceConnectorConfig.java
    │                   ├── Version.java
    │                   ├── elastic
    │                   │   ├── CursorField.java
    │                   │   ├── ElasticConnection.java
    │                   │   ├── ElasticConnectionBuilder.java
    │                   │   ├── ElasticIndexMonitorThread.java
    │                   │   ├── ElasticJsonNaming.java
    │                   │   ├── ElasticRepository.java
    │                   │   ├── SslContextException.java
    │                   │   └── response
    │                   │       ├── Cursor.java
    │                   │       └── PageResult.java
    │                   ├── filter
    │                   │   ├── BlacklistFilter.java
    │                   │   ├── DocumentFilter.java
    │                   │   ├── JsonCastFilter.java
    │                   │   ├── JsonElementFilter.java
    │                   │   ├── JsonFilterVisitor.java
    │                   │   └── WhitelistFilter.java
    │                   ├── schema
    │                   │   ├── AvroName.java
    │                   │   ├── FieldNameConverter.java
    │                   │   ├── NopNameConverter.java
    │                   │   ├── SchemaConverter.java
    │                   │   └── StructConverter.java
    │                   └── task
    │                       ├── ElasticSourceTask.java
    │                       ├── ElasticSourceTaskConfig.java
    │                       └── OffsetSerializer.java
    └── test
        ├── java
        │   └── com
        │       └── github
        │           └── dariobalinzo
        │               ├── ElasticIndexMonitorThreadTest.java
        │               ├── FooTest.java
        │               ├── TestContainersContext.java
        │               ├── elastic
        │               │   ├── ElasticRepositoryTest.java
        │               │   └── ElasticSourceConnectorTest.java
        │               ├── filter
        │               │   ├── BlacklistFilterTest.java
        │               │   ├── JsonCastFilterTest.java
        │               │   └── WhitelistFilterTest.java
        │               ├── foo.json
        │               ├── schema
        │               │   ├── AvroNameTest.java
        │               │   └── SchemaConverterTest.java
        │               └── task
        │                   ├── ElasticSourceTaskTest.java
        │                   └── MockOffsetFactory.java
        └── resources
            └── com
                └── github
                    └── dariobalinzo
                        ├── filter
                        │   └── document.json
                        ├── logback.xml
                        └── schema
                            └── complexDocument.json
/.circleci/config.yml:
--------------------------------------------------------------------------------
1 | # Check https://circleci.com/docs/2.0/language-java/ for more details
2 | #
3 | version: 2
4 | jobs:
5 | build:
6 | machine: ubuntu-2004:202008-01
7 | steps:
8 | - checkout
9 |
10 | - run: sudo apt-get update && sudo apt-get install -y openjdk-8-jdk
11 | - run: export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 && mvn -B clean install
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | custom: [ "https://www.paypal.me/coffeeDarioBalinzo" ]
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /elastic-source-connect.iml
2 | /target/
3 | /.idea/
4 | /lib/
5 | /.vscode/
6 | /.settings/
7 | .factorypath
8 | .classpath
9 | .project
10 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Kafka-connect-elasticsearch-source
2 |
3 | [Build status](https://github.com/DarioBalinzo/kafka-connect-elasticsearch-source/actions)
4 |
5 | Kafka Connect Elasticsearch Source: fetches data from Elasticsearch and sends it to Kafka. The connector fetches only
6 | new data using a strictly incremental/temporal field (such as a timestamp or an incrementing id). It supports dynamic
7 | schemas and nested objects/arrays.
8 |
9 | ## Requirements:
10 |
11 | - Elasticsearch 6.x and 7.x
12 | - Java >= 8
13 | - Maven
14 |
15 | ## Output data serialization format:
16 |
17 | The connector uses Kafka Connect schemas and structs, which are agnostic to the serialization method in use (e.g.
18 | Avro, JSON, etc.).
19 |
20 | ## Bugs or new Ideas?
21 |
22 | - Issues tracker: https://github.com/DarioBalinzo/kafka-connect-elasticsearch-source/issues
23 | - Feel free to open an issue to discuss new ideas (or propose new solutions with a PR).
24 |
25 | ## Installation:
26 |
27 | Compile the project with:
28 |
29 | ```bash
30 | mvn clean package -DskipTests
31 | ```
32 |
33 | You can also compile and run both unit and integration tests (Docker is required) with:
34 |
35 | ```bash
36 | mvn clean package
37 | ```
38 |
39 | Copy the jar with dependencies from the target folder into the Connect classpath
40 | (e.g. ``/usr/share/java/kafka-connect-elasticsearch``) or set the ``plugin.path`` parameter appropriately.
41 |
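For instance, assuming the default build output and a standard Connect installation layout (paths below are illustrative and should be adapted to your installation), the copy step might look like:

```bash
# Illustrative paths: the jar name follows the artifactId/version in pom.xml,
# and the target directory must be on the Connect worker's classpath or plugin.path.
mkdir -p /usr/share/java/kafka-connect-elasticsearch
cp target/elastic-source-connect-1.5.5-jar-with-dependencies.jar /usr/share/java/kafka-connect-elasticsearch/
```
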
42 | ## Example
43 |
44 | Using Kafka Connect in distributed mode, a sample config file to fetch ``my_awesome_index*`` indices and produce
45 | output topics with the ``es_`` prefix:
46 |
47 | ```json
48 | {
49 | "name": "elastic-source",
50 | "config": {
51 | "connector.class":"com.github.dariobalinzo.ElasticSourceConnector",
52 | "tasks.max": "1",
53 | "es.host" : "localhost",
54 | "es.port" : "9200",
55 | "index.prefix" : "my_awesome_index",
56 | "topic.prefix" : "es_",
57 | "incrementing.field.name" : "@timestamp"
58 | }
59 | }
60 | ```
61 |
62 | To start the connector with curl:
63 |
64 | ```bash
65 | curl -X POST -H "Content-Type: application/json" --data @config.json http://localhost:8083/connectors | jq
66 | ```
67 |
68 | To check the status:
69 |
70 | ```bash
71 | curl localhost:8083/connectors/elastic-source/status | jq
72 | ```
73 |
74 | To stop the connector:
75 |
76 | ```bash
77 | curl -X DELETE localhost:8083/connectors/elastic-source | jq
78 | ```
79 |
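To update the configuration of a running connector, the standard Connect REST API can be used. Note that the ``PUT`` endpoint expects only the inner ``config`` object, not the full payload shown above (``update-config.json`` is a hypothetical file containing just that object):

```bash
curl -X PUT -H "Content-Type: application/json" --data @update-config.json http://localhost:8083/connectors/elastic-source/config | jq
```
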
80 | ## Documentation
81 |
82 | ### Elasticsearch Configuration
83 |
84 | ``es.host``
85 | Elasticsearch host. Multiple hosts can optionally be specified, using ``;`` as separator (``host1;host2;host3``).
86 |
87 | * Type: string
88 | * Importance: high
89 | * Dependents: ``index.prefix``
90 |
91 | ``es.port``
92 | ElasticSearch port
93 |
94 | * Type: string
95 | * Importance: high
96 | * Dependents: ``index.prefix``
97 |
98 | ``es.scheme``
99 | ElasticSearch scheme (http/https)
100 |
101 | * Type: string
102 | * Importance: medium
103 | * Default: ``http``
104 |
105 | ``es.user``
106 | Elasticsearch username
107 |
108 | * Type: string
109 | * Default: null
110 | * Importance: high
111 |
112 | ``es.password``
113 | Elasticsearch password
114 |
115 | * Type: password
116 | * Default: null
117 | * Importance: high
118 |
119 |
120 | ``incrementing.field.name``
121 | The name of the strictly incrementing field to use to detect new records.
122 |
123 | * Type: any
124 | * Importance: high
125 |
126 | ``incrementing.secondary.field.name``
127 | If the main incrementing field may contain duplicates,
128 | this secondary field is used as a tiebreaker in the sort order
129 | to avoid data loss when paginating (available since version 1.4).
130 |
131 | * Type: any
132 | * Importance: low
133 |
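For example, a configuration sketch (field names are illustrative) that uses a timestamp as the main cursor and a unique id field as tiebreaker:

```json
{
  "incrementing.field.name": "@timestamp",
  "incrementing.secondary.field.name": "id"
}
```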
134 |
135 | ``es.tls.truststore.location``
136 | Elastic ssl truststore location
137 |
138 | * Type: string
139 | * Importance: medium
140 |
141 | ``es.tls.truststore.password``
142 | Elastic ssl truststore password
143 |
144 | * Type: string
145 | * Default: ""
146 | * Importance: medium
147 |
148 | ``es.tls.keystore.location``
149 | Elasticsearch keystore location
150 |
151 | * Type: string
152 | * Importance: medium
153 |
154 | ``es.tls.keystore.password``
155 | Elasticsearch keystore password
156 |
157 | * Type: string
158 | * Default: ""
159 | * Importance: medium
160 |
161 | ``connection.attempts``
162 | Maximum number of attempts to retrieve a valid Elasticsearch connection.
163 |
164 | * Type: int
165 | * Default: 3
166 | * Importance: low
167 |
168 | ``connection.backoff.ms``
169 | Backoff time in milliseconds between connection attempts.
170 |
171 | * Type: long
172 | * Default: 10000
173 | * Importance: low
174 |
175 | ``index.prefix``
176 | Prefix of the indices to include in copying.
177 | New indices matching the prefix are discovered periodically.
178 |
179 | * Type: string
180 | * Default: ""
181 | * Importance: medium
182 |
183 | ``index.names``
184 | Comma-separated list of Elasticsearch indices (e.g. `es1,es2,es3`).
185 |
186 | * Type: string
187 | * Default: null
188 | * Importance: medium
189 |
190 | ### Connector Configuration
191 |
192 | ``poll.interval.ms``
193 | Frequency in ms to poll for new data in each index.
194 |
195 | * Type: int
196 | * Default: 5000
197 | * Importance: high
198 |
199 | ``batch.max.rows``
200 | Maximum number of documents to include in a single batch when polling for new data.
201 |
202 | * Type: int
203 | * Default: 10000
204 | * Importance: low
205 |
206 | ``topic.prefix``
207 | Prefix to prepend to index names to generate the name of the Kafka topic to publish data to.
208 |
209 | * Type: string
210 | * Importance: high
211 |
212 | ``filters.whitelist``
213 | Whitelist filter for extracting a subset of fields from Elasticsearch JSON documents. The whitelist filter supports
214 | nested fields. To provide multiple fields, use `;` as separator
215 | (e.g. `customer;order.qty;order.price`).
216 |
217 | * Type: string
218 | * Importance: medium
219 | * Default: null
220 |
221 | ``filters.blacklist``
222 | Blacklist filter for removing a subset of fields from Elasticsearch JSON documents. The blacklist filter supports
223 | nested fields. To provide multiple fields, use `;` as separator
224 | (e.g. `customer;order.qty;order.price`).
225 |
226 | * Type: string
227 | * Importance: medium
228 | * Default: null
229 |
230 | ``filters.json_cast``
231 | This filter casts nested fields to a JSON string, instead of recursively parsing them into a Kafka Connect schema. The
232 | json-cast filter supports nested fields. To provide multiple fields, use `;` as separator
233 | (e.g. `customer;order.qty;order.price`).
234 |
235 | * Type: string
236 | * Importance: medium
237 | * Default: null
238 |
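As an illustration of the three filters, given the hypothetical source document below: setting `filters.whitelist` to `customer;order.qty` keeps only those two fields, setting `filters.blacklist` to `order` drops the whole `order` object, and setting `filters.json_cast` to `order` emits `order` as a single JSON string instead of a nested struct:

```json
{
  "customer": "acme",
  "order": {
    "qty": 2,
    "price": 10.5
  }
}
```
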
239 | ``fieldname_converter``
240 | Configures which field name converter is used (allowed values: `avro` or `nop`). By default, the avro field name
241 | converter renames JSON fields that do not respect the Avro naming
242 | specification (https://avro.apache.org/docs/current/spec.html#names)
243 | so that they can be serialized correctly. To disable field name conversion, set this parameter to `nop`.
244 |
245 | * Type: string
246 | * Importance: medium
247 | * Default: avro
248 |
--------------------------------------------------------------------------------
/assets/dariobalinzo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DarioBalinzo/kafka-connect-elasticsearch-source/bc9d2632ec596fa88b2328cfc7658533dcb6baa0/assets/dariobalinzo.png
--------------------------------------------------------------------------------
/doc/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/doc/README.md:
--------------------------------------------------------------------------------
1 | # Kafka-connect-elasticsearch-source
2 |
3 | [Build status](https://github.com/DarioBalinzo/kafka-connect-elasticsearch-source/actions)
4 |
5 | Kafka Connect Elasticsearch Source: fetches data from Elasticsearch and sends it to Kafka. The connector fetches only
6 | new data using a strictly incremental/temporal field (such as a timestamp or an incrementing id). It supports dynamic
7 | schemas and nested objects/arrays.
8 |
9 | ## Requirements:
10 |
11 | - Elasticsearch 6.x and 7.x
12 | - Java >= 8
13 | - Maven
14 |
15 | ## Output data serialization format:
16 |
17 | The connector uses Kafka Connect schemas and structs, which are agnostic to the serialization method in use (e.g.
18 | Avro, JSON, etc.).
19 |
20 | ## Bugs or new Ideas?
21 |
22 | - Issues tracker: https://github.com/DarioBalinzo/kafka-connect-elasticsearch-source/issues
23 | - Feel free to open an issue to discuss new ideas (or propose new solutions with a PR).
24 |
25 | ## Installation:
26 |
27 | Compile the project with:
28 |
29 | ```bash
30 | mvn clean package -DskipTests
31 | ```
32 |
33 | You can also compile and run both unit and integration tests (Docker is required) with:
34 |
35 | ```bash
36 | mvn clean package
37 | ```
38 |
39 | Copy the jar with dependencies from the target folder into the Connect classpath
40 | (e.g. ``/usr/share/java/kafka-connect-elasticsearch``) or set the ``plugin.path`` parameter appropriately.
41 |
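For instance, assuming the default build output and a standard Connect installation layout (paths below are illustrative and should be adapted to your installation), the copy step might look like:

```bash
# Illustrative paths: the jar name follows the artifactId/version in pom.xml,
# and the target directory must be on the Connect worker's classpath or plugin.path.
mkdir -p /usr/share/java/kafka-connect-elasticsearch
cp target/elastic-source-connect-1.5.5-jar-with-dependencies.jar /usr/share/java/kafka-connect-elasticsearch/
```
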
42 | ## Example
43 |
44 | Using Kafka Connect in distributed mode, a sample config file to fetch ``my_awesome_index*`` indices and produce
45 | output topics with the ``es_`` prefix:
46 |
47 | ```json
48 | {
49 | "name": "elastic-source",
50 | "config": {
51 | "connector.class":"com.github.dariobalinzo.ElasticSourceConnector",
52 | "tasks.max": "1",
53 | "es.host" : "localhost",
54 | "es.port" : "9200",
55 | "index.prefix" : "my_awesome_index",
56 | "topic.prefix" : "es_",
57 | "incrementing.field.name" : "@timestamp"
58 | }
59 | }
60 | ```
61 |
62 | To start the connector with curl:
63 |
64 | ```bash
65 | curl -X POST -H "Content-Type: application/json" --data @config.json http://localhost:8083/connectors | jq
66 | ```
67 |
68 | To check the status:
69 |
70 | ```bash
71 | curl localhost:8083/connectors/elastic-source/status | jq
72 | ```
73 |
74 | To stop the connector:
75 |
76 | ```bash
77 | curl -X DELETE localhost:8083/connectors/elastic-source | jq
78 | ```
79 |
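To update the configuration of a running connector, the standard Connect REST API can be used. Note that the ``PUT`` endpoint expects only the inner ``config`` object, not the full payload shown above (``update-config.json`` is a hypothetical file containing just that object):

```bash
curl -X PUT -H "Content-Type: application/json" --data @update-config.json http://localhost:8083/connectors/elastic-source/config | jq
```
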
80 | ## Documentation
81 |
82 | ### Elasticsearch Configuration
83 |
84 | ``es.host``
85 | Elasticsearch host. Multiple hosts can optionally be specified, using ``;`` as separator (``host1;host2;host3``).
86 |
87 | * Type: string
88 | * Importance: high
89 | * Dependents: ``index.prefix``
90 |
91 | ``es.port``
92 | ElasticSearch port
93 |
94 | * Type: string
95 | * Importance: high
96 | * Dependents: ``index.prefix``
97 |
98 | ``es.scheme``
99 | ElasticSearch scheme (http/https)
100 |
101 | * Type: string
102 | * Importance: medium
103 | * Default: ``http``
104 |
105 | ``es.user``
106 | Elasticsearch username
107 |
108 | * Type: string
109 | * Default: null
110 | * Importance: high
111 |
112 | ``es.password``
113 | Elasticsearch password
114 |
115 | * Type: password
116 | * Default: null
117 | * Importance: high
118 |
119 |
120 | ``incrementing.field.name``
121 | The name of the strictly incrementing field to use to detect new records.
122 |
123 | * Type: any
124 | * Importance: high
125 |
126 | ``incrementing.secondary.field.name``
127 | If the main incrementing field may contain duplicates,
128 | this secondary field is used as a tiebreaker in the sort order
129 | to avoid data loss when paginating (available since version 1.4).
130 |
131 | * Type: any
132 | * Importance: low
133 |
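For example, a configuration sketch (field names are illustrative) that uses a timestamp as the main cursor and a unique id field as tiebreaker:

```json
{
  "incrementing.field.name": "@timestamp",
  "incrementing.secondary.field.name": "id"
}
```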
134 |
135 | ``es.tls.truststore.location``
136 | Elastic ssl truststore location
137 |
138 | * Type: string
139 | * Importance: medium
140 |
141 | ``es.tls.truststore.password``
142 | Elastic ssl truststore password
143 |
144 | * Type: string
145 | * Default: ""
146 | * Importance: medium
147 |
148 | ``es.tls.keystore.location``
149 | Elasticsearch keystore location
150 |
151 | * Type: string
152 | * Importance: medium
153 |
154 | ``es.tls.keystore.password``
155 | Elasticsearch keystore password
156 |
157 | * Type: string
158 | * Default: ""
159 | * Importance: medium
160 |
161 | ``connection.attempts``
162 | Maximum number of attempts to retrieve a valid Elasticsearch connection.
163 |
164 | * Type: int
165 | * Default: 3
166 | * Importance: low
167 |
168 | ``connection.backoff.ms``
169 | Backoff time in milliseconds between connection attempts.
170 |
171 | * Type: long
172 | * Default: 10000
173 | * Importance: low
174 |
175 | ``index.prefix``
176 | Prefix of the indices to include in copying.
177 | New indices matching the prefix are discovered periodically.
178 |
179 | * Type: string
180 | * Default: ""
181 | * Importance: medium
182 |
183 | ``index.names``
184 | Comma-separated list of Elasticsearch indices (e.g. `es1,es2,es3`).
185 |
186 | * Type: string
187 | * Default: null
188 | * Importance: medium
189 |
190 | ### Connector Configuration
191 |
192 | ``poll.interval.ms``
193 | Frequency in ms to poll for new data in each index.
194 |
195 | * Type: int
196 | * Default: 5000
197 | * Importance: high
198 |
199 | ``batch.max.rows``
200 | Maximum number of documents to include in a single batch when polling for new data.
201 |
202 | * Type: int
203 | * Default: 10000
204 | * Importance: low
205 |
206 | ``topic.prefix``
207 | Prefix to prepend to index names to generate the name of the Kafka topic to publish data to.
208 |
209 | * Type: string
210 | * Importance: high
211 |
212 | ``filters.whitelist``
213 | Whitelist filter for extracting a subset of fields from Elasticsearch JSON documents. The whitelist filter supports
214 | nested fields. To provide multiple fields, use `;` as separator
215 | (e.g. `customer;order.qty;order.price`).
216 |
217 | * Type: string
218 | * Importance: medium
219 | * Default: null
220 |
221 | ``filters.blacklist``
222 | Blacklist filter for removing a subset of fields from Elasticsearch JSON documents. The blacklist filter supports
223 | nested fields. To provide multiple fields, use `;` as separator
224 | (e.g. `customer;order.qty;order.price`).
225 |
226 | * Type: string
227 | * Importance: medium
228 | * Default: null
229 |
230 | ``filters.json_cast``
231 | This filter casts nested fields to a JSON string, instead of recursively parsing them into a Kafka Connect schema. The
232 | json-cast filter supports nested fields. To provide multiple fields, use `;` as separator
233 | (e.g. `customer;order.qty;order.price`).
234 |
235 | * Type: string
236 | * Importance: medium
237 | * Default: null
238 |
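As an illustration of the three filters, given the hypothetical source document below: setting `filters.whitelist` to `customer;order.qty` keeps only those two fields, setting `filters.blacklist` to `order` drops the whole `order` object, and setting `filters.json_cast` to `order` emits `order` as a single JSON string instead of a nested struct:

```json
{
  "customer": "acme",
  "order": {
    "qty": 2,
    "price": 10.5
  }
}
```
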
239 | ``fieldname_converter``
240 | Configures which field name converter is used (allowed values: `avro` or `nop`). By default, the avro field name
241 | converter renames JSON fields that do not respect the Avro naming
242 | specification (https://avro.apache.org/docs/current/spec.html#names)
243 | so that they can be serialized correctly. To disable field name conversion, set this parameter to `nop`.
244 |
245 | * Type: string
246 | * Importance: medium
247 | * Default: avro
248 |
--------------------------------------------------------------------------------
/etc/quickstart-elasticsearch-source.properties:
--------------------------------------------------------------------------------
1 | connector.class=com.github.dariobalinzo.ElasticSourceConnector
2 | tasks.max=1
3 | es.host=localhost
4 | es.port=9200
5 | index.prefix=my_awesome_index
6 | topic.prefix=es_
7 | incrementing.field.name=@timestamp
8 |
--------------------------------------------------------------------------------
/manifest.json:
--------------------------------------------------------------------------------
1 | {
2 | "component_types": [
3 | "source"
4 | ],
5 | "description": "This is a connector for getting data out of Elasticsearch into Apache Kafka.",
6 | "documentation_url": "https://github.com/DarioBalinzo/kafka-connect-elasticsearch-source",
7 | "features": {
8 | "confluent_control_center_integration": true,
9 | "delivery_guarantee": [
10 | "at_least_once"
11 | ],
12 | "kafka_connect_api": true,
13 | "single_message_transforms": true,
14 | "supported_encodings": [
15 | "any"
16 | ]
17 | },
18 | "license": [
19 | {
20 | "name": "Apache License, Version 2.0",
21 | "url": "http://www.apache.org/licenses/LICENSE-2.0",
22 | "logo": "assets/apache_logo.gif"
23 | }
24 | ],
25 | "name": "kafka-connect-elasticsearch-source",
26 | "owner": {
27 | "logo": "assets/dariobalinzo.png",
28 | "name": "Dario Balinzo",
29 | "type": "user",
30 | "url": "https://github.com/DarioBalinzo",
31 | "username": "dariobalinzo"
32 | },
33 | "requirements": [
34 | "Elasticsearch >= 6.x"
35 | ],
36 | "support": {
37 | "logo": "assets/dariobalinzo.png",
38 | "provider_name": "Dario Balinzo",
39 | "summary": "For support please open an issue in the github repository",
40 | "url": "https://github.com/DarioBalinzo/kafka-connect-elasticsearch-source/issues"
41 | },
42 | "tags": [
43 | "source",
44 | "elasticsearch",
45 | "elastic"
46 | ],
47 | "title": "Kafka Connect Elasticsearch Source",
48 | "version": "1.5.0"
49 | }
50 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <!--
3 |     Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
4 | 
5 |     Licensed under the Apache License, Version 2.0 (the "License");
6 |     you may not use this file except in compliance with the License.
7 |     You may obtain a copy of the License at
8 | 
9 |     http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 |     Unless required by applicable law or agreed to in writing, software
12 |     distributed under the License is distributed on an "AS IS" BASIS,
13 |     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 |     See the License for the specific language governing permissions and
15 |     limitations under the License.
16 | -->
17 | <project xmlns="http://maven.apache.org/POM/4.0.0"
18 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
19 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
20 |     <modelVersion>4.0.0</modelVersion>
21 | 
22 |     <properties>
23 |         <maven.compiler.source>1.8</maven.compiler.source>
24 |         <maven.compiler.target>1.8</maven.compiler.target>
25 |         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
26 |     </properties>
27 | 
28 |     <groupId>com.github.dariobalinzo</groupId>
29 |     <artifactId>elastic-source-connect</artifactId>
30 |     <version>1.5.5</version>
31 | 
32 |     <licenses>
33 |         <license>
34 |             <name>Apache License 2.0</name>
35 |             <url>http://www.apache.org/licenses/LICENSE-2.0.html</url>
36 |             <distribution>repo</distribution>
37 |         </license>
38 |     </licenses>
39 | 
40 |     <dependencies>
41 |         <dependency>
42 |             <groupId>org.apache.logging.log4j</groupId>
43 |             <artifactId>log4j-api</artifactId>
44 |             <version>2.19.0</version>
45 |         </dependency>
46 |         <dependency>
47 |             <groupId>org.apache.kafka</groupId>
48 |             <artifactId>connect-api</artifactId>
49 |             <version>2.8.0</version>
50 |         </dependency>
51 |         <dependency>
52 |             <groupId>org.elasticsearch.client</groupId>
53 |             <artifactId>elasticsearch-rest-high-level-client</artifactId>
54 |             <version>7.16.1</version>
55 |         </dependency>
56 |         <dependency>
57 |             <groupId>org.elasticsearch.client</groupId>
58 |             <artifactId>elasticsearch-rest-client</artifactId>
59 |             <version>7.16.1</version>
60 |         </dependency>
61 |         <dependency>
62 |             <groupId>junit</groupId>
63 |             <artifactId>junit</artifactId>
64 |             <version>4.13.2</version>
65 |             <scope>test</scope>
66 |         </dependency>
67 |         <dependency>
68 |             <groupId>org.testcontainers</groupId>
69 |             <artifactId>testcontainers</artifactId>
70 |             <version>1.16.2</version>
71 |             <scope>test</scope>
72 |         </dependency>
73 |         <dependency>
74 |             <groupId>org.testcontainers</groupId>
75 |             <artifactId>elasticsearch</artifactId>
76 |             <version>1.16.2</version>
77 |             <scope>test</scope>
78 |         </dependency>
79 |         <dependency>
80 |             <groupId>ch.qos.logback</groupId>
81 |             <artifactId>logback-core</artifactId>
82 |             <version>1.2.9</version>
83 |             <scope>test</scope>
84 |         </dependency>
85 |         <dependency>
86 |             <groupId>ch.qos.logback</groupId>
87 |             <artifactId>logback-classic</artifactId>
88 |             <version>1.2.8</version>
89 |             <scope>test</scope>
90 |         </dependency>
91 |         <dependency>
92 |             <groupId>org.mockito</groupId>
93 |             <artifactId>mockito-core</artifactId>
94 |             <version>2.28.2</version>
95 |             <scope>test</scope>
96 |         </dependency>
97 |         <dependency>
98 |             <groupId>com.fasterxml.jackson.core</groupId>
99 |             <artifactId>jackson-databind</artifactId>
100 |             <version>2.12.7.1</version>
101 |         </dependency>
102 |         <dependency>
103 |             <groupId>com.fasterxml.jackson.dataformat</groupId>
104 |             <artifactId>jackson-dataformat-cbor</artifactId>
105 |             <version>2.14.2</version>
106 |         </dependency>
107 |         <dependency>
108 |             <groupId>org.yaml</groupId>
109 |             <artifactId>snakeyaml</artifactId>
110 |             <version>2.0</version>
111 |         </dependency>
112 |         <dependency>
113 |             <groupId>org.apache.httpcomponents</groupId>
114 |             <artifactId>httpclient</artifactId>
115 |             <version>4.5.13</version>
116 |         </dependency>
117 |     </dependencies>
118 | 
119 |     <build>
120 |         <plugins>
121 |             <plugin>
122 |                 <artifactId>maven-assembly-plugin</artifactId>
123 |                 <configuration>
124 |                     <descriptorRefs>
125 |                         <descriptorRef>jar-with-dependencies</descriptorRef>
126 |                     </descriptorRefs>
127 |                 </configuration>
128 |                 <executions>
129 |                     <execution>
130 |                         <id>make-assembly</id>
131 |                         <phase>package</phase>
132 |                         <goals>
133 |                             <goal>single</goal>
134 |                         </goals>
135 |                     </execution>
136 |                 </executions>
137 |             </plugin>
138 |         </plugins>
139 |     </build>
140 | </project>
141 | 
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/ElasticSourceConnector.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.github.dariobalinzo;
18 |
19 | import com.github.dariobalinzo.elastic.ElasticConnection;
20 | import com.github.dariobalinzo.elastic.ElasticConnectionBuilder;
21 | import com.github.dariobalinzo.elastic.ElasticRepository;
22 | import com.github.dariobalinzo.elastic.ElasticIndexMonitorThread;
23 | import com.github.dariobalinzo.task.ElasticSourceTask;
24 | import org.apache.kafka.common.config.ConfigDef;
25 | import org.apache.kafka.common.config.ConfigException;
26 | import org.apache.kafka.connect.connector.Task;
27 | import org.apache.kafka.connect.errors.ConnectException;
28 | import org.apache.kafka.connect.source.SourceConnector;
29 | import org.slf4j.Logger;
30 | import org.slf4j.LoggerFactory;
31 |
32 | import java.util.*;
33 |
34 | public class ElasticSourceConnector extends SourceConnector {
35 | private static Logger logger = LoggerFactory.getLogger(ElasticSourceConnector.class);
36 | private static final long MAX_TIMEOUT = 10000L;
37 | private static final long POLL_MILLISECONDS = 5000L;
38 |
39 | private ElasticSourceConnectorConfig config;
40 | private ElasticConnection elasticConnection;
41 | private ElasticRepository elasticRepository;
42 | private Map<String, String> configProperties;
43 | private ElasticIndexMonitorThread indexMonitorThread;
44 |
45 | @Override
46 | public String version() {
47 | return Version.VERSION;
48 | }
49 |
50 | @Override
51 | public void start(Map<String, String> props) {
52 | try {
53 | configProperties = props;
54 | config = new ElasticSourceConnectorConfig(props);
55 | } catch (ConfigException e) {
56 | throw new ConnectException("Couldn't start ElasticSourceConnector due to configuration "
57 | + "error", e);
58 | }
59 |
60 | String esScheme = config.getString(ElasticSourceConnectorConfig.ES_SCHEME_CONF);
61 | String esHost = config.getString(ElasticSourceConnectorConfig.ES_HOST_CONF);
62 |
63 | //using rest config all the parameters are strings
64 | int esPort = Integer.parseInt(config.getString(ElasticSourceConnectorConfig.ES_PORT_CONF));
65 |
66 | String esUser = config.getString(ElasticSourceConnectorConfig.ES_USER_CONF);
67 | String esPwd = config.getString(ElasticSourceConnectorConfig.ES_PWD_CONF);
68 |
69 | int maxConnectionAttempts = Integer.parseInt(config.getString(
70 | ElasticSourceConnectorConfig.CONNECTION_ATTEMPTS_CONFIG
71 | ));
72 | long connectionRetryBackoff = Long.parseLong(config.getString(
73 | ElasticSourceConnectorConfig.CONNECTION_BACKOFF_CONFIG
74 | ));
75 |
76 | ElasticConnectionBuilder connectionBuilder = new ElasticConnectionBuilder(esHost, esPort)
77 | .withProtocol(esScheme)
78 | .withMaxAttempts(maxConnectionAttempts)
79 | .withBackoff(connectionRetryBackoff);
80 |
81 | String truststore = config.getString(ElasticSourceConnectorConfig.ES_TRUSTSTORE_CONF);
82 | String truststorePass = config.getString(ElasticSourceConnectorConfig.ES_TRUSTSTORE_PWD_CONF);
83 | String keystore = config.getString(ElasticSourceConnectorConfig.ES_KEYSTORE_CONF);
84 | String keystorePass = config.getString(ElasticSourceConnectorConfig.ES_KEYSTORE_PWD_CONF);
85 |
86 | if (truststore != null) {
87 | connectionBuilder.withTrustStore(truststore, truststorePass);
88 | }
89 |
90 | if (keystore != null) {
91 | connectionBuilder.withKeyStore(keystore, keystorePass);
92 | }
93 |
94 | if (esUser == null || esUser.isEmpty()) {
95 | elasticConnection = connectionBuilder.build();
96 | } else {
97 | elasticConnection = connectionBuilder.withUser(esUser)
98 | .withPassword(esPwd)
99 | .build();
100 | }
101 |
102 | elasticRepository = new ElasticRepository(elasticConnection);
103 |
104 | indexMonitorThread = new ElasticIndexMonitorThread(context, POLL_MILLISECONDS, elasticRepository, config.getString(ElasticSourceConnectorConfig.INDEX_PREFIX_CONFIG));
105 | indexMonitorThread.start();
106 | }
107 |
108 | @Override
109 | public Class<? extends Task> taskClass() {
110 | return ElasticSourceTask.class;
111 | }
112 |
113 |
114 | @Override
115 | public List<Map<String, String>> taskConfigs(int maxTasks) {
116 | if (configProperties.containsKey(ElasticSourceConnectorConfig.INDEX_NAMES_CONFIG)) {
117 | String indicesNames = configProperties.get(ElasticSourceConnectorConfig.INDEX_NAMES_CONFIG);
118 | String[] indicesList = indicesNames.split(",");
119 | return generateTaskFromFixedList(Arrays.asList(indicesList), maxTasks);
120 | } else {
121 | return findTaskFromIndexPrefix(maxTasks);
122 | }
123 | }
124 |
125 | private List<Map<String, String>> generateTaskFromFixedList(List<String> indicesList, int maxTasks) {
126 | int numGroups = Math.min(indicesList.size(), maxTasks);
127 | return groupIndicesToTasksConfig(maxTasks, indicesList);
128 | }
129 | 
130 | private List<Map<String, String>> findTaskFromIndexPrefix(int maxTasks) {
131 | List<String> currentIndexes = indexMonitorThread.indexes();
132 | return groupIndicesToTasksConfig(maxTasks, currentIndexes);
133 | }
134 | 
135 | private List<Map<String, String>> groupIndicesToTasksConfig(int maxTasks, List<String> currentIndexes) {
136 | int numGroups = Math.min(currentIndexes.size(), maxTasks);
137 | List<List<String>> indexGrouped = groupPartitions(currentIndexes, numGroups);
138 | List<Map<String, String>> taskConfigs = new ArrayList<>(indexGrouped.size());
139 | for (List<String> taskIndices : indexGrouped) {
140 | Map<String, String> taskProps = new HashMap<>(configProperties);
141 | taskProps.put(ElasticSourceConnectorConfig.INDICES_CONFIG,
142 | String.join(",", taskIndices));
143 | taskConfigs.add(taskProps);
144 | }
145 | return taskConfigs;
146 | }
147 |
148 | @Override
149 | public void stop() {
150 | logger.info("stopping elastic source");
151 | indexMonitorThread.shutdown();
152 | try {
153 | indexMonitorThread.join(MAX_TIMEOUT);
154 | } catch (InterruptedException e) {
155 | // Ignore, shouldn't be interrupted
156 | }
157 | elasticConnection.closeQuietly();
158 | }
159 |
160 | @Override
161 | public ConfigDef config() {
162 | return ElasticSourceConnectorConfig.CONFIG_DEF;
163 | }
164 |
165 |
166 | private List<List<String>> groupPartitions(List<String> currentIndices, int numGroups) {
167 | List<List<String>> result = new ArrayList<>(numGroups);
168 | for (int i = 0; i < numGroups; ++i) {
169 | result.add(new ArrayList<>());
170 | }
171 |
172 | for (int i = 0; i < currentIndices.size(); ++i) {
173 | result.get(i % numGroups).add(currentIndices.get(i)); // round-robin assignment of indices to task groups
174 | }
175 |
176 | return result;
177 | }
178 | }
179 |
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/ElasticSourceConnectorConfig.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.github.dariobalinzo;
18 |
19 | import org.apache.kafka.common.config.AbstractConfig;
20 | import org.apache.kafka.common.config.ConfigDef;
21 | import org.apache.kafka.common.config.ConfigDef.Importance;
22 | import org.apache.kafka.common.config.ConfigDef.Type;
23 | import org.apache.kafka.common.config.ConfigDef.Width;
24 |
25 | import java.util.Collections;
26 | import java.util.Map;
27 |
28 | public class ElasticSourceConnectorConfig extends AbstractConfig {
29 |
30 | public final static String ES_HOST_CONF = "es.host";
31 | private final static String ES_HOST_DOC = "ElasticSearch host. " +
32 | "Optionally it is possible to specify many hosts " +
33 | "using ; as separator (host1;host2;host3)";
34 | private final static String ES_HOST_DISPLAY = "Elastic host";
35 |
36 | public final static String ES_SCHEME_CONF = "es.scheme";
37 | private final static String ES_SCHEME_DOC = "Elasticsearch scheme (default: http)";
38 | private final static String ES_SCHEME_DISPLAY = "Elasticsearch scheme";
39 | private static final String ES_SCHEME_DEFAULT = "http";
40 |
41 | public final static String ES_PORT_CONF = "es.port";
42 | private final static String ES_PORT_DOC = "ElasticSearch port";
43 | private final static String ES_PORT_DISPLAY = "ElasticSearch port";
44 |
45 | public final static String ES_USER_CONF = "es.user";
46 | private final static String ES_USER_DOC = "Elasticsearch username";
47 | private final static String ES_USER_DISPLAY = "Elasticsearch username";
48 |
49 | public final static String ES_PWD_CONF = "es.password";
50 | private final static String ES_PWD_DOC = "Elasticsearch password";
51 | private final static String ES_PWD_DISPLAY = "Elasticsearch password";
52 |
53 | public final static String ES_KEYSTORE_CONF = "es.tls.keystore.location";
54 | private final static String ES_KEYSTORE_DOC = "Elasticsearch keystore location";
55 |
56 | public final static String ES_KEYSTORE_PWD_CONF = "es.tls.keystore.password";
57 | private final static String ES_KEYSTORE_PWD_DOC = "Elasticsearch keystore password";
58 |
59 | public final static String ES_TRUSTSTORE_CONF = "es.tls.truststore.location";
60 | private final static String ES_TRUSTSTORE_DOC = "Elasticsearch truststore location";
61 |
62 | public final static String ES_TRUSTSTORE_PWD_CONF = "es.tls.truststore.password";
63 | private final static String ES_TRUSTSTORE_PWD_DOC = "Elasticsearch truststore password";
64 |
65 | public static final String CONNECTION_ATTEMPTS_CONFIG = "connection.attempts";
66 | private static final String CONNECTION_ATTEMPTS_DOC
67 | = "Maximum number of attempts to retrieve a valid Elasticsearch connection.";
68 | private static final String CONNECTION_ATTEMPTS_DISPLAY = "Elasticsearch connection attempts";
69 | private static final String CONNECTION_ATTEMPTS_DEFAULT = "3";
70 |
71 | public static final String CONNECTION_BACKOFF_CONFIG = "connection.backoff.ms";
72 | private static final String CONNECTION_BACKOFF_DOC
73 | = "Backoff time in milliseconds between connection attempts.";
74 | private static final String CONNECTION_BACKOFF_DISPLAY
75 | = "Elastic connection backoff in milliseconds";
76 | private static final String CONNECTION_BACKOFF_DEFAULT = "10000";
77 |
78 | public static final String POLL_INTERVAL_MS_CONFIG = "poll.interval.ms";
79 | private static final String POLL_INTERVAL_MS_DOC = "Frequency in ms to poll for new data in "
80 | + "each index.";
81 | private static final String POLL_INTERVAL_MS_DEFAULT = "5000";
82 | private static final String POLL_INTERVAL_MS_DISPLAY = "Poll Interval (ms)";
83 |
84 | public static final String BATCH_MAX_ROWS_CONFIG = "batch.max.rows";
85 | private static final String BATCH_MAX_ROWS_DOC =
86 | "Maximum number of documents to include in a single batch when polling for new data.";
87 | private static final String BATCH_MAX_ROWS_DEFAULT = "10000";
88 | private static final String BATCH_MAX_ROWS_DISPLAY = "Max Documents Per Batch";
89 |
90 | private static final String MODE_UNSPECIFIED = "";
91 | private static final String MODE_BULK = "bulk";
92 | private static final String MODE_TIMESTAMP = "timestamp";
93 | private static final String MODE_INCREMENTING = "incrementing";
94 | private static final String MODE_TIMESTAMP_INCREMENTING = "timestamp+incrementing";
95 |
96 | public static final String INCREMENTING_FIELD_NAME_CONFIG = "incrementing.field.name";
97 | private static final String INCREMENTING_FIELD_NAME_DOC =
98 | "The name of the strictly incrementing field to use to detect new records.";
99 | private static final String INCREMENTING_FIELD_NAME_DEFAULT = "";
100 | private static final String INCREMENTING_FIELD_NAME_DISPLAY = "Incrementing Field Name";
101 |
102 | public static final String SECONDARY_INCREMENTING_FIELD_NAME_CONFIG = "incrementing.secondary.field.name";
103 | private static final String SECONDARY_INCREMENTING_FIELD_NAME_DOC =
104 | "In case the main incrementing field may have duplicates, this secondary field is used as a secondary sort field" +
105 | " in order to avoid data losses when paginating";
106 | private static final String SECONDARY_INCREMENTING_FIELD_NAME_DISPLAY = "Secondary Incrementing Field Name";
107 |
108 | public static final String INDEX_PREFIX_CONFIG = "index.prefix";
109 | private static final String INDEX_PREFIX_DOC = "Prefix of the indices to include in copying.";
110 | private static final String INDEX_PREFIX_DEFAULT = "";
111 | private static final String INDEX_PREFIX_DISPLAY = "Index Prefix Whitelist";
112 |
113 | public static final String INDEX_NAMES_CONFIG = "index.names";
114 | private static final String INDEX_NAMES_DOC = "Comma-separated list of Elasticsearch indices (e.g. es1,es2,es3)";
115 | private static final String INDEX_NAMES_DEFAULT = null;
116 | private static final String INDEX_NAMES_DISPLAY = "Elasticsearch indices";
117 |
118 |
119 | public static final String TOPIC_PREFIX_CONFIG = "topic.prefix";
120 | private static final String TOPIC_PREFIX_DOC =
121 | "Prefix to prepend to index names to generate the name of the Kafka topic to publish data";
122 | private static final String TOPIC_PREFIX_DISPLAY = "Topic Prefix";
123 |
124 | private static final String DATABASE_GROUP = "Elasticsearch";
125 | private static final String MODE_GROUP = "Mode";
126 | private static final String CONNECTOR_GROUP = "Connector";
127 |
128 | private static final String MODE_CONFIG = "mode";
129 | private static final String MODE_DOC = "The mode for updating each index when it is polled (bulk, timestamp, incrementing, timestamp+incrementing).";
130 | private static final String MODE_DISPLAY = "Mode";
131 |
132 | public static final String INDICES_CONFIG = "es.indices";
133 |
134 | public static final String FIELDS_WHITELIST_CONFIG = "filters.whitelist";
135 | private static final String FIELDS_WHITELIST_DOC = "Whitelist filter for fields (e.g. order.qty;order.price;status)";
136 | private static final String FIELDS_WHITELIST_DISPLAY = "Fields whitelist";
137 |
138 | public static final String FIELDS_BLACKLIST_CONFIG = "filters.blacklist";
139 | private static final String FIELDS_BLACKLIST_DOC = "Blacklist filter for fields (e.g. order.qty;order.price;status)";
140 | private static final String FIELDS_BLACKLIST_DISPLAY = "Fields blacklist";
141 |
142 | public static final String FIELDS_JSON_CAST_CONFIG = "filters.json_cast";
143 | private static final String FIELDS_JSON_CAST_DOC = "Cast to JSON string instead of parsing nested objects (e.g. order.qty;order.price;status)";
144 | private static final String FIELDS_JSON_CAST_DISPLAY = "Cast to json string";
145 |
146 | public static final String CONNECTOR_FIELDNAME_CONVERTER_CONFIG = "fieldname_converter";
147 | public static final String CONNECTOR_FIELDNAME_CONVERTER_DOC = "Determines which name converter is used for document fields (default: avro)";
148 | public static final String CONNECTOR_FIELDNAME_CONVERTER_DISPLAY = "Field name converter (avro, nop)";
149 |
150 | public static final String NOP_FIELDNAME_CONVERTER = "nop";
151 | public static final String AVRO_FIELDNAME_CONVERTER = "avro";
152 |
153 | public static final ConfigDef CONFIG_DEF = baseConfigDef();
154 |
155 | protected static ConfigDef baseConfigDef() {
156 | ConfigDef config = new ConfigDef();
157 | addDatabaseOptions(config);
158 | addModeOptions(config);
159 | addConnectorOptions(config);
160 | return config;
161 | }
162 |
163 | private static void addDatabaseOptions(ConfigDef config) {
164 | int orderInGroup = 0;
165 | config.define(
166 | ES_HOST_CONF,
167 | Type.STRING,
168 | Importance.HIGH,
169 | ES_HOST_DOC,
170 | DATABASE_GROUP,
171 | ++orderInGroup,
172 | Width.LONG,
173 | ES_HOST_DISPLAY,
174 | Collections.singletonList(INDEX_PREFIX_CONFIG)
175 | ).define(
176 | ES_SCHEME_CONF,
177 | Type.STRING,
178 | ES_SCHEME_DEFAULT,
179 | Importance.MEDIUM,
180 | ES_SCHEME_DOC,
181 | DATABASE_GROUP,
182 | ++orderInGroup,
183 | Width.LONG,
184 | ES_SCHEME_DISPLAY
185 | ).define(
186 | ES_PORT_CONF,
187 | Type.STRING,
188 | Importance.HIGH,
189 | ES_PORT_DOC,
190 | DATABASE_GROUP,
191 | ++orderInGroup,
192 | Width.LONG,
193 | ES_PORT_DISPLAY,
194 | Collections.singletonList(INDEX_PREFIX_CONFIG)
195 | ).define(
196 | ES_USER_CONF,
197 | Type.STRING,
198 | null,
199 | Importance.HIGH,
200 | ES_USER_DOC,
201 | DATABASE_GROUP,
202 | ++orderInGroup,
203 | Width.LONG,
204 | ES_USER_DISPLAY
205 | ).define(
206 | ES_PWD_CONF,
207 | Type.STRING,
208 | null,
209 | Importance.HIGH,
210 | ES_PWD_DOC,
211 | DATABASE_GROUP,
212 | ++orderInGroup,
213 | Width.SHORT,
214 | ES_PWD_DISPLAY
215 | ).define(
216 | ES_KEYSTORE_CONF,
217 | Type.STRING,
218 | null,
219 | Importance.MEDIUM,
220 | ES_KEYSTORE_DOC,
221 | DATABASE_GROUP,
222 | ++orderInGroup,
223 | Width.SHORT,
224 | ES_KEYSTORE_DOC
225 | ).define(
226 | ES_KEYSTORE_PWD_CONF,
227 | Type.STRING,
228 | "",
229 | Importance.MEDIUM,
230 | ES_KEYSTORE_PWD_DOC,
231 | DATABASE_GROUP,
232 | ++orderInGroup,
233 | Width.SHORT,
234 | ES_KEYSTORE_PWD_DOC
235 | ).define(
236 | ES_TRUSTSTORE_CONF,
237 | Type.STRING,
238 | null,
239 | Importance.MEDIUM,
240 | ES_TRUSTSTORE_DOC,
241 | DATABASE_GROUP,
242 | ++orderInGroup,
243 | Width.SHORT,
244 | ES_TRUSTSTORE_DOC
245 | ).define(
246 | ES_TRUSTSTORE_PWD_CONF,
247 | Type.STRING,
248 | "",
249 | Importance.MEDIUM,
250 | ES_TRUSTSTORE_PWD_DOC,
251 | DATABASE_GROUP,
252 | ++orderInGroup,
253 | Width.SHORT,
254 | ES_TRUSTSTORE_PWD_DOC
255 | ).define(
256 | CONNECTION_ATTEMPTS_CONFIG,
257 | Type.STRING,
258 | CONNECTION_ATTEMPTS_DEFAULT,
259 | Importance.LOW,
260 | CONNECTION_ATTEMPTS_DOC,
261 | DATABASE_GROUP,
262 | ++orderInGroup,
263 | ConfigDef.Width.SHORT,
264 | CONNECTION_ATTEMPTS_DISPLAY
265 | ).define(
266 | CONNECTION_BACKOFF_CONFIG,
267 | Type.STRING,
268 | CONNECTION_BACKOFF_DEFAULT,
269 | Importance.LOW,
270 | CONNECTION_BACKOFF_DOC,
271 | DATABASE_GROUP,
272 | ++orderInGroup,
273 | Width.SHORT,
274 | CONNECTION_BACKOFF_DISPLAY
275 | ).define(
276 | INDEX_PREFIX_CONFIG,
277 | Type.STRING,
278 | INDEX_PREFIX_DEFAULT,
279 | Importance.MEDIUM,
280 | INDEX_PREFIX_DOC,
281 | DATABASE_GROUP,
282 | ++orderInGroup,
283 | Width.LONG,
284 | INDEX_PREFIX_DISPLAY
285 | ).define(
286 | INDEX_NAMES_CONFIG,
287 | Type.STRING,
288 | INDEX_NAMES_DEFAULT,
289 | Importance.MEDIUM,
290 | INDEX_NAMES_DOC,
291 | DATABASE_GROUP,
292 | ++orderInGroup,
293 | Width.LONG,
294 | INDEX_NAMES_DISPLAY
295 | ).define(
296 | FIELDS_WHITELIST_CONFIG,
297 | Type.STRING,
298 | null,
299 | Importance.MEDIUM,
300 | FIELDS_WHITELIST_DOC,
301 | CONNECTOR_GROUP,
302 | ++orderInGroup,
303 | Width.MEDIUM,
304 | FIELDS_WHITELIST_DISPLAY
305 | ).define(
306 | FIELDS_BLACKLIST_CONFIG,
307 | Type.STRING,
308 | null,
309 | Importance.MEDIUM,
310 | FIELDS_BLACKLIST_DOC,
311 | CONNECTOR_GROUP,
312 | ++orderInGroup,
313 | Width.MEDIUM,
314 | FIELDS_BLACKLIST_DISPLAY
315 | ).define(
316 | FIELDS_JSON_CAST_CONFIG,
317 | Type.STRING,
318 | null,
319 | Importance.MEDIUM,
320 | FIELDS_JSON_CAST_DOC,
321 | CONNECTOR_GROUP,
322 | ++orderInGroup,
323 | Width.MEDIUM,
324 | FIELDS_JSON_CAST_DISPLAY
325 | );
326 | }
327 |
328 | private static void addModeOptions(ConfigDef config) {
329 | int orderInGroup = 0;
330 | config.define(
331 | MODE_CONFIG,
332 | Type.STRING,
333 | MODE_UNSPECIFIED,
334 | ConfigDef.ValidString.in(
335 | MODE_UNSPECIFIED,
336 | MODE_BULK,
337 | MODE_TIMESTAMP,
338 | MODE_INCREMENTING,
339 | MODE_TIMESTAMP_INCREMENTING
340 | ),
341 | Importance.HIGH,
342 | MODE_DOC,
343 | MODE_GROUP,
344 | ++orderInGroup,
345 | Width.MEDIUM,
346 | MODE_DISPLAY,
347 | Collections.singletonList(
348 | INCREMENTING_FIELD_NAME_CONFIG
349 | )
350 | ).define(
351 | INCREMENTING_FIELD_NAME_CONFIG,
352 | Type.STRING,
353 | INCREMENTING_FIELD_NAME_DEFAULT,
354 | Importance.MEDIUM,
355 | INCREMENTING_FIELD_NAME_DOC,
356 | MODE_GROUP,
357 | ++orderInGroup,
358 | Width.MEDIUM,
359 | INCREMENTING_FIELD_NAME_DISPLAY
360 | ).define(
361 | SECONDARY_INCREMENTING_FIELD_NAME_CONFIG,
362 | Type.STRING,
363 | null,
364 | Importance.LOW,
365 | SECONDARY_INCREMENTING_FIELD_NAME_DOC,
366 | MODE_GROUP,
367 | ++orderInGroup,
368 | Width.MEDIUM,
369 | SECONDARY_INCREMENTING_FIELD_NAME_DISPLAY
370 | );
371 | }
372 |
373 | private static void addConnectorOptions(ConfigDef config) {
374 | int orderInGroup = 0;
375 | config.define(
376 | POLL_INTERVAL_MS_CONFIG,
377 | Type.STRING,
378 | POLL_INTERVAL_MS_DEFAULT,
379 | Importance.HIGH,
380 | POLL_INTERVAL_MS_DOC,
381 | CONNECTOR_GROUP,
382 | ++orderInGroup,
383 | Width.SHORT,
384 | POLL_INTERVAL_MS_DISPLAY
385 | ).define(
386 | BATCH_MAX_ROWS_CONFIG,
387 | Type.STRING,
388 | BATCH_MAX_ROWS_DEFAULT,
389 | Importance.LOW,
390 | BATCH_MAX_ROWS_DOC,
391 | CONNECTOR_GROUP,
392 | ++orderInGroup,
393 | Width.SHORT,
394 | BATCH_MAX_ROWS_DISPLAY
395 | ).define(
396 | TOPIC_PREFIX_CONFIG,
397 | Type.STRING,
398 | Importance.HIGH,
399 | TOPIC_PREFIX_DOC,
400 | CONNECTOR_GROUP,
401 | ++orderInGroup,
402 | Width.MEDIUM,
403 | TOPIC_PREFIX_DISPLAY
404 | ).define(
405 | CONNECTOR_FIELDNAME_CONVERTER_CONFIG,
406 | Type.STRING,
407 | AVRO_FIELDNAME_CONVERTER,
408 | Importance.MEDIUM,
409 | CONNECTOR_FIELDNAME_CONVERTER_DOC,
410 | CONNECTOR_GROUP,
411 | ++orderInGroup,
412 | Width.MEDIUM,
413 | CONNECTOR_FIELDNAME_CONVERTER_DISPLAY
414 | );
415 | }
416 |
417 | public ElasticSourceConnectorConfig(Map<String, String> properties) {
418 | super(CONFIG_DEF, properties);
419 | }
420 |
421 | protected ElasticSourceConnectorConfig(ConfigDef subclassConfigDef, Map<String, String> props) {
422 | super(subclassConfigDef, props);
423 | }
424 |
425 | }
426 |
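A minimal usage sketch (not a file in this repository) of how these constants come together: raw connector properties are validated against CONFIG_DEF inside the constructor, so an invalid value for an enum-style key such as "mode" fails fast.

    import java.util.HashMap;
    import java.util.Map;

    class ConfigUsageSketch {
        public static void main(String[] args) {
            Map<String, String> props = new HashMap<>();
            props.put(ElasticSourceConnectorConfig.ES_HOST_CONF, "localhost");
            props.put(ElasticSourceConnectorConfig.ES_PORT_CONF, "9200");
            props.put(ElasticSourceConnectorConfig.TOPIC_PREFIX_CONFIG, "es_");
            props.put(ElasticSourceConnectorConfig.INDEX_PREFIX_CONFIG, "logs-");

            // Throws ConfigException on invalid values; defaults fill in the rest.
            ElasticSourceConnectorConfig config = new ElasticSourceConnectorConfig(props);
            System.out.println(config.getString(ElasticSourceConnectorConfig.ES_PORT_CONF));
        }
    }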
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/Version.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.github.dariobalinzo;
18 |
19 | public class Version {
20 | public static final String VERSION = "1.5.5";
21 | }
22 |
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/elastic/CursorField.java:
--------------------------------------------------------------------------------
1 | package com.github.dariobalinzo.elastic;
2 |
3 | import java.util.Map;
4 |
5 | import static com.github.dariobalinzo.elastic.ElasticJsonNaming.removeKeywordSuffix;
6 |
7 | public class CursorField {
8 | private final String cursor;
9 |
10 | public CursorField(String cursor) {
11 | this.cursor = removeKeywordSuffix(cursor);
12 | }
13 |
14 | public String read(Map<String, Object> document) {
15 | return read(document, cursor);
16 | }
17 |
18 | private String read(Map<String, Object> document, String field) {
19 | int firstDot = field.indexOf('.');
20 |
21 | Object value = null;
22 | if (document.containsKey(field)) {
23 | value = document.get(field);
24 | } else if (firstDot > 0 && firstDot < field.length() - 1) {
25 | String parent = field.substring(0, firstDot);
26 | Object nested = document.get(parent);
27 | if (nested instanceof Map) {
28 | return read((Map<String, Object>) nested,
29 | field.substring(firstDot + 1));
30 | }
31 | }
32 |
33 | return value == null ? null : value.toString();
34 | }
35 | }
36 |
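A usage sketch (illustrative, not part of the repository): CursorField reads a dotted path out of a nested document, and the constructor strips a trailing ".keyword" suffix so a field used for Elasticsearch sorting can be read back from the _source map.

    import java.util.Collections;
    import java.util.HashMap;
    import java.util.Map;

    Map<String, Object> doc = new HashMap<>();
    doc.put("order", Collections.singletonMap("qty", 3));

    CursorField cursor = new CursorField("order.qty.keyword"); // reads "order.qty"
    String value = cursor.read(doc); // -> "3"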
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/elastic/ElasticConnection.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.github.dariobalinzo.elastic;
18 |
19 | import org.apache.http.HttpHost;
20 | import org.apache.http.auth.AuthScope;
21 | import org.apache.http.auth.UsernamePasswordCredentials;
22 | import org.apache.http.client.CredentialsProvider;
23 | import org.apache.http.impl.client.BasicCredentialsProvider;
24 | import org.apache.http.ssl.SSLContextBuilder;
25 | import org.apache.http.ssl.SSLContexts;
26 | import org.elasticsearch.client.RestClient;
27 | import org.elasticsearch.client.RestHighLevelClient;
28 | import org.slf4j.Logger;
29 | import org.slf4j.LoggerFactory;
30 |
31 | import javax.net.ssl.SSLContext;
32 | import java.io.IOException;
33 | import java.io.InputStream;
34 | import java.nio.file.Files;
35 | import java.nio.file.Path;
36 | import java.nio.file.Paths;
37 | import java.security.KeyStore;
38 | import java.util.Arrays;
39 | import java.util.Objects;
40 |
41 | public class ElasticConnection {
42 | public final static Logger logger = LoggerFactory.getLogger(ElasticConnection.class);
43 |
44 | private RestHighLevelClient client;
45 | private final long connectionRetryBackoff;
46 | private final int maxConnectionAttempts;
47 | private final String hosts;
48 | private final String protocol;
49 | private final int port;
50 | private final SSLContext sslContext;
51 | private final CredentialsProvider credentialsProvider;
52 |
53 | ElasticConnection(ElasticConnectionBuilder builder) {
54 | hosts = builder.hosts;
55 | protocol = builder.protocol;
56 | port = builder.port;
57 |
58 | String user = builder.user;
59 | String pwd = builder.pwd;
60 | if (user != null) {
61 | credentialsProvider = new BasicCredentialsProvider();
62 | credentialsProvider.setCredentials(AuthScope.ANY,
63 | new UsernamePasswordCredentials(user, pwd));
64 | } else {
65 | credentialsProvider = null;
66 | }
67 |
68 | sslContext = builder.trustStorePath == null ? null :
69 | getSslContext(
70 | builder.trustStorePath,
71 | builder.trustStorePassword,
72 | builder.keyStorePath,
73 | builder.keyStorePassword
74 | );
75 |
76 | createConnection();
77 |
78 | this.maxConnectionAttempts = builder.maxConnectionAttempts;
79 | this.connectionRetryBackoff = builder.connectionRetryBackoff;
80 | }
81 |
82 | private void createConnection() {
83 | HttpHost[] hostList = parseHosts(hosts, protocol, port);
84 |
85 | client = new RestHighLevelClient(
86 | RestClient.builder(hostList)
87 | .setHttpClientConfigCallback(
88 | httpClientBuilder -> {
89 | if (credentialsProvider != null) {
90 | httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider);
91 | }
92 | if (sslContext != null) {
93 | httpClientBuilder.setSSLContext(sslContext);
94 | }
95 | return httpClientBuilder;
96 | }
97 | )
98 | );
99 | }
100 |
101 | private SSLContext getSslContext(String trustStoreConf, String trustStorePass,
102 | String keyStoreConf, String keyStorePass) {
103 |
104 | Objects.requireNonNull(trustStoreConf, "truststore location is required");
105 | Objects.requireNonNull(trustStorePass, "truststore password is required");
106 |
107 | try {
108 | Path trustStorePath = Paths.get(trustStoreConf);
109 | KeyStore truststore = KeyStore.getInstance("pkcs12");
110 | try (InputStream is = Files.newInputStream(trustStorePath)) {
111 | truststore.load(is, trustStorePass.toCharArray());
112 | }
113 | SSLContextBuilder sslBuilder = SSLContexts.custom()
114 | .loadTrustMaterial(truststore, null);
115 |
116 | if (keyStoreConf != null) {
117 | Objects.requireNonNull(keyStorePass, "keystore password is required");
118 | Path keyStorePath = Paths.get(keyStoreConf);
119 | KeyStore keyStore = KeyStore.getInstance("pkcs12");
120 | try (InputStream is = Files.newInputStream(keyStorePath)) {
121 | keyStore.load(is, keyStorePass.toCharArray());
122 | }
123 | sslBuilder.loadKeyMaterial(keyStore, keyStorePass.toCharArray());
124 | }
125 |
126 | return sslBuilder.build();
127 | } catch (Exception e) {
128 | throw new SslContextException(e);
129 | }
130 | }
131 |
132 | private HttpHost[] parseHosts(String hosts, String protocol, int port) {
133 | return Arrays.stream(hosts.split(";"))
134 | .map(host -> new HttpHost(host, port, protocol))
135 | .toArray(HttpHost[]::new);
136 | }
137 |
138 | public RestHighLevelClient getClient() {
139 | return client;
140 | }
141 |
142 | public long getConnectionRetryBackoff() {
143 | return connectionRetryBackoff;
144 | }
145 |
146 | public int getMaxConnectionAttempts() {
147 | return maxConnectionAttempts;
148 | }
149 |
150 | public void closeQuietly() {
151 | try {
152 | client.close();
153 | } catch (IOException e) {
154 | logger.error("error in close", e);
155 | }
156 | }
157 |
158 | }
159 |
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/elastic/ElasticConnectionBuilder.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.github.dariobalinzo.elastic;
18 |
19 | public class ElasticConnectionBuilder {
20 | final String hosts;
21 | final int port;
22 |
23 | String protocol = "http";
24 | int maxConnectionAttempts = 3;
25 | long connectionRetryBackoff = 1_000;
26 | String user;
27 | String pwd;
28 |
29 | String trustStorePath;
30 | String trustStorePassword;
31 | String keyStorePath;
32 | String keyStorePassword;
33 |
34 | public ElasticConnectionBuilder(String hosts, int port) {
35 | this.hosts = hosts;
36 | this.port = port;
37 | }
38 |
39 | public ElasticConnectionBuilder withProtocol(String protocol) {
40 | this.protocol = protocol;
41 | return this;
42 | }
43 |
44 | public ElasticConnectionBuilder withUser(String user) {
45 | this.user = user;
46 | return this;
47 | }
48 |
49 | public ElasticConnectionBuilder withPassword(String password) {
50 | this.pwd = password;
51 | return this;
52 | }
53 |
54 | public ElasticConnectionBuilder withMaxAttempts(int maxConnectionAttempts) {
55 | this.maxConnectionAttempts = maxConnectionAttempts;
56 | return this;
57 | }
58 |
59 | public ElasticConnectionBuilder withBackoff(long connectionRetryBackoff) {
60 | this.connectionRetryBackoff = connectionRetryBackoff;
61 | return this;
62 | }
63 |
64 | public ElasticConnectionBuilder withTrustStore(String path, String password) {
65 | this.trustStorePath = path;
66 | this.trustStorePassword = password;
67 | return this;
68 | }
69 |
70 | public ElasticConnectionBuilder withKeyStore(String path, String password) {
71 | this.keyStorePath = path;
72 | this.keyStorePassword = password;
73 | return this;
74 | }
75 |
76 | public ElasticConnection build() {
77 | return new ElasticConnection(this);
78 | }
79 |
80 | }
81 |
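An illustrative build (values are placeholders): multiple hosts share one port and protocol and are separated by ';', matching how ElasticConnection.parseHosts splits the host string; the TLS store calls are optional.

    ElasticConnection connection = new ElasticConnectionBuilder("es-node1;es-node2", 9200)
            .withProtocol("https")
            .withUser("elastic")
            .withPassword("secret")
            .withTrustStore("/etc/ssl/truststore.p12", "changeit")
            .withMaxAttempts(5)
            .withBackoff(2_000)
            .build();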
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/elastic/ElasticIndexMonitorThread.java:
--------------------------------------------------------------------------------
1 | package com.github.dariobalinzo.elastic;
2 |
3 | import org.apache.kafka.connect.connector.ConnectorContext;
4 | import org.apache.kafka.connect.errors.ConnectException;
5 | import org.slf4j.Logger;
6 | import org.slf4j.LoggerFactory;
7 |
8 | import java.util.ArrayList;
9 | import java.util.List;
10 | import java.util.concurrent.CountDownLatch;
11 | import java.util.concurrent.TimeUnit;
12 |
13 |
14 | /**
15 | * Thread that monitors Elasticsearch for changes to the set of indices.
16 | */
17 | public class ElasticIndexMonitorThread extends Thread {
18 | private static final Logger log = LoggerFactory.getLogger(ElasticIndexMonitorThread.class);
19 | private static final long TIMEOUT = 10_000L;
20 |
21 | private final ConnectorContext context;
22 | private final CountDownLatch shutdownLatch;
23 | private final long pollMs;
24 | private final ElasticRepository elasticRepository;
25 | private final String prefix;
26 | private List<String> indexes;
27 |
28 | public ElasticIndexMonitorThread(ConnectorContext context, long pollMs, ElasticRepository elasticRepository, String prefix) {
29 | this.context = context;
30 | this.shutdownLatch = new CountDownLatch(1);
31 | this.pollMs = pollMs;
32 | this.elasticRepository = elasticRepository;
33 | this.prefix = prefix;
34 | this.indexes = new ArrayList<>();
35 | }
36 |
37 | public static long getTimeout() {
38 | return TIMEOUT;
39 | }
40 |
41 | @Override
42 | public void run() {
43 | while (shutdownLatch.getCount() > 0) {
44 | try {
45 | if (updateIndexes()) {
46 | context.requestTaskReconfiguration();
47 | }
48 | } catch (Exception e) {
49 | context.raiseError(e);
50 | throw e;
51 | }
52 |
53 | try {
54 | boolean shuttingDown = shutdownLatch.await(pollMs, TimeUnit.MILLISECONDS);
55 | if (shuttingDown) {
56 | return;
57 | }
58 | } catch (InterruptedException e) {
59 | log.error("Unexpected InterruptedException, ignoring: ", e);
60 | }
61 | }
62 | }
63 |
64 | public synchronized List<String> indexes() {
65 |
66 | long started = System.currentTimeMillis();
67 | long now = started;
68 | while (indexes.isEmpty() && now - started < TIMEOUT) {
69 | try {
70 | wait(TIMEOUT - (now - started));
71 | } catch (InterruptedException e) {
72 | // Ignore
73 | }
74 | now = System.currentTimeMillis();
75 | }
76 | if (indexes.isEmpty()) {
77 | throw new ConnectException("Cannot find any elasticsearch index");
78 | }
79 | return indexes;
80 | }
81 |
82 | public void shutdown() {
83 | shutdownLatch.countDown();
84 | }
85 |
86 | private synchronized boolean updateIndexes() {
87 | final List<String> indexes;
88 | try {
89 | indexes = elasticRepository.catIndices(this.prefix);
90 | log.debug("Got the following topics: {}", indexes);
91 | } catch (RuntimeException e) {
92 | log.error("Error while trying to get updated topics list, ignoring and waiting for next table poll interval", e);
93 | return false;
94 | }
95 |
96 | if (!indexes.equals(this.indexes)) {
97 | log.debug("After filtering we got topics: {}", indexes);
98 | List previousIndexes = this.indexes;
99 | this.indexes = indexes;
100 | notifyAll();
101 | // Only return true if the table list wasn't previously null, i.e. if this was not the
102 | // first table lookup
103 | return !previousIndexes.isEmpty();
104 | }
105 | return false;
106 | }
107 | }
108 |
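A lifecycle sketch ("context" and "repository" are assumed to exist; in the connector they come from the Connect runtime): the monitor polls for index changes in the background and requests a task reconfiguration whenever the set of matching indices changes.

    ElasticIndexMonitorThread monitor =
            new ElasticIndexMonitorThread(context, 5_000, repository, "logs-");
    monitor.start();
    List<String> indexes = monitor.indexes(); // blocks up to TIMEOUT for the first result
    // ... on connector stop:
    monitor.shutdown();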
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/elastic/ElasticJsonNaming.java:
--------------------------------------------------------------------------------
1 | package com.github.dariobalinzo.elastic;
2 |
3 | public class ElasticJsonNaming {
4 | public static String removeKeywordSuffix(String fieldName) {
5 | // strip only a trailing ".keyword", not occurrences inside the path
6 | return fieldName != null && fieldName.endsWith(".keyword")
7 | ? fieldName.substring(0, fieldName.length() - ".keyword".length())
8 | : fieldName;
9 | }
10 | }
11 |
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/elastic/ElasticRepository.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.github.dariobalinzo.elastic;
18 |
19 | import com.github.dariobalinzo.elastic.response.Cursor;
20 | import com.github.dariobalinzo.elastic.response.PageResult;
21 | import org.elasticsearch.action.search.SearchRequest;
22 | import org.elasticsearch.action.search.SearchResponse;
23 | import org.elasticsearch.client.Request;
24 | import org.elasticsearch.client.RequestOptions;
25 | import org.elasticsearch.client.Response;
26 | import org.elasticsearch.index.query.QueryBuilder;
27 | import org.elasticsearch.search.builder.SearchSourceBuilder;
28 | import org.elasticsearch.search.sort.SortOrder;
29 | import org.slf4j.Logger;
30 | import org.slf4j.LoggerFactory;
31 |
32 | import java.io.BufferedReader;
33 | import java.io.IOException;
34 | import java.io.InputStreamReader;
35 | import java.util.*;
36 | import java.util.stream.Collectors;
37 |
38 | import static com.github.dariobalinzo.elastic.ElasticJsonNaming.removeKeywordSuffix;
39 | import static org.elasticsearch.index.query.QueryBuilders.*;
40 |
41 | public final class ElasticRepository {
42 | private final static Logger logger = LoggerFactory.getLogger(ElasticRepository.class);
43 |
44 | private final ElasticConnection elasticConnection;
45 |
46 | private final String cursorSearchField;
47 | private final String secondaryCursorSearchField;
48 | private final CursorField cursorField;
49 | private final CursorField secondaryCursorField;
50 |
51 | private int pageSize = 5000;
52 |
53 | public ElasticRepository(ElasticConnection elasticConnection) {
54 | this(elasticConnection, "_id");
55 | }
56 |
57 | public ElasticRepository(ElasticConnection elasticConnection, String cursorField) {
58 | this(elasticConnection, cursorField, null);
59 | }
60 |
61 | public ElasticRepository(ElasticConnection elasticConnection, String cursorSearchField, String secondaryCursorSearchField) {
62 | this.elasticConnection = elasticConnection;
63 | this.cursorSearchField = cursorSearchField;
64 | this.cursorField = new CursorField(cursorSearchField);
65 | this.secondaryCursorSearchField = secondaryCursorSearchField;
66 | this.secondaryCursorField = secondaryCursorSearchField == null ? null : new CursorField(secondaryCursorSearchField);
67 | }
68 |
69 | public PageResult searchAfter(String index, Cursor cursor) throws IOException, InterruptedException {
70 | QueryBuilder queryBuilder = cursor.getPrimaryCursor() == null ?
71 | matchAllQuery() :
72 | buildGreaterThan(cursorSearchField, cursor.getPrimaryCursor());
73 |
74 | SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
75 | .query(queryBuilder)
76 | .size(pageSize)
77 | .sort(cursorSearchField, SortOrder.ASC);
78 |
79 | SearchRequest searchRequest = new SearchRequest(index)
80 | .source(searchSourceBuilder);
81 |
82 | SearchResponse response = executeSearch(searchRequest);
83 |
84 | List<Map<String, Object>> documents = extractDocuments(response);
85 |
86 | Cursor lastCursor;
87 | if (documents.isEmpty()) {
88 | lastCursor = Cursor.empty();
89 | } else {
90 | Map<String, Object> lastDocument = documents.get(documents.size() - 1);
91 | lastCursor = new Cursor(cursorField.read(lastDocument));
92 | }
93 | return new PageResult(index, documents, lastCursor);
94 | }
95 |
96 | private List<Map<String, Object>> extractDocuments(SearchResponse response) {
97 | return Arrays.stream(response.getHits().getHits())
98 | .map(hit -> {
99 | Map<String, Object> sourceMap = hit.getSourceAsMap();
100 | sourceMap.put("es-id", hit.getId());
101 | sourceMap.put("es-index", hit.getIndex());
102 | return sourceMap;
103 | }).collect(Collectors.toList());
104 | }
105 |
106 | public PageResult searchAfterWithSecondarySort(String index, Cursor cursor) throws IOException, InterruptedException {
107 | Objects.requireNonNull(secondaryCursorField);
108 | String primaryCursor = cursor.getPrimaryCursor();
109 | String secondaryCursor = cursor.getSecondaryCursor();
110 | boolean noPrevCursor = primaryCursor == null && secondaryCursor == null;
111 |
112 | QueryBuilder queryBuilder = noPrevCursor ? matchAllQuery() :
113 | getSecondarySortFieldQuery(primaryCursor, secondaryCursor);
114 |
115 | SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
116 | .query(queryBuilder)
117 | .size(pageSize)
118 | .sort(cursorSearchField, SortOrder.ASC)
119 | .sort(secondaryCursorSearchField, SortOrder.ASC);
120 |
121 | SearchRequest searchRequest = new SearchRequest(index)
122 | .source(searchSourceBuilder);
123 |
124 | SearchResponse response = executeSearch(searchRequest);
125 |
126 | List<Map<String, Object>> documents = extractDocuments(response);
127 |
128 | Cursor lastCursor;
129 | if (documents.isEmpty()) {
130 | lastCursor = Cursor.empty();
131 | } else {
132 | Map<String, Object> lastDocument = documents.get(documents.size() - 1);
133 | String primaryCursorValue = cursorField.read(lastDocument);
134 | String secondaryCursorValue = secondaryCursorField.read(lastDocument);
135 | lastCursor = new Cursor(primaryCursorValue, secondaryCursorValue);
136 | }
137 | return new PageResult(index, documents, lastCursor);
138 | }
139 |
140 | private QueryBuilder buildGreaterThan(String cursorField, String cursorValue) {
141 | return rangeQuery(cursorField).from(cursorValue, false);
142 | }
143 |
144 | private QueryBuilder getSecondarySortFieldQuery(String primaryCursor, String secondaryCursor) {
145 | if (secondaryCursor == null) {
146 | return buildGreaterThan(cursorSearchField, primaryCursor);
147 | }
148 | return boolQuery()
149 | .minimumShouldMatch(1)
150 | .should(buildGreaterThan(cursorSearchField, primaryCursor))
151 | .should(
152 | boolQuery()
153 | .filter(matchQuery(cursorSearchField, primaryCursor))
154 | .filter(buildGreaterThan(secondaryCursorSearchField, secondaryCursor))
155 | );
156 | }
157 |
158 | private SearchResponse executeSearch(SearchRequest searchRequest) throws IOException, InterruptedException {
159 | int maxTrials = elasticConnection.getMaxConnectionAttempts();
160 | if (maxTrials <= 0) {
161 | throw new IllegalArgumentException("MaxConnectionAttempts should be > 0");
162 | }
163 | IOException lastError = null;
164 | for (int i = 0; i < maxTrials; ++i) {
165 | try {
166 | return elasticConnection.getClient()
167 | .search(searchRequest, RequestOptions.DEFAULT);
168 | } catch (IOException e) {
169 | lastError = e;
170 | Thread.sleep(elasticConnection.getConnectionRetryBackoff());
171 | }
172 | }
173 | throw lastError;
174 | }
175 |
176 | public List<String> catIndices(String prefix) {
177 | Response resp;
178 | try {
179 | resp = elasticConnection.getClient()
180 | .getLowLevelClient()
181 | .performRequest(new Request("GET", "/_cat/indices"));
182 | } catch (IOException e) {
183 | logger.error("error in searching index names");
184 | throw new RuntimeException(e);
185 | }
186 |
187 | List<String> result = new ArrayList<>();
188 | try (BufferedReader reader = new BufferedReader(new InputStreamReader(resp.getEntity().getContent()))) {
189 | String line;
190 |
191 | while ((line = reader.readLine()) != null) {
192 | String index = line.split("\\s+")[2];
193 | if (index.startsWith(prefix)) {
194 | result.add(index);
195 | }
196 | }
197 | } catch (IOException e) {
198 | logger.error("error while getting indices", e);
199 | }
200 |
201 | Collections.sort(result);
202 |
203 | return result;
204 | }
205 |
206 | public void refreshIndex(String index) {
207 | try {
208 | elasticConnection.getClient()
209 | .getLowLevelClient()
210 | .performRequest(new Request("POST", "/" + index + "/_refresh"));
211 | } catch (IOException e) {
212 | logger.error("error in refreshing index " + index);
213 | throw new RuntimeException(e);
214 | }
215 | }
216 |
217 | public void setPageSize(int pageSize) {
218 | this.pageSize = pageSize;
219 | }
220 | }
221 |
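A polling sketch (assumes an already-built ElasticConnection named "connection" and an index sorted by a "@timestamp" field): each page's last cursor feeds the next search-after query until an empty page comes back.

    import com.github.dariobalinzo.elastic.response.Cursor;
    import com.github.dariobalinzo.elastic.response.PageResult;
    import java.io.IOException;

    static void dumpIndex(ElasticConnection connection) throws IOException, InterruptedException {
        ElasticRepository repo = new ElasticRepository(connection, "@timestamp");
        repo.setPageSize(1000);

        PageResult page = repo.searchAfter("my-index", Cursor.empty());
        while (!page.getDocuments().isEmpty()) {
            page.getDocuments().forEach(System.out::println);
            page = repo.searchAfter("my-index", page.getLastCursor());
        }
    }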
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/elastic/SslContextException.java:
--------------------------------------------------------------------------------
1 | package com.github.dariobalinzo.elastic;
2 |
3 | public class SslContextException extends RuntimeException {
4 | public SslContextException(Exception e) {
5 | super(e);
6 | }
7 | }
8 |
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/elastic/response/Cursor.java:
--------------------------------------------------------------------------------
1 | package com.github.dariobalinzo.elastic.response;
2 |
3 | public class Cursor {
4 | private final String primaryCursor;
5 | private final String secondaryCursor;
6 |
7 | public Cursor(String primaryCursor, String secondaryCursor) {
8 | this.primaryCursor = primaryCursor;
9 | this.secondaryCursor = secondaryCursor;
10 | }
11 |
12 | public Cursor(String primaryCursor) {
13 | this.primaryCursor = primaryCursor;
14 | this.secondaryCursor = null;
15 | }
16 |
17 | public String getPrimaryCursor() {
18 | return primaryCursor;
19 | }
20 |
21 | public String getSecondaryCursor() {
22 | return secondaryCursor;
23 | }
24 |
25 | public static Cursor empty() {
26 | return new Cursor(null, null);
27 | }
28 |
29 | @Override
30 | public String toString() {
31 | return "Cursor{" +
32 | "primaryCursor='" + primaryCursor + '\'' +
33 | ", secondaryCursor='" + secondaryCursor + '\'' +
34 | '}';
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/elastic/response/PageResult.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.github.dariobalinzo.elastic.response;
18 |
19 | import java.util.List;
20 | import java.util.Map;
21 |
22 | public class PageResult {
23 | private final String index;
24 | private final List<Map<String, Object>> documents;
25 | private final Cursor lastCursor;
26 |
27 | public PageResult(String index, List<Map<String, Object>> documents, Cursor cursor) {
28 | this.index = index;
29 | this.documents = documents;
30 | this.lastCursor = cursor;
31 | }
32 |
33 | public List<Map<String, Object>> getDocuments() {
34 | return documents;
35 | }
36 |
37 | public Cursor getLastCursor() {
38 | return lastCursor;
39 | }
40 |
41 | public String getIndex() {
42 | return index;
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/filter/BlacklistFilter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.github.dariobalinzo.filter;
17 |
18 | import java.util.List;
19 | import java.util.Map;
20 | import java.util.Set;
21 |
22 | public class BlacklistFilter implements DocumentFilter {
23 | private final JsonFilterVisitor visitor;
24 | private final Set<String> fieldsToRemove;
25 |
26 | public BlacklistFilter(Set<String> fieldsToRemove) {
27 | this.fieldsToRemove = fieldsToRemove;
28 | visitor = new JsonFilterVisitor(this::filterBlacklistItem);
29 | }
30 |
31 | private Object filterBlacklistItem(String key, Object value) {
32 | if (fieldsToRemove.contains(key)) {
33 | // a blacklisted path removes the field, including a whole nested subtree
34 | return null;
35 | }
36 | // keep everything else; nested maps and lists are still visited recursively
37 | return value;
38 | }
39 |
40 | @Override
41 | public void filter(Map<String, Object> document) {
42 | visitor.visit(document);
43 | }
44 | }
45 |
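A semantics sketch: paths listed in the blacklist are dropped (a listed prefix removes the whole subtree), while unrelated fields and nested objects are kept.

    import java.util.*;

    Map<String, Object> doc = new HashMap<>();
    doc.put("status", "shipped");
    Map<String, Object> order = new HashMap<>();
    order.put("qty", 3);
    order.put("price", 9.99);
    doc.put("order", order);

    new BlacklistFilter(new HashSet<>(Arrays.asList("order.price"))).filter(doc);
    // doc -> {status=shipped, order={qty=3}}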
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/filter/DocumentFilter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.github.dariobalinzo.filter;
18 |
19 | import java.util.Map;
20 |
21 | public interface DocumentFilter {
22 |
23 | void filter(Map<String, Object> document);
24 |
25 | }
26 |
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/filter/JsonCastFilter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.github.dariobalinzo.filter;
17 |
18 | import com.fasterxml.jackson.core.JsonProcessingException;
19 | import com.fasterxml.jackson.databind.ObjectMapper;
20 |
21 | import java.util.Map;
22 | import java.util.Set;
23 |
24 | public class JsonCastFilter implements DocumentFilter {
25 | private final Set<String> fieldsToCast;
26 | private final JsonFilterVisitor visitor;
27 | private final ObjectMapper objectMapper = new ObjectMapper();
28 |
29 | public JsonCastFilter(Set<String> fieldsToCast) {
30 | this.fieldsToCast = fieldsToCast;
31 | visitor = new JsonFilterVisitor(this::checkIfJsonCastNeeded);
32 | }
33 |
34 | @Override
35 | public void filter(Map<String, Object> document) {
36 | visitor.visit(document);
37 | }
38 |
39 | private Object checkIfJsonCastNeeded(String key, Object value) {
40 | if (fieldsToCast.contains(key)) {
41 | return castToJson(value);
42 | } else {
43 | return value;
44 | }
45 | }
46 |
47 | private String castToJson(Object value) {
48 | try {
49 | return objectMapper.writeValueAsString(value);
50 | } catch (JsonProcessingException e) {
51 | throw new RuntimeException(e);
52 | }
53 | }
54 | }
55 |
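A usage sketch: the listed path is serialized to its JSON string form, so a nested object reaches Kafka as a plain string value instead of a struct.

    import java.util.*;

    Map<String, Object> doc = new HashMap<>();
    doc.put("meta", Collections.singletonMap("a", 1));

    new JsonCastFilter(new HashSet<>(Collections.singletonList("meta"))).filter(doc);
    // doc.get("meta") is now the String "{\"a\":1}"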
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/filter/JsonElementFilter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.github.dariobalinzo.filter;
17 |
18 | @FunctionalInterface
19 | public interface JsonElementFilter {
20 | Object filterElement(String fieldPath, Object value);
21 | }
22 |
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/filter/JsonFilterVisitor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.github.dariobalinzo.filter;
17 |
18 | import java.util.Iterator;
19 | import java.util.List;
20 | import java.util.Map;
21 |
22 | public class JsonFilterVisitor {
23 | private final JsonElementFilter businessLogic;
24 |
25 | public JsonFilterVisitor(JsonElementFilter businessLogic) {
26 | this.businessLogic = businessLogic;
27 | }
28 |
29 | public void visit(Map<String, Object> document) {
30 | visitJsonDocument("", document);
31 | }
32 |
33 | @SuppressWarnings("unchecked")
34 | private void visitJsonDocument(String prefixPathName, Map<String, Object> document) {
35 | Iterator<Map.Entry<String, Object>> iterator = document.entrySet().iterator();
36 | while (iterator.hasNext()) {
37 | Map.Entry<String, Object> entry = iterator.next();
38 | String fullPathKey = prefixPathName + entry.getKey();
39 | Object element = businessLogic.filterElement(fullPathKey, entry.getValue());
40 | if (element == null) {
41 | iterator.remove();
42 | } else {
43 | entry.setValue(element);
44 | }
45 |
46 | if (entry.getValue() instanceof List) {
47 | List<Object> nestedList = (List<Object>) entry.getValue();
48 | visitNestedList(fullPathKey + ".", nestedList);
49 | } else if (entry.getValue() instanceof Map) {
50 | String nestedObjectPath = prefixPathName + entry.getKey() + ".";
51 | visitJsonDocument(nestedObjectPath, (Map<String, Object>) entry.getValue());
52 | }
53 | }
54 | }
55 |
56 | private void visitNestedList(String prefixPathName, List<Object> nestedList) {
57 | nestedList.forEach(item -> visitNestedMap(prefixPathName, item));
58 | }
59 |
60 | @SuppressWarnings("unchecked")
61 | private void visitNestedMap(String prefixPathName, Object item) {
62 | if (item instanceof Map) {
63 | visitJsonDocument(prefixPathName, (Map<String, Object>) item);
64 | }
65 | }
66 |
67 | }
68 |
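A sketch of a custom filter built on the visitor (hypothetical, not shipped with the connector): the lambda receives the full dotted path and value of every entry, including entries inside nested maps and lists; returning null removes the entry, any other return value replaces it.

    import java.util.HashMap;
    import java.util.Map;

    JsonFilterVisitor visitor = new JsonFilterVisitor((path, value) ->
            value instanceof String ? ((String) value).toUpperCase() : value);

    Map<String, Object> doc = new HashMap<>();
    doc.put("status", "shipped");
    visitor.visit(doc); // doc -> {status=SHIPPED}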
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/filter/WhitelistFilter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.github.dariobalinzo.filter;
17 |
18 | import java.util.List;
19 | import java.util.Map;
20 | import java.util.Set;
21 |
22 | public class WhitelistFilter implements DocumentFilter {
23 | private final JsonFilterVisitor visitor;
24 | private final Set<String> allowedValues;
25 |
26 | public WhitelistFilter(Set<String> allowedValues) {
27 | this.allowedValues = allowedValues;
28 | visitor = new JsonFilterVisitor(this::filterWhitelistItem);
29 | }
30 |
31 | private Object filterWhitelistItem(String key, Object value) {
32 | if (value instanceof Map || value instanceof List) {
33 | boolean shouldVisitNestedObj = allowedValues.stream()
34 | .anyMatch(jsonPath -> jsonPath.startsWith(key));
35 | return shouldVisitNestedObj ? value : null;
36 | }
37 | return allowedValues.contains(key) ? value : null;
38 | }
39 |
40 | @Override
41 | public void filter(Map<String, Object> document) {
42 | visitor.visit(document);
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/schema/AvroName.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.github.dariobalinzo.schema;
18 |
19 | public class AvroName implements FieldNameConverter {
20 |
21 | public String from(String elasticName) {
22 | return elasticName == null ? null : filterInvalidCharacters(elasticName);
23 | }
24 |
25 | public String from(String prefix, String elasticName) {
26 | return elasticName == null ? prefix : prefix + filterInvalidCharacters(elasticName);
27 | }
28 |
29 | private String filterInvalidCharacters(String elasticName) {
30 | boolean alphabetic = Character.isLetter(elasticName.charAt(0));
31 | if (!alphabetic) {
32 | elasticName = "avro" + elasticName;
33 | }
34 | return elasticName.replaceAll("[^a-zA-Z0-9]", "");
35 | }
36 | }
37 |
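Behavior sketch: names are reduced to [a-zA-Z0-9] and prefixed with "avro" when they do not start with a letter, which keeps them valid as Avro identifiers.

    AvroName names = new AvroName();
    names.from("es-id");        // -> "esid"
    names.from("1_field");      // -> "avro1field"
    names.from("order", "qty"); // -> "orderqty"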
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/schema/FieldNameConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.github.dariobalinzo.schema;
18 |
19 | public interface FieldNameConverter {
20 |
21 | String from(String elasticName);
22 |
23 | String from(String prefix, String elasticName);
24 |
25 | }
26 |
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/schema/NopNameConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.github.dariobalinzo.schema;
18 |
19 | public class NopNameConverter implements FieldNameConverter {
20 |
21 | public String from(String elasticName) {
22 | return elasticName;
23 | }
24 |
25 | public String from(String prefix, String elasticName) {
26 | return elasticName == null ? prefix : prefix + elasticName;
27 | }
28 |
29 | }
30 |
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/schema/SchemaConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.github.dariobalinzo.schema;
18 |
19 | import org.apache.kafka.connect.data.Field;
20 | import org.apache.kafka.connect.data.Schema;
21 | import org.apache.kafka.connect.data.SchemaBuilder;
22 |
23 | import java.util.LinkedHashMap;
24 | import java.util.List;
25 | import java.util.Map;
26 | import java.util.Set;
27 | import java.util.function.Consumer;
28 | import java.util.stream.Collectors;
29 |
30 | import static org.apache.kafka.connect.data.Schema.OPTIONAL_BOOLEAN_SCHEMA;
31 | import static org.apache.kafka.connect.data.Schema.OPTIONAL_FLOAT64_SCHEMA;
32 | import static org.apache.kafka.connect.data.Schema.OPTIONAL_INT64_SCHEMA;
33 | import static org.apache.kafka.connect.data.Schema.OPTIONAL_STRING_SCHEMA;
34 | import static org.apache.kafka.connect.data.Schema.Type.ARRAY;
35 | import static org.apache.kafka.connect.data.Schema.Type.FLOAT64;
36 | import static org.apache.kafka.connect.data.Schema.Type.INT64;
37 | import static org.apache.kafka.connect.data.Schema.Type.STRUCT;
38 | import static org.apache.kafka.connect.data.SchemaBuilder.array;
39 | import static org.apache.kafka.connect.data.SchemaBuilder.struct;
40 |
41 | public class SchemaConverter {
42 |
43 | private final FieldNameConverter converter;
44 |
45 | public SchemaConverter(FieldNameConverter converter) {
46 | this.converter = converter;
47 | }
48 |
49 | public Schema convert(Map<String, Object> elasticDocument, String schemaName) {
50 | String validSchemaName = converter.from("", schemaName);
51 | SchemaBuilder schemaBuilder = struct().name(validSchemaName);
52 | convertDocumentSchema("", elasticDocument, schemaBuilder);
53 | return schemaBuilder.build();
54 | }
55 |
56 | @SuppressWarnings("unchecked")
57 | private void convertDocumentSchema(String prefixName, Map<String, Object> doc, SchemaBuilder schemaBuilder) {
58 | for (Map.Entry<String, Object> entry : doc.entrySet()) {
59 | String key = entry.getKey();
60 | Object value = entry.getValue();
61 | String validKeyName = converter.from(key);
62 | if (value instanceof String) {
63 | schemaBuilder.field(validKeyName, OPTIONAL_STRING_SCHEMA);
64 | } else if (value instanceof Boolean) {
65 | schemaBuilder.field(validKeyName, OPTIONAL_BOOLEAN_SCHEMA);
66 | } else if (value instanceof Integer) {
67 | schemaBuilder.field(validKeyName, OPTIONAL_INT64_SCHEMA);
68 | } else if (value instanceof Long) {
69 | schemaBuilder.field(validKeyName, OPTIONAL_INT64_SCHEMA);
70 | } else if (value instanceof Float) {
71 | schemaBuilder.field(validKeyName, OPTIONAL_FLOAT64_SCHEMA);
72 | } else if (value instanceof Double) {
73 | schemaBuilder.field(validKeyName, OPTIONAL_FLOAT64_SCHEMA);
74 | } else if (value instanceof List) {
75 | if (!((List<?>) value).isEmpty()) {
76 | Object head = ((List<?>) value).get(0);
77 | if (head instanceof Map) {
78 | convertListOfObject(prefixName, schemaBuilder, key, (List<Map<String, Object>>) value);
79 | } else {
80 | convertListSchema(prefixName, schemaBuilder, key, (List<Object>) value);
81 | }
82 | }
83 | } else if (value instanceof Map) {
84 | convertMapSchema(prefixName, schemaBuilder, entry);
85 | } else {
86 | if (value != null) {
87 | throw new RuntimeException("type not supported " + key);
88 | }
89 | }
90 | }
91 | }
92 |
93 | @SuppressWarnings("unchecked")
94 | private void convertMapSchema(String prefixName, SchemaBuilder schemaBuilder, Map.Entry<String, Object> entry) {
95 | String key = entry.getKey();
96 | Map<String, Object> value = (Map<String, Object>) entry.getValue();
97 | String validKeyName = converter.from(prefixName, key);
98 | SchemaBuilder nestedSchema = struct().name(validKeyName).optional();
99 | convertDocumentSchema(validKeyName + ".", value, nestedSchema);
100 | schemaBuilder.field(converter.from(key), nestedSchema.build());
101 | }
102 |
103 | @SuppressWarnings("unchecked")
104 | private void convertListSchema(String prefixName, SchemaBuilder schemaBuilder, String k, List<Object> items) {
105 | String validKeyName = converter.from(k);
106 |
107 | Set<Schema> schemas = items.stream().filter(i -> i != null).map(this::convertListSchema).collect(Collectors.toSet());
108 | Schema itemSchema;
109 | if(schemas.isEmpty()) {
110 | itemSchema = OPTIONAL_STRING_SCHEMA;
111 | } else if(schemas.size() == 1) {
112 | itemSchema = schemas.iterator().next();
113 | } else if(!schemas.contains(OPTIONAL_STRING_SCHEMA) && !schemas.contains(OPTIONAL_BOOLEAN_SCHEMA)) {
114 | itemSchema = OPTIONAL_FLOAT64_SCHEMA;
115 | } else {
116 | throw new IllegalArgumentException("list " + validKeyName + " contains items of different schemas: " + schemas);
117 | }
118 |
119 | schemaBuilder.field(
120 | validKeyName,
121 | array(itemSchema).optional().build()
122 | ).build();
123 | }
124 |
125 | private Schema convertListSchema(Object item) {
126 | if (item instanceof String) {
127 | return OPTIONAL_STRING_SCHEMA;
128 | } else if (item instanceof Boolean) {
129 | return OPTIONAL_BOOLEAN_SCHEMA;
130 | } else if (item instanceof Integer) {
131 | return OPTIONAL_INT64_SCHEMA;
132 | } else if (item instanceof Long) {
133 | return OPTIONAL_INT64_SCHEMA;
134 | } else if (item instanceof Float) {
135 | return OPTIONAL_FLOAT64_SCHEMA;
136 | } else if (item instanceof Double) {
137 | return OPTIONAL_FLOAT64_SCHEMA;
138 | } else {
139 | throw new RuntimeException("error in converting list: type not supported " + item.getClass());
140 | }
141 | }
142 |
143 |
144 | private void convertListOfObject(String prefixName, SchemaBuilder schemaBuilder, String k,
145 | List<Map<String, Object>> list) {
146 | String validKeyName = converter.from(k);
147 | String keyWithPrefix = converter.from(prefixName, k);
148 | Schema current = null;
149 | for (Map obj : list) {
150 | SchemaBuilder nestedSchema = struct().name(keyWithPrefix).optional();
151 | convertDocumentSchema(keyWithPrefix + ".", obj, nestedSchema);
152 |
153 | if(current == null) {
154 | current = nestedSchema;
155 | } else {
156 | current = merge(current, nestedSchema);
157 | }
158 | }
159 | schemaBuilder.field(validKeyName, array(current));
160 | }
161 |
162 | private Schema merge(Schema a, Schema b) {
163 | if (!(a.type() == STRUCT && b.type() == STRUCT)) {
164 | if(a.type() == INT64 && b.type() == FLOAT64) {
165 | return b;
166 | } else if(a.type() == FLOAT64 && b.type() == INT64) {
167 | return a;
168 | } else if (a.type() == ARRAY && b.type() == ARRAY) {
169 | SchemaBuilder builder = SchemaBuilder.array(merge(a.valueSchema(), b.valueSchema()));
170 | copyInto(builder, a);
171 | return builder;
172 | } else {
173 | // reaching this case means we were not able to merge the two schemas correctly;
174 | // we return the first one and hope that it somehow works out
175 | return a;
176 | }
177 | }
178 |
179 | Map<String, Schema> fieldsUnion = new LinkedHashMap<>();
180 | Consumer<Field> collector = f -> {
181 | fieldsUnion.computeIfPresent(f.name(), (key, old) -> merge(old.schema(), f.schema()));
182 | fieldsUnion.putIfAbsent(f.name(), f.schema());
183 | };
184 | a.fields().forEach(collector);
185 | b.fields().forEach(collector);
186 |
187 | SchemaBuilder union = struct().name(a.name()).optional();
188 | for (Map.Entry<String, Schema> field : fieldsUnion.entrySet()) {
189 | union.field(field.getKey(), from(field.getValue()).optional().build());
190 | }
191 | return union;
192 | }
193 |
194 | private SchemaBuilder from(Schema schema) {
195 | if(schema instanceof SchemaBuilder) {
196 | return (SchemaBuilder) schema;
197 | } else {
198 | SchemaBuilder builder;
199 | switch (schema.type()) {
200 | case STRUCT: {
201 | builder = struct();
202 | for (Field field : schema.fields()) {
203 | builder.field(field.name(), field.schema());
204 | }
205 | break;
206 | }
207 | case MAP: {
208 | builder = SchemaBuilder.map(schema.keySchema(), schema.valueSchema());
209 | break;
210 | }
211 | case ARRAY: {
212 | builder = SchemaBuilder.array(schema.valueSchema());
213 | break;
214 | }
215 | default: {
216 | builder = new SchemaBuilder(schema.type());
217 | break;
218 | }
219 | }
220 | copyInto(builder, schema);
221 | return builder;
222 | }
223 | }
224 |
225 | private void copyInto(SchemaBuilder builder, Schema from) {
226 | if(from.isOptional()) {
227 | builder.optional();
228 | }
229 | builder.name(from.name());
230 | if(from.defaultValue() != null) {
231 | builder.defaultValue(from.defaultValue());
232 | }
233 | builder.doc(from.doc());
234 | if(from.parameters() != null) {
235 | builder.parameters(from.parameters());
236 | }
237 | builder.version(from.version());
238 | }
239 | }
240 |
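
A minimal sketch (not part of the repository sources) of the observable effect of the merge logic above: when two objects inside the same list declare a shared field with different numeric types, merge() widens the field to FLOAT64 in the resulting array element schema. The class name, field names, and values here are invented for illustration.

    import com.github.dariobalinzo.schema.NopNameConverter;
    import com.github.dariobalinzo.schema.SchemaConverter;
    import org.apache.kafka.connect.data.Schema;
    import java.util.*;

    public class MergeSketch {
        public static void main(String[] args) {
            Map<String, Object> first = new LinkedHashMap<>();
            first.put("amount", 42);          // Integer -> OPTIONAL_INT64_SCHEMA
            Map<String, Object> second = new LinkedHashMap<>();
            second.put("amount", 42.5);       // Double -> OPTIONAL_FLOAT64_SCHEMA

            Map<String, Object> doc = new LinkedHashMap<>();
            doc.put("items", Arrays.asList(first, second));

            Schema schema = new SchemaConverter(new NopNameConverter()).convert(doc, "example");
            // prints FLOAT64: merge(INT64, FLOAT64) keeps the wider type for the shared field
            System.out.println(schema.field("items").schema().valueSchema().field("amount").schema().type());
        }
    }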
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/schema/StructConverter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.github.dariobalinzo.schema;
18 |
19 | import org.apache.kafka.connect.data.Schema;
20 | import org.apache.kafka.connect.data.Struct;
21 |
22 | import java.util.ArrayList;
23 | import java.util.List;
24 | import java.util.Map;
25 | import java.util.stream.Collectors;
26 |
27 | import static org.apache.kafka.connect.data.Schema.Type.FLOAT64;
28 |
29 | public class StructConverter {
30 |
31 | private final FieldNameConverter converter;
32 |
33 | public StructConverter(FieldNameConverter converter) {
34 | this.converter = converter;
35 | }
36 |
37 | public Struct convert(Map<String, Object> doc, Schema schema) {
38 | Struct struct = new Struct(schema);
39 | convertDocumentStruct("", doc, struct, schema);
40 | return struct;
41 | }
42 |
43 | private void convertDocumentStruct(String prefixName, Map<String, Object> doc, Struct struct, Schema schema) {
44 | for (Map.Entry<String, Object> entry : doc.entrySet()) {
45 | String key = entry.getKey();
46 | Object value = entry.getValue();
47 |
48 | if (isScalar(value)) {
49 | String field = converter.from(key);
50 | boolean isFloat = struct.schema().field(field).schema().type() == FLOAT64;
51 | if(isFloat && value instanceof Number) {
52 | value = ((Number) value).doubleValue();
53 | } else {
54 | value = handleNumericPrecision(value);
55 | }
56 | struct.put(field, value);
57 | } else if (value instanceof List) {
58 | convertListToAvroArray(prefixName, struct, schema, entry);
59 | } else if (value instanceof Map) {
60 | convertMapToAvroStruct(prefixName, struct, schema, entry);
61 | } else {
62 | if (value != null) {
63 | throw new RuntimeException("type not supported " + key);
64 | }
65 | }
66 | }
67 | }
68 |
69 | private boolean isScalar(Object value) {
70 | return value instanceof String
71 | || value instanceof Boolean
72 | || value instanceof Integer
73 | || value instanceof Long
74 | || value instanceof Double
75 | || value instanceof Float;
76 | }
77 |
78 | private Object handleNumericPrecision(Object value) {
79 | if (value instanceof Integer) {
80 | value = ((Integer) value).longValue();
81 | } else if (value instanceof Float) {
82 | value = ((Float) value).doubleValue();
83 | }
84 | return value;
85 | }
86 |
87 | @SuppressWarnings("unchecked")
88 | private void convertListToAvroArray(String prefixName, Struct struct, Schema schema, Map.Entry<String, Object> entry) {
89 | String key = entry.getKey();
90 | List<?> value = (List<?>) entry.getValue();
91 |
92 | if (!value.isEmpty()) {
93 | //assuming that every item of the list has the same schema
94 | Object head = value.stream().filter(i -> i != null).findFirst().orElse(null);
95 | if(head == null) {
96 | struct.put(converter.from(key), value);
97 | } else if (isScalar(head)) {
98 | boolean isFloat64 = struct.schema().field(converter.from(key)).schema().valueSchema().type().equals(FLOAT64);
99 | List<Object> scalars = value.stream()
100 | .map(s -> isFloat64 ? ((Number) s).doubleValue() : handleNumericPrecision(s))
101 | .collect(Collectors.toList());
102 | struct.put(converter.from(key), scalars);
103 | } else if (head instanceof Map) {
104 | List<Struct> array = value
105 | .stream()
106 | .map(doc -> convertListOfObject(prefixName, schema, key, (Map<String, Object>) doc))
107 | .collect(Collectors.toCollection(ArrayList::new));
108 | struct.put(converter.from(key), array);
109 | } else {
110 | throw new RuntimeException("error in converting list: type not supported");
111 | }
112 |
113 | }
114 | }
115 |
116 | @SuppressWarnings("unchecked")
117 | private void convertMapToAvroStruct(String prefixName, Struct struct, Schema schema, Map.Entry<String, Object> entry) {
118 | String k = entry.getKey();
119 | Map<String, Object> value = (Map<String, Object>) entry.getValue();
120 | Struct nestedStruct = new Struct(schema.field(converter.from(k)).schema());
121 | convertDocumentStruct(
122 | converter.from(prefixName, k) + ".",
123 | value,
124 | nestedStruct,
125 | schema.field(converter.from(k)).schema()
126 | );
127 | struct.put(converter.from(k), nestedStruct);
128 | }
129 |
130 | private Struct convertListOfObject(String prefixName, Schema schema, String key, Map<String, Object> doc) {
131 | String validKey = converter.from(key);
132 | String validKeyPrefix = converter.from(prefixName, key) + ".";
133 | Struct nestedStruct = new Struct(
134 | schema.field(validKey)
135 | .schema()
136 | .valueSchema()
137 | );
138 |
139 | convertDocumentStruct(
140 | validKeyPrefix,
141 | doc,
142 | nestedStruct,
143 | schema.field(validKey)
144 | .schema()
145 | .valueSchema()
146 | );
147 | return nestedStruct;
148 | }
149 |
150 |
151 | }
152 |
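
A companion sketch (not part of the repository sources) of the scalar widening above: JSON integers come back from the Struct as Long and floats as Double, matching the INT64/FLOAT64 schemas that SchemaConverter declares. Class name, fields, and values are invented for illustration.

    import com.github.dariobalinzo.schema.*;
    import org.apache.kafka.connect.data.Schema;
    import org.apache.kafka.connect.data.Struct;
    import java.util.LinkedHashMap;
    import java.util.Map;

    public class WideningSketch {
        public static void main(String[] args) {
            FieldNameConverter names = new NopNameConverter();
            Map<String, Object> doc = new LinkedHashMap<>();
            doc.put("version", 7);       // Integer in the parsed JSON...
            doc.put("score", 1.5f);      // ...and Float

            Schema schema = new SchemaConverter(names).convert(doc, "example");
            Struct struct = new StructConverter(names).convert(doc, schema);

            // handleNumericPrecision widened both values before the put
            Long version = struct.getInt64("version");   // 7L
            Double score = struct.getFloat64("score");   // 1.5d
            System.out.println(version + " " + score);
        }
    }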
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/task/ElasticSourceTask.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.github.dariobalinzo.task;
18 |
19 | import com.github.dariobalinzo.ElasticSourceConnectorConfig;
20 | import com.github.dariobalinzo.Version;
21 | import com.github.dariobalinzo.elastic.CursorField;
22 | import com.github.dariobalinzo.elastic.ElasticConnection;
23 | import com.github.dariobalinzo.elastic.ElasticConnectionBuilder;
24 | import com.github.dariobalinzo.elastic.ElasticRepository;
25 | import com.github.dariobalinzo.elastic.response.Cursor;
26 | import com.github.dariobalinzo.elastic.response.PageResult;
27 | import com.github.dariobalinzo.filter.BlacklistFilter;
28 | import com.github.dariobalinzo.filter.DocumentFilter;
29 | import com.github.dariobalinzo.filter.JsonCastFilter;
30 | import com.github.dariobalinzo.filter.WhitelistFilter;
31 | import com.github.dariobalinzo.schema.*;
32 | import org.apache.kafka.common.config.ConfigException;
33 | import org.apache.kafka.connect.data.Schema;
34 | import org.apache.kafka.connect.data.Struct;
35 | import org.apache.kafka.connect.errors.ConnectException;
36 | import org.apache.kafka.connect.source.SourceRecord;
37 | import org.apache.kafka.connect.source.SourceTask;
38 | import org.slf4j.Logger;
39 | import org.slf4j.LoggerFactory;
40 |
41 | import java.util.*;
42 | import java.util.concurrent.atomic.AtomicBoolean;
43 |
44 | import static com.github.dariobalinzo.elastic.ElasticJsonNaming.removeKeywordSuffix;
45 |
46 | public class ElasticSourceTask extends SourceTask {
47 |
48 | private static final Logger logger = LoggerFactory.getLogger(ElasticSourceTask.class);
49 | private static final String INDEX = "index";
50 | static final String POSITION = "position";
51 | static final String POSITION_SECONDARY = "position_secondary";
52 |
53 |
54 | private final OffsetSerializer offsetSerializer = new OffsetSerializer();
55 | private SchemaConverter schemaConverter;
56 | private StructConverter structConverter;
57 |
58 | private ElasticSourceTaskConfig config;
59 | private ElasticConnection es;
60 |
61 | private final AtomicBoolean stopping = new AtomicBoolean(false);
62 | private List<String> indices;
63 | private String topic;
64 | private String cursorSearchField;
65 | private CursorField cursorField;
66 | private String secondaryCursorSearchField;
67 | private CursorField secondaryCursorField;
68 | private int pollingMs;
69 | private final Map<String, Cursor> lastCursor = new HashMap<>();
70 | private final Map<String, Integer> sent = new HashMap<>();
71 | private ElasticRepository elasticRepository;
72 |
73 | private final List documentFilters = new ArrayList<>();
74 |
75 | @Override
76 | public String version() {
77 | return Version.VERSION;
78 | }
79 |
80 | @Override
81 | public void start(Map<String, String> properties) {
82 | try {
83 | config = new ElasticSourceTaskConfig(properties);
84 | } catch (ConfigException e) {
85 | throw new ConnectException("Couldn't start ElasticSourceTask due to configuration error", e);
86 | }
87 |
88 | indices = Arrays.asList(config.getString(ElasticSourceTaskConfig.INDICES_CONFIG).split(","));
89 | if (indices.isEmpty()) {
90 | throw new ConnectException("Invalid configuration: each ElasticSourceTask must have at "
91 | + "least one index assigned to it");
92 | }
93 |
94 | topic = config.getString(ElasticSourceConnectorConfig.TOPIC_PREFIX_CONFIG);
95 | cursorSearchField = config.getString(ElasticSourceConnectorConfig.INCREMENTING_FIELD_NAME_CONFIG);
96 | Objects.requireNonNull(cursorSearchField, ElasticSourceConnectorConfig.INCREMENTING_FIELD_NAME_CONFIG
97 | + " conf is mandatory");
98 | cursorField = new CursorField(cursorSearchField);
99 | secondaryCursorSearchField = config.getString(ElasticSourceConnectorConfig.SECONDARY_INCREMENTING_FIELD_NAME_CONFIG);
100 | secondaryCursorField = secondaryCursorSearchField == null ? null : new CursorField(secondaryCursorSearchField);
101 | pollingMs = Integer.parseInt(config.getString(ElasticSourceConnectorConfig.POLL_INTERVAL_MS_CONFIG));
102 |
103 | initConnectorFilters();
104 | initConnectorFieldConverter();
105 | initEsConnection();
106 | }
107 |
108 | private void initConnectorFilters() {
109 | String whiteFilters = config.getString(ElasticSourceConnectorConfig.FIELDS_WHITELIST_CONFIG);
110 | if (whiteFilters != null) {
111 | String[] whiteFiltersArray = whiteFilters.split(";");
112 | Set<String> whiteFiltersSet = new HashSet<>(Arrays.asList(whiteFiltersArray));
113 | documentFilters.add(new WhitelistFilter(whiteFiltersSet));
114 | }
115 |
116 | String blackFilters = config.getString(ElasticSourceConnectorConfig.FIELDS_BLACKLIST_CONFIG);
117 | if (blackFilters != null) {
118 | String[] blackFiltersArray = blackFilters.split(";");
119 | Set<String> blackFiltersSet = new HashSet<>(Arrays.asList(blackFiltersArray));
120 | documentFilters.add(new BlacklistFilter(blackFiltersSet));
121 | }
122 |
123 | String jsonCastFilters = config.getString(ElasticSourceConnectorConfig.FIELDS_JSON_CAST_CONFIG);
124 | if (jsonCastFilters != null) {
125 | String[] jsonCastFiltersArray = jsonCastFilters.split(";");
126 | Set<String> jsonCastFiltersSet = new HashSet<>(Arrays.asList(jsonCastFiltersArray));
127 | documentFilters.add(new JsonCastFilter(jsonCastFiltersSet));
128 | }
129 | }
130 |
131 | private void initConnectorFieldConverter() {
132 | String nameConverterConfig = config.getString(ElasticSourceConnectorConfig.CONNECTOR_FIELDNAME_CONVERTER_CONFIG);
133 |
134 | FieldNameConverter fieldNameConverter;
135 | switch (nameConverterConfig) {
136 | case ElasticSourceConnectorConfig.NOP_FIELDNAME_CONVERTER:
137 | fieldNameConverter = new NopNameConverter();
138 | break;
139 | case ElasticSourceConnectorConfig.AVRO_FIELDNAME_CONVERTER:
140 | default:
141 | fieldNameConverter = new AvroName();
142 | break;
143 | }
144 | this.schemaConverter = new SchemaConverter(fieldNameConverter);
145 | this.structConverter = new StructConverter(fieldNameConverter);
146 | }
147 |
148 | private void initEsConnection() {
149 | String esScheme = config.getString(ElasticSourceConnectorConfig.ES_SCHEME_CONF);
150 | String esHost = config.getString(ElasticSourceConnectorConfig.ES_HOST_CONF);
151 | int esPort = Integer.parseInt(config.getString(ElasticSourceConnectorConfig.ES_PORT_CONF));
152 |
153 | String esUser = config.getString(ElasticSourceConnectorConfig.ES_USER_CONF);
154 | String esPwd = config.getString(ElasticSourceConnectorConfig.ES_PWD_CONF);
155 |
156 | int batchSize = Integer.parseInt(config.getString(ElasticSourceConnectorConfig.BATCH_MAX_ROWS_CONFIG));
157 |
158 | int maxConnectionAttempts = Integer.parseInt(config.getString(
159 | ElasticSourceConnectorConfig.CONNECTION_ATTEMPTS_CONFIG
160 | ));
161 | long connectionRetryBackoff = Long.parseLong(config.getString(
162 | ElasticSourceConnectorConfig.CONNECTION_BACKOFF_CONFIG
163 | ));
164 | ElasticConnectionBuilder connectionBuilder = new ElasticConnectionBuilder(esHost, esPort)
165 | .withProtocol(esScheme)
166 | .withMaxAttempts(maxConnectionAttempts)
167 | .withBackoff(connectionRetryBackoff);
168 |
169 | String truststore = config.getString(ElasticSourceConnectorConfig.ES_TRUSTSTORE_CONF);
170 | String truststorePass = config.getString(ElasticSourceConnectorConfig.ES_TRUSTSTORE_PWD_CONF);
171 | String keystore = config.getString(ElasticSourceConnectorConfig.ES_KEYSTORE_CONF);
172 | String keystorePass = config.getString(ElasticSourceConnectorConfig.ES_KEYSTORE_PWD_CONF);
173 |
174 | if (truststore != null) {
175 | connectionBuilder.withTrustStore(truststore, truststorePass);
176 | }
177 |
178 | if (keystore != null) {
179 | connectionBuilder.withKeyStore(keystore, keystorePass);
180 | }
181 |
182 | if (esUser == null || esUser.isEmpty()) {
183 | es = connectionBuilder.build();
184 | } else {
185 | es = connectionBuilder.withUser(esUser)
186 | .withPassword(esPwd)
187 | .build();
188 | }
189 |
190 | elasticRepository = new ElasticRepository(es, cursorSearchField, secondaryCursorSearchField);
191 | elasticRepository.setPageSize(batchSize);
192 | }
193 |
194 |
195 | // will be called by Connect on a thread different from the stop thread
196 | @Override
197 | public List<SourceRecord> poll() {
198 | List<SourceRecord> results = new ArrayList<>();
199 | try {
200 | for (String index : indices) {
201 | if (!stopping.get()) {
202 | logger.info("fetching from {}", index);
203 | Cursor lastValue = fetchLastOffset(index);
204 | logger.info("found last value {}", lastValue);
205 | PageResult pageResult = secondaryCursorSearchField == null ?
206 | elasticRepository.searchAfter(index, lastValue) :
207 | elasticRepository.searchAfterWithSecondarySort(index, lastValue);
208 | parseResult(pageResult, results);
209 | logger.info("index {} total messages: {} ", index, sent.get(index));
210 | }
211 | }
212 | if (results.isEmpty()) {
213 | logger.info("no data found, sleeping for {} ms", pollingMs);
214 | Thread.sleep(pollingMs);
215 | }
216 |
217 | } catch (Exception e) {
218 | logger.error("error while polling Elasticsearch", e);
219 | }
220 | return results;
221 | }
222 |
223 | private Cursor fetchLastOffset(String index) {
224 | // first check the in-memory cache for the last cursor value
225 | if (lastCursor.get(index) != null) {
226 | return lastCursor.get(index);
227 | }
228 |
229 | // if the cache is empty, ask the Connect framework's offset storage
230 | Map<String, Object> offset = context.offsetStorageReader().offset(Collections.singletonMap(INDEX, index));
231 | if (offset != null) {
232 | String primaryCursor = (String) offset.get(POSITION);
233 | String secondaryCursor = (String) offset.get(POSITION_SECONDARY);
234 | return new Cursor(primaryCursor, secondaryCursor);
235 | } else {
236 | return Cursor.empty();
237 | }
238 | }
239 |
240 | private void parseResult(PageResult pageResult, List<SourceRecord> results) {
241 | String index = pageResult.getIndex();
242 | for (Map<String, Object> elasticDocument : pageResult.getDocuments()) {
243 | Map<String, String> sourcePartition = Collections.singletonMap(INDEX, index);
244 | Map<String, Object> sourceOffset = offsetSerializer.toMapOffset(
245 | cursorField,
246 | secondaryCursorField,
247 | elasticDocument
248 | );
249 | String key = offsetSerializer.toStringOffset(
250 | cursorField,
251 | secondaryCursorField,
252 | index,
253 | elasticDocument
254 | );
255 |
256 | lastCursor.put(index, pageResult.getLastCursor());
257 | sent.merge(index, 1, Integer::sum);
258 |
259 | documentFilters.forEach(jsonFilter -> jsonFilter.filter(elasticDocument));
260 |
261 | Schema schema = schemaConverter.convert(elasticDocument, index);
262 | Struct struct = structConverter.convert(elasticDocument, schema);
263 |
264 | SourceRecord sourceRecord = new SourceRecord(
265 | sourcePartition,
266 | sourceOffset,
267 | topic + index,
268 | //KEY
269 | Schema.STRING_SCHEMA,
270 | key,
271 | //VALUE
272 | schema,
273 | struct);
274 | results.add(sourceRecord);
275 | }
276 | }
277 |
278 | // will be called by Connect on a thread different from the poll thread
279 | public void stop() {
280 | stopping.set(true);
281 | if (es != null) {
282 | es.closeQuietly();
283 | }
284 | }
285 | }
286 |
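
For reference, a sketch (not part of the repository sources) of the partition and offset maps the task hands to Kafka Connect per record; fetchLastOffset above reads exactly these keys back. The literal values are hypothetical, and the string keys mirror the task's INDEX, POSITION, and POSITION_SECONDARY constants.

    import java.util.Collections;
    import java.util.HashMap;
    import java.util.Map;

    public class OffsetShapeSketch {
        public static void main(String[] args) {
            // partition: one entry per monitored index
            Map<String, String> sourcePartition = Collections.singletonMap("index", "source_index");

            Map<String, Object> sourceOffset = new HashMap<>();
            sourceOffset.put("position", "111");                 // primary cursor value
            sourceOffset.put("position_secondary", "customerA"); // only set when a secondary cursor is configured

            System.out.println(sourcePartition + " -> " + sourceOffset);
        }
    }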
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/task/ElasticSourceTaskConfig.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.github.dariobalinzo.task;
18 |
19 |
20 | import com.github.dariobalinzo.ElasticSourceConnectorConfig;
21 | import org.apache.kafka.common.config.ConfigDef;
22 | import org.apache.kafka.common.config.ConfigDef.Importance;
23 | import org.apache.kafka.common.config.ConfigDef.Type;
24 |
25 | import java.util.Map;
26 |
27 | /**
28 | * Configuration options for a single ElasticSourceTask. These are processed after all
29 | * Connector-level configs have been parsed.
30 | */
31 | public class ElasticSourceTaskConfig extends ElasticSourceConnectorConfig {
32 |
33 | static ConfigDef config = baseConfigDef()
34 | .define(INDICES_CONFIG, Type.STRING, Importance.HIGH, INDICES_CONFIG);
35 |
36 | public ElasticSourceTaskConfig(Map<String, String> props) {
37 | super(config, props);
38 | }
39 | }
40 |
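
A short sketch (not part of the repository sources) of how a task configuration differs from the connector's: the connector-level properties pass through unchanged, plus the comma-separated slice of indices assigned to this task. The helper and the connectorProps map are hypothetical, and connectorProps is assumed to already hold a valid connector configuration.

    import com.github.dariobalinzo.task.ElasticSourceTaskConfig;
    import java.util.HashMap;
    import java.util.Map;

    public class TaskConfigSketch {
        // hypothetical helper: connectorProps must be a complete connector configuration
        static ElasticSourceTaskConfig forIndices(Map<String, String> connectorProps, String indices) {
            Map<String, String> props = new HashMap<>(connectorProps);
            props.put(ElasticSourceTaskConfig.INDICES_CONFIG, indices); // e.g. "index1,index2"
            return new ElasticSourceTaskConfig(props);
        }
    }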
--------------------------------------------------------------------------------
/src/main/java/com/github/dariobalinzo/task/OffsetSerializer.java:
--------------------------------------------------------------------------------
1 | package com.github.dariobalinzo.task;
2 |
3 | import com.github.dariobalinzo.elastic.CursorField;
4 |
5 | import java.util.HashMap;
6 | import java.util.Map;
7 |
8 | import static com.github.dariobalinzo.task.ElasticSourceTask.POSITION;
9 | import static com.github.dariobalinzo.task.ElasticSourceTask.POSITION_SECONDARY;
10 |
11 | public class OffsetSerializer {
12 |
13 | public Map<String, Object> toMapOffset(CursorField primaryCursor, CursorField secondaryCursor, Map<String, Object> document) {
14 | Map<String, Object> result = new HashMap<>();
15 | result.put(POSITION, primaryCursor.read(document));
16 | if (secondaryCursor != null) {
17 | result.put(POSITION_SECONDARY, secondaryCursor.read(document));
18 | }
19 | return result;
20 | }
21 |
22 | public String toStringOffset(CursorField cursor, CursorField secondaryCursor, String index, Map<String, Object> document) {
23 | String cursorValue = cursor.read(document);
24 | if (secondaryCursor == null) {
25 | return String.join("_", index, cursorValue);
26 | } else {
27 | return String.join("_", index, cursorValue, secondaryCursor.read(document));
28 | }
29 | }
30 | }
31 |
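
A usage sketch (not part of the repository sources), assuming CursorField resolves a top-level document field and read() returns its string value, as the task code above relies on. Field names, values, and the class name are invented for illustration.

    import com.github.dariobalinzo.elastic.CursorField;
    import com.github.dariobalinzo.task.OffsetSerializer;
    import java.util.HashMap;
    import java.util.Map;

    public class OffsetSerializerSketch {
        public static void main(String[] args) {
            Map<String, Object> document = new HashMap<>();
            document.put("ts", "111");
            document.put("fullName", "customerA");

            OffsetSerializer serializer = new OffsetSerializer();
            CursorField primary = new CursorField("ts");
            CursorField secondary = new CursorField("fullName");

            // contains position=111 and position_secondary=customerA
            System.out.println(serializer.toMapOffset(primary, secondary, document));
            // source_index_111_customerA
            System.out.println(serializer.toStringOffset(primary, secondary, "source_index", document));
        }
    }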
--------------------------------------------------------------------------------
/src/test/java/com/github/dariobalinzo/ElasticIndexMonitorThreadTest.java:
--------------------------------------------------------------------------------
1 | package com.github.dariobalinzo;
2 |
3 | import static org.junit.Assert.assertEquals;
4 | import static org.mockito.Mockito.atLeast;
5 |
6 | import java.io.IOException;
7 |
8 | import com.github.dariobalinzo.elastic.ElasticIndexMonitorThread;
9 |
10 | import org.apache.kafka.connect.connector.ConnectorContext;
11 | import org.junit.Before;
12 | import org.junit.Test;
13 | import org.mockito.Mock;
14 | import org.mockito.Mockito;
15 | import org.mockito.MockitoAnnotations;
16 |
17 |
18 | public class ElasticIndexMonitorThreadTest extends TestContainersContext {
19 |
20 | @Mock
21 | private ConnectorContext context;
22 |
23 | @Before
24 | public void init() {
25 | MockitoAnnotations.initMocks(this);
26 | }
27 |
28 | @Test
29 | public void shouldRefreshIndexesList() throws InterruptedException, IOException {
30 | //given
31 | long pollInterval = 1000L;
32 | deleteTestIndex();
33 |
34 | insertMockData(10, TEST_INDEX);
35 | refreshIndex();
36 |
37 | ElasticIndexMonitorThread indexMonitorThread = new ElasticIndexMonitorThread(context, pollInterval, repository, TEST_INDEX);
38 | indexMonitorThread.start();
39 |
40 | assertEquals(1, indexMonitorThread.indexes().size());
41 |
42 | //when another index is created in Elastic
43 | insertMockData(10, TEST_INDEX + '2');
44 | refreshIndex();
45 |
46 | long waitRefresh = pollInterval + (long)(Math.random() * 1000);
47 | Thread.sleep(waitRefresh);
48 |
49 | //then
50 | Mockito.verify(context, atLeast(1)).requestTaskReconfiguration();
51 | assertEquals(2, indexMonitorThread.indexes().size());
52 |
53 | indexMonitorThread.shutdown();
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/src/test/java/com/github/dariobalinzo/FooTest.java:
--------------------------------------------------------------------------------
1 | package com.github.dariobalinzo;
2 |
3 | import com.github.dariobalinzo.schema.AvroName;
4 | import com.github.dariobalinzo.schema.FieldNameConverter;
5 | import com.github.dariobalinzo.schema.NopNameConverter;
6 | import com.github.dariobalinzo.schema.SchemaConverter;
7 | import com.github.dariobalinzo.schema.StructConverter;
8 | import org.apache.kafka.connect.data.Schema;
9 | import org.apache.kafka.connect.data.Struct;
10 | import org.junit.Before;
11 | import org.junit.Test;
12 | import org.testcontainers.shaded.com.fasterxml.jackson.databind.ObjectMapper;
13 |
14 | import java.io.IOException;
15 | import java.nio.file.Files;
16 | import java.nio.file.Paths;
17 | import java.util.Map;
18 |
19 | public class FooTest {
20 |
21 | private SchemaConverter schemaConverter;
22 | private StructConverter structConverter;
23 |
24 | Map<String, Object> elasticDocument;
25 |
26 | @Before
27 | public void setup() throws IOException {
28 | FieldNameConverter fieldNameConverter = new NopNameConverter();
29 | this.schemaConverter = new SchemaConverter(fieldNameConverter);
30 | this.structConverter = new StructConverter(fieldNameConverter);
31 |
32 | String doc = new String(Files.readAllBytes(Paths.get("src/test/java/com/github/dariobalinzo/foo.json")));
33 | elasticDocument = new ObjectMapper().readValue(doc, Map.class);
34 | }
35 |
36 | @Test
37 | public void foo() {
38 |
39 | Schema schema = schemaConverter.convert(elasticDocument, "foo");
40 | Struct struct = structConverter.convert(elasticDocument, schema);
41 |
42 | System.out.println(struct);
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/src/test/java/com/github/dariobalinzo/TestContainersContext.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.github.dariobalinzo;
18 |
19 | import com.github.dariobalinzo.elastic.ElasticConnection;
20 | import com.github.dariobalinzo.elastic.ElasticConnectionBuilder;
21 | import com.github.dariobalinzo.elastic.ElasticRepository;
22 | import com.github.dariobalinzo.task.ElasticSourceTaskConfig;
23 | import org.apache.http.HttpHost;
24 | import org.elasticsearch.action.DocWriteResponse;
25 | import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
26 | import org.elasticsearch.action.index.IndexRequest;
27 | import org.elasticsearch.action.index.IndexResponse;
28 | import org.elasticsearch.client.RequestOptions;
29 | import org.elasticsearch.xcontent.XContentBuilder;
30 | import org.elasticsearch.xcontent.XContentFactory;
31 | import org.junit.AfterClass;
32 | import org.junit.BeforeClass;
33 | import org.testcontainers.elasticsearch.ElasticsearchContainer;
34 |
35 | import java.io.IOException;
36 | import java.util.HashMap;
37 | import java.util.Map;
38 |
39 | import static org.junit.Assert.assertEquals;
40 |
41 | public class TestContainersContext {
42 | protected static final int TEST_PAGE_SIZE = 3;
43 | protected static final int MAX_TRIALS = 2;
44 | protected static final int RETRY_WAIT_MS = 1_000;
45 |
46 | protected static final String TEST_INDEX = "source_index";
47 | protected static final String CURSOR_FIELD = "ts";
48 | protected static final String NESTED_OBJECT = "nested";
49 | protected static final String NESTED_CURSOR_FIELD = NESTED_OBJECT + "." + CURSOR_FIELD;
50 | protected static final String SECONDARY_CURSOR_FIELD = "fullName.keyword";
51 |
52 | protected static final String ELASTICSEARCH_IMAGE = "docker.elastic.co/elasticsearch/elasticsearch:7.11.1";
53 |
54 | protected static ElasticsearchContainer container;
55 | protected static ElasticConnection connection;
56 | protected static ElasticRepository repository;
57 | protected static ElasticRepository nestedRepository;
58 | protected static ElasticRepository secondarySortRepo;
59 |
60 | @BeforeClass
61 | public static void setupElastic() {
62 | // Create the elasticsearch container.
63 | container = new ElasticsearchContainer(ELASTICSEARCH_IMAGE);
64 | container.addEnv("ES_JAVA_OPTS", "-Xms512m -Xmx512m");
65 | container.start();
66 |
67 | HttpHost httpHost = HttpHost.create(container.getHttpHostAddress());
68 | connection = new ElasticConnectionBuilder(httpHost.getHostName(), httpHost.getPort())
69 | .withMaxAttempts(MAX_TRIALS)
70 | .withBackoff(RETRY_WAIT_MS)
71 | .build();
72 |
73 | repository = new ElasticRepository(connection, CURSOR_FIELD);
74 | repository.setPageSize(TEST_PAGE_SIZE);
75 |
76 | nestedRepository = new ElasticRepository(connection, NESTED_CURSOR_FIELD);
77 | nestedRepository.setPageSize(TEST_PAGE_SIZE);
78 |
79 | secondarySortRepo = new ElasticRepository(connection, CURSOR_FIELD, SECONDARY_CURSOR_FIELD);
80 | secondarySortRepo.setPageSize(TEST_PAGE_SIZE);
81 | }
82 |
83 |
84 | protected void deleteTestIndex() {
85 | try {
86 | connection.getClient().indices().delete(new DeleteIndexRequest(TEST_INDEX), RequestOptions.DEFAULT);
87 | } catch (Exception ignored) {
88 |
89 | }
90 | }
91 |
92 | protected void refreshIndex() throws IOException, InterruptedException {
93 | repository.refreshIndex(TEST_INDEX);
94 | }
95 |
96 | protected void insertMockData(int tsStart) throws IOException {
97 | insertMockData(tsStart, TEST_INDEX);
98 | }
99 |
100 | protected void insertMockData(int tsStart, String index) throws IOException {
101 | insertMockData(tsStart, "Test", index);
102 | }
103 |
104 | protected void insertMockData(int tsStart, String fullName, String index) throws IOException {
105 | XContentBuilder builder = XContentFactory.jsonBuilder()
106 | .startObject()
107 | .field("fullName", fullName)
108 | .field(CURSOR_FIELD, tsStart)
109 | .field("age", 10)
110 | .field("non-avro-field", "non-avro-field")
111 | .field("avroField", "avro-field")
112 | .object(NESTED_OBJECT, b -> b.field(CURSOR_FIELD, tsStart))
113 | .endObject();
114 |
115 | IndexRequest indexRequest = new IndexRequest(index);
116 | indexRequest.type("_doc");
117 | indexRequest.source(builder);
118 |
119 | IndexResponse response = connection.getClient().index(indexRequest, RequestOptions.DEFAULT);
120 | assertEquals(DocWriteResponse.Result.CREATED, response.getResult());
121 | }
122 |
123 | protected Map<String, String> getConf() {
124 | HttpHost httpHost = HttpHost.create(container.getHttpHostAddress());
125 | Map<String, String> conf = new HashMap<>();
126 | conf.put(ElasticSourceTaskConfig.INDICES_CONFIG, TEST_INDEX);
127 | conf.put(ElasticSourceConnectorConfig.TOPIC_PREFIX_CONFIG, "topic");
128 | conf.put(ElasticSourceConnectorConfig.INCREMENTING_FIELD_NAME_CONFIG, CURSOR_FIELD);
129 | conf.put(ElasticSourceConnectorConfig.POLL_INTERVAL_MS_CONFIG, String.valueOf(10));
130 | conf.put(ElasticSourceConnectorConfig.ES_HOST_CONF, httpHost.getHostName());
131 | conf.put(ElasticSourceConnectorConfig.ES_PORT_CONF, String.valueOf(httpHost.getPort()));
132 | conf.put(ElasticSourceConnectorConfig.BATCH_MAX_ROWS_CONFIG, String.valueOf(2));
133 | conf.put(ElasticSourceConnectorConfig.CONNECTION_ATTEMPTS_CONFIG, String.valueOf(MAX_TRIALS));
134 | conf.put(ElasticSourceConnectorConfig.CONNECTION_BACKOFF_CONFIG, String.valueOf(RETRY_WAIT_MS));
135 | return conf;
136 | }
137 |
138 |
139 | @AfterClass
140 | public static void stopElastic() {
141 | if (container != null) {
142 | container.close();
143 | }
144 | }
145 |
146 | }
147 |
--------------------------------------------------------------------------------
/src/test/java/com/github/dariobalinzo/elastic/ElasticRepositoryTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.github.dariobalinzo.elastic;
18 |
19 | import com.github.dariobalinzo.TestContainersContext;
20 | import com.github.dariobalinzo.elastic.response.Cursor;
21 | import com.github.dariobalinzo.elastic.response.PageResult;
22 | import org.junit.Test;
23 |
24 | import java.io.IOException;
25 | import java.util.Collections;
26 |
27 | import static org.junit.Assert.*;
28 |
29 | public class ElasticRepositoryTest extends TestContainersContext {
30 |
31 |
32 | @Test
33 | public void shouldFetchDataFromElastic() throws IOException, InterruptedException {
34 | deleteTestIndex();
35 |
36 | insertMockData(111);
37 | insertMockData(112);
38 | insertMockData(113);
39 | insertMockData(114);
40 | refreshIndex();
41 |
42 | PageResult firstPage = repository.searchAfter(TEST_INDEX, Cursor.empty());
43 | assertEquals(3, firstPage.getDocuments().size());
44 |
45 | PageResult secondPage = repository.searchAfter(TEST_INDEX, firstPage.getLastCursor());
46 | assertEquals(1, secondPage.getDocuments().size());
47 |
48 | PageResult emptyPage = repository.searchAfter(TEST_INDEX, secondPage.getLastCursor());
49 | assertEquals(0, emptyPage.getDocuments().size());
50 | assertNull(emptyPage.getLastCursor().getPrimaryCursor());
51 |
52 | assertEquals(Collections.singletonList(TEST_INDEX), repository.catIndices("source"));
53 | assertEquals(Collections.emptyList(), repository.catIndices("non-existing"));
54 | }
55 | @Test
56 | public void shouldFetchDataFromElasticWithNestedCursor() throws IOException, InterruptedException {
57 | deleteTestIndex();
58 |
59 | insertMockData(111);
60 | insertMockData(112);
61 | insertMockData(113);
62 | insertMockData(114);
63 | refreshIndex();
64 |
65 | PageResult firstPage = nestedRepository.searchAfter(TEST_INDEX, Cursor.empty());
66 | assertEquals(3, firstPage.getDocuments().size());
67 |
68 | PageResult secondPage = nestedRepository.searchAfter(TEST_INDEX, firstPage.getLastCursor());
69 | assertEquals(1, secondPage.getDocuments().size());
70 |
71 | PageResult emptyPage = nestedRepository.searchAfter(TEST_INDEX, secondPage.getLastCursor());
72 | assertEquals(0, emptyPage.getDocuments().size());
73 | assertNull(emptyPage.getLastCursor().getPrimaryCursor());
74 |
75 | assertEquals(Collections.singletonList(TEST_INDEX), nestedRepository.catIndices("source"));
76 | assertEquals(Collections.emptyList(), nestedRepository.catIndices("non-existing"));
77 | }
78 |
79 | @Test
80 | public void shouldListExistingIndices() throws IOException, InterruptedException {
81 | deleteTestIndex();
82 | insertMockData(111);
83 | refreshIndex();
84 |
85 | assertEquals(Collections.singletonList(TEST_INDEX), repository.catIndices("source"));
86 | assertEquals(Collections.emptyList(), repository.catIndices("non-existing"));
87 | }
88 |
89 | @Test
90 | public void shouldFetchDataUsingSecondarySortField() throws IOException, InterruptedException {
91 | deleteTestIndex();
92 |
93 | insertMockData(111, "customerA", TEST_INDEX);
94 | insertMockData(111, "customerB", TEST_INDEX);
95 | insertMockData(111, "customerC", TEST_INDEX);
96 | insertMockData(111, "customerD", TEST_INDEX);
97 | insertMockData(112, "customerA", TEST_INDEX);
98 | insertMockData(113, "customerB", TEST_INDEX);
99 | insertMockData(113, "customerC", TEST_INDEX);
100 | insertMockData(113, "customerD", TEST_INDEX);
101 |
102 | refreshIndex();
103 |
104 | PageResult firstPage = secondarySortRepo.searchAfterWithSecondarySort(TEST_INDEX, Cursor.empty());
105 | assertEquals(3, firstPage.getDocuments().size());
106 |
107 | PageResult secondPage = secondarySortRepo.searchAfterWithSecondarySort(TEST_INDEX, firstPage.getLastCursor());
108 | assertEquals(3, secondPage.getDocuments().size());
109 |
110 | PageResult thirdPage = secondarySortRepo.searchAfterWithSecondarySort(TEST_INDEX, secondPage.getLastCursor());
111 | assertEquals(2, thirdPage.getDocuments().size());
112 |
113 | PageResult emptyPage = secondarySortRepo.searchAfterWithSecondarySort(TEST_INDEX, thirdPage.getLastCursor());
114 | assertEquals(0, emptyPage.getDocuments().size());
115 | assertNull(emptyPage.getLastCursor().getPrimaryCursor());
116 | assertNull(emptyPage.getLastCursor().getSecondaryCursor());
117 | }
118 |
119 | @Test
120 | public void shouldFetchDataWithAdditionalField() throws IOException, InterruptedException {
121 | deleteTestIndex();
122 |
123 | insertMockData(110, "customerA", TEST_INDEX);
124 | insertMockData(111, "customerB", TEST_INDEX);
125 | refreshIndex();
126 |
127 | PageResult firstPage = repository.searchAfter(TEST_INDEX, Cursor.empty());
128 | firstPage.getDocuments().forEach(item -> {
129 | assertNotNull(item.get("es-index"));
130 | assertNotNull(item.get("es-id"));
131 | });
132 | }
133 |
134 | }
135 |
--------------------------------------------------------------------------------
/src/test/java/com/github/dariobalinzo/elastic/ElasticSourceConnectorTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.github.dariobalinzo.elastic;
18 |
19 |
20 | import com.github.dariobalinzo.ElasticSourceConnector;
21 | import com.github.dariobalinzo.TestContainersContext;
22 | import com.github.dariobalinzo.task.ElasticSourceTaskConfig;
23 |
24 | import org.junit.Test;
25 |
26 | import java.io.IOException;
27 | import java.util.List;
28 | import java.util.Map;
29 |
30 | import static org.junit.Assert.assertEquals;
31 | import static org.junit.Assert.assertNotNull;
32 |
33 | public class ElasticSourceConnectorTest extends TestContainersContext {
34 |
35 | @Test
36 | public void shouldGetAListOfTasks() throws IOException {
37 | //given
38 | ElasticSourceConnector connector = new ElasticSourceConnector();
39 | connector.start(getConf());
40 | insertMockData(1, TEST_INDEX + 1);
41 | insertMockData(2, TEST_INDEX + 2);
42 | insertMockData(3, TEST_INDEX + 3);
43 | insertMockData(4, TEST_INDEX + 4);
44 |
45 | try {
46 | Thread.sleep(1000);
47 | } catch (InterruptedException ignored) {
48 | }
49 |
50 | //when
51 | int maxTasks = 3;
52 | List<Map<String, String>> taskList = connector.taskConfigs(maxTasks);
53 |
54 | //then
55 | assertEquals(maxTasks, taskList.size());
56 | assertNotNull(connector.version());
57 | connector.stop();
58 | }
59 |
60 | @Test
61 | public void shouldGetTaskFromFixedList() {
62 | //given
63 | ElasticSourceConnector connector = new ElasticSourceConnector();
64 | Map<String, String> conf = getConf();
65 | conf.remove(ElasticSourceTaskConfig.INDEX_PREFIX_CONFIG);
66 | conf.put(ElasticSourceTaskConfig.INDEX_NAMES_CONFIG, "index1,index2,index3");
67 | connector.start(conf);
68 |
69 | //when
70 | int maxTasks = 3;
71 | List<Map<String, String>> taskList = connector.taskConfigs(maxTasks);
72 |
73 | //then
74 | assertEquals(maxTasks, taskList.size());
75 | connector.stop();
76 | }
77 | }
78 |
--------------------------------------------------------------------------------
/src/test/java/com/github/dariobalinzo/filter/BlacklistFilterTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.github.dariobalinzo.filter;
18 |
19 |
20 | import com.fasterxml.jackson.databind.ObjectMapper;
21 | import org.junit.Assert;
22 | import org.junit.Test;
23 |
24 | import java.io.IOException;
25 | import java.nio.file.Files;
26 | import java.nio.file.Paths;
27 | import java.util.HashSet;
28 | import java.util.LinkedHashMap;
29 | import java.util.Map;
30 | import java.util.Set;
31 | import java.util.stream.Collectors;
32 | import java.util.stream.Stream;
33 |
34 | import static junit.framework.TestCase.assertEquals;
35 |
36 | public class BlacklistFilterTest {
37 | private final ObjectMapper objectMapper = new ObjectMapper();
38 |
39 | @Test
40 | public void shouldConvertSimpleSchema() {
41 | //given
42 | Map<String, Object> elasticDocument = new LinkedHashMap<>();
43 | elasticDocument.put("name", "elastic");
44 | elasticDocument.put("surname", "search");
45 | elasticDocument.put("version", 7);
46 | elasticDocument.put("enabled", true);
47 |
48 | //when
49 | Set<String> filterValues = Stream.of(
50 | "name",
51 | "surname",
52 | "version"
53 | ).collect(Collectors.toCollection(HashSet::new));
54 | BlacklistFilter blacklistFilter = new BlacklistFilter(filterValues);
55 | blacklistFilter.filter(elasticDocument);
56 |
57 | //then
58 | Assert.assertEquals("{enabled=true}", elasticDocument.toString());
59 | }
60 |
61 | @SuppressWarnings("unchecked")
62 | @Test
63 | public void shouldConvertNestedDocument() throws IOException {
64 | //given
65 | String file = this.getClass().getClassLoader()
66 | .getResource("com/github/dariobalinzo/filter/document.json")
67 | .getFile();
68 | String jsonDocument = new String(Files.readAllBytes(Paths.get(file)));
69 |
70 | Map<String, Object> elasticDocument = objectMapper.readValue(jsonDocument, Map.class);
71 |
72 | //when
73 | Set<String> blacklist = Stream.of(
74 | "name",
75 | "obj.details.qty",
76 | "order_list.details.qty"
77 | ).collect(Collectors.toSet());
78 | BlacklistFilter blacklistFilter = new BlacklistFilter(blacklist);
79 | blacklistFilter.filter(elasticDocument);
80 |
81 | //then
82 | assertEquals(
83 | "{age=7, order_list=[{id=1, details={nested_det=test nested inside list}}, {id=2, details={nested_det=test nested inside list}}], obj={key=55, details={nested_det=test nested inside list}}}",
84 | elasticDocument.toString());
85 | }
86 |
87 | }
88 |
--------------------------------------------------------------------------------
/src/test/java/com/github/dariobalinzo/filter/JsonCastFilterTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.github.dariobalinzo.filter;
18 |
19 |
20 | import com.fasterxml.jackson.databind.ObjectMapper;
21 | import org.junit.Test;
22 |
23 | import java.io.IOException;
24 | import java.nio.file.Files;
25 | import java.nio.file.Paths;
26 | import java.util.List;
27 | import java.util.Map;
28 | import java.util.Set;
29 | import java.util.stream.Collectors;
30 | import java.util.stream.Stream;
31 |
32 | import static junit.framework.TestCase.assertEquals;
33 |
34 | public class JsonCastFilterTest {
35 |
36 | private final ObjectMapper objectMapper = new ObjectMapper();
37 |
38 | @SuppressWarnings("unchecked")
39 | @Test
40 | public void shouldConvertSimpleDocument() throws IOException {
41 | //given
42 | String file = this.getClass().getClassLoader()
43 | .getResource("com/github/dariobalinzo/filter/document.json")
44 | .getFile();
45 | String jsonDocument = new String(Files.readAllBytes(Paths.get(file)));
46 |
47 | Map<String, Object> elasticDocument = objectMapper.readValue(jsonDocument, Map.class);
48 |
49 | //when
50 | Set<String> toCast = Stream.of(
51 | "name",
52 | "obj.details",
53 | "order_list.details"
54 | ).collect(Collectors.toSet());
55 | JsonCastFilter jsonCastFilter = new JsonCastFilter(toCast);
56 | jsonCastFilter.filter(elasticDocument);
57 |
58 | //then
59 | assertEquals(5, elasticDocument.keySet().size());
60 | assertEquals("\"elastic\"", elasticDocument.get("name"));
61 | Map<String, Object> obj = (Map<String, Object>) elasticDocument.get("obj");
62 | assertEquals("{\"nested_det\":\"test nested inside list\",\"qty\":2}", obj.get("details"));
63 |
64 | List<?> nestedList = (List<?>) elasticDocument.get("order_list");
65 | Map<String, Object> nestedInsideList1 = (Map<String, Object>) nestedList.get(0);
66 | Map<String, Object> nestedInsideList2 = (Map<String, Object>) nestedList.get(1);
67 |
68 | assertEquals("{\"nested_det\":\"test nested inside list\",\"qty\":1}", nestedInsideList1.get("details"));
69 | assertEquals("{\"nested_det\":\"test nested inside list\",\"qty\":2}", nestedInsideList2.get("details"));
70 | }
71 |
72 | }
73 |
--------------------------------------------------------------------------------
/src/test/java/com/github/dariobalinzo/filter/WhitelistFilterTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.github.dariobalinzo.filter;
18 |
19 |
20 | import com.fasterxml.jackson.databind.ObjectMapper;
21 | import org.junit.Assert;
22 | import org.junit.Test;
23 |
24 | import java.io.IOException;
25 | import java.nio.file.Files;
26 | import java.nio.file.Paths;
27 | import java.util.HashSet;
28 | import java.util.LinkedHashMap;
29 | import java.util.Map;
30 | import java.util.Set;
31 | import java.util.stream.Collectors;
32 | import java.util.stream.Stream;
33 |
34 | import static junit.framework.TestCase.assertEquals;
35 |
36 | public class WhitelistFilterTest {
37 | private final ObjectMapper objectMapper = new ObjectMapper();
38 |
39 | @Test
40 | public void shouldConvertSimpleSchema() {
41 | //given
42 | Map<String, Object> elasticDocument = new LinkedHashMap<>();
43 | elasticDocument.put("name", "elastic");
44 | elasticDocument.put("surname", "search");
45 | elasticDocument.put("version", 7);
46 | elasticDocument.put("enabled", true);
47 |
48 | //when
49 | Set<String> filterValues = Stream.of(
50 | "name",
51 | "surname",
52 | "version"
53 | ).collect(Collectors.toCollection(HashSet::new));
54 | WhitelistFilter whitelistFilter = new WhitelistFilter(filterValues);
55 | whitelistFilter.filter(elasticDocument);
56 |
57 | //then
58 | Assert.assertEquals("{name=elastic, surname=search, version=7}", elasticDocument.toString());
59 | }
60 |
61 | @SuppressWarnings("unchecked")
62 | @Test
63 | public void shouldConvertNestedDocument() throws IOException {
64 | //given
65 | String file = this.getClass().getClassLoader()
66 | .getResource("com/github/dariobalinzo/filter/document.json")
67 | .getFile();
68 | String jsonDocument = new String(Files.readAllBytes(Paths.get(file)));
69 |
70 | Map<String, Object> elasticDocument = objectMapper.readValue(jsonDocument, Map.class);
71 |
72 | //when
73 | Set<String> whitelist = Stream.of(
74 | "name",
75 | "obj.details.qty",
76 | "order_list.details.qty"
77 | ).collect(Collectors.toSet());
78 | WhitelistFilter whitelistFilter = new WhitelistFilter(whitelist);
79 | whitelistFilter.filter(elasticDocument);
80 |
81 | //then
82 | assertEquals(
83 | "{name=elastic, order_list=[{details={qty=1}}, {details={qty=2}}], obj={details={qty=2}}}",
84 | elasticDocument.toString());
85 | }
86 |
87 | }
88 |
--------------------------------------------------------------------------------
/src/test/java/com/github/dariobalinzo/foo.json:
--------------------------------------------------------------------------------
1 | {
2 | "equipments": [
3 | {
4 | "prices": {
5 | "listPrice": {
6 | "netPrice": {
7 | "amount": 420.17,
8 | "currency": "EUR"
9 | },
10 | "grossPrice": {
11 | "amount": 500,
12 | "currency": "EUR"
13 | },
14 | "updatedAt": "2023-05-17T04:08:18.046399"
15 | },
16 | "salesPrice": {
17 | "netPrice": {
18 | "amount": 0,
19 | "currency": "EUR"
20 | },
21 | "grossPrice": {
22 | "amount": 0,
23 | "currency": "EUR"
24 | },
25 | "updatedAt": "2023-05-17T04:08:17.844466"
26 | }
27 | }
28 | },
29 | {
30 | "prices": {
31 | "listPrice": {
32 | "netPrice": {
33 | "amount": 0,
34 | "currency": "EUR"
35 | },
36 | "grossPrice": {
37 | "amount": 0,
38 | "currency": "EUR"
39 | },
40 | "updatedAt": "2023-05-17T04:08:18.046401"
41 | },
42 | "salesPrice": {
43 | "netPrice": {
44 | "amount": 0,
45 | "currency": "EUR"
46 | },
47 | "grossPrice": {
48 | "amount": 0,
49 | "currency": "EUR"
50 | },
51 | "updatedAt": "2023-05-17T04:08:17.844525"
52 | }
53 | }
54 | },
55 | {
56 | "prices": {
57 | "listPrice": {
58 | "netPrice": {
59 | "amount": 3403.36,
60 | "currency": "EUR"
61 | },
62 | "grossPrice": {
63 | "amount": 4050,
64 | "currency": "EUR"
65 | },
66 | "updatedAt": "2023-05-17T04:08:18.0464"
67 | },
68 | "salesPrice": {
69 | "netPrice": {
70 | "amount": 0,
71 | "currency": "EUR"
72 | },
73 | "grossPrice": {
74 | "amount": 0,
75 | "currency": "EUR"
76 | },
77 | "updatedAt": "2023-05-17T04:08:17.844519"
78 | }
79 | }
80 | },
81 | {
82 | "prices": {
83 | "listPrice": {
84 | "netPrice": {
85 | "amount": 0,
86 | "currency": "EUR"
87 | },
88 | "grossPrice": {
89 | "amount": 0,
90 | "currency": "EUR"
91 | },
92 | "updatedAt": "2023-05-17T04:08:18.046397"
93 | },
94 | "salesPrice": {
95 | "netPrice": {
96 | "amount": 0,
97 | "currency": "EUR"
98 | },
99 | "grossPrice": {
100 | "amount": 0,
101 | "currency": "EUR"
102 | },
103 | "updatedAt": "2023-05-17T04:08:17.844406"
104 | }
105 | }
106 | }
107 | ]
108 | }
109 |
--------------------------------------------------------------------------------
/src/test/java/com/github/dariobalinzo/schema/AvroNameTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.github.dariobalinzo.schema;
18 |
19 |
20 | import org.junit.Assert;
21 | import org.junit.Test;
22 |
23 | public class AvroNameTest {
24 |
25 | @Test
26 | public void shouldCreateValidAvroNames() {
27 | //given
28 | String invalidName = "foo.bar";
29 | FieldNameConverter converter = new AvroName();
30 |
31 | //when
32 | String validName = converter.from(invalidName);
33 | String validNamePrefix = converter.from("prefix", invalidName);
34 | String startByNumber = converter.from("1invalid");
35 |
36 | //then
37 | Assert.assertEquals("foobar", validName);
38 | Assert.assertEquals("prefixfoobar", validNamePrefix);
39 | Assert.assertEquals("avro1invalid", startByNumber);
40 | }
41 |
42 | }
43 |
--------------------------------------------------------------------------------
/src/test/java/com/github/dariobalinzo/task/ElasticSourceTaskTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.github.dariobalinzo.task;
18 |
19 | import com.github.dariobalinzo.ElasticSourceConnectorConfig;
20 | import com.github.dariobalinzo.TestContainersContext;
21 | import org.apache.kafka.connect.data.Field;
22 | import org.apache.kafka.connect.data.Struct;
23 | import org.apache.kafka.connect.source.SourceRecord;
24 | import org.apache.kafka.connect.source.SourceTaskContext;
25 | import org.junit.Before;
26 | import org.junit.Test;
27 | import org.mockito.Mock;
28 | import org.mockito.Mockito;
29 | import org.mockito.MockitoAnnotations;
30 |
31 | import java.io.IOException;
32 | import java.util.List;
33 | import java.util.Map;
34 |
35 | import static com.github.dariobalinzo.ElasticSourceConnectorConfig.SECONDARY_INCREMENTING_FIELD_NAME_CONFIG;
36 | import static org.junit.Assert.*;
37 |
38 | public class ElasticSourceTaskTest extends TestContainersContext {
39 |
40 | @Mock
41 | private SourceTaskContext context;
42 |
43 | @Before
44 | public void init() {
45 | MockitoAnnotations.initMocks(this);
46 | }
47 |
48 | @Test
49 | public void shouldRunSourceTaskWithoutInitialOffset() throws IOException, InterruptedException {
50 | //given
51 | deleteTestIndex();
52 |
53 | insertMockData(111);
54 | insertMockData(112);
55 | insertMockData(113);
56 | insertMockData(114);
57 | refreshIndex();
58 |
59 | ElasticSourceTask task = new ElasticSourceTask();
60 | Mockito.when(context.offsetStorageReader()).thenReturn(MockOffsetFactory.empty());
61 | task.initialize(context);
62 |
63 | //when (fetching first page)
64 | task.start(getConf());
65 | List<SourceRecord> poll1 = task.poll();
66 | assertEquals(2, poll1.size());
67 | assertEquals(111L,
68 | ((Struct) poll1.get(0).value()).get("ts")
69 | );
70 | assertEquals("{position=111}", poll1.get(0).sourceOffset().toString());
71 | assertEquals(
72 | 112L,
73 | ((Struct) poll1.get(1).value()).get("ts")
74 | );
75 | assertEquals("{position=112}", poll1.get(1).sourceOffset().toString());
76 |
77 | //when (fetching second page)
78 | List<SourceRecord> poll2 = task.poll();
79 | assertEquals(2, poll2.size());
80 | assertEquals(
81 | 113L,
82 | ((Struct) poll2.get(0).value()).get("ts")
83 | );
84 | assertEquals("{position=113}", poll2.get(0).sourceOffset().toString());
85 | assertEquals(
86 | 114L,
87 | ((Struct) poll2.get(1).value()).get("ts")
88 | );
89 | assertEquals("{position=114}", poll2.get(1).sourceOffset().toString());
90 |
91 | //then
92 | List<SourceRecord> empty = task.poll();
93 | assertTrue(empty.isEmpty());
94 |
95 | task.stop();
96 | }
97 |
98 | @Test
99 | public void shouldRunTask_WithSecondarySort_WithoutInitialOffset() throws IOException, InterruptedException {
100 | //given
101 | deleteTestIndex();
102 |
103 | insertMockData(111, "customerA", TEST_INDEX);
104 | insertMockData(111, "customerB", TEST_INDEX);
105 | insertMockData(111, "customerC", TEST_INDEX);
106 | insertMockData(111, "customerD", TEST_INDEX);
107 | insertMockData(112, "customerA", TEST_INDEX);
108 | refreshIndex();
109 |
110 | ElasticSourceTask task = new ElasticSourceTask();
111 | Mockito.when(context.offsetStorageReader()).thenReturn(MockOffsetFactory.empty());
112 | task.initialize(context);
113 |
114 | //when (fetching first page)
115 | Map<String, String> conf = getConf();
116 | conf.put(SECONDARY_INCREMENTING_FIELD_NAME_CONFIG, SECONDARY_CURSOR_FIELD);
117 | task.start(conf);
118 | List<SourceRecord> poll1 = task.poll();
119 | assertEquals(2, poll1.size());
120 | assertEquals(
121 | "customerA",
122 | ((Struct) poll1.get(0).value()).get("fullName")
123 | );
124 | assertEquals(
125 | 111L,
126 | ((Struct) poll1.get(0).value()).get("ts")
127 | );
128 | assertEquals("{position_secondary=customerA, position=111}", poll1.get(0).sourceOffset().toString());
129 | assertEquals(
130 | "customerB",
131 | ((Struct) poll1.get(1).value()).get("fullName")
132 | );
133 | assertEquals(
134 | 111L,
135 | ((Struct) poll1.get(1).value()).get("ts")
136 | );
137 | assertEquals("{position_secondary=customerB, position=111}", poll1.get(1).sourceOffset().toString());
138 |
139 | //when (fetching second page)
140 | List<SourceRecord> poll2 = task.poll();
141 | assertEquals(2, poll2.size());
142 | assertEquals(
143 | "customerC",
144 | ((Struct) poll2.get(0).value()).get("fullName")
145 | );
146 | assertEquals(
147 | 111L,
148 | ((Struct) poll2.get(0).value()).get("ts")
149 | );
150 | assertEquals("{position_secondary=customerC, position=111}", poll2.get(0).sourceOffset().toString());
151 | assertEquals(
152 | "customerD",
153 | ((Struct) poll2.get(1).value()).get("fullName")
154 | );
155 | assertEquals(
156 | 111L,
157 | ((Struct) poll2.get(1).value()).get("ts")
158 | );
159 | assertEquals("{position_secondary=customerD, position=111}", poll2.get(1).sourceOffset().toString());
160 |
161 | //then
162 | List<SourceRecord> last = task.poll();
163 | assertEquals(1, last.size());
164 | assertEquals(
165 | "customerA",
166 | ((Struct) last.get(0).value()).get("fullName")
167 | );
168 | assertEquals(
169 | 112L,
170 | ((Struct) last.get(0).value()).get("ts")
171 | );
172 | assertEquals("{position_secondary=customerA, position=112}", last.get(0).sourceOffset().toString());
173 |
174 | List<SourceRecord> empty = task.poll();
175 | assertTrue(empty.isEmpty());
176 |
177 | task.stop();
178 | }
179 |
180 | @Test
181 | public void shouldRunSourceTaskWithInitialOffset() throws IOException, InterruptedException {
182 | //given
183 | deleteTestIndex();
184 |
185 | insertMockData(111);
186 | insertMockData(112);
187 | insertMockData(113);
188 | insertMockData(114);
189 | refreshIndex();
190 |
191 | ElasticSourceTask task = new ElasticSourceTask();
192 | Mockito.when(context.offsetStorageReader()).thenReturn(MockOffsetFactory.from(String.valueOf(111)));
193 | task.initialize(context);
194 |
195 | //when (fetching first page)
196 | task.start(getConf());
197 | List<SourceRecord> poll1 = task.poll();
198 |
199 | assertEquals(2, poll1.size());
200 |
201 | assertEquals(
202 | "Test",
203 | ((Struct) poll1.get(0).value()).get("fullName")
204 | );
205 | assertEquals(
206 | 112L,
207 | ((Struct) poll1.get(0).value()).get("ts")
208 | );
209 | assertEquals("{position=112}", poll1.get(0).sourceOffset().toString());
210 | assertEquals(
211 | "Test",
212 | ((Struct) poll1.get(1).value()).get("fullName")
213 | );
214 | assertEquals(
215 | 113L,
216 | ((Struct) poll1.get(1).value()).get("ts")
217 | );
218 | assertEquals("{position=113}", poll1.get(1).sourceOffset().toString());
219 |
220 | //when (fetching second page)
221 | List<SourceRecord> poll2 = task.poll();
222 | assertEquals(1, poll2.size());
223 | assertEquals(
224 | "Test",
225 | ((Struct) poll2.get(0).value()).get("fullName")
226 | );
227 | assertEquals(
228 | 114L,
229 | ((Struct) poll2.get(0).value()).get("ts")
230 | );
231 | assertEquals("{position=114}", poll2.get(0).sourceOffset().toString());
232 |
233 | //then
234 | List<SourceRecord> empty = task.poll();
235 | assertTrue(empty.isEmpty());
236 |
237 | task.stop();
238 | }
239 |
240 | @Test
241 | public void shouldRunTask_WithSecondarySort_WithOnlyPrimaryInitialOffset() throws IOException, InterruptedException {
242 | //given
243 | deleteTestIndex();
244 |
245 | insertMockData(110, "customerA", TEST_INDEX); //already seen...
246 | insertMockData(111, "customerA", TEST_INDEX);
247 | insertMockData(111, "customerB", TEST_INDEX);
248 | insertMockData(111, "customerC", TEST_INDEX);
249 | insertMockData(111, "customerD", TEST_INDEX);
250 | insertMockData(112, "customerA", TEST_INDEX);
251 | refreshIndex();
252 |
253 | ElasticSourceTask task = new ElasticSourceTask();
254 | Mockito.when(context.offsetStorageReader()).thenReturn(MockOffsetFactory.from(String.valueOf(110)));
255 | task.initialize(context);
256 |
257 | //when (fetching first page)
258 | Map<String, String> conf = getConf();
259 | conf.put(SECONDARY_INCREMENTING_FIELD_NAME_CONFIG, SECONDARY_CURSOR_FIELD);
260 | task.start(conf);
261 | List<SourceRecord> poll1 = task.poll();
262 | assertEquals(2, poll1.size());
263 | assertEquals(
264 | "customerA",
265 | ((Struct) poll1.get(0).value()).get("fullName")
266 | );
267 | assertEquals(
268 | 111L,
269 | ((Struct) poll1.get(0).value()).get("ts")
270 | );
271 | assertEquals(
272 | "customerB",
273 | ((Struct) poll1.get(1).value()).get("fullName")
274 | );
275 | assertEquals(
276 | 111L,
277 | ((Struct) poll1.get(1).value()).get("ts")
278 | );
279 |
280 | //when (fetching second page)
281 | List<SourceRecord> poll2 = task.poll();
282 | assertEquals(
283 | "customerC",
284 | ((Struct) poll2.get(0).value()).get("fullName")
285 | );
286 | assertEquals(
287 | 111L,
288 | ((Struct) poll2.get(0).value()).get("ts")
289 | );
290 | assertEquals("{position_secondary=customerC, position=111}", poll2.get(0).sourceOffset().toString());
291 | assertEquals(
292 | "customerD",
293 | ((Struct) poll2.get(1).value()).get("fullName")
294 | );
295 | assertEquals(
296 | 111L,
297 | ((Struct) poll2.get(1).value()).get("ts")
298 | );
299 | assertEquals("{position_secondary=customerD, position=111}", poll2.get(1).sourceOffset().toString());
300 | assertEquals(2, poll2.size());
301 |
302 | //then
303 | List<SourceRecord> last = task.poll();
304 | assertEquals(1, last.size());
305 | assertEquals(
306 | "customerA",
307 | ((Struct) last.get(0).value()).get("fullName")
308 | );
309 | assertEquals(
310 | 112L,
311 | ((Struct) last.get(0).value()).get("ts")
312 | );
313 | assertEquals("{position_secondary=customerA, position=112}", last.get(0).sourceOffset().toString());
314 | List<SourceRecord> empty = task.poll();
315 | assertTrue(empty.isEmpty());
316 |
317 | task.stop();
318 | }
319 |
320 | @Test
321 | public void shouldRunTask_WithSecondarySort_WithInitialOffset() throws IOException, InterruptedException {
322 | //given
323 | deleteTestIndex();
324 |
325 | insertMockData(110, "customerA", TEST_INDEX); //already seen
326 | insertMockData(111, "customerA", TEST_INDEX); //already seen
327 | insertMockData(111, "customerB", TEST_INDEX);
328 | insertMockData(111, "customerC", TEST_INDEX);
329 | insertMockData(111, "customerD", TEST_INDEX);
330 | insertMockData(112, "customerA", TEST_INDEX);
331 | refreshIndex();
332 |
333 | ElasticSourceTask task = new ElasticSourceTask();
334 | Mockito.when(context.offsetStorageReader()).thenReturn(MockOffsetFactory.from(String.valueOf(111), "customerA"));
335 | task.initialize(context);
336 |
337 | //when (fetching first page)
338 | Map<String, String> conf = getConf();
339 | conf.put(SECONDARY_INCREMENTING_FIELD_NAME_CONFIG, SECONDARY_CURSOR_FIELD);
340 | task.start(conf);
341 | List<SourceRecord> poll1 = task.poll();
342 | assertEquals(2, poll1.size());
343 | assertEquals(
344 | "customerB",
345 | ((Struct) poll1.get(0).value()).get("fullName")
346 | );
347 | assertEquals("{position_secondary=customerB, position=111}", poll1.get(0).sourceOffset().toString());
348 | assertEquals(
349 | "customerC",
350 | ((Struct) poll1.get(1).value()).get("fullName")
351 | );
352 |
353 | //when (fetching second page)
354 | List<SourceRecord> poll2 = task.poll();
355 | assertEquals(2, poll2.size());
356 | assertEquals(
357 | "customerD",
358 | ((Struct) poll2.get(0).value()).get("fullName")
359 | );
360 | assertEquals("{position_secondary=customerD, position=111}", poll2.get(0).sourceOffset().toString());
361 | assertEquals(
362 | "customerA",
363 | ((Struct) poll2.get(1).value()).get("fullName")
364 | );
365 | assertEquals("{position_secondary=customerA, position=112}", poll2.get(1).sourceOffset().toString());
366 |
367 | //then
368 | List<SourceRecord> empty = task.poll();
369 | assertTrue(empty.isEmpty());
370 |
371 | task.stop();
372 | }
373 |
374 |
375 | @Test
376 | public void shouldRunSourceTaskWhitelist() throws IOException, InterruptedException {
377 | //given
378 | deleteTestIndex();
379 |
380 | insertMockData(111);
381 | insertMockData(112);
382 | insertMockData(113);
383 | insertMockData(114);
384 | refreshIndex();
385 |
386 | ElasticSourceTask task = new ElasticSourceTask();
387 | Mockito.when(context.offsetStorageReader()).thenReturn(MockOffsetFactory.empty());
388 | task.initialize(context);
389 | Map<String, String> conf = getConf();
390 | conf.put(ElasticSourceConnectorConfig.FIELDS_WHITELIST_CONFIG, "fullName");
391 |
392 | //when (fetching first page)
393 | task.start(conf);
394 | List<SourceRecord> poll1 = task.poll();
395 | //Check that the struct contains only one field, "fullName" = "Test"
396 | assertEquals(1, ((Struct) poll1.get(0).value()).schema().fields().size());
397 | assertEquals("Test", ((Struct) poll1.get(0).value()).get("fullName"));
398 | task.stop();
399 | }
400 |
401 | @Test
402 | public void shouldRunSourceTaskBlacklist() throws IOException, InterruptedException {
403 | //given
404 | deleteTestIndex();
405 |
406 | insertMockData(111);
407 | insertMockData(112);
408 | insertMockData(113);
409 | insertMockData(114);
410 | refreshIndex();
411 |
412 | ElasticSourceTask task = new ElasticSourceTask();
413 | Mockito.when(context.offsetStorageReader()).thenReturn(MockOffsetFactory.empty());
414 | task.initialize(context);
415 | Map<String, String> conf = getConf();
416 | conf.put(ElasticSourceConnectorConfig.FIELDS_BLACKLIST_CONFIG, "fullName");
417 |
418 | //when (fetching first page)
419 | task.start(conf);
420 | List<SourceRecord> poll1 = task.poll();
421 |
422 | //Then
423 | List<Field> fields = ((Struct) poll1.get(0).value()).schema().fields();
424 | assertEquals(6, fields.size());
425 | task.stop();
426 | }
427 |
428 | @Test
429 | public void shouldRunSourceTaskWithJsonCastFilter() throws IOException, InterruptedException {
430 | //given
431 | deleteTestIndex();
432 |
433 | insertMockData(111);
434 | insertMockData(112);
435 | insertMockData(113);
436 | insertMockData(114);
437 | refreshIndex();
438 |
439 | ElasticSourceTask task = new ElasticSourceTask();
440 | Mockito.when(context.offsetStorageReader()).thenReturn(MockOffsetFactory.empty());
441 | task.initialize(context);
442 | Map<String, String> conf = getConf();
443 | conf.put(ElasticSourceConnectorConfig.FIELDS_JSON_CAST_CONFIG, "fullName");
444 |
445 | //when (fetching first page)
446 | task.start(conf);
447 | List<SourceRecord> poll1 = task.poll();
448 | Struct structValue = (Struct) poll1.get(0).value();
449 | assertEquals("\"Test\"", structValue.get("fullName"));
450 | assertEquals("avro-field", structValue.get("avroField"));
451 | assertEquals("non-avro-field", structValue.get("nonavrofield"));
452 | task.stop();
453 | }
454 |
455 | @Test
456 | public void shouldRunSourceTaskWithAvroNameConverter() throws IOException, InterruptedException {
457 | //given
458 | deleteTestIndex();
459 |
460 | insertMockData(111);
461 | insertMockData(112);
462 | insertMockData(113);
463 | insertMockData(114);
464 | refreshIndex();
465 |
466 | ElasticSourceTask task = new ElasticSourceTask();
467 | Mockito.when(context.offsetStorageReader()).thenReturn(MockOffsetFactory.empty());
468 | task.initialize(context);
469 | Map<String, String> conf = getConf();
470 |
471 | //when (fetching first page)
472 | task.start(conf);
473 | List<SourceRecord> poll1 = task.poll();
474 | Struct structValue = (Struct) poll1.get(0).value();
475 | assertEquals("avro-field", structValue.get("avroField"));
476 | assertEquals("non-avro-field", structValue.get("nonavrofield"));
477 | task.stop();
478 | }
479 |
480 | @Test
481 | public void shouldRunSourceTaskWithNopNameConverter() throws IOException, InterruptedException {
482 | //given
483 | deleteTestIndex();
484 |
485 | insertMockData(111);
486 | insertMockData(112);
487 | insertMockData(113);
488 | insertMockData(114);
489 | refreshIndex();
490 |
491 | ElasticSourceTask task = new ElasticSourceTask();
492 | Mockito.when(context.offsetStorageReader()).thenReturn(MockOffsetFactory.empty());
493 | task.initialize(context);
494 | Map<String, String> conf = getConf();
495 | conf.put(ElasticSourceConnectorConfig.CONNECTOR_FIELDNAME_CONVERTER_CONFIG,
496 | ElasticSourceConnectorConfig.NOP_FIELDNAME_CONVERTER);
497 |
498 | //when (fetching first page)
499 | task.start(conf);
500 | List<SourceRecord> poll1 = task.poll();
501 | Struct structValue = (Struct) poll1.get(0).value();
502 | assertEquals("avro-field", structValue.get("avroField"));
503 | assertEquals("non-avro-field", structValue.get("non-avro-field"));
504 | task.stop();
505 | }
506 |
507 |
508 | }
509 |
--------------------------------------------------------------------------------
/src/test/java/com/github/dariobalinzo/task/MockOffsetFactory.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright © 2018 Dario Balinzo (dariobalinzo@gmail.com)
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package com.github.dariobalinzo.task;
18 |
19 | import org.apache.kafka.connect.storage.OffsetStorageReader;
20 |
21 | import java.util.Collection;
22 | import java.util.HashMap;
23 | import java.util.Map;
24 |
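// Test helper that builds OffsetStorageReader stubs: empty() simulates a task
// starting with no stored offset, while from(...) pre-seeds the primary and,
// optionally, the secondary cursor position.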
25 | public class MockOffsetFactory {
26 |
27 | static OffsetStorageReader empty() {
28 | return emptyOffset;
29 | }
30 |
31 | static OffsetStorageReader from(String initialCursor) {
32 | return from(initialCursor, null);
33 | }
34 |
35 | static OffsetStorageReader from(String initialCursor, String secondaryCursor) {
36 | Map<String, Object> state = new HashMap<>();
37 | state.put(ElasticSourceTask.POSITION, initialCursor);
38 | if (secondaryCursor != null) {
39 | state.put(ElasticSourceTask.POSITION_SECONDARY, secondaryCursor);
40 | }
41 |
42 | return new OffsetStorageReader() {
43 | @Override
44 | public <T> Map<String, Object> offset(Map<String, T> map) {
45 | return state;
46 | }
47 |
48 | @Override
49 | public <T> Map<Map<String, T>, Map<String, Object>> offsets(Collection<Map<String, T>> collection) {
50 | return null;
51 | }
52 | };
53 | }
54 |
55 | private static final OffsetStorageReader emptyOffset = new OffsetStorageReader() {
56 | @Override
57 | public <T> Map<String, Object> offset(Map<String, T> map) {
58 | return new HashMap<>();
59 | }
60 |
61 | @Override
62 | public <T> Map<Map<String, T>, Map<String, Object>> offsets(Collection<Map<String, T>> collection) {
63 | return null;
64 | }
65 | };
66 |
67 | }
68 |
--------------------------------------------------------------------------------
/src/test/resources/com/github/dariobalinzo/filter/document.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "elastic",
3 | "age": 7,
4 | "order_list": [
5 | {
6 | "id": 1,
7 | "details": {
8 | "nested_det": "test nested inside list",
9 | "qty": 1
10 | }
11 | },
12 | {
13 | "id": 2,
14 | "details": {
15 | "nested_det": "test nested inside list",
16 | "qty": 2
17 | }
18 | }
19 | ],
20 | "obj": {
21 | "key": 55,
22 | "details": {
23 | "nested_det": "test nested inside list",
24 | "qty": 2
25 | }
26 | },
27 | "other-obj": {
28 | "foo": "bar"
29 | }
30 | }
--------------------------------------------------------------------------------
/src/test/resources/com/github/dariobalinzo/logback.xml:
--------------------------------------------------------------------------------
1 | <configuration>
2 |     <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
3 |         <encoder>
4 |             <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
5 |         </encoder>
6 |     </appender>
7 |
8 |     <root level="info">
9 |         <appender-ref ref="STDOUT"/>
10 |     </root>
11 |
12 | </configuration>
--------------------------------------------------------------------------------
/src/test/resources/com/github/dariobalinzo/schema/complexDocument.json:
--------------------------------------------------------------------------------
1 | {
2 | "current-time": "2020-12-11T07:24:44Z",
3 | "ip": "192.168.1.111",
4 | "xxx-api-endpoint": "https://192.168.1.111:5001/api",
5 | "parent": "xxx/d0cee1fb-8fc8-427a-88de-ab4f7b2f8b8b",
6 | "architecture": "armv7l",
7 | "updated": "2020-12-11T07:24:45.198Z",
8 | "last-boot": "2020-12-09T21:45:27Z",
9 | "xxx-engine-version": "1.13.0",
10 | "docker-server-version": "19.03.14",
11 | "created": "2020-12-10T08:06:38.652Z",
12 | "hostname": "xyz",
13 | "updated-by": "xxx/d0cee1fb-8fc8-427a-88de-ab4f7b2f8b8b",
14 | "gpio-pins": [
15 | {
16 | "pin": 1,
17 | "name": "3.3v"
18 | },
19 | {
20 | "pin": 2,
21 | "name": "5v"
22 | },
23 | {
24 | "pin": 3,
25 | "bcm": 2,
26 | "name": "SDA.1",
27 | "mode": "IN",
28 | "voltage": 1
29 | },
30 | {
31 | "pin": 4,
32 | "name": "5v"
33 | },
34 | {
35 | "pin": 5,
36 | "bcm": 3,
37 | "name": "SCL.1",
38 | "mode": "IN",
39 | "voltage": 1
40 | },
41 | {
42 | "pin": 6,
43 | "name": "0v"
44 | },
45 | {
46 | "pin": 7,
47 | "bcm": 4,
48 | "name": "GPIO. 7",
49 | "mode": "IN",
50 | "voltage": 1
51 | },
52 | {
53 | "pin": 8,
54 | "name": "TxD",
55 | "mode": "IN",
56 | "voltage": 1
57 | },
58 | {
59 | "pin": 9,
60 | "name": "0v"
61 | },
62 | {
63 | "pin": 10,
64 | "name": "RxD",
65 | "mode": "IN",
66 | "voltage": 1
67 | },
68 | {
69 | "pin": 11,
70 | "bcm": 17,
71 | "name": "GPIO. 0",
72 | "mode": "IN",
73 | "voltage": 0
74 | },
75 | {
76 | "pin": 12,
77 | "name": "GPIO. 1",
78 | "mode": "IN",
79 | "voltage": 0
80 | },
81 | {
82 | "pin": 13,
83 | "bcm": 27,
84 | "name": "GPIO. 2",
85 | "mode": "IN",
86 | "voltage": 0
87 | },
88 | {
89 | "pin": 14,
90 | "name": "0v"
91 | },
92 | {
93 | "pin": 15,
94 | "bcm": 22,
95 | "name": "GPIO. 3",
96 | "mode": "IN",
97 | "voltage": 0
98 | },
99 | {
100 | "pin": 16,
101 | "name": "GPIO. 4",
102 | "mode": "IN",
103 | "voltage": 0
104 | },
105 | {
106 | "pin": 17,
107 | "name": "3.3v"
108 | },
109 | {
110 | "pin": 18,
111 | "name": "GPIO. 5",
112 | "mode": "IN",
113 | "voltage": 0
114 | },
115 | {
116 | "pin": 19,
117 | "bcm": 10,
118 | "name": "MOSI",
119 | "mode": "IN",
120 | "voltage": 0
121 | },
122 | {
123 | "pin": 20,
124 | "name": "0v"
125 | },
126 | {
127 | "pin": 21,
128 | "bcm": 9,
129 | "name": "MISO",
130 | "mode": "IN",
131 | "voltage": 0
132 | },
133 | {
134 | "pin": 22,
135 | "name": "GPIO. 6",
136 | "mode": "IN",
137 | "voltage": 0
138 | },
139 | {
140 | "pin": 23,
141 | "bcm": 11,
142 | "name": "SCLK",
143 | "mode": "IN",
144 | "voltage": 0
145 | },
146 | {
147 | "pin": 24,
148 | "name": "CE0",
149 | "mode": "IN",
150 | "voltage": 1
151 | },
152 | {
153 | "pin": 25,
154 | "name": "0v"
155 | },
156 | {
157 | "pin": 26,
158 | "name": "CE1",
159 | "mode": "IN",
160 | "voltage": 1
161 | },
162 | {
163 | "pin": 27,
164 | "bcm": 0,
165 | "name": "SDA.0",
166 | "mode": "IN",
167 | "voltage": 1
168 | },
169 | {
170 | "pin": 28,
171 | "name": "SCL.0",
172 | "mode": "IN",
173 | "voltage": 1
174 | },
175 | {
176 | "pin": 29,
177 | "bcm": 5,
178 | "name": "GPIO.21",
179 | "mode": "IN",
180 | "voltage": 1
181 | },
182 | {
183 | "pin": 30,
184 | "name": "0v"
185 | },
186 | {
187 | "pin": 31,
188 | "bcm": 6,
189 | "name": "GPIO.22",
190 | "mode": "IN",
191 | "voltage": 1
192 | },
193 | {
194 | "pin": 32,
195 | "name": "GPIO.26",
196 | "mode": "IN",
197 | "voltage": 0
198 | },
199 | {
200 | "pin": 33,
201 | "bcm": 13,
202 | "name": "GPIO.23",
203 | "mode": "IN",
204 | "voltage": 0
205 | },
206 | {
207 | "pin": 34,
208 | "name": "0v"
209 | },
210 | {
211 | "pin": 35,
212 | "bcm": 19,
213 | "name": "GPIO.24",
214 | "mode": "IN",
215 | "voltage": 0
216 | },
217 | {
218 | "pin": 36,
219 | "name": "GPIO.27",
220 | "mode": "IN",
221 | "voltage": 0
222 | },
223 | {
224 | "pin": 37,
225 | "bcm": 26,
226 | "name": "GPIO.25",
227 | "mode": "IN",
228 | "voltage": 0
229 | },
230 | {
231 | "pin": 38,
232 | "name": "GPIO.28",
233 | "mode": "IN",
234 | "voltage": 0
235 | },
236 | {
237 | "pin": 39,
238 | "name": "0v"
239 | },
240 | {
241 | "pin": 40,
242 | "name": "GPIO.29",
243 | "mode": "IN",
244 | "voltage": 0
245 | }
246 | ],
247 | "created-by": "internal",
248 | "status": "OPERATIONAL",
249 | "id": "xxx-status/b5054ecf-9f18-4b86-bc95-30933fe05581",
250 | "operating-system": "Raspbian GNU/Linux 10 (buster)",
251 | "resource-type": "xxx-status",
252 | "acl": {
253 | "view-acl": [
254 | "user/80454ed0-65eb-4b77-864e-2dc525627e38"
255 | ],
256 | "view-meta": [
257 | "xxx/d0cee1fb-8fc8-427a-88de-ab4f7b2f8b8b",
258 | "user/80454ed0-65eb-4b77-864e-2dc525627e38"
259 | ],
260 | "view-data": [
261 | "xxx/d0cee1fb-8fc8-427a-88de-ab4f7b2f8b8b",
262 | "user/80454ed0-65eb-4b77-864e-2dc525627e38"
263 | ],
264 | "edit-data": [
265 | "xxx/d0cee1fb-8fc8-427a-88de-ab4f7b2f8b8b"
266 | ],
267 | "edit-meta": [
268 | "xxx/d0cee1fb-8fc8-427a-88de-ab4f7b2f8b8b"
269 | ],
270 | "owners": [
271 | "group/nuvla-admin"
272 | ]
273 | },
274 | "next-heartbeat": "2020-12-11T07:25:15.209Z",
275 | "version": 1,
276 | "resources": {
277 | "cpu": {
278 | "topic": "cpu",
279 | "raw-sample": "{\"capacity\": 4, \"load\": 0.64}",
280 | "capacity": 4,
281 | "load": 0.64
282 | },
283 | "ram": {
284 | "topic": "ram",
285 | "raw-sample": "{\"capacity\": 3828, \"used\": 1235}",
286 | "capacity": 3828,
287 | "used": 1235
288 | },
289 | "disks": [
290 | {
291 | "device": "overlay",
292 | "capacity": 28,
293 | "used": 4,
294 | "topic": "disks",
295 | "raw-sample": "{\"device\": \"overlay\", \"capacity\": 28, \"used\": 4}"
296 | }
297 | ],
298 | "net-stats": [
299 | {
300 | "interface": "docker_gwbridge",
301 | "bytes-transmitted": 1810018,
302 | "bytes-received": 633
303 | },
304 | {
305 | "interface": "lo",
306 | "bytes-transmitted": 153116745,
307 | "bytes-received": 153116745
308 | },
309 | {
310 | "interface": "veth53b9858",
311 | "bytes-transmitted": 3865916,
312 | "bytes-received": 1275
313 | },
314 | {
315 | "interface": "vetha95aba6",
316 | "bytes-transmitted": 4349209,
317 | "bytes-received": 0
318 | },
319 | {
320 | "interface": "docker0",
321 | "bytes-transmitted": 58162393,
322 | "bytes-received": 1347447
323 | },
324 | {
325 | "interface": "veth2d9e5be",
326 | "bytes-transmitted": 20942074,
327 | "bytes-received": 12350057
328 | },
329 | {
330 | "interface": "vethe4e283e",
331 | "bytes-transmitted": 723871,
332 | "bytes-received": 352184
333 | },
334 | {
335 | "interface": "veth5207da0",
336 | "bytes-transmitted": 23136462,
337 | "bytes-received": 61398287
338 | },
339 | {
340 | "interface": "vethef962b3",
341 | "bytes-transmitted": 3858289,
342 | "bytes-received": 689
343 | },
344 | {
345 | "interface": "vetha49fdcb",
346 | "bytes-transmitted": 3936275,
347 | "bytes-received": 7957
348 | },
349 | {
350 | "interface": "br-193effb5470e",
351 | "bytes-transmitted": 145658655,
352 | "bytes-received": 147435494
353 | },
354 | {
355 | "interface": "wlan0",
356 | "bytes-transmitted": 91616660,
357 | "bytes-received": 307622918
358 | },
359 | {
360 | "interface": "veth3d6d8ed",
361 | "bytes-transmitted": 25273385,
362 | "bytes-received": 66929714
363 | },
364 | {
365 | "interface": "eth0",
366 | "bytes-transmitted": 0,
367 | "bytes-received": 0
368 | }
369 | ]
370 | },
371 | "inferred-location": [
372 | 6.0826,
373 | 46.1443
374 | ],
375 | "vulnerabilities": {
376 | "summary": {
377 | "total": 1,
378 | "affected-products": [
379 | "OpenSSH 7.9p1 Raspbian 10+deb10u2"
380 | ],
381 | "average-score": 8.1
382 | },
383 | "items": [
384 | {
385 | "product": "OpenSSH 7.9p1 Raspbian 10+deb10u2",
386 | "vulnerability-id": "CVE-2019-7639",
387 | "vulnerability-score": 8.1
388 | }
389 | ]
390 | }
391 | }
--------------------------------------------------------------------------------