├── .github ├── CODEOWNERS └── workflows │ ├── check.yml │ └── publish.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── NOTICE ├── NOTICE_GEM ├── README.md ├── build.gradle ├── config └── checkstyle │ ├── checkstyle.xml │ └── default.xml ├── docker-compose.yml ├── gradle.lockfile ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ ├── gradle-wrapper.properties │ └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── settings.gradle └── src ├── main └── java │ └── org │ └── embulk │ └── output │ └── elasticsearch │ ├── ElasticsearchHttpClient.java │ ├── ElasticsearchOutputPlugin.java │ ├── ElasticsearchOutputPluginDelegate.java │ └── ElasticsearchRecordBuffer.java └── test ├── java └── org │ └── embulk │ └── output │ └── elasticsearch │ ├── ElasticsearchTestUtils.java │ ├── TestElasticsearchHttpClient.java │ ├── TestElasticsearchOutputPlugin.java │ └── TestElasticsearchOutputPluginJSON.java └── resources ├── sample_01.csv └── sample_01.json /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @embulk/elastic-maintainers 2 | -------------------------------------------------------------------------------- /.github/workflows/check.yml: -------------------------------------------------------------------------------- 1 | name: Check 2 | on: [ pull_request, push ] 3 | jobs: 4 | check: 5 | runs-on: ubuntu-latest 6 | # push: always run. 7 | # pull_request: run only when the PR is submitted from a forked repository, not within this repository. 
8 | if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository 9 | strategy: 10 | fail-fast: false 11 | steps: 12 | - uses: actions/checkout@v4 13 | - name: Set up OpenJDK 8 14 | uses: actions/setup-java@v4 15 | with: 16 | java-version: 8 17 | distribution: "temurin" 18 | cache: "gradle" 19 | - name: Run Elasticsearch container 20 | run: docker compose up -d # Compose v2 plugin; the standalone docker-compose (v1) binary is no longer installed on GitHub-hosted runners 21 | - name: List containers 22 | run: docker compose ps 23 | - name: Grant execute permission for gradlew 24 | run: chmod +x gradlew 25 | - name: Testing 26 | run: ./gradlew check --console rich --info 27 | - name: Check building 28 | run: ./gradlew gem --console rich --info 29 | - name: JaCoCo test report 30 | if: success() 31 | run: ./gradlew jacocoTestReport 32 | - name: Pack reports 33 | if: always() 34 | run: zip -9 -r -q reports.zip ./build/reports 35 | - uses: actions/upload-artifact@v4 36 | if: success() 37 | with: 38 | name: reports 39 | path: ./reports.zip 40 | - uses: actions/upload-artifact@v4 41 | if: success() 42 | with: 43 | name: gem 44 | path: ./pkg/*.gem 45 | - uses: actions/upload-artifact@v4 46 | if: success() 47 | with: 48 | name: jar 49 | path: ./build/libs/*.jar 50 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | on: 3 | push: 4 | tags: 5 | - "v0.*" 6 | jobs: 7 | publish: 8 | environment: 9 | name: maven-central-and-ruby-gems 10 | runs-on: ubuntu-latest 11 | strategy: 12 | fail-fast: true 13 | steps: 14 | - uses: actions/checkout@v4 15 | - name: Set up OpenJDK 8 16 | uses: actions/setup-java@v4 17 | with: 18 | java-version: 8 19 | distribution: "temurin" 20 | - name: Publish 21 | run: | 22 | mkdir -p $HOME/.gem 23 | touch $HOME/.gem/credentials 24 | chmod 0600 $HOME/.gem/credentials 25 | printf -- "---\n:rubygems_api_key: ${RUBYGEMS_API_KEY}\n" > $HOME/.gem/credentials 26 |
./gradlew --stacktrace publishMavenPublicationToMavenCentralRepository gemPush 27 | env: 28 | ORG_GRADLE_PROJECT_ossrhUsername: ${{ vars.OSSRH_USERNAME }} 29 | ORG_GRADLE_PROJECT_ossrhPassword: ${{ secrets.OSSRH_PASSWORD }} 30 | ORG_GRADLE_PROJECT_signingKey: ${{ secrets.GPG_PRIVATE_KEY_ARMOR }} 31 | ORG_GRADLE_PROJECT_signingPassword: ${{ secrets.GPG_PRIVATE_KEY_PASSWORD }} 32 | RUBYGEMS_API_KEY: ${{ secrets.RUBYGEMS_API_KEY }} 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.iml 3 | .idea 4 | build/ 5 | /classpath/ 6 | /.gradle 7 | /pkg/ 8 | es-data 9 | out 10 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.7.0 - 2024-03-01 2 | * [update] Add jetty94 to support TLS1.3 3 | 4 | ## 0.6.0 - 2023-02-14 5 | * [maintenance] Support typeless endpoint for ES version 8.x [#71](https://github.com/embulk/embulk-output-elasticsearch/pull/71) 6 | 7 | ## 0.4.7 - 2018-12-14 8 | * [maintenance] Show warning logs instead of throwing ConfigException for AWS ES [#49](https://github.com/embulk/embulk-output-elasticsearch/pull/49) 9 | * [maintenance] Updated Embulk version v0.8.36 to v0.9.11 [#55](https://github.com/embulk/embulk-output-elasticsearch/pull/55) 10 | 11 | ## 0.4.6 - 2018-08-01 12 | * [new feature] Add "connect_timeout_millis" option [#53](https://github.com/embulk/embulk-output-elasticsearch/pull/53) 13 | * [new feature] Only build with Java8 [#52](https://github.com/embulk/embulk-output-elasticsearch/pull/52) 14 | * [maintenance] Fix bug: "timeout_millis" option doesn't work as expected [#51](https://github.com/muga/embulk-output-elasticsearch/pull/51) 15 | 16 | ## 0.4.5 - 2017-11-29 17 | * [new feature] Add "fill_null_for_empty_column" option and allow insert null value when column is empty 
[#47](https://github.com/embulk/embulk-output-elasticsearch/pull/47) Thanks! @kfitzgerald 18 | 19 | ## 0.4.4 - 2017-06-16 20 | 21 | * [maintenance] Improve retry logic - Create RetryHelper instance only at sendRequest() method [#41](https://github.com/muga/embulk-output-elasticsearch/pull/41) 22 | 23 | ## 0.4.3 - 2017-06-12 24 | 25 | * [maintenance] Improve exception handling [#38](https://github.com/muga/embulk-output-elasticsearch/pull/38) 26 | * [maintenance] Fix ElasticsearchRecordBuffer to call retryHelper.close() [#39](https://github.com/muga/embulk-output-elasticsearch/pull/39) 27 | 28 | ## 0.4.2 - 2017-05-31 29 | 30 | * [maintenance] Update embulk-base-restclient to fix ArrayIndexOutOfBoundsException [#37](https://github.com/muga/embulk-output-elasticsearch/pull/37) 31 | 32 | ## 0.4.1 - 2017-04-21 33 | 34 | * [maintenance] Check snapshot progress status before delete index [#36](https://github.com/muga/embulk-output-elasticsearch/pull/36) 35 | 36 | ## 0.4.0 - 2017-03-28 37 | 38 | * [new feature] Support multiple Elasticsearch version [#32](https://github.com/muga/embulk-output-elasticsearch/pull/32) 39 | * [new feature] Support SSL and Basic authentication including 'Security'(formally 'Shield') [#33](https://github.com/muga/embulk-output-elasticsearch/pull/33) 40 | * [maintenance] Improve export logic [#34](https://github.com/muga/embulk-output-elasticsearch/pull/34) 41 | 42 | ## 0.3.1 - 2016-06-21 43 | 44 | * [maintenance] Update Elasticsearch client to 2.3.3 [#25](https://github.com/muga/embulk-output-elasticsearch/pull/25) 45 | 46 | ## 0.3.0 - 2016-02-22 47 | 48 | * [maintenance] Upgrade Embulk v08 [#21](https://github.com/muga/embulk-output-elasticsearch/pull/21) 49 | 50 | ## 0.2.1 - 2016-02-05 51 | 52 | * [maintenance] Fix bug. 
Force to fail jobs if nodes down while executing [#19](https://github.com/muga/embulk-output-elasticsearch/pull/19) 53 | 54 | ## 0.2.0 - 2016-01-26 55 | 56 | * [new feature] Support Elasticsearch 2.x [#12](https://github.com/muga/embulk-output-elasticsearch/pull/12) 57 | * [new feature] Added replace mode [#15](https://github.com/muga/embulk-output-elasticsearch/pull/15) 58 | * [maintenance] Fix id param's behavior [#14](https://github.com/muga/embulk-output-elasticsearch/pull/14) 59 | * [maintenance] Added unit tests [#17](https://github.com/muga/embulk-output-elasticsearch/pull/17) 60 | * [maintenance] Upgraded Embulk to v0.7.7 61 | 62 | ## 0.1.8 - 2015-08-19 63 | 64 | * [maintenance] Upgraded Embulk to v0.7.0 65 | * [maintenance] Upgraded Elasticsearch to v1.5.2 66 | 67 | ## 0.1.7 - 2015-05-09 68 | 69 | * [maintenance] Fixed handling null value [#10](https://github.com/muga/embulk-output-elasticsearch/pull/10) 70 | 71 | ## 0.1.6 - 2015-04-14 72 | 73 | * [new feature] Added bulk_size parameter [#8](https://github.com/muga/embulk-output-elasticsearch/pull/8) 74 | 75 | ## 0.1.5 - 2015-03-26 76 | 77 | * [new feature] Added cluster_name parameter [#7](https://github.com/muga/embulk-output-elasticsearch/pull/7) 78 | 79 | ## 0.1.4 - 2015-03-19 80 | 81 | * [maintenance] Fixed parameter names index_name to index, doc_id_column to id. 
[#5](https://github.com/muga/embulk-output-elasticsearch/pull/5) 82 | * [maintenance] Fixed typo at parameter [#6](https://github.com/muga/embulk-output-elasticsearch/pull/6) 83 | 84 | ## 0.1.3 - 2015-02-25 85 | 86 | * [new feature] Supported timestamp column [#4](https://github.com/muga/embulk-output-elasticsearch/pull/4) 87 | 88 | ## 0.1.2 - 2015-02-24 89 | 90 | ## 0.1.1 - 2015-02-16 91 | 92 | ## 0.1.0 - 2015-02-16 93 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 
30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 
62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | embulk-output-elasticsearch 2 | Copyright 2015 The Embulk project 3 | 4 | This product includes software developed at 5 | The Embulk project (https://www.embulk.org/). 6 | -------------------------------------------------------------------------------- /NOTICE_GEM: -------------------------------------------------------------------------------- 1 | embulk-output-elasticsearch 2 | Copyright 2015 The Embulk project 3 | 4 | The gem distribution of this product includes software developed in a part of the Embulk project (https://www.embulk.org/). 5 | It has been licenced under the Apache Software License, Version 2.0. 6 | 7 | The gem distribution of this product includes JARs of the Jackson project (https://github.com/FasterXML/jackson), as-is. 8 | They are licensed under the Apache Software License, Version 2.0. 9 | 10 | The gem distribution of this product includes JARs of the Jakarta Bean Validation API 1.1 (https://beanvalidation.org/1.1/), as-is. 
11 | It is licensed under the Apache Software License, Version 2.0. 12 | 13 | The gem distribution of this product includes JARs of the Eclipse Jetty Project (https://www.eclipse.org/jetty/) 9.4, as-is. 14 | They are licensed under dual licenses of the Apache Software License, Version 2.0, and the Eclipse Public License 2.0. 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Elasticsearch output plugin for Embulk 2 | 3 | **Notice** This plugin doesn't actively support [Amazon(AWS) Elasticsearch Service](https://aws.amazon.com/elasticsearch-service/). 4 | AWS Elasticsearch Service has supported AWS VPC since Oct 2017, so users can access ES from EC2 instances in a VPC subnet without any authentication. 5 | You can use this plugin for AWS ES at your own risk. 6 | 7 | - *[Amazon Elasticsearch Service Limits](http://docs.aws.amazon.com/elasticsearch-service/latest/developerguide/aes-limits.html)* 8 | 9 | ## Overview 10 | 11 | * **Plugin type**: output 12 | * **Rollback supported**: no 13 | * **Resume supported**: no 14 | * **Cleanup supported**: no 15 | 16 | ## Configuration 17 | 18 | - **mode**: "insert" or "replace". See below (string, optional, default is insert) 19 | - **nodes**: list of nodes. nodes are pairs of host and port (list, required) 20 | - NOTE: This plugin uses HTTP/REST Clients and uses TCP:9200 as a default. TCP:9300 is usually used for Transport Client. 21 | - **use_ssl** Use SSL encryption (boolean, default is false) 22 | - **auth_method** (string, default is 'none') 'none'/'basic'. See also [Authentication](#authentication). 23 | - **user** Username for basic authentication (string, default is null) 24 | - **password** Password for above user (string, default is null) 25 | - ~~**cluster_name**: name of the cluster (string, default is "elasticsearch")~~ Not used now.
May use in the future 26 | - **index**: index name (string, required) 27 | - **index_type**: index type (string, required) 28 | - **id**: document id column (string, default is null) 29 | - **bulk_actions**: Sets when to flush a new bulk request based on the number of actions currently added. (int, default is 1000) 30 | - **bulk_size**: Sets when to flush a new bulk request based on the size of actions currently added. (long, default is 5242880) 31 | - **fill_null_for_empty_column**: Fill null value when column value is empty (boolean, optional, default is false) 32 | - ~~**concurrent_requests**: concurrent_requests (int, default is 5)~~ Not used now. May use in the future 33 | - **maximum_retries** Maximum number of retries (int, optional, default is 7) 34 | - **initial_retry_interval_millis** Initial interval between retries in milliseconds (int, optional, default is 1000) 35 | - **maximum_retry_interval_millis** Maximum interval between retries in milliseconds (int, optional, default is 120000) 36 | - **timeout_millis** timeout in milliseconds for each HTTP request (int, optional, default is 60000) 37 | - **connect_timeout_millis** connection timeout in milliseconds for HTTP client (int, optional, default is 60000) 38 | - **max_snapshot_waiting_secs** Maximum waiting time in seconds while a snapshot is being created before deleting the index. Works when `mode: replace` (int, optional, default is 1800) 39 | ### Modes 40 | 41 | #### insert: 42 | 43 | default. 44 | This mode writes data to an existing index. 45 | 46 | #### replace: 47 | 48 | 1. Create new temporary index 49 | 2. Insert data into the new index 50 | 3. Replace the alias with the new index. If the alias doesn't exist, the plugin will create a new alias. 51 | 4.
Delete the existing (old) index if it exists 52 | 53 | An index must not exist with the same name as the alias. 54 | 55 | ```yaml 56 | out: 57 | type: elasticsearch 58 | mode: replace 59 | nodes: 60 | - {host: localhost, port: 9200} 61 | index: # plugin generates index name like _%Y%m%d-%H%M%S 62 | index_type: 63 | ``` 64 | 65 | ### Authentication 66 | 67 | This plugin supports Basic authentication and works with [Elastic Cloud](https://cloud.elastic.co/) and 'Security' (formerly Shield). 68 | 'Security' also supports LDAP and Active Directory. This plugin doesn't support these auth methods. 69 | 70 | ```yaml 71 | use_ssl: true 72 | auth_method: basic 73 | user: 74 | password: 75 | ``` 76 | 77 | ## Example 78 | 79 | ```yaml 80 | out: 81 | type: elasticsearch 82 | mode: insert 83 | nodes: 84 | - {host: localhost, port: 9200} 85 | index: 86 | index_type: 87 | ``` 88 | 89 | ## Test 90 | 91 | First install Docker and Docker Compose, then run `docker-compose up -d` 92 | so that an ES server is launched locally; you can then run tests with `./gradlew test`. 93 | 94 | ```sh 95 | $ docker-compose up -d 96 | Creating network "embulk-output-elasticsearch_default" with the default driver 97 | Creating embulk-output-elasticsearch_server ... done 98 | 99 | $ docker-compose ps 100 | Name Command State Ports 101 | ------------------------------------------------------------------------------------------------------------------------------ 102 | embulk-output-elasticsearch_server /docker-entrypoint.sh elas ... Up 0.0.0.0:19200->9200/tcp, 0.0.0.0:19300->9300/tcp 103 | 104 | $ ./gradlew test # -t to watch change of files and rebuild continuously 105 | ``` 106 | 107 | For Maintainers 108 | ---------------- 109 | 110 | ### Release 111 | 112 | Modify `version` in `build.gradle` at a detached commit, and then tag the commit with an annotation. 113 | 114 | ``` 115 | git checkout --detach master 116 | (Edit: Remove "-SNAPSHOT" in "version" in build.gradle.)
117 | git add build.gradle 118 | git commit -m "Release vX.Y.Z" 119 | git tag -a vX.Y.Z 120 | (Edit: Write a tag annotation in the changelog format.) 121 | ``` 122 | 123 | See [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) for the changelog format. We adopt a part of it for Git's tag annotation like below. 124 | 125 | ``` 126 | ## [X.Y.Z] - YYYY-MM-DD 127 | ### Added 128 | - Added a feature. 129 | ### Changed 130 | - Changed something. 131 | ### Fixed 132 | - Fixed a bug. 133 | ``` 134 | 135 | Push the annotated tag, then. It triggers a release operation on GitHub Actions after approval. 136 | 137 | ``` 138 | git push -u origin vX.Y.Z 139 | ``` 140 | -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id "maven-publish" 3 | id "java" 4 | id "signing" 5 | id "jacoco" 6 | id "checkstyle" 7 | id "org.embulk.embulk-plugins" version "0.6.2" 8 | } 9 | 10 | repositories { 11 | mavenCentral() 12 | } 13 | 14 | group = "org.embulk" 15 | version = "0.7.0-SNAPSHOT" 16 | description = "Elasticsearch output plugin is an Embulk plugin that loads records to Elasticsearch read by any input plugins." 17 | 18 | tasks.withType(JavaCompile) { 19 | options.compilerArgs << "-Xlint:deprecation" << "-Xlint:unchecked" 20 | options.encoding = "UTF-8" 21 | } 22 | 23 | java { 24 | toolchain { 25 | languageVersion = JavaLanguageVersion.of(8) 26 | } 27 | 28 | withJavadocJar() 29 | withSourcesJar() 30 | } 31 | 32 | dependencies { 33 | compileOnly "org.embulk:embulk-spi:0.11" 34 | 35 | implementation("org.embulk:embulk-util-config:0.3.4") { 36 | // They conflict with embulk-core. They are once excluded here, 37 | // and added explicitly with versions exactly the same with embulk-core:0.10.28. 
38 | exclude group: "com.fasterxml.jackson.core", module: "jackson-annotations" 39 | exclude group: "com.fasterxml.jackson.core", module: "jackson-core" 40 | exclude group: "com.fasterxml.jackson.core", module: "jackson-databind" 41 | exclude group: "com.fasterxml.jackson.datatype", module: "jackson-datatype-jdk8" 42 | exclude group: "javax.validation", module: "validation-api" 43 | } 44 | 45 | // They are once excluded from transitive dependencies of other dependencies, 46 | // and added explicitly with versions exactly the same with embulk-core:0.10.28. 47 | implementation "com.fasterxml.jackson.core:jackson-annotations:2.6.7" 48 | implementation "com.fasterxml.jackson.core:jackson-core:2.6.7" 49 | implementation "com.fasterxml.jackson.core:jackson-databind:2.6.7.5" 50 | implementation "com.fasterxml.jackson.datatype:jackson-datatype-jdk8:2.6.7" 51 | implementation "javax.validation:validation-api:1.1.0.Final" 52 | 53 | implementation "org.embulk:embulk-base-restclient:0.10.1" 54 | implementation "org.embulk:embulk-util-retryhelper-jetty94:0.9.0" 55 | 56 | implementation "org.embulk:embulk-util-timestamp:0.2.2" 57 | 58 | testImplementation "junit:junit:4.13.2" 59 | testImplementation "org.embulk:embulk-core:0.11.2" 60 | testImplementation "org.embulk:embulk-deps:0.11.2" 61 | testImplementation "org.embulk:embulk-junit4:0.11.2" 62 | testImplementation "org.embulk:embulk-input-file:0.11.0" 63 | testImplementation "org.embulk:embulk-parser-csv:0.11.4" 64 | testImplementation "com.google.guava:guava:18.0" 65 | } 66 | 67 | embulkPlugin { 68 | mainClass = "org.embulk.output.elasticsearch.ElasticsearchOutputPlugin" 69 | category = "output" 70 | type = "elasticsearch" 71 | } 72 | 73 | javadoc { 74 | options { 75 | locale = "en_US" 76 | encoding = "UTF-8" 77 | } 78 | } 79 | 80 | jar { 81 | metaInf { 82 | from rootProject.file("LICENSE") 83 | from rootProject.file("NOTICE") 84 | } 85 | } 86 | 87 | sourcesJar { 88 | metaInf { 89 | from rootProject.file("LICENSE") 90 | from 
rootProject.file("NOTICE") 91 | } 92 | } 93 | 94 | javadocJar { 95 | metaInf { 96 | from rootProject.file("LICENSE") 97 | from rootProject.file("NOTICE") 98 | } 99 | } 100 | 101 | publishing { 102 | publications { 103 | maven(MavenPublication) { 104 | groupId = project.group 105 | artifactId = project.name 106 | 107 | from components.java // Must be "components.java". The dependency modification works only for it. 108 | // javadocJar and sourcesJar are added by java.withJavadocJar() and java.withSourcesJar() above. 109 | // See: https://docs.gradle.org/current/javadoc/org/gradle/api/plugins/JavaPluginExtension.html 110 | 111 | pom { // https://central.sonatype.org/pages/requirements.html 112 | packaging "jar" 113 | 114 | name = project.name 115 | description = project.description 116 | url = "https://www.embulk.org/" 117 | 118 | licenses { 119 | license { 120 | // http://central.sonatype.org/pages/requirements.html#license-information 121 | name = "The Apache License, Version 2.0" 122 | url = "https://www.apache.org/licenses/LICENSE-2.0.txt" 123 | distribution = "repo" 124 | } 125 | } 126 | 127 | developers { 128 | developer { 129 | name = "Muga Nishizawa" 130 | email = "muga.nishizawa@gmail.com" 131 | } 132 | developer { 133 | name = "Sadayuki Furuhashi" 134 | email = "frsyuki@gmail.com" 135 | } 136 | developer { 137 | name = "hirakiuc" 138 | email = "hirakiuc@gmail.com" 139 | } 140 | developer { 141 | name = "Toyama Hiroshi" 142 | email = "toyama0919@gmail.com" 143 | } 144 | developer { 145 | name = "Satoshi Akama" 146 | email = "satoshiakama@gmail.com" 147 | } 148 | developer { 149 | name = "Kevin M Fitzgerald" 150 | email = "kevin@kevinfitzgerald.net" 151 | } 152 | developer { 153 | name = "Serhii Himadieiev" 154 | email = "gimadeevsv@gmail.com" 155 | } 156 | developer { 157 | name = "Dai MIKURUBE" 158 | email = "dmikurube@treasure-data.com" 159 | } 160 | } 161 | 162 | scm { 163 | connection = "scm:git:git://github.com/embulk/embulk-output-elasticsearch.git" 
164 | developerConnection = "scm:git:git@github.com:embulk/embulk-output-elasticsearch.git" 165 | url = "https://github.com/embulk/embulk-output-elasticsearch" 166 | } 167 | } 168 | } 169 | } 170 | 171 | repositories { 172 | maven { // publishMavenPublicationToMavenCentralRepository 173 | name = "mavenCentral" 174 | if (project.version.endsWith("-SNAPSHOT")) { 175 | url "https://oss.sonatype.org/content/repositories/snapshots" 176 | } else { 177 | url "https://oss.sonatype.org/service/local/staging/deploy/maven2" 178 | } 179 | 180 | credentials { 181 | username = project.hasProperty("ossrhUsername") ? ossrhUsername : "" 182 | password = project.hasProperty("ossrhPassword") ? ossrhPassword : "" 183 | } 184 | } 185 | } 186 | } 187 | 188 | signing { 189 | if (project.hasProperty("signingKey") && project.hasProperty("signingPassword")) { 190 | logger.lifecycle("Signing with an in-memory key.") 191 | useInMemoryPgpKeys(signingKey, signingPassword) 192 | } 193 | sign publishing.publications.maven 194 | } 195 | 196 | gem { 197 | authors = [ "Muga Nishizawa" ] 198 | email = [ "muga.nishizawa@gmail.com" ] 199 | summary = "Elasticsearch output plugin for Embulk" 200 | homepage = "https://github.com/embulk/embulk-output-elasticsearch" 201 | licenses = [ "Apache-2.0" ] 202 | 203 | from rootProject.file("LICENSE") 204 | from rootProject.file("NOTICE_GEM") 205 | rename ("NOTICE_GEM", "NOTICE") 206 | } 207 | 208 | gemPush { 209 | host = "https://rubygems.org" 210 | } 211 | 212 | checkstyle { 213 | configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml") 214 | toolVersion = '6.14.1' 215 | } 216 | checkstyleMain { 217 | configFile = file("${project.rootDir}/config/checkstyle/default.xml") 218 | ignoreFailures = true 219 | } 220 | checkstyleTest { 221 | configFile = file("${project.rootDir}/config/checkstyle/default.xml") 222 | ignoreFailures = true 223 | } 224 | task checkstyle(type: Checkstyle) { 225 | classpath = sourceSets.main.output + sourceSets.test.output 226 
| source = sourceSets.main.allJava + sourceSets.test.allJava 227 | } 228 | -------------------------------------------------------------------------------- /config/checkstyle/checkstyle.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /config/checkstyle/default.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 108 | 109 | 110 | 111 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.1' 2 | services: 3 | elasticsearch: 4 | container_name: embulk-output-elasticsearch_server 5 | image: elasticsearch:5 6 | #image: elasticsearch:6.8.21 7 | #image: elasticsearch:7.17.8 8 | # For Mac M1 9 | #image: 
docker.elastic.co/elasticsearch/elasticsearch:7.17.6-arm64 10 | #image: elasticsearch:8.6.1 11 | ports: 12 | - 19200:9200 13 | - 19300:9300 14 | 15 | # use this environment for v7.x & v8.x 16 | #environment: 17 | # - discovery.type=single-node 18 | # - xpack.security.enabled=false 19 | volumes: 20 | - ./es-data:/usr/share/elasticsearch/data/ 21 | -------------------------------------------------------------------------------- /gradle.lockfile: -------------------------------------------------------------------------------- 1 | # This is a Gradle generated file for dependency locking. 2 | # Manual edits can break the build and are not advised. 3 | # This file is expected to be part of source control. 4 | com.fasterxml.jackson.core:jackson-annotations:2.6.7=compileClasspath,runtimeClasspath 5 | com.fasterxml.jackson.core:jackson-core:2.6.7=compileClasspath,runtimeClasspath 6 | com.fasterxml.jackson.core:jackson-databind:2.6.7.5=compileClasspath,runtimeClasspath 7 | com.fasterxml.jackson.datatype:jackson-datatype-jdk8:2.6.7=compileClasspath,runtimeClasspath 8 | javax.validation:validation-api:1.1.0.Final=compileClasspath,runtimeClasspath 9 | org.eclipse.jetty:jetty-client:9.4.51.v20230217=compileClasspath,runtimeClasspath 10 | org.eclipse.jetty:jetty-http:9.4.51.v20230217=compileClasspath,runtimeClasspath 11 | org.eclipse.jetty:jetty-io:9.4.51.v20230217=compileClasspath,runtimeClasspath 12 | org.eclipse.jetty:jetty-util:9.4.51.v20230217=compileClasspath,runtimeClasspath 13 | org.embulk:embulk-base-restclient:0.10.1=compileClasspath,runtimeClasspath 14 | org.embulk:embulk-spi:0.11=compileClasspath 15 | org.embulk:embulk-util-config:0.3.4=compileClasspath,runtimeClasspath 16 | org.embulk:embulk-util-json:0.1.1=compileClasspath,runtimeClasspath 17 | org.embulk:embulk-util-retryhelper-jetty94:0.9.0=compileClasspath,runtimeClasspath 18 | org.embulk:embulk-util-retryhelper:0.9.0=compileClasspath,runtimeClasspath 19 | 
org.embulk:embulk-util-rubytime:0.3.3=compileClasspath,runtimeClasspath 20 | org.embulk:embulk-util-timestamp:0.2.2=compileClasspath,runtimeClasspath 21 | org.msgpack:msgpack-core:0.8.24=compileClasspath 22 | org.slf4j:slf4j-api:2.0.7=compileClasspath 23 | empty= 24 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/embulk/embulk-output-elasticsearch/08db4e04f562228195f131801e43cb09a9f928fa/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-7.6.4-bin.zip 4 | networkTimeout=10000 5 | zipStoreBase=GRADLE_USER_HOME 6 | zipStorePath=wrapper/dists 7 | -------------------------------------------------------------------------------- /gradle/wrapper/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/embulk/embulk-output-elasticsearch/08db4e04f562228195f131801e43cb09a9f928fa/gradle/wrapper/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Wed Jan 13 12:41:02 JST 2016 2 | distributionBase=GRADLE_USER_HOME 3 | distributionPath=wrapper/dists 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip 7 | -------------------------------------------------------------------------------- /gradlew: 
-------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # Copyright © 2015-2021 the original authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | ############################################################################## 20 | # 21 | # Gradle start up script for POSIX generated by Gradle. 22 | # 23 | # Important for running: 24 | # 25 | # (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is 26 | # noncompliant, but you have some other compliant shell such as ksh or 27 | # bash, then to run this script, type that shell name before the whole 28 | # command line, like: 29 | # 30 | # ksh Gradle 31 | # 32 | # Busybox and similar reduced shells will NOT work, because this script 33 | # requires all of these POSIX shell features: 34 | # * functions; 35 | # * expansions «$var», «${var}», «${var:-default}», «${var+SET}», 36 | # «${var#prefix}», «${var%suffix}», and «$( cmd )»; 37 | # * compound commands having a testable exit status, especially «case»; 38 | # * various built-in commands including «command», «set», and «ulimit». 39 | # 40 | # Important for patching: 41 | # 42 | # (2) This script targets any POSIX shell, so it avoids extensions provided 43 | # by Bash, Ksh, etc; in particular arrays are avoided. 
44 | # 45 | # The "traditional" practice of packing multiple parameters into a 46 | # space-separated string is a well documented source of bugs and security 47 | # problems, so this is (mostly) avoided, by progressively accumulating 48 | # options in "$@", and eventually passing that to Java. 49 | # 50 | # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, 51 | # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; 52 | # see the in-line comments for details. 53 | # 54 | # There are tweaks for specific operating systems such as AIX, CygWin, 55 | # Darwin, MinGW, and NonStop. 56 | # 57 | # (3) This script is generated from the Groovy template 58 | # https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt 59 | # within the Gradle project. 60 | # 61 | # You can find Gradle at https://github.com/gradle/gradle/. 62 | # 63 | ############################################################################## 64 | 65 | # Attempt to set APP_HOME 66 | 67 | # Resolve links: $0 may be a link 68 | app_path=$0 69 | 70 | # Need this for daisy-chained symlinks. 71 | while 72 | APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path 73 | [ -h "$app_path" ] 74 | do 75 | ls=$( ls -ld "$app_path" ) 76 | link=${ls#*' -> '} 77 | case $link in #( 78 | /*) app_path=$link ;; #( 79 | *) app_path=$APP_HOME$link ;; 80 | esac 81 | done 82 | 83 | # This is normally unused 84 | # shellcheck disable=SC2034 85 | APP_BASE_NAME=${0##*/} 86 | APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit 87 | 88 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 89 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 90 | 91 | # Use the maximum available, or set MAX_FD != -1 to use that value. 
92 | MAX_FD=maximum 93 | 94 | warn () { 95 | echo "$*" 96 | } >&2 97 | 98 | die () { 99 | echo 100 | echo "$*" 101 | echo 102 | exit 1 103 | } >&2 104 | 105 | # OS specific support (must be 'true' or 'false'). 106 | cygwin=false 107 | msys=false 108 | darwin=false 109 | nonstop=false 110 | case "$( uname )" in #( 111 | CYGWIN* ) cygwin=true ;; #( 112 | Darwin* ) darwin=true ;; #( 113 | MSYS* | MINGW* ) msys=true ;; #( 114 | NONSTOP* ) nonstop=true ;; 115 | esac 116 | 117 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 118 | 119 | 120 | # Determine the Java command to use to start the JVM. 121 | if [ -n "$JAVA_HOME" ] ; then 122 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 123 | # IBM's JDK on AIX uses strange locations for the executables 124 | JAVACMD=$JAVA_HOME/jre/sh/java 125 | else 126 | JAVACMD=$JAVA_HOME/bin/java 127 | fi 128 | if [ ! -x "$JAVACMD" ] ; then 129 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 130 | 131 | Please set the JAVA_HOME variable in your environment to match the 132 | location of your Java installation." 133 | fi 134 | else 135 | JAVACMD=java 136 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 137 | 138 | Please set the JAVA_HOME variable in your environment to match the 139 | location of your Java installation." 140 | fi 141 | 142 | # Increase the maximum file descriptors if we can. 143 | if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then 144 | case $MAX_FD in #( 145 | max*) 146 | # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. 147 | # shellcheck disable=SC3045 148 | MAX_FD=$( ulimit -H -n ) || 149 | warn "Could not query maximum file descriptor limit" 150 | esac 151 | case $MAX_FD in #( 152 | '' | soft) :;; #( 153 | *) 154 | # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. 
155 | # shellcheck disable=SC3045 156 | ulimit -n "$MAX_FD" || 157 | warn "Could not set maximum file descriptor limit to $MAX_FD" 158 | esac 159 | fi 160 | 161 | # Collect all arguments for the java command, stacking in reverse order: 162 | # * args from the command line 163 | # * the main class name 164 | # * -classpath 165 | # * -D...appname settings 166 | # * --module-path (only if needed) 167 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 168 | 169 | # For Cygwin or MSYS, switch paths to Windows format before running java 170 | if "$cygwin" || "$msys" ; then 171 | APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) 172 | CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) 173 | 174 | JAVACMD=$( cygpath --unix "$JAVACMD" ) 175 | 176 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 177 | for arg do 178 | if 179 | case $arg in #( 180 | -*) false ;; # don't mess with options #( 181 | /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath 182 | [ -e "$t" ] ;; #( 183 | *) false ;; 184 | esac 185 | then 186 | arg=$( cygpath --path --ignore --mixed "$arg" ) 187 | fi 188 | # Roll the args list around exactly as many times as the number of 189 | # args, so each arg winds up back in the position where it started, but 190 | # possibly modified. 191 | # 192 | # NB: a `for` loop captures its iteration list before it begins, so 193 | # changing the positional parameters here affects neither the number of 194 | # iterations, nor the values presented in `arg`. 195 | shift # remove old arg 196 | set -- "$@" "$arg" # push replacement arg 197 | done 198 | fi 199 | 200 | # Collect all arguments for the java command; 201 | # * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of 202 | # shell script including quotes and variable substitutions, so put them in 203 | # double quotes to make sure that they get re-expanded; and 204 | # * put everything else in single quotes, so that it's not re-expanded. 
205 | 206 | set -- \ 207 | "-Dorg.gradle.appname=$APP_BASE_NAME" \ 208 | -classpath "$CLASSPATH" \ 209 | org.gradle.wrapper.GradleWrapperMain \ 210 | "$@" 211 | 212 | # Stop when "xargs" is not available. 213 | if ! command -v xargs >/dev/null 2>&1 214 | then 215 | die "xargs is not available" 216 | fi 217 | 218 | # Use "xargs" to parse quoted args. 219 | # 220 | # With -n1 it outputs one arg per line, with the quotes and backslashes removed. 221 | # 222 | # In Bash we could simply go: 223 | # 224 | # readarray ARGS < <( xargs -n1 <<<"$var" ) && 225 | # set -- "${ARGS[@]}" "$@" 226 | # 227 | # but POSIX shell has neither arrays nor command substitution, so instead we 228 | # post-process each arg (as a line of input to sed) to backslash-escape any 229 | # character that might be a shell metacharacter, then use eval to reverse 230 | # that process (while maintaining the separation between arguments), and wrap 231 | # the whole thing up as a single "set" statement. 232 | # 233 | # This will of course break if any of these variables contains a newline or 234 | # an unmatched quote. 235 | # 236 | 237 | eval "set -- $( 238 | printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | 239 | xargs -n1 | 240 | sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | 241 | tr '\n' ' ' 242 | )" '"$@"' 243 | 244 | exec "$JAVACMD" "$@" 245 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 
6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | 17 | @if "%DEBUG%"=="" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%"=="" set DIRNAME=. 29 | @rem This is normally unused 30 | set APP_BASE_NAME=%~n0 31 | set APP_HOME=%DIRNAME% 32 | 33 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 34 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 35 | 36 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 37 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 38 | 39 | @rem Find java.exe 40 | if defined JAVA_HOME goto findJavaFromJavaHome 41 | 42 | set JAVA_EXE=java.exe 43 | %JAVA_EXE% -version >NUL 2>&1 44 | if %ERRORLEVEL% equ 0 goto execute 45 | 46 | echo. 47 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 48 | echo. 49 | echo Please set the JAVA_HOME variable in your environment to match the 50 | echo location of your Java installation. 51 | 52 | goto fail 53 | 54 | :findJavaFromJavaHome 55 | set JAVA_HOME=%JAVA_HOME:"=% 56 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 57 | 58 | if exist "%JAVA_EXE%" goto execute 59 | 60 | echo. 
61 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 62 | echo. 63 | echo Please set the JAVA_HOME variable in your environment to match the 64 | echo location of your Java installation. 65 | 66 | goto fail 67 | 68 | :execute 69 | @rem Setup the command line 70 | 71 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 72 | 73 | 74 | @rem Execute Gradle 75 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* 76 | 77 | :end 78 | @rem End local scope for the variables with windows NT shell 79 | if %ERRORLEVEL% equ 0 goto mainEnd 80 | 81 | :fail 82 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 83 | rem the _cmd.exe /c_ return code! 84 | set EXIT_CODE=%ERRORLEVEL% 85 | if %EXIT_CODE% equ 0 set EXIT_CODE=1 86 | if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% 87 | exit /b %EXIT_CODE% 88 | 89 | :mainEnd 90 | if "%OS%"=="Windows_NT" endlocal 91 | 92 | :omega 93 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = "embulk-output-elasticsearch" 2 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/output/elasticsearch/ElasticsearchHttpClient.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017 The Embulk project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.embulk.output.elasticsearch; 18 | 19 | import com.fasterxml.jackson.core.JsonProcessingException; 20 | import com.fasterxml.jackson.databind.JsonNode; 21 | import com.fasterxml.jackson.databind.ObjectMapper; 22 | import org.eclipse.jetty.client.util.StringContentProvider; 23 | import org.eclipse.jetty.http.HttpMethod; 24 | import org.eclipse.jetty.util.ssl.SslContextFactory; 25 | import org.embulk.config.ConfigException; 26 | import org.embulk.config.UserDataException; 27 | import org.embulk.output.elasticsearch.ElasticsearchOutputPluginDelegate.AuthMethod; 28 | import org.embulk.output.elasticsearch.ElasticsearchOutputPluginDelegate.NodeAddressTask; 29 | import org.embulk.output.elasticsearch.ElasticsearchOutputPluginDelegate.PluginTask; 30 | import org.embulk.spi.DataException; 31 | import org.embulk.spi.Exec; 32 | import org.embulk.spi.time.Timestamp; 33 | import org.embulk.util.retryhelper.jetty94.Jetty94ClientCreator; 34 | import org.embulk.util.retryhelper.jetty94.Jetty94RetryHelper; 35 | import org.embulk.util.retryhelper.jetty94.Jetty94SingleRequester; 36 | import org.embulk.util.retryhelper.jetty94.StringJetty94ResponseEntityReader; 37 | import org.slf4j.Logger; 38 | import org.slf4j.LoggerFactory; 39 | 40 | import javax.xml.bind.DatatypeConverter; 41 | 42 | import java.io.IOException; 43 | import java.text.SimpleDateFormat; 44 | import java.util.ArrayList; 45 | import java.util.Arrays; 46 | import java.util.HashMap; 47 | import java.util.Iterator; 48 | import java.util.List; 49 | import 
java.util.Locale; 50 | import java.util.Map; 51 | import java.util.Random; 52 | import java.util.concurrent.TimeUnit; 53 | 54 | public class ElasticsearchHttpClient 55 | { 56 | private final Logger log; 57 | 58 | // ALLOW_UNQUOTED_CONTROL_CHARS - Not expected but whether parser will allow JSON Strings to contain unquoted control characters 59 | // FAIL_ON_UNKNOWN_PROPERTIES - Feature that determines whether encountering of unknown properties 60 | private final ObjectMapper jsonMapper = new ObjectMapper() 61 | .configure(com.fasterxml.jackson.core.JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS, false) 62 | .configure(com.fasterxml.jackson.databind.DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); 63 | 64 | // Elasticsearch maximum index byte size 65 | // public static final int MAX_INDEX_NAME_BYTES = 255; 66 | // @see https://github.com/elastic/elasticsearch/blob/master/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataCreateIndexService.java#L108 67 | private final long maxIndexNameBytes = 255; 68 | private final List invalidIndexCharacters = Arrays.asList('\\', '/', '*', '?', '"', '<', '>', '|', '#', ' ', ','); 69 | 70 | public static final int ES_SUPPORT_TYPELESS_API_VERSION = 8; 71 | private static int ES_CURRENT_MAJOR_VERSION = 0; 72 | 73 | public ElasticsearchHttpClient() 74 | { 75 | this.log = LoggerFactory.getLogger(getClass()); 76 | } 77 | 78 | public void push(JsonNode records, PluginTask task) 79 | { 80 | int bulkActions = task.getBulkActions(); 81 | long bulkSize = task.getBulkSize(); 82 | // curl -xPOST localhost:9200/{index}/{type}/_bulk -d ' 83 | // {"index" : {}}\n 84 | // {"k" : "v"}\n 85 | // {"index" : {}}\n 86 | // {"k" : "v2"}\n 87 | // ' 88 | try { 89 | int esMajorVersion = this.getEsMajorVersion(task); 90 | String path = esMajorVersion >= ES_SUPPORT_TYPELESS_API_VERSION 91 | ? 
String.format("/%s/_bulk", task.getIndex()) 92 | : String.format("/%s/%s/_bulk", task.getIndex(), task.getType()); 93 | int recordSize = records.size(); 94 | String idColumn = task.getId().orElse(null); 95 | if (recordSize > 0) { 96 | StringBuilder sb = new StringBuilder(); 97 | for (JsonNode record : records) { 98 | sb.append(createIndexRequest(idColumn, record)); 99 | 100 | String requestString = jsonMapper.writeValueAsString(record); 101 | sb.append("\n") 102 | .append(requestString) 103 | .append("\n"); 104 | } 105 | sendRequest(path, HttpMethod.POST, task, sb.toString()); 106 | } 107 | } 108 | catch (JsonProcessingException ex) { 109 | throw new DataException(ex); 110 | } 111 | } 112 | 113 | public List getIndexByAlias(String aliasName, PluginTask task) 114 | { 115 | // curl -XGET localhost:9200/_alias/{alias} 116 | // No alias: 404 117 | // Alias found: {"embulk_20161018-183738":{"aliases":{"embulk":{}}}} 118 | List indices = new ArrayList<>(); 119 | String path = String.format("/_alias/%s", aliasName); 120 | JsonNode response = sendRequest(path, HttpMethod.GET, task); 121 | 122 | Iterator it = response.fieldNames(); 123 | while (it.hasNext()) { 124 | indices.add(it.next().toString()); 125 | } 126 | 127 | return indices; 128 | } 129 | 130 | public boolean isIndexExisting(String indexName, PluginTask task) 131 | { 132 | // curl -XGET localhost:9200/{index} 133 | // No index: 404 134 | // Index found: 200 135 | try { 136 | sendRequest(indexName, HttpMethod.GET, task); 137 | return true; 138 | } 139 | catch (ResourceNotFoundException ex) { 140 | return false; 141 | } 142 | } 143 | 144 | public String generateNewIndexName(String indexName) 145 | { 146 | Timestamp time = Exec.getTransactionTime(); 147 | return indexName + new SimpleDateFormat("_yyyyMMdd-HHmmss").format(time.toEpochMilli()); 148 | } 149 | 150 | public boolean isAliasExisting(String aliasName, PluginTask task) 151 | { 152 | // curl -XGET localhost:9200/_aliases // List all aliases 153 | // No 
aliases: {} 154 | // Aliases found: {"embulk_20161018-183738":{"aliases":{"embulk":{}}}} 155 | JsonNode response = sendRequest("/_aliases", HttpMethod.GET, task); 156 | if (response.size() == 0) { 157 | return false; 158 | } 159 | for (JsonNode index : response) { 160 | if (index.has("aliases") && index.get("aliases").has(aliasName)) { 161 | return true; 162 | } 163 | } 164 | return false; 165 | } 166 | 167 | // Should be called just once while Embulk transaction. 168 | // Be sure to call after all exporting tasks completed 169 | // This method will delete existing index 170 | public void reassignAlias(String aliasName, String newIndexName, PluginTask task) 171 | { 172 | if (!isAliasExisting(aliasName, task)) { 173 | assignAlias(newIndexName, aliasName, task); 174 | } 175 | else { 176 | List oldIndices = getIndexByAlias(aliasName, task); 177 | assignAlias(newIndexName, aliasName, task); 178 | for (String index : oldIndices) { 179 | deleteIndex(index, task); 180 | } 181 | } 182 | } 183 | 184 | public String getEsVersion(PluginTask task) 185 | { 186 | // curl -XGET 'http://localhost:9200' 187 | JsonNode response = sendRequest("", HttpMethod.GET, task); 188 | return response.get("version").get("number").asText(); 189 | } 190 | 191 | public int getEsMajorVersion(PluginTask task) 192 | { 193 | try { 194 | if (ES_CURRENT_MAJOR_VERSION > 0) { 195 | return ES_CURRENT_MAJOR_VERSION; 196 | } 197 | 198 | final String esVersion = getEsVersion(task); 199 | ES_CURRENT_MAJOR_VERSION = Integer.parseInt(esVersion.trim().split("\\.")[0]); // take everything before the first '.', so a multi-digit major ("10.1.0") is not truncated to its first digit 200 | return ES_CURRENT_MAJOR_VERSION; 201 | } 202 | catch (Exception ex) { 203 | throw new RuntimeException("Failed to fetch ES version", ex); // keep the cause so connection/parse failures stay diagnosable 204 | } 205 | } 206 | 207 | public void validateIndexOrAliasName(String index, String type) 208 | { 209 | for (int i = 0; i < index.length(); i++) { 210 | if (invalidIndexCharacters.contains(index.charAt(i))) { 211 | throw new ConfigException(String.format("%s '%s' must not contain the invalid characters " + 
invalidIndexCharacters.toString(), type, index)); 212 | } 213 | } 214 | 215 | if (!index.toLowerCase(Locale.ROOT).equals(index)) { 216 | throw new ConfigException(String.format("%s '%s' must be lowercase", type, index)); 217 | } 218 | 219 | if (index.startsWith("_") || index.startsWith("-") || index.startsWith("+")) { 220 | throw new ConfigException(String.format("%s '%s' must not start with '_', '-', or '+'", type, index)); 221 | } 222 | 223 | if (index.length() > maxIndexNameBytes) { 224 | throw new ConfigException(String.format("%s name is too long, (%s > %s)", type, index.length(), maxIndexNameBytes)); 225 | } 226 | 227 | if (index.equals(".") || index.equals("..")) { 228 | throw new ConfigException("index must not be '.' or '..'"); 229 | } 230 | } 231 | 232 | private String createIndexRequest(String idColumn, JsonNode record) throws JsonProcessingException 233 | { 234 | // index name and type are set at path("/{index}/{type}"). So no need to set 235 | if (idColumn != null && record.hasNonNull(idColumn)) { 236 | // {"index" : {"_id" : "v"}} 237 | Map indexRequest = new HashMap<>(); 238 | 239 | Map idRequest = new HashMap<>(); 240 | idRequest.put("_id", record.get(idColumn)); 241 | 242 | indexRequest.put("index", idRequest); 243 | return jsonMapper.writeValueAsString(indexRequest); 244 | } 245 | else { 246 | // {"index" : {}} 247 | return "{\"index\" : {}}"; 248 | } 249 | } 250 | 251 | private void assignAlias(String indexName, String aliasName, PluginTask task) 252 | { 253 | try { 254 | if (isIndexExisting(indexName, task)) { 255 | if (isAliasExisting(aliasName, task)) { 256 | // curl -XPUT http://localhost:9200/_alias -d\ 257 | // "actions" : [ 258 | // {"remove" : {"alias" : "{alias}", "index" : "{index_old}"}}, 259 | // {"add" : {"alias": "{alias}", "index": "{index_new}"}} 260 | // ] 261 | // Success: {"acknowledged":true} 262 | List oldIndices = getIndexByAlias(aliasName, task); 263 | 264 | Map newAlias = new HashMap<>(); 265 | newAlias.put("alias", 
aliasName); 266 | newAlias.put("index", indexName); 267 | Map add = new HashMap<>(); 268 | add.put("add", newAlias); 269 | 270 | Map oldAlias = new HashMap<>(); 271 | // TODO multiple alias? 272 | for (String oldIndex : oldIndices) { 273 | oldAlias.put("alias", aliasName); 274 | oldAlias.put("index", oldIndex); 275 | } 276 | Map remove = new HashMap<>(); 277 | remove.put("remove", oldAlias); 278 | 279 | List> actions = new ArrayList<>(); 280 | actions.add(remove); 281 | actions.add(add); 282 | Map rootTree = new HashMap<>(); 283 | rootTree.put("actions", actions); 284 | 285 | String content = jsonMapper.writeValueAsString(rootTree); 286 | sendRequest("/_aliases", HttpMethod.POST, task, content); 287 | log.info("Reassigned alias [{}] to index[{}]", aliasName, indexName); 288 | } 289 | else { 290 | // curl -XPUT http://localhost:9200/{index}/_alias/{alias} 291 | // Success: {"acknowledged":true} 292 | String path = String.format("/%s/_alias/%s", indexName, aliasName); 293 | sendRequest(path, HttpMethod.PUT, task); 294 | log.info("Assigned alias [{}] to Index [{}]", aliasName, indexName); 295 | } 296 | } 297 | } 298 | catch (JsonProcessingException ex) { 299 | throw new ConfigException(String.format("Failed to assign alias[%s] to index[%s]", aliasName, indexName)); 300 | } 301 | } 302 | 303 | private void deleteIndex(String indexName, PluginTask task) 304 | { 305 | // curl -XDELETE localhost:9200/{index} 306 | // Success: {"acknowledged":true} 307 | if (isIndexExisting(indexName, task)) { 308 | waitSnapshot(task); 309 | sendRequest(indexName, HttpMethod.DELETE, task); 310 | log.info("Deleted Index [{}]", indexName); 311 | } 312 | } 313 | 314 | private void waitSnapshot(PluginTask task) 315 | { 316 | int maxSnapshotWaitingMills = task.getMaxSnapshotWaitingSecs() * 1000; 317 | long execCount = 1; 318 | long totalWaitingTime = 0; 319 | // Since only needs exponential backoff, don't need exception handling and others, I don't use Embulk RetryExecutor 320 | while 
(isSnapshotProgressing(task)) { 321 | long sleepTime = ((long) Math.pow(2, execCount) * 1000); 322 | try { 323 | Thread.sleep(sleepTime); 324 | } 325 | catch (InterruptedException ex) { 326 | // do nothing 327 | } 328 | if (execCount > 1) { 329 | log.info("Waiting for snapshot completed."); 330 | } 331 | execCount++; 332 | totalWaitingTime += sleepTime; 333 | if (totalWaitingTime > maxSnapshotWaitingMills) { 334 | throw new ConfigException(String.format("Waiting creating snapshot is expired. %s sec.", maxSnapshotWaitingMills)); 335 | } 336 | } 337 | } 338 | 339 | private boolean isSnapshotProgressing(PluginTask task) 340 | { 341 | // https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-snapshots.html#_snapshot_status 342 | // curl -XGET localhost:9200/_snapshot/_status 343 | JsonNode response = sendRequest("/_snapshot/_status", HttpMethod.GET, task); 344 | String snapshots = response.get("snapshots").asText(); 345 | return !snapshots.equals(""); 346 | } 347 | 348 | private JsonNode sendRequest(String path, final HttpMethod method, PluginTask task) 349 | { 350 | return sendRequest(path, method, task, ""); 351 | } 352 | 353 | private JsonNode sendRequest(String path, final HttpMethod method, final PluginTask task, final String content) 354 | { 355 | final String uri = createRequestUri(task, path); 356 | final String authorizationHeader = getAuthorizationHeader(task); 357 | 358 | try (Jetty94RetryHelper retryHelper = createRetryHelper(task)) { 359 | String responseBody = retryHelper.requestWithRetry( 360 | new StringJetty94ResponseEntityReader(task.getTimeoutMills()), 361 | new Jetty94SingleRequester() { 362 | @Override 363 | public void requestOnce(org.eclipse.jetty.client.HttpClient client, org.eclipse.jetty.client.api.Response.Listener responseListener) 364 | { 365 | org.eclipse.jetty.client.api.Request request = client 366 | .newRequest(uri) 367 | .accept("application/json") 368 | .timeout(task.getTimeoutMills(), TimeUnit.MILLISECONDS) 369 | 
.method(method); 370 | if (method == HttpMethod.POST) { 371 | request.content(new StringContentProvider(content), "application/json"); 372 | } 373 | 374 | if (!authorizationHeader.isEmpty()) { 375 | request.header("Authorization", authorizationHeader); 376 | } 377 | request.send(responseListener); 378 | } 379 | 380 | @Override 381 | public boolean isExceptionToRetry(Exception exception) 382 | { 383 | return task.getId().isPresent(); 384 | } 385 | 386 | @Override 387 | public boolean isResponseStatusToRetry(org.eclipse.jetty.client.api.Response response) 388 | { 389 | int status = response.getStatus(); 390 | if (status == 404) { 391 | throw new ResourceNotFoundException("Requested resource was not found"); 392 | } 393 | else if (status == 429) { 394 | return true; // Retry if 429. 395 | } 396 | return status / 100 != 4; // Retry unless 4xx except for 429. 397 | } 398 | }); 399 | return parseJson(responseBody); 400 | } 401 | } 402 | 403 | private String createRequestUri(PluginTask task, String path) 404 | { 405 | if (!path.startsWith("/")) { 406 | path = "/" + path; 407 | } 408 | String protocol = task.getUseSsl() ? 
"https" : "http"; 409 | String nodeAddress = getRandomNodeAddress(task); 410 | return String.format("%s://%s%s", protocol, nodeAddress, path); 411 | } 412 | 413 | // Return node address (RoundRobin) 414 | private String getRandomNodeAddress(PluginTask task) 415 | { 416 | List nodes = task.getNodes(); 417 | Random random = new Random(); 418 | int index = random.nextInt(nodes.size()); 419 | NodeAddressTask node = nodes.get(index); 420 | return node.getHost() + ":" + node.getPort(); 421 | } 422 | 423 | private JsonNode parseJson(final String json) throws DataException 424 | { 425 | try { 426 | return this.jsonMapper.readTree(json); 427 | } 428 | catch (IOException ex) { 429 | throw new DataException(ex); 430 | } 431 | } 432 | 433 | private Jetty94RetryHelper createRetryHelper(final PluginTask task) 434 | { 435 | return new Jetty94RetryHelper( 436 | task.getMaximumRetries(), 437 | task.getInitialRetryIntervalMillis(), 438 | task.getMaximumRetryIntervalMillis(), 439 | new Jetty94ClientCreator() { 440 | @Override 441 | public org.eclipse.jetty.client.HttpClient createAndStart() 442 | { 443 | org.eclipse.jetty.client.HttpClient client = new org.eclipse.jetty.client.HttpClient(new SslContextFactory()); 444 | client.setConnectTimeout(task.getConnectTimeoutMills()); 445 | try { 446 | client.start(); 447 | return client; 448 | } 449 | catch (Exception e) { 450 | if (e instanceof RuntimeException) { 451 | throw (RuntimeException) e; 452 | } 453 | throw new RuntimeException(e); 454 | } 455 | } 456 | }); 457 | } 458 | 459 | protected String getAuthorizationHeader(PluginTask task) 460 | { 461 | String header = ""; 462 | if (task.getAuthMethod() == AuthMethod.BASIC) { 463 | String authString = task.getUser().get() + ":" + task.getPassword().get(); 464 | header = "Basic " + DatatypeConverter.printBase64Binary(authString.getBytes()); 465 | } 466 | return header; 467 | } 468 | 469 | public class ResourceNotFoundException extends RuntimeException implements UserDataException 470 | { 
471 | protected ResourceNotFoundException() 472 | { 473 | } 474 | 475 | public ResourceNotFoundException(String message) 476 | { 477 | super(message); 478 | } 479 | 480 | public ResourceNotFoundException(Throwable cause) 481 | { 482 | super(cause); 483 | } 484 | } 485 | } 486 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/output/elasticsearch/ElasticsearchOutputPlugin.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 The Embulk project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.embulk.output.elasticsearch; 18 | 19 | import org.embulk.base.restclient.RestClientOutputPluginBase; 20 | import org.embulk.util.config.ConfigMapper; 21 | import org.embulk.util.config.ConfigMapperFactory; 22 | 23 | public class ElasticsearchOutputPlugin 24 | extends RestClientOutputPluginBase 25 | { 26 | public ElasticsearchOutputPlugin() 27 | { 28 | super(CONFIG_MAPPER_FACTORY, ElasticsearchOutputPluginDelegate.PluginTask.class, new ElasticsearchOutputPluginDelegate()); 29 | } 30 | 31 | static final ConfigMapperFactory CONFIG_MAPPER_FACTORY = ConfigMapperFactory.builder().addDefaultModules().build(); 32 | static final ConfigMapper CONFIG_MAPPER = CONFIG_MAPPER_FACTORY.createConfigMapper(); 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/output/elasticsearch/ElasticsearchOutputPluginDelegate.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017 The Embulk project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.embulk.output.elasticsearch; 18 | 19 | import com.fasterxml.jackson.annotation.JsonCreator; 20 | import com.fasterxml.jackson.annotation.JsonValue; 21 | import org.embulk.base.restclient.RestClientOutputPluginDelegate; 22 | import org.embulk.base.restclient.RestClientOutputTaskBase; 23 | import org.embulk.base.restclient.jackson.JacksonServiceRequestMapper; 24 | import org.embulk.base.restclient.jackson.JacksonTopLevelValueLocator; 25 | import org.embulk.base.restclient.jackson.scope.JacksonAllInObjectScope; 26 | import org.embulk.base.restclient.record.RecordBuffer; 27 | import org.embulk.config.ConfigDiff; 28 | import org.embulk.config.ConfigException; 29 | import org.embulk.config.TaskReport; 30 | import org.embulk.spi.Schema; 31 | import org.embulk.util.config.Config; 32 | import org.embulk.util.config.ConfigDefault; 33 | import org.embulk.util.config.Task; 34 | import org.embulk.util.timestamp.TimestampFormatter; 35 | import org.slf4j.Logger; 36 | import org.slf4j.LoggerFactory; 37 | 38 | import java.util.List; 39 | import java.util.Locale; 40 | import java.util.Optional; 41 | 42 | public class ElasticsearchOutputPluginDelegate 43 | implements RestClientOutputPluginDelegate 44 | { 45 | private final Logger log; 46 | private final ElasticsearchHttpClient client; 47 | 48 | public ElasticsearchOutputPluginDelegate() 49 | { 50 | this.log = LoggerFactory.getLogger(getClass()); 51 | this.client = new ElasticsearchHttpClient(); 52 | } 53 | 54 | public interface NodeAddressTask 55 | extends Task 56 | { 57 | @Config("host") 58 | String getHost(); 59 | 60 | @Config("port") 61 | @ConfigDefault("9200") 62 | int getPort(); 63 | } 64 | 65 | public interface PluginTask 66 | extends RestClientOutputTaskBase 67 | { 68 | @Config("mode") 69 | @ConfigDefault("\"insert\"") 70 | Mode getMode(); 71 | 72 | @Config("nodes") 73 | List getNodes(); 74 | 75 | @Config("cluster_name") 76 | @ConfigDefault("\"elasticsearch\"") 77 | String getClusterName(); 78 | 
79 | @Config("index") 80 | String getIndex(); 81 | void setIndex(String indexName); 82 | 83 | @Config("alias") 84 | @ConfigDefault("null") 85 | Optional getAlias(); 86 | void setAlias(Optional aliasName); 87 | 88 | @Config("index_type") 89 | String getType(); 90 | 91 | @Config("id") 92 | @ConfigDefault("null") 93 | Optional getId(); 94 | 95 | @Config("use_ssl") 96 | @ConfigDefault("false") 97 | boolean getUseSsl(); 98 | 99 | @Config("auth_method") 100 | @ConfigDefault("\"none\"") 101 | AuthMethod getAuthMethod(); 102 | 103 | @Config("user") 104 | @ConfigDefault("null") 105 | Optional getUser(); 106 | 107 | @Config("password") 108 | @ConfigDefault("null") 109 | Optional getPassword(); 110 | 111 | @Config("bulk_actions") 112 | @ConfigDefault("1000") 113 | int getBulkActions(); 114 | 115 | @Config("bulk_size") 116 | @ConfigDefault("5242880") 117 | long getBulkSize(); 118 | 119 | @Config("concurrent_requests") 120 | @ConfigDefault("5") 121 | int getConcurrentRequests(); 122 | 123 | @Config("maximum_retries") 124 | @ConfigDefault("7") 125 | int getMaximumRetries(); 126 | 127 | @Config("initial_retry_interval_millis") 128 | @ConfigDefault("1000") 129 | int getInitialRetryIntervalMillis(); 130 | 131 | @Config("maximum_retry_interval_millis") 132 | @ConfigDefault("120000") 133 | int getMaximumRetryIntervalMillis(); 134 | 135 | @Config("timeout_millis") 136 | @ConfigDefault("60000") 137 | int getTimeoutMills(); 138 | 139 | @Config("connect_timeout_millis") 140 | @ConfigDefault("60000") 141 | int getConnectTimeoutMills(); 142 | 143 | @Config("max_snapshot_waiting_secs") 144 | @ConfigDefault("1800") 145 | int getMaxSnapshotWaitingSecs(); 146 | 147 | @Config("time_zone") 148 | @ConfigDefault("\"UTC\"") 149 | String getTimeZone(); 150 | 151 | @Config("fill_null_for_empty_column") 152 | @ConfigDefault("false") 153 | boolean getFillNullForEmptyColumn(); 154 | 155 | // The following method has been removed. 
It came org.embulk.spi.time.TimestampFormatter.Task, but it has not been used. 156 | // 157 | // @Config("default_timezone") 158 | // @ConfigDefault("\"UTC\"") 159 | // String getDefaultTimeZoneId() 160 | 161 | // The following method has been removed. It came org.embulk.spi.time.TimestampFormatter.Task, but it has not been used. 162 | // 163 | // @Config("default_timestamp_format") 164 | // @ConfigDefault("\"%Y-%m-%d %H:%M:%S.%6N %z\"") 165 | // String getDefaultTimestampFormat(); 166 | } 167 | 168 | public enum Mode 169 | { 170 | INSERT, 171 | REPLACE; 172 | 173 | @JsonValue 174 | @Override 175 | public String toString() 176 | { 177 | return name().toLowerCase(Locale.ENGLISH); 178 | } 179 | 180 | @JsonCreator 181 | public static Mode fromString(String value) 182 | { 183 | switch (value) { 184 | case "insert": 185 | return INSERT; 186 | case "replace": 187 | return REPLACE; 188 | default: 189 | throw new ConfigException(String.format("Unknown mode '%s'. Supported modes are insert, truncate_insert, replace", value)); 190 | } 191 | } 192 | } 193 | 194 | public enum AuthMethod 195 | { 196 | NONE, 197 | BASIC; 198 | 199 | @JsonValue 200 | @Override 201 | public String toString() 202 | { 203 | return name().toLowerCase(Locale.ENGLISH); 204 | } 205 | 206 | @JsonCreator 207 | public static AuthMethod fromString(String value) 208 | { 209 | switch (value) { 210 | case "none": 211 | return NONE; 212 | case "basic": 213 | return BASIC; 214 | default: 215 | throw new ConfigException(String.format("Unknown auth_method '%s'. Supported auth_method are none, basic", value)); 216 | } 217 | } 218 | } 219 | 220 | @Override // Overridden from |OutputTaskValidatable| 221 | public void validateOutputTask(PluginTask task, Schema embulkSchema, int taskCount) 222 | { 223 | if (task.getNodes().size() > 0) { 224 | for (NodeAddressTask node : task.getNodes()) { 225 | if (node.getHost().endsWith("es.amazonaws.com")) { 226 | log.warn("This plugin does't support AWS Elasticsearch Service. 
See README https://github.com/embulk/embulk-output-elasticsearch/blob/master/README.md"); 227 | } 228 | if (node.getPort() == 9300) { 229 | log.warn("Port:9300 is usually used by TransportClient. HTTP/Rest Client uses 9200."); 230 | } 231 | } 232 | } 233 | 234 | log.info(String.format("Connecting to Elasticsearch version:%s", client.getEsVersion(task))); 235 | log.info("Executing plugin with '{}' mode.", task.getMode()); 236 | client.validateIndexOrAliasName(task.getIndex(), "index"); 237 | client.validateIndexOrAliasName(task.getType(), "index_type"); 238 | 239 | if (task.getMode().equals(Mode.REPLACE)) { 240 | task.setAlias(Optional.of(task.getIndex())); 241 | task.setIndex(client.generateNewIndexName(task.getIndex())); 242 | if (client.isIndexExisting(task.getAlias().orElse(null), task) && !client.isAliasExisting(task.getAlias().orElse(null), task)) { 243 | throw new ConfigException(String.format("Invalid alias name [%s], an index exists with the same name as the alias", task.getAlias().orElse(null))); 244 | } 245 | } 246 | log.info(String.format("Inserting data into index[%s]", task.getIndex())); 247 | 248 | if (task.getAuthMethod() == AuthMethod.BASIC) { 249 | if (!task.getUser().isPresent() || !task.getPassword().isPresent()) { 250 | throw new ConfigException("'user' and 'password' are required when auth_method='basic'"); 251 | } 252 | } 253 | } 254 | 255 | @Override // Overridden from |ServiceRequestMapperBuildable| 256 | public JacksonServiceRequestMapper buildServiceRequestMapper(PluginTask task) 257 | { 258 | final TimestampFormatter formatter = TimestampFormatter 259 | .builder("%Y-%m-%dT%H:%M:%S.%3N%z", true) 260 | .setDefaultZoneFromString(task.getTimeZone()) 261 | .build(); 262 | 263 | return JacksonServiceRequestMapper.builder() 264 | .add(new JacksonAllInObjectScope(formatter, task.getFillNullForEmptyColumn()), new JacksonTopLevelValueLocator("record")) 265 | .build(); 266 | } 267 | 268 | @Override // Overridden from |RecordBufferBuildable| 269 | 
public RecordBuffer buildRecordBuffer(PluginTask task, Schema schema, int taskIndex) 270 | { 271 | return new ElasticsearchRecordBuffer("records", task); 272 | } 273 | 274 | @Override 275 | public ConfigDiff egestEmbulkData(final PluginTask task, 276 | Schema schema, 277 | int taskIndex, 278 | List taskReports) 279 | { 280 | long totalInserted = 0; 281 | for (TaskReport taskReport : taskReports) { 282 | if (taskReport.has("inserted")) { 283 | totalInserted += taskReport.get(Long.class, "inserted"); 284 | } 285 | } 286 | 287 | log.info("Insert completed. {} records", totalInserted); 288 | // Re assign alias only when repale mode 289 | if (task.getMode().equals(Mode.REPLACE)) { 290 | client.reassignAlias(task.getAlias().orElse(null), task.getIndex(), task); 291 | } 292 | 293 | return ElasticsearchOutputPlugin.CONFIG_MAPPER_FACTORY.newConfigDiff(); 294 | } 295 | } 296 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/output/elasticsearch/ElasticsearchRecordBuffer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017 The Embulk project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.embulk.output.elasticsearch; 18 | 19 | import com.fasterxml.jackson.databind.DeserializationFeature; 20 | import com.fasterxml.jackson.databind.JsonNode; 21 | import com.fasterxml.jackson.databind.ObjectMapper; 22 | import com.fasterxml.jackson.databind.node.ArrayNode; 23 | import com.fasterxml.jackson.databind.node.JsonNodeFactory; 24 | import org.embulk.base.restclient.jackson.JacksonServiceRecord; 25 | import org.embulk.base.restclient.record.RecordBuffer; 26 | import org.embulk.base.restclient.record.ServiceRecord; 27 | import org.embulk.config.TaskReport; 28 | import org.embulk.output.elasticsearch.ElasticsearchOutputPluginDelegate.PluginTask; 29 | import org.slf4j.Logger; 30 | import org.slf4j.LoggerFactory; 31 | 32 | import java.io.IOException; 33 | 34 | /** 35 | * ElasticsearchRecordBuffer is an implementation of {@code RecordBuffer} which includes JSON output directly to Elasticsearch server. 36 | */ 37 | public class ElasticsearchRecordBuffer 38 | extends RecordBuffer 39 | { 40 | private final String attributeName; 41 | private final PluginTask task; 42 | private final long bulkActions; 43 | private final long bulkSize; 44 | private final ElasticsearchHttpClient client; 45 | private final ObjectMapper mapper; 46 | private final Logger log; 47 | private long totalCount; 48 | private int requestCount; 49 | private long requestBytes; 50 | private ArrayNode records; 51 | 52 | public ElasticsearchRecordBuffer(String attributeName, PluginTask task) 53 | { 54 | this.attributeName = attributeName; 55 | this.task = task; 56 | this.bulkActions = task.getBulkActions(); 57 | this.bulkSize = task.getBulkSize(); 58 | this.client = new ElasticsearchHttpClient(); 59 | this.mapper = new ObjectMapper() 60 | .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) 61 | .configure(com.fasterxml.jackson.core.JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS, false); 62 | this.records = JsonNodeFactory.instance.arrayNode(); 63 | 
this.totalCount = 0; 64 | this.requestCount = 0; 65 | this.requestBytes = 0; 66 | this.log = LoggerFactory.getLogger(getClass()); 67 | } 68 | 69 | @Override 70 | public void bufferRecord(ServiceRecord serviceRecord) 71 | { 72 | JacksonServiceRecord jacksonServiceRecord; 73 | try { 74 | jacksonServiceRecord = (JacksonServiceRecord) serviceRecord; 75 | JsonNode record = mapper.readTree(jacksonServiceRecord.toString()).get("record"); 76 | 77 | requestCount++; 78 | totalCount++; 79 | requestBytes += record.toString().getBytes().length; 80 | 81 | records.add(record); 82 | if (requestCount >= bulkActions || requestBytes >= bulkSize) { 83 | client.push(records, task); 84 | if (totalCount % 10000 == 0) { 85 | log.info("Inserted {} records", totalCount); 86 | } 87 | records = JsonNodeFactory.instance.arrayNode(); 88 | requestBytes = 0; 89 | requestCount = 0; 90 | } 91 | } 92 | catch (ClassCastException ex) { 93 | throw new RuntimeException(ex); 94 | } 95 | catch (IOException ex) { 96 | throw new RuntimeException(ex); 97 | } 98 | } 99 | 100 | @Override 101 | public void finish() 102 | { 103 | } 104 | 105 | @Override 106 | public void close() 107 | { 108 | } 109 | 110 | @Override 111 | public TaskReport commitWithTaskReportUpdated(TaskReport taskReport) 112 | { 113 | if (records.size() > 0) { 114 | client.push(records, task); 115 | log.info("Inserted {} records", records.size()); 116 | } 117 | return ElasticsearchOutputPlugin.CONFIG_MAPPER_FACTORY.newTaskReport().set("inserted", totalCount); 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/output/elasticsearch/ElasticsearchTestUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017 The Embulk project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.embulk.output.elasticsearch; 18 | 19 | import com.google.common.collect.ImmutableList; 20 | import com.google.common.collect.ImmutableMap; 21 | import org.embulk.config.ConfigSource; 22 | import org.embulk.output.elasticsearch.ElasticsearchOutputPluginDelegate.PluginTask; 23 | import org.embulk.spi.Schema; 24 | import org.embulk.spi.type.Types; 25 | 26 | import java.lang.reflect.Method; 27 | import java.util.Arrays; 28 | import java.util.List; 29 | 30 | public class ElasticsearchTestUtils 31 | { 32 | public static String ES_HOST; 33 | public static int ES_PORT; 34 | public static List ES_NODES; 35 | public static String ES_INDEX; 36 | public static String ES_INDEX_TYPE; 37 | public static String ES_ID; 38 | public static int ES_BULK_ACTIONS; 39 | public static int ES_BULK_SIZE; 40 | public static int ES_CONCURRENT_REQUESTS; 41 | public static String PATH_PREFIX; 42 | public static String JSON_PATH_PREFIX; 43 | public static String ES_INDEX2; 44 | public static String ES_ALIAS; 45 | 46 | public static int ES_MIN_API_VERSION = 7; 47 | 48 | public void initializeConstant() 49 | { 50 | ES_HOST = "localhost"; 51 | ES_PORT = 19200; 52 | 53 | ES_INDEX = "embulk"; 54 | ES_INDEX2 = ES_INDEX + "_02"; 55 | ES_ALIAS = ES_INDEX + "_alias"; 56 | ES_INDEX_TYPE = "embulk"; 57 | ES_ID = "id"; 58 | ES_BULK_ACTIONS = 1000; 59 | ES_BULK_SIZE = 5242880; 60 | ES_CONCURRENT_REQUESTS = 5; 61 | 62 | ES_NODES = Arrays.asList(ImmutableMap.of("host", ES_HOST, "port", ES_PORT)); 63 | 64 | PATH_PREFIX = 
ElasticsearchTestUtils.class.getClassLoader().getResource("sample_01.csv").getPath(); 65 | JSON_PATH_PREFIX = ElasticsearchTestUtils.class.getClassLoader().getResource("sample_01.json").getPath(); 66 | } 67 | 68 | public void prepareBeforeTest(PluginTask task) throws Exception 69 | { 70 | ElasticsearchHttpClient client = new ElasticsearchHttpClient(); 71 | Method deleteIndex = ElasticsearchHttpClient.class.getDeclaredMethod("deleteIndex", String.class, PluginTask.class); 72 | deleteIndex.setAccessible(true); 73 | 74 | // Delete index 75 | if (client.isIndexExisting(ES_INDEX, task)) { 76 | deleteIndex.invoke(client, ES_INDEX, task); 77 | } 78 | 79 | if (client.isIndexExisting(ES_INDEX2, task)) { 80 | deleteIndex.invoke(client, ES_INDEX2, task); 81 | } 82 | } 83 | 84 | public ConfigSource config() 85 | { 86 | return ElasticsearchOutputPlugin.CONFIG_MAPPER_FACTORY.newConfigSource() 87 | .set("in", inputConfig()) 88 | .set("parser", parserConfig(schemaConfig())) 89 | .set("type", "elasticsearch") 90 | .set("mode", "insert") 91 | .set("nodes", ES_NODES) 92 | .set("index", ES_INDEX) 93 | .set("index_type", ES_INDEX_TYPE) 94 | .set("id", ES_ID) 95 | .set("bulk_actions", ES_BULK_ACTIONS) 96 | .set("bulk_size", ES_BULK_SIZE) 97 | .set("concurrent_requests", ES_CONCURRENT_REQUESTS) 98 | .set("maximum_retries", 2); 99 | } 100 | 101 | public ConfigSource oldParserConfig() 102 | { 103 | return ElasticsearchOutputPlugin.CONFIG_MAPPER_FACTORY.newConfigSource() 104 | .set("parser", parserConfig(schemaConfig())) 105 | .getNested("parser"); 106 | } 107 | 108 | public ConfigSource configJSON() 109 | { 110 | return ElasticsearchOutputPlugin.CONFIG_MAPPER_FACTORY.newConfigSource() 111 | .set("in", inputConfigJSON()) 112 | .set("parser", parserConfigJSON()) 113 | .set("type", "elasticsearch") 114 | .set("mode", "insert") 115 | .set("nodes", ES_NODES) 116 | .set("index", ES_INDEX) 117 | .set("index_type", ES_INDEX_TYPE) 118 | .set("id", ES_ID) 119 | .set("bulk_actions", ES_BULK_ACTIONS) 
120 | .set("bulk_size", ES_BULK_SIZE) 121 | .set("concurrent_requests", ES_CONCURRENT_REQUESTS) 122 | .set("maximum_retries", 2) 123 | .set("fill_null_for_empty_column", true); 124 | } 125 | 126 | public ImmutableMap inputConfig() 127 | { 128 | ImmutableMap.Builder builder = new ImmutableMap.Builder<>(); 129 | builder.put("type", "file"); 130 | builder.put("path_prefix", PATH_PREFIX); 131 | builder.put("last_path", ""); 132 | return builder.build(); 133 | } 134 | 135 | public ImmutableMap inputConfigJSON() 136 | { 137 | ImmutableMap.Builder builder = new ImmutableMap.Builder<>(); 138 | builder.put("type", "file"); 139 | builder.put("path_prefix", JSON_PATH_PREFIX); 140 | builder.put("last_path", ""); 141 | return builder.build(); 142 | } 143 | 144 | public ImmutableMap parserConfig(ImmutableList schemaConfig) 145 | { 146 | ImmutableMap.Builder builder = new ImmutableMap.Builder<>(); 147 | builder.put("type", "csv"); 148 | builder.put("newline", "CRLF"); 149 | builder.put("delimiter", ","); 150 | builder.put("quote", "\""); 151 | builder.put("escape", "\""); 152 | builder.put("trim_if_not_quoted", false); 153 | builder.put("skip_header_lines", 1); 154 | builder.put("allow_extra_columns", false); 155 | builder.put("allow_optional_columns", false); 156 | builder.put("columns", schemaConfig); 157 | return builder.build(); 158 | } 159 | 160 | public ImmutableMap parserConfigJSON() 161 | { 162 | ImmutableMap.Builder builder = new ImmutableMap.Builder<>(); 163 | return builder.build(); 164 | } 165 | 166 | public ImmutableList schemaConfig() 167 | { 168 | ImmutableList.Builder builder = new ImmutableList.Builder<>(); 169 | builder.add(ImmutableMap.of("name", "id", "type", "long")); 170 | builder.add(ImmutableMap.of("name", "account", "type", "long")); 171 | builder.add(ImmutableMap.of("name", "time", "type", "timestamp", "format", "%Y-%m-%d %H:%M:%S")); 172 | builder.add(ImmutableMap.of("name", "purchase", "type", "timestamp", "format", "%Y%m%d")); 173 | 
builder.add(ImmutableMap.of("name", "flg", "type", "boolean")); 174 | builder.add(ImmutableMap.of("name", "score", "type", "double")); 175 | builder.add(ImmutableMap.of("name", "comment", "type", "string")); 176 | return builder.build(); 177 | } 178 | 179 | public Schema JSONSchema() 180 | { 181 | return Schema.builder() 182 | .add("id", Types.LONG) 183 | .add("account", Types.LONG) 184 | .add("time", Types.STRING) 185 | .add("purchase", Types.STRING) 186 | .add("flg", Types.BOOLEAN) 187 | .add("score", Types.DOUBLE) 188 | .add("comment", Types.STRING) 189 | .build(); 190 | } 191 | } 192 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/output/elasticsearch/TestElasticsearchHttpClient.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017 The Embulk project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.embulk.output.elasticsearch; 18 | 19 | import org.eclipse.jetty.http.HttpMethod; 20 | import org.embulk.config.ConfigException; 21 | import org.embulk.config.ConfigSource; 22 | import org.embulk.output.elasticsearch.ElasticsearchOutputPluginDelegate.PluginTask; 23 | import org.embulk.spi.Exec; 24 | import org.embulk.spi.time.Timestamp; 25 | import org.embulk.test.EmbulkTestRuntime; 26 | import org.embulk.util.config.ConfigMapper; 27 | import org.embulk.util.config.ConfigMapperFactory; 28 | import org.junit.Before; 29 | import org.junit.BeforeClass; 30 | import org.junit.Rule; 31 | import org.junit.Test; 32 | 33 | import java.lang.reflect.Method; 34 | import java.text.SimpleDateFormat; 35 | 36 | import static org.embulk.output.elasticsearch.ElasticsearchTestUtils.ES_ALIAS; 37 | import static org.embulk.output.elasticsearch.ElasticsearchTestUtils.ES_INDEX; 38 | import static org.embulk.output.elasticsearch.ElasticsearchTestUtils.ES_INDEX2; 39 | import static org.embulk.output.elasticsearch.ElasticsearchTestUtils.ES_NODES; 40 | import static org.hamcrest.MatcherAssert.assertThat; 41 | import static org.hamcrest.core.Is.is; 42 | 43 | public class TestElasticsearchHttpClient 44 | { 45 | @BeforeClass 46 | public static void initializeConstant() 47 | { 48 | } 49 | 50 | @Before 51 | public void createResources() throws Exception 52 | { 53 | utils = new ElasticsearchTestUtils(); 54 | utils.initializeConstant(); 55 | 56 | final PluginTask task = CONFIG_MAPPER.map(utils.config(), PluginTask.class); 57 | utils.prepareBeforeTest(task); 58 | } 59 | 60 | @Rule 61 | public EmbulkTestRuntime runtime = new EmbulkTestRuntime(); 62 | 63 | private static final ConfigMapperFactory CONFIG_MAPPER_FACTORY = ElasticsearchOutputPlugin.CONFIG_MAPPER_FACTORY; 64 | private static final ConfigMapper CONFIG_MAPPER = ElasticsearchOutputPlugin.CONFIG_MAPPER; 65 | 66 | private ElasticsearchTestUtils utils; 67 | 68 | @Test 69 | public void testValidateIndexOrAliasName() 
70 | { 71 | ElasticsearchHttpClient client = new ElasticsearchHttpClient(); 72 | client.validateIndexOrAliasName("embulk", "index"); 73 | } 74 | 75 | @Test(expected = ConfigException.class) 76 | public void testIndexNameContainsUpperCase() 77 | { 78 | ElasticsearchHttpClient client = new ElasticsearchHttpClient(); 79 | client.validateIndexOrAliasName("Embulk", "index"); 80 | } 81 | 82 | @Test(expected = ConfigException.class) 83 | public void testIndexNameStartsInvalidChars() 84 | { 85 | ElasticsearchHttpClient client = new ElasticsearchHttpClient(); 86 | client.validateIndexOrAliasName("_embulk", "index"); 87 | } 88 | 89 | @Test(expected = ConfigException.class) 90 | public void testIndexNameContainsInvalidChars() 91 | { 92 | ElasticsearchHttpClient client = new ElasticsearchHttpClient(); 93 | client.validateIndexOrAliasName("em#bulk", "index"); 94 | } 95 | 96 | @Test(expected = ConfigException.class) 97 | public void testIndexNameTooLong() 98 | { 99 | String index = "embulk"; 100 | for (int i = 0; i < 255; i++) { 101 | index += "s"; 102 | } 103 | ElasticsearchHttpClient client = new ElasticsearchHttpClient(); 104 | client.validateIndexOrAliasName(index, "index"); 105 | } 106 | 107 | @Test(expected = ConfigException.class) 108 | public void testIndexNameEqDot() 109 | { 110 | ElasticsearchHttpClient client = new ElasticsearchHttpClient(); 111 | client.validateIndexOrAliasName(".", "index"); 112 | } 113 | 114 | @Test 115 | public void testGenerateNewIndex() 116 | { 117 | ElasticsearchHttpClient client = new ElasticsearchHttpClient(); 118 | String newIndexName = client.generateNewIndexName(ES_INDEX); 119 | Timestamp time = Exec.getTransactionTime(); 120 | assertThat(newIndexName, is(ES_INDEX + new SimpleDateFormat("_yyyyMMdd-HHmmss").format(time.toEpochMilli()))); 121 | } 122 | 123 | @Test 124 | public void testCreateAlias() throws Exception 125 | { 126 | ElasticsearchHttpClient client = new ElasticsearchHttpClient(); 127 | final PluginTask task = 
CONFIG_MAPPER.map(utils.config(), PluginTask.class); 128 | // delete index 129 | Method method = ElasticsearchHttpClient.class.getDeclaredMethod("deleteIndex", String.class, PluginTask.class); 130 | method.setAccessible(true); 131 | method.invoke(client, "newindex", task); 132 | 133 | // create index 134 | Method sendRequest = ElasticsearchHttpClient.class.getDeclaredMethod("sendRequest", String.class, HttpMethod.class, PluginTask.class); 135 | sendRequest.setAccessible(true); 136 | String path = String.format("/%s/", ES_INDEX); 137 | sendRequest.invoke(client, path, HttpMethod.PUT, task); 138 | 139 | path = String.format("/%s/", ES_INDEX2); 140 | sendRequest.invoke(client, path, HttpMethod.PUT, task); 141 | 142 | // create alias 143 | client.reassignAlias(ES_ALIAS, ES_INDEX, task); 144 | 145 | // check alias 146 | assertThat(client.isAliasExisting(ES_ALIAS, task), is(true)); 147 | assertThat(client.getIndexByAlias(ES_ALIAS, task).toString(), is("[" + ES_INDEX + "]")); 148 | 149 | // reassign index 150 | client.reassignAlias(ES_ALIAS, ES_INDEX2, task); 151 | assertThat(client.getIndexByAlias(ES_ALIAS, task).toString(), is("[" + ES_INDEX2 + "]")); 152 | } 153 | 154 | @Test 155 | public void testIsIndexExistingWithNonExistsIndex() 156 | { 157 | ElasticsearchHttpClient client = new ElasticsearchHttpClient(); 158 | final PluginTask task = CONFIG_MAPPER.map(utils.config(), PluginTask.class); 159 | assertThat(client.isIndexExisting("non-existing-index", task), is(false)); 160 | } 161 | 162 | @Test 163 | public void testIsAliasExistingWithNonExistsAlias() 164 | { 165 | ElasticsearchHttpClient client = new ElasticsearchHttpClient(); 166 | final PluginTask task = CONFIG_MAPPER.map(utils.config(), PluginTask.class); 167 | assertThat(client.isAliasExisting("non-existing-alias", task), is(false)); 168 | } 169 | 170 | @Test 171 | public void testGetAuthorizationHeader() throws Exception 172 | { 173 | ElasticsearchHttpClient client = new ElasticsearchHttpClient(); 174 | 175 | 
ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource() 176 | .set("auth_method", "basic") 177 | .set("user", "username") 178 | .set("password", "password") 179 | .set("index", "idx") 180 | .set("index_type", "idx_type") 181 | .set("nodes", ES_NODES); 182 | 183 | assertThat( 184 | client.getAuthorizationHeader(CONFIG_MAPPER.map(config, PluginTask.class)), 185 | is("Basic dXNlcm5hbWU6cGFzc3dvcmQ=")); 186 | } 187 | } 188 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/output/elasticsearch/TestElasticsearchOutputPlugin.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 The Embulk project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.embulk.output.elasticsearch; 18 | 19 | import com.fasterxml.jackson.databind.JsonNode; 20 | import com.google.common.collect.Lists; 21 | import org.eclipse.jetty.http.HttpMethod; 22 | import org.embulk.config.ConfigException; 23 | import org.embulk.config.ConfigSource; 24 | import org.embulk.config.TaskReport; 25 | import org.embulk.config.TaskSource; 26 | import org.embulk.output.elasticsearch.ElasticsearchOutputPluginDelegate.AuthMethod; 27 | import org.embulk.output.elasticsearch.ElasticsearchOutputPluginDelegate.Mode; 28 | import org.embulk.output.elasticsearch.ElasticsearchOutputPluginDelegate.PluginTask; 29 | import org.embulk.spi.OutputPlugin; 30 | import org.embulk.spi.Page; 31 | import org.embulk.spi.Schema; 32 | import org.embulk.spi.TransactionalPageOutput; 33 | import org.embulk.spi.time.Timestamp; 34 | import org.embulk.parser.csv.CsvParserPlugin; 35 | import org.embulk.test.EmbulkTestRuntime; 36 | import org.embulk.test.PageTestUtils; 37 | import org.embulk.util.config.ConfigMapper; 38 | import org.embulk.util.config.ConfigMapperFactory; 39 | import org.junit.Before; 40 | import org.junit.Rule; 41 | import org.junit.Test; 42 | 43 | import java.lang.reflect.Method; 44 | import java.util.Arrays; 45 | import java.util.List; 46 | 47 | import static org.embulk.output.elasticsearch.ElasticsearchTestUtils.ES_BULK_ACTIONS; 48 | import static org.embulk.output.elasticsearch.ElasticsearchTestUtils.ES_BULK_SIZE; 49 | import static org.embulk.output.elasticsearch.ElasticsearchTestUtils.ES_CONCURRENT_REQUESTS; 50 | import static org.embulk.output.elasticsearch.ElasticsearchTestUtils.ES_ID; 51 | import static org.embulk.output.elasticsearch.ElasticsearchTestUtils.ES_INDEX; 52 | import static org.embulk.output.elasticsearch.ElasticsearchTestUtils.ES_INDEX_TYPE; 53 | import static org.embulk.output.elasticsearch.ElasticsearchTestUtils.ES_NODES; 54 | import static org.hamcrest.core.Is.is; 55 | import static org.junit.Assert.assertThat; 56 
| import static org.junit.Assert.assertTrue; 57 | 58 | public class TestElasticsearchOutputPlugin 59 | { 60 | private static final ConfigMapperFactory CONFIG_MAPPER_FACTORY = ElasticsearchOutputPlugin.CONFIG_MAPPER_FACTORY; 61 | private static final ConfigMapper CONFIG_MAPPER = ElasticsearchOutputPlugin.CONFIG_MAPPER; 62 | 63 | @Rule 64 | public EmbulkTestRuntime runtime = new EmbulkTestRuntime(); 65 | private ElasticsearchOutputPlugin plugin; 66 | private ElasticsearchTestUtils utils; 67 | 68 | @Before 69 | public void createResources() throws Exception 70 | { 71 | utils = new ElasticsearchTestUtils(); 72 | utils.initializeConstant(); 73 | final PluginTask task = CONFIG_MAPPER.map(utils.config(), PluginTask.class); 74 | utils.prepareBeforeTest(task); 75 | 76 | plugin = new ElasticsearchOutputPlugin(); 77 | } 78 | 79 | @Test 80 | public void testDefaultValues() 81 | { 82 | final PluginTask task = CONFIG_MAPPER.map(utils.config(), PluginTask.class); 83 | assertThat(task.getIndex(), is(ES_INDEX)); 84 | } 85 | 86 | @Test 87 | public void testDefaultValuesNull() 88 | { 89 | final ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource() 90 | .set("in", utils.inputConfig()) 91 | .set("parser", utils.parserConfig(utils.schemaConfig())) 92 | .set("type", "elasticsearch") 93 | .set("mode", "") // NULL 94 | .set("nodes", ES_NODES) 95 | .set("index", ES_INDEX) 96 | .set("index_type", ES_INDEX_TYPE) 97 | .set("id", ES_ID) 98 | .set("bulk_actions", ES_BULK_ACTIONS) 99 | .set("bulk_size", ES_BULK_SIZE) 100 | .set("concurrent_requests", ES_CONCURRENT_REQUESTS 101 | ); 102 | Schema schema = CONFIG_MAPPER.map(utils.oldParserConfig(), CsvParserPlugin.PluginTask.class).getSchemaConfig().toSchema(); 103 | try { 104 | plugin.transaction(config, schema, 0, new OutputPlugin.Control() 105 | { 106 | @Override 107 | public List run(TaskSource taskSource) 108 | { 109 | return Lists.newArrayList(CONFIG_MAPPER_FACTORY.newTaskReport()); 110 | } 111 | }); 112 | } 113 | catch (Throwable t) { 
114 | if (t instanceof RuntimeException) { 115 | assertTrue(t instanceof ConfigException); 116 | } 117 | } 118 | } 119 | 120 | @Test 121 | public void testTransaction() 122 | { 123 | ConfigSource config = utils.config(); 124 | Schema schema = CONFIG_MAPPER.map(utils.oldParserConfig(), CsvParserPlugin.PluginTask.class).getSchemaConfig().toSchema(); 125 | plugin.transaction(config, schema, 0, new OutputPlugin.Control() 126 | { 127 | @Override 128 | public List run(TaskSource taskSource) 129 | { 130 | return Lists.newArrayList(CONFIG_MAPPER_FACTORY.newTaskReport()); 131 | } 132 | }); 133 | // no error happens 134 | } 135 | 136 | @Test 137 | public void testResume() 138 | { 139 | ConfigSource config = utils.config(); 140 | Schema schema = CONFIG_MAPPER.map(utils.oldParserConfig(), CsvParserPlugin.PluginTask.class).getSchemaConfig().toSchema(); 141 | final PluginTask task = CONFIG_MAPPER.map(config, PluginTask.class); 142 | plugin.resume(task.dump(), schema, 0, new OutputPlugin.Control() 143 | { 144 | @Override 145 | public List run(TaskSource taskSource) 146 | { 147 | return Lists.newArrayList(CONFIG_MAPPER_FACTORY.newTaskReport()); 148 | } 149 | }); 150 | } 151 | 152 | @Test 153 | public void testCleanup() 154 | { 155 | ConfigSource config = utils.config(); 156 | Schema schema = CONFIG_MAPPER.map(utils.oldParserConfig(), CsvParserPlugin.PluginTask.class).getSchemaConfig().toSchema(); 157 | final PluginTask task = CONFIG_MAPPER.map(config, PluginTask.class); 158 | plugin.cleanup(task.dump(), schema, 0, Arrays.asList(CONFIG_MAPPER_FACTORY.newTaskReport())); 159 | // no error happens 160 | } 161 | 162 | @Test 163 | public void testOutputByOpen() throws Exception 164 | { 165 | ConfigSource config = utils.config(); 166 | Schema schema = CONFIG_MAPPER.map(utils.oldParserConfig(), CsvParserPlugin.PluginTask.class).getSchemaConfig().toSchema(); 167 | final PluginTask task = CONFIG_MAPPER.map(config, PluginTask.class); 168 | plugin.transaction(config, schema, 0, new 
OutputPlugin.Control() { 169 | @Override 170 | public List run(TaskSource taskSource) 171 | { 172 | return Lists.newArrayList(CONFIG_MAPPER_FACTORY.newTaskReport()); 173 | } 174 | }); 175 | TransactionalPageOutput output = plugin.open(task.dump(), schema, 0); 176 | 177 | List pages = PageTestUtils.buildPage(runtime.getBufferAllocator(), schema, 1L, 32864L, Timestamp.ofEpochSecond(1422386629), Timestamp.ofEpochSecond(1422316800), true, 123.45, "embulk"); 178 | assertThat(pages.size(), is(1)); 179 | for (Page page : pages) { 180 | output.add(page); 181 | } 182 | 183 | output.finish(); 184 | output.commit(); 185 | Thread.sleep(3000); // Need to wait until index done 186 | 187 | ElasticsearchHttpClient client = new ElasticsearchHttpClient(); 188 | Method sendRequest = ElasticsearchHttpClient.class.getDeclaredMethod("sendRequest", String.class, HttpMethod.class, PluginTask.class, String.class); 189 | sendRequest.setAccessible(true); 190 | int esMajorVersion = client.getEsMajorVersion(task); 191 | String path = esMajorVersion >= ElasticsearchHttpClient.ES_SUPPORT_TYPELESS_API_VERSION 192 | ? String.format("/%s/_search", ES_INDEX) 193 | : String.format("/%s/%s/_search", ES_INDEX, ES_INDEX_TYPE); 194 | String sort = "{\"sort\" : \"id\"}"; 195 | JsonNode response = (JsonNode) sendRequest.invoke(client, path, HttpMethod.POST, task, sort); 196 | 197 | int totalHits = esMajorVersion >= ElasticsearchTestUtils.ES_MIN_API_VERSION 198 | ? 
response.get("hits").get("total").get("value").asInt() 199 | : response.get("hits").get("total").asInt(); 200 | 201 | assertThat(totalHits, is(1)); 202 | if (response.size() > 0) { 203 | JsonNode record = response.get("hits").get("hits").get(0).get("_source"); 204 | assertThat(record.get("id").asInt(), is(1)); 205 | assertThat(record.get("account").asInt(), is(32864)); 206 | assertThat(record.get("time").asText(), is("2015-01-27T19:23:49.000+0000")); 207 | assertThat(record.get("purchase").asText(), is("2015-01-27T00:00:00.000+0000")); 208 | assertThat(record.get("flg").asBoolean(), is(true)); 209 | assertThat(record.get("score").asDouble(), is(123.45)); 210 | assertThat(record.get("comment").asText(), is("embulk")); 211 | } 212 | } 213 | 214 | @Test 215 | public void testOpenAbort() 216 | { 217 | ConfigSource config = utils.config(); 218 | Schema schema = CONFIG_MAPPER.map(utils.oldParserConfig(), CsvParserPlugin.PluginTask.class).getSchemaConfig().toSchema(); 219 | final PluginTask task = CONFIG_MAPPER.map(config, PluginTask.class); 220 | TransactionalPageOutput output = plugin.open(task.dump(), schema, 0); 221 | output.abort(); 222 | // no error happens. 
223 | } 224 | 225 | @Test 226 | public void testMode() 227 | { 228 | assertThat(Mode.values().length, is(2)); 229 | assertThat(Mode.valueOf("INSERT"), is(Mode.INSERT)); 230 | } 231 | 232 | @Test 233 | public void testAuthMethod() 234 | { 235 | assertThat(AuthMethod.values().length, is(2)); 236 | assertThat(AuthMethod.valueOf("BASIC"), is(AuthMethod.BASIC)); 237 | } 238 | 239 | @Test(expected = ConfigException.class) 240 | public void testModeThrowsConfigException() 241 | { 242 | Mode.fromString("non-exists-mode"); 243 | } 244 | } 245 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/output/elasticsearch/TestElasticsearchOutputPluginJSON.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 The Embulk project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
*/

package org.embulk.output.elasticsearch;

import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.collect.Lists;
import org.eclipse.jetty.http.HttpMethod;
import org.embulk.config.ConfigException;
import org.embulk.config.ConfigSource;
import org.embulk.config.TaskReport;
import org.embulk.config.TaskSource;
import org.embulk.output.elasticsearch.ElasticsearchOutputPluginDelegate.AuthMethod;
import org.embulk.output.elasticsearch.ElasticsearchOutputPluginDelegate.Mode;
import org.embulk.output.elasticsearch.ElasticsearchOutputPluginDelegate.PluginTask;
import org.embulk.spi.Exec;
import org.embulk.spi.OutputPlugin;
import org.embulk.spi.Page;
import org.embulk.spi.Schema;
import org.embulk.spi.TransactionalPageOutput;
import org.embulk.test.EmbulkTestRuntime;
import org.embulk.test.PageTestUtils;
import org.embulk.util.config.ConfigMapper;
import org.embulk.util.config.ConfigMapperFactory;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;

import java.lang.reflect.Method;
import java.util.Arrays;
import java.util.List;

import static org.embulk.output.elasticsearch.ElasticsearchTestUtils.ES_INDEX;
import static org.embulk.output.elasticsearch.ElasticsearchTestUtils.ES_INDEX_TYPE;
import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;

/**
 * Tests for {@link ElasticsearchOutputPlugin} driven by the JSON-style test
 * configuration and schema from {@link ElasticsearchTestUtils#configJSON()}
 * and {@link ElasticsearchTestUtils#JSONSchema()}.
 *
 * <p>The {@code testOutputByOpen*} tests write one record through the plugin
 * and read it back with a search request, so they need the nodes configured by
 * the test utilities to be reachable.
 */
public class TestElasticsearchOutputPluginJSON
{
    private static final ConfigMapperFactory CONFIG_MAPPER_FACTORY = ElasticsearchOutputPlugin.CONFIG_MAPPER_FACTORY;
    private static final ConfigMapper CONFIG_MAPPER = ElasticsearchOutputPlugin.CONFIG_MAPPER;

    @Rule
    public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
    private ElasticsearchOutputPlugin plugin;
    private ElasticsearchTestUtils utils;

    /**
     * Creates fresh test utilities, runs their pre-test preparation with a task
     * mapped from the JSON configuration, and instantiates the plugin.
     */
    @Before
    public void createResources() throws Exception
    {
        utils = new ElasticsearchTestUtils();
        utils.initializeConstant();
        final PluginTask task = CONFIG_MAPPER.map(utils.configJSON(), PluginTask.class);
        utils.prepareBeforeTest(task);

        plugin = new ElasticsearchOutputPlugin();
    }

    /**
     * Builds an {@link OutputPlugin.Control} whose {@code run} returns a list
     * holding a single new, empty task report.
     */
    private static OutputPlugin.Control singleReportControl()
    {
        return new OutputPlugin.Control()
        {
            @Override
            public List<TaskReport> run(TaskSource taskSource)
            {
                return Lists.newArrayList(CONFIG_MAPPER_FACTORY.newTaskReport());
            }
        };
    }

    /**
     * Sends a search request sorted by {@code id} against the test index and
     * returns the parsed response.
     *
     * <p>The client's private four-argument {@code sendRequest} is reached via
     * reflection. The search path omits the index type from
     * {@code ES_SUPPORT_TYPELESS_API_VERSION} onwards.
     *
     * @param client         client used to send the request
     * @param task           task passed through to {@code sendRequest}
     * @param esMajorVersion major version used to choose the search path
     * @return the value returned by {@code sendRequest}, cast to {@link JsonNode}
     */
    private static JsonNode searchSortedById(ElasticsearchHttpClient client, PluginTask task, int esMajorVersion) throws Exception
    {
        Method sendRequest = ElasticsearchHttpClient.class.getDeclaredMethod("sendRequest", String.class, HttpMethod.class, PluginTask.class, String.class);
        sendRequest.setAccessible(true);
        String path = esMajorVersion >= ElasticsearchHttpClient.ES_SUPPORT_TYPELESS_API_VERSION
                ? String.format("/%s/_search", ES_INDEX)
                : String.format("/%s/%s/_search", ES_INDEX, ES_INDEX_TYPE);
        String sort = "{\"sort\" : \"id\"}";
        return (JsonNode) sendRequest.invoke(client, path, HttpMethod.POST, task, sort);
    }

    /**
     * Reads the total hit count from a search response: {@code hits.total.value}
     * from {@code ES_MIN_API_VERSION} onwards, {@code hits.total} otherwise.
     */
    private static int totalHitsOf(JsonNode response, int esMajorVersion)
    {
        return esMajorVersion >= ElasticsearchTestUtils.ES_MIN_API_VERSION
                ? response.get("hits").get("total").get("value").asInt()
                : response.get("hits").get("total").asInt();
    }

    /** The mapped task must carry the index name from the test configuration. */
    @Test
    public void testDefaultValues()
    {
        final PluginTask task = CONFIG_MAPPER.map(utils.configJSON(), PluginTask.class);
        assertThat(task.getIndex(), is(ES_INDEX));
    }

    /** A transaction with the JSON configuration must complete without error. */
    @Test
    public void testTransaction()
    {
        ConfigSource config = utils.configJSON();
        Schema schema = utils.JSONSchema();
        plugin.transaction(config, schema, 0, singleReportControl());
        // no error happens
    }

    /** Resuming from a dumped task must complete without error. */
    @Test
    public void testResume()
    {
        ConfigSource config = utils.configJSON();
        Schema schema = utils.JSONSchema();
        final PluginTask task = CONFIG_MAPPER.map(config, PluginTask.class);
        plugin.resume(task.dump(), schema, 0, singleReportControl());
    }

    /** Cleanup with a single empty task report must complete without error. */
    @Test
    public void testCleanup()
    {
        ConfigSource config = utils.configJSON();
        Schema schema = utils.JSONSchema();
        final PluginTask task = CONFIG_MAPPER.map(config, PluginTask.class);
        plugin.cleanup(task.dump(), schema, 0, Arrays.asList(CONFIG_MAPPER_FACTORY.newTaskReport()));
        // no error happens
    }

    /**
     * Writes one fully populated record through
     * {@code open()/add()/finish()/commit()} and verifies every field by
     * searching the index.
     */
    @Test
    public void testOutputByOpen() throws Exception
    {
        ConfigSource config = utils.configJSON();
        Schema schema = utils.JSONSchema();
        final PluginTask task = CONFIG_MAPPER.map(config, PluginTask.class);
        plugin.transaction(config, schema, 0, singleReportControl());
        TransactionalPageOutput output = plugin.open(task.dump(), schema, 0);

        List<Page> pages = PageTestUtils.buildPage(Exec.getBufferAllocator(), schema, 1L, 32864L, "2015-01-27 19:23:49", "2015-01-27", true, 123.45, "embulk");
        assertThat(pages.size(), is(1));
        for (Page page : pages) {
            output.add(page);
        }

        output.finish();
        output.commit();
        Thread.sleep(1500); // Need to wait until index done

        ElasticsearchHttpClient client = new ElasticsearchHttpClient();
        int esMajorVersion = client.getEsMajorVersion(task);
        JsonNode response = searchSortedById(client, task, esMajorVersion);

        int totalHits = totalHitsOf(response, esMajorVersion);

        assertThat(totalHits, is(1));

        if (response.size() > 0) {
            JsonNode record = response.get("hits").get("hits").get(0).get("_source");
            assertThat(record.get("id").asInt(), is(1));
            assertThat(record.get("account").asInt(), is(32864));
            assertThat(record.get("time").asText(), is("2015-01-27 19:23:49"));
            assertThat(record.get("purchase").asText(), is("2015-01-27"));
            assertThat(record.get("flg").asBoolean(), is(true));
            assertThat(record.get("score").asDouble(), is(123.45));
            assertThat(record.get("comment").asText(), is("embulk"));
        }
    }

    /**
     * Writes one record whose {@code account} and {@code time} values are
     * {@code null} and verifies that both come back as JSON nulls while the
     * remaining fields keep their values.
     */
    @Test
    public void testOutputByOpenWithNulls() throws Exception
    {
        ConfigSource config = utils.configJSON();
        Schema schema = utils.JSONSchema();
        final PluginTask task = CONFIG_MAPPER.map(config, PluginTask.class);
        plugin.transaction(config, schema, 0, singleReportControl());
        TransactionalPageOutput output = plugin.open(task.dump(), schema, 0);

        List<Page> pages = PageTestUtils.buildPage(Exec.getBufferAllocator(), schema, 2L, null, null, "2015-01-27", true, 123.45, "embulk");
        assertThat(pages.size(), is(1));
        for (Page page : pages) {
            output.add(page);
        }

        output.finish();
        output.commit();
        Thread.sleep(1500); // Need to wait until index done

        ElasticsearchHttpClient client = new ElasticsearchHttpClient();
        int esMajorVersion = client.getEsMajorVersion(task);
        JsonNode response = searchSortedById(client, task, esMajorVersion);

        int totalHits = totalHitsOf(response, esMajorVersion);

        assertThat(totalHits, is(1));
        if (response.size() > 0) {
            JsonNode record = response.get("hits").get("hits").get(0).get("_source");
            assertThat(record.get("id").asInt(), is(2));
            assertTrue(record.get("account").isNull());
            assertTrue(record.get("time").isNull());
            assertThat(record.get("purchase").asText(), is("2015-01-27"));
            assertThat(record.get("flg").asBoolean(), is(true));
            assertThat(record.get("score").asDouble(), is(123.45));
            assertThat(record.get("comment").asText(), is("embulk"));
        }
    }

    /** Opening an output and aborting it immediately must complete without error. */
    @Test
    public void testOpenAbort()
    {
        ConfigSource config = utils.configJSON();
        Schema schema = utils.JSONSchema();
        final PluginTask task = CONFIG_MAPPER.map(config, PluginTask.class);
        TransactionalPageOutput output = plugin.open(task.dump(), schema, 0);
        output.abort();
        // no error happens.
    }

    /** {@link Mode} must have exactly two constants, one of them {@code INSERT}. */
    @Test
    public void testMode()
    {
        assertThat(Mode.values().length, is(2));
        assertThat(Mode.valueOf("INSERT"), is(Mode.INSERT));
    }

    /** {@link AuthMethod} must have exactly two constants, one of them {@code BASIC}. */
    @Test
    public void testAuthMethod()
    {
        assertThat(AuthMethod.values().length, is(2));
        assertThat(AuthMethod.valueOf("BASIC"), is(AuthMethod.BASIC));
    }

    /** {@code Mode.fromString} must throw {@link ConfigException} for "non-exists-mode". */
    @Test(expected = ConfigException.class)
    public void testModeThrowsConfigException()
    {
        Mode.fromString("non-exists-mode");
    }
}

--------------------------------------------------------------------------------
/src/test/resources/sample_01.csv:
--------------------------------------------------------------------------------
id,account,time,purchase,flg,score,comment
1,32864,2015-01-27 19:23:49,20150127,1,123.45,embulk
2,14824,2015-01-27 19:01:23,20150127,0,234,56,embulk
3,27559,2015-01-28 02:20:02,20150128,1,678.90,embulk
4,11270,2015-01-29 11:54:36,20150129,0,100.00,embulk

--------------------------------------------------------------------------------
/src/test/resources/sample_01.json:
--------------------------------------------------------------------------------
{"id":5, "account":32864, "time":"2015-01-27 19:23:49", "purchase":20150127, "flg": true, "score": 123.45, "comment": "embulk"}
{"id":6, "account":14824, "time":"2015-01-27 19:01:23", "purchase":20150127, "flg": false, "score": 234.56, "comment": "embulk"}
{"id":7, "account":55555, "time":"2015-01-28 02:20:02", "purchase":20150128, "flg": true, "score": 678.90, "comment": "embulk"}
{"id":8, "account":11270, "time":"2015-01-29 11:54:36", "purchase":20150129, "flg": false, "score": 100.00, "comment": "embulk"}
--------------------------------------------------------------------------------