├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── build.gradle ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── run.sh ├── settings.gradle └── src ├── main ├── java │ └── pl │ │ └── allegro │ │ └── tech │ │ └── search │ │ └── elasticsearch │ │ └── tools │ │ └── reindex │ │ ├── ReindexAction.java │ │ ├── ReindexCommandParser.java │ │ ├── ReindexInvoker.java │ │ ├── command │ │ └── ReindexCommand.java │ │ ├── connection │ │ ├── ElasticAddress.java │ │ ├── ElasticAddressParser.java │ │ ├── ElasticDataPointer.java │ │ ├── ElasticDataPointerBuilder.java │ │ ├── ElasticSearchClientFactory.java │ │ ├── ElasticSearchQuery.java │ │ ├── ElasticSearchQueryBuilder.java │ │ └── ParsingElasticsearchAddressException.java │ │ ├── process │ │ ├── BulkResult.java │ │ ├── IndexingComponent.java │ │ ├── IndexingProcess.java │ │ ├── IndexingProcessBuilder.java │ │ ├── ProcessConfiguration.java │ │ ├── ProcessExecutor.java │ │ ├── ProcessSynchronizer.java │ │ ├── QueryComponent.java │ │ ├── QueryComponentBuilder.java │ │ └── QueryProcess.java │ │ ├── query │ │ ├── BadSegmentationDefinitionException.java │ │ ├── BoundedSegment.java │ │ ├── DoubleFieldSegmentation.java │ │ ├── EmptySegmentation.java │ │ ├── PrefixSegment.java │ │ ├── QuerySegmentation.java │ │ ├── QuerySegmentationFactory.java │ │ ├── RangeSegment.java │ │ ├── RangeSegmentBuilder.java │ │ ├── SegmentationQueryTrait.java │ │ ├── StringPrefixSegmentation.java │ │ └── filter │ │ │ ├── BoundedFilterCreationStrategy.java │ │ │ ├── BoundedFilterFactory.java │ │ │ ├── PrefixFilterCreationStrategy.java │ │ │ └── RangeFilterCreationStrategy.java │ │ └── statistics │ │ ├── ProcessStatistics.java │ │ ├── ReindexingSummary.java │ │ └── ReindexingSummaryBuilder.java └── resources │ └── config.properties └── test └── java └── pl └── allegro └── tech └── search └── elasticsearch └── tools └── reindex ├── ReindexCommandParserTest.java ├── ReindexInvokerTest.java 
├── ReindexInvokerWithIndexingErrorsTest.java ├── TTLTest.java ├── connection ├── ElasticAddressAssert.java ├── ElasticAddressParserTest.java ├── ElasticDataPointerAssert.java └── ElasticSearchClientProducerTest.java ├── embeded ├── EmbeddedElasticsearchCluster.java └── IndexDocument.java ├── process ├── BulkResultAssert.java ├── IndexingComponentTest.java ├── IndexingProcessTest.java ├── ProcessExecutorTest.java ├── ProcessSynchronizerTest.java ├── QueryComponentTest.java └── QueryProcessTest.java ├── query ├── DoubleFieldSegmentationTest.java ├── EmptySegmentationTest.java ├── PrefixSegmentAssert.java ├── QuerySegmentationAssert.java ├── QuerySegmentationFactoryTest.java ├── RangeSegmentAssert.java ├── StringPrefixSegmentationTest.java └── filter │ └── BoundedFilterFactoryTest.java └── statistics ├── ProcessStatisticsTest.java └── ReindexingSummaryAssert.java /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | .gradle 3 | .idea 4 | *.iml 5 | data/* 6 | config/* 7 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | 3 | script: 4 | - "./gradlew clean build" 5 | 6 | jdk: 7 | - oraclejdk8 8 | 9 | addons: 10 | apt: 11 | packages: 12 | - oracle-java8-installer 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Elasticsearch reindex tool 2 | =========================================== 3 | 4 | Elasticsearch reindex tool provides an easy way to rebuild indexes; it is also possible to move indexes between clusters. 5 | Multiple threads are used in order to query (read) and index (write) data. In order to do that efficiently, [scan and scroll](http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/scan-scroll.html) is used to retrieve batches of documents from the old index and then the [bulk API](http://www.elasticsearch.org/guide/en/elasticsearch/client/javascript-api/current/api-reference.html) to push them into the new one. 6 | 7 | ## Elasticsearch version compatibility 8 | 9 | Master branch is compatible with version 2.x 10 | 11 | If you wish to use it with version 1.x please check out branch [1.x](https://github.com/allegro/elasticsearch-reindex-tool/tree/1.x) 12 | 13 | ## Why another reindex tool? 14 | 15 | Our idea was to speed up index rebuilding. 
To decrease the time of reindexing, our tool reads data from the old index and writes it to the new one in parallel using multiple threads. To make it possible, each thread reads a piece of data from the index based on a chosen field and its values. 16 | 17 | Currently the tool supports double type and string type fields. 18 | For double type fields, queries are spread into segments using a given list of thresholds; for string type fields, using a given list of prefixes. 19 | 20 | In the future we plan to provide more segmentation strategies. 21 | 22 | There are similar tools: 23 | 24 | * [npm elasticsearch-reindex](https://www.npmjs.com/package/elasticsearch-reindex) - tool to reindex within one thread 25 | only, no segmentation implemented, possible to reindex only filtered data 26 | * [karussell elasticsearch-reindex](https://github.com/karussell/elasticsearch-reindex) - tool to reindex within one 27 | thread only, no segmentation implemented, possible to reindex only filtered data 28 | * [geronime es-reindex](https://github.com/geronime/es-reindex) - ruby script to copy and reindex within one thread 29 | only, no strict typing on data 30 | 31 | ## Requirements 32 | 33 | * JDK 1.8 34 | * ElasticSearch 1.3+ 35 | 36 | ## Usage 37 | 38 | First create a package: 39 | 40 | `./gradlew jar` 41 | 42 | Example of reindex: 43 | 44 | **REMEMBER: use elasticsearch binary transport port (by default 9300), not the one used for rest 45 | communication (by default 9200)** 46 | 47 | Without segmentation: 48 | 49 | `./run.sh -s http://host:9300/index/type -t http://host1:9300/index1/type1 -sc cluster_name -tc 50 | cluster_name1` 51 | 52 | With segmentation by double field: 53 | 54 | `./run.sh -s http://host:9300/index/type -t http://host1:9300/index1/type1 -sc cluster_name -tc 55 | cluster_name1` 56 | 57 | `./run.sh -s http://host:9300/index/type -t http://host1:9300/index1/type1 -sc cluster_name -tc 58 | cluster_name1 -segmentationField rate.newCoolness -segmentationThresholds 0.0,0.5,0.59,0.6,0.7,0.9,1.0` 
59 | 60 | Index querying will divide data into segments based on the rate.newCoolness field: (0.0-0.5] (0.5-0.59] (0.59-0.6] 61 | (0.6-0.7] 62 | (0.7-0.9] (0.9-1.0] 63 | 64 | With segmentation by prefix on string field: 65 | 66 | `./run.sh -s http://host:9300/index/type -t http://host1:9300/index1/type1 -sc cluster_name -tc 67 | cluster_name1 -segmentationField userId -segmentationPrefixes 1,2,3,4,5,6,7` 68 | 69 | In this example index querying will divide data into segments based on the first character of the userId field: 1,2,3,4,5,6,7 70 | 71 | With query option: 72 | 73 | `./run.sh -s http://host:9300/index/type -t http://host1:9300/index1/type1 -sc cluster_name -tc 74 | cluster_name1 -query "{\"range\": {\"timestamp\" : {\"gte\" : \"2015-12-08T07:03:00.216Z\"}}}" -sort _timestamp 75 | -sortOrder DESC` 76 | 77 | In this example index querying will be filtered with the query, and reindexing will proceed in the order given by the sort field and sortOrder 78 | 79 | With target index name pattern: 80 | 81 | Use the actual source-index of the document (useful when you use wildcards for the source-index): 82 | `./run.sh -s http://host:9300/*/type -t http://host1:9300/${_index}/type1 -sc cluster_name -tc cluster_name1` 83 | 84 | Use a field from the document: 85 | `./run.sh -s http://host:9300/index/type -t http://host1:9300/${customer}/type1 -sc cluster_name -tc cluster_name1` 86 | 87 | Use a time-field from the document and apply a format: 88 | `./run.sh -s http://host:9300/index/type -t http://host1:9300/${startTime:yyyy-MM-dd}/type1 -sc cluster_name -tc cluster_name1` 89 | 90 | Options: 91 | 92 | -s, source 93 | Source f.e. http://localhost:9300/source_index/type 94 | -sc, source-cluster 95 | Source cluster name 96 | -t, target 97 | Target f.e. http://localhost:9300/target_index/type 98 | -tc, target-cluster 99 | Target cluster name 100 | -disable-cluster-sniffing 101 | Don't try to determine additional cluster nodes (e.g. 
when your network 102 | only allows access to one of the nodes) 103 | Default: false 104 | -segmentationField 105 | Segmentation field 106 | -segmentationPrefixes 107 | Segmentation prefixes (comma-separated) 108 | -segmentationThresholds 109 | Segmentation thresholds (only double type) 110 | -query 111 | Give a query to filter data 112 | -sort 113 | Give field to sort on (if query option in use) 114 | -sortOrder 115 | Give sortOrder (if query option in use) 116 | 117 | `segmentationField`, `segmentationThresholds` and `segmentationPrefixes` are optional parameters that allow querying to be split 118 | by thresholds for a field with double values or by prefixes for a string field 119 | 120 | `disable-cluster-sniffing` allows the tool to work in cases where the network-setup makes it impossible to connect to all nodes 121 | of the source or target cluster. Note that it may lead to slightly reduced reindexing rates as data can only be sent 122 | via one node then. 123 | 124 | During the reindex process, a progress message is printed after each scroll query. 125 | 126 | Example of a progress message showing the elapsed time, number of items queried and indexed, occupancy of the queue, number of concurrent reader threads and number of documents that failed to index: 127 | 128 | `11:24:59.567 [pool-1-thread-1] INFO c.y.e.t.r.s.ProcessStatistics - PT11M43.346S items: 3572086 / 3580842 (10 1) 129 | failed=0` 130 | 131 | ## License 132 | 133 | **Elasticsearch reindex tool** is published under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0). 
134 | -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | 2 | buildscript { 3 | repositories { 4 | mavenCentral() 5 | jcenter() 6 | } 7 | dependencies { 8 | classpath "io.codearte.gradle.nexus:gradle-nexus-staging-plugin:0.5.1" 9 | } 10 | } 11 | 12 | plugins { 13 | id 'java' 14 | id 'application' 15 | id 'maven-publish' 16 | id 'pl.allegro.tech.build.axion-release' version '1.2.4' 17 | id "com.bmuschko.nexus" version "2.3.1" 18 | id 'com.github.kt3k.coveralls' version '2.3.1' 19 | } 20 | 21 | apply plugin: 'io.codearte.nexus-staging' 22 | 23 | sourceCompatibility = 1.8 24 | project.group = 'pl.allegro.yukon' 25 | mainClassName = "pl.allegro.tech.search.elasticsearch.tools.reindex.ReindexAction" 26 | 27 | scmVersion { 28 | tag { 29 | prefix = 'elasticsearch-reindex-tool' 30 | } 31 | } 32 | 33 | project.version = scmVersion.version 34 | 35 | repositories { 36 | mavenCentral() 37 | } 38 | 39 | dependencies { 40 | compile group: 'org.elasticsearch', name: 'elasticsearch', version: '2.0.0' 41 | compile group: 'org.slf4j', name: 'slf4j-api', version: '1.7.7' 42 | compile group: 'ch.qos.logback', name: 'logback-classic', version: '1.0.13' 43 | compile group: 'com.beust', name: 'jcommander', version: '1.30' 44 | compile group: 'com.google.guava', name: 'guava', version: '18.0' 45 | compile group: 'org.apache.commons', name: 'commons-collections4', version: '4.0' 46 | 47 | testCompile group: 'junit', name: 'junit', version: '4.11' 48 | testCompile group: 'org.mockito', name: 'mockito-all', version: '1.10.19' 49 | testCompile group: 'cglib', name: 'cglib', version: '3.1' 50 | testCompile group: 'com.jayway.awaitility', name: 'awaitility', version: '1.6.3' 51 | testCompile group: 'org.assertj', name: 'assertj-core', version: '2.0.0' 52 | testCompile group: 'pl.pragmatists', name: 'JUnitParams', version: '1.0.4' 53 | testCompile group: 
'eu.codearte.catch-exception', name: 'catch-exception', version: '1.4.4' 54 | } 55 | 56 | run { 57 | if ( project.hasProperty("appArgs") ) { 58 | args Eval.me(appArgs) 59 | } 60 | } 61 | 62 | jar { 63 | manifest { 64 | attributes( 65 | 'Main-Class': mainClassName) 66 | } 67 | } 68 | 69 | nexusStaging { 70 | packageGroup = "pl.allegro" 71 | } 72 | 73 | modifyPom { 74 | project { 75 | name 'Elasticsearch reindex tool' 76 | description 'Elasticsearch reindex tool provides easy way to rebuild indexes' 77 | url 'https://github.com/allegro/elasticsearch-reindex-tool' 78 | inceptionYear '2015' 79 | 80 | scm { 81 | url 'https://github.com/allegro/elasticsearch-reindex-tool' 82 | connection 'scm:git@github.com:allegro/elasticsearch-reindex-tool.git' 83 | developerConnection 'scm:git@github.com:allegro/elasticsearch-reindex-tool.git' 84 | } 85 | 86 | licenses { 87 | license { 88 | name 'The Apache Software License, Version 2.0' 89 | url 'http://www.apache.org/licenses/LICENSE-2.0.txt' 90 | } 91 | } 92 | 93 | developers { 94 | developer { 95 | id 'awislowski' 96 | name 'Andrzej Wisłowski' 97 | } 98 | } 99 | } 100 | } 101 | 102 | task wrapper(type: Wrapper) { 103 | gradleVersion = '2.7' 104 | } -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allegro/elasticsearch-reindex-tool/5d98b0da9db2eb7f40f047d0b538f6a9fe594a92/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Mon Sep 28 22:00:01 CEST 2015 2 | distributionBase=GRADLE_USER_HOME 3 | distributionPath=wrapper/dists 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | 
distributionUrl=https\://services.gradle.org/distributions/gradle-2.7-all.zip 7 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ############################################################################## 4 | ## 5 | ## Gradle start up script for UN*X 6 | ## 7 | ############################################################################## 8 | 9 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 10 | DEFAULT_JVM_OPTS="" 11 | 12 | APP_NAME="Gradle" 13 | APP_BASE_NAME=`basename "$0"` 14 | 15 | # Use the maximum available, or set MAX_FD != -1 to use that value. 16 | MAX_FD="maximum" 17 | 18 | warn ( ) { 19 | echo "$*" 20 | } 21 | 22 | die ( ) { 23 | echo 24 | echo "$*" 25 | echo 26 | exit 1 27 | } 28 | 29 | # OS specific support (must be 'true' or 'false'). 30 | cygwin=false 31 | msys=false 32 | darwin=false 33 | case "`uname`" in 34 | CYGWIN* ) 35 | cygwin=true 36 | ;; 37 | Darwin* ) 38 | darwin=true 39 | ;; 40 | MINGW* ) 41 | msys=true 42 | ;; 43 | esac 44 | 45 | # Attempt to set APP_HOME 46 | # Resolve links: $0 may be a link 47 | PRG="$0" 48 | # Need this for relative symlinks. 49 | while [ -h "$PRG" ] ; do 50 | ls=`ls -ld "$PRG"` 51 | link=`expr "$ls" : '.*-> \(.*\)$'` 52 | if expr "$link" : '/.*' > /dev/null; then 53 | PRG="$link" 54 | else 55 | PRG=`dirname "$PRG"`"/$link" 56 | fi 57 | done 58 | SAVED="`pwd`" 59 | cd "`dirname \"$PRG\"`/" >&- 60 | APP_HOME="`pwd -P`" 61 | cd "$SAVED" >&- 62 | 63 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 64 | 65 | # Determine the Java command to use to start the JVM. 
66 | if [ -n "$JAVA_HOME" ] ; then 67 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 68 | # IBM's JDK on AIX uses strange locations for the executables 69 | JAVACMD="$JAVA_HOME/jre/sh/java" 70 | else 71 | JAVACMD="$JAVA_HOME/bin/java" 72 | fi 73 | if [ ! -x "$JAVACMD" ] ; then 74 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 75 | 76 | Please set the JAVA_HOME variable in your environment to match the 77 | location of your Java installation." 78 | fi 79 | else 80 | JAVACMD="java" 81 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 82 | 83 | Please set the JAVA_HOME variable in your environment to match the 84 | location of your Java installation." 85 | fi 86 | 87 | # Increase the maximum file descriptors if we can. 88 | if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then 89 | MAX_FD_LIMIT=`ulimit -H -n` 90 | if [ $? -eq 0 ] ; then 91 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 92 | MAX_FD="$MAX_FD_LIMIT" 93 | fi 94 | ulimit -n $MAX_FD 95 | if [ $? 
-ne 0 ] ; then 96 | warn "Could not set maximum file descriptor limit: $MAX_FD" 97 | fi 98 | else 99 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 100 | fi 101 | fi 102 | 103 | # For Darwin, add options to specify how the application appears in the dock 104 | if $darwin; then 105 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 106 | fi 107 | 108 | # For Cygwin, switch paths to Windows format before running java 109 | if $cygwin ; then 110 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 111 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 112 | JAVACMD=`cygpath --unix "$JAVACMD"` 113 | 114 | # We build the pattern for arguments to be converted via cygpath 115 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 116 | SEP="" 117 | for dir in $ROOTDIRSRAW ; do 118 | ROOTDIRS="$ROOTDIRS$SEP$dir" 119 | SEP="|" 120 | done 121 | OURCYGPATTERN="(^($ROOTDIRS))" 122 | # Add a user-defined pattern to the cygpath arguments 123 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 124 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 125 | fi 126 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 127 | i=0 128 | for arg in "$@" ; do 129 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 130 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 131 | 132 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 133 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 134 | else 135 | eval `echo args$i`="\"$arg\"" 136 | fi 137 | i=$((i+1)) 138 | done 139 | case $i in 140 | (0) set -- ;; 141 | (1) set -- "$args0" ;; 142 | (2) set -- "$args0" "$args1" ;; 143 | (3) set -- "$args0" "$args1" "$args2" ;; 144 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;; 145 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 146 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 147 | (7) set -- "$args0" "$args1" "$args2" "$args3" 
"$args4" "$args5" "$args6" ;; 148 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 149 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 150 | esac 151 | fi 152 | 153 | # Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules 154 | function splitJvmOpts() { 155 | JVM_OPTS=("$@") 156 | } 157 | eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS 158 | JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME" 159 | 160 | exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@" 161 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 12 | set DEFAULT_JVM_OPTS= 13 | 14 | set DIRNAME=%~dp0 15 | if "%DIRNAME%" == "" set DIRNAME=. 16 | set APP_BASE_NAME=%~n0 17 | set APP_HOME=%DIRNAME% 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 
31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windowz variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | if "%@eval[2+2]" == "4" goto 4NT_args 53 | 54 | :win9xME_args 55 | @rem Slurp the command line arguments. 56 | set CMD_LINE_ARGS= 57 | set _SKIP=2 58 | 59 | :win9xME_args_slurp 60 | if "x%~1" == "x" goto execute 61 | 62 | set CMD_LINE_ARGS=%* 63 | goto execute 64 | 65 | :4NT_args 66 | @rem Get arguments from the 4NT Shell from JP Software 67 | set CMD_LINE_ARGS=%$ 68 | 69 | :execute 70 | @rem Setup the command line 71 | 72 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 73 | 74 | @rem Execute Gradle 75 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 76 | 77 | :end 78 | @rem End local scope for the variables with windows NT shell 79 | if "%ERRORLEVEL%"=="0" goto mainEnd 80 | 81 | :fail 82 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 83 | rem the _cmd.exe /c_ return code! 84 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 85 | exit /b 1 86 | 87 | :mainEnd 88 | if "%OS%"=="Windows_NT" endlocal 89 | 90 | :omega 91 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Join arguments in a string. But not quite like you'd expect... see next line. 
4 | printf -v var "'%s', " "$@" 5 | 6 | # Remove trailing ", " 7 | var=${var%??} 8 | 9 | ./gradlew run -PappArgs="[$var]" 10 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = 'elasticsearch-reindex-tool' 2 | 3 | -------------------------------------------------------------------------------- /src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/ReindexAction.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex; 2 | 3 | public final class ReindexAction { 4 | 5 | private ReindexAction() { 6 | throw new IllegalAccessError(); 7 | } 8 | 9 | public static void main(String[] args) { 10 | ReindexCommandParser commandParser = new ReindexCommandParser(); 11 | if (commandParser.tryParse(args)) { 12 | ReindexInvoker.invokeReindexing( 13 | commandParser.getSourcePointer(), 14 | commandParser.getTargetPointer(), 15 | commandParser.getSegmentation()); 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/ReindexCommandParser.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex; 2 | 3 | import com.beust.jcommander.JCommander; 4 | import com.beust.jcommander.ParameterException; 5 | 6 | import pl.allegro.tech.search.elasticsearch.tools.reindex.command.ReindexCommand; 7 | import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticDataPointer; 8 | import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticDataPointerBuilder; 9 | import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ParsingElasticsearchAddressException; 10 | import 
pl.allegro.tech.search.elasticsearch.tools.reindex.query.QuerySegmentation; 11 | import pl.allegro.tech.search.elasticsearch.tools.reindex.query.QuerySegmentationFactory; 12 | 13 | public class ReindexCommandParser { 14 | 15 | private ElasticDataPointer sourcePointer; 16 | private ElasticDataPointer targetPointer; 17 | private QuerySegmentation segmentation; 18 | 19 | public boolean tryParse(String... args) { 20 | ReindexCommand command = new ReindexCommand(); 21 | JCommander jCommander = new JCommander(command); 22 | 23 | try { 24 | jCommander.parse(args); 25 | buildReindexParameters(command); 26 | 27 | } catch (ParameterException | ParsingElasticsearchAddressException exception) { 28 | JCommander.getConsole().println("Parameters error occurred:"); 29 | JCommander.getConsole().println(exception.getMessage()); 30 | JCommander.getConsole().println(""); 31 | 32 | jCommander.usage(); 33 | return false; 34 | } 35 | return true; 36 | } 37 | 38 | private void buildReindexParameters(ReindexCommand command) { 39 | sourcePointer = ElasticDataPointerBuilder.builder() 40 | .setClusterName(command.getSourceClusterName()) 41 | .setAddress(command.getSource()) 42 | .setSniff(!command.isDisableSniff()) 43 | .build(); 44 | targetPointer = ElasticDataPointerBuilder.builder() 45 | .setClusterName(command.getTargetClusterName()) 46 | .setAddress(command.getTarget()) 47 | .setSniff(!command.isDisableSniff()) 48 | .build(); 49 | segmentation = getFieldSegmentation(command); 50 | } 51 | 52 | private QuerySegmentation getFieldSegmentation(ReindexCommand command) { 53 | return QuerySegmentationFactory.create(command); 54 | } 55 | 56 | public ElasticDataPointer getSourcePointer() { 57 | return sourcePointer; 58 | } 59 | 60 | public ElasticDataPointer getTargetPointer() { 61 | return targetPointer; 62 | } 63 | 64 | public QuerySegmentation getSegmentation() { 65 | return segmentation; 66 | } 67 | 68 | } 69 | -------------------------------------------------------------------------------- 
/src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/ReindexInvoker.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex; 2 | 3 | import org.elasticsearch.client.Client; 4 | import org.slf4j.Logger; 5 | import org.slf4j.LoggerFactory; 6 | import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticDataPointer; 7 | import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticSearchClientFactory; 8 | import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticSearchQuery; 9 | import pl.allegro.tech.search.elasticsearch.tools.reindex.process.IndexingComponent; 10 | import pl.allegro.tech.search.elasticsearch.tools.reindex.process.IndexingProcessBuilder; 11 | import pl.allegro.tech.search.elasticsearch.tools.reindex.process.ProcessConfiguration; 12 | import pl.allegro.tech.search.elasticsearch.tools.reindex.process.ProcessExecutor; 13 | import pl.allegro.tech.search.elasticsearch.tools.reindex.process.ProcessSynchronizer; 14 | import pl.allegro.tech.search.elasticsearch.tools.reindex.process.QueryComponentBuilder; 15 | import pl.allegro.tech.search.elasticsearch.tools.reindex.process.QueryProcess; 16 | import pl.allegro.tech.search.elasticsearch.tools.reindex.query.QuerySegmentation; 17 | import pl.allegro.tech.search.elasticsearch.tools.reindex.statistics.ReindexingSummary; 18 | 19 | import java.util.Arrays; 20 | import java.util.stream.IntStream; 21 | 22 | class ReindexInvoker { 23 | 24 | private static final Logger LOGGER = LoggerFactory.getLogger(ReindexInvoker.class); 25 | 26 | private ProcessSynchronizer processSynchronizer; 27 | private ProcessExecutor processExecutor; 28 | 29 | public ReindexInvoker(int querySegmentCount) { 30 | processExecutor = new ProcessExecutor(querySegmentCount); 31 | processSynchronizer = new ProcessSynchronizer(querySegmentCount); 32 | } 33 | 34 | public static ReindexingSummary 
invokeReindexing(ElasticDataPointer sourcePointer, ElasticDataPointer targetPointer, QuerySegmentation 35 | segmentation) { 36 | ReindexInvoker reindexInvoker = new ReindexInvoker(segmentation.getSegmentsCount()); 37 | LOGGER.info("Starting"); 38 | ReindexingSummary summary = reindexInvoker.run(sourcePointer, targetPointer, segmentation); 39 | LOGGER.info("Ended"); 40 | return summary; 41 | } 42 | 43 | public ReindexingSummary run(ElasticDataPointer sourcePointer, ElasticDataPointer targetPointer, QuerySegmentation segmentation) { 44 | Client sourceClient = ElasticSearchClientFactory.createClient(sourcePointer); 45 | Client targetClient = ElasticSearchClientFactory.createClient(targetPointer); 46 | 47 | if (indexExists(sourceClient, sourcePointer.getIndexName())) { 48 | startQueriesProcesses(sourceClient, sourcePointer, segmentation); 49 | startUpdatesProcesses(targetClient, targetPointer); 50 | processSynchronizer.waitForProcessesToEnd(); 51 | } 52 | 53 | releaseResources(sourceClient, targetClient); 54 | 55 | processSynchronizer.logStats(); 56 | return processSynchronizer.getReindexingSummary(); 57 | } 58 | 59 | private boolean indexExists(Client sourceClient, String indexName) { 60 | return sourceClient.admin().indices().prepareExists(indexName).get().isExists(); 61 | } 62 | 63 | private void startUpdatesProcesses(Client client, ElasticDataPointer targetPointer) { 64 | IntStream.range(0, ProcessConfiguration.getInstance().getUpdateThreadsCount()).forEach( 65 | i -> processExecutor.startProcess( 66 | IndexingProcessBuilder.builder() 67 | .setIndexingComponent(new IndexingComponent(client)) 68 | .setDataPointer(targetPointer) 69 | .setProcessSynchronizer(processSynchronizer) 70 | .build()) 71 | ); 72 | } 73 | 74 | private void startQueriesProcesses(Client client, ElasticDataPointer sourcePointer, QuerySegmentation segmentation) { 75 | IntStream.range(0, segmentation.getSegmentsCount()) 76 | .mapToObj( 77 | i -> 78 | QueryComponentBuilder.builder() 79 | 
.setClient(client) 80 | .setDataPointer(sourcePointer) 81 | .setSegmentationField(segmentation.getFieldName()) 82 | .setBound(segmentation.getThreshold(i)) 83 | .setQuery(segmentation.getQuery()) 84 | .createQueryComponent() 85 | ).map( 86 | queryComponent -> new QueryProcess(processSynchronizer, queryComponent) 87 | ).forEach( 88 | processExecutor::startProcess 89 | ); 90 | } 91 | 92 | private void releaseResources(Client sourceClient, Client targetClient) { 93 | processExecutor.finishProcessing(); 94 | refreshTargetIndex(targetClient); 95 | disconnectElasticsearchClients(sourceClient, targetClient); 96 | } 97 | 98 | private void refreshTargetIndex(Client targetClient) { 99 | targetClient.admin().indices().prepareRefresh().get(); 100 | } 101 | 102 | private void disconnectElasticsearchClients(Client... clients) { 103 | Arrays.asList(clients) 104 | .forEach(Client::close); 105 | } 106 | 107 | } 108 | -------------------------------------------------------------------------------- /src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/command/ReindexCommand.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex.command; 2 | 3 | import java.util.List; 4 | 5 | import com.beust.jcommander.Parameter; 6 | 7 | public class ReindexCommand { 8 | 9 | @Parameter(names = { "-s", "source" }, description = "Source f.e. http://localhost:9300/source_index/type", 10 | required = true) 11 | private String source; 12 | 13 | @Parameter(names = { "-sc", "source-cluster" }, description = "Source cluster name", required = true) 14 | private String sourceClusterName; 15 | 16 | @Parameter(names = { "-tc", "target-cluster" }, description = "Target cluster name", required = true) 17 | private String targetClusterName; 18 | @Parameter(names = { "-t", "target" }, description = "Target f.e. 
http://localhost:9300/target_index/type", 19 | required = true) 20 | private String target; 21 | 22 | @Parameter(names = { "-segmentationField" }, description = "Segmentation field") 23 | private String segmentationField; 24 | 25 | @Parameter(names = { "-query" }, description = "Give a query to filter data") 26 | private String query; 27 | 28 | @Parameter(names = { "-sort" }, description = "Give field to sort on (if query option in use)") 29 | private String sort; 30 | 31 | @Parameter(names = { "-sortOrder" }, description = "Give sortOrder (if query option in use)") 32 | private String sortOrder; 33 | 34 | @Parameter(names = { "-segmentationThresholds" }, description = "Segmentation thresholds (only double type)") 35 | private List segmentationThresholds; 36 | 37 | @Parameter(names = { "-segmentationPrefixes" }, description = "Segmentation prefixes (comma-separated)") 38 | private List segmentationPrefixes; 39 | 40 | @Parameter(names = { "-disable-cluster-sniffing" }, description = "Don't try to determine additional cluster nodes (e.g. 
when your network only allows access to one of the nodes)") 41 | private boolean disableSniff; 42 | 43 | public String getSourceClusterName() { 44 | return sourceClusterName; 45 | } 46 | public String getTargetClusterName() { 47 | return targetClusterName; 48 | } 49 | public String getSegmentationField() { 50 | return segmentationField; 51 | } 52 | 53 | public String getQuery() { 54 | return query; 55 | } 56 | 57 | public String getSort() { 58 | return sort; 59 | } 60 | 61 | public String getSortOrder() { 62 | return sortOrder; 63 | } 64 | 65 | public List getSegmentationThresholds() { 66 | return segmentationThresholds; 67 | } 68 | 69 | public List getSegmentationPrefixes() { 70 | return segmentationPrefixes; 71 | } 72 | 73 | public String getSource() { 74 | return source; 75 | } 76 | 77 | public String getTarget() { 78 | return target; 79 | } 80 | 81 | public boolean isDisableSniff() { 82 | return disableSniff; 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/connection/ElasticAddress.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex.connection; 2 | 3 | public class ElasticAddress { 4 | 5 | private String host; 6 | private String indexName; 7 | private String typeName; 8 | private int port; 9 | 10 | public String getHost() { 11 | return host; 12 | } 13 | 14 | public void setHost(String host) { 15 | this.host = host; 16 | } 17 | 18 | public String getIndexName() { 19 | return indexName; 20 | } 21 | 22 | public void setIndexName(String indexName) { 23 | this.indexName = indexName; 24 | } 25 | 26 | public String getTypeName() { 27 | return typeName; 28 | } 29 | 30 | public void setTypeName(String typeName) { 31 | this.typeName = typeName; 32 | } 33 | 34 | public int getPort() { 35 | return port; 36 | } 37 | 38 | public void setPort(int port) { 39 | 
this.port = port; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/connection/ElasticAddressParser.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex.connection; 2 | 3 | import java.util.regex.Matcher; 4 | import java.util.regex.Pattern; 5 | 6 | public class ElasticAddressParser { 7 | 8 | public static final Pattern URI_PATTERN = Pattern.compile("http://([^:]+):(\\d+)/([^/]+)/([^/]+)$"); 9 | 10 | public ElasticAddress parse(String uri) { 11 | Matcher matcher = URI_PATTERN.matcher(uri); 12 | ElasticAddress elasticAddress = new ElasticAddress(); 13 | if (matcher.find()) { 14 | elasticAddress.setHost(matcher.group(1)); 15 | elasticAddress.setPort(Integer.parseInt(matcher.group(2))); 16 | elasticAddress.setIndexName(matcher.group(3)); 17 | elasticAddress.setTypeName(matcher.group(4)); 18 | return elasticAddress; 19 | } else { 20 | throw new ParsingElasticsearchAddressException("Could not parse elasticsearch url: " + uri); 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/connection/ElasticDataPointer.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex.connection; 2 | 3 | public class ElasticDataPointer { 4 | 5 | private final String host; 6 | private final String clusterName; 7 | private final String indexName; 8 | private final String typeName; 9 | private final int port; 10 | private final boolean sniff; 11 | 12 | ElasticDataPointer(String host, String clusterName, String indexName, String typeName, int port, boolean sniff) { 13 | this.host = host; 14 | this.clusterName = clusterName; 15 | this.indexName = indexName; 16 | this.typeName = 
typeName; 17 | this.port = port; 18 | this.sniff = sniff; 19 | } 20 | 21 | public String getHost() { 22 | return host; 23 | } 24 | 25 | public String getClusterName() { 26 | return clusterName; 27 | } 28 | 29 | public String getIndexName() { 30 | return indexName; 31 | } 32 | 33 | public String getTypeName() { 34 | return typeName; 35 | } 36 | 37 | public int getPort() { 38 | return port; 39 | } 40 | 41 | public boolean isSniff() { 42 | return sniff; 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/connection/ElasticDataPointerBuilder.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex.connection; 2 | 3 | public class ElasticDataPointerBuilder { 4 | 5 | private ElasticAddressParser elasticAddressParser = new ElasticAddressParser(); 6 | 7 | private String clusterName = "elasticsearch"; 8 | private ElasticAddress address; 9 | private boolean sniff = true; 10 | 11 | private ElasticDataPointerBuilder() { 12 | } 13 | 14 | public ElasticDataPointerBuilder setAddress(String uri) { 15 | address = elasticAddressParser.parse(uri); 16 | return this; 17 | } 18 | 19 | public ElasticDataPointerBuilder setClusterName(String clusterName) { 20 | this.clusterName = clusterName; 21 | return this; 22 | } 23 | 24 | public ElasticDataPointerBuilder setSniff(boolean sniff) { 25 | this.sniff = sniff; 26 | return this; 27 | } 28 | 29 | public ElasticDataPointer build() { 30 | return new ElasticDataPointer(address.getHost(), clusterName, address.getIndexName(), address.getTypeName(), 31 | address.getPort(), sniff); 32 | } 33 | 34 | public static ElasticDataPointerBuilder builder() { 35 | return new ElasticDataPointerBuilder(); 36 | } 37 | 38 | } -------------------------------------------------------------------------------- 
/src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/connection/ElasticSearchClientFactory.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex.connection; 2 | 3 | import java.net.InetSocketAddress; 4 | 5 | import org.elasticsearch.client.Client; 6 | import org.elasticsearch.client.transport.TransportClient; 7 | import org.elasticsearch.cluster.ClusterName; 8 | import org.elasticsearch.common.settings.Settings; 9 | import org.elasticsearch.common.transport.InetSocketTransportAddress; 10 | 11 | public final class ElasticSearchClientFactory { 12 | 13 | private ElasticSearchClientFactory() { 14 | } 15 | 16 | public static Client createClient(ElasticDataPointer elasticDataPointer) { 17 | Settings settings = Settings.settingsBuilder() 18 | .put("client.transport.sniff", elasticDataPointer.isSniff()) 19 | .put(ClusterName.SETTING, elasticDataPointer.getClusterName()) 20 | .build(); 21 | TransportClient client = TransportClient.builder().settings(settings).build(); 22 | client.addTransportAddress(new InetSocketTransportAddress(new InetSocketAddress(elasticDataPointer.getHost(), elasticDataPointer 23 | .getPort()))); 24 | return client; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/connection/ElasticSearchQuery.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex.connection; 2 | 3 | import org.elasticsearch.search.sort.SortOrder; 4 | 5 | public class ElasticSearchQuery { 6 | private final String query; 7 | private final String sortField; 8 | private final SortOrder sortOrder; 9 | 10 | ElasticSearchQuery(String query, String sortField, SortOrder sortOrder) { 11 | this.query = query; 12 | this.sortField = sortField; 13 | this.sortOrder = sortOrder; 14 | 
} 15 | 16 | public String getQuery() { 17 | return query; 18 | } 19 | 20 | public SortOrder getSortOrder() { 21 | return sortOrder; 22 | } 23 | 24 | public String getSortField() { 25 | return sortField; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/connection/ElasticSearchQueryBuilder.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex.connection; 2 | 3 | import com.google.common.base.Strings; 4 | import org.elasticsearch.search.sort.SortOrder; 5 | 6 | public class ElasticSearchQueryBuilder { 7 | private String query; 8 | private String sortField; 9 | private SortOrder sortOrder = SortOrder.ASC; 10 | 11 | 12 | private ElasticSearchQueryBuilder() { 13 | } 14 | 15 | public ElasticSearchQueryBuilder setSortOrder(String sortOrder) { 16 | if (!Strings.isNullOrEmpty(sortOrder)) { 17 | try { 18 | this.sortOrder = SortOrder.valueOf(sortOrder); 19 | } catch (IllegalArgumentException e) { 20 | throw new ParsingElasticsearchAddressException("SortOrder can be only ASC or DESC, not " + sortOrder); 21 | } 22 | } 23 | return this; 24 | } 25 | 26 | public ElasticSearchQueryBuilder setQuery(String query) { 27 | this.query = query; 28 | return this; 29 | } 30 | 31 | public ElasticSearchQueryBuilder setSortByField(String orderByField) { 32 | this.sortField = orderByField; 33 | return this; 34 | } 35 | 36 | public ElasticSearchQuery build() { 37 | return new ElasticSearchQuery(query, sortField, sortOrder); 38 | } 39 | 40 | public static ElasticSearchQueryBuilder builder() { 41 | return new ElasticSearchQueryBuilder(); 42 | } 43 | 44 | 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/connection/ParsingElasticsearchAddressException.java: 
/**
 * Immutable outcome of one bulk request: the number of documents reported as indexed and the
 * ids of the documents that failed.
 */
public class BulkResult {

    private final int indexedCount;

    private final Collection<String> failedIds;

    /**
     * @param indexedCount number of documents reported as indexed
     * @param failedIds    ids of the failed documents; copied, so later changes to the passed
     *                     collection do not affect this result
     */
    public BulkResult(int indexedCount, Collection<String> failedIds) {
        this.indexedCount = indexedCount;
        // Snapshot instead of a live view: the result must not change after construction.
        this.failedIds = Collections.unmodifiableCollection(new java.util.ArrayList<>(failedIds));
    }

    public int getIndexedCount() {
        return indexedCount;
    }

    /** @return number of failed document ids */
    public long getFailedCount() {
        return failedIds.size();
    }

    /** @return unmodifiable collection of the failed document ids */
    public Collection<String> getFailedIds() {
        return failedIds;
    }
}
org.elasticsearch.action.bulk.BulkItemResponse; 15 | import org.elasticsearch.action.bulk.BulkRequestBuilder; 16 | import org.elasticsearch.action.bulk.BulkResponse; 17 | import org.elasticsearch.action.index.IndexRequestBuilder; 18 | import org.elasticsearch.client.Client; 19 | import org.elasticsearch.search.SearchHit; 20 | import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticDataPointer; 21 | 22 | public class IndexingComponent { 23 | 24 | private final Client client; 25 | 26 | public IndexingComponent(Client client) { 27 | this.client = client; 28 | } 29 | 30 | private BulkRequestBuilder createBulkRequestBuilder() { 31 | return client.prepareBulk(); 32 | } 33 | 34 | public Optional indexData(ElasticDataPointer targetDataPointer, SearchHit[] hits) { 35 | BulkRequestBuilder bulkRequest = createBulkRequestBuilder(); 36 | 37 | for (SearchHit hit : hits) { 38 | Map source = hit.getSource(); 39 | 40 | IndexRequestBuilder requestBuilder = prepareIndex(targetDataPointer.getIndexName(), targetDataPointer 41 | .getTypeName(), hit.getId(), source, hit.getIndex()); 42 | if (hit.getFields().get("_ttl") != null) { 43 | requestBuilder.setTTL(hit.getFields().get("_ttl").value()); 44 | } 45 | if (hit.getFields().get("_routing") != null) { 46 | requestBuilder.setRouting(hit.getFields().get("_routing").value()); 47 | } 48 | requestBuilder.setSource(source); 49 | bulkRequest.add(requestBuilder); 50 | } 51 | return executeBulk(hits.length, bulkRequest); 52 | } 53 | 54 | private Optional executeBulk(int indexedCount, BulkRequestBuilder bulkRequest) { 55 | if (bulkRequest.numberOfActions() > 0) { 56 | BulkResponse bulkItemResponses = bulkRequest.execute().actionGet(); 57 | Set failedIds = Stream.of(bulkItemResponses.getItems()) 58 | .filter(BulkItemResponse::isFailed) 59 | .map(BulkItemResponse::getId) 60 | .collect(Collectors.toSet()); 61 | return Optional.of(new BulkResult(indexedCount, failedIds)); 62 | } 63 | return Optional.empty(); 64 | } 65 | 66 | 
private static final Pattern INDEX_NAME_REPLACEMENT_PATTERN = Pattern.compile("\\$\\{([^}]+)\\}"); 67 | private IndexRequestBuilder prepareIndex(String indexName, String typeName, String id, Map sourceFields, String sourceIndex) { 68 | String newIndexName = computeIndexName(indexName, sourceFields, sourceIndex); 69 | 70 | return client.prepareIndex(newIndexName, typeName, id); 71 | } 72 | 73 | protected static String computeIndexName(String indexName, Map sourceFields, String sourceIndex) { 74 | StringBuffer sb = new StringBuffer(); 75 | Matcher matcher = INDEX_NAME_REPLACEMENT_PATTERN.matcher(indexName); 76 | while(matcher.find()) { 77 | String fieldName = matcher.group(1); 78 | String format = null; 79 | int pos = fieldName.indexOf(':'); 80 | if(pos != -1) { 81 | format = fieldName.substring(pos + 1); 82 | fieldName = fieldName.substring(0, pos); 83 | } 84 | 85 | final String replacement; 86 | if(fieldName.equals("_index")) { 87 | replacement = sourceIndex; 88 | } else { 89 | Object obj = sourceFields.get(fieldName); 90 | Preconditions.checkNotNull(obj, "Specified source field " + fieldName + " not found for index-name replacement"); 91 | String field = obj.toString(); 92 | if(format != null) { 93 | // only support time based on milliseconds since the epoch for now 94 | SimpleDateFormat formatter = new SimpleDateFormat(format); 95 | replacement = formatter.format(new Date(Long.parseLong(field))); 96 | } else { 97 | replacement = field; 98 | } 99 | } 100 | 101 | matcher.appendReplacement(sb, replacement); 102 | } 103 | matcher.appendTail(sb); 104 | return sb.toString(); 105 | } 106 | 107 | } 108 | -------------------------------------------------------------------------------- /src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/process/IndexingProcess.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex.process; 2 | 3 | import 
org.elasticsearch.search.SearchHits; 4 | import org.slf4j.Logger; 5 | import org.slf4j.LoggerFactory; 6 | import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticDataPointer; 7 | 8 | import java.util.Optional; 9 | 10 | public class IndexingProcess implements Runnable { 11 | 12 | private static final Logger logger = LoggerFactory.getLogger(IndexingProcessBuilder.class); 13 | 14 | private IndexingComponent indexingComponent; 15 | private ProcessSynchronizer processSynchronizer; 16 | private ElasticDataPointer dataPointer; 17 | 18 | public IndexingProcess(IndexingComponent indexingComponent, ProcessSynchronizer processSynchronizer, ElasticDataPointer dataPointer) { 19 | this.indexingComponent = indexingComponent; 20 | this.processSynchronizer = processSynchronizer; 21 | this.dataPointer = dataPointer; 22 | } 23 | 24 | @Override 25 | public void run() { 26 | while (processSynchronizer.hasDataToBeIndexed()) { 27 | SearchHits hits = null; 28 | try { 29 | hits = processSynchronizer.pollDataToIndexed(); 30 | } catch (InterruptedException e) { 31 | Thread.currentThread().interrupt(); 32 | logger.error("Update Process interrupted", e); 33 | continue; 34 | } 35 | Optional bulkResult = indexingComponent.indexData(dataPointer, hits.getHits()); 36 | processBulkResult(bulkResult); 37 | } 38 | processSynchronizer.subtractWorkingUpdatesProcess(); 39 | } 40 | 41 | private void processBulkResult(Optional bulkResult) { 42 | bulkResult.ifPresent( 43 | bResult -> processSynchronizer.incrementUpdates(bResult.getIndexedCount()) 44 | ); 45 | bulkResult.filter(bResult -> bResult.getFailedCount() > 0).ifPresent( 46 | bResult -> { 47 | processSynchronizer.incrementFailures(bResult.getFailedCount()); 48 | logger.warn("Failed indexing documents with ids: {}", bResult.getFailedIds()); 49 | } 50 | ); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- 
/**
 * Process settings read from {@code config.properties} on the classpath: the indexing queue
 * size, the number of update threads and the queue poll/offer timeouts.
 *
 * <p>{@link #getInstance()} provides a lazily created shared instance.
 */
public class ProcessConfiguration {

    private static final String PROPERTIES_FILE_NAME = "config.properties";
    private static final String INDEXING_QUEUE_SIZE = "indexing.queue.size";
    private static final String UPDATE_THREADS_COUNT = "update.threads.count";
    private static final String QUEUE_POLL_TIMEOUT = "queue.poll.timeout";
    private static final String QUEUE_OFFER_TIMEOUT = "queue.offer.timeout";

    private static ProcessConfiguration instance;

    private final int queueSize;
    private final int updateThreadsCount;
    private final int queuePollTimeout;
    private final int queueOfferTimeout;

    /**
     * Loads the configuration from the classpath resource.
     *
     * @throws IllegalStateException when the resource is missing, cannot be read, or one of the
     *         required properties is absent or not an integer
     */
    public ProcessConfiguration() {
        Properties prop = new Properties();
        // try-with-resources closes the stream; a null resource is tolerated by the construct.
        try (InputStream inputStream = getClass().getClassLoader().getResourceAsStream(PROPERTIES_FILE_NAME)) {
            if (inputStream == null) {
                throw new IllegalStateException("Config file not found on classpath: " + PROPERTIES_FILE_NAME);
            }
            prop.load(inputStream);
            queueSize = readInt(prop, INDEXING_QUEUE_SIZE);
            updateThreadsCount = readInt(prop, UPDATE_THREADS_COUNT);
            queueOfferTimeout = readInt(prop, QUEUE_OFFER_TIMEOUT);
            queuePollTimeout = readInt(prop, QUEUE_POLL_TIMEOUT);
        } catch (IOException | NumberFormatException e) {
            throw new IllegalStateException("Failed reading config file: " + PROPERTIES_FILE_NAME, e);
        }
    }

    /**
     * Reads one integer property, naming the key in the error when it is missing or malformed.
     * Surrounding whitespace in the value is ignored.
     */
    private static int readInt(Properties prop, String key) {
        String value = prop.getProperty(key);
        if (value == null) {
            throw new NumberFormatException("Missing property: " + key);
        }
        try {
            return Integer.parseInt(value.trim());
        } catch (NumberFormatException e) {
            throw new NumberFormatException("Property " + key + " is not an integer: " + value);
        }
    }

    public int getQueueSize() {
        return queueSize;
    }

    public int getUpdateThreadsCount() {
        return updateThreadsCount;
    }

    public int getQueuePollTimeout() {
        return queuePollTimeout;
    }

    public int getQueueOfferTimeout() {
        return queueOfferTimeout;
    }

    /** @return the shared instance, created on first use */
    public static synchronized ProcessConfiguration getInstance() {
        if (instance == null) {
            instance = new ProcessConfiguration();
        }
        return instance;
    }

    public static void main(String[] args) {
        new ProcessConfiguration();
    }
}
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.process;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

/**
 * Runs query and update processes on a fixed-size thread pool.
 */
public class ProcessExecutor {

    private static final Logger logger = LoggerFactory.getLogger(ProcessExecutor.class);

    private ExecutorService executorService;

    /**
     * @param queryThreadsCount number of query threads; the pool is sized as this value
     *                          plus the configured update threads count
     */
    public ProcessExecutor(int queryThreadsCount) {
        int updateThreadsCount = ProcessConfiguration.getInstance().getUpdateThreadsCount();
        this.executorService = Executors.newFixedThreadPool(queryThreadsCount + updateThreadsCount);
    }

    /** Submits the given process to the pool. */
    public void startProcess(Runnable process) {
        executorService.submit(process);
    }

    /**
     * Shuts the pool down and waits up to one minute for submitted processes to finish.
     * A timeout is logged instead of being silently ignored.
     */
    public void finishProcessing() {
        try {
            executorService.shutdown();
            if (!executorService.awaitTermination(1, TimeUnit.MINUTES)) {
                logger.warn("Executor service did not terminate within 1 minute");
            }
        } catch (InterruptedException e) {
            logger.error("Closing executor service failed", e);
            Thread.currentThread().interrupt();
        }
    }

}

--------------------------------------------------------------------------------
/src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/process/ProcessSynchronizer.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.process;

import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.internal.InternalSearchHits;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.allegro.tech.search.elasticsearch.tools.reindex.statistics.ProcessStatistics;
import pl.allegro.tech.search.elasticsearch.tools.reindex.statistics.ReindexingSummary;

import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;

/**
 * Coordinates query processes (producers of search hits) and update processes
 * (consumers) through a bounded queue, two completion latches and shared statistics.
 */
public class ProcessSynchronizer {

    private static final Logger logger = LoggerFactory.getLogger(ProcessSynchronizer.class);

    private LinkedBlockingQueue<SearchHits> dataQueue;
    private CountDownLatch finishedQueringLatch;
    private CountDownLatch finishedUpdatesLatch;
    private ProcessStatistics statistics = new ProcessStatistics();
    // Passed to dataQueue.poll(...) with TimeUnit.SECONDS.
    private final int queuePollTimeout = ProcessConfiguration.getInstance().getQueuePollTimeout();
    // Passed to dataQueue.offer(...) with TimeUnit.MINUTES.
    private final int queueOfferTimeout = ProcessConfiguration.getInstance().getQueueOfferTimeout();
    private final List<Exception> exceptions = Collections.synchronizedList(new LinkedList<>());

    /**
     * @param querySegmentCount number of query processes that must finish; the update
     *                          latch is sized from the configured update threads count
     */
    public ProcessSynchronizer(int querySegmentCount) {
        this.dataQueue = new LinkedBlockingQueue<>(ProcessConfiguration.getInstance().getQueueSize());
        this.finishedQueringLatch = new CountDownLatch(querySegmentCount);
        this.finishedUpdatesLatch = new CountDownLatch(ProcessConfiguration.getInstance().getUpdateThreadsCount());
    }

    /** Blocks until both the query latch and the update latch reach zero. */
    public void waitForProcessesToEnd() {
        try {
            finishedQueringLatch.await();
            finishedUpdatesLatch.await();
        } catch (InterruptedException e) {
            logger.error("Waiting for processes to end fails", e);
            Thread.currentThread().interrupt();
        }
    }

    public long getWorkingQueryProcessCount() {
        return finishedQueringLatch.getCount();
    }

    public void subtractWorkingQueryProcess() {
        finishedQueringLatch.countDown();
    }

    public void subtractWorkingUpdatesProcess() {
        finishedUpdatesLatch.countDown();
    }

    /** Logs every recorded processing exception followed by the current counters. */
    public void logStats() {
        // forEach on the synchronized wrapper holds its lock during traversal;
        // a stream over it would iterate without synchronization.
        exceptions.forEach(
                exception -> logger.error("Processing Exception: ", exception)
        );
        statistics.log(dataQueue.size(), getWorkingQueryProcessCount());
    }

    public void incrementUpdates(int indexedCount) {
        statistics.incrementUpdates(indexedCount);
    }

    public void incrementQueries(int delta) {
        statistics.incrementQueries(delta);
    }

    public void incrementFailures(long count) {
        statistics.incrementFailures(count);
    }

    /**
     * Tries to enqueue the hits of the given response for indexing.
     *
     * @return {@code true} if the hits were enqueued; {@code false} if the queue stayed
     *         full for the whole offer timeout or the thread was interrupted. In both
     *         {@code false} cases the hits were NOT enqueued and are not counted.
     */
    public boolean tryFillQueueWithSearchHits(SearchResponse response) {
        try {
            SearchHits hits = response.getHits();
            if (!dataQueue.offer(hits, queueOfferTimeout, TimeUnit.MINUTES)) {
                // offer() reports a timeout by returning false; previously this was
                // ignored and the hits were silently dropped but counted as queried.
                logger.warn("Indexing queue still full after {} minutes, hits not enqueued", queueOfferTimeout);
                return false;
            }
            incrementQueries(hits.getHits().length);
            logStats();
            return true;
        } catch (InterruptedException e) {
            logger.error("Fill Query Queue interrupted", e);
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /**
     * Polls the next batch of hits, waiting up to the configured poll timeout.
     *
     * @return the polled hits, or empty hits when the poll timed out
     */
    public SearchHits pollDataToIndexed() throws InterruptedException {
        SearchHits polled = dataQueue.poll(queuePollTimeout, TimeUnit.SECONDS);
        if (didTimeout(polled)) {
            return InternalSearchHits.empty();
        }
        return polled;
    }

    private boolean didTimeout(SearchHits polled) {
        return polled == null;
    }

    /** @return {@code true} while a query process is still working or the queue is non-empty */
    public boolean hasDataToBeIndexed() {
        return getWorkingQueryProcessCount() > 0 || !dataQueue.isEmpty();
    }

    public ReindexingSummary getReindexingSummary() {
        return statistics.createReindexingSummary();
    }

    public void addProcessingException(Exception exception) {
        exceptions.add(exception);
    }
}

--------------------------------------------------------------------------------
/src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/process/QueryComponent.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.process;

import com.carrotsearch.hppc.cursors.ObjectCursor;
import com.google.common.base.Strings;
import org.elasticsearch.action.admin.indices.settings.get.GetSettingsRequest;
import org.elasticsearch.action.admin.indices.settings.get.GetSettingsResponse;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.search.sort.FieldSortBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticDataPointer;
import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticSearchQuery;
import pl.allegro.tech.search.elasticsearch.tools.reindex.query.BoundedSegment;
import pl.allegro.tech.search.elasticsearch.tools.reindex.query.filter.BoundedFilterFactory;

import java.util.Optional;

/**
 * Issues the scan/scroll search requests for one query segment.
 */
public class QueryComponent {
    private static final Logger logger = LoggerFactory.getLogger(QueryComponent.class);

    public static final int SCROLL_TIME_LIMIT = 60000;
    public static final int SCROLL_SHARD_LIMIT = 5000;
    public static final int SCROLL_TIMEOUT = 600000;

    private Client client;
    private Optional<String> segmentationField;
    private ElasticDataPointer dataPointer;
    private Optional<BoundedSegment> bound;
    private ElasticSearchQuery query;
    private BoundedFilterFactory boundedFilterFactory = new BoundedFilterFactory();

    QueryComponent(Client client, ElasticDataPointer dataPointer, Optional<String> segmentationField, Optional<BoundedSegment> bound, ElasticSearchQuery query) {
        this.client = client;
        this.dataPointer = dataPointer;
        this.segmentationField = segmentationField;
        this.bound = bound;
        this.query = query;
    }

    /**
     * Builds and executes the initial scan request for this segment.
     *
     * @return the response of the initial search request
     */
    public SearchResponse prepareSearchScrollRequest() {
        int sizePerShard = resolveSizePerShard();

        SearchRequestBuilder searchRequestBuilder = client.prepareSearch(dataPointer.getIndexName())
                .setTypes(dataPointer.getTypeName())
                .setSearchType(SearchType.SCAN)
                .addFields("_ttl", "_source")
                .setScroll(new TimeValue(SCROLL_TIME_LIMIT))
                .setSize(sizePerShard);

        if (!Strings.isNullOrEmpty(query.getQuery())) {
            searchRequestBuilder.setQuery(query.getQuery());
        }
        if (!Strings.isNullOrEmpty(query.getSortField())) {
            searchRequestBuilder.addSort(new FieldSortBuilder(query.getSortField()).order(query.getSortOrder()));
        }

        // NOTE(review): when a bound is present this second setQuery call is made after the
        // user query was set above; it looks like it replaces that query - confirm intent.
        bound.map(resolvedBound -> boundedFilterFactory.createBoundedFilter(segmentationField.get(), resolvedBound))
                .ifPresent(searchRequestBuilder::setQuery);

        return searchRequestBuilder.execute().actionGet();
    }

    /** Computes the per-shard page size so that one scroll page stays near SCROLL_SHARD_LIMIT hits. */
    private int resolveSizePerShard() {
        // find out how many indices and shards are affected by this query to not get huge result sets when there are very many indices affected by the name, e.g. when wildcards are used
        // otherwise we regularly run into OOMs when a query goes against a large number of indices
        // I did not find a better way to find out the number of shards than to query a list of indices and for each index query the number of shards via the settings
        GetSettingsResponse getSettingsResponse = client.admin().indices().getSettings(new GetSettingsRequest().indices(dataPointer.getIndexName())).actionGet();
        int numShards = 0;
        int numIndices = 0;
        for (ObjectCursor<Settings> settings : getSettingsResponse.getIndexToSettings().values()) {
            numShards += settings.value.getAsInt("index.number_of_shards", 0);
            numIndices++;
        }

        // Guard against zero shards: dividing by 0.0 yields Infinity, which casts to
        // Integer.MAX_VALUE and would request an effectively unbounded page size.
        int sizePerShard = (int) Math.ceil((double) SCROLL_SHARD_LIMIT / Math.max(numShards, 1));
        logger.info("Found {} indices and {} shards matching the index-pattern, thus setting the sizePerShard to {}",
                numIndices, numShards, sizePerShard);
        return sizePerShard;
    }

    /** Fetches the next page for the given scroll id. */
    public SearchResponse getNextScrolledSearchResults(String scrollId) {
        return client.prepareSearchScroll(scrollId)
                .setScroll(new TimeValue(SCROLL_TIMEOUT))
                .get();
    }

    int getResponseSize(SearchResponse response) {
        return response.getHits().getHits().length;
    }

    boolean searchResultsNotEmpty(SearchResponse response) {
        return response.getHits().getTotalHits() > 0;
    }
}

--------------------------------------------------------------------------------
/src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/process/QueryComponentBuilder.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.process;

import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticDataPointer;
import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticSearchQuery;
import pl.allegro.tech.search.elasticsearch.tools.reindex.query.BoundedSegment;
import org.elasticsearch.client.Client;

import java.util.Optional;

/**
 * Fluent builder for {@link QueryComponent}. Segmentation field and bound default to empty.
 */
public final class QueryComponentBuilder {
    private Client client;
    private ElasticDataPointer dataPointer;
    private Optional<String> segmentationField = Optional.empty();
    private Optional<BoundedSegment> bound = Optional.empty();
    private ElasticSearchQuery query;

    private QueryComponentBuilder() {
    }

    /** @return a new, empty builder */
    public static QueryComponentBuilder builder() {
        return new QueryComponentBuilder();
    }

    public QueryComponentBuilder setClient(Client client) {
        this.client = client;
        return this;
    }

    public QueryComponentBuilder setDataPointer(ElasticDataPointer dataPointer) {
        this.dataPointer = dataPointer;
        return this;
    }

    public QueryComponentBuilder setSegmentationField(Optional<String> segmentationField) {
        this.segmentationField = segmentationField;
        return this;
    }

    public QueryComponentBuilder setBound(Optional<BoundedSegment> bound) {
        this.bound = bound;
        return this;
    }

    public QueryComponentBuilder setQuery(ElasticSearchQuery query) {
        this.query = query;
        return this;
    }

    /** @return a {@link QueryComponent} built from the values set so far */
    public QueryComponent createQueryComponent() {
        return new QueryComponent(client, dataPointer, segmentationField, bound, query);
    }

}
--------------------------------------------------------------------------------
/src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/process/QueryProcess.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.process;

import org.elasticsearch.action.search.SearchResponse;

/**
 * Producer that scrolls through the results of one query segment and pushes
 * each page of hits into the {@link ProcessSynchronizer} queue.
 */
public class QueryProcess implements Runnable {

    private final ProcessSynchronizer processSynchronizer;
    private final QueryComponent queryComponent;

    public QueryProcess(ProcessSynchronizer processSynchronizer, QueryComponent queryComponent) {
        this.processSynchronizer = processSynchronizer;
        this.queryComponent = queryComponent;
    }

    /**
     * Scrolls until an empty page is returned. A page that could not be enqueued is
     * offered again; the loop stops once the thread is interrupted. Exceptions are
     * recorded in the synchronizer, and the query latch is always counted down.
     */
    @Override
    public void run() {
        try {
            SearchResponse response = queryComponent.prepareSearchScrollRequest();

            if (queryComponent.searchResultsNotEmpty(response)) {
                // tryFillQueueWithSearchHits restores the interrupt flag and returns false
                // when interrupted; an unconditional loop would then spin forever.
                while (!Thread.currentThread().isInterrupted()) {
                    if (processSynchronizer.tryFillQueueWithSearchHits(response)) {
                        response = queryComponent.getNextScrolledSearchResults(response.getScrollId());
                        if (queryComponent.getResponseSize(response) == 0) {
                            break;
                        }
                    }
                }
            }
        } catch (final Exception e) {
            processSynchronizer.addProcessingException(e);
        } finally {
            processSynchronizer.subtractWorkingQueryProcess();
        }
    }

}

--------------------------------------------------------------------------------
/src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/query/BadSegmentationDefinitionException.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.query;

/**
 * Unchecked exception signalling an unusable segmentation definition.
 */
public class BadSegmentationDefinitionException extends RuntimeException {

    public BadSegmentationDefinitionException(String message) {
        super(message);
    }
}

--------------------------------------------------------------------------------
/src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/query/BoundedSegment.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.query;

/**
 * Marker interface for segment bound types (it declares no methods).
 */
public interface BoundedSegment {

}

--------------------------------------------------------------------------------
/src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/query/DoubleFieldSegmentation.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.query;

import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticSearchQuery;

import java.util.Collections;
import java.util.List;
import java.util.Optional;

/**
 * Segmentation of a field by a list of double thresholds: each pair of consecutive
 * thresholds forms one {@link RangeSegment}.
 */
public final class DoubleFieldSegmentation extends SegmentationQueryTrait implements QuerySegmentation {

    private final List<Double> thresholds;

    private final Optional<String> fieldName;

    private DoubleFieldSegmentation(String fieldName, List<Double> thresholds, ElasticSearchQuery query) {
        super(query);
        this.fieldName = Optional.of(fieldName);
        this.thresholds = Collections.unmodifiableList(thresholds);
    }

    public static DoubleFieldSegmentation create(String fieldName, List<Double> thresholds, ElasticSearchQuery query) {
        return new DoubleFieldSegmentation(fieldName, thresholds, query);
    }

    @Override
    public Optional<String> getFieldName() {
        return fieldName;
    }

    /** @return number of threshold pairs; never negative, even for an empty threshold list */
    @Override
    public int getSegmentsCount() {
        return Math.max(0, thresholds.size() - 1);
    }

    /** @return the range between threshold {@code i} (open lower bound) and {@code i + 1} (upper bound) */
    @Override
    public Optional<BoundedSegment> getThreshold(int i) {
        RangeSegment segmentation =
                RangeSegmentBuilder.builder()
                        .setLowerOpenBound(thresholds.get(i))
                        .setUpperBound(thresholds.get(i + 1))
                        .createRangeSegment();
        return Optional.of(segmentation);
    }
}

--------------------------------------------------------------------------------
/src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/query/EmptySegmentation.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.query;

import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticSearchQuery;

import java.util.Optional;

/**
 * Segmentation with a single segment, no field name and no bound.
 */
public final class EmptySegmentation extends SegmentationQueryTrait implements QuerySegmentation {

    private EmptySegmentation(ElasticSearchQuery query) {
        super(query);
    }

    public static EmptySegmentation createEmptySegmentation(ElasticSearchQuery query) {
        return new EmptySegmentation(query);
    }

    @Override
    public Optional<String> getFieldName() {
        return Optional.empty();
    }

    @Override
    public int getSegmentsCount() {
        return 1;
    }

    @Override
    public Optional<BoundedSegment> getThreshold(int i) {
        return Optional.empty();
    }

}

--------------------------------------------------------------------------------
/src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/query/PrefixSegment.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.query;

/**
 * Segment bound described by a string prefix.
 */
public class PrefixSegment implements BoundedSegment {

    private final String prefix;

    public PrefixSegment(String prefix) {
        this.prefix = prefix;
    }

    public String getPrefix() {
        return prefix;
    }
}

--------------------------------------------------------------------------------
/src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/query/QuerySegmentation.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.query;

import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticSearchQuery;

import java.util.Optional;

/**
 * Describes how a reindexing query is split into segments.
 */
public interface QuerySegmentation {

    /** @return the field used for segmentation, if any */
    Optional<String> getFieldName();

    /** @return the number of segments */
    int getSegmentsCount();

    /** @return the bound of segment {@code i}, if the segmentation defines one */
    Optional<BoundedSegment> getThreshold(int i);

    /** @return the query shared by all segments */
    ElasticSearchQuery getQuery();
}

--------------------------------------------------------------------------------
/src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/query/QuerySegmentationFactory.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.query;

import pl.allegro.tech.search.elasticsearch.tools.reindex.command.ReindexCommand;
import org.apache.commons.collections4.CollectionUtils;
import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticSearchQuery;
import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticSearchQueryBuilder;

/**
 * Chooses the {@link QuerySegmentation} implementation matching a {@link ReindexCommand}.
 */
public final class QuerySegmentationFactory {

    private QuerySegmentationFactory() {
    }

    /**
     * @return an empty segmentation when no segmentation field is given; otherwise a
     *         threshold-based one if thresholds are present, else a prefix-based one
     *         if prefixes are present
     * @throws BadSegmentationDefinitionException if a segmentation field is given but
     *                                            neither thresholds nor prefixes are
     */
    public static QuerySegmentation create(ReindexCommand command) {
        ElasticSearchQuery query = buildQuery(command);
        if (command.getSegmentationField() == null) {
            return EmptySegmentation.createEmptySegmentation(query);
        }
        if (CollectionUtils.isNotEmpty(command.getSegmentationThresholds())) {
            return DoubleFieldSegmentation.create(command.getSegmentationField(), command.getSegmentationThresholds(), query);
        }
        if (CollectionUtils.isNotEmpty(command.getSegmentationPrefixes())) {
            return StringPrefixSegmentation.create(command.getSegmentationField(), command.getSegmentationPrefixes(), query);
        }
        throw new BadSegmentationDefinitionException("Bad segmentation creation params");
    }

    private static ElasticSearchQuery buildQuery(ReindexCommand command) {
        return ElasticSearchQueryBuilder.builder()
                .setQuery(command.getQuery())
                .setSortByField(command.getSort())
                .setSortOrder(command.getSortOrder())
                .build();
    }
}

--------------------------------------------------------------------------------
/src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/query/RangeSegment.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.query;

/**
 * Segment bound described by an open lower bound and an upper bound.
 */
public class RangeSegment implements BoundedSegment {

    private final Double upperBound;
    private final Double lowerOpenBound;

    /** Note the argument order: upper bound first, then the open lower bound. */
    public RangeSegment(Double upperBound, Double lowerOpenBound) {
        this.upperBound = upperBound;
        this.lowerOpenBound = lowerOpenBound;
    }

    public Double getLowerOpenBound() {
        return lowerOpenBound;
    }

    public Double getUpperBound() {
        return upperBound;
    }
}

--------------------------------------------------------------------------------
/src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/query/RangeSegmentBuilder.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.query;

/**
 * Fluent builder for {@link RangeSegment}.
 */
public class RangeSegmentBuilder {
    private Double upperBound;
    private Double lowerOpenBound;

    public static RangeSegmentBuilder builder() {
        return new RangeSegmentBuilder();
    }

    public RangeSegmentBuilder setUpperBound(Double upperBound) {
        this.upperBound = upperBound;
        return this;
    }

    public RangeSegmentBuilder setLowerOpenBound(Double lowerOpenBound) {
        this.lowerOpenBound = lowerOpenBound;
        return this;
    }

    public RangeSegment createRangeSegment() {
        return new RangeSegment(upperBound, lowerOpenBound);
    }
}
--------------------------------------------------------------------------------
/src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/query/SegmentationQueryTrait.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.query;

import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticSearchQuery;

/**
 * Base class holding the {@link ElasticSearchQuery} shared by segmentation implementations.
 */
public class SegmentationQueryTrait {

    private final ElasticSearchQuery query;

    public SegmentationQueryTrait(ElasticSearchQuery query) {
        this.query = query;
    }

    public ElasticSearchQuery getQuery() {
        return query;
    }

}

--------------------------------------------------------------------------------
/src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/query/StringPrefixSegmentation.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.query;

import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticSearchQuery;

import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

/**
 * Segmentation of a field by string prefixes: one {@link PrefixSegment} per prefix.
 */
public class StringPrefixSegmentation extends SegmentationQueryTrait implements QuerySegmentation {

    private final String fieldName;

    private final List<PrefixSegment> prefixSegmentsList;

    public StringPrefixSegmentation(String fieldName, List<String> prefixesList, ElasticSearchQuery query) {
        super(query);
        this.fieldName = fieldName;
        this.prefixSegmentsList = Collections.unmodifiableList(
                prefixesList.stream().map(PrefixSegment::new).collect(Collectors.toList()));
    }

    public static QuerySegmentation create(String fieldName, List<String> segmentationPrefixes, ElasticSearchQuery query) {
        return new StringPrefixSegmentation(fieldName, segmentationPrefixes, query);
    }

    @Override
    public Optional<String> getFieldName() {
        return Optional.of(fieldName);
    }

    @Override
    public int getSegmentsCount() {
        return prefixSegmentsList.size();
    }

    @Override
    public Optional<BoundedSegment> getThreshold(int i) {
        return Optional.of(prefixSegmentsList.get(i));
    }
}

--------------------------------------------------------------------------------
/src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/query/filter/BoundedFilterCreationStrategy.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.query.filter;

import org.elasticsearch.index.query.QueryBuilder;

/**
 * Creates a query for one kind of segment bound.
 *
 * @param <SegmentType> the bound type this strategy handles
 */
public interface BoundedFilterCreationStrategy<SegmentType> {

    QueryBuilder create(String fieldName, SegmentType resolvedBound);

}

--------------------------------------------------------------------------------
/src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/query/filter/BoundedFilterFactory.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.query.filter;

import org.elasticsearch.index.query.QueryBuilder;

import com.google.common.collect.ImmutableMap;

import pl.allegro.tech.search.elasticsearch.tools.reindex.query.BoundedSegment;
import pl.allegro.tech.search.elasticsearch.tools.reindex.query.PrefixSegment;
import pl.allegro.tech.search.elasticsearch.tools.reindex.query.RangeSegment;

/**
 * Dispatches a {@link BoundedSegment} to the creation strategy registered for its exact class.
 */
public class BoundedFilterFactory {

    private final ImmutableMap<Class<? extends BoundedSegment>, BoundedFilterCreationStrategy> strategy =
            new ImmutableMap.Builder<Class<? extends BoundedSegment>, BoundedFilterCreationStrategy>()
                    .put(PrefixSegment.class, new PrefixFilterCreationStrategy())
                    .put(RangeSegment.class, new RangeFilterCreationStrategy())
                    .build();

    /**
     * @throws IllegalArgumentException if no strategy is registered for the segment's class
     *                                  (the lookup is by exact class, so subclasses do not match)
     */
    // The map stores raw strategies; each one is registered under the segment class it
    // accepts, so the unchecked call below receives the type it was written for.
    @SuppressWarnings("unchecked")
    public QueryBuilder createBoundedFilter(String fieldName, BoundedSegment boundedSegment) {
        BoundedFilterCreationStrategy creationStrategy = strategy.get(boundedSegment.getClass());
        if (creationStrategy == null) {
            throw new IllegalArgumentException(
                    "No filter creation strategy for segment type: " + boundedSegment.getClass().getName());
        }
        return creationStrategy.create(fieldName, boundedSegment);
    }

}

--------------------------------------------------------------------------------
/src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/query/filter/PrefixFilterCreationStrategy.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.query.filter;

import org.elasticsearch.index.query.PrefixQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;

import pl.allegro.tech.search.elasticsearch.tools.reindex.query.PrefixSegment;

/**
 * Builds a prefix query on the field from a {@link PrefixSegment}.
 */
public class PrefixFilterCreationStrategy implements BoundedFilterCreationStrategy<PrefixSegment> {
    @Override
    public QueryBuilder create(String fieldName, PrefixSegment resolvedBound) {
        return new PrefixQueryBuilder(fieldName, resolvedBound.getPrefix());
    }

}

--------------------------------------------------------------------------------
/src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/query/filter/RangeFilterCreationStrategy.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.query.filter;

import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.RangeQueryBuilder;

import pl.allegro.tech.search.elasticsearch.tools.reindex.query.RangeSegment;

/**
 * Builds a range query on the field from a {@link RangeSegment}:
 * greater than the open lower bound, less than or equal to the upper bound.
 */
public class RangeFilterCreationStrategy implements BoundedFilterCreationStrategy<RangeSegment> {
    @Override
    public QueryBuilder create(String fieldName, RangeSegment resolvedBound) {
        return new RangeQueryBuilder(fieldName)
                .lte(resolvedBound.getUpperBound())
                .gt(resolvedBound.getLowerOpenBound());
    }

}

--------------------------------------------------------------------------------
/src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/statistics/ProcessStatistics.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.statistics;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.time.Duration;
import java.time.Instant;
import java.util.concurrent.atomic.AtomicLong;

/**
 * Atomic counters of queried, updated and failed documents, plus the instant
 * this object was created (used to report elapsed time).
 */
public class ProcessStatistics {

    private static final Logger logger = LoggerFactory.getLogger(ProcessStatistics.class);

    private final Instant started = Instant.now();
    private final AtomicLong updatesCounter = new AtomicLong();
    private final AtomicLong queriesCounter = new AtomicLong();
    private final AtomicLong failuresCounter = new AtomicLong();


    public void incrementUpdates(int indexedCount) {
        updatesCounter.getAndAdd(indexedCount);
    }

    public void incrementQueries(long delta) {
        queriesCounter.getAndAdd(delta);
    }

    public void incrementFailures(long delta) {
        failuresCounter.addAndGet(delta);
    }

    /**
     * Logs, in order: elapsed time, updates, queries, the given queued count,
     * the given query process count, and failures.
     */
    public void log(int queuedCount, long queryProcessCount) {
        logger.info("{} items: {} / {} ({} {}) failed={}",
                Duration.between(started, Instant.now()),
                updatesCounter.get(), queriesCounter.get(), queuedCount, queryProcessCount, failuresCounter.get());
    }

    /** @return a summary holding the current values of the three counters */
    public ReindexingSummary createReindexingSummary() {
        return ReindexingSummaryBuilder.builder()
                .setQueried(queriesCounter.get())
                .setIndexed(updatesCounter.get())
                .setFailedIndexed(failuresCounter.get())
                .createReindexingSummary();
    }
}

--------------------------------------------------------------------------------
/src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/statistics/ReindexingSummary.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.statistics;

/**
 * Immutable holder of the three reindexing counters: queried, indexed and failed.
 */
public class ReindexingSummary {
    private final long queried;
    private final long indexed;
    private final long failedIndexed;

    // Package-private: instances are created through ReindexingSummaryBuilder.
    ReindexingSummary(long queried, long indexed, long failedIndexed) {
        this.queried = queried;
        this.indexed = indexed;
        this.failedIndexed = failedIndexed;
    }

    /** @return the queried count */
    public long getQueried() {
        return this.queried;
    }

    /** @return the indexed count */
    public long getIndexed() {
        return this.indexed;
    }

    /** @return the failed count */
    public long getFailedIndexed() {
        return this.failedIndexed;
    }
}

--------------------------------------------------------------------------------
/src/main/java/pl/allegro/tech/search/elasticsearch/tools/reindex/statistics/ReindexingSummaryBuilder.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.statistics;

/**
 * Fluent builder for {@link ReindexingSummary}; unset counters stay at zero.
 */
public final class ReindexingSummaryBuilder {
    private long queried;
    private long indexed;
    private long failedIndexed;

    private ReindexingSummaryBuilder() {
    }

    /** @return a new builder with all counters at zero */
    public static ReindexingSummaryBuilder builder() {
        return new ReindexingSummaryBuilder();
    }

    public ReindexingSummaryBuilder setQueried(long queried) {
        this.queried = queried;
        return this;
    }

    public ReindexingSummaryBuilder setIndexed(long indexed) {
        this.indexed = indexed;
        return this;
    }

    public ReindexingSummaryBuilder setFailedIndexed(long failedIndexed) {
        this.failedIndexed = failedIndexed;
        return this;
    }

    /** @return a summary holding the counters set so far */
    public ReindexingSummary createReindexingSummary() {
        return new ReindexingSummary(queried, indexed, failedIndexed);
    }
}
-------------------------------------------------------------------------------- /src/main/resources/config.properties: -------------------------------------------------------------------------------- 1 | update.threads.count=5 2 | indexing.queue.size=10 3 | queue.poll.timeout=10 4 | queue.offer.timeout=4 -------------------------------------------------------------------------------- /src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/ReindexCommandParserTest.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex; 2 | 3 | import junitparams.JUnitParamsRunner; 4 | import junitparams.Parameters; 5 | import org.elasticsearch.search.sort.SortOrder; 6 | import org.junit.Assert; 7 | import org.junit.Test; 8 | import org.junit.runner.RunWith; 9 | import pl.allegro.tech.search.elasticsearch.tools.reindex.query.PrefixSegment; 10 | import pl.allegro.tech.search.elasticsearch.tools.reindex.query.RangeSegment; 11 | 12 | import java.util.Optional; 13 | 14 | import static pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticDataPointerAssert.assertThat; 15 | import static pl.allegro.tech.search.elasticsearch.tools.reindex.query.PrefixSegmentAssert.assertThat; 16 | import static pl.allegro.tech.search.elasticsearch.tools.reindex.query.QuerySegmentationAssert.assertThat; 17 | import static pl.allegro.tech.search.elasticsearch.tools.reindex.query.RangeSegmentAssert.assertThat; 18 | 19 | @RunWith(JUnitParamsRunner.class) 20 | public class ReindexCommandParserTest { 21 | 22 | @Test 23 | public void parsesCommandWithNoSegmentation() throws Exception { 24 | //given 25 | ReindexCommandParser commandParser = new ReindexCommandParser(); 26 | //when 27 | boolean result = commandParser.tryParse(createArgumentArray( 28 | "-sc", "sourceClusterName", 29 | "-tc", "targetClusterName", 30 | "-s", "http://sourceHost1:9333/source_index/source_type", 31 | "-t", 
"http://targetHost1:9333/target_index/target_type" 32 | )); 33 | //then 34 | Assert.assertEquals(true, result); 35 | assertThat(commandParser.getSourcePointer()) 36 | .hasHost("sourceHost1") 37 | .hasClusterName("sourceClusterName") 38 | .hasPort(9333) 39 | .hasIndexName("source_index") 40 | .hasTypeName("source_type"); 41 | assertThat(commandParser.getTargetPointer()) 42 | .hasHost("targetHost1") 43 | .hasClusterName("targetClusterName") 44 | .hasPort(9333) 45 | .hasIndexName("target_index") 46 | .hasTypeName("target_type"); 47 | Assert.assertEquals(Optional.empty(), commandParser.getSegmentation().getFieldName()); 48 | } 49 | 50 | private Object[] prepareDoubleSegmentationParams() { 51 | return new Object[] { 52 | new Object[] { "0,1", 0.0, 1.0 }, 53 | new Object[] { "1.1,230.1", 1.1, 230.1 } 54 | }; 55 | } 56 | 57 | @Test 58 | @Parameters(method = "prepareDoubleSegmentationParams") 59 | public void parsesCommandWithDoubleSegmentation(String segmentationThresholds, double lowerBound, double upperBound) throws 60 | Exception { 61 | //given 62 | ReindexCommandParser commandParser = new ReindexCommandParser(); 63 | //when 64 | boolean result = commandParser.tryParse(createArgumentArray( 65 | "-sc", "sourceClusterName", 66 | "-tc", "targetClusterName", 67 | "-s", "http://sourceHost1:9333/source_index/source_type", 68 | "-t", "http://targetHost1:9333/target_index/target_type", 69 | "-segmentationField", "fieldName", 70 | "-segmentationThresholds", segmentationThresholds 71 | )); 72 | //then 73 | Assert.assertEquals(true, result); 74 | assertThat(commandParser.getSegmentation()) 75 | .hasFileName("fieldName") 76 | .hasSegmentsCount(1); 77 | assertThat((RangeSegment) commandParser.getSegmentation().getThreshold(0).get()) 78 | .hasUpperBound(upperBound) 79 | .hasLowerOpenBound(lowerBound); 80 | } 81 | 82 | @Test 83 | public void parsesCommandWithStringPrefixSegmentation() throws Exception { 84 | //given 85 | ReindexCommandParser commandParser = new 
ReindexCommandParser(); 86 | //when 87 | boolean result = commandParser.tryParse(createArgumentArray( 88 | "-sc", "sourceClusterName", 89 | "-tc", "targetClusterName", 90 | "-s", "http://sourceHost1:9333/source_index/source_type", 91 | "-t", "http://targetHost1:9333/target_index/target_type", 92 | "-segmentationField", "fieldName", 93 | "-segmentationPrefixes", "1,2" 94 | )); 95 | //then 96 | Assert.assertEquals(true, result); 97 | assertThat(commandParser.getSegmentation()) 98 | .hasFileName("fieldName") 99 | .hasSegmentsCount(2); 100 | assertThat((PrefixSegment) commandParser.getSegmentation().getThreshold(0).get()) 101 | .hasPrefix("1"); 102 | assertThat((PrefixSegment) commandParser.getSegmentation().getThreshold(1).get()) 103 | .hasPrefix("2"); 104 | } 105 | 106 | @Test 107 | public void doNotParseCommandWithNoTargetType() throws Exception { 108 | //given 109 | ReindexCommandParser commandParser = new ReindexCommandParser(); 110 | //when 111 | boolean result = commandParser.tryParse(createArgumentArray( 112 | "-sc", "sourceClusterName", 113 | "-s", "http://sourceHost1:9333/source_index/source_type", 114 | "-t", "http://targetHost1:9333/target_index/target_type" 115 | )); 116 | //then 117 | Assert.assertEquals(false, result); 118 | 119 | } 120 | 121 | @Test 122 | public void parseCommandWithoutSortOrderAndQueryAndSortField() { 123 | //given 124 | ReindexCommandParser commandParser = new ReindexCommandParser(); 125 | //when 126 | commandParser.tryParse(createArgumentArray( 127 | "-sc", "sourceClusterName", 128 | "-tc", "targetClusterName", 129 | "-s", "http://sourceHost1:9333/source_index/source_type", 130 | "-t", "http://targetHost1:9333/target_index/target_type" 131 | )); 132 | //then 133 | Assert.assertNull(commandParser.getSegmentation().getQuery().getQuery()); 134 | } 135 | 136 | @Test 137 | public void parseCommandWithQuery() { 138 | //given 139 | ReindexCommandParser commandParser = new ReindexCommandParser(); 140 | //when 141 | String query = "{range\": 
{\"_timestamp\" : {\"gte\" : 1447142880000}}}"; 142 | commandParser.tryParse(createArgumentArray( 143 | "-sc", "sourceClusterName", 144 | "-tc", "targetClusterName", 145 | "-s", "http://sourceHost1:9333/source_index/source_type", 146 | "-t", "http://targetHost1:9333/target_index/target_type", 147 | "-query", query 148 | )); 149 | //then 150 | Assert.assertEquals(query, commandParser.getSegmentation().getQuery().getQuery()); 151 | } 152 | 153 | @Test 154 | public void parseCommandWithSortField() { 155 | //given 156 | ReindexCommandParser commandParser = new ReindexCommandParser(); 157 | //when 158 | boolean result = commandParser.tryParse(createArgumentArray( 159 | "-sc", "sourceClusterName", 160 | "-tc", "targetClusterName", 161 | "-s", "http://sourceHost1:9333/source_index/source_type", 162 | "-t", "http://targetHost1:9333/target_index/target_type", 163 | "-sort", "_timestamp" 164 | )); 165 | //then 166 | Assert.assertEquals("_timestamp", commandParser.getSegmentation().getQuery().getSortField()); 167 | } 168 | 169 | @Test 170 | public void parseCommandWithSortOrderDESC() { 171 | //given 172 | ReindexCommandParser commandParser = new ReindexCommandParser(); 173 | //when 174 | boolean result = commandParser.tryParse(createArgumentArray( 175 | "-sc", "sourceClusterName", 176 | "-tc", "targetClusterName", 177 | "-s", "http://sourceHost1:9333/source_index/source_type", 178 | "-t", "http://targetHost1:9333/target_index/target_type", 179 | "-sortOrder", "DESC" 180 | )); 181 | //then 182 | Assert.assertEquals(SortOrder.DESC, commandParser.getSegmentation().getQuery().getSortOrder()); 183 | } 184 | 185 | @Test 186 | public void parseCommandWithSortOrderASC() { 187 | //given 188 | ReindexCommandParser commandParser = new ReindexCommandParser(); 189 | //when 190 | boolean result = commandParser.tryParse(createArgumentArray( 191 | "-sc", "sourceClusterName", 192 | "-tc", "targetClusterName", 193 | "-s", "http://sourceHost1:9333/source_index/source_type", 194 | "-t", 
"http://targetHost1:9333/target_index/target_type", 195 | "-sortOrder", "ASC" 196 | )); 197 | //then 198 | Assert.assertEquals(SortOrder.ASC, commandParser.getSegmentation().getQuery().getSortOrder()); 199 | } 200 | 201 | @Test 202 | public void parseCommandWithSortOrderDefaultASC() { 203 | //given 204 | ReindexCommandParser commandParser = new ReindexCommandParser(); 205 | //when 206 | boolean result = commandParser.tryParse(createArgumentArray( 207 | "-sc", "sourceClusterName", 208 | "-tc", "targetClusterName", 209 | "-s", "http://sourceHost1:9333/source_index/source_type", 210 | "-t", "http://targetHost1:9333/target_index/target_type" 211 | )); 212 | //then 213 | Assert.assertEquals(SortOrder.ASC, commandParser.getSegmentation().getQuery().getSortOrder()); 214 | } 215 | 216 | @Test 217 | public void parseDisableClusterSniffing() { 218 | //given 219 | ReindexCommandParser commandParser = new ReindexCommandParser(); 220 | //when 221 | boolean result = commandParser.tryParse(createArgumentArray( 222 | "-sc", "sourceClusterName", 223 | "-tc", "targetClusterName", 224 | "-s", "http://sourceHost1:9333/source_index/source_type", 225 | "-t", "http://targetHost1:9333/target_index/target_type", 226 | "-disable-cluster-sniffing" 227 | )); 228 | //then 229 | Assert.assertFalse(commandParser.getSourcePointer().isSniff()); 230 | Assert.assertFalse(commandParser.getTargetPointer().isSniff()); 231 | } 232 | 233 | @Test 234 | public void parseWhenNotDisabledClusterSniffing() { 235 | //given 236 | ReindexCommandParser commandParser = new ReindexCommandParser(); 237 | //when 238 | boolean result = commandParser.tryParse(createArgumentArray( 239 | "-sc", "sourceClusterName", 240 | "-tc", "targetClusterName", 241 | "-s", "http://sourceHost1:9333/source_index/source_type", 242 | "-t", "http://targetHost1:9333/target_index/target_type" 243 | )); 244 | //then 245 | Assert.assertTrue(commandParser.getSourcePointer().isSniff()); 246 | 
Assert.assertTrue(commandParser.getTargetPointer().isSniff()); 247 | 248 | } 249 | 250 | private String[] createArgumentArray(String... args) { 251 | return args; 252 | } 253 | } 254 | -------------------------------------------------------------------------------- /src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/ReindexInvokerTest.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex; 2 | 3 | import com.google.common.collect.ImmutableMap; 4 | import com.google.common.collect.Lists; 5 | import org.junit.AfterClass; 6 | import org.junit.Before; 7 | import org.junit.BeforeClass; 8 | import org.junit.Test; 9 | import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticDataPointer; 10 | import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticSearchQuery; 11 | import pl.allegro.tech.search.elasticsearch.tools.reindex.embeded.EmbeddedElasticsearchCluster; 12 | import pl.allegro.tech.search.elasticsearch.tools.reindex.embeded.IndexDocument; 13 | import pl.allegro.tech.search.elasticsearch.tools.reindex.query.DoubleFieldSegmentation; 14 | import pl.allegro.tech.search.elasticsearch.tools.reindex.query.EmptySegmentation; 15 | import pl.allegro.tech.search.elasticsearch.tools.reindex.query.StringPrefixSegmentation; 16 | import pl.allegro.tech.search.elasticsearch.tools.reindex.statistics.ReindexingSummary; 17 | 18 | import java.util.stream.IntStream; 19 | import java.util.stream.Stream; 20 | 21 | import static org.junit.Assert.assertEquals; 22 | import static org.junit.Assert.assertFalse; 23 | import static pl.allegro.tech.search.elasticsearch.tools.reindex.statistics.ReindexingSummaryAssert.assertThat; 24 | 25 | public class ReindexInvokerTest { 26 | 27 | private static final String SOURCE_INDEX = "sourceindex"; 28 | private static final String TARGET_INDEX = "targetindex"; 29 | public static final String DATA_TYPE = "type"; 
30 | 31 | private static EmbeddedElasticsearchCluster embeddedElasticsearchCluster; 32 | 33 | @BeforeClass 34 | public static void setUp() throws Exception { 35 | embeddedElasticsearchCluster = EmbeddedElasticsearchCluster.createDataNode(); 36 | } 37 | 38 | 39 | @AfterClass 40 | public static void tearDown() throws Exception { 41 | embeddedElasticsearchCluster.close(); 42 | } 43 | 44 | @Before 45 | public void clearTargetIndex() { 46 | embeddedElasticsearchCluster.deleteIndex(SOURCE_INDEX); 47 | embeddedElasticsearchCluster.deleteIndex(TARGET_INDEX); 48 | } 49 | 50 | @Test 51 | public void indexingWithoutSegmentingEmpty() throws Exception { 52 | //given 53 | embeddedElasticsearchCluster.recreateIndex(SOURCE_INDEX); 54 | ElasticDataPointer sourceDataPointer = embeddedElasticsearchCluster.createDataPointer(SOURCE_INDEX); 55 | ElasticDataPointer targetDataPointer = embeddedElasticsearchCluster.createDataPointer(TARGET_INDEX); 56 | ElasticSearchQuery elasticSearchQuery = embeddedElasticsearchCluster.createInitialQuery(""); 57 | //when 58 | ReindexInvoker.invokeReindexing(sourceDataPointer, targetDataPointer, EmptySegmentation.createEmptySegmentation(elasticSearchQuery)); 59 | //then 60 | assertFalse(embeddedElasticsearchCluster.indexExist(TARGET_INDEX)); 61 | } 62 | 63 | @Test 64 | public void indexingWithSegmentingEmpty() throws Exception { 65 | //given 66 | embeddedElasticsearchCluster.recreateIndex(SOURCE_INDEX); 67 | ElasticDataPointer sourceDataPointer = embeddedElasticsearchCluster.createDataPointer(SOURCE_INDEX); 68 | ElasticDataPointer targetDataPointer = embeddedElasticsearchCluster.createDataPointer(TARGET_INDEX); 69 | ElasticSearchQuery elasticSearchQuery = embeddedElasticsearchCluster.createInitialQuery(""); 70 | //when 71 | ReindexInvoker.invokeReindexing(sourceDataPointer, targetDataPointer, DoubleFieldSegmentation.create("fieldName", 72 | Lists.newArrayList(1.0, 3.0), elasticSearchQuery)); 73 | //then 74 | 
assertFalse(embeddedElasticsearchCluster.indexExist(TARGET_INDEX)); 75 | } 76 | 77 | @Test 78 | public void indexingWithoutSegmenting() throws Exception { 79 | //given 80 | indexWithSampleData(); 81 | ElasticDataPointer sourceDataPointer = embeddedElasticsearchCluster.createDataPointer(SOURCE_INDEX); 82 | ElasticDataPointer targetDataPointer = embeddedElasticsearchCluster.createDataPointer(TARGET_INDEX); 83 | ElasticSearchQuery elasticSearchQuery = embeddedElasticsearchCluster.createInitialQuery(""); 84 | //when 85 | ReindexingSummary reindexingSummary = ReindexInvoker.invokeReindexing(sourceDataPointer, targetDataPointer, 86 | EmptySegmentation.createEmptySegmentation(elasticSearchQuery)); 87 | //then 88 | assertEquals(9L, embeddedElasticsearchCluster.count(TARGET_INDEX)); 89 | assertThat(reindexingSummary) 90 | .hasIndexedCount(9L) 91 | .hasQueriedCount(9L) 92 | .hasFailedIndexedCount(0L); 93 | } 94 | 95 | 96 | @Test 97 | public void indexingWithSegmentingByDoubleField() throws Exception { 98 | //given 99 | indexWithSampleData(); 100 | ElasticDataPointer sourceDataPointer = embeddedElasticsearchCluster.createDataPointer(SOURCE_INDEX); 101 | ElasticDataPointer targetDataPointer = embeddedElasticsearchCluster.createDataPointer(TARGET_INDEX); 102 | ElasticSearchQuery elasticSearchQuery = embeddedElasticsearchCluster.createInitialQuery(""); 103 | //when 104 | ReindexingSummary reindexingSummary = ReindexInvoker.invokeReindexing(sourceDataPointer, targetDataPointer, DoubleFieldSegmentation.create("fieldName", 105 | Lists.newArrayList(1.0, 3.0, 7.0), elasticSearchQuery)); 106 | //then 107 | assertEquals(6L, embeddedElasticsearchCluster.count(TARGET_INDEX)); 108 | assertThat(reindexingSummary) 109 | .hasIndexedCount(6L) 110 | .hasQueriedCount(6L) 111 | .hasFailedIndexedCount(0L); 112 | } 113 | 114 | @Test 115 | public void indexingWithSegmentingByPrefixOnStringField() throws Exception { 116 | //given 117 | indexWithSampleData(); 118 | ElasticDataPointer 
sourceDataPointer = embeddedElasticsearchCluster.createDataPointer(SOURCE_INDEX); 119 | ElasticDataPointer targetDataPointer = embeddedElasticsearchCluster.createDataPointer(TARGET_INDEX); 120 | ElasticSearchQuery elasticSearchQuery = embeddedElasticsearchCluster.createInitialQuery(""); 121 | //when 122 | ReindexingSummary reindexingSummary = ReindexInvoker.invokeReindexing(sourceDataPointer, targetDataPointer, StringPrefixSegmentation.create("fieldName", 123 | Lists.newArrayList("1", "2", "3", "4"), elasticSearchQuery)); 124 | //then 125 | assertEquals(4L, embeddedElasticsearchCluster.count(TARGET_INDEX)); 126 | assertThat(reindexingSummary) 127 | .hasIndexedCount(4L) 128 | .hasQueriedCount(4L) 129 | .hasFailedIndexedCount(0L); 130 | } 131 | 132 | 133 | @Test 134 | public void indexingWithStartQuery() throws Exception { 135 | //given 136 | indexWithSampleData(); 137 | ElasticDataPointer sourceDataPointer = embeddedElasticsearchCluster.createDataPointer(SOURCE_INDEX); 138 | ElasticDataPointer targetDataPointer = embeddedElasticsearchCluster.createDataPointer(TARGET_INDEX); 139 | ElasticSearchQuery elasticSearchQuery = embeddedElasticsearchCluster.createInitialQuery("" + 140 | "{\"range\": {\"fieldName\" : { \"gte\" : \"5\"}}}", "fieldName"); 141 | //when 142 | ReindexingSummary reindexingSummary = ReindexInvoker.invokeReindexing(sourceDataPointer, targetDataPointer, 143 | EmptySegmentation.createEmptySegmentation(elasticSearchQuery) ); 144 | //then 145 | assertEquals(5L, embeddedElasticsearchCluster.count(TARGET_INDEX)); 146 | assertThat(reindexingSummary) 147 | .hasIndexedCount(5L) 148 | .hasQueriedCount(5L) 149 | .hasFailedIndexedCount(0L); 150 | } 151 | 152 | private void indexWithSampleData() { 153 | Stream streamToBeIndexed = IntStream 154 | .range(1, 10) 155 | .mapToObj( 156 | i -> new IndexDocument(Integer.toString(i), ImmutableMap.of("fieldName", i)) 157 | ); 158 | embeddedElasticsearchCluster.indexWithSampleData(SOURCE_INDEX, DATA_TYPE, streamToBeIndexed); 
159 | } 160 | 161 | @Test 162 | public void tryingReindexNotExistingIndex() throws Exception { 163 | //given 164 | embeddedElasticsearchCluster.deleteIndex(SOURCE_INDEX); 165 | ElasticDataPointer sourceDataPointer = embeddedElasticsearchCluster.createDataPointer(SOURCE_INDEX); 166 | ElasticDataPointer targetDataPointer = embeddedElasticsearchCluster.createDataPointer(TARGET_INDEX); 167 | ElasticSearchQuery elasticSearchQuery = embeddedElasticsearchCluster.createInitialQuery(""); 168 | //when 169 | ReindexInvoker.invokeReindexing(sourceDataPointer, targetDataPointer, EmptySegmentation.createEmptySegmentation(elasticSearchQuery)); 170 | //then 171 | assertFalse(embeddedElasticsearchCluster.indexExist(TARGET_INDEX)); 172 | 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/ReindexInvokerWithIndexingErrorsTest.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex; 2 | 3 | import com.google.common.collect.ImmutableMap; 4 | import org.assertj.core.api.Assertions; 5 | import org.elasticsearch.common.xcontent.XContentBuilder; 6 | import org.elasticsearch.common.xcontent.XContentFactory; 7 | import org.junit.After; 8 | import org.junit.Before; 9 | import org.junit.Test; 10 | import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticDataPointer; 11 | import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticSearchQuery; 12 | import pl.allegro.tech.search.elasticsearch.tools.reindex.embeded.EmbeddedElasticsearchCluster; 13 | import pl.allegro.tech.search.elasticsearch.tools.reindex.embeded.IndexDocument; 14 | import pl.allegro.tech.search.elasticsearch.tools.reindex.query.EmptySegmentation; 15 | import pl.allegro.tech.search.elasticsearch.tools.reindex.statistics.ReindexingSummary; 16 | import 
pl.allegro.tech.search.elasticsearch.tools.reindex.statistics.ReindexingSummaryAssert; 17 | 18 | import java.io.IOException; 19 | import java.util.function.IntFunction; 20 | import java.util.stream.IntStream; 21 | import java.util.stream.Stream; 22 | 23 | public class ReindexInvokerWithIndexingErrorsTest { 24 | 25 | private static final String SOURCE_INDEX = "sourceindex"; 26 | private static final String TARGET_INDEX = "targetindex"; 27 | private static final String DATA_TYPE = "type"; 28 | 29 | private EmbeddedElasticsearchCluster embeddedElasticsearchCluster; 30 | 31 | @Before 32 | public void setUp() throws Exception { 33 | embeddedElasticsearchCluster = EmbeddedElasticsearchCluster.createDataNode(); 34 | embeddedElasticsearchCluster.deleteIndex(SOURCE_INDEX); 35 | embeddedElasticsearchCluster.deleteIndex(TARGET_INDEX); 36 | } 37 | 38 | @After 39 | public void tearDown() throws Exception { 40 | embeddedElasticsearchCluster.close(); 41 | } 42 | 43 | @Test 44 | public void shouldWarnWhenIndexingFails() throws Exception { 45 | //given 46 | indexWithSampleData(); 47 | embeddedElasticsearchCluster.createIndex(TARGET_INDEX, DATA_TYPE, createStrictMappingDefinition()); 48 | ElasticDataPointer sourceDataPointer = embeddedElasticsearchCluster.createDataPointer(SOURCE_INDEX); 49 | ElasticDataPointer targetDataPointer = embeddedElasticsearchCluster.createDataPointer(TARGET_INDEX); 50 | ElasticSearchQuery elasticSearchQuery = embeddedElasticsearchCluster.createInitialQuery(""); 51 | //when 52 | ReindexingSummary reindexingSummary = ReindexInvoker.invokeReindexing(sourceDataPointer, targetDataPointer, EmptySegmentation.createEmptySegmentation(elasticSearchQuery)); 53 | //then 54 | Assertions.assertThat(embeddedElasticsearchCluster.count(SOURCE_INDEX)).isEqualTo(8L); 55 | Assertions.assertThat(embeddedElasticsearchCluster.count(TARGET_INDEX)).isEqualTo(4L); 56 | ReindexingSummaryAssert.assertThat(reindexingSummary) 57 | .hasIndexedCount(8L) 58 | .hasQueriedCount(8L) 59 | 
.hasFailedIndexedCount(4L); 60 | } 61 | 62 | private void indexWithSampleData() { 63 | Stream docsWithField1 = 64 | createDocsStream(5, i -> new IndexDocument(Integer.toString(i), ImmutableMap.of("field1", i))); 65 | Stream docsWithField2 = 66 | createDocsStream(5, i -> new IndexDocument(Integer.toString(i + 5), ImmutableMap.of("field2", i))); 67 | embeddedElasticsearchCluster.indexWithSampleData(SOURCE_INDEX, DATA_TYPE, 68 | Stream.concat(docsWithField1, docsWithField2)); 69 | } 70 | 71 | @SuppressWarnings("unchecked") 72 | private Stream createDocsStream(int amount, IntFunction docMapper) { 73 | return IntStream.range(1, amount) 74 | .mapToObj(docMapper); 75 | } 76 | 77 | public XContentBuilder createStrictMappingDefinition() { 78 | try { 79 | // @formatter:off 80 | //How to enable it in intellij see it here: http://stackoverflow.com/questions/3375307/how-to-disable-code-formatting-for-some-part-of-the-code-using-comments 81 | return XContentFactory.jsonBuilder() 82 | .startObject() 83 | .field("dynamic", "strict") 84 | .startObject("properties") 85 | .startObject("field1") 86 | .field("type", "string") 87 | .endObject() 88 | .endObject() 89 | .endObject(); 90 | // @formatter:off 91 | } catch (IOException e) { 92 | throw new RuntimeException("Failed building index mappingDef", e); 93 | } 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/TTLTest.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex; 2 | 3 | import com.google.common.collect.ImmutableMap; 4 | import org.elasticsearch.action.search.SearchResponse; 5 | import org.elasticsearch.common.xcontent.XContentBuilder; 6 | import org.elasticsearch.common.xcontent.XContentFactory; 7 | import org.elasticsearch.search.SearchHitField; 8 | import org.junit.AfterClass; 9 | import org.junit.Before; 10 | import 
org.junit.BeforeClass; 11 | import org.junit.Test; 12 | import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticDataPointer; 13 | import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticSearchQuery; 14 | import pl.allegro.tech.search.elasticsearch.tools.reindex.embeded.EmbeddedElasticsearchCluster; 15 | import pl.allegro.tech.search.elasticsearch.tools.reindex.embeded.IndexDocument; 16 | import pl.allegro.tech.search.elasticsearch.tools.reindex.query.EmptySegmentation; 17 | 18 | import java.io.IOException; 19 | import java.util.Map; 20 | import java.util.concurrent.ExecutionException; 21 | import java.util.stream.IntStream; 22 | import java.util.stream.Stream; 23 | 24 | import static org.assertj.core.api.Assertions.*; 25 | 26 | public class TTLTest { 27 | 28 | private static final String SOURCE_INDEX = "sourceindex"; 29 | private static final String TARGET_INDEX = "targetindex"; 30 | private static final String DATA_TYPE = "type"; 31 | 32 | 33 | private static EmbeddedElasticsearchCluster embeddedElasticsearchCluster; 34 | 35 | @BeforeClass 36 | public static void setUp() throws Exception { 37 | embeddedElasticsearchCluster = EmbeddedElasticsearchCluster.createDataNode(); 38 | } 39 | 40 | @AfterClass 41 | public static void tearDown() throws Exception { 42 | embeddedElasticsearchCluster.close(); 43 | } 44 | 45 | @Before 46 | public void clearTargetIndex() { 47 | embeddedElasticsearchCluster.deleteIndex(SOURCE_INDEX); 48 | embeddedElasticsearchCluster.deleteIndex(TARGET_INDEX); 49 | } 50 | 51 | @Test 52 | public void shouldReindexTTL() throws ExecutionException, InterruptedException { 53 | //given 54 | embeddedElasticsearchCluster.createIndex(SOURCE_INDEX, DATA_TYPE, mappingWithTTL()); 55 | embeddedElasticsearchCluster.createIndex(TARGET_INDEX, DATA_TYPE, mappingWithTTL()); 56 | ElasticSearchQuery elasticSearchQuery = embeddedElasticsearchCluster.createInitialQuery(""); 57 | indexSampleDataWithTTL(); 58 | ElasticDataPointer 
sourceDataPointer = embeddedElasticsearchCluster.createDataPointer(SOURCE_INDEX); 59 | ElasticDataPointer targetDataPointer = embeddedElasticsearchCluster.createDataPointer(TARGET_INDEX); 60 | 61 | //when 62 | ReindexInvoker.invokeReindexing(sourceDataPointer, targetDataPointer, EmptySegmentation.createEmptySegmentation(elasticSearchQuery)); 63 | SearchResponse targetResponse = embeddedElasticsearchCluster.client().prepareSearch(TARGET_INDEX).addFields("_ttl").get(); 64 | 65 | //then 66 | assertThat(embeddedElasticsearchCluster.count(SOURCE_INDEX)).isEqualTo(1L); 67 | assertThat(embeddedElasticsearchCluster.count(TARGET_INDEX)).isEqualTo(1L); 68 | 69 | Map resultFields = targetResponse.getHits().getAt(0).getFields(); 70 | assertThat(resultFields.containsKey("_ttl")); 71 | assertThat((Long) resultFields.get("_ttl").value() > 0L); 72 | } 73 | 74 | private void indexSampleDataWithTTL() { 75 | Stream streamToBeIndexed = IntStream 76 | .range(1, 2) 77 | .mapToObj( 78 | i -> { 79 | Long ttl = 60000L; 80 | return new IndexDocument(Integer.toString(i), ImmutableMap.of("fieldName", i), ttl); 81 | } 82 | ); 83 | 84 | streamToBeIndexed.forEach( 85 | indexDocument -> embeddedElasticsearchCluster.indexDocument(SOURCE_INDEX, DATA_TYPE, indexDocument) 86 | ); 87 | embeddedElasticsearchCluster.refreshIndex(); 88 | } 89 | 90 | private XContentBuilder mappingWithTTL() { 91 | try { 92 | // @formatter:off 93 | //How to enable it in intellij see it here: http://stackoverflow.com/questions/3375307/how-to-disable-code-formatting-for-some-part-of-the-code-using-comments 94 | return XContentFactory.jsonBuilder() 95 | .startObject() 96 | .startObject("_ttl").field("enabled", true).endObject() 97 | .endObject(); 98 | // @formatter:off 99 | } catch (IOException e) { 100 | throw new RuntimeException("Failed building index mappingDef", e); 101 | } 102 | } 103 | 104 | 105 | } 106 | -------------------------------------------------------------------------------- 
/src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/connection/ElasticAddressAssert.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.connection;

import junit.framework.TestCase;
import org.assertj.core.api.AbstractAssert;
import pl.allegro.tech.search.elasticsearch.tools.reindex.query.PrefixSegment;

import java.util.Objects;

/**
 * AssertJ custom assertion for {@link ElasticAddress}.
 *
 * <p>Every {@code hasXxx} method first checks that the address under test is
 * not null, then compares one property and returns {@code this} for chaining.
 * String properties are compared null-safely, so a null actual value produces
 * an assertion failure rather than a {@link NullPointerException}.
 */
public class ElasticAddressAssert extends AbstractAssert<ElasticAddressAssert, ElasticAddress> {

    protected ElasticAddressAssert(ElasticAddress actual) {
        super(actual, ElasticAddressAssert.class);
    }

    /** Entry point: {@code assertThat(address).hasHost(...)}. */
    public static ElasticAddressAssert assertThat(ElasticAddress actual) {
        return new ElasticAddressAssert(actual);
    }

    /** Fails unless the address host equals {@code host}. */
    public ElasticAddressAssert hasHost(String host) {
        isNotNull();
        if (!Objects.equals(actual.getHost(), host)) {
            failWithMessage("Expected host to be <%s> but was <%s>", host, actual.getHost());
        }
        return this;
    }

    /** Fails unless the address port equals {@code port}. */
    public ElasticAddressAssert hasPort(int port) {
        isNotNull();
        if (actual.getPort() != port) {
            failWithMessage("Expected port to be <%d> but was <%d>", port, actual.getPort());
        }
        return this;
    }

    /** Fails unless the address index name equals {@code indexName}. */
    public ElasticAddressAssert hasIndexName(String indexName) {
        isNotNull();
        if (!Objects.equals(actual.getIndexName(), indexName)) {
            failWithMessage("Expected indexName to be <%s> but was <%s>", indexName, actual.getIndexName());
        }
        return this;
    }

    /** Fails unless the address type name equals {@code typeName}. */
    public ElasticAddressAssert hasTypeName(String typeName) {
        isNotNull();
        if (!Objects.equals(actual.getTypeName(), typeName)) {
            failWithMessage("Expected typeName to be <%s> but was <%s>", typeName, actual.getTypeName());
        }
        return this;
    }

}
--------------------------------------------------------------------------------
/src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/connection/ElasticAddressParserTest.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.connection;

import junitparams.JUnitParamsRunner;
import junitparams.Parameters;
import org.assertj.core.api.Assertions;
import org.junit.Test;
import org.junit.runner.RunWith;

import static com.googlecode.catchexception.CatchException.catchException;
import static com.googlecode.catchexception.CatchException.caughtException;
import static pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticAddressAssert.assertThat;

/**
 * Checks that {@link ElasticAddressParser} accepts a full
 * {@code http://host:port/index/type} address and rejects the incomplete
 * variants listed below.
 */
@RunWith(JUnitParamsRunner.class)
public class ElasticAddressParserTest {

    private ElasticAddressParser elasticAddressParser = new ElasticAddressParser();

    @Test
    public void shouldBuildForProperAddress() throws Exception {
        //given
        String uri = "http://host:123/index/type";
        //when
        ElasticAddress address = elasticAddressParser.parse(uri);
        //then
        assertThat(address)
                .hasHost("host")
                .hasPort(123)
                .hasIndexName("index")
                .hasTypeName("type");
    }

    @Test
    @Parameters({
            "http://host:123/index",
            "http://host/index",
            "host:123/index",
            "http://:123/index" })
    public void shouldNotBuildForAddressWithoutType(String uri) throws Exception {
        //when
        catchException(elasticAddressParser).parse(uri);
        //then
        Assertions.assertThat((Throwable) caughtException())
                .isInstanceOf(ParsingElasticsearchAddressException.class);
    }

}
--------------------------------------------------------------------------------
/src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/connection/ElasticDataPointerAssert.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.connection;

import junit.framework.TestCase;
import org.assertj.core.api.AbstractAssert;

import java.util.Objects;

/**
 * AssertJ custom assertion for {@link ElasticDataPointer}.
 *
 * <p>Every {@code hasXxx} method first checks that the pointer under test is
 * not null, then compares one property and returns {@code this} for chaining.
 * String properties are compared null-safely, so a null actual value produces
 * an assertion failure rather than a {@link NullPointerException}.
 */
public class ElasticDataPointerAssert extends AbstractAssert<ElasticDataPointerAssert, ElasticDataPointer> {

    public ElasticDataPointerAssert(ElasticDataPointer actual) {
        super(actual, ElasticDataPointerAssert.class);
    }

    /** Entry point: {@code assertThat(pointer).hasHost(...)}. */
    public static ElasticDataPointerAssert assertThat(ElasticDataPointer actual) {
        return new ElasticDataPointerAssert(actual);
    }

    /** Fails unless the pointer host equals {@code host}. */
    public ElasticDataPointerAssert hasHost(String host) {
        isNotNull();
        if (!Objects.equals(actual.getHost(), host)) {
            failWithMessage("Expected host to be <%s> but was <%s>", host, actual.getHost());
        }
        return this;
    }

    /** Fails unless the pointer port equals {@code port}. */
    public ElasticDataPointerAssert hasPort(int port) {
        isNotNull();
        if (actual.getPort() != port) {
            failWithMessage("Expected port to be <%d> but was <%d>", port, actual.getPort());
        }
        return this;
    }

    /** Fails unless the pointer index name equals {@code indexName}. */
    public ElasticDataPointerAssert hasIndexName(String indexName) {
        isNotNull();
        if (!Objects.equals(actual.getIndexName(), indexName)) {
            failWithMessage("Expected indexName to be <%s> but was <%s>", indexName, actual.getIndexName());
        }
        return this;
    }

    /** Fails unless the pointer type name equals {@code typeName}. */
    public ElasticDataPointerAssert hasTypeName(String typeName) {
        isNotNull();
        if (!Objects.equals(actual.getTypeName(), typeName)) {
            failWithMessage("Expected typeName to be <%s> but was <%s>", typeName, actual.getTypeName());
        }
        return this;
    }

    /** Fails unless the pointer cluster name equals {@code clusterName}. */
    public ElasticDataPointerAssert hasClusterName(String clusterName) {
        isNotNull();
        if (!Objects.equals(actual.getClusterName(), clusterName)) {
            failWithMessage("Expected clusterName to be <%s> but was <%s>", clusterName, actual.getClusterName());
        }
        return this;
    }

}
--------------------------------------------------------------------------------
/src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/connection/ElasticSearchClientProducerTest.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.connection;

import static org.elasticsearch.node.NodeBuilder.nodeBuilder;

import org.assertj.core.api.Assertions;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.node.Node;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

/**
 * Checks that {@link ElasticSearchClientFactory#createClient(ElasticDataPointer)} produces a
 * client whose settings carry the cluster name of the data pointer, with the sniff option
 * left at its default, disabled, and enabled.
 *
 * <p>A node for {@link #CLUSTER_NAME} is started before and closed after every test.
 */
public class ElasticSearchClientProducerTest {

    public static final String CLUSTER_NAME = "my_test_cluster";
    public static final String INDEX_NAME = "index";
    private Node dataNode;

    @Before
    public void setUp() throws Exception {
        Settings.Builder settings = nodeBuilder().settings()
                .put("path.home", ".");

        dataNode = nodeBuilder().
                settings(settings).
                clusterName(CLUSTER_NAME).node();
    }

    @After
    public void tearDown() throws Exception {
        // setUp may have failed before the node was assigned; do not mask that failure with an NPE
        if (dataNode != null) {
            dataNode.close();
        }
    }

    @Test
    public void validateCreatedLocalElasticClientWithProperClusterName() throws Exception {
        //given
        ElasticDataPointer dataPointer = ElasticDataPointerBuilder.builder()
                .setAddress("http://localhost:9300/" + INDEX_NAME + "/type")
                .setClusterName(CLUSTER_NAME)
                .build();
        //when, then
        assertCreatedClientUsesClusterName(dataPointer);
    }

    @Test
    public void validateCreatedLocalElasticClientWithoutSniff() throws Exception {
        //given
        ElasticDataPointer dataPointer = ElasticDataPointerBuilder.builder()
                .setAddress("http://localhost:9300/" + INDEX_NAME + "/type")
                .setClusterName(CLUSTER_NAME)
                .setSniff(false)
                .build();
        //when, then
        assertCreatedClientUsesClusterName(dataPointer);
    }

    @Test
    public void validateCreatedLocalElasticClientWithSniff() throws Exception {
        //given
        ElasticDataPointer dataPointer = ElasticDataPointerBuilder.builder()
                .setAddress("http://localhost:9300/" + INDEX_NAME + "/type")
                .setClusterName(CLUSTER_NAME)
                .setSniff(true)
                .build();
        //when, then
        assertCreatedClientUsesClusterName(dataPointer);
    }

    /**
     * Creates a client for {@code dataPointer}, asserts that its {@code cluster.name} setting is
     * {@link #CLUSTER_NAME}, and always closes the client, including when the assertion fails.
     */
    private static void assertCreatedClientUsesClusterName(ElasticDataPointer dataPointer) {
        Client client = ElasticSearchClientFactory.createClient(dataPointer);
        try {
            Assertions.assertThat(client.settings().get("cluster.name")).isEqualTo(CLUSTER_NAME);
        } finally {
            client.close();
        }
    }

}
-------------------------------------------------------------------------------- /src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/embeded/EmbeddedElasticsearchCluster.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.embeded;

import static org.elasticsearch.node.NodeBuilder.nodeBuilder;

import java.util.stream.Stream;

import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.IndicesAdminClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.node.Node;
import org.elasticsearch.node.NodeBuilder;
import pl.allegro.tech.search.elasticsearch.tools.reindex.ReindexInvokerTest;
import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticDataPointer;
import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticDataPointerBuilder;
import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticSearchQuery;
import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticSearchQueryBuilder;

/**
 * Test helper that starts a single Elasticsearch data node inside the test JVM and offers
 * convenience methods for creating, filling, refreshing and inspecting indices on it.
 *
 * <p>The node listens for HTTP on {@link #ELS_PORT} and for transport connections on
 * {@link #ELS_TCP_PORT}; callers must {@link #close()} it when done.
 */
public final class EmbeddedElasticsearchCluster {

    public static final String CLUSTER_NAME = "ReindexInvoker_cluster";
    public static final int ELS_PORT = 9211;
    public static final int ELS_TCP_PORT = 9311;

    private final Node dataNode;

    /**
     * Starts the node and blocks until the cluster health request for green status returns.
     *
     * @param clusterName name of the cluster the node forms
     * @param apiPort     value used for the {@code transport.tcp.port} setting
     */
    private EmbeddedElasticsearchCluster(String clusterName, int apiPort) {
        NodeBuilder nodeBuilder = nodeBuilder()
                .clusterName(clusterName)
                .data(true);
        Settings.Builder settings = nodeBuilder.settings()
                .put("http.port", ELS_PORT)
                //.put("index.store.type", "memory")
                .put("path.home", ".")
                .put("transport.tcp.port", apiPort);

        dataNode = nodeBuilder.settings(settings).node();
        dataNode.client().admin().cluster().prepareHealth().setWaitForGreenStatus().get();
    }

    /** Creates a node for {@link #CLUSTER_NAME} bound to {@link #ELS_TCP_PORT}. */
    public static EmbeddedElasticsearchCluster createDataNode() {
        return new EmbeddedElasticsearchCluster(CLUSTER_NAME, ELS_TCP_PORT);
    }

    /** Drops {@code sourceIndex} if it exists and creates it again, empty. */
    public void recreateIndex(final String sourceIndex) {
        deleteIndex(sourceIndex);
        indices().prepareCreate(sourceIndex).get();
    }

    /** Closes the embedded node. */
    public void close() {
        dataNode.close();
    }

    /** Returns a client obtained from the embedded node. */
    public Client client() {
        return dataNode.client();
    }

    /** Deletes {@code targetIndex} if it exists; does nothing otherwise. */
    public void deleteIndex(String targetIndex) {
        if (indexExist(targetIndex)) {
            indices().prepareDelete(targetIndex).get();
        }
    }

    /**
     * Indexes a single document under its own id; the TTL is only set on the request when the
     * document carries one.
     */
    public void indexDocument(String index, String type, IndexDocument indexDocument) {
        IndexRequestBuilder requestBuilder = dataNode.client()
                .prepareIndex(index, type, indexDocument.getId())
                .setSource(indexDocument.getDoc());
        if (indexDocument.getTTL() != null) {
            requestBuilder.setTTL(indexDocument.getTTL());
        }
        requestBuilder.get();
    }

    /** Returns whether {@code index} exists on the node. */
    public boolean indexExist(String index) {
        return indices().prepareExists(index).get().isExists();
    }

    /** Returns the document count reported for {@code index}. */
    public long count(String index) {
        return dataNode.client().prepareCount(index).get().getCount();
    }

    /**
     * Builds a data pointer addressing {@code indexName} on this node through
     * {@link #ELS_TCP_PORT}, using {@code ReindexInvokerTest.DATA_TYPE} as the type.
     */
    public ElasticDataPointer createDataPointer(String indexName) {
        return ElasticDataPointerBuilder.builder()
                .setAddress("http://127.0.0.1:" + ELS_TCP_PORT + "/" + indexName + "/" + ReindexInvokerTest.DATA_TYPE)
                .setClusterName(CLUSTER_NAME)
                .build();
    }

    /**
     * Recreates {@code sourceIndex}, indexes every document of the stream into it and refreshes
     * so the documents become searchable.
     *
     * <p>The stream is typed: with a raw {@code Stream} the lambda argument would be an
     * {@code Object} and could not be passed to {@link #indexDocument}.
     */
    public void indexWithSampleData(String sourceIndex, String type, Stream<IndexDocument> indexDocumentStream) {
        recreateIndex(sourceIndex);
        indexDocumentStream.forEach(
                indexDocument -> indexDocument(sourceIndex, type, indexDocument)
        );
        refreshIndex();
    }

    /** Issues a refresh request without naming any index. */
    public void refreshIndex() {
        indices().prepareRefresh().get();
    }

    /** Creates {@code index} with the given mapping registered for {@code type}. */
    public void createIndex(String index, String type, XContentBuilder mappingDef) {
        indices()
                .prepareCreate(index)
                .addMapping(type, mappingDef)
                .get();
    }

    /** Builds a query object from the given query string. */
    public ElasticSearchQuery createInitialQuery(String query) {
        return ElasticSearchQueryBuilder.builder().setQuery(query).build();
    }

    /** Builds a query object from the given query string, sorted by {@code orderByField}. */
    public ElasticSearchQuery createInitialQuery(String query, String orderByField) {
        return ElasticSearchQueryBuilder.builder().setQuery(query).setSortByField(orderByField).build();
    }

    /** Shortcut to the indices admin client of the embedded node. */
    private IndicesAdminClient indices() {
        return dataNode.client().admin().indices();
    }
}
--------------------------------------------------------------------------------
/src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/embeded/IndexDocument.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.embeded;

import java.util.Map;

/**
 * Immutable holder for a document to be indexed by the embedded test cluster: an id, the
 * document source and an optional TTL.
 */
public class IndexDocument {
    private final String id;
    // NOTE(review): the map's type arguments are not visible here; confirm the intended
    // key/value types before parameterizing it.
    private final Map doc;
    private final Long ttl;

    /**
     * @param id  document id
     * @param doc document source
     * @param ttl time-to-live; {@code null}, zero and negative values are all stored as "no TTL"
     */
    public IndexDocument(String id, Map doc, Long ttl) {
        this.id = id;
        this.doc = doc;
        boolean hasPositiveTtl = ttl != null && ttl > 0;
        this.ttl = hasPositiveTtl ? ttl : null;
    }

    /** Creates a document without a TTL. */
    public IndexDocument(String id, Map doc) {
        this(id, doc, null);
    }

    public String getId() {
        return id;
    }

    public Map getDoc() {
        return doc;
    }

    /** Returns the TTL, or {@code null} when the document has none. */
    public Long getTTL() {
        return ttl;
    }
}
--------------------------------------------------------------------------------
/src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/process/BulkResultAssert.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.process;

import org.assertj.core.api.AbstractAssert;
import org.assertj.core.api.Assertions;

import java.util.Collection;

/**
 * AssertJ custom assertion for {@link BulkResult}.
 *
 * <p>The type parameters on {@link AbstractAssert} are required: with the raw type, the
 * inherited {@code actual} field is an {@code Object} and the getters below do not resolve.
 */
public class BulkResultAssert extends AbstractAssert<BulkResultAssert, BulkResult> {

    protected BulkResultAssert(BulkResult actual) {
        super(actual, BulkResultAssert.class);
    }

    /** Entry point, intended to be statically imported by tests. */
    public static BulkResultAssert assertThat(BulkResult actual) {
        return new BulkResultAssert(actual);
    }

    /** Verifies that the result reports exactly {@code failedCount} failures. */
    public BulkResultAssert hasFailedCount(long failedCount) {
        isNotNull();
        if (actual.getFailedCount() != failedCount) {
            failWithMessage("Expected failedCount to be <%d> but was <%d>", failedCount, actual.getFailedCount());
        }
        return this;
    }

    /**
     * Verifies that the failed ids of the result are the same elements as {@code failedIds}.
     * The result's ids are the subject of the assertion, so a failure message reports them as
     * the actual value and {@code failedIds} as the expectation.
     */
    public BulkResultAssert hasFailedIds(Collection failedIds) {
        isNotNull();
        Assertions.assertThat(actual.getFailedIds()).hasSameElementsAs(failedIds);
        return this;
    }


}
-------------------------------------------------------------------------------- /src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/process/IndexingComponentTest.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex.process; 2 | 3 | import org.junit.Test; 4 | 5 | import java.text.SimpleDateFormat; 6 | import java.util.Collections; 7 | import
java.util.HashMap;
import java.util.Map;

import static org.junit.Assert.*;

/**
 * Exercises {@code IndexingComponent.computeIndexName} with plain names, {@code ${field}}
 * placeholders, the {@code ${_index}} placeholder and {@code ${field:dateFormat}} placeholders.
 */
public class IndexingComponentTest {

    public static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm");

    @Test
    public void testComputeIndexName() throws Exception {
        // names without placeholders are returned as they are
        assertEquals("myindex", IndexingComponent.computeIndexName("myindex", null, null));
        assertEquals("", IndexingComponent.computeIndexName("", null, null));

        // a placeholder without a value must fail with a non-empty message
        NullPointerException missingValue = null;
        try {
            IndexingComponent.computeIndexName("${value}", Collections.emptyMap(), null);
        } catch (NullPointerException e) {
            missingValue = e;
        }
        if (missingValue == null) {
            fail("Should throw NullPointerException with informative text");
        }
        assertFalse(missingValue.getMessage().isEmpty());

        // a single placeholder: alone, as suffix, in the middle
        assertEquals("123", IndexingComponent.computeIndexName("${value}", Collections.singletonMap("value", "123"), null));
        assertEquals("doc-123", IndexingComponent.computeIndexName("doc-${value}", Collections.singletonMap("value", "123"), null));
        assertEquals("doc-123doc", IndexingComponent.computeIndexName("doc-${value}doc", Collections.singletonMap("value", "123"), null));

        // several placeholders in one name
        Map map = new HashMap<>();
        map.put("value", "123");
        map.put("key", "key45");
        assertEquals("doc-123dockey45", IndexingComponent.computeIndexName("doc-${value}doc${key}", map, null));

        // the name of the source index
        assertEquals("doc-idx43doc", IndexingComponent.computeIndexName("doc-${_index}doc", null, "idx43"));

        // timestamps are produced by parsing local date strings to avoid issues due to different timezones
        String jan1970 = epochMillis("1970-01-01 03:23");
        String may2015 = epochMillis("2015-05-23 11:23");

        // date/time formatting
        assertEquals("doc-1970-01-01doc", IndexingComponent.computeIndexName("doc-${startTime:yyyy-MM-dd}doc", Collections.singletonMap("startTime", jan1970), null));
        assertEquals("doc-2015-05-23doc", IndexingComponent.computeIndexName("doc-${startTime:yyyy-MM-dd}doc", Collections.singletonMap("startTime", may2015), null));
        assertEquals("doc-2015-05-23 11:23:00doc", IndexingComponent.computeIndexName("doc-${startTime:yyyy-MM-dd HH:mm:ss}doc", Collections.singletonMap("startTime", may2015), null));

        map.put("startTime", epochMillis("2015-05-23 21:12"));
        map.put("endTime", epochMillis("2015-05-24 23:12"));
        assertEquals("doc-2015-05-2323:12doc", IndexingComponent.computeIndexName("doc-${startTime:yyyy-MM-dd}${endTime:HH:mm}doc", map, null));
    }

    /** Parses {@code dateTime} with {@link #DATE_FORMAT} and returns its epoch millis as a string. */
    private static String epochMillis(String dateTime) throws Exception {
        return Long.toString(DATE_FORMAT.parse(dateTime).getTime());
    }
}
--------------------------------------------------------------------------------
/src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/process/IndexingProcessTest.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.process;

import com.beust.jcommander.internal.Lists;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.junit.Test;
import org.mockito.Mockito;
import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticDataPointer;
import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticDataPointerBuilder;

import java.util.Collections;
import java.util.Optional;

import static org.mockito.Matchers.any;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

/**
 * Runs {@link IndexingProcess} against mocked collaborators and checks which counters of the
 * {@link ProcessSynchronizer} it updates.
 */
public class IndexingProcessTest {

    private static final String INDEX = "index";
    private static final String TYPE = "type";

    @Test
    public void verifyIndexingProcessDoNotIndexWhenNoDataToIndex() throws Exception {
        //given
        ProcessSynchronizer synchronizer = mock(ProcessSynchronizer.class);
        when(synchronizer.hasDataToBeIndexed()).thenReturn(false);
        IndexingComponent component = mock(IndexingComponent.class);

        //when
        IndexingProcess process = IndexingProcessBuilder.builder()
                .setProcessSynchronizer(synchronizer)
                .setIndexingComponent(component)
                .build();
        process.run();

        //then
        verify(synchronizer, Mockito.never()).incrementUpdates(Mockito.anyInt());
    }

    @Test
    public void verifyIndexedCount() throws Exception {
        //given
        ProcessSynchronizer synchronizer = buildProcessSynchronizerMock();
        ElasticDataPointer pointer = localDataPointer();
        IndexingComponent component = mock(IndexingComponent.class);
        when(component.indexData(eq(pointer), any(SearchHit[].class)))
                .thenReturn(Optional.of(new BulkResult(4, Collections.emptyList())));

        //when
        IndexingProcess process = IndexingProcessBuilder.builder()
                .setProcessSynchronizer(synchronizer)
                .setIndexingComponent(component)
                .setDataPointer(pointer)
                .build();
        process.run();

        //then
        verify(synchronizer, times(1)).incrementUpdates(4);
    }

    @Test
    public void verifyIndexedFailedCount() throws Exception {
        //given
        ElasticDataPointer pointer = localDataPointer();
        ProcessSynchronizer synchronizer = buildProcessSynchronizerMock();
        IndexingComponent component = mock(IndexingComponent.class);
        when(component.indexData(eq(pointer), any(SearchHit[].class)))
                .thenReturn(Optional.of(new BulkResult(0, Lists.newArrayList("1", "2"))));

        //when
        IndexingProcess process = IndexingProcessBuilder.builder()
                .setProcessSynchronizer(synchronizer)
                .setIndexingComponent(component)
                .setDataPointer(pointer)
                .build();
        process.run();

        //then
        verify(synchronizer).incrementFailures(2);
    }

    /** Data pointer for INDEX/TYPE on localhost:9300, as used by the indexing tests. */
    private static ElasticDataPointer localDataPointer() {
        return ElasticDataPointerBuilder.builder()
                .setAddress("http://localhost:9300/" + INDEX + "/" + TYPE)
                .build();
    }

    /** Synchronizer mock that reports data exactly once and then hands out mocked search hits. */
    private ProcessSynchronizer buildProcessSynchronizerMock() throws Exception {
        ProcessSynchronizer synchronizer = mock(ProcessSynchronizer.class);
        SearchHits hits = mock(SearchHits.class);
        when(synchronizer.hasDataToBeIndexed()).thenReturn(true, false);
        when(synchronizer.pollDataToIndexed()).thenReturn(hits);
        return synchronizer;
    }

}
--------------------------------------------------------------------------------
/src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/process/ProcessExecutorTest.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.process;

import org.junit.Assert;
import org.junit.Test;

import java.util.concurrent.atomic.AtomicBoolean;

/** Checks that a process handed to {@link ProcessExecutor} has run once processing is finished. */
public class ProcessExecutorTest {

    @Test
    public void testExecutingAndEndingProcess() throws Exception {
        //given
        ProcessExecutor executor = new ProcessExecutor(3);
        AtomicBoolean wasRun = new AtomicBoolean();
        //when
        executor.startProcess(() -> wasRun.set(true));
        executor.finishProcessing();
        //then
        Assert.assertTrue(wasRun.get());
    }
}
--------------------------------------------------------------------------------
/src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/process/ProcessSynchronizerTest.java:
--------------------------------------------------------------------------------
package pl.allegro.tech.search.elasticsearch.tools.reindex.process;

import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.junit.Test;

import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.IntStream;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

/**
 * Tests for {@link ProcessSynchronizer}: reporting of pending data, hand-over of search hits
 * through its queue, and waiting for the query and update processes to finish.
 */
public class ProcessSynchronizerTest {

    public static final int QUERY_SEGMENT_SIZE = 3;

    @Test
    public void shouldHasDataToBeIndexedBeTrueWhenWorkingQueueIsNotEmpty() throws Exception {
        //given
        ProcessSynchronizer processSynchronizer = new ProcessSynchronizer(QUERY_SEGMENT_SIZE);
        //when
        boolean result = processSynchronizer.hasDataToBeIndexed();
        //then
        assertTrue(result);
    }

    @Test
    public void shouldHasDataToBeIndexedBeTrueWhenDataQueueIsNotEmpty() throws Exception {
        //given
        ProcessSynchronizer processSynchronizer = new ProcessSynchronizer(QUERY_SEGMENT_SIZE);
        setupDataQueueForProcessSynchronizer(processSynchronizer);
        //when
        boolean result = processSynchronizer.hasDataToBeIndexed();
        //then
        assertTrue(result);
    }

    /** Marks every query and update process as finished, then puts one search response on the queue. */
    private void setupDataQueueForProcessSynchronizer(ProcessSynchronizer processSynchronizer) {
        releaseAllProcesses(processSynchronizer);
        processSynchronizer.tryFillQueueWithSearchHits(createSearchResponse());
    }

    @Test
    public void shouldHasDataToBeIndexedBeFalseWhenDataQueueIsEmptyAndDataQueueIsEmpty() throws Exception {
        //given
        ProcessSynchronizer processSynchronizer = new ProcessSynchronizer(QUERY_SEGMENT_SIZE);
        IntStream.range(0, QUERY_SEGMENT_SIZE).forEach(i -> processSynchronizer.subtractWorkingQueryProcess());
        //when
        boolean result = processSynchronizer.hasDataToBeIndexed();
        //then
        assertFalse(result);
    }

    @Test
    public void shouldPollWhatHasPulled() throws Exception {
        //given
        ProcessSynchronizer processSynchronizer = new ProcessSynchronizer(QUERY_SEGMENT_SIZE);
        SearchResponse searchResponse = createSearchResponse();
        processSynchronizer.tryFillQueueWithSearchHits(searchResponse);
        //when
        SearchHits searchHits = processSynchronizer.pollDataToIndexed();
        //then
        assertEquals(searchResponse.getHits(), searchHits);
    }

    @Test
    public void shouldReturnEmptyHitsWhenPoolingWithTimeout() throws Exception {
        //given
        ProcessSynchronizer processSynchronizer = new ProcessSynchronizer(QUERY_SEGMENT_SIZE);
        //when
        SearchHits searchHits = processSynchronizer.pollDataToIndexed();
        //then
        assertEquals(0, searchHits.getTotalHits());
    }


    @Test
    public void shouldNotWaitForProcessesToEndWhenWorkingQueueIsEmpty() throws Exception {
        //given
        ProcessSynchronizer processSynchronizer = new ProcessSynchronizer(QUERY_SEGMENT_SIZE);
        releaseAllProcesses(processSynchronizer);
        //when
        processSynchronizer.waitForProcessesToEnd();
        //then
        assertFalse(Thread.currentThread().isInterrupted());
    }

    @Test
    public void shouldWaitTillQueriesLatchReleased() throws Exception {
        //given
        ProcessSynchronizer processSynchronizer = new ProcessSynchronizer(QUERY_SEGMENT_SIZE);
        final AtomicBoolean waitedTillSubtractWorkingQueryProcessDone = new AtomicBoolean();
        createTimerReleasingAllProcessesAfterSecond(processSynchronizer, waitedTillSubtractWorkingQueryProcessDone);
        //when
        processSynchronizer.waitForProcessesToEnd();
        //then
        assertTrue(waitedTillSubtractWorkingQueryProcessDone.get());
    }

    /**
     * Schedules a one-shot task that, after one second, sets the flag and then marks all
     * processes as finished.
     *
     * <p>The timer runs on a daemon thread and cancels itself at the end of the task, so no
     * timer thread is left behind once the test has completed.
     */
    private void createTimerReleasingAllProcessesAfterSecond(ProcessSynchronizer processSynchronizer, AtomicBoolean
            waitedTillSubtractWorkingQueryProcessDone) {
        final Timer timer = new Timer(true);
        timer.schedule(new TimerTask() {
            @Override
            public void run() {
                try {
                    waitedTillSubtractWorkingQueryProcessDone.set(true);
                    releaseAllProcesses(processSynchronizer);
                } finally {
                    // calling cancel from within the task guarantees this run is the last one
                    timer.cancel();
                }
            }
        }, 1000);
    }

    /**
     * Reports QUERY_SEGMENT_SIZE query processes and the configured number of update processes
     * as finished.
     */
    private static void releaseAllProcesses(ProcessSynchronizer processSynchronizer) {
        IntStream.range(0, QUERY_SEGMENT_SIZE).forEach(i -> processSynchronizer.subtractWorkingQueryProcess());
        IntStream.range(0, ProcessConfiguration.getInstance().getUpdateThreadsCount())
                .forEach(i -> processSynchronizer.subtractWorkingUpdatesProcess());
    }

    /** Mocked search response whose hits hold an empty hit array. */
    private SearchResponse createSearchResponse() {
        SearchResponse searchResponse = mock(SearchResponse.class);
        SearchHits searchHits = mock(SearchHits.class);
        when(searchHits.getHits()).thenReturn(new SearchHit[0]);
        when(searchResponse.getHits()).thenReturn(searchHits);
        return searchResponse;
    }
}
-------------------------------------------------------------------------------- /src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/process/QueryComponentTest.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex.process; 2 | 3 | import
com.google.common.collect.ImmutableMap;
import org.elasticsearch.action.admin.indices.settings.get.GetSettingsRequest;
import org.elasticsearch.action.admin.indices.settings.get.GetSettingsResponse;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.unit.TimeValue;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticDataPointer;
import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticSearchClientFactory;
import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticSearchQuery;
import pl.allegro.tech.search.elasticsearch.tools.reindex.embeded.EmbeddedElasticsearchCluster;
import pl.allegro.tech.search.elasticsearch.tools.reindex.embeded.IndexDocument;

import java.util.stream.IntStream;
import java.util.stream.Stream;

import static org.junit.Assert.*;

/**
 * Runs {@link QueryComponent} scan/scroll requests against an embedded Elasticsearch node that
 * is started once for the class. Both indices are deleted before every test.
 *
 * <p>Every client created by a test is closed in a {@code finally} block so that it is
 * released even when an assertion fails.
 */
public class QueryComponentTest {
    private static final String SOURCE_INDEX = "sourceindex";
    private static final String TARGET_INDEX = "targetindex";
    public static final String DATA_TYPE = "type";

    private static EmbeddedElasticsearchCluster embeddedElasticsearchCluster;

    @BeforeClass
    public static void setUp() throws Exception {
        embeddedElasticsearchCluster = EmbeddedElasticsearchCluster.createDataNode();
    }


    @AfterClass
    public static void tearDown() throws Exception {
        embeddedElasticsearchCluster.close();
    }

    @Before
    public void clearTargetIndex() {
        embeddedElasticsearchCluster.deleteIndex(SOURCE_INDEX);
        embeddedElasticsearchCluster.deleteIndex(TARGET_INDEX);
    }

    @Test
    public void testQueryNoData() {
        // given
        embeddedElasticsearchCluster.recreateIndex(SOURCE_INDEX);
        ElasticDataPointer sourceDataPointer = embeddedElasticsearchCluster.createDataPointer(SOURCE_INDEX);
        Client sourceClient = ElasticSearchClientFactory.createClient(sourceDataPointer);
        try {
            ElasticSearchQuery elasticSearchQuery = embeddedElasticsearchCluster.createInitialQuery("");

            // when
            QueryComponent component = QueryComponentBuilder.builder()
                    .setClient(sourceClient)
                    .setDataPointer(sourceDataPointer)
                    .setQuery(elasticSearchQuery)
                    .createQueryComponent();
            SearchResponse searchResponse = component.prepareSearchScrollRequest();

            // then
            assertEquals("No results overall",
                    0L, searchResponse.getHits().getTotalHits());
            assertEquals("Initially zero documents are loaded",
                    0L, searchResponse.getHits().getHits().length);
            assertEquals("Initially zero documents are loaded",
                    0L, component.getResponseSize(searchResponse));
            assertFalse("Some documents are found",
                    component.searchResultsNotEmpty(searchResponse));

            // when
            searchResponse = component.getNextScrolledSearchResults(searchResponse.getScrollId());

            // then
            assertEquals("No results overall",
                    0L, searchResponse.getHits().getTotalHits());
            assertEquals("Initially zero documents are loaded",
                    0L, searchResponse.getHits().getHits().length);
            assertEquals("Initially zero documents are loaded",
                    0L, component.getResponseSize(searchResponse));
            assertFalse("Some documents are found",
                    component.searchResultsNotEmpty(searchResponse));
        } finally {
            sourceClient.close();
        }
    }

    @Test
    public void testQueryWithData() {
        // given
        indexWithSampleData(7000);
        ElasticDataPointer sourceDataPointer = embeddedElasticsearchCluster.createDataPointer(SOURCE_INDEX);
        Client sourceClient = ElasticSearchClientFactory.createClient(sourceDataPointer);
        try {
            ElasticSearchQuery elasticSearchQuery = embeddedElasticsearchCluster.createInitialQuery("");
            assertSourceIndexHasFiveShardsAndOneReplica(sourceClient);

            // when
            QueryComponent component = QueryComponentBuilder.builder()
                    .setClient(sourceClient)
                    .setDataPointer(sourceDataPointer)
                    .setQuery(elasticSearchQuery)
                    .createQueryComponent();
            SearchResponse searchResponse = component.prepareSearchScrollRequest();

            // then
            assertEquals("Overall there should be 7000 hits",
                    7000L, searchResponse.getHits().getTotalHits());
            assertEquals("Initially zero documents are loaded",
                    0L, searchResponse.getHits().getHits().length);
            assertEquals("Initially zero documents are loaded",
                    0L, component.getResponseSize(searchResponse));
            assertTrue("Some documents are found",
                    component.searchResultsNotEmpty(searchResponse));

            // when
            searchResponse = component.getNextScrolledSearchResults(searchResponse.getScrollId());

            // then
            assertEquals("Overall there should be 7000 hits",
                    7000L, searchResponse.getHits().getTotalHits());
            assertEquals("QueryComponent tries to compute the hits to be 5000 on evenly distributed documents, never more!",
                    5000L, searchResponse.getHits().getHits().length);
            assertEquals("QueryComponent tries to compute the hits to be 5000 on evenly distributed documents, never more!",
                    5000L, component.getResponseSize(searchResponse));
            assertTrue("Some documents are found",
                    component.searchResultsNotEmpty(searchResponse));
        } finally {
            sourceClient.close();
        }
    }

    // just a simple test to verify that replica-shards are not included in the calculation of results
    // for the "size per shard" setting in scan/scroll-queries:
    // a size of 10 on an index with 5 shards yields 50 hits per scroll
    @Test
    public void testElasticsearchReplicaHandlingInScrolls() {
        // given
        indexWithSampleData(200);
        ElasticDataPointer sourceDataPointer = embeddedElasticsearchCluster.createDataPointer(SOURCE_INDEX);
        Client sourceClient = ElasticSearchClientFactory.createClient(sourceDataPointer);
        try {
            assertSourceIndexHasFiveShardsAndOneReplica(sourceClient);

            // when
            SearchRequestBuilder searchRequestBuilder = sourceClient.prepareSearch(sourceDataPointer.getIndexName())
                    .setTypes(DATA_TYPE)
                    .setSearchType(SearchType.SCAN)
                    .addFields("_ttl", "_source")
                    .setScroll(new TimeValue(QueryComponent.SCROLL_TIME_LIMIT))
                    .setSize(10);
            assertNotNull(searchRequestBuilder);

            // then
            SearchResponse searchResponse = searchRequestBuilder.execute().actionGet();
            assertEquals("Overall there should be 200 hits",
                    200L, searchResponse.getHits().getTotalHits());
            assertEquals("Initially zero documents are loaded",
                    0L, searchResponse.getHits().getHits().length);

            // when
            searchResponse = sourceClient.prepareSearchScroll(searchResponse.getScrollId())
                    .setScroll(new TimeValue(QueryComponent.SCROLL_TIMEOUT))
                    .get();

            // then
            assertEquals(200L, searchResponse.getHits().getTotalHits());
            assertEquals(50L, searchResponse.getHits().getHits().length);
        } finally {
            sourceClient.close();
        }
    }

    /** Asserts that the settings of SOURCE_INDEX report five shards and one replica. */
    private static void assertSourceIndexHasFiveShardsAndOneReplica(Client client) {
        GetSettingsResponse indexSettings = client.admin().indices()
                .getSettings(new GetSettingsRequest().indices(SOURCE_INDEX))
                .actionGet();
        assertEquals("We should have an index with 5 shards now",
                "5", indexSettings.getIndexToSettings().get(SOURCE_INDEX).get("index.number_of_shards"));
        assertEquals("We should have an index with one replica now",
                "1", indexSettings.getIndexToSettings().get(SOURCE_INDEX).get("index.number_of_replicas"));
    }

    /**
     * Recreates SOURCE_INDEX and fills it with {@code numberOfDocuments} documents whose ids are
     * 1..numberOfDocuments and whose only field, {@code fieldName}, holds the same number.
     */
    private void indexWithSampleData(final int numberOfDocuments) {
        Stream<IndexDocument> streamToBeIndexed = IntStream
                .rangeClosed(1, numberOfDocuments)
                .mapToObj(
                        i -> new IndexDocument(Integer.toString(i), ImmutableMap.of("fieldName", i))
                );
        embeddedElasticsearchCluster.indexWithSampleData(SOURCE_INDEX, DATA_TYPE, streamToBeIndexed);
    }
}
-------------------------------------------------------------------------------- /src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/process/QueryProcessTest.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex.process; 2 | 3 | import org.elasticsearch.action.search.SearchResponse; 4 | import org.elasticsearch.action.search.ShardSearchFailure; 5 | import org.elasticsearch.search.internal.InternalSearchResponse; 6 | import org.junit.Test; 7 | 8 | import static org.mockito.Matchers.any; 9 | import static org.mockito.Mockito.mock; 10 | import static org.mockito.Mockito.never; 11 | import static org.mockito.Mockito.times; 12 | import static org.mockito.Mockito.verify; 13 | import static org.mockito.Mockito.when; 14 | 15 | public class QueryProcessTest { 16 | 17 | @Test 18 | public void shouldNotFillQueueWhenQueryResultEmpty() throws Exception { 19 | //given 20 | SearchResponse searchResponse = new SearchResponse(); 21 | ProcessSynchronizer processSynchronizer = mock(ProcessSynchronizer.class); 22 | QueryComponent queryComponent = mock(QueryComponent.class); 23 | when(queryComponent.prepareSearchScrollRequest()).thenReturn(searchResponse); 24 | 25 | //when 26 | new QueryProcess(processSynchronizer, queryComponent).run(); 27 | 28 | //then 29 | verify(processSynchronizer, never()).tryFillQueueWithSearchHits(searchResponse); 30 | verify(processSynchronizer).subtractWorkingQueryProcess(); 31 | } 32 | 33 | @Test
34 | public void shouldFillQueueWhenQueryResultNotEmptyInOneChunk() throws Exception { 35 | //given 36 | SearchResponse searchResponse = new SearchResponse(); 37 | ProcessSynchronizer processSynchronizer = createProcessSynchronizerMock(); 38 | QueryComponent queryComponent = mock(QueryComponent.class); 39 | when(queryComponent.prepareSearchScrollRequest()).thenReturn(searchResponse); 40 | when(queryComponent.searchResultsNotEmpty(searchResponse)).thenReturn(true); 41 | 42 | //when 43 | new QueryProcess(processSynchronizer, queryComponent).run(); 44 | 45 | //then 46 | verify(processSynchronizer, times(1)).tryFillQueueWithSearchHits(searchResponse); 47 | verify(processSynchronizer).subtractWorkingQueryProcess(); 48 | } 49 | 50 | @Test 51 | public void shouldFillQueueWhenQueryResultNotEmptyInTwoChunks() throws Exception { 52 | //given 53 | SearchResponse searchResponse = createSearchResponseWithScrollId("scrollId"); 54 | ProcessSynchronizer processSynchronizer = createProcessSynchronizerMock(); 55 | QueryComponent queryComponent = mock(QueryComponent.class); 56 | when(queryComponent.prepareSearchScrollRequest()).thenReturn(searchResponse); 57 | when(queryComponent.searchResultsNotEmpty(searchResponse)).thenReturn(true); 58 | when(queryComponent.getResponseSize(searchResponse)).thenReturn(1, 0); 59 | when(queryComponent.getNextScrolledSearchResults("scrollId")).thenReturn(searchResponse); 60 | 61 | //when 62 | new QueryProcess(processSynchronizer, queryComponent).run(); 63 | 64 | //then 65 | verify(processSynchronizer, times(2)).tryFillQueueWithSearchHits(searchResponse); 66 | verify(processSynchronizer).subtractWorkingQueryProcess(); 67 | } 68 | 69 | private SearchResponse createSearchResponseWithScrollId(String scrollId) { 70 | return new SearchResponse(InternalSearchResponse.empty(), scrollId, 1, 1, 1, new ShardSearchFailure[0]); 71 | } 72 | 73 | private ProcessSynchronizer createProcessSynchronizerMock() { 74 | ProcessSynchronizer processSynchronizer = 
mock(ProcessSynchronizer.class); 75 | when(processSynchronizer.tryFillQueueWithSearchHits(any(SearchResponse.class))).thenReturn(true); 76 | return processSynchronizer; 77 | } 78 | 79 | 80 | } -------------------------------------------------------------------------------- /src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/query/DoubleFieldSegmentationTest.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex.query; 2 | 3 | import com.google.common.collect.Lists; 4 | import org.junit.Test; 5 | import org.mockito.Mockito; 6 | import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticSearchQuery; 7 | import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticSearchQueryBuilder; 8 | 9 | import static org.junit.Assert.assertEquals; 10 | import static pl.allegro.tech.search.elasticsearch.tools.reindex.query.RangeSegmentAssert.assertThat; 11 | 12 | public class DoubleFieldSegmentationTest { 13 | 14 | @Test 15 | public void checkSegmentationFieldName() throws Exception { 16 | //when 17 | DoubleFieldSegmentation segmentation = DoubleFieldSegmentation.create("name", Lists.newArrayList(0.0, 1.0), null); 18 | 19 | //then 20 | assertEquals("name", segmentation.getFieldName().get()); 21 | } 22 | 23 | @Test 24 | public void checkSegmentationQuery() throws Exception { 25 | //given 26 | ElasticSearchQuery query = ElasticSearchQueryBuilder.builder().build(); 27 | 28 | //when 29 | DoubleFieldSegmentation segmentation = DoubleFieldSegmentation.create("name", Lists.newArrayList(0.0, 1.0), query); 30 | 31 | //then 32 | assertEquals(query, segmentation.getQuery()); 33 | } 34 | 35 | @Test 36 | public void checkSegmentationOneThreshold() throws Exception { 37 | //when 38 | DoubleFieldSegmentation segmentation = DoubleFieldSegmentation.create("name", Lists.newArrayList(0.0, 1.0), null); 39 | 40 | //then 41 | assertEquals(1, 
segmentation.getSegmentsCount()); 42 | assertThat((RangeSegment) segmentation.getThreshold(0).get()) 43 | .hasLowerOpenBound(0.0) 44 | .hasUpperBound(1.0); 45 | } 46 | 47 | @Test 48 | public void checkSegmentationDoubleThreshold() throws Exception { 49 | //when 50 | DoubleFieldSegmentation segmentation = DoubleFieldSegmentation.create("name", Lists.newArrayList(0.0, 1.0, 2.0), null); 51 | 52 | //then 53 | assertEquals(2, segmentation.getSegmentsCount()); 54 | assertThat((RangeSegment) segmentation.getThreshold(0).get()) 55 | .hasLowerOpenBound(0.0) 56 | .hasUpperBound(1.0); 57 | assertThat((RangeSegment) segmentation.getThreshold(1).get()) 58 | .hasLowerOpenBound(1.0) 59 | .hasUpperBound(2.0); 60 | } 61 | 62 | 63 | } -------------------------------------------------------------------------------- /src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/query/EmptySegmentationTest.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex.query; 2 | 3 | import com.google.common.collect.Lists; 4 | import org.junit.Test; 5 | import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticSearchQuery; 6 | import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticSearchQueryBuilder; 7 | 8 | import static org.junit.Assert.*; 9 | 10 | public class EmptySegmentationTest { 11 | 12 | @Test 13 | public void checkSegmentationQuery() throws Exception { 14 | //given 15 | ElasticSearchQuery query = ElasticSearchQueryBuilder.builder().build(); 16 | 17 | //when 18 | EmptySegmentation segmentation = EmptySegmentation.createEmptySegmentation(query); 19 | 20 | //then 21 | assertEquals(query, segmentation.getQuery()); 22 | } 23 | } -------------------------------------------------------------------------------- /src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/query/PrefixSegmentAssert.java: 
-------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex.query; 2 | 3 | import junit.framework.TestCase; 4 | import org.assertj.core.api.AbstractAssert; 5 | 6 | public class PrefixSegmentAssert extends AbstractAssert { 7 | 8 | protected PrefixSegmentAssert(PrefixSegment actual) { 9 | super(actual, PrefixSegmentAssert.class); 10 | } 11 | 12 | public static PrefixSegmentAssert assertThat(PrefixSegment actual) { 13 | return new PrefixSegmentAssert(actual); 14 | } 15 | 16 | public PrefixSegmentAssert hasPrefix(String prefix) { 17 | isNotNull(); 18 | if (!actual.getPrefix().equals(prefix)) { 19 | failWithMessage("Expected character's prefix to be <%s> but was <%s>", prefix, actual.getPrefix()); 20 | } 21 | return this; 22 | } 23 | 24 | } -------------------------------------------------------------------------------- /src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/query/QuerySegmentationAssert.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex.query; 2 | 3 | import junit.framework.TestCase; 4 | import org.assertj.core.api.AbstractAssert; 5 | 6 | public class QuerySegmentationAssert extends AbstractAssert { 7 | 8 | protected QuerySegmentationAssert(QuerySegmentation actual) { 9 | super(actual, QuerySegmentationAssert.class); 10 | } 11 | 12 | public static QuerySegmentationAssert assertThat(QuerySegmentation actual) { 13 | return new QuerySegmentationAssert(actual); 14 | } 15 | 16 | public QuerySegmentationAssert isInstanceOf(Class clazz) { 17 | isNotNull(); 18 | if (!actual.getClass().isAssignableFrom(clazz)) { 19 | failWithMessage("Expected instance class to be <%s> but was <%s>", clazz, actual.getClass()); 20 | } 21 | return this; 22 | } 23 | 24 | public QuerySegmentationAssert hasFileName(String fileName) { 25 | isNotNull(); 26 | if 
(!actual.getFieldName().get().equals(fileName)) { 27 | failWithMessage("Expected character's fileName to be <%s> but was <%s>", fileName, actual.getFieldName().get()); 28 | } 29 | return this; 30 | } 31 | 32 | public QuerySegmentationAssert hasSegmentsCount(int segmentsCount) { 33 | isNotNull(); 34 | if (actual.getSegmentsCount() != segmentsCount) { 35 | failWithMessage("Expected segmentsCount to be <%d> but was <%d>", segmentsCount, actual.getSegmentsCount()); 36 | } 37 | return this; 38 | } 39 | 40 | } -------------------------------------------------------------------------------- /src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/query/QuerySegmentationFactoryTest.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex.query; 2 | 3 | import com.google.common.collect.Lists; 4 | import junitparams.JUnitParamsRunner; 5 | import junitparams.Parameters; 6 | import org.junit.Test; 7 | import org.junit.runner.RunWith; 8 | import pl.allegro.tech.search.elasticsearch.tools.reindex.command.ReindexCommand; 9 | 10 | import static org.junit.Assert.assertEquals; 11 | import static org.mockito.Mockito.mock; 12 | import static org.mockito.Mockito.when; 13 | import static pl.allegro.tech.search.elasticsearch.tools.reindex.query.PrefixSegmentAssert.assertThat; 14 | import static pl.allegro.tech.search.elasticsearch.tools.reindex.query.QuerySegmentationAssert.assertThat; 15 | import static pl.allegro.tech.search.elasticsearch.tools.reindex.query.RangeSegmentAssert.*; 16 | 17 | @RunWith(JUnitParamsRunner.class) 18 | public class QuerySegmentationFactoryTest { 19 | 20 | @Test 21 | public void shouldCreateEmptySegmentationWhenNoSegmentationFieldGiven() throws Exception { 22 | //given 23 | ReindexCommand command = mock(ReindexCommand.class); 24 | when(command.getSegmentationField()).thenReturn(null); 25 | //when 26 | QuerySegmentation querySegmentation = 
QuerySegmentationFactory.create(command); 27 | //then 28 | assertEquals(EmptySegmentation.class, querySegmentation.getClass()); 29 | } 30 | 31 | @Test 32 | @Parameters({ "1.0, 2.0" }) 33 | public void shouldCreateDoubleFieldSegmentation(double lowerBound, double upperBound) throws Exception { 34 | //given 35 | ReindexCommand command = mock(ReindexCommand.class); 36 | String fieldName = "fieldName"; 37 | when(command.getSegmentationField()).thenReturn(fieldName); 38 | when(command.getSegmentationThresholds()).thenReturn(Lists.newArrayList(lowerBound, upperBound)); 39 | //when 40 | QuerySegmentation querySegmentation = QuerySegmentationFactory.create(command); 41 | //then 42 | assertThat(querySegmentation) 43 | .isInstanceOf(DoubleFieldSegmentation.class) 44 | .hasFileName(fieldName) 45 | .hasSegmentsCount(1); 46 | RangeSegmentAssert.assertThat((RangeSegment) (querySegmentation.getThreshold(0).get())) 47 | .hasLowerOpenBound(lowerBound) 48 | .hasUpperBound(upperBound); 49 | } 50 | 51 | @Test 52 | @Parameters({ "1, 2" }) 53 | public void shouldCreateStringPrefixFieldSegmentation(String firstPrefix, String secondPrefix) throws Exception { 54 | //given 55 | ReindexCommand command = mock(ReindexCommand.class); 56 | String fieldName = "fieldName"; 57 | when(command.getSegmentationField()).thenReturn(fieldName); 58 | when(command.getSegmentationPrefixes()).thenReturn(Lists.newArrayList(firstPrefix, secondPrefix)); 59 | //when 60 | QuerySegmentation querySegmentation = QuerySegmentationFactory.create(command); 61 | //then 62 | assertThat(querySegmentation) 63 | .isInstanceOf(StringPrefixSegmentation.class) 64 | .hasFileName(fieldName) 65 | .hasSegmentsCount(2); 66 | assertThat((PrefixSegment) querySegmentation.getThreshold(0).get()) 67 | .hasPrefix(firstPrefix); 68 | assertThat((PrefixSegment) querySegmentation.getThreshold(1).get()) 69 | .hasPrefix(secondPrefix); 70 | } 71 | 72 | @Test(expected = BadSegmentationDefinitionException.class) 73 | public void 
shouldThrowExceptionWhenBadSegmentationDefinition() throws Exception { 74 | //given 75 | ReindexCommand command = mock(ReindexCommand.class); 76 | String fieldName = "fieldName"; 77 | when(command.getSegmentationField()).thenReturn(fieldName); 78 | //when 79 | QuerySegmentation querySegmentation = QuerySegmentationFactory.create(command); 80 | //then 81 | throw new RuntimeException("shouldn't create segmentation for fieldName " + querySegmentation.getFieldName()); 82 | } 83 | 84 | 85 | } -------------------------------------------------------------------------------- /src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/query/RangeSegmentAssert.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex.query; 2 | 3 | import org.assertj.core.api.AbstractAssert; 4 | 5 | public class RangeSegmentAssert extends AbstractAssert { 6 | 7 | protected RangeSegmentAssert(RangeSegment actual) { 8 | super(actual, RangeSegmentAssert.class); 9 | } 10 | 11 | public static RangeSegmentAssert assertThat(RangeSegment actual) { 12 | return new RangeSegmentAssert(actual); 13 | } 14 | 15 | public RangeSegmentAssert hasLowerOpenBound(Double lowerOpenBound) { 16 | isNotNull(); 17 | if (!actual.getLowerOpenBound().equals(lowerOpenBound)) { 18 | failWithMessage("Expected lowerOpenBound to be <%f> but was <%f>", lowerOpenBound, actual.getLowerOpenBound()); 19 | } 20 | return this; 21 | } 22 | 23 | public RangeSegmentAssert hasUpperBound(Double upperBound) { 24 | isNotNull(); 25 | if (!actual.getUpperBound().equals(upperBound)) { 26 | failWithMessage("Expected upperBound to be <%f> but was <%f>", upperBound, actual.getUpperBound()); 27 | } 28 | return this; 29 | } 30 | 31 | } -------------------------------------------------------------------------------- /src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/query/StringPrefixSegmentationTest.java: 
-------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex.query; 2 | 3 | import org.junit.Test; 4 | import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticSearchQuery; 5 | import pl.allegro.tech.search.elasticsearch.tools.reindex.connection.ElasticSearchQueryBuilder; 6 | 7 | import java.util.Collections; 8 | 9 | import static org.junit.Assert.assertEquals; 10 | 11 | public class StringPrefixSegmentationTest { 12 | 13 | @Test 14 | public void checkSegmentationQuery() throws Exception { 15 | //given 16 | ElasticSearchQuery query = ElasticSearchQueryBuilder.builder().build(); 17 | //when 18 | QuerySegmentation segmentation = StringPrefixSegmentation.create("fieldName", Collections.emptyList(), query); 19 | 20 | //then 21 | assertEquals(query, segmentation.getQuery()); 22 | } 23 | } -------------------------------------------------------------------------------- /src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/query/filter/BoundedFilterFactoryTest.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex.query.filter; 2 | 3 | import static org.assertj.core.api.Assertions.assertThat; 4 | 5 | import org.elasticsearch.index.query.PrefixQueryBuilder; 6 | import org.elasticsearch.index.query.QueryBuilder; 7 | import org.elasticsearch.index.query.RangeQueryBuilder; 8 | import org.junit.Test; 9 | 10 | import pl.allegro.tech.search.elasticsearch.tools.reindex.query.PrefixSegment; 11 | import pl.allegro.tech.search.elasticsearch.tools.reindex.query.RangeSegment; 12 | 13 | public class BoundedFilterFactoryTest { 14 | 15 | @Test 16 | public void shouldCreatePrefixFilter() throws Exception { 17 | //given 18 | BoundedFilterFactory factory = new BoundedFilterFactory(); 19 | PrefixSegment anyPrefixSegment = new PrefixSegment("prefix"); 20 | //when 21 | QueryBuilder filter 
= factory.createBoundedFilter("fieldName", anyPrefixSegment); 22 | //then 23 | assertThat(filter).isInstanceOf(PrefixQueryBuilder.class); 24 | } 25 | 26 | @Test 27 | public void shouldCreateDoubleBoundedFilter() throws Exception { 28 | //given 29 | BoundedFilterFactory factory = new BoundedFilterFactory(); 30 | RangeSegment anyRangeSegment = new RangeSegment(1.0, 2.0); 31 | //when 32 | QueryBuilder filter = factory.createBoundedFilter("fieldName", anyRangeSegment); 33 | //then 34 | assertThat(filter).isInstanceOf(RangeQueryBuilder.class); 35 | } 36 | } -------------------------------------------------------------------------------- /src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/statistics/ProcessStatisticsTest.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex.statistics; 2 | 3 | import org.junit.Test; 4 | 5 | import static org.junit.Assert.*; 6 | 7 | public class ProcessStatisticsTest { 8 | 9 | @Test 10 | public void shouldGetEmptyReindexingSummary() throws Exception { 11 | //given 12 | ProcessStatistics processStatistics = new ProcessStatistics(); 13 | //when 14 | ReindexingSummary reindexingSummary = processStatistics.createReindexingSummary(); 15 | //then 16 | ReindexingSummaryAssert.assertThat(reindexingSummary) 17 | .hasIndexedCount(0L) 18 | .hasQueriedCount(0L) 19 | .hasFailedIndexedCount(0L); 20 | } 21 | 22 | @Test 23 | public void shouldCountInIndexing() throws Exception { 24 | //given 25 | ProcessStatistics processStatistics = new ProcessStatistics(); 26 | //when 27 | processStatistics.incrementUpdates(1); 28 | ReindexingSummary reindexingSummary = processStatistics.createReindexingSummary(); 29 | //then 30 | assertEquals(1L, reindexingSummary.getIndexed()); 31 | } 32 | 33 | @Test 34 | public void shouldCountQueries() throws Exception { 35 | //given 36 | ProcessStatistics processStatistics = new ProcessStatistics(); 37 | //when 38 | 
processStatistics.incrementQueries(10); 39 | ReindexingSummary reindexingSummary = processStatistics.createReindexingSummary(); 40 | //then 41 | assertEquals(10L, reindexingSummary.getQueried()); 42 | } 43 | 44 | @Test 45 | public void shouldCountFailedIndexing() throws Exception { 46 | //given 47 | ProcessStatistics processStatistics = new ProcessStatistics(); 48 | //when 49 | processStatistics.incrementFailures(5); 50 | ReindexingSummary reindexingSummary = processStatistics.createReindexingSummary(); 51 | //then 52 | assertEquals(5L, reindexingSummary.getFailedIndexed()); 53 | } 54 | 55 | } -------------------------------------------------------------------------------- /src/test/java/pl/allegro/tech/search/elasticsearch/tools/reindex/statistics/ReindexingSummaryAssert.java: -------------------------------------------------------------------------------- 1 | package pl.allegro.tech.search.elasticsearch.tools.reindex.statistics; 2 | 3 | import org.assertj.core.api.AbstractAssert; 4 | 5 | public class ReindexingSummaryAssert extends AbstractAssert { 6 | 7 | protected ReindexingSummaryAssert(ReindexingSummary actual) { 8 | super(actual, ReindexingSummaryAssert.class); 9 | } 10 | 11 | public static ReindexingSummaryAssert assertThat(ReindexingSummary actual) { 12 | return new ReindexingSummaryAssert(actual); 13 | } 14 | 15 | public ReindexingSummaryAssert hasIndexedCount(long indexedCount) { 16 | isNotNull(); 17 | if (actual.getIndexed() != indexedCount) { 18 | failWithMessage("Expected indexedCount to be <%d> but was <%d>", indexedCount, actual.getIndexed()); 19 | } 20 | return this; 21 | } 22 | 23 | public ReindexingSummaryAssert hasQueriedCount(long queriedCount) { 24 | isNotNull(); 25 | if (actual.getQueried() != queriedCount) { 26 | failWithMessage("Expected queriedCount to be <%d> but was <%d>", queriedCount, actual.getQueried()); 27 | } 28 | return this; 29 | } 30 | 31 | public ReindexingSummaryAssert hasFailedIndexedCount(long failedIndexedCount) { 32 | 
isNotNull(); 33 | if (actual.getFailedIndexed() != failedIndexedCount) { 34 | failWithMessage("Expected failedIndexedCount to be <%d> but was <%d>", failedIndexedCount, actual.getFailedIndexed()); 35 | } 36 | return this; 37 | } 38 | 39 | } --------------------------------------------------------------------------------