├── .gitignore ├── src ├── main │ ├── resources │ │ └── es-plugin.properties │ ├── java │ │ └── com │ │ │ └── pannous │ │ │ └── es │ │ │ └── reindex │ │ │ ├── MySearchHits.java │ │ │ ├── MySearchHit.java │ │ │ ├── MySearchResponse.java │ │ │ ├── ReIndexPlugin.java │ │ │ ├── MySearchResponseES.java │ │ │ ├── ExampleUsage.java │ │ │ ├── ReIndexAction.java │ │ │ ├── ReIndexWithCreate.java │ │ │ └── MySearchResponseJson.java │ └── assemblies │ │ └── plugin.xml └── test │ └── java │ └── com │ └── pannous │ └── es │ └── reindex │ ├── ReIndexActionJsonTest.java │ ├── ReIndexActionESTest.java │ ├── AbstractNodesTests.java │ └── ReIndexActionTester.java ├── reinstall.sh ├── Readme.md └── pom.xml /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | nb-configuration.xml 3 | *~ 4 | data/ 5 | deploy.sh -------------------------------------------------------------------------------- /src/main/resources/es-plugin.properties: -------------------------------------------------------------------------------- 1 | plugin=com.pannous.es.reindex.ReIndexPlugin
--------------------------------------------------------------------------------
/reinstall.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Rebuilds the plugin with maven and reinstalls it into the local elasticsearch.
# Abort on the first failing command so that a broken build is never installed
# and the service is not restarted needlessly.
set -e
ES=/usr/share/elasticsearch
# The plugin may not be installed yet, so a failing removal is not fatal.
sudo "$ES/bin/plugin" remove reindex || true
mvn -DskipTests clean package
FILE=`ls ./target/elasticsearch-*zip`
sudo "$ES/bin/plugin" -url "file:$FILE" -install reindex
sudo service elasticsearch restart

--------------------------------------------------------------------------------
/src/test/java/com/pannous/es/reindex/ReIndexActionJsonTest.java:
--------------------------------------------------------------------------------
package com.pannous.es.reindex;

/**
 * Runs the tests of {@link ReIndexActionTester} with the JSON/HTTP based
 * search response.
 */
public class ReIndexActionJsonTest extends ReIndexActionTester {

    /**
     * Creates the scroll search through {@link MySearchResponseJson} against
     * localhost:9200 with empty credentials.
     */
    @Override
    protected MySearchResponse scrollSearch(String searchIndex, String type, String filter, int hits,
            boolean withVersion, int keepMinutes) {
        // NOTE: the original author hints that all local instances need to be
        // shut down to run this test.
        return new MySearchResponseJson("localhost", 9200, searchIndex, type, filter, "", hits,
                withVersion, keepMinutes);
    }
}

--------------------------------------------------------------------------------
/src/test/java/com/pannous/es/reindex/ReIndexActionESTest.java:
--------------------------------------------------------------------------------
package com.pannous.es.reindex;

import org.elasticsearch.action.search.SearchRequestBuilder;

/**
 * Runs the tests of {@link ReIndexActionTester} with the search response that
 * is backed by the elasticsearch Java client.
 */
public class ReIndexActionESTest extends ReIndexActionTester {

    /**
     * Builds the scroll search with the action under test, executes it and
     * wraps the result into a {@link MySearchResponseES}.
     */
    @Override
    protected MySearchResponse scrollSearch(String index, String type, String query, int hits,
            boolean withVersion, int keepMinutes) {
        SearchRequestBuilder srb = action.createScrollSearch(index, type, query, hits, withVersion, keepMinutes, client);
        return new MySearchResponseES(client, srb.execute().actionGet(), keepMinutes);
    }
}

-------------------------------------------------------------------------------- /src/main/java/com/pannous/es/reindex/MySearchHits.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2012 Peter Karich 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | package com.pannous.es.reindex; 17 | 18 | /** 19 | * @author Peter Karich 20 | */ 21 | public interface MySearchHits { 22 | 23 | Iterable getHits(); 24 | 25 | long totalHits(); 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/com/pannous/es/reindex/MySearchHit.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2012 Peter Karich 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.pannous.es.reindex; 17 | 18 | /** 19 | * @author Peter Karich 20 | */ 21 | public interface MySearchHit { 22 | 23 | String id(); 24 | 25 | String parent(); 26 | 27 | long version(); 28 | 29 | byte[] source(); 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/com/pannous/es/reindex/MySearchResponse.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2012 Peter Karich info@jetsli.de 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.pannous.es.reindex; 17 | 18 | /** 19 | * @author Peter Karich 20 | */ 21 | public interface MySearchResponse { 22 | 23 | MySearchHits hits(); 24 | 25 | String scrollId(); 26 | 27 | int doScoll(); 28 | 29 | long bytes(); 30 | } 31 | -------------------------------------------------------------------------------- /src/main/assemblies/plugin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | plugin 4 | 5 | zip 6 | 7 | false 8 | 9 | 10 | / 11 | true 12 | true 13 | 14 | org.elasticsearch:elasticsearch 15 | 16 | 17 | 18 | / 19 | true 20 | true 21 | 22 | org.apache.httpcomponents:httpclient 23 | org.json:json 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /src/main/java/com/pannous/es/reindex/ReIndexPlugin.java: -------------------------------------------------------------------------------- 1 | package com.pannous.es.reindex; 2 | 3 | import org.elasticsearch.common.inject.Module; 4 | import org.elasticsearch.common.logging.ESLogger; 5 | import org.elasticsearch.common.logging.Loggers; 6 | import org.elasticsearch.plugins.AbstractPlugin; 7 | import org.elasticsearch.rest.RestModule; 8 | 9 | /** 10 | * @author Peter Karich 11 | */ 12 | public class ReIndexPlugin extends AbstractPlugin { 13 | 14 | protected final ESLogger logger = Loggers.getLogger(ReIndexPlugin.class); 15 | 16 | @Override public String name() { 17 | return "reindex"; 18 | } 19 | 20 | @Override public String description() { 21 | return "ReIndex 
Plugin"; 22 | } 23 | 24 | @Override public void processModule(Module module) { 25 | if (module instanceof RestModule) { 26 | ((RestModule) module).addRestAction(ReIndexAction.class); 27 | ((RestModule) module).addRestAction(ReIndexWithCreate.class); 28 | // logger.info("NOW " + action.getFeed("test")); 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # Purpose 2 | 3 | This plugin can be used to 4 | 5 | * update all or selected documents of an index, e.g. after you change the settings of a type 6 | * to change the index settings like shard count: create a new index with that config and reindex all documents into that index 7 | * grab all or selected documents from another elasticsearch cluster and feed your local machine with that 8 | * and more like a filtered backup etc 9 | * See ExampleUsage.java on how to use this plugin as simple rewriter of your content 10 | 11 | # License 12 | 13 | Apache License 2.0 14 | 15 | 16 | # Installation 17 | 18 | > ./reinstall.sh 19 | 20 | you should see 'loaded [reindex], sites []' in the logs. Or use the reinstall.sh script for development purposes. Be sure to use elasticsearch 1.4.0. For newer versions - please provide pull requests ;) 21 | 22 | Note: This script will build and install the plugin assuming elasticsearch is found in `/usr/share/elasticsearch`. The script will call 'sudo' on the install part, so the script should be run as a user with sudo privileges. Since maven will be used to build the plugin, it requires maven to be installed, which can be installed with the command below on a debian/ubuntu system. 
23 | 24 | > sudo apt-get install maven2 25 | 26 | # Deinstallation 27 | 28 | > sudo $ES_HOME/bin/plugin remove reindex 29 | 30 | > sudo service elasticsearch restart 31 | 32 | 33 | # Usage 34 | 35 | ## WARNINGs / TODOs: 36 | 37 | * Please try this on your local machine before using it in production - especially the case searchHost!=localhost could be problematic for your performance/IO 38 | * The call is not async and not stoppable (unless you stop the requested server) => The plugin should probably rather be a river 39 | * If you have two servers on localhost and the queried server port is 9201 and you want to search 40 | the different server at 9200 => then you have to use e.g. searchHost=127.0.0.1&searchPort=9200 41 | 42 | ## Same cluster 43 | 44 | > curl -XPUT 'http://localhost:9200/indexnew/typenew/_reindex?searchIndex=indexold&searchType=typeold' -d ' 45 | > { "term" : { "count" : 2 } }' 46 | 47 | This refeeds all documents in index 'indexold' with type 'typeold' into the index 'indexnew' with type 'typenew'. 48 | But only documents matching the specified filter will be re-fed. The internal Java API will be used which should be efficient. 49 | In this example, the term filter is used to limit the documents that will be reindexed; you can leave out the filter to copy all documents to the new index. 50 | 51 | ## Same cluster with create index api 52 | > curl -XPUT 'http://localhost:9200/_reindex?index=indexnew&type=*&searchIndex=indexold' 53 | 54 | This command creates the index 'indexnew' if it does not exist (the parameter newIndexShards can be set to specify the number of shards). 55 | If type=*, all types of the index are reindexed. If you want to skip some types, add skipType=type1,type2 56 | 57 | 58 | ## Different cluster 59 | 60 | Now JSONObjects and the HttpClient will be used.
TODO: that is probably not efficient in terms of RAM/CPU: 61 | 62 | > curl -XPUT 'http://localhost:9200/indexnew/typenew/_reindex?searchIndex=indexold&searchType=typeold&searchHost=yourElasticsearchHost.com&searchPort=9200' -d ' 63 | > { "term" : { "count" : 2 } }' 64 | 65 | Further parameters: 66 | * hitsPerPage - used as search limit and at the same time for bulk indexing (default 1000) 67 | * keepTimeInMinutes - the maximum time in minutes a scroll search is valid (default 30); increase it if you have more data 68 | * withVersion - if the version of a document should be respected (default false) 69 | * waitInSeconds - pause the specified time after every request pair (one search+one bulkIndex, default 0). 70 | This avoids heavy load on the search or on the indexing server/cluster. This way it is very easy 71 | e.g. to grab even a massive amount of data from your production servers into your local machine. 72 | 73 | Hints: 74 | * the index 'indexnew' and the type 'typenew' should exist. 75 | * the parameters 'searchIndex' and 'searchType' are optional and the new ones will be used if not provided 76 | * the filter is also optional 77 | -------------------------------------------------------------------------------- /src/test/java/com/pannous/es/reindex/AbstractNodesTests.java: -------------------------------------------------------------------------------- 1 | package com.pannous.es.reindex; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | import org.elasticsearch.client.Client; 6 | import org.elasticsearch.common.logging.ESLogger; 7 | import org.elasticsearch.common.logging.Loggers; 8 | import org.elasticsearch.common.network.NetworkUtils; 9 | import org.elasticsearch.common.settings.ImmutableSettings; 10 | import org.elasticsearch.common.settings.Settings; 11 | import org.elasticsearch.node.Node; 12 | import static org.elasticsearch.common.settings.ImmutableSettings.Builder.EMPTY_SETTINGS; 13 | import static
org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder; 14 | import static org.elasticsearch.node.NodeBuilder.nodeBuilder; 15 | 16 | /** 17 | * a copy from Elasticsearch to avoid the dependency to tests 18 | */ 19 | public abstract class AbstractNodesTests { 20 | 21 | protected final ESLogger logger = Loggers.getLogger(getClass()); 22 | private Map nodes = new HashMap(); 23 | private Map clients = new HashMap(); 24 | private Settings defaultSettings = ImmutableSettings 25 | .settingsBuilder() 26 | .put("cluster.name", "test-cluster-" + NetworkUtils.getLocalAddress().getHostName()) 27 | .build(); 28 | 29 | public void putDefaultSettings(Settings.Builder settings) { 30 | putDefaultSettings(settings.build()); 31 | } 32 | 33 | public void putDefaultSettings(Settings settings) { 34 | defaultSettings = ImmutableSettings.settingsBuilder().put(defaultSettings).put(settings).build(); 35 | } 36 | 37 | public Node startNode(String id) { 38 | return buildNode(id).start(); 39 | } 40 | 41 | public Node startNode(String id, Settings.Builder settings) { 42 | return startNode(id, settings.build()); 43 | } 44 | 45 | public Node startNode(String id, Settings settings) { 46 | return buildNode(id, settings).start(); 47 | } 48 | 49 | public Node buildNode(String id) { 50 | return buildNode(id, EMPTY_SETTINGS); 51 | } 52 | 53 | public Node buildNode(String id, Settings.Builder settings) { 54 | return buildNode(id, settings.build()); 55 | } 56 | 57 | public Node buildNode(String id, Settings settings) { 58 | String settingsSource = getClass().getName().replace('.', '/') + ".yml"; 59 | Settings finalSettings = settingsBuilder() 60 | .loadFromClasspath(settingsSource) 61 | .put(defaultSettings) 62 | .put(settings) 63 | .put("name", id) 64 | .build(); 65 | 66 | if (finalSettings.get("gateway.type") == null) { 67 | // default to non gateway 68 | finalSettings = settingsBuilder().put(finalSettings).put("gateway.type", "none").build(); 69 | } 70 | if 
(finalSettings.get("cluster.routing.schedule") != null) { 71 | // decrease the routing schedule so new nodes will be added quickly 72 | finalSettings = settingsBuilder().put(finalSettings).put("cluster.routing.schedule", "50ms").build(); 73 | } 74 | 75 | Node node = nodeBuilder() 76 | .settings(finalSettings) 77 | .build(); 78 | nodes.put(id, node); 79 | clients.put(id, node.client()); 80 | return node; 81 | } 82 | 83 | public void closeNode(String id) { 84 | Client client = clients.remove(id); 85 | if (client != null) { 86 | client.close(); 87 | } 88 | Node node = nodes.remove(id); 89 | if (node != null) { 90 | node.close(); 91 | } 92 | } 93 | 94 | public Node node(String id) { 95 | return nodes.get(id); 96 | } 97 | 98 | public Client client(String id) { 99 | return clients.get(id); 100 | } 101 | 102 | public void closeAllNodes() { 103 | for (Client client : clients.values()) { 104 | client.close(); 105 | } 106 | clients.clear(); 107 | for (Node node : nodes.values()) { 108 | node.close(); 109 | } 110 | nodes.clear(); 111 | } 112 | } -------------------------------------------------------------------------------- /src/main/java/com/pannous/es/reindex/MySearchResponseES.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2012 Peter Karich info@jetsli.de 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.pannous.es.reindex; 17 | 18 | import java.util.Iterator; 19 | import org.elasticsearch.action.search.SearchResponse; 20 | import org.elasticsearch.client.Client; 21 | import org.elasticsearch.common.unit.TimeValue; 22 | import org.elasticsearch.search.SearchHit; 23 | import org.elasticsearch.search.SearchHits; 24 | 25 | /** 26 | * Searches with the given client - used for the same cluster. Not suited for 27 | * other clusters as they could have an incompatible version. 28 | * 29 | * @author Peter Karich 30 | */ 31 | public class MySearchResponseES implements MySearchResponse { 32 | 33 | private SearchResponse rsp; 34 | private final int keepTimeInMinutes; 35 | private final Client client; 36 | private long bytes = 0; 37 | 38 | public MySearchResponseES(Client client, SearchResponse rsp, int keepTimeInMinutes) { 39 | this.client = client; 40 | this.rsp = rsp; 41 | this.keepTimeInMinutes = keepTimeInMinutes; 42 | } 43 | 44 | @Override public MySearchHits hits() { 45 | final SearchHits hits = rsp.getHits(); 46 | // uh iterable is strange 47 | return new MySearchHits() { 48 | @Override public Iterable getHits() { 49 | return new Iterable() { 50 | @Override public Iterator iterator() { 51 | return new Iterator() { 52 | SearchHit[] arr = hits.hits(); 53 | int counter = 0; 54 | 55 | @Override public boolean hasNext() { 56 | return counter < arr.length; 57 | } 58 | 59 | @Override public MySearchHit next() { 60 | bytes += arr[counter].source().length; 61 | MySearchHitES ret = new MySearchHitES(arr[counter]); 62 | counter++; 63 | return ret; 64 | } 65 | 66 | @Override public void remove() { 67 | throw new UnsupportedOperationException("Not supported yet."); 68 | } 69 | }; 70 | } 71 | }; 72 | } 73 | 74 | @Override 75 | public long totalHits() { 76 | return hits.totalHits(); 77 | } 78 | }; 79 | } 80 | 81 | @Override public String scrollId() { 82 | return rsp.getScrollId(); 83 | } 84 | 85 | @Override public int doScoll() { 86 | rsp = 
client.prepareSearchScroll(scrollId()).setScroll(TimeValue.timeValueMinutes(keepTimeInMinutes)). 87 | execute().actionGet(); 88 | return rsp.getHits().hits().length; 89 | } 90 | 91 | @Override 92 | public long bytes() { 93 | return bytes; 94 | } 95 | 96 | static class MySearchHitES implements MySearchHit { 97 | 98 | private SearchHit sh; 99 | 100 | public MySearchHitES(SearchHit sh) { 101 | this.sh = sh; 102 | } 103 | 104 | @Override public String id() { 105 | return sh.id(); 106 | } 107 | 108 | @Override public String parent() { 109 | if (sh.field("_parent") != null) 110 | return sh.field("_parent").value(); 111 | return ""; 112 | } 113 | 114 | @Override public long version() { 115 | return sh.version(); 116 | } 117 | 118 | @Override public byte[] source() { 119 | return sh.source(); 120 | } 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.pannous.es 6 | reindex 7 | 1.4.1 8 | jar 9 | 10 | Reindex Plugin 11 | http://maven.apache.org 12 | 13 | 14 | 1.6 15 | UTF-8 16 | 1.4.0 17 | 18 | 19 | 20 | 21 | org.elasticsearch 22 | elasticsearch 23 | ${elasticsearch.version} 24 | 25 | 26 | 27 | 28 | org.apache.httpcomponents 29 | httpclient 30 | 4.2.5 31 | 32 | 33 | 34 | 35 | org.json 36 | json 37 | 20090211 38 | 39 | 40 | 41 | org.testng 42 | testng 43 | 6.8.1 44 | test 45 | 46 | 47 | org.hamcrest 48 | hamcrest-core 49 | 50 | 51 | junit 52 | junit 53 | 54 | 55 | 56 | 57 | org.hamcrest 58 | hamcrest-all 59 | 1.3 60 | test 61 | 62 | 63 | 64 | 65 | 66 | 67 | maven-assembly-plugin 68 | 2.3 69 | 70 | elasticsearch-${project.artifactId}-${project.version} 71 | false 72 | ${project.build.directory}/ 73 | 74 | ${basedir}/src/main/assemblies/plugin.xml 75 | 76 | 77 | 78 | 79 | package 80 | 81 | single 82 | 83 | 84 | 85 | 86 | 87 | de.thetaphi 88 | forbiddenapis 89 | 1.3 90 | 91 | 92 | true 93 | 97 | false 98 | 99 | 
103 | jdk-unsafe 104 | jdk-deprecated 105 | jdk-system-out 106 | 107 | 112 | 113 | 114 | 115 | 116 | check 117 | testCheck 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | org.apache.maven.plugins 129 | maven-compiler-plugin 130 | 131 | 1.6 132 | 1.6 133 | 134 | 135 | 136 | 137 | 138 | 139 | -------------------------------------------------------------------------------- /src/main/java/com/pannous/es/reindex/ExampleUsage.java: -------------------------------------------------------------------------------- 1 | package com.pannous.es.reindex; 2 | 3 | import java.io.UnsupportedEncodingException; 4 | import java.util.ArrayList; 5 | import java.util.List; 6 | import java.util.logging.Logger; 7 | import org.elasticsearch.action.search.SearchRequestBuilder; 8 | import org.elasticsearch.action.search.SearchResponse; 9 | import org.elasticsearch.client.Client; 10 | import org.elasticsearch.client.transport.TransportClient; 11 | import org.elasticsearch.common.settings.ImmutableSettings; 12 | import org.elasticsearch.common.settings.Settings; 13 | import org.elasticsearch.common.transport.InetSocketTransportAddress; 14 | import org.elasticsearch.rest.RestController; 15 | import org.json.JSONException; 16 | import org.json.JSONObject; 17 | 18 | /** 19 | * Class to use the reindex plugin as rewrite/refeed plugin - directly from 20 | * java. 
21 | * 22 | * @author Peter Karich 23 | */ 24 | public class ExampleUsage { 25 | 26 | private final static String charset = "UTF-8"; 27 | 28 | public static void main(String[] args) { 29 | String searchHost = "1.1.1.1"; 30 | int searchPort = 9300; 31 | String searchIndexName = "search_index"; 32 | String searchType = "some_type"; 33 | String newIndexName = "feed_index"; 34 | String newType = "some_type"; 35 | // String filter = "{ 'term' : {'locale' : 'de'} }".replaceAll("'", "\""); 36 | String filter = "{ 'query' : {'query_string' : { 'query' : 'text:blup*'} } }".replaceAll("'", "\""); 37 | String basicAuthCredentials = "base64_ifrequried="; 38 | boolean withVersion = false; 39 | final int hitsPerPage = 500; 40 | float waitInSeconds = 0.1f; 41 | // increase if you have lots of things to update 42 | int keepTimeInMinutes = 90; 43 | String cluster = "your_production_cluster_name"; 44 | 45 | boolean local = false; 46 | if (local) { 47 | cluster = "elasticsearch"; 48 | searchHost = "localhost"; 49 | basicAuthCredentials = "base64_ifrequried="; 50 | } 51 | 52 | Logger.getLogger("test").info("querying " + searchHost + ":" + searchPort 53 | + " at " + searchIndexName + " with " + basicAuthCredentials); 54 | 55 | Settings settings = ImmutableSettings.settingsBuilder() 56 | .put("cluster.name", cluster).build(); 57 | Client client = new TransportClient(settings). 
58 | addTransportAddress(new InetSocketTransportAddress(searchHost, searchPort)); 59 | 60 | Settings emptySettings = ImmutableSettings.settingsBuilder().build(); 61 | RestController contrl = new RestController(emptySettings); 62 | ReIndexAction action = new ReIndexAction(emptySettings, client, contrl) { 63 | @Override protected MySearchHits callback(MySearchHits hits) { 64 | SimpleList res = new SimpleList(hitsPerPage, hits.totalHits()); 65 | for (MySearchHit h : hits.getHits()) { 66 | try { 67 | String str = new String(h.source(), charset); 68 | RewriteSearchHit newHit = new RewriteSearchHit(h.id(), h.parent(), h.version(), str); 69 | String someField = newHit.get("some_field"); 70 | if (someField.contains("some content")) { 71 | newHit.put("some_field", "IT WORKS!"); 72 | } 73 | 74 | res.add(newHit); 75 | } catch (UnsupportedEncodingException ex) { 76 | throw new RuntimeException(ex); 77 | } 78 | } 79 | return res; 80 | } 81 | }; 82 | // first query, further scroll-queries in reindex! 
83 | SearchRequestBuilder srb = action.createScrollSearch(searchIndexName, searchType, filter, 84 | hitsPerPage, withVersion, keepTimeInMinutes, client); 85 | SearchResponse sr = srb.execute().actionGet(); 86 | MySearchResponse rsp = new MySearchResponseES(client, sr, keepTimeInMinutes); 87 | 88 | // now feed and call callback 89 | action.reindex(rsp, newIndexName, newType, withVersion, waitInSeconds, client); 90 | 91 | client.close(); 92 | } 93 | 94 | public static class SimpleList implements MySearchHits { 95 | 96 | long totalHits; 97 | List hits; 98 | 99 | public SimpleList(int size, long total) { 100 | hits = new ArrayList(size); 101 | totalHits = total; 102 | } 103 | 104 | public void add(MySearchHit hit) { 105 | hits.add(hit); 106 | } 107 | 108 | @Override public Iterable getHits() { 109 | return hits; 110 | } 111 | 112 | @Override 113 | public long totalHits() { 114 | return totalHits; 115 | } 116 | } 117 | 118 | public static class RewriteSearchHit implements MySearchHit { 119 | 120 | String id; 121 | String parent; 122 | long version; 123 | JSONObject json; 124 | 125 | public RewriteSearchHit(String id, String parent, long version, String jsonStr) { 126 | this.id = id; 127 | this.version = version; 128 | this.parent = parent; 129 | try { 130 | json = new JSONObject(jsonStr); 131 | } catch (JSONException ex) { 132 | throw new RuntimeException(ex); 133 | } 134 | } 135 | 136 | public String get(String key) { 137 | try { 138 | if (!json.has(key)) 139 | return ""; 140 | String val = json.getString(key); 141 | if (val == null) 142 | return ""; 143 | return val; 144 | } catch (JSONException ex) { 145 | throw new RuntimeException(ex); 146 | } 147 | } 148 | 149 | public JSONObject put(String key, Object obj) { 150 | try { 151 | return json.put(key, obj); 152 | } catch (JSONException ex) { 153 | throw new RuntimeException(ex); 154 | } 155 | } 156 | 157 | @Override public String id() { 158 | return id; 159 | } 160 | 161 | @Override public String parent() { 162 | 
return parent; 163 | } 164 | @Override public long version() { 165 | return version; 166 | } 167 | 168 | @Override public byte[] source() { 169 | try { 170 | return json.toString().getBytes(charset); 171 | } catch (UnsupportedEncodingException ex) { 172 | throw new RuntimeException(ex); 173 | } 174 | } 175 | } 176 | } 177 | -------------------------------------------------------------------------------- /src/main/java/com/pannous/es/reindex/ReIndexAction.java: -------------------------------------------------------------------------------- 1 | package com.pannous.es.reindex; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collection; 5 | import java.util.Collections; 6 | import java.util.List; 7 | import org.elasticsearch.action.admin.indices.flush.FlushRequest; 8 | import org.elasticsearch.action.bulk.BulkItemResponse; 9 | import org.elasticsearch.action.bulk.BulkRequestBuilder; 10 | import org.elasticsearch.action.bulk.BulkResponse; 11 | import org.elasticsearch.action.index.IndexRequest; 12 | import org.elasticsearch.action.search.SearchRequestBuilder; 13 | import org.elasticsearch.action.search.SearchResponse; 14 | import org.elasticsearch.action.search.SearchType; 15 | import org.elasticsearch.client.Client; 16 | import org.elasticsearch.client.Requests; 17 | import org.elasticsearch.common.StopWatch; 18 | import org.elasticsearch.common.inject.Inject; 19 | import org.elasticsearch.common.settings.Settings; 20 | import org.elasticsearch.common.unit.TimeValue; 21 | import org.elasticsearch.rest.*; 22 | import static org.elasticsearch.rest.RestRequest.Method.*; 23 | import static org.elasticsearch.rest.RestStatus.*; 24 | 25 | /** 26 | * Refeeds all the documents which matches the type and the (optional) query. 
27 | * 28 | * @author Peter Karich 29 | */ 30 | public class ReIndexAction extends BaseRestHandler { 31 | 32 | @Inject public ReIndexAction(Settings settings, Client client, RestController controller) { 33 | super(settings, controller, client); 34 | 35 | if (controller != null) { 36 | // Define REST endpoints to do a reindex 37 | controller.registerHandler(PUT, "/{index}/{type}/_reindex", this); 38 | controller.registerHandler(POST, "/{index}/{type}/_reindex", this); 39 | } 40 | } 41 | 42 | @Override public void handleRequest(RestRequest request, RestChannel channel, Client client) { 43 | handleRequest(request, channel, null, false, client); 44 | } 45 | 46 | public void handleRequest(RestRequest request, RestChannel channel, String newTypeOverride, boolean internalCall, Client client) { 47 | logger.info("ReIndexAction.handleRequest [{}]", request.params()); 48 | String newIndexName = request.param("index"); 49 | String searchIndexName = request.param("searchIndex"); 50 | if (searchIndexName == null || searchIndexName.isEmpty()) 51 | searchIndexName = newIndexName; 52 | 53 | String newType = newTypeOverride != null ? newTypeOverride : request.param("type"); 54 | String searchType = newTypeOverride != null ? 
newTypeOverride : request.param("searchType"); 55 | if (searchType == null || searchType.isEmpty()) 56 | searchType = newType; 57 | 58 | int searchPort = request.paramAsInt("searchPort", 9200); 59 | String searchHost = request.param("searchHost", "localhost"); 60 | boolean localAction = "localhost".equals(searchHost) && searchPort == 9200; 61 | boolean withVersion = request.paramAsBoolean("withVersion", false); 62 | int keepTimeInMinutes = request.paramAsInt("keepTimeInMinutes", 30); 63 | int hitsPerPage = request.paramAsInt("hitsPerPage", 1000); 64 | float waitInSeconds = request.paramAsFloat("waitInSeconds", 0); 65 | String basicAuthCredentials = request.param("credentials", ""); 66 | String filter = request.content().toUtf8(); 67 | MySearchResponse rsp; 68 | if (localAction) { 69 | SearchRequestBuilder srb = createScrollSearch(searchIndexName, searchType, filter, 70 | hitsPerPage, withVersion, keepTimeInMinutes, client); 71 | SearchResponse sr = srb.execute().actionGet(); 72 | rsp = new MySearchResponseES(client, sr, keepTimeInMinutes); 73 | } else { 74 | // TODO make it possible to restrict to a cluster 75 | rsp = new MySearchResponseJson(searchHost, searchPort, searchIndexName, searchType, filter, 76 | basicAuthCredentials, hitsPerPage, withVersion, keepTimeInMinutes); 77 | } 78 | 79 | // TODO make async and allow control of process from external (e.g. stopping etc) 80 | // or just move stuff into a river? 81 | reindex(rsp, newIndexName, newType, withVersion, waitInSeconds, client); 82 | 83 | // TODO reindex again all new items => therefor we need a timestamp field to filter 84 | // + how to combine with existing filter? 
85 | 86 | logger.info("Finished reindexing of index " + searchIndexName + " into " + newIndexName + ", query " + filter); 87 | 88 | if (!internalCall) 89 | channel.sendResponse(new BytesRestResponse(OK)); 90 | } 91 | 92 | public SearchRequestBuilder createScrollSearch(String oldIndexName, String oldType, String filter, 93 | int hitsPerPage, boolean withVersion, int keepTimeInMinutes, Client client) { 94 | SearchRequestBuilder srb = client.prepareSearch(oldIndexName). 95 | setTypes(oldType). 96 | setVersion(withVersion). 97 | setSize(hitsPerPage). 98 | setSearchType(SearchType.SCAN). 99 | addField("_source"). 100 | addField("_parent"). 101 | setScroll(TimeValue.timeValueMinutes(keepTimeInMinutes)); 102 | 103 | if (filter != null && !filter.trim().isEmpty()) 104 | srb.setPostFilter(filter); 105 | return srb; 106 | } 107 | 108 | public int reindex(MySearchResponse rsp, String newIndex, String newType, boolean withVersion, 109 | float waitSeconds, Client client) { 110 | boolean flushEnabled = false; 111 | long total = rsp.hits().totalHits(); 112 | int collectedResults = 0; 113 | int failed = 0; 114 | while (true) { 115 | if (collectedResults > 0 && waitSeconds > 0) { 116 | try { 117 | Thread.sleep(Math.round(waitSeconds * 1000)); 118 | } catch (InterruptedException ex) { 119 | break; 120 | } 121 | } 122 | StopWatch queryWatch = new StopWatch().start(); 123 | int currentResults = rsp.doScoll(); 124 | if (currentResults == 0) 125 | break; 126 | 127 | MySearchHits res = callback(rsp.hits()); 128 | if (res == null) 129 | break; 130 | queryWatch.stop(); 131 | StopWatch updateWatch = new StopWatch().start(); 132 | failed += bulkUpdate(res, newIndex, newType, withVersion, client).size(); 133 | if (flushEnabled) 134 | client.admin().indices().flush(new FlushRequest(newIndex)).actionGet(); 135 | 136 | updateWatch.stop(); 137 | collectedResults += currentResults; 138 | logger.debug("Progress " + collectedResults + "/" + total 139 | + ". 
Time of update:" + updateWatch.totalTime().getSeconds() + " query:" 140 | + queryWatch.totalTime().getSeconds() + " failed:" + failed); 141 | } 142 | String str = "found " + total + ", collected:" + collectedResults 143 | + ", transfered:" + (float) rsp.bytes() / (1 << 20) + "MB"; 144 | if (failed > 0) 145 | logger.warn(failed + " FAILED documents! " + str); 146 | else 147 | logger.info(str); 148 | return collectedResults; 149 | } 150 | 151 | Collection bulkUpdate(MySearchHits objects, String indexName, 152 | String newType, boolean withVersion, Client client) { 153 | BulkRequestBuilder brb = client.prepareBulk(); 154 | for (MySearchHit hit : objects.getHits()) { 155 | if (hit.id() == null || hit.id().isEmpty()) { 156 | logger.warn("Skipped object without id when bulkUpdate:" + hit); 157 | continue; 158 | } 159 | 160 | try { 161 | IndexRequest indexReq = Requests.indexRequest(indexName).type(newType).id(hit.id()).source(hit.source()); 162 | if (withVersion) 163 | indexReq.version(hit.version()); 164 | if (hit.parent() != null && !hit.parent().isEmpty()) { 165 | indexReq.parent(hit.parent()); 166 | } 167 | brb.add(indexReq); 168 | } catch (Exception ex) { 169 | logger.warn("Cannot add object:" + hit + " to bulkIndexing action." + ex.getMessage()); 170 | } 171 | } 172 | if (brb.numberOfActions() > 0) { 173 | BulkResponse rsp = brb.execute().actionGet(); 174 | if (rsp.hasFailures()) { 175 | List list = new ArrayList(rsp.getItems().length); 176 | for (BulkItemResponse br : rsp.getItems()) { 177 | if (br.isFailed()) 178 | list.add(br.getItemId()); 179 | } 180 | return list; 181 | } 182 | } 183 | return Collections.emptyList(); 184 | } 185 | 186 | /** 187 | * Can be used to be overwritten and to rewrite some fields of the hits. 
188 | */ 189 | protected MySearchHits callback(MySearchHits hits) { 190 | return hits; 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /src/test/java/com/pannous/es/reindex/ReIndexActionTester.java: -------------------------------------------------------------------------------- 1 | package com.pannous.es.reindex; 2 | 3 | import org.elasticsearch.action.admin.cluster.state.ClusterStateRequest; 4 | import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest; 5 | import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest; 6 | import org.elasticsearch.action.admin.indices.refresh.RefreshRequest; 7 | import org.elasticsearch.action.count.CountRequest; 8 | import org.elasticsearch.action.index.IndexRequestBuilder; 9 | import org.elasticsearch.action.index.IndexResponse; 10 | import org.elasticsearch.action.search.SearchResponse; 11 | import org.elasticsearch.client.Client; 12 | import org.elasticsearch.cluster.metadata.IndexMetaData; 13 | import org.elasticsearch.common.hppc.cursors.ObjectCursor; 14 | import org.elasticsearch.common.settings.ImmutableSettings; 15 | import org.elasticsearch.common.settings.Settings; 16 | import org.elasticsearch.index.query.QueryBuilder; 17 | import org.elasticsearch.index.query.QueryBuilders; 18 | import org.elasticsearch.rest.RestController; 19 | import org.elasticsearch.search.sort.SortOrder; 20 | import org.testng.annotations.AfterClass; 21 | import org.testng.annotations.BeforeClass; 22 | import org.testng.annotations.Test; 23 | 24 | import static org.hamcrest.MatcherAssert.assertThat; 25 | import static org.hamcrest.Matchers.*; 26 | import org.json.JSONObject; 27 | import org.testng.annotations.BeforeMethod; 28 | 29 | import java.lang.reflect.Method; 30 | import java.util.ArrayList; 31 | import java.util.List; 32 | 33 | /** 34 | * This test will be called from the ElasticSearch and from the 'JSON' 35 | * implementation. 
(to avoid code duplication) 36 | * 37 | * @author Peter Karich 38 | */ 39 | public abstract class ReIndexActionTester extends AbstractNodesTests { 40 | 41 | protected Client client; 42 | protected ReIndexAction action; 43 | // protected String indexNames = "oldtweets,tweets"; 44 | 45 | @BeforeClass public void createNodes() throws Exception { 46 | startNode("node1"); 47 | client = client("node1"); 48 | } 49 | 50 | @AfterClass public void closeNodes() { 51 | client.close(); 52 | closeAllNodes(); 53 | } 54 | 55 | @BeforeMethod public void setUp() { 56 | client.admin().indices().delete(new DeleteIndexRequest("_all")).actionGet(); 57 | Settings emptySettings = ImmutableSettings.settingsBuilder().build(); 58 | action = new ReIndexAction(emptySettings, client, new RestController(emptySettings)); 59 | } 60 | 61 | protected MySearchResponse scrollSearch(String searchIndex, String searchType, String query) { 62 | return scrollSearch(searchIndex, searchType, query, 10, false, 1); 63 | } 64 | 65 | protected abstract MySearchResponse scrollSearch(String searchIndex, String searchType, 66 | String filter, int hits, boolean withVersion, int keepMinutes); 67 | 68 | @Test public void reindexAll() throws Exception { 69 | add("oldtweets", "tweet", null, "{ \"name\" : \"hello world\", \"count\" : 1}"); 70 | add("oldtweets", "tweet", null, "{ \"name\" : \"peter ä test\", \"count\" : 2}"); 71 | refresh("oldtweets"); 72 | assertThat(count("oldtweets"), equalTo(2L)); 73 | 74 | int res = action.reindex(scrollSearch("oldtweets", "tweet", ""), "tweets", "tweet", false, 0, client); 75 | assertThat(res, equalTo(2)); 76 | refresh("tweets"); 77 | assertThat(count("tweets"), equalTo(2L)); 78 | 79 | // now check if content was correctly streamed and saved 80 | SearchResponse sr = client.prepareSearch("tweets"). 
81 | addSort("count", SortOrder.ASC).execute().actionGet(); 82 | assertThat(sr.getHits().hits().length, equalTo(2)); 83 | assertThat(new JSONObject(sr.getHits().hits()[0].sourceAsString()).getString("name"), equalTo("hello world")); 84 | assertThat(new JSONObject(sr.getHits().hits()[1].sourceAsString()).getString("name"), equalTo("peter ä test")); 85 | } 86 | 87 | @Test public void reindexAllPartial() throws Exception { 88 | add("oldtweets", "tweet", null, "{ \"name\" : \"hello world\", \"count\" : 1}"); 89 | add("oldtweets", "tweet", null, "{ \"name\" : \"peter test\", \"count\" : 2}"); 90 | refresh("oldtweets"); 91 | assertThat(count("oldtweets"), equalTo(2L)); 92 | int res = action.reindex(scrollSearch("oldtweets", "tweet", "{ \"term\": { \"count\" : 2} }"), "tweets", "tweet", false, 0, client); 93 | assertThat(res, equalTo(1)); 94 | refresh("tweets"); 95 | assertThat(count("tweets"), equalTo(1L)); 96 | SearchResponse sr = client.prepareSearch("tweets").execute().actionGet(); 97 | assertThat(sr.getHits().hits().length, equalTo(1)); 98 | assertThat(new JSONObject(sr.getHits().hits()[0].sourceAsString()).getString("name"), equalTo("peter test")); 99 | } 100 | 101 | @Test public void reindexChilds() throws Exception { 102 | String parent = add("oldtweets", "tweet", null, "{ \"name\" : \"hello world\", \"count\" : 1}"); 103 | // update the mapping settings for oldtweets childs (i.e retweet type) 104 | client.admin().indices().preparePutMapping().setIndices("oldtweets").setType("retweet").setSource("{\"retweet\": { \"_parent\": { \"type\": \"tweet\" }, \"_routing\": { \"required\": true }, \"properties\": { \"name\": { \"type\": \"string\" }, \"count\": { \"type\": \"long\" } } }}").execute().actionGet(); 105 | String child = add("oldtweets", "retweet", parent, "{ \"name\" : \"RE: hello world\", \"count\" : 1, \"_parent\" : \"" + parent + "\"}"); 106 | refresh("oldtweets"); 107 | assertThat(count("oldtweets"), equalTo(2L)); 108 | 109 | int res = 
action.reindex(scrollSearch("oldtweets", "tweet", ""), "tweets", "tweet", false, 0, client); 110 | assertThat(res, equalTo(1)); 111 | refresh("tweets"); 112 | assertThat(count("tweets"), equalTo(1L)); 113 | 114 | // update the mapping settings for oldtweets childs (i.e retweet type) 115 | client.admin().indices().preparePutMapping().setIndices("tweets").setType("retweet").setSource("{\"retweet\": { \"_parent\": { \"type\": \"tweet\" }, \"_routing\": { \"required\": true }, \"properties\": { \"name\": { \"type\": \"string\" }, \"count\": { \"type\": \"long\" } } }}").execute().actionGet(); 116 | 117 | res = action.reindex(scrollSearch("oldtweets", "retweet", ""), "tweets", "retweet", false, 0, client); 118 | assertThat(res, equalTo(1)); 119 | refresh("tweets"); 120 | assertThat(count("tweets"), equalTo(2L)); 121 | 122 | // now check if content was correctly streamed and saved 123 | SearchResponse parent_sr = client.prepareSearch("tweets").setTypes("tweet") 124 | .addSort("count", SortOrder.ASC).execute().actionGet(); 125 | assertThat(parent_sr.getHits().hits().length, equalTo(1)); 126 | String reindex_parent = parent_sr.getHits().hits()[0].id(); 127 | 128 | SearchResponse child_sr = client.prepareSearch("tweets").setTypes("retweet").setRouting(reindex_parent).setQuery(QueryBuilders.boolQuery().must(QueryBuilders.termQuery("_parent", reindex_parent))) 129 | .addSort("count", SortOrder.ASC).execute().actionGet(); 130 | assertThat(child_sr.getHits().hits().length, equalTo(1)); 131 | } 132 | 133 | @Test public void copyAliases() throws Exception { 134 | add("oldtweets", "tweet", null, "{ \"name\" : \"hello world\", \"count\" : 1}"); 135 | add("tweets", "tweet", null, "{ \"name\" : \"peter ä test\", \"count\" : 2}"); 136 | IndicesAliasesRequest aReq = new IndicesAliasesRequest(); 137 | aReq.addAlias("myalias", "oldtweets"); 138 | client.admin().indices().aliases(aReq).actionGet(); 139 | refresh("oldtweets"); 140 | refresh("tweets"); 141 | List oldAliases = 
getAliasesNames("oldtweets"); 142 | assertThat(oldAliases.size(), equalTo(1)); 143 | assertThat(oldAliases.get(0), equalTo("myalias")); 144 | 145 | Settings emptySettings = ImmutableSettings.settingsBuilder().build(); 146 | RestController contrl = new RestController(emptySettings); 147 | ReIndexWithCreate action = new ReIndexWithCreate(emptySettings, client, contrl); 148 | 149 | Method copyAliases = action.getClass().getDeclaredMethod("copyAliases", String.class, String.class, Boolean.class, Client.class); 150 | copyAliases.setAccessible(true); 151 | copyAliases.invoke(action, "tweets", "oldtweets", false, client); 152 | List createdAliases = getAliasesNames("tweets"); 153 | assertThat(oldAliases, equalTo(createdAliases)); 154 | } 155 | 156 | private String add(String index, String type, String routing, String json) { 157 | IndexRequestBuilder req = client.prepareIndex(index, type).setSource(json); 158 | if (routing != null) 159 | req.setRouting(routing); 160 | 161 | IndexResponse rsp = req.execute().actionGet(); 162 | return rsp.getId(); 163 | } 164 | 165 | private void refresh(String index) { 166 | client.admin().indices().refresh(new RefreshRequest(index)).actionGet(); 167 | } 168 | 169 | private List getAliasesNames(String index) { 170 | IndexMetaData meta = client.admin().cluster().state(new ClusterStateRequest()). 
171 | actionGet().getState().metaData().index(index); 172 | List aliases = new ArrayList(); 173 | if(meta != null && meta.aliases() != null) { 174 | for (ObjectCursor oldAliasCursor : meta.aliases().keys()) { 175 | aliases.add(oldAliasCursor.value); 176 | } 177 | } 178 | return aliases; 179 | } 180 | 181 | private long count(String index) { 182 | return client.count(new CountRequest(index)).actionGet().getCount(); 183 | } 184 | } 185 | -------------------------------------------------------------------------------- /src/main/java/com/pannous/es/reindex/ReIndexWithCreate.java: -------------------------------------------------------------------------------- 1 | package com.pannous.es.reindex; 2 | 3 | import org.elasticsearch.action.admin.cluster.state.ClusterStateRequest; 4 | import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest; 5 | import org.elasticsearch.action.admin.indices.create.CreateIndexRequest; 6 | import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest; 7 | import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest; 8 | import org.elasticsearch.action.admin.indices.refresh.RefreshRequest; 9 | import org.elasticsearch.action.count.CountRequest; 10 | import org.elasticsearch.client.Client; 11 | import org.elasticsearch.cluster.metadata.IndexMetaData; 12 | import org.elasticsearch.cluster.metadata.MappingMetaData; 13 | import org.elasticsearch.common.hppc.cursors.ObjectCursor; 14 | import org.elasticsearch.common.hppc.cursors.ObjectObjectCursor; 15 | import org.elasticsearch.common.inject.Inject; 16 | import org.elasticsearch.common.settings.ImmutableSettings; 17 | import org.elasticsearch.common.settings.Settings; 18 | import org.elasticsearch.rest.*; 19 | 20 | import java.io.IOException; 21 | 22 | import static org.elasticsearch.rest.RestRequest.Method.POST; 23 | import static org.elasticsearch.rest.RestRequest.Method.PUT; 24 | import static org.elasticsearch.rest.RestStatus.OK; 25 | import 
java.util.*; 26 | 27 | /** 28 | * @author Peter Karich 29 | */ 30 | public class ReIndexWithCreate extends BaseRestHandler { 31 | 32 | private ReIndexAction reindexAction; 33 | 34 | @Inject public ReIndexWithCreate(Settings settings, Client client, RestController controller) { 35 | super(settings, controller, client); 36 | 37 | // Define REST endpoints to do a reindex 38 | controller.registerHandler(PUT, "/_reindex", this); 39 | controller.registerHandler(POST, "/_reindex", this); 40 | 41 | // give null controller as argument to avoid registering twice 42 | // which would lead to an assert exception 43 | reindexAction = new ReIndexAction(settings, client, null); 44 | } 45 | 46 | @Override public void handleRequest(RestRequest request, RestChannel channel, Client client) { 47 | logger.info("ReIndexWithCreate.handleRequest [{}]", request.toString()); 48 | try { 49 | // required parameters 50 | String newIndexName = request.param("index"); 51 | if (newIndexName.isEmpty()) { 52 | channel.sendResponse(new BytesRestResponse(RestStatus.EXPECTATION_FAILED, "parameter index missing")); 53 | return; 54 | } 55 | String type = request.param("type", ""); 56 | if (type.isEmpty()) { 57 | channel.sendResponse(new BytesRestResponse(RestStatus.EXPECTATION_FAILED, "parameter type missing")); 58 | return; 59 | } 60 | String searchIndexName = request.param("searchIndex"); 61 | if (searchIndexName.isEmpty()) { 62 | channel.sendResponse(new BytesRestResponse(RestStatus.EXPECTATION_FAILED, "parameter searchIndex missing")); 63 | return; 64 | } 65 | 66 | String skipType = request.param("skipType", ""); 67 | ListskipTypeList; 68 | if (!skipType.isEmpty()) { 69 | skipTypeList=Arrays.asList(skipType.trim().split(",")); 70 | }else{ 71 | skipTypeList = new ArrayList(); 72 | } 73 | 74 | int newShards = request.paramAsInt("newIndexShards", -1); 75 | try { 76 | if(client.admin().indices().exists(new IndicesExistsRequest(newIndexName)).actionGet().isExists()) { 77 | logger.info("target index 
already exists, skip creation: " + newIndexName); 78 | } 79 | else { 80 | createIdenticalIndex(searchIndexName, type, newIndexName, newShards, client); 81 | } 82 | } catch (Exception ex) { 83 | String str = "Problem while creating index " + newIndexName + " from " + searchIndexName + " " + ex.getMessage(); 84 | logger.error(str, ex); 85 | channel.sendResponse(new BytesRestResponse(RestStatus.INTERNAL_SERVER_ERROR, str)); 86 | return; 87 | } 88 | 89 | // TODO: what if queries goes to the old index while we reindexed? 90 | // now reindex 91 | 92 | if(type.equals("*")) { 93 | 94 | IndexMetaData indexData = client.admin().cluster().state(new ClusterStateRequest()). 95 | actionGet().getState().metaData().indices().get(searchIndexName); 96 | Settings searchIndexSettings = indexData.settings(); 97 | 98 | for(ObjectCursor mapKeyCursor : indexData.mappings().keys()) { 99 | if (skipTypeList.contains(mapKeyCursor.value)) { 100 | logger.info("Skip type [{}]", mapKeyCursor.value); 101 | continue; 102 | } 103 | reindexAction.handleRequest(request, channel, mapKeyCursor.value, true, client); 104 | } 105 | } 106 | else { 107 | reindexAction.handleRequest(request, channel, type, true, client); 108 | } 109 | 110 | boolean delete = request.paramAsBoolean("delete", false); 111 | if (delete) { 112 | 113 | // make sure to refresh the index here 114 | // (e.g. 
the index may be paused or refreshing with a very long interval): 115 | logger.info("refreshing " + searchIndexName); 116 | client.admin().indices().refresh(new RefreshRequest(newIndexName)).actionGet(); 117 | 118 | long oldCount = client.count(new CountRequest(searchIndexName)).actionGet().getCount(); 119 | long newCount = client.count(new CountRequest(newIndexName)).actionGet().getCount(); 120 | if (oldCount == newCount) { 121 | logger.info("deleting " + searchIndexName); 122 | client.admin().indices().delete(new DeleteIndexRequest(searchIndexName)).actionGet(); 123 | } 124 | } 125 | 126 | boolean copyAliases = request.paramAsBoolean("copyAliases", false); 127 | if (copyAliases) 128 | copyAliases(request, client); 129 | 130 | channel.sendResponse(new BytesRestResponse(OK)); 131 | 132 | } catch (Exception ex) { // also catch the RuntimeException thrown by ReIndexAction 133 | try { 134 | channel.sendResponse(new BytesRestResponse(channel, ex)); 135 | } catch (Exception ex2) { 136 | logger.error("problem while rolling index", ex2); 137 | } 138 | } 139 | } 140 | 141 | /** 142 | * Creates a new index out of the settings from the old index. 143 | */ 144 | private void createIdenticalIndex(String oldIndex, String type, 145 | String newIndex, int newIndexShards, Client client) throws IOException { 146 | IndexMetaData indexData = client.admin().cluster().state(new ClusterStateRequest()). 
147 | actionGet().getState().metaData().indices().get(oldIndex); 148 | Settings searchIndexSettings = indexData.settings(); 149 | ImmutableSettings.Builder settingBuilder = ImmutableSettings.settingsBuilder().put(searchIndexSettings); 150 | if (newIndexShards > 0) 151 | settingBuilder.put("index.number_of_shards", newIndexShards); 152 | 153 | CreateIndexRequest createReq; 154 | 155 | if(type.equals("*")) { 156 | createReq = new CreateIndexRequest(newIndex); 157 | for(ObjectObjectCursor mapCursor : indexData.mappings()) { 158 | createReq.mapping(mapCursor.key, mapCursor.value.sourceAsMap()); 159 | } 160 | createReq.settings(settingBuilder.build()); 161 | } 162 | else { 163 | MappingMetaData mappingMeta = indexData.mapping(type); 164 | createReq = new CreateIndexRequest(newIndex). 165 | mapping(type, mappingMeta.sourceAsMap()). 166 | settings(settingBuilder.build()); 167 | } 168 | 169 | client.admin().indices().create(createReq).actionGet(); 170 | } 171 | 172 | private void copyAliases(RestRequest request, Client client) { 173 | String index = request.param("index"); 174 | String searchIndexName = request.param("searchIndex"); 175 | boolean aliasIncludeIndex = request.paramAsBoolean("addOldIndexAsAlias", false); 176 | copyAliases(index, searchIndexName, aliasIncludeIndex, client); 177 | } 178 | 179 | private void copyAliases(String index, String searchIndexName, Boolean aliasIncludeIndex, Client client) { 180 | IndexMetaData meta = client.admin().cluster().state(new ClusterStateRequest()). 
181 | actionGet().getState().metaData().index(searchIndexName); 182 | IndicesAliasesRequest aReq = new IndicesAliasesRequest(); 183 | boolean empty = true; 184 | if(meta != null && meta.aliases() != null) { 185 | for (ObjectCursor oldAliasCursor : meta.aliases().keys() ) { 186 | empty = false; 187 | aReq.addAlias(oldAliasCursor.value, index); 188 | } 189 | } 190 | if (aliasIncludeIndex) { 191 | if (client.admin().indices().exists(new IndicesExistsRequest(searchIndexName)).actionGet().isExists()) { 192 | logger.warn("Cannot add old index name (" + searchIndexName + ") as alias to index " 193 | + index + " - as old index still exists"); 194 | } 195 | else { 196 | aReq.addAlias(searchIndexName, index); 197 | empty = false; 198 | } 199 | } 200 | if(!empty) //!aReq.aliasActions().isEmpty()) 201 | client.admin().indices().aliases(aReq).actionGet(); 202 | } 203 | } 204 | -------------------------------------------------------------------------------- /src/main/java/com/pannous/es/reindex/MySearchResponseJson.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2012 Peter Karich info@jetsli.de 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.pannous.es.reindex; 17 | 18 | import java.io.BufferedInputStream; 19 | import java.io.ByteArrayOutputStream; 20 | import java.io.IOException; 21 | import java.io.InputStream; 22 | import java.net.HttpURLConnection; 23 | import java.net.MalformedURLException; 24 | import java.net.Proxy; 25 | import java.net.URL; 26 | import java.util.ArrayList; 27 | import java.util.List; 28 | import org.apache.http.HttpResponse; 29 | import org.apache.http.HttpVersion; 30 | import org.apache.http.client.HttpClient; 31 | import org.apache.http.entity.StringEntity; 32 | import org.apache.http.client.methods.HttpEntityEnclosingRequestBase; 33 | import org.apache.http.client.methods.HttpGet; 34 | import org.apache.http.client.methods.HttpPost; 35 | import org.apache.http.client.methods.HttpRequestBase; 36 | import org.apache.http.impl.client.DefaultHttpClient; 37 | import org.apache.http.impl.conn.PoolingClientConnectionManager; 38 | import org.apache.http.params.BasicHttpParams; 39 | import org.apache.http.params.CoreProtocolPNames; 40 | import org.apache.http.params.HttpConnectionParams; 41 | import org.apache.http.params.HttpProtocolParams; 42 | import org.apache.http.protocol.HTTP; 43 | import org.json.JSONArray; 44 | import org.json.JSONException; 45 | import org.json.JSONObject; 46 | 47 | /** 48 | * @author Peter Karich 49 | */ 50 | public class MySearchResponseJson implements MySearchResponse { 51 | 52 | private int timeout = 20000; 53 | private HttpClient client; 54 | private String scrollId; 55 | private List bufferedHits; 56 | private String host; 57 | private int port; 58 | private int keepMin; 59 | private final boolean withVersion; 60 | private final long totalHits; 61 | private long bytes; 62 | private String credentials = ""; 63 | 64 | public MySearchResponseJson(String searchHost, int searchPort, String searchIndexName, 65 | String searchType, String filter, String credentials, 66 | int hitsPerPage, boolean withVersion, int keepTimeInMinutes) { 
67 | if (!searchHost.startsWith("http")) 68 | searchHost = "http://" + searchHost; 69 | this.host = searchHost; 70 | this.port = searchPort; 71 | this.withVersion = withVersion; 72 | keepMin = keepTimeInMinutes; 73 | bufferedHits = new ArrayList(hitsPerPage); 74 | PoolingClientConnectionManager connManager = new PoolingClientConnectionManager(); 75 | connManager.setMaxTotal(10); 76 | 77 | BasicHttpParams params = new BasicHttpParams(); 78 | HttpConnectionParams.setConnectionTimeout(params, timeout); 79 | HttpProtocolParams.setVersion(params, HttpVersion.HTTP_1_1); 80 | HttpProtocolParams.setContentCharset(params, "UTF-8"); 81 | client = new DefaultHttpClient(connManager, params); 82 | 83 | // does not work!? client.getParams().setParameter("Authorization", "Basic " + credentials); 84 | if (credentials != null) 85 | this.credentials = credentials; 86 | 87 | // initial query to get scroll id for our specific search 88 | try { 89 | String url = searchHost + ":" + searchPort + "/" + searchIndexName + "/" + searchType 90 | + "/_search?search_type=scan&scroll=" + keepMin + "m&size=" + hitsPerPage; 91 | 92 | String query; 93 | if (filter == null || filter.isEmpty()) 94 | query = "{ \"query\" : {\"match_all\" : {}}, \"fields\" : [\"_source\", \"_parent\"]}"; 95 | else 96 | query = "{ \"filter\" : " + filter + ", \"fields\" : [\"_source\", \"_parent\"] }"; 97 | 98 | JSONObject res = doPost(url, query); 99 | scrollId = res.getString("_scroll_id"); 100 | totalHits = res.getJSONObject("hits").getLong("total"); 101 | } catch (JSONException ex) { 102 | throw new RuntimeException(ex); 103 | } 104 | } 105 | 106 | @Override public MySearchHits hits() { 107 | return new MySearchHits() { 108 | @Override public Iterable getHits() { 109 | return bufferedHits; 110 | } 111 | 112 | @Override public long totalHits() { 113 | return totalHits; 114 | } 115 | }; 116 | } 117 | 118 | @Override public String scrollId() { 119 | return scrollId; 120 | } 121 | 122 | @Override public int doScoll() { 
123 | try { 124 | bufferedHits.clear(); 125 | JSONObject json = doGet(host + ":" + port 126 | + "/_search/scroll?scroll=" + keepMin + "m&scroll_id=" + scrollId); 127 | scrollId = json.getString("_scroll_id"); 128 | JSONObject hitsJson = json.getJSONObject("hits"); 129 | JSONArray arr = hitsJson.getJSONArray("hits"); 130 | for (int i = 0; i < arr.length(); i++) { 131 | JSONObject hitJson = arr.getJSONObject(i); 132 | long version = -1; 133 | String id = hitJson.getString("_id"); 134 | String parent = ""; 135 | if (hitJson.has("_parent")) 136 | parent = hitJson.getString("_parent"); 137 | if (hitJson.has("fields")) { 138 | JSONObject fields = hitJson.getJSONObject("fields"); 139 | if (fields.has("_parent")) { 140 | parent = fields.getString("_parent"); 141 | } 142 | } 143 | String sourceStr = hitJson.getString("_source"); 144 | byte[] source = sourceStr.getBytes("UTF-8"); 145 | if (withVersion && hitJson.has("_version")) 146 | version = hitJson.getLong("_version"); 147 | bytes += source.length; 148 | MySearchHitJson res = new MySearchHitJson(id, parent, source, version); 149 | bufferedHits.add(res); 150 | } 151 | return bufferedHits.size(); 152 | } catch (Exception ex) { 153 | throw new RuntimeException(ex); 154 | } 155 | } 156 | 157 | @Override 158 | public long bytes() { 159 | return bytes; 160 | } 161 | 162 | class MySearchHitJson implements MySearchHit { 163 | 164 | String id; 165 | String parent; 166 | byte[] source; 167 | long version; 168 | 169 | public MySearchHitJson(String id, String parent, byte[] source, long version) { 170 | this.id = id; 171 | this.parent = parent; 172 | this.source = source; 173 | this.version = version; 174 | } 175 | 176 | @Override public String id() { 177 | return id; 178 | } 179 | 180 | @Override public String parent() { 181 | return parent; 182 | } 183 | 184 | @Override public long version() { 185 | return version; 186 | } 187 | 188 | @Override public byte[] source() { 189 | return source; 190 | } 191 | } 192 | 193 | protected 
HttpURLConnection createUrlConnection(String urlAsStr, int timeout) 194 | throws MalformedURLException, IOException { 195 | URL url = new URL(urlAsStr); 196 | //using proxy may increase latency 197 | HttpURLConnection hConn = (HttpURLConnection) url.openConnection(Proxy.NO_PROXY); 198 | hConn.setRequestProperty("User-Agent", "ElasticSearch reindex"); 199 | hConn.setRequestProperty("Accept", "application/json"); 200 | hConn.setRequestProperty("content-charset", "UTF-8"); 201 | // hConn.setRequestProperty("Cache-Control", cacheControl); 202 | // suggest respond to be gzipped or deflated (which is just another compression) 203 | // http://stackoverflow.com/q/3932117 204 | hConn.setRequestProperty("Accept-Encoding", "gzip, deflate"); 205 | hConn.setConnectTimeout(timeout); 206 | hConn.setReadTimeout(timeout); 207 | return hConn; 208 | } 209 | 210 | public JSONObject doPost(String url, String content) throws JSONException { 211 | return new JSONObject(requestContent(new HttpPost(url), content)); 212 | } 213 | 214 | public JSONObject doGet(String url) throws JSONException { 215 | HttpGet http = new HttpGet(url); 216 | try { 217 | addHeaders(http); 218 | HttpResponse rsp = client.execute(http); 219 | int ret = rsp.getStatusLine().getStatusCode(); 220 | if (ret / 200 == 1) 221 | return new JSONObject(readString(rsp.getEntity().getContent(), "UTF-8")); 222 | 223 | throw new RuntimeException("Problem " + ret + " while " + http.getMethod() 224 | + " " + readString(rsp.getEntity().getContent(), "UTF-8")); 225 | } catch (Exception ex) { 226 | throw new RuntimeException("Problem while " + http.getMethod() 227 | + ", Error:" + ex.getMessage() + ", url:" + url, ex); 228 | } finally { 229 | http.releaseConnection(); 230 | } 231 | } 232 | 233 | private void addHeaders(HttpRequestBase http) { 234 | http.setHeader("Content-Type", "application/json; charset=utf-8"); 235 | if (!credentials.isEmpty()) 236 | http.setHeader("Authorization", "Basic " + credentials); 237 | } 238 | 239 | 
public String requestContent(HttpEntityEnclosingRequestBase http, String content) { 240 | try { 241 | addHeaders(http); 242 | StringEntity sendentity = new StringEntity(content, "UTF-8"); 243 | http.setEntity(sendentity); 244 | HttpResponse rsp = client.execute(http); 245 | int ret = rsp.getStatusLine().getStatusCode(); 246 | if (ret / 200 == 1) 247 | return readString(rsp.getEntity().getContent(), "UTF-8"); 248 | 249 | throw new RuntimeException("Problem " + ret + " while " + http.getMethod() 250 | + " " + readString(rsp.getEntity().getContent(), "UTF-8")); 251 | } catch (Exception ex) { 252 | throw new RuntimeException("Problem while " + http.getMethod() 253 | + ", Error:" + ex.getMessage() + ", url:" + http.getURI(), ex); 254 | } finally { 255 | http.releaseConnection(); 256 | } 257 | } 258 | 259 | public static String readString(InputStream inputStream, String encoding) throws IOException { 260 | InputStream in = new BufferedInputStream(inputStream); 261 | try { 262 | byte[] buffer = new byte[4096]; 263 | ByteArrayOutputStream output = new ByteArrayOutputStream(); 264 | int numRead; 265 | while ((numRead = in.read(buffer)) != -1) { 266 | output.write(buffer, 0, numRead); 267 | } 268 | return output.toString(encoding); 269 | } finally { 270 | in.close(); 271 | } 272 | } 273 | } 274 | --------------------------------------------------------------------------------