├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── pom.xml └── src ├── main └── java │ └── com │ └── github │ └── fhuss │ └── storm │ └── elasticsearch │ ├── ClientFactory.java │ ├── Document.java │ ├── bolt │ └── IndexBatchBolt.java │ ├── commons │ └── RichTickTupleBolt.java │ ├── handler │ └── BulkResponseHandler.java │ ├── mapper │ ├── MappingException.java │ ├── TridentTupleMapper.java │ ├── TupleMapper.java │ └── impl │ │ └── DefaultTupleMapper.java │ └── state │ ├── ESIndexMapState.java │ ├── ESIndexState.java │ ├── ESIndexUpdater.java │ ├── QuerySearchIndexQuery.java │ └── ValueSerializer.java └── test ├── java └── com │ └── github │ └── fhuss │ └── storm │ └── elasticsearch │ ├── BaseLocalClusterTest.java │ ├── bolt │ └── IndexBatchBoltTest.java │ ├── functions │ ├── CreateJson.java │ ├── DocumentBuilder.java │ └── ExtractSearchArgs.java │ ├── model │ └── Tweet.java │ └── state │ ├── ESIndexUpdaterTest.java │ ├── IndexMapStateTest.java │ └── ValueSerializerTest.java └── resources └── elasticsearch.yml /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled source # 2 | ################### 3 | *.com 4 | *.class 5 | *.dll 6 | *.exe 7 | *.o 8 | *.so 9 | 10 | # Packages # 11 | ############ 12 | # it's better to unpack these files and commit the raw source 13 | # git has its own built in compression methods 14 | *.7z 15 | *.dmg 16 | *.gz 17 | *.iso 18 | *.jar 19 | *.rar 20 | *.tar 21 | *.zip 22 | 23 | # Logs and databases # 24 | ###################### 25 | *.log 26 | *.sql 27 | *.sqlite 28 | 29 | # OS generated files # 30 | ###################### 31 | .DS_Store 32 | .DS_Store? 
33 | ._* 34 | .Spotlight-V100 35 | .Trashes 36 | ehthumbs.db 37 | Thumbs.db 38 | 39 | # Eclipse 40 | .classpath 41 | .project 42 | .settings/ 43 | 44 | # Intellij 45 | .idea/ 46 | *.iml 47 | *.iws 48 | 49 | # Maven 50 | log/ 51 | target/ 52 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.3.0 2 | 3 | * [#6][]: Fixed NotSerializableException on Document class. 4 | * [#5][]: Index update with ESIndexState should work with other generic types than String 5 | * Update storm version to 0.9.3 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 
26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. 
(Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Apache Storm - Elasticsearch 2 | ---------------------------- 3 | 4 | [Apache Storm](https://storm.apache.org/) is a free and open source distributed realtime computation system. 5 | 6 | ### Bolt/Trident API implementation for [Elasticsearch](https://www.elastic.co/) 7 | 8 | This library provides core storm bolt and implements a Trident state on top of Elasticsearch. 9 | It supports non-transactional, transactional, and opaque state types. 10 | 11 | ### Maven dependency 12 | ```xml 13 | 14 | com.github.fhuss 15 | storm-elasticsearch 16 | 0.3.0 17 | 18 | ``` 19 | ### TupleMapper / TridentTupleMapper 20 | To index documents into elasticsearch you need to provide an implementation of following interfaces according to 21 | you use bolt or trident state. 
22 | 23 | These two interfaces have only one method defined used to map tuple fields to a [Document](https://github.com/fhussonnois/storm-trident-elasticsearch/blob/master/src/main/java/com/github/fhuss/storm/elasticsearch/Document.java). 24 | 25 | ```java 26 | public interface TupleMapper extends Serializable { 27 | T map(Tuple input); 28 | } 29 | ``` 30 | 31 | ```java 32 | public interface TridentTupleMapper extends Serializable { 33 | T map(TridentTuple input); 34 | } 35 | ``` 36 | 37 | To be indexed, a document requires at least following attributes: 38 | 39 | - The **name** of the index 40 | - The **type** of document 41 | - The **source** document 42 | 43 | For general use cases, a default implementation is provided [DefaultTupleMapper](https://github.com/fhussonnois/storm-trident-elasticsearch/blob/master/src/main/java/com/github/fhuss/storm/elasticsearch/mapper/impl/DefaultTupleMapper.java). 44 | 45 | ### Core Bolt / IndexBatchBolt 46 | The IndexBatchBolt implementation relies on storm tick tuple feature and Elasticsearch [Bulk API](http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/docs-bulk.html) to 47 | index many tuples. 
48 | 49 | ### Trident State examples 50 | #### Persistent Aggregate 51 | 52 | ```java 53 | FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence"), 3, 54 | new Values("the cow jumped over the moon"), 55 | new Values("the man went to the store and bought some candy"), 56 | new Values("four score and seven years ago"), 57 | new Values("how many apples can you eat"), 58 | new Values("to be or not to be the person")); 59 | spout.setCycle(true); 60 | 61 | TridentTopology topology = new TridentTopology(); 62 | 63 | Settings settings = ImmutableSettings.settingsBuilder().loadFromClasspath("elasticsearch.yml").build(); 64 | StateFactory stateFactory = ESIndexMapState.nonTransactional(new ClientFactory.LocalTransport(settings.getAsMap()), Tweet.class); 65 | 66 | topology.newStream("tweets", spout) 67 | .each(new Fields("sentence"), new DocumentBuilder(), new Fields("document")) 68 | .each(new Fields("document"), new ExtractDocumentInfo(), new Fields("id", "index", "type")) 69 | .groupBy(new Fields("index", "type", "id")) 70 | .persistentAggregate(stateFactory, new Fields("document"), new TweetBuilder(), new Fields("tweet")) 71 | .parallelismHint(1); 72 | ``` 73 | 74 | #### Search query using DRPC 75 | ```java 76 | TridentTopology topology = new TridentTopology(); 77 | 78 | Settings settings = ImmutableSettings.settingsBuilder().loadFromClasspath("elasticsearch.yml").build(); 79 | TridentState staticState = topology.newStaticState(new ESIndexState.Factory<>(new LocalTransport(settings.getAsMap()), Tweet.class)); 80 | topology.newDRPCStream("search", drpc) 81 | .each(new Fields("args"), new ExtractSearchArgs(), new Fields("query", "indices", "types")) 82 | .groupBy(new Fields("query", "indices", "types")) 83 | .stateQuery(staticState, new Fields("query", "indices", "types"), new QuerySearchIndexQuery(), new Fields("tweet")) 84 | .each(new Fields("tweet"), new FilterNull()) 85 | .each(new Fields("tweet"), new CreateJson(), new Fields("json")) 86 | .project(new 
Fields("json")); 87 | ``` 88 | 89 | ## License 90 | 91 | Licensed to the Apache Software Foundation (ASF) under one 92 | or more contributor license agreements. See the NOTICE file 93 | distributed with this work for additional information 94 | regarding copyright ownership. The ASF licenses this file 95 | to you under the Apache License, Version 2.0 (the 96 | "License"); you may not use this file except in compliance 97 | with the License. You may obtain a copy of the License at 98 | 99 | http://www.apache.org/licenses/LICENSE-2.0 100 | 101 | Unless required by applicable law or agreed to in writing, 102 | software distributed under the License is distributed on an 103 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 104 | KIND, either express or implied. See the License for the 105 | specific language governing permissions and limitations 106 | under the License. 107 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | 8 | org.sonatype.oss 9 | oss-parent 10 | 9 11 | 12 | 13 | com.github.fhuss 14 | storm-elasticsearch 15 | 0.3.0 16 | Storm Trident Elasticsearch 17 | Trident API implementation for Elasticsearch 18 | 19 | 20 | UTF-8 21 | 1.7 22 | 0.9.3 23 | 1.7.6 24 | 1.0.1 25 | 2.3.2 26 | 4.11 27 | 1.2.1 28 | 16.0.1 29 | 3.3 30 | 31 | 32 | 33 | 34 | The Apache Software License, Version 2.0 35 | http://www.apache.org/licenses/LICENSE-2.0.txt 36 | 37 | 38 | 39 | 40 | 41 | fhuss 42 | Florian Hussonnois 43 | florian.hussonnois@gmail.com 44 | https://github.com/fhussonnois 45 | 46 | developer 47 | 48 | 49 | 50 | 51 | 52 | scm:git:git@github.com:fhussonnois/storm-trident-elasticsearch.git 53 | scm:git:git@github.com:fhussonnois/storm-trident-elasticsearch.git 54 | git@github.com:fhussonnois/storm-trident-elasticsearch.git 55 | 56 | 57 | 58 | 59 | 60 | org.apache.maven.plugins 61 | maven-compiler-plugin 62 | 3.1 63 | 64 
| ${java.version} 65 | ${java.version} 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | org.apache.commons 74 | commons-lang3 75 | ${commons-lang3.version} 76 | 77 | 78 | 79 | org.apache.storm 80 | storm-core 81 | ${storm.version} 82 | provided 83 | 84 | 85 | 86 | org.slf4j 87 | slf4j-api 88 | ${org.slf4j.version} 89 | 90 | 91 | 92 | com.fasterxml.jackson.core 93 | jackson-databind 94 | ${jackson.databind.version} 95 | 96 | 97 | 98 | org.elasticsearch 99 | elasticsearch 100 | ${org.elasticsearch.version} 101 | provided 102 | 103 | 104 | 105 | com.google.guava 106 | guava 107 | ${guava.version} 108 | 109 | 110 | 111 | junit 112 | junit 113 | ${junit.version} 114 | test 115 | 116 | 117 | 118 | com.github.tlrx 119 | elasticsearch-test 120 | ${elasticsearch-test.version} 121 | test 122 | 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /src/main/java/com/github/fhuss/storm/elasticsearch/ClientFactory.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | package com.github.fhuss.storm.elasticsearch; 20 | 21 | import com.google.common.base.Preconditions; 22 | import org.apache.commons.lang3.StringUtils; 23 | import org.elasticsearch.client.Client; 24 | import org.elasticsearch.client.transport.TransportClient; 25 | import org.elasticsearch.common.settings.ImmutableSettings; 26 | import org.elasticsearch.common.settings.Settings; 27 | import org.elasticsearch.common.transport.InetSocketTransportAddress; 28 | import org.elasticsearch.common.transport.LocalTransportAddress; 29 | import org.elasticsearch.common.unit.TimeValue; 30 | import org.elasticsearch.node.Node; 31 | import org.elasticsearch.node.NodeBuilder; 32 | 33 | import java.io.Serializable; 34 | import java.util.Map; 35 | 36 | /** 37 | * Interface to make Elasticsearch client based on the Storm map configuration. 38 | * 39 | * @author fhussonnois 40 | */ 41 | public interface ClientFactory extends Serializable { 42 | 43 | public static final int DEFAULT_PORT = 9300; 44 | public static final String NAME = "storm.elasticsearch.cluster.name"; 45 | public static final String HOSTS = "storm.elasticsearch.hosts"; 46 | public static final char PORT_SEPARATOR = ':'; 47 | public static final char HOST_SEPARATOR = ','; 48 | 49 | T makeClient(Map conf) ; 50 | 51 | /** 52 | * Use this factory to create {@link TransportClient} that connects to a cluster. 
53 | */ 54 | public static class Transport implements ClientFactory { 55 | 56 | private Map settings; 57 | 58 | public Transport() { 59 | } 60 | 61 | public Transport(Map settings) { 62 | this.settings = settings; 63 | } 64 | 65 | @Override 66 | public TransportClient makeClient(Map conf) { 67 | 68 | String clusterHosts = (String)conf.get(HOSTS); 69 | String clusterName = (String)conf.get(NAME); 70 | 71 | Preconditions.checkNotNull(clusterHosts,"no setting found for Transport Client, make sure that you set property " + HOSTS); 72 | 73 | TransportClient client = new TransportClient(buildSettings(clusterName)); 74 | 75 | for(String hostAndPort : StringUtils.split(clusterHosts, HOST_SEPARATOR)) { 76 | int portPos = hostAndPort.indexOf(PORT_SEPARATOR); 77 | boolean noPortDefined = portPos == -1; 78 | int port = ( noPortDefined ) ? DEFAULT_PORT : Integer.parseInt(hostAndPort.substring(portPos + 1, hostAndPort.length())); 79 | String host = (noPortDefined) ? hostAndPort : hostAndPort.substring(0, portPos); 80 | client.addTransportAddress(new InetSocketTransportAddress(host, port)); 81 | } 82 | return client; 83 | } 84 | 85 | private Settings buildSettings(String clusterName) { 86 | ImmutableSettings.Builder sb = ImmutableSettings.settingsBuilder(); 87 | if( StringUtils.isNotEmpty(clusterName)) sb.put("cluster.name", clusterName); 88 | if( settings != null) sb.put(settings); 89 | 90 | return sb.build(); 91 | } 92 | } 93 | 94 | /** 95 | * Use this factory to create {@link TransportClient} that connects to a local cluster. 
96 | */ 97 | public static class LocalTransport implements ClientFactory { 98 | 99 | private Map settings; 100 | 101 | public LocalTransport() { 102 | } 103 | 104 | public LocalTransport(Map settings) { 105 | this.settings = settings; 106 | } 107 | 108 | 109 | @Override 110 | public TransportClient makeClient(Map conf) { 111 | TransportClient client = new TransportClient(buildSettings()); 112 | client.addTransportAddress(new LocalTransportAddress("1")); 113 | return client; 114 | } 115 | 116 | protected Settings buildSettings( ) { 117 | ImmutableSettings.Builder sb = ImmutableSettings.settingsBuilder().put("node.local", "true"); 118 | if( settings != null) sb.put(settings); 119 | 120 | return sb.build(); 121 | } 122 | } 123 | 124 | /** 125 | * Use this factory to create an embedded Node that acts as a node within a cluster. 126 | */ 127 | public static class NodeClient implements ClientFactory { 128 | 129 | private Map settings; 130 | 131 | public NodeClient() {} 132 | 133 | public NodeClient(Map settings) { 134 | this.settings = settings; 135 | } 136 | 137 | @Override 138 | public Client makeClient(Map conf) { 139 | String clusterName = (String)conf.get(NAME); 140 | 141 | final Node node = NodeBuilder.nodeBuilder().settings(buildSettings(clusterName)).node(); 142 | registerShutdownHook(node); 143 | 144 | return node.client(); 145 | } 146 | 147 | private void registerShutdownHook(final Node node) { 148 | Runtime.getRuntime().addShutdownHook(new Thread() { 149 | public void run() { 150 | node.close(); 151 | } 152 | }); 153 | } 154 | 155 | private Settings buildSettings(String clusterName) { 156 | ImmutableSettings.Builder sb = ImmutableSettings.settingsBuilder().put("node.client", true); 157 | 158 | if( StringUtils.isNotEmpty(clusterName)) sb.put("cluster.name", clusterName); 159 | if( settings != null) sb.put(settings); 160 | 161 | return sb.build(); 162 | } 163 | } 164 | 165 | /** 166 | * Use this factory to create a local embedded Node that acts as a node within 
a cluster. 167 | * This factory should be preferred for testing purpose. 168 | */ 169 | public static class LocalNodeClient implements ClientFactory { 170 | 171 | private Map settings; 172 | 173 | public LocalNodeClient() {} 174 | 175 | public LocalNodeClient(Map settings) { this.settings = settings; } 176 | 177 | @Override 178 | public Client makeClient(Map conf) { 179 | 180 | final Node node = NodeBuilder.nodeBuilder().settings( buildSettings() ).node(); 181 | registerShutdownHook(node); 182 | 183 | return waitForYellowStatus(node.client()); 184 | } 185 | 186 | private void registerShutdownHook(final Node node) { 187 | Runtime.getRuntime().addShutdownHook(new Thread() { 188 | public void run() { 189 | node.close(); 190 | } 191 | }); 192 | } 193 | 194 | private Client waitForYellowStatus(Client client) { 195 | client.admin().cluster() 196 | .prepareHealth() 197 | .setWaitForYellowStatus() 198 | .setTimeout(TimeValue.timeValueSeconds(30)) 199 | .execute() 200 | .actionGet(); 201 | return client; 202 | } 203 | 204 | private Settings buildSettings( ) { 205 | ImmutableSettings.Builder sb = ImmutableSettings.settingsBuilder() 206 | .put("node.name", "elastic-storm-test") 207 | .put("node.local", true) 208 | .put("index.store.type", "memory"); 209 | 210 | if( settings != null) sb.put(settings); 211 | 212 | return sb.build(); 213 | } 214 | } 215 | } 216 | -------------------------------------------------------------------------------- /src/main/java/com/github/fhuss/storm/elasticsearch/Document.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | package com.github.fhuss.storm.elasticsearch; 20 | 21 | import java.io.Serializable; 22 | 23 | /** 24 | * This class should be used to wrap data required to index a document. 25 | * 26 | * @author fhussonnois 27 | * @param type of the underlying document 28 | */ 29 | public class Document implements Serializable { 30 | 31 | private static final long serialVersionUID = 1L; 32 | 33 | /** 34 | * The name of the index 35 | */ 36 | private String name; 37 | /** 38 | * The type of document 39 | */ 40 | private String type; 41 | /** 42 | * The source document 43 | */ 44 | private T source; 45 | /** 46 | * The document id 47 | */ 48 | private String id; 49 | /** 50 | * The parent document id 51 | */ 52 | private String parentId; 53 | 54 | public Document(String name, String type,T source) { 55 | this(name, type, source, null, null); 56 | } 57 | 58 | public Document(String name, String type, T source, String id) { 59 | this(name, type, source, id, null); 60 | } 61 | 62 | public Document(String name, String type, T source, String id, String parentId) { 63 | this.name = name; 64 | this.type = type; 65 | this.source = source; 66 | this.id = id; 67 | this.parentId = parentId; 68 | } 69 | 70 | public String getName( ) { 71 | return this.name; 72 | } 73 | 74 | public String getType( ) { 75 | return this.type; 76 | } 77 | 78 | public T getSource( ) { 79 | 
return this.source; 80 | } 81 | 82 | public String getId( ) { 83 | return this.id; 84 | } 85 | 86 | public String getParentId() { 87 | return this.parentId; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/main/java/com/github/fhuss/storm/elasticsearch/bolt/IndexBatchBolt.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | package com.github.fhuss.storm.elasticsearch.bolt; 20 | 21 | import backtype.storm.task.OutputCollector; 22 | import backtype.storm.task.TopologyContext; 23 | import backtype.storm.topology.OutputFieldsDeclarer; 24 | import backtype.storm.tuple.Tuple; 25 | 26 | import com.github.fhuss.storm.elasticsearch.ClientFactory; 27 | import com.github.fhuss.storm.elasticsearch.Document; 28 | import com.github.fhuss.storm.elasticsearch.commons.RichTickTupleBolt; 29 | import com.github.fhuss.storm.elasticsearch.mapper.TupleMapper; 30 | import org.elasticsearch.ElasticsearchException; 31 | import org.elasticsearch.action.bulk.BulkItemResponse; 32 | import org.elasticsearch.action.bulk.BulkRequestBuilder; 33 | import org.elasticsearch.action.bulk.BulkResponse; 34 | import org.elasticsearch.action.index.IndexRequestBuilder; 35 | import org.elasticsearch.client.Client; 36 | 37 | import org.slf4j.Logger; 38 | import org.slf4j.LoggerFactory; 39 | 40 | import java.util.ArrayList; 41 | import java.util.List; 42 | import java.util.Map; 43 | import java.util.concurrent.LinkedBlockingQueue; 44 | import java.util.concurrent.TimeUnit; 45 | 46 | /** 47 | * Simple Bolt to index documents batch into an elasticsearch cluster. 48 | * 49 | * @author fhussonnois 50 | */ 51 | public class IndexBatchBolt extends RichTickTupleBolt { 52 | 53 | private static final Logger LOGGER = LoggerFactory.getLogger(IndexBatchBolt.class); 54 | 55 | public static final TimeUnit DEFAULT_TIME_UNIT = TimeUnit.SECONDS; 56 | 57 | public static final long DEFAULT_EMIT_FREQUENCY = 10; 58 | 59 | private static final int QUEUE_MAX_SIZE = 1000; 60 | 61 | private OutputCollector outputCollector; 62 | 63 | private Client client; 64 | 65 | private ClientFactory clientFactory; 66 | 67 | private LinkedBlockingQueue queue; 68 | 69 | private TupleMapper> mapper; 70 | 71 | /** 72 | * Creates a new {@link IndexBatchBolt} instance. 
73 | * 74 | * @param emitFrequency the batch frequency 75 | * @param unit the time unit of the emit frequency 76 | * @param clientFactory the elasticsearch client factory 77 | * @param mapper the document tuple mapper 78 | */ 79 | public IndexBatchBolt(ClientFactory clientFactory, TupleMapper> mapper, long emitFrequency, TimeUnit unit) { 80 | super(emitFrequency, unit); 81 | this.clientFactory = clientFactory; 82 | this.mapper = mapper; 83 | } 84 | 85 | /** 86 | * Creates a new {@link IndexBatchBolt} instance which use SECOND as time unit for batch frequency. 87 | * @param clientFactory the elasticsearch client factory 88 | * @param mapper the the document tuple mapper 89 | */ 90 | public IndexBatchBolt(ClientFactory clientFactory, TupleMapper> mapper, long emitFrequency) { 91 | this(clientFactory, mapper, emitFrequency, DEFAULT_TIME_UNIT); 92 | } 93 | 94 | /** 95 | * Creates a new {@link IndexBatchBolt} instance with a default batch frequency set to 10 seconds. 96 | * @param clientFactory the elasticsearch client factory 97 | * @param mapper the the document tuple mapper 98 | */ 99 | public IndexBatchBolt(ClientFactory clientFactory, TupleMapper> mapper) { 100 | this(clientFactory, mapper, DEFAULT_EMIT_FREQUENCY, DEFAULT_TIME_UNIT); 101 | } 102 | 103 | /** 104 | * (non-Javadoc) 105 | * @see backtype.storm.task.IBolt#prepare(java.util.Map, backtype.storm.task.TopologyContext, backtype.storm.task.OutputCollector) 106 | */ 107 | @Override 108 | public void prepare(Map stormConf, TopologyContext topologyContext, OutputCollector outputCollector) { 109 | this.outputCollector = outputCollector; 110 | this.client = clientFactory.makeClient(stormConf); 111 | this.queue = new LinkedBlockingQueue<>(QUEUE_MAX_SIZE); 112 | } 113 | 114 | @Override 115 | protected void executeTickTuple(Tuple tuple) { 116 | bulkUpdateIndexes(); 117 | outputCollector.ack(tuple); 118 | } 119 | 120 | @Override 121 | protected void executeTuple(Tuple tuple) { 122 | if( ! 
queue.offer(tuple) ) { 123 | bulkUpdateIndexes(); 124 | queue.add(tuple); 125 | } 126 | } 127 | 128 | protected void bulkUpdateIndexes( ) { 129 | 130 | List inputs = new ArrayList<>(queue.size()); 131 | queue.drainTo(inputs); 132 | BulkRequestBuilder bulkRequest = client.prepareBulk(); 133 | for (Tuple input : inputs) { 134 | Document doc = mapper.map(input); 135 | IndexRequestBuilder request = client.prepareIndex(doc.getName(), doc.getType(), doc.getId()).setSource((String)doc.getSource()); 136 | 137 | if(doc.getParentId() != null) { 138 | request.setParent(doc.getParentId()); 139 | } 140 | bulkRequest.add(request); 141 | } 142 | 143 | try { 144 | if (bulkRequest.numberOfActions() > 0) { 145 | BulkResponse bulkItemResponses = bulkRequest.execute().actionGet(); 146 | if (bulkItemResponses.hasFailures()) { 147 | BulkItemResponse[] items = bulkItemResponses.getItems(); 148 | for (int i = 0; i < items.length; i++) { 149 | ackOrFail(items[i], inputs.get(i)); 150 | } 151 | } else { 152 | ackAll(inputs); 153 | } 154 | } 155 | } catch (ElasticsearchException e) { 156 | LOGGER.error("Unable to process bulk request, " + inputs.size() + " tuples are in failure", e); 157 | outputCollector.reportError(e.getRootCause()); 158 | failAll(inputs); 159 | } 160 | } 161 | 162 | private void ackOrFail(BulkItemResponse item, Tuple tuple) { 163 | if (item.isFailed()) { 164 | LOGGER.error("Failed to process tuple : " + mapper.map(tuple)); 165 | outputCollector.fail(tuple); 166 | } else { 167 | outputCollector.ack(tuple); 168 | } 169 | } 170 | 171 | protected void ackAll(List inputs) { 172 | for(Tuple t : inputs) 173 | outputCollector.ack(t); 174 | } 175 | 176 | protected void failAll(List inputs) { 177 | for(Tuple t : inputs) 178 | outputCollector.fail(t); 179 | } 180 | 181 | @Override 182 | public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) { 183 | /* no-ouput */ 184 | } 185 | 186 | @Override 187 | public void cleanup() { 188 | if( this.client != null) 
this.client.close(); 189 | } 190 | } -------------------------------------------------------------------------------- /src/main/java/com/github/fhuss/storm/elasticsearch/commons/RichTickTupleBolt.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | package com.github.fhuss.storm.elasticsearch.commons; 20 | 21 | import backtype.storm.Config; 22 | import backtype.storm.Constants; 23 | import backtype.storm.topology.IRichBolt; 24 | import backtype.storm.tuple.Tuple; 25 | 26 | import java.util.Map; 27 | import java.util.concurrent.TimeUnit; 28 | 29 | /** 30 | * A simple {@link backtype.storm.topology.base.BaseBasicBolt} implementation with tick tuple support. 31 | * 32 | * @author fhussonnois 33 | * 34 | */ 35 | public abstract class RichTickTupleBolt implements IRichBolt { 36 | 37 | private long emitFrequency; 38 | 39 | /** 40 | * Creates a new {@link RichTickTupleBolt} instance. 
41 | * @param emitFrequency the tick tuple emit frequency 42 | * @param unit the time unit of the emit frequency 43 | */ 44 | public RichTickTupleBolt(long emitFrequency, TimeUnit unit) { 45 | this.emitFrequency = unit.toSeconds(emitFrequency); 46 | } 47 | 48 | 49 | private static boolean isTickTuple(Tuple tuple) { 50 | return tuple.getSourceComponent().equals(Constants.SYSTEM_COMPONENT_ID) 51 | && tuple.getSourceStreamId().equals(Constants.SYSTEM_TICK_STREAM_ID); 52 | } 53 | 54 | @Override 55 | public Map getComponentConfiguration() { 56 | Config conf = new Config(); 57 | conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, emitFrequency); 58 | return conf; 59 | } 60 | 61 | @Override 62 | public void execute(Tuple tuple) { 63 | if( isTickTuple(tuple) ) { 64 | executeTickTuple(tuple); 65 | } else { 66 | executeTuple(tuple); 67 | } 68 | } 69 | 70 | protected abstract void executeTickTuple(Tuple tuple); 71 | 72 | protected abstract void executeTuple(Tuple tuple); 73 | } 74 | -------------------------------------------------------------------------------- /src/main/java/com/github/fhuss/storm/elasticsearch/handler/BulkResponseHandler.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. 
See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | package com.github.fhuss.storm.elasticsearch.handler; 20 | 21 | import org.elasticsearch.action.bulk.BulkResponse; 22 | import org.slf4j.Logger; 23 | import org.slf4j.LoggerFactory; 24 | 25 | import java.io.Serializable; 26 | 27 | /** 28 | * Interface to handle response after executing a bulk request. 29 | * 30 | * @author fhussonnois 31 | */ 32 | public interface BulkResponseHandler extends Serializable { 33 | 34 | final Logger LOGGER = LoggerFactory.getLogger(LoggerResponseHandler.class); 35 | 36 | void handle(BulkResponse response); 37 | 38 | public class LoggerResponseHandler implements BulkResponseHandler { 39 | 40 | @Override 41 | public void handle(BulkResponse response) { 42 | 43 | if( response.hasFailures() ) { 44 | LOGGER.error("BulkResponse has failures : {}", response.buildFailureMessage()); 45 | } 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/com/github/fhuss/storm/elasticsearch/mapper/MappingException.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. 
See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | package com.github.fhuss.storm.elasticsearch.mapper; 20 | 21 | 22 | public class MappingException extends RuntimeException { 23 | 24 | public MappingException(String message, Throwable source) { 25 | super(message, source); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/main/java/com/github/fhuss/storm/elasticsearch/mapper/TridentTupleMapper.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | package com.github.fhuss.storm.elasticsearch.mapper; 20 | 21 | import storm.trident.tuple.TridentTuple; 22 | 23 | import java.io.Serializable; 24 | 25 | /** 26 | * Interface for building document from {@link storm.trident.tuple.TridentTuple}. 
27 | * @param 28 | */ 29 | public interface TridentTupleMapper extends Serializable { 30 | 31 | T map(TridentTuple input); 32 | } -------------------------------------------------------------------------------- /src/main/java/com/github/fhuss/storm/elasticsearch/mapper/TupleMapper.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | package com.github.fhuss.storm.elasticsearch.mapper; 20 | 21 | import backtype.storm.tuple.Tuple; 22 | 23 | import java.io.Serializable; 24 | 25 | /** 26 | * Interface for building document from {@link Tuple}. 27 | * @param 28 | 29 | */ 30 | public interface TupleMapper extends Serializable { 31 | 32 | T map(Tuple input); 33 | } -------------------------------------------------------------------------------- /src/main/java/com/github/fhuss/storm/elasticsearch/mapper/impl/DefaultTupleMapper.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | package com.github.fhuss.storm.elasticsearch.mapper.impl; 20 | 21 | import backtype.storm.tuple.Tuple; 22 | import com.fasterxml.jackson.core.JsonProcessingException; 23 | import com.fasterxml.jackson.databind.ObjectMapper; 24 | import com.github.fhuss.storm.elasticsearch.Document; 25 | import com.github.fhuss.storm.elasticsearch.mapper.MappingException; 26 | import com.github.fhuss.storm.elasticsearch.mapper.TupleMapper; 27 | 28 | import java.io.UnsupportedEncodingException; 29 | 30 | /** 31 | * Default mapper that attempt to map tuple fields to a {@link Document}. 
32 | * 33 | * @author fhussonnois 34 | */ 35 | public class DefaultTupleMapper implements TupleMapper> { 36 | 37 | public static final String FIELD_SOURCE = "source"; 38 | public static final String FIELD_NAME = "name"; 39 | public static final String FIELD_TYPE = "type"; 40 | public static final String FIELD_PARENT_ID = "parentId"; 41 | public static final String FIELD_ID = "id"; 42 | 43 | private TupleMapper sourceMapperStrategy; 44 | 45 | private DefaultTupleMapper(TupleMapper sourceMapperStrategy) { 46 | this.sourceMapperStrategy = sourceMapperStrategy; 47 | } 48 | 49 | /** 50 | * Returns a new {@link DefaultTupleMapper} that accept String as source field value. 51 | */ 52 | public static final DefaultTupleMapper newStringDefaultTupleMapper( ) { 53 | return new DefaultTupleMapper(new TupleMapper() { 54 | @Override 55 | public String map(Tuple input) { 56 | return input.getStringByField(FIELD_SOURCE); 57 | } 58 | }); 59 | } 60 | /** 61 | * Returns a new {@link DefaultTupleMapper} that accept Byte[] as source field value. 62 | */ 63 | public static final DefaultTupleMapper newBinaryDefaultTupleMapper( ) { 64 | return new DefaultTupleMapper(new TupleMapper() { 65 | @Override 66 | public String map(Tuple input) { 67 | try { 68 | return new String(input.getBinaryByField(FIELD_SOURCE), "UTF-8"); 69 | } catch (UnsupportedEncodingException e) { 70 | throw new MappingException("Error while processing source as a byte[]", e); 71 | } 72 | } 73 | }); 74 | } 75 | 76 | /** 77 | * Returns a new {@link DefaultTupleMapper} that accept Object as source field value. 
78 | */ 79 | public static final DefaultTupleMapper newObjectDefaultTupleMapper( ) { 80 | final ObjectMapper mapper = new ObjectMapper(); 81 | return new DefaultTupleMapper(new TupleMapper() { 82 | @Override 83 | public String map(Tuple input) { 84 | try { 85 | return mapper.writeValueAsString(input.getValueByField(FIELD_SOURCE)); 86 | } catch (JsonProcessingException e) { 87 | throw new MappingException("Error happen while processing json on object", e); 88 | } 89 | } 90 | }); 91 | } 92 | 93 | @Override 94 | public Document map(Tuple input) { 95 | String id = input.getStringByField(FIELD_ID); 96 | String name = input.getStringByField(FIELD_NAME); 97 | String type = input.getStringByField(FIELD_TYPE); 98 | String parentId = ( input.contains(FIELD_PARENT_ID) ) ? input.getStringByField(FIELD_PARENT_ID) : null; 99 | 100 | return new Document<>(name, type, sourceMapperStrategy.map(input), id, parentId); 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /src/main/java/com/github/fhuss/storm/elasticsearch/state/ESIndexMapState.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. 
See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | package com.github.fhuss.storm.elasticsearch.state; 20 | 21 | import backtype.storm.task.IMetricsContext; 22 | import backtype.storm.topology.FailedException; 23 | import backtype.storm.topology.ReportedFailedException; 24 | import backtype.storm.tuple.Values; 25 | import com.github.fhuss.storm.elasticsearch.ClientFactory; 26 | import com.github.fhuss.storm.elasticsearch.handler.BulkResponseHandler; 27 | import com.google.common.base.Objects; 28 | import org.elasticsearch.ElasticsearchException; 29 | import org.elasticsearch.action.bulk.BulkRequestBuilder; 30 | import org.elasticsearch.action.get.GetResponse; 31 | import org.elasticsearch.action.get.MultiGetItemResponse; 32 | import org.elasticsearch.action.get.MultiGetRequestBuilder; 33 | import org.elasticsearch.action.get.MultiGetResponse; 34 | import org.elasticsearch.client.Client; 35 | import org.slf4j.Logger; 36 | import org.slf4j.LoggerFactory; 37 | import storm.trident.state.OpaqueValue; 38 | import storm.trident.state.State; 39 | import storm.trident.state.StateFactory; 40 | import storm.trident.state.StateType; 41 | import storm.trident.state.TransactionalValue; 42 | import storm.trident.state.map.CachedMap; 43 | import storm.trident.state.map.IBackingMap; 44 | import storm.trident.state.map.MapState; 45 | import storm.trident.state.map.NonTransactionalMap; 46 | import storm.trident.state.map.OpaqueMap; 47 | import storm.trident.state.map.SnapshottableMap; 48 | import storm.trident.state.map.TransactionalMap; 49 | 50 | import java.io.IOException; 51 | import java.util.ArrayList; 52 | import java.util.HashMap; 53 | import java.util.List; 54 | import java.util.ListIterator; 55 | import java.util.Map; 56 | 57 | import static com.github.fhuss.storm.elasticsearch.state.ValueSerializer.*; 58 | 59 | /** 60 | * This class implements Trident State on top of ElasticSearch. 
61 | * It follows trident-memcached library (https://github.com/nathanmarz/trident-memcached) as a template. 62 | * 63 | * @author fhussonnois 64 | * @param OpaqueValue, TransactionalValue or any other non transactional type 65 | */ 66 | public class ESIndexMapState implements IBackingMap { 67 | 68 | private static final Logger LOGGER = LoggerFactory.getLogger(ESIndexMapState.class); 69 | 70 | public static class Options extends HashMap { 71 | 72 | private static final int DEFAULT_CACHE_SIZE = 1000; 73 | private static final String DEFAULT_GLOBAL_KEY = "GLOBAL$KEY"; 74 | public static final String REPORT_ERROR = "trident.elasticsearch.state.report.error"; 75 | public static final String CACHE_SIZE = "trident.elasticsearch.state.cache.size"; 76 | public static final String GLOBAL_KEY = "trident.elasticsearch.state.global.key"; 77 | 78 | public Options(Map conf) { 79 | super(conf); 80 | } 81 | public boolean reportError() { 82 | return Boolean.valueOf(get(REPORT_ERROR)); 83 | } 84 | public int getCachedMapSize( ) { 85 | String cacheSize = get(CACHE_SIZE); 86 | return cacheSize != null ? Integer.valueOf(cacheSize) : DEFAULT_CACHE_SIZE; 87 | 88 | } 89 | public String getGlobalKey( ) { 90 | String globalKey = get(GLOBAL_KEY); 91 | return globalKey != null ? 
globalKey : DEFAULT_GLOBAL_KEY; 92 | } 93 | } 94 | 95 | public static Factory> opaque(ClientFactory client, Class type) { 96 | return new OpaqueFactory<>(client, StateType.OPAQUE, new OpaqueValueSerializer<>(type)); 97 | } 98 | 99 | public static Factory> transactional(ClientFactory client, Class type) { 100 | return new TransactionalFactory<>(client, StateType.TRANSACTIONAL, new TransactionalValueSerializer<>(type)); 101 | } 102 | 103 | public static Factory nonTransactional(ClientFactory client, Class type) { 104 | return new NonTransactionalFactory<>(client, StateType.NON_TRANSACTIONAL, new NonTransactionalValueSerializer<>(type)); 105 | } 106 | 107 | public abstract static class Factory implements StateFactory { 108 | protected ValueSerializer serializer; 109 | protected ClientFactory clientFactory; 110 | protected StateType stateType; 111 | 112 | public Factory(ClientFactory clientFactory, StateType stateType, ValueSerializer serializer) { 113 | this.clientFactory = clientFactory; 114 | this.stateType = stateType; 115 | this.serializer = serializer; 116 | } 117 | } 118 | 119 | public static class OpaqueFactory extends Factory> { 120 | 121 | public OpaqueFactory(ClientFactory clientFactory, StateType stateType, ValueSerializer> serializer) { 122 | super(clientFactory, stateType, serializer); 123 | } 124 | 125 | @Override 126 | public State makeState(Map conf, IMetricsContext iMetricsContext, int i, int i2) { 127 | Options options = new Options(conf); 128 | ESIndexMapState> mapState = new ESIndexMapState<>(clientFactory.makeClient(conf), serializer, new BulkResponseHandler.LoggerResponseHandler(), options.reportError()); 129 | MapState ms = OpaqueMap.build(new CachedMap(mapState, options.getCachedMapSize())); 130 | return new SnapshottableMap>(ms, new Values(options.getGlobalKey())); 131 | } 132 | } 133 | 134 | public static class TransactionalFactory extends Factory> { 135 | 136 | public TransactionalFactory(ClientFactory clientFactory, StateType stateType, 
ValueSerializer> serializer) { 137 | super(clientFactory, stateType, serializer); 138 | } 139 | 140 | @Override 141 | public State makeState(Map conf, IMetricsContext iMetricsContext, int i, int i2) { 142 | Options options = new Options(conf); 143 | ESIndexMapState> mapState = new ESIndexMapState<>(clientFactory.makeClient(conf), serializer, new BulkResponseHandler.LoggerResponseHandler(), options.reportError()); 144 | MapState ms = TransactionalMap.build(new CachedMap(mapState, options.getCachedMapSize())); 145 | Values snapshotKey = new Values(options.getGlobalKey()); 146 | return new SnapshottableMap<>(ms, snapshotKey); 147 | } 148 | } 149 | 150 | public static class NonTransactionalFactory extends Factory { 151 | 152 | public NonTransactionalFactory(ClientFactory clientFactory, StateType stateType, ValueSerializer serializer) { 153 | super(clientFactory, stateType, serializer); 154 | } 155 | 156 | @Override 157 | public State makeState(Map conf, IMetricsContext iMetricsContext, int i, int i2) { 158 | Options options = new Options(conf); 159 | ESIndexMapState mapState = new ESIndexMapState<>(clientFactory.makeClient(conf), serializer, new BulkResponseHandler.LoggerResponseHandler(), options.reportError()); 160 | MapState ms = NonTransactionalMap.build(new CachedMap<>(mapState, options.getCachedMapSize())); 161 | return new SnapshottableMap<>(ms, new Values(options.getGlobalKey())); 162 | } 163 | } 164 | 165 | private BulkResponseHandler bulkResponseHandler; 166 | private ValueSerializer serializer; 167 | private Client client; 168 | 169 | private boolean reportError; 170 | 171 | public ESIndexMapState(Client client, ValueSerializer serializer, BulkResponseHandler bulkResponseHandler, boolean reportError) { 172 | this.client = client; 173 | this.serializer = serializer; 174 | this.bulkResponseHandler = bulkResponseHandler; 175 | this.reportError = reportError; 176 | } 177 | 178 | @Override 179 | public List multiGet(List> keys) { 180 | List responses = new 
ArrayList<>(keys.size()); 181 | 182 | List groupByKeys = new ArrayList<>(keys.size()); 183 | for(List key : keys) { 184 | groupByKeys.add(GroupByKey.fromKeysList(key)); 185 | } 186 | 187 | if( ! groupByKeys.isEmpty() ) { 188 | 189 | MultiGetRequestBuilder request = client.prepareMultiGet(); 190 | for(GroupByKey key : groupByKeys) { 191 | request.add(key.index, key.type, key.id); 192 | } 193 | MultiGetResponse multiGetResponses; 194 | try { 195 | multiGetResponses = request.execute().actionGet(); 196 | } catch (ElasticsearchException e) { 197 | String error = "Failed to read data into elasticsearch"; 198 | throw (reportError) ? new ReportedFailedException(error, e) : new FailedException(error, e); 199 | } 200 | for(MultiGetItemResponse itemResponse : multiGetResponses.getResponses()) { 201 | GetResponse res = itemResponse.getResponse(); 202 | if( res != null && !res.isSourceEmpty()) { 203 | try { 204 | responses.add(serializer.deserialize(res.getSourceAsBytes())); 205 | } catch (IOException e) { 206 | LOGGER.error("error while trying to deserialize data from json", e); 207 | responses.add(null); 208 | } 209 | } else { 210 | responses.add(null); 211 | } 212 | } 213 | } 214 | return responses; 215 | } 216 | 217 | @Override 218 | public void multiPut(List> keys, List values) { 219 | BulkRequestBuilder bulkRequestBuilder = client.prepareBulk(); 220 | ListIterator listIterator = values.listIterator(); 221 | while (listIterator.hasNext()) { 222 | GroupByKey groupBy = GroupByKey.fromKeysList(keys.get(listIterator.nextIndex())); 223 | T value = listIterator.next(); 224 | try { 225 | byte[] source = serializer.serialize(value); 226 | bulkRequestBuilder.add(client.prepareIndex(groupBy.index, groupBy.type, groupBy.id).setSource(source)); 227 | } catch (IOException e) { 228 | LOGGER.error("Oops data loss - error while trying to serialize data to json", e); 229 | } 230 | } 231 | 232 | try { 233 | bulkResponseHandler.handle(bulkRequestBuilder.execute().actionGet()); 234 | } 
catch(ElasticsearchException e) { 235 | LOGGER.error("error while executing bulk request to elasticsearch"); 236 | String error = "Failed to store data into elasticsearch"; 237 | throw (reportError) ? new ReportedFailedException(error, e) : new FailedException(error, e); 238 | } 239 | } 240 | 241 | private static class GroupByKey { 242 | public final String index; 243 | public final String type; 244 | public final String id; 245 | 246 | public GroupByKey(String index, String type, String id) { 247 | this.index = index; 248 | this.type = type; 249 | this.id = id; 250 | } 251 | 252 | public static GroupByKey fromKeysList(List keys) { 253 | if( keys == null || keys.size() < 3) { 254 | throw new RuntimeException("Keys not supported " + keys); 255 | } 256 | return new GroupByKey(keys.get(0).toString(), keys.get(1).toString(), keys.get(2).toString()); 257 | } 258 | 259 | public String toString( ) { 260 | return Objects.toStringHelper(this) 261 | .add("index", index) 262 | .add("type", type) 263 | .add("id", id).toString(); 264 | } 265 | } 266 | } 267 | -------------------------------------------------------------------------------- /src/main/java/com/github/fhuss/storm/elasticsearch/state/ESIndexState.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.github.fhuss.storm.elasticsearch.state;

import java.io.IOException;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.sort.SortBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import storm.trident.state.State;
import storm.trident.state.StateFactory;
import storm.trident.tuple.TridentTuple;
import backtype.storm.task.IMetricsContext;
import backtype.storm.topology.FailedException;

import com.github.fhuss.storm.elasticsearch.ClientFactory;
import com.github.fhuss.storm.elasticsearch.Document;
import com.github.fhuss.storm.elasticsearch.handler.BulkResponseHandler;
import com.github.fhuss.storm.elasticsearch.mapper.TridentTupleMapper;

/**
 * Simple {@link State} implementation for Elasticsearch.
 *
 * @author fhussonnois
 * @param <T> the document source type
 */
public class ESIndexState<T> implements State {

    public static final Logger LOGGER = LoggerFactory.getLogger(ESIndexState.class);

    private Client client;

    private ValueSerializer<T> serializer;

    public ESIndexState(Client client, ValueSerializer<T> serializer) {
        this.client = client;
        this.serializer = serializer;
    }

    @Override
    public void beginCommit(Long aLong) {
        // no-op: this state does not participate in Trident commit phases
    }

    @Override
    public void commit(Long aLong) {
        // no-op: this state does not participate in Trident commit phases
    }

    /**
     * Maps each tuple to a document and submits all of them as a single bulk index request.
     *
     * @param inputs the tuples to index
     * @param mapper maps each tuple to a {@link Document}
     * @param handler callback invoked with the bulk response
     * @throws FailedException if the bulk request cannot be executed
     */
    public void bulkUpdateIndices(List<TridentTuple> inputs, TridentTupleMapper<Document<T>> mapper, BulkResponseHandler handler) {
        BulkRequestBuilder bulkRequest = client.prepareBulk();
        for (TridentTuple input : inputs) {
            Document<T> doc = mapper.map(input);
            byte[] source = serializeSourceOrFail(doc);
            IndexRequestBuilder request = client.prepareIndex(doc.getName(), doc.getType(), doc.getId()).setSource(source);

            if (doc.getParentId() != null) {
                request.setParent(doc.getParentId());
            }
            bulkRequest.add(request);
        }

        if (bulkRequest.numberOfActions() > 0) {
            try {
                handler.handle(bulkRequest.execute().actionGet());
            } catch (ElasticsearchException e) {
                LOGGER.error("error while executing bulk request to elasticsearch", e);
                throw new FailedException("Failed to store data into elasticsearch", e);
            }
        }
    }

    /**
     * Serializes the document source, converting serialization errors into a
     * {@link FailedException} so Trident replays the batch.
     */
    protected byte[] serializeSourceOrFail(Document<T> doc) {
        try {
            return serializer.serialize(doc.getSource());
        } catch (IOException e) {
            LOGGER.error("Error while serializing document source", e);
            throw new FailedException("Failed to serialize source as byte[]", e);
        }
    }

    /** Runs the query against the given indices/types with a default size of 10 hits. */
    public Collection<T> searchQuery(String query, List<String> indices, List<String> types) {
        return searchQuery(query, indices, types, 10);
    }

    public Collection<T> searchQuery(String query, List<String> indices, List<String> types, int size) {
        SearchResponse response = buildSearchQuery(query, indices, types, size).execute().actionGet();
        return buildResult(response);
    }

    public Collection<T> searchSortedAndFirstNQuery(String query, List<String> indices, List<String> types,
                                                    SortBuilder sortBuilder, int firstN) {
        // buildSearchQuery already sets the query; the previous extra setQuery(query)
        // call was redundant and has been removed.
        SearchResponse response = buildSearchQuery(query, indices, types, firstN)
                .addSort(sortBuilder)
                .execute().actionGet();

        return buildResult(response);
    }

    private SearchRequestBuilder buildSearchQuery(String query, List<String> indices, List<String> types, int size) {
        return client.prepareSearch()
                .setIndices(indices.toArray(new String[indices.size()]))
                .setTypes(types.toArray(new String[types.size()]))
                .setSize(size)
                .setQuery(query);
    }

    /* Deserializes each hit; hits that fail to deserialize are logged and skipped. */
    private List<T> buildResult(SearchResponse response) {
        List<T> result = new LinkedList<>();
        for (SearchHit hit : response.getHits()) {
            try {
                result.add(serializer.deserialize(hit.source()));
            } catch (IOException e) {
                LOGGER.error("Error while trying to deserialize data from json source", e);
            }
        }
        return result;
    }

    public static class Factory<T> implements StateFactory {

        private ClientFactory clientFactory;
        private ValueSerializer<T> serializer;

        public Factory(ClientFactory clientFactory, Class<T> clazz) {
            this.clientFactory = clientFactory;
            this.serializer = new ValueSerializer.NonTransactionalValueSerializer<>(clazz);
        }

        @Override
        public State makeState(Map map, IMetricsContext iMetricsContext, int i, int i2) {
            return new ESIndexState<>(makeClient(map), serializer);
        }

        protected Client makeClient(Map map) {
            return clientFactory.makeClient(map);
        }
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/fhuss/storm/elasticsearch/state/ESIndexUpdater.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | package com.github.fhuss.storm.elasticsearch.state; 20 | 21 | import com.github.fhuss.storm.elasticsearch.handler.BulkResponseHandler; 22 | import com.github.fhuss.storm.elasticsearch.Document; 23 | import com.github.fhuss.storm.elasticsearch.handler.BulkResponseHandler.LoggerResponseHandler; 24 | import com.github.fhuss.storm.elasticsearch.mapper.TridentTupleMapper; 25 | import storm.trident.operation.TridentCollector; 26 | import storm.trident.state.BaseStateUpdater; 27 | import storm.trident.tuple.TridentTuple; 28 | 29 | import java.util.List; 30 | 31 | /** 32 | * Simple {@link BaseStateUpdater} implementation for Elasticsearch. 
33 | * 34 | * @author fhussonnois 35 | */ 36 | public class ESIndexUpdater extends BaseStateUpdater> { 37 | 38 | private final TridentTupleMapper> documentTupleMapper; 39 | private final BulkResponseHandler bulkResponseHandler; 40 | 41 | public ESIndexUpdater(TridentTupleMapper> documentTupleMapper) { 42 | this(documentTupleMapper, new LoggerResponseHandler()); 43 | } 44 | 45 | public ESIndexUpdater(TridentTupleMapper> docBuilder, BulkResponseHandler bulkResponseHandler) { 46 | this.documentTupleMapper = docBuilder; 47 | this.bulkResponseHandler = bulkResponseHandler; 48 | } 49 | 50 | public void updateState(ESIndexState state, List inputs, TridentCollector collector) { 51 | state.bulkUpdateIndices(inputs, documentTupleMapper, bulkResponseHandler); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/com/github/fhuss/storm/elasticsearch/state/QuerySearchIndexQuery.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | package com.github.fhuss.storm.elasticsearch.state; 20 | 21 | import backtype.storm.tuple.Values; 22 | import storm.trident.operation.TridentCollector; 23 | import storm.trident.state.BaseQueryFunction; 24 | import storm.trident.tuple.TridentTuple; 25 | 26 | import java.util.Collection; 27 | import java.util.LinkedList; 28 | import java.util.List; 29 | 30 | /** 31 | * Simple {@link BaseQueryFunction} to execute elasticsearch query search. 32 | * 33 | * @author fhussonnois 34 | */ 35 | public class QuerySearchIndexQuery extends BaseQueryFunction, Collection> { 36 | 37 | @Override 38 | public List> batchRetrieve(ESIndexState indexState, List inputs) { 39 | List> res = new LinkedList<>( ); 40 | for(TridentTuple input : inputs) { 41 | String query = (String)input.getValueByField("query"); 42 | List types = (List) input.getValueByField("types"); 43 | List indices = (List) input.getValueByField("indices"); 44 | res.add(indexState.searchQuery(query, indices, types)); 45 | } 46 | return res; 47 | } 48 | 49 | @Override 50 | public void execute(TridentTuple objects, Collection tl, TridentCollector tridentCollector) { 51 | for(T t : tl) tridentCollector.emit( new Values(t)); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/com/github/fhuss/storm/elasticsearch/state/ValueSerializer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | package com.github.fhuss.storm.elasticsearch.state; 20 | 21 | import com.fasterxml.jackson.databind.JsonNode; 22 | import com.fasterxml.jackson.databind.ObjectMapper; 23 | import com.fasterxml.jackson.databind.node.ObjectNode; 24 | import storm.trident.state.OpaqueValue; 25 | import storm.trident.state.TransactionalValue; 26 | import java.io.IOException; 27 | import java.io.Serializable; 28 | 29 | /** 30 | * Abstract class to serialize {@link TransactionalValue}, {@link OpaqueValue} 31 | * and non transactional value. 32 | * 33 | * @author fhussonnois 34 | * 35 | * @param type of the document 36 | */ 37 | public abstract class ValueSerializer implements Serializable { 38 | 39 | private static final String FIELD_TXID = "txid"; 40 | private static final String FIELD_CURR_TIXD = "currTxid"; 41 | private static final String FIELD_VAL = "val"; 42 | private static final String FIELD_CURR = "curr"; 43 | private static final String FIELD_PREV = "prev"; 44 | 45 | protected static final ObjectMapper mapper = new ObjectMapper(); 46 | 47 | public byte[] serialize(T o) throws IOException { 48 | return mapper.writeValueAsBytes(o); 49 | } 50 | 51 | public abstract T deserialize(byte[] value) throws IOException; 52 | 53 | /** 54 | * Basic serializer implementation for {@link storm.trident.state.TransactionalValue}. 
55 | * @param the value type 56 | */ 57 | public static class NonTransactionalValueSerializer extends ValueSerializer { 58 | private Class type; 59 | public NonTransactionalValueSerializer(Class type) { 60 | this.type = type; 61 | } 62 | 63 | 64 | @Override 65 | public T deserialize(byte[] value) throws IOException { 66 | return mapper.readValue(value, type); 67 | } 68 | } 69 | 70 | /** 71 | * Basic serializer implementation for {@link storm.trident.state.TransactionalValue}. 72 | * @param the value type 73 | */ 74 | public static class TransactionalValueSerializer extends ValueSerializer> { 75 | 76 | private Class type; 77 | 78 | public TransactionalValueSerializer(Class type) { 79 | this.type = type; 80 | } 81 | 82 | @Override 83 | public TransactionalValue deserialize(byte[] value) throws IOException { 84 | ObjectNode node = mapper.readValue(value, ObjectNode.class); 85 | byte[] bytes = mapper.writeValueAsBytes(node.get(FIELD_VAL)); 86 | return new TransactionalValue<>(node.get(FIELD_TXID).asLong(), mapper.readValue(bytes, type)); 87 | } 88 | } 89 | 90 | /** 91 | * Basic serializer implementation for {@link storm.trident.state.OpaqueValue}. 92 | * @param the value type 93 | */ 94 | public static class OpaqueValueSerializer extends ValueSerializer> { 95 | 96 | private Class type; 97 | 98 | public OpaqueValueSerializer(Class type) { 99 | this.type = type; 100 | } 101 | 102 | @Override 103 | public OpaqueValue deserialize(byte[] value) throws IOException { 104 | ObjectNode node = mapper.readValue(value, ObjectNode.class); 105 | long currTxid = node.get(FIELD_CURR_TIXD).asLong(); 106 | T val = mapper.readValue(mapper.writeValueAsBytes(node.get(FIELD_CURR)), type); 107 | JsonNode prevNode = node.get(FIELD_PREV); 108 | T prev = (prevNode.isNull()) ? 
null : mapper.readValue(mapper.writeValueAsBytes(prevNode), type); 109 | return new OpaqueValue<>(currTxid, val, prev); 110 | } 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/test/java/com/github/fhuss/storm/elasticsearch/BaseLocalClusterTest.java: -------------------------------------------------------------------------------- 1 | package com.github.fhuss.storm.elasticsearch; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.LocalDRPC; 6 | import backtype.storm.generated.StormTopology; 7 | import backtype.storm.utils.Utils; 8 | import com.github.tlrx.elasticsearch.test.EsSetup; 9 | import org.elasticsearch.common.settings.ImmutableSettings; 10 | import org.elasticsearch.common.settings.Settings; 11 | import org.junit.After; 12 | import org.junit.Before; 13 | 14 | import static com.github.tlrx.elasticsearch.test.EsSetup.createIndex; 15 | 16 | /** 17 | * Default class for starting/stopping storm local cluster before and after tests. 18 | * 19 | * @author fhussonnois 20 | */ 21 | public abstract class BaseLocalClusterTest { 22 | 23 | public static final Settings SETTINGS = ImmutableSettings.settingsBuilder().loadFromClasspath("elasticsearch.yml").build(); 24 | 25 | protected EsSetup esSetup; 26 | protected LocalCluster cluster; 27 | protected LocalDRPC drpc; 28 | protected Settings settings; 29 | protected String index; 30 | 31 | /** 32 | * Creates a new {@link BaseLocalClusterTest}. 33 | * @param index name of the index. 34 | */ 35 | public BaseLocalClusterTest(String index) { 36 | this(index, SETTINGS); 37 | } 38 | 39 | /** 40 | * Creates a new {@link BaseLocalClusterTest}. 41 | * @param index name of the index. 
42 | * @param settings settings 43 | */ 44 | public BaseLocalClusterTest(String index, Settings settings) { 45 | this.index = index; 46 | this.settings = settings; 47 | } 48 | 49 | public ClientFactory.LocalTransport getLocalClient() { 50 | return new ClientFactory.LocalTransport(settings.getAsMap()); 51 | } 52 | 53 | @Before 54 | public void setUp() { 55 | esSetup = new EsSetup(settings); 56 | esSetup.execute(createIndex(index)); 57 | 58 | drpc = new LocalDRPC(); 59 | StormTopology topology = buildTopology(); 60 | 61 | cluster = new LocalCluster(); 62 | cluster.submitTopology("elastic-storm", new Config(), topology); 63 | 64 | Utils.sleep(10000); // let's do some work 65 | } 66 | 67 | @After 68 | public void tearDown() { 69 | drpc.shutdown(); 70 | cluster.shutdown(); 71 | esSetup.terminate(); 72 | } 73 | 74 | /** 75 | * Builds the topology that must be submitted to the local cluster. 76 | */ 77 | protected abstract StormTopology buildTopology(); 78 | } 79 | -------------------------------------------------------------------------------- /src/test/java/com/github/fhuss/storm/elasticsearch/bolt/IndexBatchBoltTest.java: -------------------------------------------------------------------------------- 1 | package com.github.fhuss.storm.elasticsearch.bolt; 2 | 3 | import backtype.storm.generated.StormTopology; 4 | import backtype.storm.spout.SpoutOutputCollector; 5 | import backtype.storm.task.TopologyContext; 6 | import backtype.storm.topology.OutputFieldsDeclarer; 7 | import backtype.storm.topology.TopologyBuilder; 8 | import backtype.storm.topology.base.BaseRichSpout; 9 | import backtype.storm.tuple.Fields; 10 | import backtype.storm.tuple.Values; 11 | import com.github.fhuss.storm.elasticsearch.BaseLocalClusterTest; 12 | import com.github.fhuss.storm.elasticsearch.mapper.impl.DefaultTupleMapper; 13 | import static com.github.fhuss.storm.elasticsearch.mapper.impl.DefaultTupleMapper.*; 14 | 15 | import com.github.fhuss.storm.elasticsearch.model.Tweet; 16 | import 
org.junit.Assert;
import org.junit.Test;

import java.util.Map;
import java.util.concurrent.TimeUnit;

/**
 * Integration test for {@link IndexBatchBolt}: documents emitted by the spout
 * must be flushed to elasticsearch once a tick tuple triggers the bulk request.
 *
 * @author fhussonnois
 */
public class IndexBatchBoltTest extends BaseLocalClusterTest {

    public IndexBatchBoltTest() {
        super("my_index");
    }

    @Test
    public void shouldExecuteBulkRequestAfterReceivingTickTuple() {
        Assert.assertEquals(StaticSpout.MSGS.length, esSetup.countAll().intValue());
    }

    @Override
    public StormTopology buildTopology() {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("batch", new StaticSpout()).setMaxTaskParallelism(1);
        builder.setBolt("index", newIndexBatchBolt()).shuffleGrouping("batch");

        return builder.createTopology();
    }

    protected IndexBatchBolt<Tweet> newIndexBatchBolt() {
        DefaultTupleMapper mapper = DefaultTupleMapper.newObjectDefaultTupleMapper();
        return new IndexBatchBolt<>(getLocalClient(), mapper, 5, TimeUnit.SECONDS);
    }

    /** Spout emitting each message of {@link #MSGS} exactly once. */
    public static class StaticSpout extends BaseRichSpout {

        public static String[] MSGS = {
                "the cow jumped over the moon",
                "the man went to the store and bought some candy",
                "four score and seven years ago",
                "how many apples can you eat",
                "to be or not to be the person"
        };

        private SpoutOutputCollector spoutOutputCollector;
        private int current = 0;

        @Override
        public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
            outputFieldsDeclarer.declare(new Fields(FIELD_NAME, FIELD_TYPE, FIELD_SOURCE, FIELD_ID, FIELD_PARENT_ID));
        }

        @Override
        public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
            this.spoutOutputCollector = spoutOutputCollector;
        }

        @Override
        public void nextTuple() {
            if (current < MSGS.length) {
                spoutOutputCollector.emit(new Values("my_index", "my_type", new Tweet(MSGS[current], 0), String.valueOf(MSGS[current].hashCode()), null));
                current++;
            }
        }
    }
}

// ---------------------------------------------------------------------
// File: src/test/java/com/github/fhuss/storm/elasticsearch/functions/CreateJson.java
// ---------------------------------------------------------------------
package com.github.fhuss.storm.elasticsearch.functions;

import backtype.storm.tuple.Values;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.tuple.TridentTuple;

/** Trident function serializing the first tuple value as a JSON string. */
public class CreateJson extends BaseFunction {

    // ObjectMapper is thread-safe and expensive to build; share one instance
    // instead of creating a new mapper for every tuple.
    private static final ObjectMapper MAPPER = new ObjectMapper();

    @Override
    public void execute(TridentTuple tuple, TridentCollector collector) {
        try {
            collector.emit(new Values(MAPPER.writeValueAsString(tuple.getValue(0))));
        } catch (JsonProcessingException e) {
            throw new RuntimeException(e);
        }
    }
}

// ---------------------------------------------------------------------
// File: src/test/java/com/github/fhuss/storm/elasticsearch/functions/DocumentBuilder.java
// ---------------------------------------------------------------------
package com.github.fhuss.storm.elasticsearch.functions;

import backtype.storm.tuple.Values;
import com.github.fhuss.storm.elasticsearch.Document;
import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.tuple.TridentTuple;

/** Trident function wrapping a sentence into an elasticsearch {@link Document}. */
public class DocumentBuilder extends BaseFunction {
    @Override
    public void execute(TridentTuple tuple, TridentCollector collector) {
        String sentence = tuple.getString(0);
        collector.emit(new Values(new Document<>("my_index", "my_type", sentence, String.valueOf(sentence.hashCode()))));
    }
}
17 | -------------------------------------------------------------------------------- /src/test/java/com/github/fhuss/storm/elasticsearch/functions/ExtractSearchArgs.java: -------------------------------------------------------------------------------- 1 | package com.github.fhuss.storm.elasticsearch.functions; 2 | 3 | import backtype.storm.tuple.Values; 4 | import com.google.common.collect.Lists; 5 | import storm.trident.operation.BaseFunction; 6 | import storm.trident.operation.TridentCollector; 7 | import storm.trident.tuple.TridentTuple; 8 | 9 | public class ExtractSearchArgs extends BaseFunction { 10 | @Override 11 | public void execute(TridentTuple tuple, TridentCollector collector) { 12 | String args = (String)tuple.getValue(0); 13 | String[] split = args.split(" "); 14 | collector.emit(new Values(split[0], Lists.newArrayList(split[1]), Lists.newArrayList(split[2]))); 15 | } 16 | } -------------------------------------------------------------------------------- /src/test/java/com/github/fhuss/storm/elasticsearch/model/Tweet.java: -------------------------------------------------------------------------------- 1 | package com.github.fhuss.storm.elasticsearch.model; 2 | 3 | import java.io.Serializable; 4 | 5 | /** 6 | * Simple class used for testing purpose. 
7 | */ 8 | public class Tweet implements Serializable { 9 | 10 | private String text; 11 | private int count; 12 | 13 | public Tweet() { 14 | } 15 | 16 | public Tweet(String text, int count) { 17 | this.text = text; 18 | this.count = count; 19 | } 20 | 21 | public String getText() { 22 | return text; 23 | } 24 | 25 | public int getCount() { 26 | return count; 27 | } 28 | 29 | public void incrementCount( ) { 30 | this.count++; 31 | } 32 | 33 | public String toString() { 34 | return text; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/test/java/com/github/fhuss/storm/elasticsearch/state/ESIndexUpdaterTest.java: -------------------------------------------------------------------------------- 1 | package com.github.fhuss.storm.elasticsearch.state; 2 | 3 | import backtype.storm.generated.StormTopology; 4 | import backtype.storm.tuple.Fields; 5 | import backtype.storm.tuple.Values; 6 | import com.fasterxml.jackson.databind.ObjectMapper; 7 | import com.github.fhuss.storm.elasticsearch.BaseLocalClusterTest; 8 | import com.github.fhuss.storm.elasticsearch.Document; 9 | import com.github.fhuss.storm.elasticsearch.functions.CreateJson; 10 | import com.github.fhuss.storm.elasticsearch.functions.ExtractSearchArgs; 11 | import com.github.fhuss.storm.elasticsearch.mapper.TridentTupleMapper; 12 | import com.github.fhuss.storm.elasticsearch.model.Tweet; 13 | import org.elasticsearch.index.query.QueryBuilders; 14 | import org.junit.Assert; 15 | import org.junit.Test; 16 | import storm.trident.TridentState; 17 | import storm.trident.TridentTopology; 18 | import storm.trident.operation.builtin.FilterNull; 19 | import storm.trident.testing.FixedBatchSpout; 20 | import storm.trident.tuple.TridentTuple; 21 | 22 | import java.io.IOException; 23 | 24 | /** 25 | * @author fhussonnois 26 | */ 27 | public class ESIndexUpdaterTest extends BaseLocalClusterTest { 28 | 29 | static final ObjectMapper MAPPER = new ObjectMapper(); 30 | 31 | 
public ESIndexUpdaterTest() { 32 | super("my_index"); 33 | } 34 | 35 | @Test 36 | public void shouldExecuteDRPC( ) throws IOException { 37 | String query1 = QueryBuilders.termQuery("text", "moon").buildAsBytes().toUtf8(); 38 | String query2 = QueryBuilders.termQuery("text", "man").buildAsBytes().toUtf8(); 39 | String query3 = QueryBuilders.termQuery("text", "score").buildAsBytes().toUtf8(); 40 | String query4 = QueryBuilders.termQuery("text", "apples").buildAsBytes().toUtf8(); 41 | String query5 = QueryBuilders.termQuery("text", "person").buildAsBytes().toUtf8(); 42 | 43 | assertDRPC(drpc.execute("search", query1 + " my_index my_type"), "the cow jumped over the moon"); 44 | assertDRPC(drpc.execute("search", query2 + " my_index my_type"), "the man went to the store and bought some candy"); 45 | assertDRPC(drpc.execute("search", query3 + " my_index my_type"), "four score and seven years ago"); 46 | assertDRPC(drpc.execute("search", query4 + " my_index my_type"), "how many apples can you eat"); 47 | assertDRPC(drpc.execute("search", query5 + " my_index my_type"), "to be or not to be the person"); 48 | 49 | } 50 | 51 | protected void assertDRPC(String actual, String expected) throws IOException { 52 | String s = MAPPER.readValue(actual, String[][].class)[0][0]; 53 | Assert.assertEquals(expected, MAPPER.readValue(s, Tweet.class).getText()); 54 | } 55 | 56 | @Override 57 | protected StormTopology buildTopology() { 58 | FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence"), 3, 59 | new Values("the cow jumped over the moon"), 60 | new Values("the man went to the store and bought some candy"), 61 | new Values("four score and seven years ago"), 62 | new Values("how many apples can you eat"), 63 | new Values("to be or not to be the person")); 64 | spout.setCycle(true); 65 | 66 | ESIndexState.Factory factory = new ESIndexState.Factory<>(getLocalClient(), Tweet.class); 67 | TridentTopology topology = new TridentTopology(); 68 | 69 | TridentState state = 
topology.newStream("tweets", spout) 70 | .partitionPersist(factory, new Fields("sentence"), new ESIndexUpdater(new MyTridentTupleMapper())); 71 | 72 | topology.newDRPCStream("search", drpc) 73 | .each(new Fields("args"), new ExtractSearchArgs(), new Fields("query", "indices", "types")) 74 | .groupBy(new Fields("query", "indices", "types")) 75 | .stateQuery(state, new Fields("query", "indices", "types"), new QuerySearchIndexQuery(), new Fields("tweet")) 76 | .each(new Fields("tweet"), new FilterNull()) 77 | .each(new Fields("tweet"), new CreateJson(), new Fields("json")) 78 | .project(new Fields("json")); 79 | 80 | return topology.build(); 81 | } 82 | 83 | 84 | public static class MyTridentTupleMapper implements TridentTupleMapper> { 85 | 86 | @Override 87 | public Document map(TridentTuple input) { 88 | String sentence = input.getStringByField("sentence"); 89 | return new Document<>("my_index", "my_type", new Tweet(sentence, 0), String.valueOf(sentence.hashCode()), null); 90 | } 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /src/test/java/com/github/fhuss/storm/elasticsearch/state/IndexMapStateTest.java: -------------------------------------------------------------------------------- 1 | package com.github.fhuss.storm.elasticsearch.state; 2 | 3 | import backtype.storm.generated.StormTopology; 4 | import backtype.storm.tuple.Fields; 5 | import backtype.storm.tuple.Values; 6 | import com.fasterxml.jackson.core.JsonProcessingException; 7 | import com.fasterxml.jackson.databind.ObjectMapper; 8 | import com.github.fhuss.storm.elasticsearch.BaseLocalClusterTest; 9 | import com.github.fhuss.storm.elasticsearch.functions.CreateJson; 10 | import com.github.fhuss.storm.elasticsearch.functions.ExtractSearchArgs; 11 | import com.github.fhuss.storm.elasticsearch.model.Tweet; 12 | import com.google.common.collect.Lists; 13 | import com.github.fhuss.storm.elasticsearch.Document; 14 | import 
org.elasticsearch.index.query.QueryBuilders; 15 | import org.junit.*; 16 | import storm.trident.TridentState; 17 | import storm.trident.TridentTopology; 18 | import storm.trident.operation.BaseFunction; 19 | import storm.trident.operation.ReducerAggregator; 20 | import storm.trident.operation.TridentCollector; 21 | import storm.trident.operation.builtin.FilterNull; 22 | import storm.trident.testing.FixedBatchSpout; 23 | import storm.trident.tuple.TridentTuple; 24 | 25 | import java.io.IOException; 26 | 27 | /** 28 | * Default test class. 29 | * 30 | * @author fhussonnois 31 | */ 32 | public class IndexMapStateTest extends BaseLocalClusterTest{ 33 | 34 | static final ObjectMapper MAPPER = new ObjectMapper(); 35 | 36 | public IndexMapStateTest() { 37 | super("my_index"); 38 | } 39 | 40 | @Test 41 | public void shouldExecuteDRPC( ) throws IOException { 42 | String query1 = QueryBuilders.termQuery("text", "moon").buildAsBytes().toUtf8(); 43 | String query2 = QueryBuilders.termQuery("text", "man").buildAsBytes().toUtf8(); 44 | String query3 = QueryBuilders.termQuery("text", "score").buildAsBytes().toUtf8(); 45 | String query4 = QueryBuilders.termQuery("text", "apples").buildAsBytes().toUtf8(); 46 | String query5 = QueryBuilders.termQuery("text", "person").buildAsBytes().toUtf8(); 47 | 48 | assertDRPC(drpc.execute("search", query1 + " my_index my_type"), "the cow jumped over the moon"); 49 | assertDRPC(drpc.execute("search", query2 + " my_index my_type"), "the man went to the store and bought some candy"); 50 | assertDRPC(drpc.execute("search", query3 + " my_index my_type"), "four score and seven years ago"); 51 | assertDRPC(drpc.execute("search", query4 + " my_index my_type"), "how many apples can you eat"); 52 | assertDRPC(drpc.execute("search", query5 + " my_index my_type"), "to be or not to be the person"); 53 | 54 | } 55 | 56 | protected void assertDRPC(String actual, String expected) throws IOException { 57 | String s = MAPPER.readValue(actual, 
String[][].class)[0][0]; 58 | Assert.assertEquals(expected, MAPPER.readValue(s, Tweet.class).getText() ); 59 | } 60 | 61 | public static class DocumentBuilder extends BaseFunction { 62 | @Override 63 | public void execute(TridentTuple tuple, TridentCollector collector) { 64 | String sentence = tuple.getString(0); 65 | collector.emit(new Values( new Document<>("my_index", "my_type", sentence, String.valueOf(sentence.hashCode())))); 66 | } 67 | } 68 | 69 | public static class ExtractDocumentInfo extends BaseFunction { 70 | @Override 71 | public void execute(TridentTuple tuple, TridentCollector collector) { 72 | Document t = (Document)tuple.getValue(0); 73 | collector.emit(new Values(t.getId(), t.getName(), t.getType())); 74 | } 75 | } 76 | 77 | public static class TweetBuilder implements ReducerAggregator { 78 | @Override 79 | public Tweet init() { 80 | return null; 81 | } 82 | 83 | @Override 84 | public Tweet reduce(Tweet tweet, TridentTuple objects) { 85 | 86 | Document doc = (Document) objects.getValueByField("document"); 87 | if( tweet == null) 88 | tweet = new Tweet(doc.getSource(), 1); 89 | else { 90 | tweet.incrementCount(); 91 | } 92 | 93 | return tweet; 94 | } 95 | } 96 | 97 | @Override 98 | public StormTopology buildTopology( ) { 99 | ESIndexMapState.Factory state = ESIndexMapState.nonTransactional(getLocalClient(), Tweet.class); 100 | 101 | FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence"), 3, 102 | new Values("the cow jumped over the moon"), 103 | new Values("the man went to the store and bought some candy"), 104 | new Values("four score and seven years ago"), 105 | new Values("how many apples can you eat"), 106 | new Values("to be or not to be the person")); 107 | spout.setCycle(true); 108 | 109 | TridentTopology topology = new TridentTopology(); 110 | 111 | TridentState staticState = topology.newStaticState(new ESIndexState.Factory<>(getLocalClient(), Tweet.class)); 112 | 113 | topology.newStream("tweets", spout) 114 | .each(new 
Fields("sentence"), new DocumentBuilder(), new Fields("document")) 115 | .each(new Fields("document"), new ExtractDocumentInfo(), new Fields("id", "index", "type")) 116 | .groupBy(new Fields("index", "type", "id")) 117 | .persistentAggregate(state, new Fields("document"), new TweetBuilder(), new Fields("tweet")) 118 | .parallelismHint(1); 119 | 120 | topology.newDRPCStream("search", drpc) 121 | .each(new Fields("args"), new ExtractSearchArgs(), new Fields("query", "indices", "types")) 122 | .groupBy(new Fields("query", "indices", "types")) 123 | .stateQuery(staticState, new Fields("query", "indices", "types"), new QuerySearchIndexQuery(), new Fields("tweet")) 124 | .each(new Fields("tweet"), new FilterNull()) 125 | .each(new Fields("tweet"), new CreateJson(), new Fields("json")) 126 | .project(new Fields("json")); 127 | 128 | return topology.build(); 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /src/test/java/com/github/fhuss/storm/elasticsearch/state/ValueSerializerTest.java: -------------------------------------------------------------------------------- 1 | package com.github.fhuss.storm.elasticsearch.state; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Test; 5 | import storm.trident.state.OpaqueValue; 6 | import storm.trident.state.TransactionalValue; 7 | 8 | import java.io.IOException; 9 | 10 | import static com.github.fhuss.storm.elasticsearch.state.ValueSerializer.*; 11 | 12 | /** 13 | * Unit tests for {@link ValueSerializer} implementations. 
14 | * 15 | * @author fhussonnois 16 | */ 17 | public class ValueSerializerTest { 18 | 19 | public static class FooDocument { 20 | public String value; 21 | 22 | // dummy constructor 23 | public FooDocument() { 24 | 25 | } 26 | public FooDocument(String value) { 27 | this.value = value; 28 | } 29 | } 30 | 31 | @Test 32 | public void shouldDeSerializeNonTransactionValue( ) throws IOException { 33 | 34 | NonTransactionalValueSerializer serializer = new NonTransactionalValueSerializer<>(FooDocument.class); 35 | byte[] value = serializer.serialize(new FooDocument("foo")); 36 | 37 | FooDocument actual = serializer.deserialize(value); 38 | Assert.assertNotNull(actual); 39 | Assert.assertEquals("foo", actual.value); 40 | } 41 | 42 | @Test 43 | public void shouldSerializeTransactionValue( ) throws IOException { 44 | TransactionalValueSerializer serializer = new TransactionalValueSerializer<>(FooDocument.class); 45 | byte[] value = serializer.serialize(new TransactionalValue<>(1L, new FooDocument("foo"))); 46 | 47 | TransactionalValue actual = serializer.deserialize(value); 48 | Assert.assertNotNull(actual); 49 | Assert.assertEquals(1L, (long)actual.getTxid()); 50 | Assert.assertEquals("foo", actual.getVal().value); 51 | } 52 | 53 | @Test 54 | public void shouldSerializeOpaqueValueWithNoPreviousValue( ) throws IOException { 55 | OpaqueValueSerializer serializer = new OpaqueValueSerializer<>(FooDocument.class); 56 | byte[] value = serializer.serialize(new OpaqueValue<>(1L, new FooDocument("foo"))); 57 | 58 | OpaqueValue actual = serializer.deserialize(value); 59 | Assert.assertNotNull(actual); 60 | Assert.assertEquals(1L, (long)actual.getCurrTxid()); 61 | Assert.assertEquals("foo", actual.getCurr().value); 62 | } 63 | 64 | @Test 65 | public void shouldSerializeOpaqueValueWithPreviousValue( ) throws IOException { 66 | OpaqueValueSerializer serializer = new OpaqueValueSerializer<>(FooDocument.class); 67 | byte[] value = serializer.serialize(new OpaqueValue<>(1L, new 
FooDocument("foo"), new FooDocument("bar"))); 68 | 69 | OpaqueValue actual = serializer.deserialize(value); 70 | Assert.assertNotNull(actual); 71 | Assert.assertEquals(1L, (long)actual.getCurrTxid()); 72 | Assert.assertEquals("foo", actual.getCurr().value); 73 | Assert.assertEquals("bar", actual.getPrev().value); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/test/resources/elasticsearch.yml: -------------------------------------------------------------------------------- 1 | cluster.name: storm --------------------------------------------------------------------------------