├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── README.md
├── pom.xml
└── src
├── main
└── java
│ └── com
│ └── github
│ └── fhuss
│ └── storm
│ └── elasticsearch
│ ├── ClientFactory.java
│ ├── Document.java
│ ├── bolt
│ └── IndexBatchBolt.java
│ ├── commons
│ └── RichTickTupleBolt.java
│ ├── handler
│ └── BulkResponseHandler.java
│ ├── mapper
│ ├── MappingException.java
│ ├── TridentTupleMapper.java
│ ├── TupleMapper.java
│ └── impl
│ │ └── DefaultTupleMapper.java
│ └── state
│ ├── ESIndexMapState.java
│ ├── ESIndexState.java
│ ├── ESIndexUpdater.java
│ ├── QuerySearchIndexQuery.java
│ └── ValueSerializer.java
└── test
├── java
└── com
│ └── github
│ └── fhuss
│ └── storm
│ └── elasticsearch
│ ├── BaseLocalClusterTest.java
│ ├── bolt
│ └── IndexBatchBoltTest.java
│ ├── functions
│ ├── CreateJson.java
│ ├── DocumentBuilder.java
│ └── ExtractSearchArgs.java
│ ├── model
│ └── Tweet.java
│ └── state
│ ├── ESIndexUpdaterTest.java
│ ├── IndexMapStateTest.java
│ └── ValueSerializerTest.java
└── resources
└── elasticsearch.yml
/.gitignore:
--------------------------------------------------------------------------------
1 | # Compiled source #
2 | ###################
3 | *.com
4 | *.class
5 | *.dll
6 | *.exe
7 | *.o
8 | *.so
9 |
10 | # Packages #
11 | ############
12 | # it's better to unpack these files and commit the raw source
13 | # git has its own built in compression methods
14 | *.7z
15 | *.dmg
16 | *.gz
17 | *.iso
18 | *.jar
19 | *.rar
20 | *.tar
21 | *.zip
22 |
23 | # Logs and databases #
24 | ######################
25 | *.log
26 | *.sql
27 | *.sqlite
28 |
29 | # OS generated files #
30 | ######################
31 | .DS_Store
32 | .DS_Store?
33 | ._*
34 | .Spotlight-V100
35 | .Trashes
36 | ehthumbs.db
37 | Thumbs.db
38 |
39 | # Eclipse
40 | .classpath
41 | .project
42 | .settings/
43 |
44 | # Intellij
45 | .idea/
46 | *.iml
47 | *.iws
48 |
49 | # Maven
50 | log/
51 | target/
52 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ## 0.3.0
2 |
3 | * [#6][]: Fixed NotSerializableException on Document class.
4 | * [#5][]: Index update with ESIndexState should work with generic types other than String
5 | * Update storm version to 0.9.3
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Apache Storm - Elasticsearch
2 | ----------------------------
3 |
4 | [Apache Storm](https://storm.apache.org/) is a free and open source distributed realtime computation system.
5 |
6 | ### Bolt/Trident API implementation for [Elasticsearch](https://www.elastic.co/)
7 |
8 | This library provides a core Storm bolt and implements a Trident state on top of Elasticsearch.
9 | It supports non-transactional, transactional, and opaque state types.
10 |
11 | ### Maven dependency
12 | ```xml
13 | <dependency>
14 |     <groupId>com.github.fhuss</groupId>
15 |     <artifactId>storm-elasticsearch</artifactId>
16 |     <version>0.3.0</version>
17 | </dependency>
18 | ```
19 | ### TupleMapper / TridentTupleMapper
20 | To index documents into Elasticsearch, you need to provide an implementation of one of the following interfaces,
21 | depending on whether you use a bolt or a Trident state.
22 |
23 | Each of these two interfaces defines a single method, which maps tuple fields to a [Document](https://github.com/fhussonnois/storm-trident-elasticsearch/blob/master/src/main/java/com/github/fhuss/storm/elasticsearch/Document.java).
24 |
25 | ```java
26 | public interface TupleMapper<T> extends Serializable {
27 | T map(Tuple input);
28 | }
29 | ```
30 |
31 | ```java
32 | public interface TridentTupleMapper<T> extends Serializable {
33 | T map(TridentTuple input);
34 | }
35 | ```
36 |
37 | To be indexed, a document requires at least the following attributes:
38 |
39 | - The **name** of the index
40 | - The **type** of document
41 | - The **source** document
42 |
43 | For general use cases, a default implementation is provided: [DefaultTupleMapper](https://github.com/fhussonnois/storm-trident-elasticsearch/blob/master/src/main/java/com/github/fhuss/storm/elasticsearch/mapper/impl/DefaultTupleMapper.java).
44 |
45 | ### Core Bolt / IndexBatchBolt
46 | The IndexBatchBolt implementation relies on Storm's tick tuple feature and the Elasticsearch [Bulk API](http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/docs-bulk.html) to
47 | index many tuples.
48 |
49 | ### Trident State examples
50 | #### Persistent Aggregate
51 |
52 | ```java
53 | FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence"), 3,
54 | new Values("the cow jumped over the moon"),
55 | new Values("the man went to the store and bought some candy"),
56 | new Values("four score and seven years ago"),
57 | new Values("how many apples can you eat"),
58 | new Values("to be or not to be the person"));
59 | spout.setCycle(true);
60 |
61 | TridentTopology topology = new TridentTopology();
62 |
63 | Settings settings = ImmutableSettings.settingsBuilder().loadFromClasspath("elasticsearch.yml").build();
64 | StateFactory stateFactory = ESIndexMapState.nonTransactional(new ClientFactory.LocalTransport(settings.getAsMap()), Tweet.class);
65 |
66 | topology.newStream("tweets", spout)
67 | .each(new Fields("sentence"), new DocumentBuilder(), new Fields("document"))
68 | .each(new Fields("document"), new ExtractDocumentInfo(), new Fields("id", "index", "type"))
69 | .groupBy(new Fields("index", "type", "id"))
70 | .persistentAggregate(stateFactory, new Fields("document"), new TweetBuilder(), new Fields("tweet"))
71 | .parallelismHint(1);
72 | ```
73 |
74 | #### Search query using DRPC
75 | ```java
76 | TridentTopology topology = new TridentTopology();
77 |
78 | Settings settings = ImmutableSettings.settingsBuilder().loadFromClasspath("elasticsearch.yml").build();
79 | TridentState staticState = topology.newStaticState(new ESIndexState.Factory<>(new LocalTransport(settings.getAsMap()), Tweet.class));
80 | topology.newDRPCStream("search", drpc)
81 | .each(new Fields("args"), new ExtractSearchArgs(), new Fields("query", "indices", "types"))
82 | .groupBy(new Fields("query", "indices", "types"))
83 | .stateQuery(staticState, new Fields("query", "indices", "types"), new QuerySearchIndexQuery(), new Fields("tweet"))
84 | .each(new Fields("tweet"), new FilterNull())
85 | .each(new Fields("tweet"), new CreateJson(), new Fields("json"))
86 | .project(new Fields("json"));
87 | ```
88 |
89 | ## License
90 |
91 | Licensed to the Apache Software Foundation (ASF) under one
92 | or more contributor license agreements. See the NOTICE file
93 | distributed with this work for additional information
94 | regarding copyright ownership. The ASF licenses this file
95 | to you under the Apache License, Version 2.0 (the
96 | "License"); you may not use this file except in compliance
97 | with the License. You may obtain a copy of the License at
98 |
99 | http://www.apache.org/licenses/LICENSE-2.0
100 |
101 | Unless required by applicable law or agreed to in writing,
102 | software distributed under the License is distributed on an
103 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
104 | KIND, either express or implied. See the License for the
105 | specific language governing permissions and limitations
106 | under the License.
107 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 |
8 | org.sonatype.oss
9 | oss-parent
10 | 9
11 |
12 |
13 | com.github.fhuss
14 | storm-elasticsearch
15 | 0.3.0
16 | Storm Trident Elasticsearch
17 | Trident API implementation for Elasticsearch
18 |
19 |
20 | UTF-8
21 | 1.7
22 | 0.9.3
23 | 1.7.6
24 | 1.0.1
25 | 2.3.2
26 | 4.11
27 | 1.2.1
28 | 16.0.1
29 | 3.3
30 |
31 |
32 |
33 |
34 | The Apache Software License, Version 2.0
35 | http://www.apache.org/licenses/LICENSE-2.0.txt
36 |
37 |
38 |
39 |
40 |
41 | fhuss
42 | Florian Hussonnois
43 | florian.hussonnois@gmail.com
44 | https://github.com/fhussonnois
45 |
46 | developer
47 |
48 |
49 |
50 |
51 |
52 | scm:git:git@github.com:fhussonnois/storm-trident-elasticsearch.git
53 | scm:git:git@github.com:fhussonnois/storm-trident-elasticsearch.git
54 | git@github.com:fhussonnois/storm-trident-elasticsearch.git
55 |
56 |
57 |
58 |
59 |
60 | org.apache.maven.plugins
61 | maven-compiler-plugin
62 | 3.1
63 |
64 | ${java.version}
65 | ${java.version}
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 | org.apache.commons
74 | commons-lang3
75 | ${commons-lang3.version}
76 |
77 |
78 |
79 | org.apache.storm
80 | storm-core
81 | ${storm.version}
82 | provided
83 |
84 |
85 |
86 | org.slf4j
87 | slf4j-api
88 | ${org.slf4j.version}
89 |
90 |
91 |
92 | com.fasterxml.jackson.core
93 | jackson-databind
94 | ${jackson.databind.version}
95 |
96 |
97 |
98 | org.elasticsearch
99 | elasticsearch
100 | ${org.elasticsearch.version}
101 | provided
102 |
103 |
104 |
105 | com.google.guava
106 | guava
107 | ${guava.version}
108 |
109 |
110 |
111 | junit
112 | junit
113 | ${junit.version}
114 | test
115 |
116 |
117 |
118 | com.github.tlrx
119 | elasticsearch-test
120 | ${elasticsearch-test.version}
121 | test
122 |
123 |
124 |
125 |
126 |
--------------------------------------------------------------------------------
/src/main/java/com/github/fhuss/storm/elasticsearch/ClientFactory.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 | package com.github.fhuss.storm.elasticsearch;
20 |
21 | import com.google.common.base.Preconditions;
22 | import org.apache.commons.lang3.StringUtils;
23 | import org.elasticsearch.client.Client;
24 | import org.elasticsearch.client.transport.TransportClient;
25 | import org.elasticsearch.common.settings.ImmutableSettings;
26 | import org.elasticsearch.common.settings.Settings;
27 | import org.elasticsearch.common.transport.InetSocketTransportAddress;
28 | import org.elasticsearch.common.transport.LocalTransportAddress;
29 | import org.elasticsearch.common.unit.TimeValue;
30 | import org.elasticsearch.node.Node;
31 | import org.elasticsearch.node.NodeBuilder;
32 |
33 | import java.io.Serializable;
34 | import java.util.Map;
35 |
36 | /**
37 | * Factory for Elasticsearch clients; each nested implementation builds its client in makeClient from the Storm configuration map and/or the settings map given at construction.
38 | *
39 | * @author fhussonnois
40 | */
41 | public interface ClientFactory extends Serializable {
42 |
43 | public static final int DEFAULT_PORT = 9300; // port used when a host entry carries no explicit port
44 | public static final String NAME = "storm.elasticsearch.cluster.name"; // configuration key read for the cluster name
45 | public static final String HOSTS = "storm.elasticsearch.hosts"; // configuration key read for the host list (mandatory for Transport)
46 | public static final char PORT_SEPARATOR = ':'; // separates host from port inside one host entry
47 | public static final char HOST_SEPARATOR = ','; // separates the entries of the host list
48 |
49 | T makeClient(Map conf) ; // NOTE(review): T is not declared anywhere in this listing, presumably a stripped type parameter; confirm against the original file
50 |
51 | /**
52 | * Factory that creates a {@link TransportClient} and adds one transport address per entry of the {@link #HOSTS} property; entries are comma-separated and may carry an optional ":port" suffix.
53 | */
54 | public static class Transport implements ClientFactory {
55 |
56 | private Map settings;
57 |
58 | public Transport() {
59 | }
60 |
61 | public Transport(Map settings) {
62 | this.settings = settings;
63 | }
64 |
65 | @Override
66 | public TransportClient makeClient(Map conf) {
67 |
68 | String clusterHosts = (String)conf.get(HOSTS);
69 | String clusterName = (String)conf.get(NAME);
70 |
71 | Preconditions.checkNotNull(clusterHosts,"no setting found for Transport Client, make sure that you set property " + HOSTS); // the host list is mandatory; the cluster name is not checked
72 |
73 | TransportClient client = new TransportClient(buildSettings(clusterName));
74 |
75 | for(String hostAndPort : StringUtils.split(clusterHosts, HOST_SEPARATOR)) {
76 | int portPos = hostAndPort.indexOf(PORT_SEPARATOR); // -1 means the entry has no ':' and therefore no port
77 | boolean noPortDefined = portPos == -1;
78 | int port = ( noPortDefined ) ? DEFAULT_PORT : Integer.parseInt(hostAndPort.substring(portPos + 1, hostAndPort.length()));
79 | String host = (noPortDefined) ? hostAndPort : hostAndPort.substring(0, portPos);
80 | client.addTransportAddress(new InetSocketTransportAddress(host, port));
81 | }
82 | return client;
83 | }
84 |
85 | private Settings buildSettings(String clusterName) {
86 | ImmutableSettings.Builder sb = ImmutableSettings.settingsBuilder();
87 | if( StringUtils.isNotEmpty(clusterName)) sb.put("cluster.name", clusterName); // cluster.name is only set when a non-empty name was configured
88 | if( settings != null) sb.put(settings); // constructor-supplied settings are added afterwards (presumably overriding equal keys; confirm)
89 |
90 | return sb.build();
91 | }
92 | }
93 |
94 | /**
95 | * Factory that creates a {@link TransportClient} built with node.local set to "true" and connected to the local transport address "1"; the conf argument is not read.
96 | */
97 | public static class LocalTransport implements ClientFactory {
98 |
99 | private Map settings;
100 |
101 | public LocalTransport() {
102 | }
103 |
104 | public LocalTransport(Map settings) {
105 | this.settings = settings;
106 | }
107 |
108 |
109 | @Override
110 | public TransportClient makeClient(Map conf) {
111 | TransportClient client = new TransportClient(buildSettings());
112 | client.addTransportAddress(new LocalTransportAddress("1"));
113 | return client;
114 | }
115 |
116 | protected Settings buildSettings( ) {
117 | ImmutableSettings.Builder sb = ImmutableSettings.settingsBuilder().put("node.local", "true");
118 | if( settings != null) sb.put(settings); // constructor-supplied settings are added after the node.local default
119 |
120 | return sb.build();
121 | }
122 | }
123 |
124 | /**
125 | * Factory that builds an embedded {@link Node} with node.client set to true, registers a JVM shutdown hook that closes the node, and returns the node's client.
126 | */
127 | public static class NodeClient implements ClientFactory {
128 |
129 | private Map settings;
130 |
131 | public NodeClient() {}
132 |
133 | public NodeClient(Map settings) {
134 | this.settings = settings;
135 | }
136 |
137 | @Override
138 | public Client makeClient(Map conf) {
139 | String clusterName = (String)conf.get(NAME);
140 |
141 | final Node node = NodeBuilder.nodeBuilder().settings(buildSettings(clusterName)).node();
142 | registerShutdownHook(node);
143 |
144 | return node.client();
145 | }
146 |
147 | private void registerShutdownHook(final Node node) {
148 | Runtime.getRuntime().addShutdownHook(new Thread() {
149 | public void run() {
150 | node.close();
151 | }
152 | });
153 | }
154 |
155 | private Settings buildSettings(String clusterName) {
156 | ImmutableSettings.Builder sb = ImmutableSettings.settingsBuilder().put("node.client", true);
157 |
158 | if( StringUtils.isNotEmpty(clusterName)) sb.put("cluster.name", clusterName); // cluster.name is only set when a non-empty name was configured
159 | if( settings != null) sb.put(settings); // constructor-supplied settings are added last
160 |
161 | return sb.build();
162 | }
163 | }
164 |
165 | /**
166 | * Factory that builds a local embedded {@link Node} (node.local set to true, index.store.type set to "memory"), registers a JVM shutdown hook that closes it, and requests yellow cluster health before returning the node's client.
167 | * This factory should be preferred for testing purposes.
168 | */
169 | public static class LocalNodeClient implements ClientFactory {
170 |
171 | private Map settings;
172 |
173 | public LocalNodeClient() {}
174 |
175 | public LocalNodeClient(Map settings) { this.settings = settings; }
176 |
177 | @Override
178 | public Client makeClient(Map conf) {
179 |
180 | final Node node = NodeBuilder.nodeBuilder().settings( buildSettings() ).node();
181 | registerShutdownHook(node);
182 |
183 | return waitForYellowStatus(node.client());
184 | }
185 |
186 | private void registerShutdownHook(final Node node) {
187 | Runtime.getRuntime().addShutdownHook(new Thread() {
188 | public void run() {
189 | node.close();
190 | }
191 | });
192 | }
193 |
194 | private Client waitForYellowStatus(Client client) {
195 | client.admin().cluster()
196 | .prepareHealth()
197 | .setWaitForYellowStatus()
198 | .setTimeout(TimeValue.timeValueSeconds(30)) // health request timeout of 30 seconds
199 | .execute()
200 | .actionGet(); // the health response is discarded; the same client is returned either way
201 | return client;
202 | }
203 |
204 | private Settings buildSettings( ) {
205 | ImmutableSettings.Builder sb = ImmutableSettings.settingsBuilder()
206 | .put("node.name", "elastic-storm-test")
207 | .put("node.local", true)
208 | .put("index.store.type", "memory");
209 |
210 | if( settings != null) sb.put(settings); // constructor-supplied settings are added after the defaults above
211 |
212 | return sb.build();
213 | }
214 | }
215 | }
216 |
--------------------------------------------------------------------------------
/src/main/java/com/github/fhuss/storm/elasticsearch/Document.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 | package com.github.fhuss.storm.elasticsearch;
20 |
21 | import java.io.Serializable;
22 |
23 | /**
24 | * Serializable holder for the data required to index one document: the index name, the document type, the source, and an optional id and parent id.
25 | *
26 | * @author fhussonnois
27 | * @param <T> type of the underlying source document (NOTE(review): the type parameter is not declared in this listing, presumably lost in extraction; confirm against the original file)
28 | */
29 | public class Document implements Serializable {
30 |
31 | private static final long serialVersionUID = 1L;
32 |
33 | /**
34 | * The name of the index
35 | */
36 | private String name;
37 | /**
38 | * The type of document
39 | */
40 | private String type;
41 | /**
42 | * The source document
43 | */
44 | private T source;
45 | /**
46 | * The document id; null when the document is built through the three-argument constructor
47 | */
48 | private String id;
49 | /**
50 | * The parent document id; null unless the five-argument constructor is given one
51 | */
52 | private String parentId;
53 |
54 | public Document(String name, String type,T source) { // no id and no parent id: both are passed on as null
55 | this(name, type, source, null, null);
56 | }
57 |
58 | public Document(String name, String type, T source, String id) { // no parent id: passed on as null
59 | this(name, type, source, id, null);
60 | }
61 |
62 | public Document(String name, String type, T source, String id, String parentId) { // stores every argument as given, without validation
63 | this.name = name;
64 | this.type = type;
65 | this.source = source;
66 | this.id = id;
67 | this.parentId = parentId;
68 | }
69 |
70 | public String getName( ) {
71 | return this.name;
72 | }
73 |
74 | public String getType( ) {
75 | return this.type;
76 | }
77 |
78 | public T getSource( ) {
79 | return this.source;
80 | }
81 |
82 | public String getId( ) {
83 | return this.id;
84 | }
85 |
86 | public String getParentId() {
87 | return this.parentId;
88 | }
89 | }
90 |
--------------------------------------------------------------------------------
/src/main/java/com/github/fhuss/storm/elasticsearch/bolt/IndexBatchBolt.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 | package com.github.fhuss.storm.elasticsearch.bolt;
20 |
21 | import backtype.storm.task.OutputCollector;
22 | import backtype.storm.task.TopologyContext;
23 | import backtype.storm.topology.OutputFieldsDeclarer;
24 | import backtype.storm.tuple.Tuple;
25 |
26 | import com.github.fhuss.storm.elasticsearch.ClientFactory;
27 | import com.github.fhuss.storm.elasticsearch.Document;
28 | import com.github.fhuss.storm.elasticsearch.commons.RichTickTupleBolt;
29 | import com.github.fhuss.storm.elasticsearch.mapper.TupleMapper;
30 | import org.elasticsearch.ElasticsearchException;
31 | import org.elasticsearch.action.bulk.BulkItemResponse;
32 | import org.elasticsearch.action.bulk.BulkRequestBuilder;
33 | import org.elasticsearch.action.bulk.BulkResponse;
34 | import org.elasticsearch.action.index.IndexRequestBuilder;
35 | import org.elasticsearch.client.Client;
36 |
37 | import org.slf4j.Logger;
38 | import org.slf4j.LoggerFactory;
39 |
40 | import java.util.ArrayList;
41 | import java.util.List;
42 | import java.util.Map;
43 | import java.util.concurrent.LinkedBlockingQueue;
44 | import java.util.concurrent.TimeUnit;
45 |
46 | /**
47 | * Simple Bolt to index documents batch into an elasticsearch cluster.
48 | *
49 | * @author fhussonnois
50 | */
51 | public class IndexBatchBolt extends RichTickTupleBolt {
52 |
53 | private static final Logger LOGGER = LoggerFactory.getLogger(IndexBatchBolt.class);
54 |
55 | public static final TimeUnit DEFAULT_TIME_UNIT = TimeUnit.SECONDS;
56 |
57 | public static final long DEFAULT_EMIT_FREQUENCY = 10;
58 |
59 | private static final int QUEUE_MAX_SIZE = 1000;
60 |
61 | private OutputCollector outputCollector;
62 |
63 | private Client client;
64 |
65 | private ClientFactory clientFactory;
66 |
67 | private LinkedBlockingQueue queue;
68 |
69 | private TupleMapper> mapper;
70 |
71 | /**
72 | * Creates a new {@link IndexBatchBolt} instance.
73 | *
74 | * @param emitFrequency the batch frequency
75 | * @param unit the time unit of the emit frequency
76 | * @param clientFactory the elasticsearch client factory
77 | * @param mapper the document tuple mapper
78 | */
79 | public IndexBatchBolt(ClientFactory clientFactory, TupleMapper> mapper, long emitFrequency, TimeUnit unit) {
80 | super(emitFrequency, unit);
81 | this.clientFactory = clientFactory;
82 | this.mapper = mapper;
83 | }
84 |
85 | /**
86 | * Creates a new {@link IndexBatchBolt} instance which use SECOND as time unit for batch frequency.
87 | * @param clientFactory the elasticsearch client factory
88 | * @param mapper the the document tuple mapper
89 | */
90 | public IndexBatchBolt(ClientFactory clientFactory, TupleMapper> mapper, long emitFrequency) {
91 | this(clientFactory, mapper, emitFrequency, DEFAULT_TIME_UNIT);
92 | }
93 |
94 | /**
95 | * Creates a new {@link IndexBatchBolt} instance with a default batch frequency set to 10 seconds.
96 | * @param clientFactory the elasticsearch client factory
97 | * @param mapper the the document tuple mapper
98 | */
99 | public IndexBatchBolt(ClientFactory clientFactory, TupleMapper> mapper) {
100 | this(clientFactory, mapper, DEFAULT_EMIT_FREQUENCY, DEFAULT_TIME_UNIT);
101 | }
102 |
103 | /**
104 | * (non-Javadoc)
105 | * @see backtype.storm.task.IBolt#prepare(java.util.Map, backtype.storm.task.TopologyContext, backtype.storm.task.OutputCollector)
106 | */
107 | @Override
108 | public void prepare(Map stormConf, TopologyContext topologyContext, OutputCollector outputCollector) {
109 | this.outputCollector = outputCollector;
110 | this.client = clientFactory.makeClient(stormConf);
111 | this.queue = new LinkedBlockingQueue<>(QUEUE_MAX_SIZE);
112 | }
113 |
114 | @Override
115 | protected void executeTickTuple(Tuple tuple) {
116 | bulkUpdateIndexes();
117 | outputCollector.ack(tuple);
118 | }
119 |
120 | @Override
121 | protected void executeTuple(Tuple tuple) {
122 | if( ! queue.offer(tuple) ) {
123 | bulkUpdateIndexes();
124 | queue.add(tuple);
125 | }
126 | }
127 |
128 | protected void bulkUpdateIndexes( ) {
129 |
130 | List inputs = new ArrayList<>(queue.size());
131 | queue.drainTo(inputs);
132 | BulkRequestBuilder bulkRequest = client.prepareBulk();
133 | for (Tuple input : inputs) {
134 | Document doc = mapper.map(input);
135 | IndexRequestBuilder request = client.prepareIndex(doc.getName(), doc.getType(), doc.getId()).setSource((String)doc.getSource());
136 |
137 | if(doc.getParentId() != null) {
138 | request.setParent(doc.getParentId());
139 | }
140 | bulkRequest.add(request);
141 | }
142 |
143 | try {
144 | if (bulkRequest.numberOfActions() > 0) {
145 | BulkResponse bulkItemResponses = bulkRequest.execute().actionGet();
146 | if (bulkItemResponses.hasFailures()) {
147 | BulkItemResponse[] items = bulkItemResponses.getItems();
148 | for (int i = 0; i < items.length; i++) {
149 | ackOrFail(items[i], inputs.get(i));
150 | }
151 | } else {
152 | ackAll(inputs);
153 | }
154 | }
155 | } catch (ElasticsearchException e) {
156 | LOGGER.error("Unable to process bulk request, " + inputs.size() + " tuples are in failure", e);
157 | outputCollector.reportError(e.getRootCause());
158 | failAll(inputs);
159 | }
160 | }
161 |
162 | private void ackOrFail(BulkItemResponse item, Tuple tuple) {
163 | if (item.isFailed()) {
164 | LOGGER.error("Failed to process tuple : " + mapper.map(tuple));
165 | outputCollector.fail(tuple);
166 | } else {
167 | outputCollector.ack(tuple);
168 | }
169 | }
170 |
171 | protected void ackAll(List inputs) {
172 | for(Tuple t : inputs)
173 | outputCollector.ack(t);
174 | }
175 |
176 | protected void failAll(List inputs) {
177 | for(Tuple t : inputs)
178 | outputCollector.fail(t);
179 | }
180 |
181 | @Override
182 | public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
183 | /* no-ouput */
184 | }
185 |
186 | @Override
187 | public void cleanup() {
188 | if( this.client != null) this.client.close();
189 | }
190 | }
--------------------------------------------------------------------------------
/src/main/java/com/github/fhuss/storm/elasticsearch/commons/RichTickTupleBolt.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 | package com.github.fhuss.storm.elasticsearch.commons;
20 |
21 | import backtype.storm.Config;
22 | import backtype.storm.Constants;
23 | import backtype.storm.topology.IRichBolt;
24 | import backtype.storm.tuple.Tuple;
25 |
26 | import java.util.Map;
27 | import java.util.concurrent.TimeUnit;
28 |
29 | /**
30 |  * Base class for {@link IRichBolt} implementations that need tick tuple support.
31 |  * <p>
32 |  * The bolt requests tick tuples at the configured frequency through its component configuration
33 |  * and routes every incoming tuple to {@link #executeTickTuple(Tuple)} or {@link #executeTuple(Tuple)}.
34 |  *
35 |  * @author fhussonnois
36 |  */
37 | public abstract class RichTickTupleBolt implements IRichBolt {
38 |
39 |     /** Tick tuple frequency in seconds, always at least 1. */
40 |     private long emitFrequency;
41 |
42 |     /**
43 |      * Creates a new {@link RichTickTupleBolt} instance.
44 |      * <p>
45 |      * The frequency is converted to whole seconds. A value shorter than one second is raised to
46 |      * one second rather than truncated to zero.
47 |      *
48 |      * @param emitFrequency the tick tuple emit frequency
49 |      * @param unit the time unit of the emit frequency
50 |      */
51 |     public RichTickTupleBolt(long emitFrequency, TimeUnit unit) {
52 |         this.emitFrequency = Math.max(1L, unit.toSeconds(emitFrequency));
53 |     }
54 |
55 |     /** Tells whether the tuple comes from the tick stream of the system component. */
56 |     private static boolean isTickTuple(Tuple tuple) {
57 |         return Constants.SYSTEM_COMPONENT_ID.equals(tuple.getSourceComponent())
58 |                 && Constants.SYSTEM_TICK_STREAM_ID.equals(tuple.getSourceStreamId());
59 |     }
60 |
61 |     /** Sets {@link Config#TOPOLOGY_TICK_TUPLE_FREQ_SECS} to the emit frequency of this bolt. */
62 |     @Override
63 |     public Map<String, Object> getComponentConfiguration() {
64 |         Config conf = new Config();
65 |         conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, emitFrequency);
66 |         return conf;
67 |     }
68 |
69 |     /** Dispatches tick tuples and regular tuples to their dedicated handlers. */
70 |     @Override
71 |     public void execute(Tuple tuple) {
72 |         if (isTickTuple(tuple)) {
73 |             executeTickTuple(tuple);
74 |         } else {
75 |             executeTuple(tuple);
76 |         }
77 |     }
78 |
79 |     /** Handles a tick tuple. */
80 |     protected abstract void executeTickTuple(Tuple tuple);
81 |
82 |     /** Handles any tuple that is not a tick tuple. */
83 |     protected abstract void executeTuple(Tuple tuple);
84 | }
85 |
--------------------------------------------------------------------------------
/src/main/java/com/github/fhuss/storm/elasticsearch/handler/BulkResponseHandler.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 | package com.github.fhuss.storm.elasticsearch.handler;
20 |
21 | import org.elasticsearch.action.bulk.BulkResponse;
22 | import org.slf4j.Logger;
23 | import org.slf4j.LoggerFactory;
24 |
25 | import java.io.Serializable;
26 |
27 | /**
28 |  * Callback invoked with the response of an executed bulk request.
29 |  *
30 |  * @author fhussonnois
31 |  */
32 | public interface BulkResponseHandler extends Serializable {
33 |
34 |     final Logger LOGGER = LoggerFactory.getLogger(LoggerResponseHandler.class);
35 |
36 |     /**
37 |      * Handles the response of a bulk request.
38 |      *
39 |      * @param response the bulk response to inspect
40 |      */
41 |     void handle(BulkResponse response);
42 |
43 |     /** Handler that logs an error when the bulk response reports failures and does nothing otherwise. */
44 |     public class LoggerResponseHandler implements BulkResponseHandler {
45 |
46 |         @Override
47 |         public void handle(BulkResponse response) {
48 |             if (!response.hasFailures()) {
49 |                 return;
50 |             }
51 |             LOGGER.error("BulkResponse has failures : {}", response.buildFailureMessage());
52 |         }
53 |     }
54 | }
55 |
--------------------------------------------------------------------------------
/src/main/java/com/github/fhuss/storm/elasticsearch/mapper/MappingException.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 | package com.github.fhuss.storm.elasticsearch.mapper;
20 |
21 |
22 | public class MappingException extends RuntimeException { // unchecked exception thrown when a tuple cannot be mapped to a document
23 | // message: description of the failure; source: underlying error, kept as the cause of this exception
24 | public MappingException(String message, Throwable source) {
25 | super(message, source);
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/src/main/java/com/github/fhuss/storm/elasticsearch/mapper/TridentTupleMapper.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 | package com.github.fhuss.storm.elasticsearch.mapper;
20 |
21 | import storm.trident.tuple.TridentTuple;
22 |
23 | import java.io.Serializable;
24 |
25 | /**
26 |  * Interface for building a document from a {@link storm.trident.tuple.TridentTuple}.
27 |  *
28 |  * @param <T> the type of document built from a tuple
29 |  */
30 | public interface TridentTupleMapper<T> extends Serializable {
31 |
32 |     /**
33 |      * Builds a document from the given tuple.
34 |      *
35 |      * @param input the tuple to map
36 |      * @return the document built from {@code input}
37 |      */
38 |     T map(TridentTuple input);
39 | }
--------------------------------------------------------------------------------
/src/main/java/com/github/fhuss/storm/elasticsearch/mapper/TupleMapper.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 | package com.github.fhuss.storm.elasticsearch.mapper;
20 |
21 | import backtype.storm.tuple.Tuple;
22 |
23 | import java.io.Serializable;
24 |
25 | /**
26 |  * Interface for building a document from a {@link Tuple}.
27 |  *
28 |  * @param <T> the type of document built from a tuple
29 |  */
30 | public interface TupleMapper<T> extends Serializable {
31 |
32 |     /**
33 |      * Builds a document from the given tuple.
34 |      *
35 |      * @param input the tuple to map
36 |      * @return the document built from {@code input}
37 |      */
38 |     T map(Tuple input);
39 | }
--------------------------------------------------------------------------------
/src/main/java/com/github/fhuss/storm/elasticsearch/mapper/impl/DefaultTupleMapper.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 | package com.github.fhuss.storm.elasticsearch.mapper.impl;
20 |
21 | import backtype.storm.tuple.Tuple;
22 | import com.fasterxml.jackson.core.JsonProcessingException;
23 | import com.fasterxml.jackson.databind.ObjectMapper;
24 | import com.github.fhuss.storm.elasticsearch.Document;
25 | import com.github.fhuss.storm.elasticsearch.mapper.MappingException;
26 | import com.github.fhuss.storm.elasticsearch.mapper.TupleMapper;
27 |
28 | import java.io.UnsupportedEncodingException;
29 |
30 | /**
31 |  * Default mapper that attempts to map tuple fields to a {@link Document}.
32 |  * <p>
33 |  * The document name and type are read from the {@code name} and {@code type} fields; the
34 |  * {@code id} and {@code parentId} fields are optional. How the {@code source} field is turned
35 |  * into the document source depends on the factory method used to create the mapper.
36 |  *
37 |  * @author fhussonnois
38 |  */
39 | public class DefaultTupleMapper implements TupleMapper<Document<String>> {
40 |
41 |     public static final String FIELD_SOURCE = "source";
42 |     public static final String FIELD_NAME = "name";
43 |     public static final String FIELD_TYPE = "type";
44 |     public static final String FIELD_PARENT_ID = "parentId";
45 |     public static final String FIELD_ID = "id";
46 |
47 |     private TupleMapper<String> sourceMapperStrategy;
48 |
49 |     private DefaultTupleMapper(TupleMapper<String> sourceMapperStrategy) {
50 |         this.sourceMapperStrategy = sourceMapperStrategy;
51 |     }
52 |
53 |     /**
54 |      * Returns a new {@link DefaultTupleMapper} that accepts a String as source field value.
55 |      */
56 |     public static final DefaultTupleMapper newStringDefaultTupleMapper( ) {
57 |         return new DefaultTupleMapper(new TupleMapper<String>() {
58 |             @Override
59 |             public String map(Tuple input) {
60 |                 return input.getStringByField(FIELD_SOURCE);
61 |             }
62 |         });
63 |     }
64 |
65 |     /**
66 |      * Returns a new {@link DefaultTupleMapper} that accepts a byte[] as source field value; the bytes are decoded as UTF-8.
67 |      */
68 |     public static final DefaultTupleMapper newBinaryDefaultTupleMapper( ) {
69 |         return new DefaultTupleMapper(new TupleMapper<String>() {
70 |             @Override
71 |             public String map(Tuple input) {
72 |                 // Charset constant instead of the "UTF-8" name: no checked exception to handle.
73 |                 return new String(input.getBinaryByField(FIELD_SOURCE), java.nio.charset.StandardCharsets.UTF_8);
74 |             }
75 |         });
76 |     }
77 |
78 |     /**
79 |      * Returns a new {@link DefaultTupleMapper} that accepts any Object as source field value and writes it as a JSON string.
80 |      */
81 |     public static final DefaultTupleMapper newObjectDefaultTupleMapper( ) {
82 |         final ObjectMapper mapper = new ObjectMapper();
83 |         return new DefaultTupleMapper(new TupleMapper<String>() {
84 |             @Override
85 |             public String map(Tuple input) {
86 |                 try {
87 |                     return mapper.writeValueAsString(input.getValueByField(FIELD_SOURCE));
88 |                 } catch (JsonProcessingException e) {
89 |                     throw new MappingException("Error happen while processing json on object", e);
90 |                 }
91 |             }
92 |         });
93 |     }
94 |
95 |     /**
96 |      * Builds a {@link Document} from the fields of the tuple.
97 |      *
98 |      * @param input the tuple to map
99 |      * @return the document; its id and parent id are {@code null} when the tuple has no such field
100 |      * @throws MappingException if the source mapper fails to convert the source field
101 |      */
102 |     @Override
103 |     public Document<String> map(Tuple input) {
104 |         // id is optional, like parentId: Document can also be built without an id.
105 |         String id = input.contains(FIELD_ID) ? input.getStringByField(FIELD_ID) : null;
106 |         String name = input.getStringByField(FIELD_NAME);
107 |         String type = input.getStringByField(FIELD_TYPE);
108 |         String parentId = input.contains(FIELD_PARENT_ID) ? input.getStringByField(FIELD_PARENT_ID) : null;
109 |
110 |         return new Document<>(name, type, sourceMapperStrategy.map(input), id, parentId);
111 |     }
112 | }
113 |
--------------------------------------------------------------------------------
/src/main/java/com/github/fhuss/storm/elasticsearch/state/ESIndexMapState.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 | package com.github.fhuss.storm.elasticsearch.state;
20 |
21 | import backtype.storm.task.IMetricsContext;
22 | import backtype.storm.topology.FailedException;
23 | import backtype.storm.topology.ReportedFailedException;
24 | import backtype.storm.tuple.Values;
25 | import com.github.fhuss.storm.elasticsearch.ClientFactory;
26 | import com.github.fhuss.storm.elasticsearch.handler.BulkResponseHandler;
27 | import com.google.common.base.Objects;
28 | import org.elasticsearch.ElasticsearchException;
29 | import org.elasticsearch.action.bulk.BulkRequestBuilder;
30 | import org.elasticsearch.action.get.GetResponse;
31 | import org.elasticsearch.action.get.MultiGetItemResponse;
32 | import org.elasticsearch.action.get.MultiGetRequestBuilder;
33 | import org.elasticsearch.action.get.MultiGetResponse;
34 | import org.elasticsearch.client.Client;
35 | import org.slf4j.Logger;
36 | import org.slf4j.LoggerFactory;
37 | import storm.trident.state.OpaqueValue;
38 | import storm.trident.state.State;
39 | import storm.trident.state.StateFactory;
40 | import storm.trident.state.StateType;
41 | import storm.trident.state.TransactionalValue;
42 | import storm.trident.state.map.CachedMap;
43 | import storm.trident.state.map.IBackingMap;
44 | import storm.trident.state.map.MapState;
45 | import storm.trident.state.map.NonTransactionalMap;
46 | import storm.trident.state.map.OpaqueMap;
47 | import storm.trident.state.map.SnapshottableMap;
48 | import storm.trident.state.map.TransactionalMap;
49 |
50 | import java.io.IOException;
51 | import java.util.ArrayList;
52 | import java.util.HashMap;
53 | import java.util.List;
54 | import java.util.ListIterator;
55 | import java.util.Map;
56 |
57 | import static com.github.fhuss.storm.elasticsearch.state.ValueSerializer.*;
58 |
59 | /**
60 | * This class implements Trident State on top of ElasticSearch.
61 | * It follows trident-memcached library (https://github.com/nathanmarz/trident-memcached) as a template.
62 | *
63 | * @author fhussonnois
64 | * @param <T> OpaqueValue, TransactionalValue or any other non-transactional type
65 | */
66 | public class ESIndexMapState implements IBackingMap {
67 |
68 | private static final Logger LOGGER = LoggerFactory.getLogger(ESIndexMapState.class);
69 |
70 | public static class Options extends HashMap<String, String> { // typed accessors over a copy of the configuration map
71 |     private static final int DEFAULT_CACHE_SIZE = 1000;
72 |     private static final String DEFAULT_GLOBAL_KEY = "GLOBAL$KEY";
73 |     public static final String REPORT_ERROR = "trident.elasticsearch.state.report.error";
74 |     public static final String CACHE_SIZE = "trident.elasticsearch.state.cache.size";
75 |     public static final String GLOBAL_KEY = "trident.elasticsearch.state.global.key";
76 |
77 |     public Options(Map conf) {
78 |         super(conf);
79 |     }
80 |     public boolean reportError() { // false when the setting is absent
81 |         return Boolean.parseBoolean(asString(REPORT_ERROR));
82 |     }
83 |     public int getCachedMapSize( ) { // DEFAULT_CACHE_SIZE when the setting is absent
84 |         String cacheSize = asString(CACHE_SIZE);
85 |         return cacheSize != null ? Integer.parseInt(cacheSize) : DEFAULT_CACHE_SIZE;
86 |     }
87 |     public String getGlobalKey( ) { // DEFAULT_GLOBAL_KEY when the setting is absent
88 |         String globalKey = asString(GLOBAL_KEY);
89 |         return globalKey != null ? globalKey : DEFAULT_GLOBAL_KEY;
90 |     }
91 |     // The raw map copied by the constructor may hold Boolean or Integer values: read untyped to avoid a ClassCastException.
92 |     private String asString(String key) { Object raw = get(key); return raw != null ? raw.toString() : null; }
93 | }
94 |
95 | public static Factory> opaque(ClientFactory client, Class type) { // builds an OpaqueFactory (StateType.OPAQUE) with an OpaqueValueSerializer for the given type
96 | return new OpaqueFactory<>(client, StateType.OPAQUE, new OpaqueValueSerializer<>(type));
97 | }
98 |
99 | public static Factory> transactional(ClientFactory client, Class type) { // builds a TransactionalFactory (StateType.TRANSACTIONAL) with a TransactionalValueSerializer for the given type
100 | return new TransactionalFactory<>(client, StateType.TRANSACTIONAL, new TransactionalValueSerializer<>(type));
101 | }
102 |
103 | public static Factory nonTransactional(ClientFactory client, Class type) { // builds a NonTransactionalFactory (StateType.NON_TRANSACTIONAL) with a NonTransactionalValueSerializer for the given type
104 | return new NonTransactionalFactory<>(client, StateType.NON_TRANSACTIONAL, new NonTransactionalValueSerializer<>(type));
105 | }
106 |
107 | public abstract static class Factory implements StateFactory { // common base of the three factories below: holds what their makeState(...) needs
108 | protected ValueSerializer serializer; // handed to the ESIndexMapState created by the subclasses
109 | protected ClientFactory clientFactory; // subclasses call makeClient(conf) on it
110 | protected StateType stateType; // stored as given; not read by the factories in this file section
111 | // Stores the three collaborators as-is; no validation is done.
112 | public Factory(ClientFactory clientFactory, StateType stateType, ValueSerializer serializer) {
113 | this.clientFactory = clientFactory;
114 | this.stateType = stateType;
115 | this.serializer = serializer;
116 | }
117 | }
118 |
119 | public static class OpaqueFactory extends Factory> {
120 | // State factory that wraps the elasticsearch backing map in an OpaqueMap.
121 | public OpaqueFactory(ClientFactory clientFactory, StateType stateType, ValueSerializer> serializer) {
122 | super(clientFactory, stateType, serializer);
123 | }
124 | // Builds the state from the given configuration; iMetricsContext, i and i2 are not used here.
125 | @Override
126 | public State makeState(Map conf, IMetricsContext iMetricsContext, int i, int i2) {
127 | Options options = new Options(conf); // cache size, global key and error reporting come from conf
128 | ESIndexMapState> mapState = new ESIndexMapState<>(clientFactory.makeClient(conf), serializer, new BulkResponseHandler.LoggerResponseHandler(), options.reportError());
129 | MapState ms = OpaqueMap.build(new CachedMap(mapState, options.getCachedMapSize())); // backing map wrapped in a CachedMap of the configured size
130 | return new SnapshottableMap>(ms, new Values(options.getGlobalKey())); // the global key is the snapshot key
131 | }
132 | }
133 |
134 | public static class TransactionalFactory extends Factory> {
135 | // State factory that wraps the elasticsearch backing map in a TransactionalMap.
136 | public TransactionalFactory(ClientFactory clientFactory, StateType stateType, ValueSerializer> serializer) {
137 | super(clientFactory, stateType, serializer);
138 | }
139 | // Builds the state from the given configuration; iMetricsContext, i and i2 are not used here.
140 | @Override
141 | public State makeState(Map conf, IMetricsContext iMetricsContext, int i, int i2) {
142 | Options options = new Options(conf); // cache size, global key and error reporting come from conf
143 | ESIndexMapState> mapState = new ESIndexMapState<>(clientFactory.makeClient(conf), serializer, new BulkResponseHandler.LoggerResponseHandler(), options.reportError());
144 | MapState ms = TransactionalMap.build(new CachedMap(mapState, options.getCachedMapSize())); // backing map wrapped in a CachedMap of the configured size
145 | Values snapshotKey = new Values(options.getGlobalKey()); // the global key is the snapshot key
146 | return new SnapshottableMap<>(ms, snapshotKey);
147 | }
148 | }
149 |
150 | public static class NonTransactionalFactory extends Factory {
151 | // State factory that wraps the elasticsearch backing map in a NonTransactionalMap.
152 | public NonTransactionalFactory(ClientFactory clientFactory, StateType stateType, ValueSerializer serializer) {
153 | super(clientFactory, stateType, serializer);
154 | }
155 | // Builds the state from the given configuration; iMetricsContext, i and i2 are not used here.
156 | @Override
157 | public State makeState(Map conf, IMetricsContext iMetricsContext, int i, int i2) {
158 | Options options = new Options(conf); // cache size, global key and error reporting come from conf
159 | ESIndexMapState mapState = new ESIndexMapState<>(clientFactory.makeClient(conf), serializer, new BulkResponseHandler.LoggerResponseHandler(), options.reportError());
160 | MapState ms = NonTransactionalMap.build(new CachedMap<>(mapState, options.getCachedMapSize())); // backing map wrapped in a CachedMap of the configured size
161 | return new SnapshottableMap<>(ms, new Values(options.getGlobalKey())); // the global key is the snapshot key
162 | }
163 | }
164 |
165 | private BulkResponseHandler bulkResponseHandler; // the factories above pass a LoggerResponseHandler
166 | private ValueSerializer serializer;
167 | private Client client; // the factories above create it with clientFactory.makeClient(conf)
168 | // Set from Options.reportError() by the factories above.
169 | private boolean reportError;
170 | // Stores the collaborators as given; nothing else happens at construction time.
171 | public ESIndexMapState(Client client, ValueSerializer serializer, BulkResponseHandler bulkResponseHandler, boolean reportError) {
172 | this.client = client;
173 | this.serializer = serializer;
174 | this.bulkResponseHandler = bulkResponseHandler;
175 | this.reportError = reportError;
176 | }
177 |
178 | @Override
179 | public List multiGet(List> keys) {
180 | List responses = new ArrayList<>(keys.size());
181 |
182 | List groupByKeys = new ArrayList<>(keys.size());
183 | for(List