├── .gitignore
├── README.md
└── kettle-engine-storm
    ├── .gitignore
    ├── LICENSE.txt
    ├── pom.xml
    └── src
        ├── main
        │   ├── assembly
        │   │   ├── assembly.xml
        │   │   └── for-remote-topology-assembly.xml
        │   ├── java
        │   │   └── org
        │   │       └── pentaho
        │   │           └── kettle
        │   │               └── engines
        │   │                   └── storm
        │   │                       ├── BaseSpoutOutputCollector.java
        │   │                       ├── CappedValues.java
        │   │                       ├── CollectorRowListener.java
        │   │                       ├── IKettleOutputCollector.java
        │   │                       ├── KettleControlSignal.java
        │   │                       ├── KettleStorm.java
        │   │                       ├── KettleStormUtils.java
        │   │                       ├── KettleTopologyBuilder.java
        │   │                       ├── Notifier.java
        │   │                       ├── NotifierException.java
        │   │                       ├── StormExecutionEngine.java
        │   │                       ├── StormExecutionEngineConfig.java
        │   │                       ├── bolt
        │   │                       │   ├── KettleControlBolt.java
        │   │                       │   └── KettleStepBolt.java
        │   │                       ├── signal
        │   │                       │   ├── BasicSignalNotifier.java
        │   │                       │   ├── KettleSignal.java
        │   │                       │   ├── QuickCloseStormSignalConnectionFactory.java
        │   │                       │   ├── SignalClientFactory.java
        │   │                       │   └── SimpleSignalClientFactory.java
        │   │                       └── spout
        │   │                           └── KettleStepSpout.java
        │   └── resources
        │       ├── ccnums.ktr
        │       ├── kettle-storm.properties
        │       ├── stream-lookup.ktr
        │       └── test.ktr
        └── test
            ├── java
            │   └── org
            │       └── pentaho
            │           └── kettle
            │               └── engines
            │                   └── storm
            │                       └── bolt
            │                           └── KettleControlBoltTest.java
            └── resources
                └── empty
/.gitignore:
--------------------------------------------------------------------------------
1 | target/
2 | .settings/
3 | .project
4 | .classpath
5 | .externalToolBuilders/
6 | .idea/
7 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Kettle for Storm
2 | ============
3 | An experimental execution environment to execute a Kettle transformation as a Storm topology.
4 |
5 | Overview
6 | =============
7 | Kettle Storm is an experimental execution environment for running a Kettle transformation across a Storm cluster. It decomposes the transformation into a topology, wrapping every step in either a Storm Spout or a Bolt. The topology is then submitted to the cluster and automatically killed once the transformation has finished processing all data.
8 |
9 | Many things are not yet implemented. I've only tested this with the included transformation files on a small cluster. Remaining work includes:
10 |
11 | - Steps that do not emit at least one message for every input. Because Kettle does not have a message id with which to correlate Storm messages, we cannot guarantee a message has been completely processed until we see a record emitted from a given step. We also cannot determine which messages are produced for a given input if they are not emitted immediately as part of the same ```processRow()``` call. As such, we can only guarantee message processing when one input message produces at least one output message (see the sketch after this list). Until this is fixed, the following classes of steps will not work:
12 | - Sampling
13 | - Aggregation
14 | - Sorting
15 | - Filtering
16 | - First-class Spoon support
17 | - Repository-based transformations
18 | - Error handling
19 | - Conditional hops
20 | - Sub-transformations
21 | - Metrics: Kettle timing, throughput, logging
22 |
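This limitation stems from how completion is detected: every tuple a spout emits is tagged with a generated message id, and processing is only considered finished once all outstanding ids have been acked. A minimal sketch of the idea, using illustrative names (```collector```, ```tuple```, ```pendingMessageIds```); the actual logic lives in ```BaseSpoutOutputCollector``` and ```KettleStepSpout```:

```
// Sketch only -- illustrative names, not the project's exact API.
Set<Object> pendingMessageIds = Collections.newSetFromMap(new ConcurrentHashMap<Object, Boolean>());

// When a Kettle step wrapped in a spout emits a row:
Object messageId = UUID.randomUUID();   // lets Storm report back when this tuple is fully processed
collector.emit(tuple, messageId);       // backtype.storm.spout.SpoutOutputCollector
pendingMessageIds.add(messageId);

// In the spout's ack(Object messageId) callback:
pendingMessageIds.remove(messageId);
// The spout is done once its step has finished and pendingMessageIds is empty.
```
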
23 | Usage
24 | =====
25 | Executing a Kettle transformation with Storm
26 | --------------------------------------------
27 | The following commands will execute a transformation using a local in-memory test cluster.
28 |
29 | ### From a checkout
30 | A Kettle transformation can be submitted as a topology using the included KettleStorm command-line application. To invoke it from a checkout, use the Maven exec:java goal and pass the Kettle transformation you wish to execute:
31 | ```
32 | mvn package
33 | mvn exec:java -Dexec.args=src/main/resources/test.ktr -Dkettle-storm-local-mode=true
34 | ```
35 |
36 | ### From a release
37 | Extract the release and run:
38 | ```
39 | java -Dkettle-storm-local-mode=true -jar kettle-engine-storm-${version}-assembly.jar path/to/my.ktr
40 | ```
41 |
42 | Executing on a Storm cluster
43 | ---------------------------
44 | The following instructions assume you are using the artifacts packaged in a release.
45 |
46 | To execute a transformation on a Storm cluster running on the same host simply run:
47 | ```
48 | java -jar kettle-engine-storm-${version}-assembly.jar path/to/my.ktr
49 | ```
50 |
51 | To submit the transformation to a remote Nimbus host, include the host and port via the ```storm.options``` System property:
52 | ```
53 | java -Dstorm.options=nimbus.host=my-nimbus,nimbus.thrift.port=6628 -jar kettle-engine-storm-${version}-assembly.jar path/to/my.ktr
54 | ```
55 |
56 | ### Configuration via System Properties
57 |
58 | If additional options are required, they can be provided as System Properties via the command line in the format `-Dargument=value`.
59 |
60 | They are all optional and will be translated into ```StormExecutionEngineConfig``` properties (an example combining them follows this list):
61 |
62 | * ```kettle-storm-local-mode```: Flag indicating whether to execute the transformation as a Storm topology on an in-memory "local cluster" or submit it to an external Storm cluster. Defaults to ```false```.
63 | * ```kettle-storm-debug```: Flag indicating whether to enable debug messaging from Storm for the submitted topology. Defaults to ```false```.
64 | * ```kettle-storm-topology-jar```: The path to the jar file to submit with the Storm topology. This is only required if you have created a custom jar with additional classes you wish to make available to the Kettle transformation without having to manually install plugins or configure the environment of each Storm host.
65 |
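As an example, the properties can be combined on a single command line (the topology jar path below is purely illustrative):
```
java -Dkettle-storm-local-mode=true -Dkettle-storm-debug=true -Dkettle-storm-topology-jar=target/my-custom-topology.jar -jar kettle-engine-storm-${version}-assembly.jar path/to/my.ktr
```
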
66 | #### Storm Configuration
67 |
68 | By default, Kettle Storm submits topologies to a Nimbus host running on localhost with the default connection settings included with Storm. If you'd like to use a specific storm.yaml file, declare a System property on the command line:
69 | ```
70 | mvn exec:java -Dstorm.conf.file=/path/to/storm.yaml -Dexec.args=src/main/resources/test.ktr
71 | ```
72 |
73 | Storm configuration properties can be overridden by specifying them on the command line in the format:
74 | ```
75 | -Dstorm.options=nimbus.host=my-nimbus,nimbus.thrift.port=6628
76 | ```
77 |
78 | Embedding
79 | ---------
80 | The Kettle execution engine used to submit topologies can be embedded in a Java application using ```StormExecutionEngine``` and ```StormExecutionEngineConfig```.
81 |
82 | ```StormExecutionEngine``` provides convenience methods for integrating within multithreaded environments:
83 |
84 | - ```StormExecutionEngine.isComplete```: Blocks for the provided duration and returns ```true``` if the topology has completed successfully.
85 | - ```StormExecutionEngine.stop```: Kills the topology running the transformation if it is still executing.
86 |
87 | ### Example Code
88 |
89 | ```
90 | StormExecutionEngineConfig config = new StormExecutionEngineConfig();
91 | config.setTransformationFile("/path/to/my.ktr");
92 | StormExecutionEngine engine = new StormExecutionEngine(config);
93 | engine.init();
94 | engine.execute();
95 | engine.isComplete(10, TimeUnit.MINUTES); // Block for up to 10 minutes while the topology executes.
96 | ```
97 |
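If the embedding application needs to shut the topology down cleanly, it can poll for completion and stop the engine explicitly. A sketch of that pattern (the polling interval is arbitrary), mirroring what the bundled ```KettleStorm``` command-line application does:

```
StormExecutionEngineConfig config = new StormExecutionEngineConfig();
config.setTransformationFile("/path/to/my.ktr");
StormExecutionEngine engine = new StormExecutionEngine(config);
engine.init();
engine.execute();
try {
  // isComplete blocks for up to the given duration, so this loop is not a busy-wait.
  while (!engine.isComplete(100, TimeUnit.MILLISECONDS)) {
    // still running
  }
} finally {
  engine.stop(); // kill the topology whether or not it completed
}
```
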
98 | Building a release archive
99 | --------------------------
100 | Execute ```mvn clean package``` to produce the release artifacts. The jars will be stored in ```target/```.
101 |
102 | Multiple artifacts are produced via the ```mvn package``` target:
103 |
104 | ```
105 | kettle-engine-storm-0.0.1-SNAPSHOT-assembly.jar
106 | kettle-engine-storm-0.0.1-SNAPSHOT-for-remote-topology.jar
107 | kettle-engine-storm-0.0.1-SNAPSHOT.jar
108 | ```
109 |
110 | The ```-assembly.jar``` is used to schedule execution of a transformation and contains all dependencies. The ```-for-remote-topology.jar``` contains the code submitted to the cluster with the topology, along with all of its dependencies. The plain jar contains only this project's compiled classes, without additional dependencies.
111 |
112 | External References
113 | ===================
114 | - Kettle: http://kettle.pentaho.com
115 | - Storm: http://storm-project.net
116 |
--------------------------------------------------------------------------------
/kettle-engine-storm/.gitignore:
--------------------------------------------------------------------------------
1 | kettle-engine-storm.iml
2 | target
3 |
--------------------------------------------------------------------------------
/kettle-engine-storm/LICENSE.txt:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
204 |
205 | APACHE HADOOP SUBCOMPONENTS:
206 |
207 | The Apache Hadoop project contains subcomponents with separate copyright
208 | notices and license terms. Your use of the source code for the these
209 | subcomponents is subject to the terms and conditions of the following
210 | licenses.
211 |
212 | For the org.apache.hadoop.util.bloom.* classes:
213 |
214 | /**
215 | *
216 | * Copyright (c) 2005, European Commission project OneLab under contract
217 | * 034819 (http://www.one-lab.org)
218 | * All rights reserved.
219 | * Redistribution and use in source and binary forms, with or
220 | * without modification, are permitted provided that the following
221 | * conditions are met:
222 | * - Redistributions of source code must retain the above copyright
223 | * notice, this list of conditions and the following disclaimer.
224 | * - Redistributions in binary form must reproduce the above copyright
225 | * notice, this list of conditions and the following disclaimer in
226 | * the documentation and/or other materials provided with the distribution.
227 | * - Neither the name of the University Catholique de Louvain - UCL
228 | * nor the names of its contributors may be used to endorse or
229 | * promote products derived from this software without specific prior
230 | * written permission.
231 | *
232 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
233 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
234 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
235 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
236 | * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
237 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
238 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
239 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
240 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
241 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
242 | * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
243 | * POSSIBILITY OF SUCH DAMAGE.
244 | */
245 |
--------------------------------------------------------------------------------
/kettle-engine-storm/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 |
5 | org.pentaho.kettle.engines
6 | kettle-engine-storm
7 | 0.0.2-SNAPSHOT
8 | jar
9 |
10 | kettle-engine-storm
11 | http://github.com/pentaho/kettle-storm
12 |
13 |
14 | UTF-8
15 | 0.9.0.1
16 |
17 | compile
18 | 0.2.0
19 | TRUNK-SNAPSHOT
20 | org.pentaho.kettle.engines.storm.KettleStorm
21 | ${project.build.finalName}-for-remote-topology.jar
22 |
23 |
24 |
25 |
26 | github-releases
27 | http://oss.sonatype.org/content/repositories/github-releases/
28 |
29 |
30 | clojars.org
31 | http://clojars.org/repo
32 |
33 |
34 | pentaho
35 | http://repo.pentaho.org/artifactory/repo/
36 |
37 |
38 |
39 |
40 |
41 | storm
42 | storm
43 | ${storm.version}
44 | provided
45 |
46 |
47 |
48 | com.github.ptgoetz
49 | storm-signals
50 | ${storm.signals.version}
51 |
52 |
53 |
54 | pentaho-kettle
55 | kettle-engine
56 | ${kettle.version}
57 |
58 |
59 |
60 | pentaho-kettle
61 | kettle-core
62 | ${kettle.version}
63 |
64 |
65 | xerces
66 | xercesImpl
67 |
68 |
69 | xerces
70 | xmlParserAPIs
71 |
72 |
73 |
74 |
75 |
76 | junit
77 | junit
78 | 4.10
79 | test
80 |
81 |
82 |
83 | org.easymock
84 | easymock
85 | 3.2
86 | test
87 |
88 |
89 |
90 |
91 |
92 |
93 | src/main/resources
94 | true
95 |
96 |
97 |
98 |
99 |
100 | org.apache.maven.plugins
101 | maven-compiler-plugin
102 | 2.3.2
103 |
104 | 1.7
105 | 1.7
106 |
107 |
108 |
109 | org.codehaus.mojo
110 | exec-maven-plugin
111 | 1.2.1
112 |
113 |
114 |
115 | java
116 |
117 |
118 |
119 |
120 | ${main.class}
121 |
122 | compile
123 |
127 |
128 |
129 | kettle-storm-topology-jar
130 | target/${kettle.storm.topology.jar}
131 |
132 |
133 |
134 |
135 |
136 | maven-assembly-plugin
137 | 2.2-beta-5
138 |
139 |
140 |
141 | for-remote-topology
142 | prepare-package
143 |
144 | single
145 |
146 |
147 |
148 | src/main/assembly/for-remote-topology-assembly.xml
149 |
150 |
151 |
152 | ${main.class}
153 |
154 |
155 |
156 |
157 |
158 |
159 | assembly
160 | prepare-package
161 |
162 | single
163 |
164 |
165 |
166 | src/main/assembly/assembly.xml
167 |
168 |
169 |
170 | ${main.class}
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/assembly/assembly.xml:
--------------------------------------------------------------------------------
1 |
2 | assembly
3 |
4 | jar
5 |
6 | false
7 |
8 |
9 | true
10 | runtime
11 |
12 |
13 | true
14 | provided
15 |
16 |
17 |
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/assembly/for-remote-topology-assembly.xml:
--------------------------------------------------------------------------------
1 |
2 | for-remote-topology
3 |
4 | jar
5 |
6 | false
7 |
8 |
9 | true
10 | runtime
11 |
12 |
13 |
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/BaseSpoutOutputCollector.java:
--------------------------------------------------------------------------------
1 | /*
2 | * *****************************************************************************
3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
4 | * *****************************************************************************
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with
8 | * the License. You may obtain a copy of the License at
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | * ****************************************************************************
17 | */
18 |
19 | package org.pentaho.kettle.engines.storm;
20 |
21 | import backtype.storm.spout.SpoutOutputCollector;
22 |
23 | import java.util.List;
24 | import java.util.Set;
25 | import java.util.UUID;
26 |
27 | /**
28 |  * Wraps a {@link SpoutOutputCollector} so pending messages may be tracked. A {@link org.pentaho.kettle.engines.storm.spout.KettleStepSpout}
29 | * relies on this to know when all the data it has emitted has been fully processed.
30 | */
31 | public class BaseSpoutOutputCollector implements IKettleOutputCollector {
32 | private SpoutOutputCollector out;
33 | /**
34 |    * The collection to add message ids to when emitting tuples. This should be
35 |    * thread-safe.
36 |    */
37 |   private Set<Object> pendingMessageIds;
38 |
39 |   public BaseSpoutOutputCollector(SpoutOutputCollector out, Set<Object> pendingMessageIds) {
40 | if (out == null) {
41 | throw new NullPointerException("output collector must not be null");
42 | }
43 | if (pendingMessageIds == null) {
44 | throw new NullPointerException("pending messages set must not be null");
45 | }
46 | this.out = out;
47 | this.pendingMessageIds = pendingMessageIds;
48 | }
49 |
50 | @Override
51 |   public List<Integer> emit(List<Object> tuple) {
52 | // Generate a message Id so these tuples can be properly ACK'd when they've
53 | // been processed. We use message acknowledging to determine when all output
54 | // from a Spout has been processed.
55 | Object messageId = UUID.randomUUID();
56 |     List<Integer> taskIds = out.emit(tuple, messageId);
57 | pendingMessageIds.add(messageId);
58 | return taskIds;
59 | }
60 |
61 | }
62 |
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/CappedValues.java:
--------------------------------------------------------------------------------
1 | /*
2 | * *****************************************************************************
3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
4 | * *****************************************************************************
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with
8 | * the License. You may obtain a copy of the License at
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | * ****************************************************************************
17 | */
18 |
19 | package org.pentaho.kettle.engines.storm;
20 |
21 | import backtype.storm.tuple.Values;
22 |
23 | /**
24 | * A convenience class for making tuples of values with at most {@code N}
25 | * values.
26 | */
27 | @SuppressWarnings("serial")
28 | public class CappedValues extends Values {
29 | public CappedValues(int maxValues, Object... values) {
30 | if (maxValues < 1) {
31 | throw new IllegalArgumentException("max values must be > 0");
32 | }
33 |
34 | int max = Math.min(values.length, maxValues);
35 | for (int i = 0; i < max; i++) {
36 | add(values[i]);
37 | }
38 | }
39 |
40 | }
41 |
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/CollectorRowListener.java:
--------------------------------------------------------------------------------
1 | /*
2 | * *****************************************************************************
3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
4 | * *****************************************************************************
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with
8 | * the License. You may obtain a copy of the License at
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | * ****************************************************************************
17 | */
18 |
19 | package org.pentaho.kettle.engines.storm;
20 |
21 | import org.pentaho.di.core.exception.KettleStepException;
22 | import org.pentaho.di.core.row.RowMetaInterface;
23 | import org.pentaho.di.trans.step.RowListener;
24 | import org.pentaho.di.trans.step.StepMetaDataCombi;
25 |
26 | /**
27 | * Listens for rows emitted from Kettle steps and passes them to an {@link IKettleOutputCollector} so they may be routed by Storm.
28 | */
29 | public class CollectorRowListener implements RowListener {
30 |
31 | private KettleStormUtils utils = new KettleStormUtils();
32 |
33 | private IKettleOutputCollector collector;
34 | private int numFields = 0;
35 |
36 | public CollectorRowListener(StepMetaDataCombi step, IKettleOutputCollector collector, int numFields) {
37 | if (step == null || collector == null) {
38 | throw new NullPointerException();
39 | }
40 | if (numFields < 1) {
41 | throw new IllegalArgumentException("numFields must be > 0");
42 | }
43 | this.collector = collector;
44 | this.numFields = numFields;
45 | }
46 |
47 | @Override
48 | public void errorRowWrittenEvent(RowMetaInterface rowMeta, Object[] out) throws KettleStepException {
49 | }
50 |
51 | @Override
52 | public void rowReadEvent(RowMetaInterface rowMeta, Object[] out) throws KettleStepException {
53 | }
54 |
55 | @Override
56 | public void rowWrittenEvent(RowMetaInterface rowMeta, Object[] out) throws KettleStepException {
57 | collector.emit(new CappedValues(numFields, utils.convertToRow(rowMeta, out)));
58 | }
59 |
60 | }
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/IKettleOutputCollector.java:
--------------------------------------------------------------------------------
1 | /*
2 | * *****************************************************************************
3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
4 | * *****************************************************************************
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with
8 | * the License. You may obtain a copy of the License at
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | * ****************************************************************************
17 | */
18 |
19 | package org.pentaho.kettle.engines.storm;
20 |
21 | import java.util.List;
22 |
23 | /**
24 | * The main API for emitting tuples from Kettle to Storm.
25 | */
26 | public interface IKettleOutputCollector {
27 |   List<Integer> emit(List<Object> tuple);
28 | }
29 |
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/KettleControlSignal.java:
--------------------------------------------------------------------------------
1 | /*
2 | * *****************************************************************************
3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
4 | * *****************************************************************************
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with
8 | * the License. You may obtain a copy of the License at
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | * ****************************************************************************
17 | */
18 |
19 | package org.pentaho.kettle.engines.storm;
20 |
21 | /**
22 |  * Control signals are sent from components to their dependencies to signal state
23 | * changes.
24 | */
25 | public enum KettleControlSignal {
26 | /**
27 | * Indicates a component is done processing.
28 | */
29 | COMPLETE
30 | }
31 |
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/KettleStorm.java:
--------------------------------------------------------------------------------
1 | /*
2 | * *****************************************************************************
3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
4 | * *****************************************************************************
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with
8 | * the License. You may obtain a copy of the License at
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | * ****************************************************************************
17 | */
18 |
19 | package org.pentaho.kettle.engines.storm;
20 |
21 | import org.pentaho.di.core.exception.KettleException;
22 | import org.slf4j.Logger;
23 | import org.slf4j.LoggerFactory;
24 |
25 | import java.util.concurrent.TimeUnit;
26 |
27 | /**
28 |  * Command-line entry point that submits a Kettle transformation as a Storm topology and waits for it to complete.
29 |  */
30 | public class KettleStorm {
31 | private static final Logger logger = LoggerFactory.getLogger(KettleStorm.class);
32 |
33 | public static void main(String[] args) throws Exception {
34 | if (args == null || args.length != 1) {
35 | throw new IllegalArgumentException("Must specify transformation file name");
36 | }
37 |
38 | StormExecutionEngineConfig config = new StormExecutionEngineConfig();
39 | config.setDebugMode(Boolean.valueOf(System.getProperty("kettle-storm-debug", "false")));
40 | config.setLocalMode(Boolean.valueOf(System.getProperty("kettle-storm-local-mode", "false")));
41 | config.setTopologyJar(System.getProperty("kettle-storm-topology-jar", StormExecutionEngineConfig.loadStormTopologyJarFromConfiguration()));
42 | config.setTransformationFile(args[0]);
43 |
44 | final StormExecutionEngine engine = new StormExecutionEngine(config);
45 |
46 | if (config.isLocalMode()) {
47 | logger.debug("Executing in local mode");
48 | }
49 |
50 | engine.init();
51 | engine.execute();
52 |
53 | Runtime.getRuntime().addShutdownHook(new Thread() {
54 | @Override
55 | public void run() {
56 | logger.info("Stopping transformation");
57 | try {
58 | engine.stop();
59 | } catch (KettleException ex) {
60 | logger.error("Error stopping topology for Kettle transformation", ex);
61 | }
62 | }
63 | });
64 |
65 | logger.info("Waiting for transformation to complete...");
66 | logger.info("Press CTRL-C to kill the topology and exit.");
67 |
68 | try {
69 | do {
70 | // Wait until the transformation is complete
71 | } while (!engine.isComplete(100, TimeUnit.MILLISECONDS));
72 | logger.debug("Transformation complete!");
73 | } finally {
74 | engine.stop();
75 | }
76 | }
77 |
78 | }
79 |
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/KettleStormUtils.java:
--------------------------------------------------------------------------------
1 | /*
2 | * *****************************************************************************
3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
4 | * *****************************************************************************
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with
8 | * the License. You may obtain a copy of the License at
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | * ****************************************************************************
17 | */
18 |
19 | package org.pentaho.kettle.engines.storm;
20 |
21 | import java.io.IOException;
22 | import java.io.Serializable;
23 | import java.util.HashSet;
24 | import java.util.List;
25 | import java.util.Set;
26 | import java.util.UUID;
27 |
28 | import org.pentaho.di.core.Const;
29 | import org.pentaho.di.core.KettleEnvironment;
30 | import org.pentaho.di.core.RowSet;
31 | import org.pentaho.di.core.exception.KettleException;
32 | import org.pentaho.di.core.row.RowMetaInterface;
33 | import org.pentaho.di.core.row.ValueMetaInterface;
34 | import org.pentaho.di.trans.Trans;
35 | import org.pentaho.di.trans.TransConfiguration;
36 | import org.pentaho.di.trans.TransExecutionConfiguration;
37 | import org.pentaho.di.trans.TransMeta;
38 | import org.pentaho.di.trans.step.StepMetaDataCombi;
39 | import org.pentaho.di.trans.step.errorhandling.StreamInterface;
40 | import org.pentaho.kettle.engines.storm.bolt.KettleControlBolt;
41 | import org.pentaho.kettle.engines.storm.bolt.KettleStepBolt;
42 | import org.pentaho.kettle.engines.storm.signal.BasicSignalNotifier;
43 | import org.pentaho.kettle.engines.storm.spout.KettleStepSpout;
44 | import org.slf4j.Logger;
45 | import org.slf4j.LoggerFactory;
46 |
47 | import backtype.storm.Config;
48 | import backtype.storm.generated.StormTopology;
49 | import backtype.storm.topology.BoltDeclarer;
50 | import backtype.storm.topology.OutputFieldsDeclarer;
51 | import backtype.storm.topology.TopologyBuilder;
52 | import backtype.storm.tuple.Fields;
53 |
54 | /**
55 | * A collection of utility methods for working with Kettle and Storm.
56 | *
57 | * TODO refactor this into more meaningful components
58 | */
59 | @SuppressWarnings("serial")
60 | public class KettleStormUtils implements Serializable {
61 | private static final Logger logger = LoggerFactory
62 | .getLogger(KettleStormUtils.class);
63 |
64 | private static final String KETTLE_TOPOLOGY_NAME = "kettle.topology.name";
65 |
66 | /**
67 | * Create a topology from a transformation.
68 | *
69 | * @param conf Storm configuration to use to configure connection information.
70 | * @param meta Transformation meta to build topology from.
71 | * @return Storm topology capable of executing the Kettle transformation.
72 | * @throws KettleException Error loading the transformation details or initializing the kettle environment
73 | * @throws IOException Error generating the transformation XML from the meta.
74 | */
75 | public StormTopology createTopology(Config conf, TransMeta meta) throws KettleException, IOException {
76 | initKettleEnvironment();
77 | TransConfiguration transConfig = new TransConfiguration(meta,
78 | new TransExecutionConfiguration());
79 | String transXml = transConfig.getXML();
80 | Trans trans = new Trans(meta);
81 | trans.prepareExecution(null);
82 |     List<StepMetaDataCombi> steps = trans.getSteps();
83 |
84 | String topologyName = generateTopologyName(meta.getName());
85 | setTopologyName(conf, topologyName);
86 |
87 | TopologyBuilder builder = new TopologyBuilder();
88 |
89 |     Set<String> leafSteps = collectLeafStepNames(trans);
90 |
91 | String controlBoltId = topologyName + "-control-bolt";
92 | BasicSignalNotifier notifier = new BasicSignalNotifier(controlBoltId);
93 | BoltDeclarer controlBoltDeclarer = builder.setBolt(controlBoltId, new KettleControlBolt(topologyName, notifier, leafSteps));
94 | for (StepMetaDataCombi step : steps) {
95 | step.step.init(step.meta, step.data);
96 |
97 | // The control bolt must receive all signal tuples from all leaf steps
98 | if (leafSteps.contains(step.step.getStepname())) {
99 | controlBoltDeclarer.allGrouping(step.step.getStepname(), "signal");
100 | }
101 |
102 | if (isSpout(step)) {
103 | builder.setSpout(step.step.getStepname(), new KettleStepSpout(
104 | step.step.getStepname(), transXml, step), step.step.getStepMeta().getCopies())
105 | .setMaxTaskParallelism(step.step.getStepMeta().getCopies());
106 | } else {
107 | BoltDeclarer bd = builder.setBolt(step.step.getStepname(),
108 | new KettleStepBolt(step.step.getStepname(), transXml,
109 | step), step.step.getStepMeta().getCopies())
110 | .setMaxTaskParallelism(step.step.getStepMeta().getCopies());
111 | for (StreamInterface info : step.stepMeta.getStepMetaInterface().getStepIOMeta().getInfoStreams()) {
112 | StepMetaDataCombi infoStep = findStep(trans,
113 | info.getStepname());
114 | bd.fieldsGrouping(info.getStepname(), getOutputFields(infoStep));
115 | bd.allGrouping(info.getStepname(), "signal");
116 | }
117 | for (RowSet input : step.step.getInputRowSets()) {
118 | StepMetaDataCombi inputStep = findStep(trans,
119 | input.getOriginStepName());
120 | bd.fieldsGrouping(input.getOriginStepName(),
121 | getOutputFields(inputStep));
122 | // All bolts must receive all signal tuples from all previous steps
123 | bd.allGrouping(input.getOriginStepName(), "signal");
124 | }
125 | }
126 | }
127 |
128 | return builder.createTopology();
129 | }
130 |
131 | /**
132 | * Find all steps that do not have output hops.
133 | *
134 | * @param trans
135 | * The transformation.
136 | * @return The set of all steps that do not have output hops.
137 | */
138 |   private Set<String> collectLeafStepNames(Trans trans) {
139 |     Set<String> leafSteps = new HashSet<String>();
140 | for (StepMetaDataCombi step : trans.getSteps()) {
141 | if (isLeafStep(trans, step)) {
142 | leafSteps.add(step.step.getStepname());
143 | }
144 | }
145 | return leafSteps;
146 | }
147 |
148 | private boolean isLeafStep(Trans trans, StepMetaDataCombi step) {
149 | return trans.getTransMeta().findNextSteps(step.stepMeta).isEmpty();
150 | }
151 |
152 | /**
153 | * Finds a step by name within a transformation.
154 | *
155 | * @param trans
156 | * Transformation to search within.
157 | * @param stepName
158 | * Name of step to look up.
159 | * @return The first step found whose stepname matches the provided one.
160 | */
161 | private StepMetaDataCombi findStep(Trans trans, String stepName) {
162 | for (StepMetaDataCombi step : trans.getSteps()) {
163 | if (stepName.equals(step.step.getStepname())) {
164 | return step;
165 | }
166 | }
167 | throw new RuntimeException("Unable to find step with name " + stepName);
168 | }
169 |
170 | /**
171 | * Determines if the step should be converted to a Spout. A step should be
172 | * converted to a spout if it receives no input.
173 | *
174 |    * @param step Step to check for input row sets.
175 |    * @return True if the step receives no input and should be wrapped in a spout.
176 | */
177 | private boolean isSpout(StepMetaDataCombi step) {
178 | return step.step.getInputRowSets().isEmpty();
179 | }
180 |
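  /**
   * Declares the output fields a step produces so Storm knows how to route its tuples.
   *
   * @param step Step whose output fields should be declared.
   * @param declarer Storm declarer to register the fields with.
   */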
181 | public void declareOutputFields(StepMetaDataCombi step,
182 | OutputFieldsDeclarer declarer) {
183 | declarer.declare(getOutputFields(step));
184 | }
185 |
186 | /**
187 | * Determine the output row meta for this step.
188 | *
189 | * @param step Step to determine output rows for.
190 | * @return The output row meta for the step provided.
191 | */
192 | private RowMetaInterface getOutputRowMeta(StepMetaDataCombi step) {
193 | try {
194 | return step.step.getTrans().getTransMeta()
195 | .getStepFields(step.step.getStepMeta());
196 | } catch (KettleException ex) {
197 | throw new RuntimeException("Unable to get output fields from step "
198 |         + step.step.getStepname(), ex);
199 | }
200 | }
201 |
202 | /**
203 | * Returns the fields a step produces as output.
204 | *
205 | * @param step Step to determine output fields for.
206 | * @return The field layout the step will produce.
207 | */
208 | public Fields getOutputFields(StepMetaDataCombi step) {
209 | String[] fieldNames = getOutputRowMeta(step).getFieldNames();
210 | String[] outputFieldNames = new String[fieldNames.length];
211 | for (int i = 0; i < fieldNames.length; i ++) {
212 | outputFieldNames[i] = step.step.getStepname() + "-" + fieldNames[i];
213 | }
214 | return new Fields(outputFieldNames);
215 | }
216 |
217 | /**
218 | * Initialize the Kettle environment.
219 | *
220 | * @throws KettleException If an error is encountered during initialization
221 | */
222 | public void initKettleEnvironment() throws KettleException {
223 | if (!KettleEnvironment.isInitialized()) {
224 | logger.debug("Initializing Kettle Environment...");
225 | logger.debug("Kettle Home: " + Const.getKettleDirectory());
226 | KettleEnvironment.init();
227 | }
228 | }
229 |
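  /**
   * Loads the transformation from its XML representation and returns the initialized
   * step with the given name, wiring up the row meta of its input row sets.
   *
   * @param transXml XML representation of the transformation.
   * @param stepName Name of the step to initialize and return.
   * @return The initialized step.
   * @throws KettleException If the Kettle environment cannot be initialized or the transformation cannot be loaded.
   */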
230 | public StepMetaDataCombi getStep(String transXml, String stepName) throws KettleException {
231 | initKettleEnvironment();
232 | TransConfiguration transConfiguration = TransConfiguration
233 | .fromXML(transXml);
234 | TransMeta transMeta = transConfiguration.getTransMeta();
235 | Trans trans = new Trans(transMeta);
236 | trans.prepareExecution(null);
237 | transMeta.setUsingThreadPriorityManagment(false);
238 |     trans.setRunning(true); // Mark the transformation as running so steps will process rows.
239 | for (StepMetaDataCombi step : trans.getSteps()) {
240 | if (stepName.equals(step.step.getStepname())) {
241 | if (!step.step.init(step.meta, step.data)) {
242 | throw new RuntimeException("Unable to initialize step "
243 | + step.step.getStepname());
244 | }
245 | for (RowSet rowSet : step.step.getInputRowSets()) {
246 | rowSet.setRowMeta(getOutputRowMeta(findStep(trans,
247 | rowSet.getOriginStepName())));
248 | }
249 | return step;
250 | }
251 | }
252 | throw new RuntimeException("Unable to locate step: " + stepName);
253 | }
254 |
255 | /**
256 | * Convert a row from Kettle object to Java object.
257 | *
258 | * @param rowMeta Meta information about the row provided.
259 | * @param tuple Row of data to convert.
260 | * @return Converted values based on the row meta given.
261 | */
262 | public Object[] convertToRow(RowMetaInterface rowMeta, Object[] tuple) {
263 | for (int i = 0; i < tuple.length; i++) {
264 | try {
265 | if (tuple[i] != null) {
266 | ValueMetaInterface meta = rowMeta.getValueMeta(i);
267 | switch (meta.getType()) {
268 | case ValueMetaInterface.TYPE_STRING:
269 | tuple[i] = meta.getString(tuple[i]);
270 | break;
271 | case ValueMetaInterface.TYPE_NUMBER:
272 | tuple[i] = meta.getNumber(tuple[i]);
273 | break;
274 | case ValueMetaInterface.TYPE_INTEGER:
275 | tuple[i] = meta.getInteger(tuple[i]);
276 | break;
277 | case ValueMetaInterface.TYPE_DATE:
278 | tuple[i] = meta.getDate(tuple[i]);
279 | break;
280 | default:
281 | throw new IllegalArgumentException(
282 | "Unsupported data type: "
283 | + rowMeta.getValueMeta(i).getTypeDesc());
284 | }
285 | }
286 | } catch (Exception ex) {
287 | throw new RuntimeException("unable to convert value: "
288 | + tuple[i], ex);
289 | }
290 | }
291 |
292 | return tuple;
293 | }
294 |
295 | /**
296 | * Generate a unique topology name.
297 | *
298 |    * @param name Prefix for the topology name so it is easily identifiable.
299 | * @return A unique topology name, prefixed with the name provided.
300 | */
301 | private String generateTopologyName(String name) {
302 | return name + "-" + UUID.randomUUID().toString();
303 | }
304 |
305 | /**
306 | * Set the topology name in a configuration so it can be retrieved by another
307 | * process later.
308 | *
309 | * @param conf Configuration to store topology name in.
310 | * @param name Topology name to set.
311 | */
312 | private void setTopologyName(Config conf, String name) {
313 | conf.put(KETTLE_TOPOLOGY_NAME, name);
314 | }
315 |
316 | /**
317 | * Retrieve the topology name from a Storm configuration.
318 | *
319 | * @param conf Storm configuration used to create the topology from a Kettle
320 | * transformation.
321 | * @return The name of the topology created for a Kettle transformation with
322 | * the provided configuration.
323 | */
324 | public String getTopologyName(Config conf) {
325 | return (String) conf.get(KETTLE_TOPOLOGY_NAME);
326 | }
327 | }
328 |
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/KettleTopologyBuilder.java:
--------------------------------------------------------------------------------
1 | package org.pentaho.kettle.engines.storm;
2 |
3 | import org.pentaho.di.core.exception.KettleException;
4 | import org.pentaho.di.trans.TransMeta;
5 |
6 | import backtype.storm.Config;
7 | import backtype.storm.generated.StormTopology;
8 |
9 | public interface KettleTopologyBuilder {
10 | /**
11 | * Build a topology capable of executing the provided transformation.
12 | *
13 | * @param conf
14 | * Storm configuration to use to configure connection
15 | * information.
16 | * @param trans
17 | * Transformation meta to build topology from.
18 | * @return Storm topology capable of executing the Kettle transformation.
19 | * @throws KettleException
20 | * Error loading the transformation details or initializing the
21 | * kettle environment
22 | */
23 | StormTopology build(Config config, TransMeta trans) throws KettleException;
24 | }
25 |
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/Notifier.java:
--------------------------------------------------------------------------------
1 | /*
2 | * *****************************************************************************
3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
4 | * *****************************************************************************
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with
8 | * the License. You may obtain a copy of the License at
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | * ****************************************************************************
17 | */
18 |
19 | package org.pentaho.kettle.engines.storm;
20 |
21 | import java.io.Serializable;
22 | import java.util.Map;
23 |
24 | /**
25 | * This provides a mechanism for signaling a state change. For example, by
26 | * sending a {@link KettleControlSignal#COMPLETE} a transformation can notify
27 | * interested parties it has completed.
28 | */
29 | public interface Notifier extends Serializable {
30 | /**
31 | * Initialize this notifier.
32 | *
33 | * @param stormConf
34 | * The Storm configuration for this notifier.
35 | */
36 | @SuppressWarnings("rawtypes")
37 | void init(Map stormConf);
38 |
39 | /**
40 | * Signals a state change.
41 | *
42 | * @param id
43 | * The identifier sending the message.
44 | * @param signal
45 | * The control signal.
46 |    * @throws NotifierException
47 | * An error was encountered while sending notification messages.
48 | */
49 | void notify(String id, KettleControlSignal signal) throws NotifierException;
50 |
51 | /**
52 | * Called when the component utilizing this notifier is being cleaned up.
53 | * There is no guarantee that cleanup will be called.
54 | */
55 | void cleanup();
56 | }
57 |
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/NotifierException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * *****************************************************************************
3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
4 | * *****************************************************************************
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with
8 | * the License. You may obtain a copy of the License at
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | * ****************************************************************************
17 | */
18 |
19 | package org.pentaho.kettle.engines.storm;
20 |
21 | /**
22 | * Indicates an error sending a notification.
23 | */
24 | @SuppressWarnings("serial")
25 | public class NotifierException extends Exception {
26 |
27 | public NotifierException(String message, Throwable cause) {
28 | super(message, cause);
29 | }
30 |
31 | public NotifierException(String message) {
32 | super(message);
33 | }
34 |
35 | public NotifierException(Throwable cause) {
36 | super(cause);
37 | }
38 |
39 | }
40 |
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/StormExecutionEngine.java:
--------------------------------------------------------------------------------
1 | /*
2 | * *****************************************************************************
3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
4 | * *****************************************************************************
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with
8 | * the License. You may obtain a copy of the License at
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | * ****************************************************************************
17 | */
18 |
19 | package org.pentaho.kettle.engines.storm;
20 |
21 | import backtype.storm.Config;
22 | import backtype.storm.LocalCluster;
23 | import backtype.storm.StormSubmitter;
24 | import backtype.storm.contrib.signals.SignalListener;
25 | import backtype.storm.contrib.signals.StormSignalConnection;
26 | import backtype.storm.generated.NotAliveException;
27 | import backtype.storm.generated.StormTopology;
28 | import backtype.storm.utils.NimbusClient;
29 | import backtype.storm.utils.Utils;
30 | import org.apache.thrift7.TException;
31 | import org.pentaho.di.core.exception.KettleException;
32 | import org.pentaho.di.trans.TransMeta;
33 | import org.pentaho.kettle.engines.storm.signal.QuickCloseStormSignalConnectionFactory;
34 | import org.slf4j.Logger;
35 | import org.slf4j.LoggerFactory;
36 |
37 | import java.io.IOException;
38 | import java.util.Collections;
39 | import java.util.Map;
40 | import java.util.concurrent.CountDownLatch;
41 | import java.util.concurrent.TimeUnit;
42 |
43 | /**
44 | * An engine capable of processing data as defined by a Kettle transformation as a Storm topology. It provides a simple mechanism for
45 | * starting, polling for status, and stopping a Storm topology.
46 | */
47 | public class StormExecutionEngine {
48 | private static final Logger logger = LoggerFactory.getLogger(StormExecutionEngine.class);
49 | private static KettleStormUtils util = new KettleStormUtils();
50 | private QuickCloseStormSignalConnectionFactory signalConnectionFactory = new QuickCloseStormSignalConnectionFactory();
51 |
52 | private LocalCluster localCluster = null;
53 |
54 | private StormExecutionEngineConfig config;
55 |
56 | private TransMeta meta;
57 |
58 | private Config stormConfig;
59 |
60 | // Flag to indicate the engine is executing
61 | private volatile boolean running = false;
62 | // This is used to synchronize blocking for the transformation to complete
63 | private CountDownLatch transCompleteLatch;
64 | private String topologyName;
65 |
66 | public StormExecutionEngine(StormExecutionEngineConfig config) {
67 | if (config == null) {
68 | throw new NullPointerException("config must not be null");
69 | }
70 | this.config = config;
71 | }
72 |
73 | /**
74 | * Prepare the engine to execute the transformation located at
75 | * {@link StormExecutionEngineConfig#getTransformationFile()}.
76 | *
77 | * @throws KettleException Error loading transformation
78 | */
79 | public void init() throws KettleException {
80 | stormConfig = loadStormConfig();
81 | util.initKettleEnvironment();
82 | meta = new TransMeta(config.getTransformationFile());
83 | setJarToUpload(config.getTopologyJar());
84 | }
85 |
86 | /**
87 | * Execute the transformation as a Storm topology.
88 | *
89 | * @throws IOException Error generating the transformation XML from the meta.
90 | * @throws KettleException Error reading transformation settings, submitting the topology,
91 | * or establishing the signal connection to ZooKeeper.
92 | * @throws InterruptedException Thread was interrupted while waiting for the topology to
93 | * complete. {@link #stop()} should be called before
94 | * propagating.
95 | */
96 | public synchronized void execute() throws KettleException, IOException, InterruptedException {
97 | StormTopology topology = util.createTopology(stormConfig, meta);
98 |
99 | topologyName = util.getTopologyName(stormConfig);
100 |
101 | transCompleteLatch = new CountDownLatch(1);
102 | // TODO Support more than one end step. Deserialize message and check for specific steps completing instead of just counting them.
103 | final StormSignalConnection signalConnection = signalConnectionFactory.createSignalConnection(topologyName, new SignalListener() {
104 | @Override
105 | public void onSignal(byte[] data) {
106 | // If anything is received for the topology name we consider it to mean the transformation is complete
107 | logger.info("Received transformation complete message");
108 | transCompleteLatch.countDown();
109 | }
110 | });
111 |
112 | submitTopology(topologyName, stormConfig, topology);
113 | logger.info("Submitted transformation as topology '{}'", topologyName);
114 | running = true;
115 | try {
116 | signalConnection.init(stormConfig);
117 | } catch (Exception ex) {
118 | try {
119 | stop();
120 | } catch (KettleException e) {
121 | logger.warn("Error stopping topology after signal connection failure", e);
122 | }
123 | throw new KettleException("Unable to establish signal connection to ZooKeeper.", ex);
124 | }
125 | }
126 |
127 | /**
128 | * Return the topology name that was started as a result of executing this
129 | * engine.
130 | *
131 | * @return The topology name used to execute the transformation provided to
132 | * this engine, or null if the engine has not been started.
133 | */
134 | public String getTopologyName() {
135 | return topologyName;
136 | }
137 |
138 | /**
139 | * A blocking call to determine if the transformation is done executing.
140 | *
141 | * @param timeout the maximum time to wait
142 | * @param unit the time unit of the timeout argument
143 | * @return True if the topology this engine executed is complete
144 | * @throws InterruptedException If the current thread is interrupted while waiting
145 | * @throws IllegalStateException if the engine has not been started
146 | */
147 | public boolean isComplete(long timeout, TimeUnit unit) throws InterruptedException {
148 | if (!running) {
149 | throw new IllegalStateException("Engine not started");
150 | }
151 | return transCompleteLatch.await(timeout, unit);
152 | }
153 |
154 | /**
155 | * Stop the running transformation's topology in Storm.
156 | *
157 | * @throws KettleException If an error was encountered stopping the Storm topology.
158 | */
159 | public synchronized void stop() throws KettleException {
160 | if (!running) {
161 | // Not running, nothing to do here
162 | return;
163 | }
164 |
165 | try {
166 | logger.debug("Attempting to kill topology: " + topologyName);
167 | killTopology(stormConfig, topologyName);
168 | logger.debug("Topology killed successfully");
169 | running = false;
170 | } catch (Exception ex) {
171 | throw new KettleException("Unable to kill topology: " + topologyName, ex);
172 | }
173 | }
174 |
175 | /**
176 | * Load the Storm {@link Config} by reading command line options and the Storm
177 | * config files.
178 | *
179 | * @return Configuration populated from the command line options and the Storm
180 | * config files.
181 | */
182 | @SuppressWarnings("unchecked")
183 | private Config loadStormConfig() {
184 | final Config conf = new Config();
185 | conf.setDebug(config.isDebugMode());
186 | conf.putAll(Utils.readCommandLineOpts());
187 | conf.putAll(Utils.readStormConfig());
188 |
189 | if (config.isLocalMode()) {
190 | conf.put(Config.STORM_CLUSTER_MODE, "local");
191 | conf.put(Config.STORM_ZOOKEEPER_SERVERS, Collections.singletonList("localhost"));
192 | conf.put(Config.STORM_ZOOKEEPER_PORT, 2000);
193 | }
194 |
195 | return conf;
196 | }
197 |
198 | /**
199 | * Storm needs to know what jar contains code to execute a topology. It keys
200 | * off the "storm.jar" System property. We will set it to the provided jar path if it
201 | * is not already set.
202 | *
203 | * @param jarPath Path to jar file to submit with topology. This should be a jar
204 | * containing all required resources to execute the transformation.
205 | * Plugins need not be included if they can be resolved from
206 | * $KETTLE_HOME/plugins.
207 | */
208 | private static void setJarToUpload(String jarPath) {
209 | String stormJar = System.getProperty("storm.jar", jarPath);
210 | System.setProperty("storm.jar", stormJar);
211 | logger.debug("Configured Storm topology jar as: {}", stormJar);
212 | }
213 |
214 | @SuppressWarnings("rawtypes")
215 | private void submitTopology(String name, Map stormConf, StormTopology topology) throws KettleException {
216 | if (config.isLocalMode()) {
217 | localCluster = new LocalCluster();
218 | localCluster.submitTopology(name, stormConf, topology);
219 | } else {
220 | try {
221 | StormSubmitter.submitTopology(name, stormConf, topology);
222 | } catch (Exception ex) {
223 | throw new KettleException("Error submitting topology " + name, ex);
224 | }
225 | }
226 | }
227 |
228 | @SuppressWarnings("rawtypes")
229 | private void killTopology(Map conf, String name) throws NotAliveException, TException {
230 | if (config.isLocalMode()) {
231 | localCluster.killTopology(name);
232 | localCluster.shutdown();
233 | } else {
234 | NimbusClient client = NimbusClient.getConfiguredClient(conf);
235 | client.getClient().killTopology(name);
236 | }
237 | }
238 |
239 | }
240 |
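
The engine above is driven through a small lifecycle: init, execute, poll isComplete, then stop. The following is a minimal usage sketch assuming a local-mode run; the .ktr path is a placeholder and error handling is kept to a bare minimum.

```java
// Minimal sketch: run a transformation through StormExecutionEngine in local mode.
// The transformation path is a placeholder; the topology jar is resolved from kettle-storm.properties.
import java.util.concurrent.TimeUnit;

import org.pentaho.kettle.engines.storm.StormExecutionEngine;
import org.pentaho.kettle.engines.storm.StormExecutionEngineConfig;

public class RunLocalExample {
  public static void main(String[] args) throws Exception {
    StormExecutionEngineConfig config = new StormExecutionEngineConfig();
    config.setTransformationFile("/path/to/transformation.ktr");
    config.setTopologyJar(StormExecutionEngineConfig.loadStormTopologyJarFromConfiguration());
    config.setLocalMode(true);

    StormExecutionEngine engine = new StormExecutionEngine(config);
    engine.init();
    engine.execute();
    try {
      // Poll until the control bolt signals that all leaf steps have completed.
      while (!engine.isComplete(10, TimeUnit.SECONDS)) {
        System.out.println("Still waiting on topology " + engine.getTopologyName());
      }
    } finally {
      engine.stop();
    }
  }
}
```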
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/StormExecutionEngineConfig.java:
--------------------------------------------------------------------------------
1 | /*
2 | * *****************************************************************************
3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
4 | * *****************************************************************************
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with
8 | * the License. You may obtain a copy of the License at
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | * ****************************************************************************
17 | */
18 |
19 | package org.pentaho.kettle.engines.storm;
20 |
21 | import java.io.IOException;
22 | import java.util.Properties;
23 |
24 | /**
25 | * Defines configuration and runtime settings for the
26 | * {@link StormExecutionEngine}.
27 | */
28 | public class StormExecutionEngineConfig {
29 | /**
30 | * The jar to submit along with the topology. This should include everything Kettle needs to boot up and then load plugins from elsewhere.
31 | * By default, it will use the *-with-dependencies.jar generated with Maven from this project. See README.md for more information.
32 | */
33 | private String topologyJar;
34 | private String transformationFile;
35 | private boolean debugMode;
36 | private boolean localMode;
37 |
38 | public String getTopologyJar() {
39 | return topologyJar;
40 | }
41 |
42 | public void setTopologyJar(String topologyJar) {
43 | this.topologyJar = topologyJar;
44 | }
45 |
46 | public String getTransformationFile() {
47 | return transformationFile;
48 | }
49 |
50 | public void setTransformationFile(String transformationFile) {
51 | this.transformationFile = transformationFile;
52 | }
53 |
54 | public boolean isDebugMode() {
55 | return debugMode;
56 | }
57 |
58 | public void setDebugMode(boolean debugMode) {
59 | this.debugMode = debugMode;
60 | }
61 |
62 | public boolean isLocalMode() {
63 | return localMode;
64 | }
65 |
66 | public void setLocalMode(boolean localMode) {
67 | this.localMode = localMode;
68 | }
69 |
70 | public static String loadStormTopologyJarFromConfiguration() throws IOException {
71 | Properties p = new Properties();
72 | p.load(StormExecutionEngineConfig.class.getResourceAsStream("/kettle-storm.properties"));
73 | return p.getProperty("kettle.topology.jar");
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/bolt/KettleControlBolt.java:
--------------------------------------------------------------------------------
1 | /*
2 | * *****************************************************************************
3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
4 | * *****************************************************************************
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with
8 | * the License. You may obtain a copy of the License at
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | * ****************************************************************************
17 | */
18 |
19 | package org.pentaho.kettle.engines.storm.bolt;
20 |
21 | import java.util.ArrayList;
22 | import java.util.HashMap;
23 | import java.util.List;
24 | import java.util.Map;
25 | import java.util.Set;
26 |
27 | import org.pentaho.kettle.engines.storm.KettleControlSignal;
28 | import org.pentaho.kettle.engines.storm.Notifier;
29 | import org.pentaho.kettle.engines.storm.StormExecutionEngine;
30 | import org.pentaho.kettle.engines.storm.signal.KettleSignal;
31 | import org.slf4j.Logger;
32 | import org.slf4j.LoggerFactory;
33 |
34 | import backtype.storm.task.OutputCollector;
35 | import backtype.storm.task.TopologyContext;
36 | import backtype.storm.topology.OutputFieldsDeclarer;
37 | import backtype.storm.topology.base.BaseRichBolt;
38 | import backtype.storm.tuple.Tuple;
39 |
40 | import com.google.common.base.Preconditions;
41 | import com.google.common.base.Strings;
42 |
43 | /**
44 | * This bolt aggregates all the final {@link KettleSignal}s from leaf bolts and
45 | * notifies {@link StormExecutionEngine} that the transformation has completed.
46 | */
47 | @SuppressWarnings("serial")
48 | public class KettleControlBolt extends BaseRichBolt {
49 | private static final Logger logger = LoggerFactory
50 | .getLogger(KettleControlBolt.class);
51 |
52 | private String transformationName;
53 | private Notifier notifier;
54 | private OutputCollector collector;
55 | private Set<String> leafSteps;
56 | private Map<String, List<Integer>> componentToPendingTasks;
57 |
58 | /**
59 | * Create a new control bolt to check for completion of the given steps.
60 | *
61 | * @param transformationName
62 | * The name of the transformation this bolt is participating in. This is
63 | * the name of the resource it will signal when it has received a
64 | * complete signal from all leaf steps.
65 | * @param notifier
66 | * The notifier used to send the transformation-complete message.
67 | * @param leafSteps
68 | * Set of all leaf steps that must complete before the
69 | * transformation is considered complete.
70 | */
71 | public KettleControlBolt(String transformationName, Notifier notifier,
72 | Set<String> leafSteps) {
73 | Preconditions.checkArgument(!Strings.isNullOrEmpty(transformationName));
74 | Preconditions.checkNotNull(leafSteps);
75 | Preconditions.checkArgument(!leafSteps.isEmpty(),
76 | "At least 1 leaf step is expected");
77 | this.transformationName = transformationName;
78 | this.notifier = notifier;
79 | this.leafSteps = leafSteps;
80 | }
81 |
82 | @SuppressWarnings("rawtypes")
83 | @Override
84 | public void prepare(Map stormConf, TopologyContext context,
85 | OutputCollector collector) {
86 | this.collector = collector;
87 | // Build the map of tasks that must complete for the transformation to have
88 | // completed
89 | componentToPendingTasks = new HashMap<String, List<Integer>>();
90 | for (String componentId : leafSteps) {
91 | List<Integer> tasks = context.getComponentTasks(componentId);
92 | if (tasks == null || tasks.isEmpty()) {
93 | throw new IllegalStateException("No tasks defined for leaf step " + componentId);
94 | }
95 | componentToPendingTasks.put(componentId,
96 | new ArrayList<Integer>(tasks));
97 | }
98 | notifier.init(stormConf);
99 | }
100 |
101 | @Override
102 | public void execute(Tuple input) {
103 | // We only ever expect signals to be routed to us.
104 | try {
105 | KettleSignal signal = (KettleSignal) input.getValue(0);
106 |
107 | logger.info("Received signal from " + signal.getComponentId() + ": "
108 | + signal.getSignal());
109 |
110 | // Remove the pending task from the component's list
111 | List<Integer> pendingTaskIds = componentToPendingTasks.get(signal
112 | .getComponentId());
113 | if (pendingTaskIds == null || !pendingTaskIds.remove(signal.getTaskId())) {
114 | // TODO How can we fail the topology if this happens?
115 | throw new IllegalStateException(
116 | "Unexpected completion message received: componentId="
117 | + signal.getComponentId() + ",taskId=" + signal.getTaskId()
118 | + ".");
119 | }
120 | if (pendingTaskIds.isEmpty()) {
121 | componentToPendingTasks.remove(signal.getComponentId());
122 | }
123 | if (componentToPendingTasks.isEmpty()) {
124 | logger
125 | .info("All leaf steps have completed. Sending transformation complete message.");
126 | // Transformation is complete! Fire the signal.
127 | notifier.notify(transformationName, /* not used */
128 | KettleControlSignal.COMPLETE);
129 | }
130 | collector.ack(input);
131 | } catch (Exception ex) {
132 | logger.error("Error processing tuple: " + input, ex);
133 | collector.fail(input);
134 | }
135 | }
136 |
137 | @Override
138 | public void declareOutputFields(OutputFieldsDeclarer declarer) {
139 | // We don't output anything.
140 | }
141 |
142 | @Override
143 | public void cleanup() {
144 | super.cleanup();
145 | notifier.cleanup();
146 | }
147 | }
148 |
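
The project's own KettleTopologyBuilder is not shown in this listing, so the snippet below is only a hypothetical sketch of how a control bolt like the one above could be subscribed to the "signal" streams of leaf steps using Storm's stock TopologyBuilder; the component ids, grouping choice, and use of BasicSignalNotifier are assumptions for illustration.

```java
// Hypothetical wiring sketch: subscribe a KettleControlBolt to a leaf step's "signal" stream.
// This is NOT the project's KettleTopologyBuilder; ids and groupings are illustrative only.
import java.util.HashSet;
import java.util.Set;

import backtype.storm.topology.TopologyBuilder;

import org.pentaho.kettle.engines.storm.bolt.KettleControlBolt;
import org.pentaho.kettle.engines.storm.signal.BasicSignalNotifier;

public class ControlBoltWiringSketch {
  public static void wireControlBolt(TopologyBuilder builder, String topologyName) {
    // The leaf step component id is a placeholder for whatever leaf steps the transformation has.
    Set<String> leafSteps = new HashSet<String>();
    leafSteps.add("Text file output");

    KettleControlBolt controlBolt =
        new KettleControlBolt(topologyName, new BasicSignalNotifier(topologyName), leafSteps);

    // A single control bolt task receives every KettleSignal emitted on the leaf step's
    // "signal" stream so it can track the pending task ids per component.
    builder.setBolt("control", controlBolt, 1)
        .allGrouping("Text file output", "signal");
  }
}
```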
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/bolt/KettleStepBolt.java:
--------------------------------------------------------------------------------
1 | /*
2 | * *****************************************************************************
3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
4 | * *****************************************************************************
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with
8 | * the License. You may obtain a copy of the License at
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | * ****************************************************************************
17 | */
18 |
19 | package org.pentaho.kettle.engines.storm.bolt;
20 |
21 | import java.util.Collections;
22 | import java.util.Deque;
23 | import java.util.LinkedList;
24 | import java.util.Map;
25 |
26 | import org.pentaho.di.core.RowSet;
27 | import org.pentaho.di.core.exception.KettleException;
28 | import org.pentaho.di.core.exception.KettleStepException;
29 | import org.pentaho.di.core.row.RowMetaInterface;
30 | import org.pentaho.di.trans.step.RowListener;
31 | import org.pentaho.di.trans.step.StepMetaDataCombi;
32 | import org.pentaho.di.trans.step.errorhandling.StreamInterface;
33 | import org.pentaho.kettle.engines.storm.CappedValues;
34 | import org.pentaho.kettle.engines.storm.KettleControlSignal;
35 | import org.pentaho.kettle.engines.storm.KettleStormUtils;
36 | import org.pentaho.kettle.engines.storm.signal.KettleSignal;
37 | import org.slf4j.Logger;
38 | import org.slf4j.LoggerFactory;
39 |
40 | import backtype.storm.task.OutputCollector;
41 | import backtype.storm.task.TopologyContext;
42 | import backtype.storm.topology.OutputFieldsDeclarer;
43 | import backtype.storm.topology.base.BaseRichBolt;
44 | import backtype.storm.tuple.Fields;
45 | import backtype.storm.tuple.Tuple;
46 |
47 | /**
48 | * A Kettle Step Bolt represents a Kettle step that receives input from at least one other Kettle step. This encapsulates the
49 | * logic required to receive input from Storm, process it, and emit any output from the step to be received by downstream bolts.
50 | */
51 | @SuppressWarnings("serial")
52 | public class KettleStepBolt extends BaseRichBolt implements RowListener {
53 | private static final Logger logger = LoggerFactory
54 | .getLogger(KettleStepBolt.class);
55 |
56 | private KettleStormUtils utils = new KettleStormUtils();
57 |
58 | private String componentId;
59 | private Integer taskId;
60 |
61 | private String transXml;
62 | private String stepName;
63 |
64 | private transient StepMetaDataCombi step;
65 | private OutputCollector collector;
66 |
67 | private boolean done;
68 |
69 | /**
70 | * A collection of tuples we've received. These are used to correlate output with input Tuples so message ack'ing properly groups output to the correct input.
71 | */
72 | private transient Deque<Tuple> receivedTuples;
73 | /**
74 | * The tuple we're currently processing. This is to correlate output with input Tuples so message ack'ing properly groups output to the correct input.
75 | */
76 | private transient Tuple currentTuple;
77 |
78 | public KettleStepBolt(String name, String transXml, StepMetaDataCombi step) {
79 | if (step == null) {
80 | throw new IllegalArgumentException(
81 | "Step Meta required to create a new Kettle Step Bolt");
82 | }
83 | this.step = step;
84 | this.transXml = transXml;
85 | this.stepName = step.step.getStepname();
86 | }
87 |
88 | private StepMetaDataCombi getStep() {
89 | if (step == null) {
90 | try {
91 | step = utils.getStep(transXml, stepName);
92 | } catch (KettleException e) {
93 | throw new IllegalStateException(
94 | "Error processing transformation for bolt for step: "
95 | + stepName, e);
96 | }
97 |
98 | step.step.addRowListener(this);
99 | }
100 | return step;
101 | }
102 |
103 | @Override
104 | public void prepare(@SuppressWarnings("rawtypes") Map conf,
105 | TopologyContext context, OutputCollector collector) {
106 | componentId = context.getThisComponentId();
107 | taskId = context.getThisTaskId();
108 | this.collector = collector;
109 | this.receivedTuples = new LinkedList<>();
110 | }
111 |
112 | @Override
113 | public void execute(Tuple input) {
114 | logger.debug("{} bolt received {}", stepName, input);
115 |
116 | if ("signal".equals(input.getSourceStreamId())) {
117 | onSignal(input, (KettleSignal) input.getValue(0));
118 | return;
119 | }
120 |
121 | try {
122 | // Cache the current tuple so we can anchor emitted values properly
123 | // This will not work for any step that batches records between calls to processRow()
124 | // TODO Make this work for all steps - we need a message id from Kettle to correlate tuple to message id.
125 | receivedTuples.addLast(input);
126 | injectRow(input);
127 | } catch (Exception ex) {
128 | throw new RuntimeException("Error converting tuple to Kettle row for step " + stepName,
129 | ex);
130 | }
131 |
132 | if (isInfoSource(input.getSourceComponent())) {
133 | // Immediately ack messages from info sources. We cannot determine how
134 | // they'll be used due to the lack of message identifiers in Kettle.
135 | // Assume these messages are ancillary to the input row sets messages.
136 | collector.ack(receivedTuples.removeLast());
137 | } else {
138 | processRows();
139 | }
140 | }
141 |
142 | private void injectRow(Tuple input) {
143 | RowSet rowSet = findRowSet(input.getSourceComponent());
144 | logger.debug("Injecting row to rowSet: {}", input.getSourceComponent());
145 | RowMetaInterface rowMeta = rowSet.getRowMeta();
146 | rowSet.putRow(rowMeta, utils.convertToRow(rowMeta, input.getValues().toArray()));
147 | }
148 |
149 | private RowSet findRowSet(String stepName) {
150 | // Look through info streams first
151 | for (StreamInterface infoStream : getStep().stepMeta.getStepMetaInterface().getStepIOMeta().getInfoStreams()) {
152 | if (stepName.equals(infoStream.getStepname())) {
153 | return getStep().step.getTrans().findRowSet(infoStream.getStepname(), 0, this.stepName, 0);
154 | }
155 | }
156 | for (RowSet rs : getStep().step.getInputRowSets()) {
157 | if (stepName.equals(rs.getOriginStepName())) {
158 | return rs;
159 | }
160 | }
161 | throw new IllegalArgumentException(String.format("Could not locate row set for a step with the name '%s'", stepName));
162 | }
163 |
164 | /**
165 | * Process a row for every received "input" (non-info) tuple.
166 | */
167 | private void processRows() {
168 | if (!isInfoInputComplete()) {
169 | logger.debug("Info is not complete - not processing rows yet!");
170 | // If we haven't received all rows for info streams do not call processRow as we'll block waiting for them. :(
171 | return;
172 | }
173 | logger.debug("Starting to process rows for {}. {} pending rows to process", stepName, receivedTuples.size());
174 | try {
175 | do {
176 | currentTuple = receivedTuples.peekFirst();
177 | logger.debug("Processing tuple: {}", currentTuple);
178 | try {
179 | // Keep track of how many rows we have before we start to process to
180 | // determine if processRow() actually consumed anything.
181 | long rowsRemaining = getPendingRowCount();
182 | logger.debug("pending row count: {}", rowsRemaining);
183 | done = !getStep().step.processRow(step.meta, step.data);
184 | logger.debug("pending row count after processRow: {}", getPendingRowCount());
185 | if (getPendingRowCount() != rowsRemaining) {
186 | // Rows were consumed and ack
187 | receivedTuples.remove();
188 | collector.ack(currentTuple);
189 | }
190 | } catch (KettleException e) {
191 | if (currentTuple != null) {
192 | receivedTuples.remove();
193 | collector.fail(currentTuple);
194 | }
195 | throw new RuntimeException("Error processing a row for step "
196 | + stepName, e);
197 | }
198 | } while (!done && !receivedTuples.isEmpty());
199 | } finally {
200 | if (done) {
201 | try {
202 | getStep().step.batchComplete();
203 | } catch (KettleException ex) {
204 | logger.error("kettle exception completing batch for step " + stepName, ex);
205 | }
206 | getStep().step.dispose(step.meta, step.data);
207 | logger.debug("Step complete: {}", stepName);
208 | }
209 | }
210 | }
211 |
212 | @Override
213 | public void declareOutputFields(OutputFieldsDeclarer declarer) {
214 | utils.declareOutputFields(step, declarer);
215 | declarer.declareStream("signal", new Fields("signal"));
216 | }
217 |
218 | @Override
219 | public void errorRowWrittenEvent(RowMetaInterface rowMeta, Object[] row)
220 | throws KettleStepException {
221 | }
222 |
223 | @Override
224 | public void rowReadEvent(RowMetaInterface rowMeta, Object[] row)
225 | throws KettleStepException {
226 | }
227 |
228 | @Override
229 | public void rowWrittenEvent(RowMetaInterface rowMeta, Object[] row)
230 | throws KettleStepException {
231 | CappedValues values = new CappedValues(rowMeta.getValueMetaList()
232 | .size(), row);
233 | if (!values.isEmpty()) {
234 | if (currentTuple == null) {
235 | // If the current tuple is null we've likely processed all received
236 | // tuples and are simply processing to get a state of "done". If any
237 | // rows are emitted as part of that last dummy call to processRow this
238 | // will happen.
239 | StringBuilder sb = new StringBuilder();
240 | for (Object o : row) {
241 | sb.append(o).append(" ");
242 | }
243 | logger.warn("Current tuple unknown for new output on bolt (" + stepName + "): " + sb);
244 | }
245 | collector.emit(currentTuple, values);
246 | }
247 | }
248 |
249 | /**
250 | * Process a received signal message.
251 | *
252 | * @param anchor
253 | * The incoming signal tuple to be used as an anchor for our signal
254 | * to guarantee a complete signal has been received by all downstream
255 | * systems.
256 | * @param signal
257 | * The received signal.
258 | */
259 | public void onSignal(Tuple anchor, KettleSignal signal) {
260 | logger.info("Signal received for step {}: {}", stepName, signal);
261 |
262 | switch (signal.getSignal()) {
263 | case COMPLETE:
264 | // Assume only one input for now...
265 | logger.debug("Input is complete for bolt {}: {}", stepName, signal.getComponentId());
266 | // Set the row set to "done"
267 | RowSet rowSet = findRowSet(signal.getComponentId());
268 | rowSet.setDone();
269 |
270 | // If all row sets (info and input) are complete then this step is completely done!
271 | // We have to attempt to process a row for the step to realize it has nothing more to read.
272 | // If all row sets are not complete but info input is and we have
273 | // pending rows we should start to process them - we may have already
274 | // received all input.
275 | if (isInputComplete() || (isInfoInputComplete() && !receivedTuples.isEmpty())) {
276 | if (!done) {
277 | processRows();
278 | }
279 | try {
280 | logger.info("Signaling complete for step " + stepName + " with taskId=" + taskId + ".");
281 | collector.emit("signal", anchor, Collections.<Object>singletonList(new KettleSignal(componentId, taskId, KettleControlSignal.COMPLETE)));
282 | // Acknowledge the received signal
283 | collector.ack(anchor);
284 | } catch (Exception e) {
285 | logger.warn(stepName + ": Error notifying downstream steps of completion", e);
286 | // Fail the received signal so it may be resent ASAP
287 | collector.fail(anchor);
288 | }
289 | } else {
290 | logger.debug("Input is not complete. Still waiting for rows...");
291 | }
292 | break;
293 | default:
294 | throw new IllegalArgumentException("Unsupported signal: " + signal.getSignal());
295 | }
296 | }
297 |
298 | /**
299 | * Calculates how many rows are waiting to be processed across all input row sets.
300 | *
301 | * @return The number of rows in all input row sets.
302 | */
303 | private long getPendingRowCount() {
304 | long pendingRowCount = 0L;
305 | // InputRowSets does not return info stream row sets until they are ready. Then it returns them until the rows are consumed.
306 | for (RowSet rs : getStep().step.getInputRowSets()) {
307 | if (!isInfoSource(rs.getOriginStepName())) {
308 | // Only include non-info row sets in this calculation since info rows will be fully consumed once the first row is processed.
309 | logger.debug(rs.getName() + ": " + rs.size());
310 | pendingRowCount += rs.size();
311 | }
312 | }
313 | return pendingRowCount;
314 | }
315 |
316 | /**
317 | * Determines if a given step name is connected to the step for this bolt via an info stream.
318 | *
319 | * @param stepName The name of a step.
320 | * @return True if {@code stepName} is connected to the step for this bolt via an info stream.
321 | */
322 | private boolean isInfoSource(String stepName) {
323 | for (StreamInterface infoStream : getStep().stepMeta.getStepMetaInterface().getStepIOMeta().getInfoStreams()) {
324 | if (infoStream.getStepname().equals(stepName)) {
325 | return true;
326 | }
327 | }
328 | return false;
329 | }
330 |
331 | /**
332 | * Determines whether all info streams feeding this bolt have delivered all of their input.
333 | *
334 | * @return True if no info stream is still expected to deliver rows to this bolt.
335 | */
336 | private boolean isInfoInputComplete() {
337 | // Look through info streams first
338 | for (StreamInterface infoStream : getStep().stepMeta.getStepMetaInterface().getStepIOMeta().getInfoStreams()) {
339 | RowSet rs = getStep().step.getTrans().findRowSet(infoStream.getStepname(), 0, stepName, 0);
340 | if (!rs.isDone()) {
341 | return false;
342 | }
343 | }
344 | return true;
345 | }
346 |
347 | /**
348 | * Determines whether every input row set (including info streams) has delivered all of its rows.
349 | *
350 | * @return True if this bolt expects no further input.
351 | */
352 | private boolean isInputComplete() {
353 | for (RowSet rs : getStep().step.getInputRowSets()) {
354 | if (!rs.isDone()) {
355 | return false;
356 | }
357 | }
358 | return isInfoInputComplete();
359 | }
360 | }
361 |
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/signal/BasicSignalNotifier.java:
--------------------------------------------------------------------------------
1 | /*
2 | * *****************************************************************************
3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
4 | * *****************************************************************************
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with
8 | * the License. You may obtain a copy of the License at
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | * ****************************************************************************
17 | */
18 |
19 | package org.pentaho.kettle.engines.storm.signal;
20 |
21 | import java.util.Map;
22 |
23 | import org.pentaho.kettle.engines.storm.KettleControlSignal;
24 | import org.pentaho.kettle.engines.storm.Notifier;
25 | import org.pentaho.kettle.engines.storm.NotifierException;
26 |
27 | import backtype.storm.contrib.signals.SignalListener;
28 | import backtype.storm.contrib.signals.StormSignalConnection;
29 |
30 | import com.google.common.base.Preconditions;
31 | import com.google.common.base.Strings;
32 |
33 | /**
34 | * A notifier that uses ZooKeeper via Storm Signals to send notifications. This
35 | * notifier will ignore the specific signal provided to
36 | * {@link #notify(String, KettleControlSignal)} and instead always send an empty
37 | * message.
38 | */
39 | @SuppressWarnings("serial")
40 | public class BasicSignalNotifier implements Notifier {
41 |
42 | private String id;
43 | private StormSignalConnection signalConnection;
44 |
45 | public BasicSignalNotifier(String name) {
46 | Preconditions.checkArgument(!Strings.isNullOrEmpty(name),
47 | "name cannot be null or empty");
48 | this.id = name;
49 | }
50 |
51 | @SuppressWarnings("rawtypes")
52 | @Override
53 | public void init(Map stormConf) {
54 | // TODO Refactor this to use ZooKeeper directly
55 | signalConnection = new StormSignalConnection(id, new SignalListener() {
56 | @Override
57 | public void onSignal(byte[] data) {
58 | throw new IllegalStateException(
59 | "not expecting any signals to be sent to " + id);
60 | }
61 | });
62 | try {
63 | signalConnection.init(stormConf);
64 | } catch (Exception ex) {
65 | throw new RuntimeException("Error creating signal connection", ex);
66 | }
67 | }
68 |
69 | /**
70 | * Send a simple empty message to the component with the given id.
71 | *
72 | * @param id
73 | * Component to notify.
74 | * @param signal
75 | * Not used.
76 | */
77 | @Override
78 | public void notify(String id, KettleControlSignal signal)
79 | throws NotifierException {
80 | // Note: Signal value does not matter. The reception of any message
81 | // indicates transformation is complete. This is received by the
82 | // StormExecutionEngine.
83 | try {
84 | signalConnection.send(id, new byte[0]);
85 | } catch (Exception ex) {
86 | throw new NotifierException("Error notifying " + id + " with signal "
87 | + signal, ex);
88 | }
89 | }
90 |
91 | @Override
92 | public void cleanup() {
93 | signalConnection.close();
94 | }
95 | }
96 |
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/signal/KettleSignal.java:
--------------------------------------------------------------------------------
1 | /*
2 | * *****************************************************************************
3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
4 | * *****************************************************************************
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with
8 | * the License. You may obtain a copy of the License at
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | * ****************************************************************************
17 | */
18 |
19 | package org.pentaho.kettle.engines.storm.signal;
20 |
21 | import org.pentaho.kettle.engines.storm.KettleControlSignal;
22 |
23 | import java.io.Serializable;
24 |
25 | /**
26 | * Represents a control message for a Kettle step. This is used to indicate
27 | * state changes between steps running as Spouts or Bolts within a Storm
28 | * topology.
29 | *
30 | * TODO Do we need the component and task ids here? Look into simply using the Tuple's.
31 | */
32 | @SuppressWarnings("serial")
33 | public class KettleSignal implements Serializable {
34 | private String componentId;
35 | private KettleControlSignal signal;
36 | private Integer taskId;
37 |
38 | public KettleSignal(String componentId, Integer taskId,
39 | KettleControlSignal signal) {
40 | this.componentId = componentId;
41 | this.taskId = taskId;
42 | this.signal = signal;
43 | }
44 |
45 | public String getComponentId() {
46 | return componentId;
47 | }
48 |
49 | public KettleControlSignal getSignal() {
50 | return signal;
51 | }
52 |
53 | @Override
54 | public String toString() {
55 | return "KettleSignal {componentId=" + componentId + ",taskId=" + taskId
56 | + ",signal=" + signal.name() + "}";
57 | }
58 |
59 | public Integer getTaskId() {
60 | return taskId;
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/signal/QuickCloseStormSignalConnectionFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * *****************************************************************************
3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
4 | * *****************************************************************************
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with
8 | * the License. You may obtain a copy of the License at
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | * ****************************************************************************
17 | */
18 |
19 | package org.pentaho.kettle.engines.storm.signal;
20 |
21 | import backtype.storm.contrib.signals.SignalListener;
22 | import backtype.storm.contrib.signals.StormSignalConnection;
23 |
24 | /**
25 | * Generates {@link StormSignalConnection}s that close their connections after
26 | * receiving the first message.
27 | */
28 | public class QuickCloseStormSignalConnectionFactory {
29 | /**
30 | * Closes the {@link StormSignalConnection} upon first signal.
31 | */
32 | private static class QuickCloseSignalListener implements SignalListener {
33 | private StormSignalConnection connection;
34 | private SignalListener listener;
35 |
36 | public QuickCloseSignalListener(SignalListener listener) {
37 | this.listener = listener;
38 | }
39 |
40 | public void setConnection(StormSignalConnection connection) {
41 | this.connection = connection;
42 | }
43 |
44 | @Override
45 | public void onSignal(byte[] data) {
46 | try {
47 | listener.onSignal(data);
48 | } finally {
49 | connection.close();
50 | }
51 | }
52 | }
53 |
54 | public StormSignalConnection createSignalConnection(String name, SignalListener listener) {
55 | QuickCloseSignalListener l = new QuickCloseSignalListener(listener);
56 | StormSignalConnection connection = new StormSignalConnection(name, l);
57 | // Must set connection so it can be closed
58 | l.setConnection(connection);
59 | return connection;
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/signal/SignalClientFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * *****************************************************************************
3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
4 | * *****************************************************************************
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with
8 | * the License. You may obtain a copy of the License at
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | * ****************************************************************************
17 | */
18 |
19 | package org.pentaho.kettle.engines.storm.signal;
20 |
21 | import backtype.storm.contrib.signals.client.SignalClient;
22 |
23 | import java.io.Serializable;
24 |
25 | /**
26 | * Factory for constructing Signal Clients.
27 | */
28 | public interface SignalClientFactory extends Serializable {
29 | /**
30 | * Create a client that sends messages addressed to {@code name}.
31 | *
32 | * @param name Name of the recipient the client will send signals to.
33 | * @return A ready-to-use signal client.
34 | */
35 | SignalClient createClient(String name);
36 | }
37 |
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/signal/SimpleSignalClientFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * *****************************************************************************
3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
4 | * *****************************************************************************
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with
8 | * the License. You may obtain a copy of the License at
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | * ****************************************************************************
17 | */
18 |
19 | package org.pentaho.kettle.engines.storm.signal;
20 |
21 | import backtype.storm.contrib.signals.client.SignalClient;
22 |
23 | /**
24 | * Creates {@link SignalClient}s for a known ZooKeeper instance.
25 | */
26 | @SuppressWarnings("serial")
27 | public class SimpleSignalClientFactory implements SignalClientFactory {
28 |
29 | private String zkConnectionString;
30 |
31 | /**
32 | * Create a new factory that creates clients that use the provided ZooKeeper
33 | * connection string.
34 | *
35 | * @param zkConnectionString ZooKeeper connection string for clients to use when establishing
36 | * their connections
37 | */
38 | public SimpleSignalClientFactory(String zkConnectionString) {
39 | this.zkConnectionString = zkConnectionString;
40 | }
41 |
42 | @Override
43 | public SignalClient createClient(String name) {
44 | return new SignalClient(zkConnectionString, name);
45 | }
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/spout/KettleStepSpout.java:
--------------------------------------------------------------------------------
1 | /*
2 | * *****************************************************************************
3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
4 | * *****************************************************************************
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with
8 | * the License. You may obtain a copy of the License at
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | * ****************************************************************************
17 | */
18 |
19 | package org.pentaho.kettle.engines.storm.spout;
20 |
21 | import java.util.Collections;
22 | import java.util.Map;
23 | import java.util.Set;
24 | import java.util.UUID;
25 | import java.util.concurrent.ConcurrentHashMap;
26 |
27 | import org.pentaho.di.core.exception.KettleException;
28 | import org.pentaho.di.trans.step.StepMetaDataCombi;
29 | import org.pentaho.kettle.engines.storm.BaseSpoutOutputCollector;
30 | import org.pentaho.kettle.engines.storm.CollectorRowListener;
31 | import org.pentaho.kettle.engines.storm.KettleControlSignal;
32 | import org.pentaho.kettle.engines.storm.KettleStormUtils;
33 | import org.pentaho.kettle.engines.storm.signal.KettleSignal;
34 | import org.slf4j.Logger;
35 | import org.slf4j.LoggerFactory;
36 |
37 | import backtype.storm.spout.SpoutOutputCollector;
38 | import backtype.storm.task.TopologyContext;
39 | import backtype.storm.topology.OutputFieldsDeclarer;
40 | import backtype.storm.topology.base.BaseRichSpout;
41 | import backtype.storm.tuple.Fields;
42 |
43 | /**
44 | * A Kettle Step Spout represents a Kettle step that produces records and specifically does not receive any input from other Kettle steps.
45 | * This encapsulates the logic to produce messages within Storm to be processed by downstream bolts.
46 | */
47 | @SuppressWarnings("serial")
48 | public class KettleStepSpout extends BaseRichSpout {
49 | private static final Logger logger = LoggerFactory
50 | .getLogger(KettleStepSpout.class);
51 | private KettleStormUtils utils = new KettleStormUtils();
52 |
53 | private String componentId;
54 | private Integer taskId;
55 |
56 | private String transXml;
57 | private String stepName;
58 |
59 | private transient StepMetaDataCombi step;
60 |
61 | private boolean done = false;
62 |
63 | private Object signalCompleteMessageId;
64 |
65 | /**
66 | * The set of pending messages we're waiting to be ack'd. This should be thread-safe.
67 | */
68 | private Set<Object> pendingMessages;
69 |
70 | private SpoutOutputCollector collector;
71 |
72 | public KettleStepSpout(String name, String transXml,
73 | StepMetaDataCombi step) {
74 | if (transXml == null || step == null) {
75 | throw new NullPointerException();
76 | }
77 | this.stepName = name;
78 | this.step = step;
79 | this.transXml = transXml;
80 | }
81 |
82 | @Override
83 | @SuppressWarnings("rawtypes")
84 | public void open(Map conf, TopologyContext context,
85 | SpoutOutputCollector collector) {
86 | componentId = context.getThisComponentId();
87 | taskId = context.getThisTaskId();
88 | this.collector = collector;
89 | try {
90 | this.step = utils.getStep(transXml, stepName);
91 | } catch (KettleException e) {
92 | throw new IllegalStateException(
93 | "Error processing transformation for spout for step: "
94 | + stepName, e);
95 | }
96 |
97 | if (this.step == null) {
98 | throw new IllegalStateException(
99 | "Step could not be found for spout: " + stepName);
100 | }
101 |
102 | pendingMessages = Collections.newSetFromMap(new ConcurrentHashMap<Object, Boolean>(1000));
103 |
104 | step.step.addRowListener(new CollectorRowListener(step,
105 | new BaseSpoutOutputCollector(collector, pendingMessages), utils.getOutputFields(
106 | step).size()));
107 | }
108 |
109 | @Override
110 | public void nextTuple() {
111 | if (!done) {
112 | try {
113 | done = !step.step.processRow(step.meta, step.data);
114 | } catch (KettleException e) {
115 | throw new RuntimeException("Error processing a row for step "
116 | + step.step.getStepname(), e);
117 | }
118 | }
119 | }
120 |
121 | @Override
122 | public void declareOutputFields(OutputFieldsDeclarer declarer) {
123 | utils.declareOutputFields(step, declarer);
124 | declarer.declareStream("signal", new Fields("signal"));
125 | }
126 |
127 | @Override
128 | public void ack(Object msgId) {
129 | // Only handle completed row messages. If the ack'd message id is the signal
130 | // complete message then we're done!
131 | if (!msgId.equals(signalCompleteMessageId)) {
132 | handleCompleted(msgId);
133 | }
134 | }
135 |
136 | @Override
137 | public void fail(Object msgId) {
138 | if (msgId.equals(signalCompleteMessageId)) {
139 | logger.error("Error processing signal complete message. Resending...");
140 | // Send the signal complete message again
141 | // TODO we should set a retry limit
142 | signalComplete();
143 | } else {
144 | logger.error("Message failed processing: " + msgId);
145 | handleCompleted(msgId);
146 | }
147 | }
148 |
149 | private void handleCompleted(Object msgId) {
150 | // Message fully processed - remove it from our list
151 | if (!pendingMessages.remove(msgId)) {
152 | throw new IllegalStateException("Unexpected message id ack'd: " + msgId);
153 | }
154 | if (done && pendingMessages.isEmpty()) {
155 | step.step.dispose(step.meta, step.data);
156 | step.step.markStop();
157 | signalComplete();
158 | }
159 | }
160 |
161 | private void signalComplete() {
162 | logger.info("Signaling complete for step " + stepName + " with taskId=" + taskId + ".");
163 | try {
164 | signalCompleteMessageId = UUID.randomUUID();
165 | collector.emit("signal", Collections.<Object>singletonList(new KettleSignal(componentId, taskId, KettleControlSignal.COMPLETE)), signalCompleteMessageId);
166 | } catch (Exception e) {
167 | logger.warn(stepName + ": Error notifying downstream steps", e);
168 | }
169 | }
170 | }
171 |
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/resources/ccnums.ktr:
--------------------------------------------------------------------------------
(Kettle transformation XML whose markup was stripped during extraction; the remaining values are not meaningful on their own. Recoverable content: a transformation named "ccnums" with three steps — "Generate random credit card numbers" (RandomCCNumberGenerator) hopping to "Select values" (SelectValues) and then to "Text file output" (TextFileOutput), which writes comma-separated rows to a file named "ccnums".)
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/resources/kettle-storm.properties:
--------------------------------------------------------------------------------
1 | #
2 | # ******************************************************************************
3 | # Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
4 | # ******************************************************************************
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with
8 | # the License. You may obtain a copy of the License at
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | # *****************************************************************************
17 | #
18 |
19 | kettle.topology.jar=${kettle.storm.topology.jar}
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/resources/stream-lookup.ktr:
--------------------------------------------------------------------------------
(Kettle transformation XML whose markup was stripped during extraction; the remaining values are not meaningful on their own. Recoverable content: a transformation named "stream-lookup" with two DataGrid steps, "Raw data" and "Reference", both hopping into a "Stream lookup" step (StreamLookup) that joins on "id" and retrieves "value", followed by a "Text file output" step (TextFileOutput) writing to a file named "stream-lookup-output".)
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/resources/test.ktr:
--------------------------------------------------------------------------------
[test.ktr: Kettle transformation XML whose element tags were lost in this export. Recoverable content: transformation "test" containing a single step, "Generate random value" (RandomValue), which emits a field named id of type "random string".]
--------------------------------------------------------------------------------
/kettle-engine-storm/src/test/java/org/pentaho/kettle/engines/storm/bolt/KettleControlBoltTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * *****************************************************************************
3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
4 | * *****************************************************************************
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with
8 | * the License. You may obtain a copy of the License at
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | * ****************************************************************************
17 | */
18 |
19 | package org.pentaho.kettle.engines.storm.bolt;
20 |
21 | import java.util.Collections;
22 | import java.util.List;
23 | import java.util.Map;
24 |
25 | import org.easymock.EasyMock;
26 | import org.easymock.IMocksControl;
27 | import org.junit.Before;
28 | import org.junit.Test;
29 | import org.pentaho.kettle.engines.storm.KettleControlSignal;
30 | import org.pentaho.kettle.engines.storm.Notifier;
31 | import org.pentaho.kettle.engines.storm.signal.KettleSignal;
32 |
33 | import com.google.common.collect.Lists;
34 | import com.google.common.collect.Sets;
35 |
36 | import backtype.storm.task.OutputCollector;
37 | import backtype.storm.task.TopologyContext;
38 | import backtype.storm.tuple.Tuple;
39 |
40 | public class KettleControlBoltTest {
41 | private static final String TRANS_NAME = "transformation 1";
42 | private static final String STEP_1 = "step 1";
43 | private static final String STEP_2 = "step 2";
44 | private static final int TASK_ID_1 = 1723;
45 | private static final int TASK_ID_2 = 18;
46 | private static final KettleSignal STEP_1_COMPLETE = new KettleSignal(STEP_1,
47 | TASK_ID_1, KettleControlSignal.COMPLETE);
48 | private static final KettleSignal STEP_2_COMPLETE = new KettleSignal(STEP_2,
49 | TASK_ID_2, KettleControlSignal.COMPLETE);
50 |
51 | private IMocksControl control;
52 | private Notifier notifier;
53 | private TopologyContext context;
54 | private OutputCollector collector;
55 |
56 | @SuppressWarnings("rawtypes")
57 | @Before
58 | public void init() {
59 | control = EasyMock.createControl();
60 | notifier = control.createMock(Notifier.class);
61 | notifier.init(EasyMock.<Map> anyObject());
62 | EasyMock.expectLastCall().anyTimes();
63 | collector = control.createMock(OutputCollector.class);
64 | context = control.createMock(TopologyContext.class);
65 | }
66 |
67 | @Test(expected = IllegalArgumentException.class)
68 | public void construct() {
69 | new KettleControlBolt(null, notifier, Collections.singleton("step"));
70 | }
71 |
72 | @Test(expected = IllegalStateException.class)
73 | public void prepare_no_tasks_for_leaf_step_null() {
74 | KettleControlBolt bolt = new KettleControlBolt(TRANS_NAME, notifier,
75 | Collections.singleton(STEP_1));
76 | EasyMock.expect(context.getComponentTasks(STEP_1)).andReturn(null);
77 |
78 | control.replay();
79 | bolt.prepare(Collections.emptyMap(), context, collector);
80 | }
81 |
82 | @Test(expected = IllegalStateException.class)
83 | public void prepare_no_tasks_for_leaf_step_empty() {
84 | KettleControlBolt bolt = new KettleControlBolt(TRANS_NAME, notifier,
85 | Collections.singleton(STEP_1));
86 | EasyMock.expect(context.getComponentTasks(STEP_1)).andReturn(
87 | Collections.<Integer> emptyList());
88 |
89 | control.replay();
90 | bolt.prepare(Collections.emptyMap(), context, collector);
91 | }
92 |
93 | /**
94 | * Create a tuple for the given signal.
95 | *
96 | * @param signal
97 | * Signal to emit as a single value tuple.
98 | * @return The tuple.
99 | */
100 | private Tuple createTupleForSignal(KettleSignal signal) {
101 | Tuple input = control.createMock(Tuple.class);
102 | EasyMock.expect(input.getValue(0)).andReturn(signal).anyTimes();
103 | return input;
104 | }
105 |
106 | /**
107 | * Verify the last task to complete triggers the notifier.
108 | */
109 | @Test
110 | public void execute_last_task() throws Exception {
111 | // Test set up
112 | KettleControlBolt bolt = new KettleControlBolt(TRANS_NAME, notifier,
113 | Collections.singleton(STEP_1));
114 | List<Integer> taskIds = Collections.singletonList(TASK_ID_1);
115 | EasyMock.expect(context.getComponentTasks(STEP_1)).andReturn(taskIds);
116 | Tuple step1Complete = createTupleForSignal(STEP_1_COMPLETE);
117 |
118 | // Expect that our notifier is notified after receiving a complete signal
119 | // for our one and only leaf node
120 | notifier.notify(TRANS_NAME, KettleControlSignal.COMPLETE);
121 | EasyMock.expectLastCall();
122 |
123 | // The tuple should be acknowledged
124 | collector.ack(step1Complete);
125 | EasyMock.expectLastCall();
126 |
127 | control.replay();
128 | bolt.prepare(Collections.emptyMap(), context, collector);
129 | bolt.execute(step1Complete);
130 | control.verify();
131 | }
132 |
133 | /**
134 | * Verify notifications are not sent if there are pending steps.
135 | */
136 | @Test
137 | public void execute_not_last_step() throws Exception {
138 | // Test set up
139 | KettleControlBolt bolt = new KettleControlBolt(TRANS_NAME, notifier,
140 | Sets.newHashSet(STEP_1, STEP_2));
141 | EasyMock.expect(context.getComponentTasks(STEP_1)).andReturn(
142 | Collections.singletonList(TASK_ID_1));
143 | EasyMock.expect(context.getComponentTasks(STEP_2)).andReturn(
144 | Collections.singletonList(TASK_ID_2));
145 | Tuple step1Complete = createTupleForSignal(STEP_1_COMPLETE);
146 |
147 | // The tuple should be acknowledged
148 | collector.ack(step1Complete);
149 | EasyMock.expectLastCall();
150 |
151 | control.replay();
152 | bolt.prepare(Collections.emptyMap(), context, collector);
153 | bolt.execute(step1Complete);
154 | control.verify();
155 | }
156 |
157 | /**
158 | * Verify notifications are sent after all leaf steps are complete.
159 | */
160 | @Test
161 | public void execute_multiple_steps() throws Exception {
162 | // Test set up
163 | KettleControlBolt bolt = new KettleControlBolt(TRANS_NAME, notifier,
164 | Sets.newHashSet(STEP_1, STEP_2));
165 | EasyMock.expect(context.getComponentTasks(STEP_1)).andReturn(
166 | Collections.singletonList(TASK_ID_1));
167 | EasyMock.expect(context.getComponentTasks(STEP_2)).andReturn(
168 | Collections.singletonList(TASK_ID_2));
169 | Tuple step1Complete = createTupleForSignal(STEP_1_COMPLETE);
170 | Tuple step2Complete = createTupleForSignal(STEP_2_COMPLETE);
171 |
172 | // The tuples should be acknowledged
173 | collector.ack(step1Complete);
174 | EasyMock.expectLastCall();
175 | collector.ack(step2Complete);
176 | EasyMock.expectLastCall();
177 |
178 | // Expect that our notifier is notified after receiving complete signals
179 | // for both leaf steps
180 | notifier.notify(TRANS_NAME, KettleControlSignal.COMPLETE);
181 | EasyMock.expectLastCall();
182 |
183 | control.replay();
184 | bolt.prepare(Collections.emptyMap(), context, collector);
185 | bolt.execute(step1Complete);
186 | bolt.execute(step2Complete);
187 | control.verify();
188 | }
189 |
190 | /**
191 | * Verify notifications are sent after all copies of the leaf steps have
192 | * completed.
193 | */
194 | @Test
195 | public void execute_single_leaf_step_with_multiple_copies() throws Exception {
196 | // Test set up
197 | KettleControlBolt bolt = new KettleControlBolt(TRANS_NAME, notifier,
198 | Sets.newHashSet(STEP_1));
199 | EasyMock.expect(context.getComponentTasks(STEP_1)).andReturn(
200 | Lists.newArrayList(TASK_ID_1, TASK_ID_2));
201 | Tuple task1Complete = createTupleForSignal(new KettleSignal(STEP_1,
202 | TASK_ID_1, KettleControlSignal.COMPLETE));
203 | Tuple task2Complete = createTupleForSignal(new KettleSignal(STEP_1,
204 | TASK_ID_2, KettleControlSignal.COMPLETE));
205 |
206 | // The tuples should be acknowledged
207 | collector.ack(task1Complete);
208 | EasyMock.expectLastCall();
209 | collector.ack(task2Complete);
210 | EasyMock.expectLastCall();
211 |
212 | // Expect that our notifier is notified after receiving complete signals
213 | // from both copies of our one and only leaf step
214 | notifier.notify(TRANS_NAME, KettleControlSignal.COMPLETE);
215 | EasyMock.expectLastCall();
216 |
217 | control.replay();
218 | bolt.prepare(Collections.emptyMap(), context, collector);
219 | bolt.execute(task1Complete);
220 | bolt.execute(task2Complete);
221 | control.verify();
222 | }
223 |
224 | /**
225 | * Verify receiving a signal for a non-leaf step is a failure case.
226 | */
227 | @Test
228 | public void execute_unexpected_signal() throws Exception {
229 | // Test set up
230 | KettleControlBolt bolt = new KettleControlBolt(TRANS_NAME, notifier,
231 | Sets.newHashSet(STEP_1));
232 | EasyMock.expect(context.getComponentTasks(STEP_1)).andReturn(
233 | Collections.singletonList(TASK_ID_1));
234 | Tuple unexpectedSignalTuple = createTupleForSignal(new KettleSignal(
235 | "unknown step", 1, KettleControlSignal.COMPLETE));
236 |
237 | // The tuple should be failed since we're not expecting it.
238 | collector.fail(unexpectedSignalTuple);
239 | EasyMock.expectLastCall();
240 |
241 | control.replay();
242 | bolt.prepare(Collections.emptyMap(), context, collector);
243 | bolt.execute(unexpectedSignalTuple);
244 | control.verify();
245 | }
246 |
247 | @Test
248 | public void cleanup() {
249 | KettleControlBolt bolt = new KettleControlBolt(TRANS_NAME, notifier,
250 | Collections.singleton(STEP_1));
251 |
252 | notifier.cleanup();
253 | EasyMock.expectLastCall();
254 |
255 | control.replay();
256 | bolt.cleanup();
257 | control.verify();
258 | }
259 | }
260 |
--------------------------------------------------------------------------------
/kettle-engine-storm/src/test/resources/empty:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pentaho/kettle-storm/c2e2bb70a38229468dab382f62708dad5e6249e1/kettle-engine-storm/src/test/resources/empty
--------------------------------------------------------------------------------