├── .gitignore ├── README.md └── kettle-engine-storm ├── .gitignore ├── LICENSE.txt ├── pom.xml └── src ├── main ├── assembly │ ├── assembly.xml │ └── for-remote-topology-assembly.xml ├── java │ └── org │ │ └── pentaho │ │ └── kettle │ │ └── engines │ │ └── storm │ │ ├── BaseSpoutOutputCollector.java │ │ ├── CappedValues.java │ │ ├── CollectorRowListener.java │ │ ├── IKettleOutputCollector.java │ │ ├── KettleControlSignal.java │ │ ├── KettleStorm.java │ │ ├── KettleStormUtils.java │ │ ├── KettleTopologyBuilder.java │ │ ├── Notifier.java │ │ ├── NotifierException.java │ │ ├── StormExecutionEngine.java │ │ ├── StormExecutionEngineConfig.java │ │ ├── bolt │ │ ├── KettleControlBolt.java │ │ └── KettleStepBolt.java │ │ ├── signal │ │ ├── BasicSignalNotifier.java │ │ ├── KettleSignal.java │ │ ├── QuickCloseStormSignalConnectionFactory.java │ │ ├── SignalClientFactory.java │ │ └── SimpleSignalClientFactory.java │ │ └── spout │ │ └── KettleStepSpout.java └── resources │ ├── ccnums.ktr │ ├── kettle-storm.properties │ ├── stream-lookup.ktr │ └── test.ktr └── test ├── java └── org │ └── pentaho │ └── kettle │ └── engines │ └── storm │ └── bolt │ └── KettleControlBoltTest.java └── resources └── empty /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | .settings/ 3 | .project 4 | .classpath 5 | .externalToolBuilders/ 6 | .idea/ 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Kettle for Storm 2 | ============ 3 | An experimental execution environment for running a Kettle transformation as a Storm topology. 4 | 5 | Overview 6 | ============= 7 | Kettle Storm is an experimental execution environment for executing a Kettle transformation across a Storm cluster. It decomposes a transformation into a topology, wrapping each step in either a Storm Spout or a Bolt. The topology is then submitted to the cluster and is automatically killed once the transformation has finished processing all data. 8 | 9 | Many features are not yet implemented. I've only tested this with the included transformation files on a small cluster. There are quite a few details left to be implemented, some of which include: 10 | 11 | - Steps that do not emit at least one message for every input. Because Kettle does not have a message id to correlate Storm messages with, we cannot guarantee a message has been completely processed until we see a record emitted from a given step. Because of this, we also cannot determine which messages are produced for a given input if they are not immediately emitted as part of the same ```processRow()``` call. As such, we can only guarantee message processing when one input message produces at least one output message. These kinds of input steps will not work until that is fixed: 12 | - Sampling 13 | - Aggregation 14 | - Sorting 15 | - Filtering 16 | - First-class Spoon support 17 | - Repository-based transformations 18 | - Error handling 19 | - Conditional hops 20 | - Sub-transformations 21 | - Metrics: Kettle timing, throughput, logging 22 | 23 | Usage 24 | ===== 25 | Executing a Kettle transformation with Storm 26 | -------------------------------------------- 27 | The following commands will execute a transformation using a local in-memory test cluster. 28 | 29 | ### From a checkout 30 | A Kettle transformation can be submitted as a topology using the included KettleStorm command-line application.
To invoke it from Maven, use the Maven exec target with the Kettle transformation you wish to execute: 31 | ``` 32 | mvn package 33 | mvn exec:java -Dexec.args=src/main/resources/test.ktr -Dkettle-storm-local-mode=true 34 | ``` 35 | 36 | ### From a release 37 | Extract the release and run: 38 | ``` 39 | java -Dkettle-storm-local-mode=true -jar kettle-engine-storm-${version}-assembly.jar path/to/my.ktr 40 | ``` 41 | 42 | Executing on a Storm cluster 43 | --------------------------- 44 | The following instructions are meant to be executed using the artifacts packaged in a release. 45 | 46 | To execute a transformation on a Storm cluster running on the same host, simply run: 47 | ``` 48 | java -jar kettle-engine-storm-${version}-assembly.jar path/to/my.ktr 49 | ``` 50 | 51 | To submit the transformation to a Nimbus host running remotely, include the host and port via the ```storm.options``` System property: 52 | ``` 53 | java -Dstorm.options=nimbus.host=my-nimbus,nimbus.thrift.port=6628 -jar kettle-engine-storm-${version}-assembly.jar path/to/my.ktr 54 | ``` 55 | 56 | ### Configuration via System Properties 57 | 58 | If additional options are required, they can be provided as System Properties via the command line in the format: `-Dargument=value`. 59 | 60 | They are all optional and will be translated into ```StormExecutionEngineConfig``` properties: 61 | 62 | * ```kettle-storm-local-mode```: Flag indicating whether to execute the transformation as a Storm topology on an in-memory "local cluster" or to submit it to an external Storm cluster. Defaults to ```false```. 63 | * ```kettle-storm-debug```: Flag indicating you wish to enable debug messaging from Storm for the submitted topology. Defaults to ```false```. 64 | * ```kettle-storm-topology-jar```: The path to the jar file to submit with the Storm topology. This is only required if you have created a custom jar with additional classes you wish to make available to the Kettle transformation without having to manually install plugins or configure the environment of each Storm host. 65 | 66 | #### Storm Configuration 67 | 68 | By default, Kettle Storm will submit topologies to a Nimbus host running on localhost with the default connection settings included with Storm. If you'd like to use a specific storm.yaml file, declare a System property on the command line: 69 | ``` 70 | mvn exec:java -Dstorm.conf.file=/path/to/storm.yaml -Dexec.args=src/main/resources/test.ktr 71 | ``` 72 | 73 | Storm configuration properties can be overridden by specifying them on the command line in the format: 74 | ``` 75 | -Dstorm.options=nimbus.host=my-nimbus,nimbus.thrift.port=6628 76 | ``` 77 | 78 | Embedding 79 | --------- 80 | The Kettle execution engine can be embedded in a Java application using ```StormExecutionEngine``` and ```StormExecutionEngineConfig```. 81 | 82 | ```StormExecutionEngine``` provides convenience methods for integrating within multithreaded environments: 83 | 84 | - ```StormExecutionEngine.isComplete```: Blocks for the provided duration and returns ```true``` if the topology has completed successfully. 85 | - ```StormExecutionEngine.stop```: Kills the topology running the transformation if it is still executing. See the sketch below for how these two calls fit together.
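
When embedding the engine, the submitted topology is only killed when ```StormExecutionEngine.stop``` is called, so callers should pair the completion wait with a ```stop()``` call, much as the bundled ```KettleStorm``` launcher does. The snippet below is a minimal sketch of that pattern; it assumes an ```engine``` that has already been configured, initialized, and executed as in the example code that follows, and the one-second poll interval is arbitrary.

```
try {
  // Wait in one-second increments until the topology reports completion.
  while (!engine.isComplete(1, TimeUnit.SECONDS)) {
    // Still running; an application could log progress or enforce an overall deadline here.
  }
} finally {
  // Kill the topology whether the transformation finished or the wait was interrupted.
  engine.stop();
}
```

The ```finally``` block mirrors the command-line launcher, which additionally registers a shutdown hook so the topology is killed if the JVM exits early.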
86 | 87 | ### Example Code 88 | 89 | ``` 90 | StormExecutionEngineConfig config = new StormExecutionEngineConfig(); 91 | config.setTransformationFile("/path/to/my.ktr"); 92 | StormExecutionEngine engine = new StormExecutionEngine(config); 93 | engine.init(); 94 | engine.execute(); 95 | engine.isComplete(10, TimeUnit.MINUTES); // Block for up to 10 minutes while the topology executes. 96 | ``` 97 | 98 | Building a release archive 99 | -------------------------- 100 | Execute ```mvn clean package``` to produce the release artifacts. The jars will be stored in ```target/```. 101 | 102 | Multiple artifacts are produced via the ```mvn package``` target: 103 | 104 | ``` 105 | kettle-engine-storm-0.0.1-SNAPSHOT-assembly.jar 106 | kettle-engine-storm-0.0.1-SNAPSHOT-for-remote-topology.jar 107 | kettle-engine-storm-0.0.1-SNAPSHOT.jar 108 | ``` 109 | 110 | The ```-assembly.jar``` is used to schedule execution of a transformation and contains all dependencies. The ```-for-remote-topology.jar``` contains code to be submitted to the cluster with the topology and all dependencies. The plain jar is this project's compilation without additional dependencies. 111 | 112 | External References 113 | =================== 114 | Kettle: http://kettle.pentaho.com 115 | Storm: http://storm-project.net 116 | -------------------------------------------------------------------------------- /kettle-engine-storm/.gitignore: -------------------------------------------------------------------------------- 1 | kettle-engine-storm.iml 2 | target 3 | -------------------------------------------------------------------------------- /kettle-engine-storm/LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below).
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | 204 | 205 | APACHE HADOOP SUBCOMPONENTS: 206 | 207 | The Apache Hadoop project contains subcomponents with separate copyright 208 | notices and license terms. Your use of the source code for the these 209 | subcomponents is subject to the terms and conditions of the following 210 | licenses. 211 | 212 | For the org.apache.hadoop.util.bloom.* classes: 213 | 214 | /** 215 | * 216 | * Copyright (c) 2005, European Commission project OneLab under contract 217 | * 034819 (http://www.one-lab.org) 218 | * All rights reserved. 
219 | * Redistribution and use in source and binary forms, with or 220 | * without modification, are permitted provided that the following 221 | * conditions are met: 222 | * - Redistributions of source code must retain the above copyright 223 | * notice, this list of conditions and the following disclaimer. 224 | * - Redistributions in binary form must reproduce the above copyright 225 | * notice, this list of conditions and the following disclaimer in 226 | * the documentation and/or other materials provided with the distribution. 227 | * - Neither the name of the University Catholique de Louvain - UCL 228 | * nor the names of its contributors may be used to endorse or 229 | * promote products derived from this software without specific prior 230 | * written permission. 231 | * 232 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 233 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 234 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 235 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 236 | * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 237 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 238 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 239 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 240 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 241 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 242 | * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 243 | * POSSIBILITY OF SUCH DAMAGE. 244 | */ 245 | -------------------------------------------------------------------------------- /kettle-engine-storm/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | org.pentaho.kettle.engines 6 | kettle-engine-storm 7 | 0.0.2-SNAPSHOT 8 | jar 9 | 10 | kettle-engine-storm 11 | http://github.com/pentaho/kettle-storm 12 | 13 | 14 | UTF-8 15 | 0.9.0.1 16 | 17 | compile 18 | 0.2.0 19 | TRUNK-SNAPSHOT 20 | org.pentaho.kettle.engines.storm.KettleStorm 21 | ${project.build.finalName}-for-remote-topology.jar 22 | 23 | 24 | 25 | 26 | github-releases 27 | http://oss.sonatype.org/content/repositories/github-releases/ 28 | 29 | 30 | clojars.org 31 | http://clojars.org/repo 32 | 33 | 34 | pentaho 35 | http://repo.pentaho.org/artifactory/repo/ 36 | 37 | 38 | 39 | 40 | 41 | storm 42 | storm 43 | ${storm.version} 44 | provided 45 | 46 | 47 | 48 | com.github.ptgoetz 49 | storm-signals 50 | ${storm.signals.version} 51 | 52 | 53 | 54 | pentaho-kettle 55 | kettle-engine 56 | ${kettle.version} 57 | 58 | 59 | 60 | pentaho-kettle 61 | kettle-core 62 | ${kettle.version} 63 | 64 | 65 | xerces 66 | xercesImpl 67 | 68 | 69 | xerces 70 | xmlParserAPIs 71 | 72 | 73 | 74 | 75 | 76 | junit 77 | junit 78 | 4.10 79 | test 80 | 81 | 82 | 83 | org.easymock 84 | easymock 85 | 3.2 86 | test 87 | 88 | 89 | 90 | 91 | 92 | 93 | src/main/resources 94 | true 95 | 96 | 97 | 98 | 99 | 100 | org.apache.maven.plugins 101 | maven-compiler-plugin 102 | 2.3.2 103 | 104 | 1.7 105 | 1.7 106 | 107 | 108 | 109 | org.codehaus.mojo 110 | exec-maven-plugin 111 | 1.2.1 112 | 113 | 114 | 115 | java 116 | 117 | 118 | 119 | 120 | ${main.class} 121 | 122 | compile 123 | 127 | 128 | 129 | kettle-storm-topology-jar 130 | target/${kettle.storm.topology.jar} 131 | 132 | 133 | 134 | 135 | 136 | maven-assembly-plugin 137 | 2.2-beta-5 138 | 139 | 140 | 141 | for-remote-topology 142 | 
prepare-package 143 | 144 | single 145 | 146 | 147 | 148 | src/main/assembly/for-remote-topology-assembly.xml 149 | 150 | 151 | 152 | ${main.class} 153 | 154 | 155 | 156 | 157 | 158 | 159 | assembly 160 | prepare-package 161 | 162 | single 163 | 164 | 165 | 166 | src/main/assembly/assembly.xml 167 | 168 | 169 | 170 | ${main.class} 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | -------------------------------------------------------------------------------- /kettle-engine-storm/src/main/assembly/assembly.xml: -------------------------------------------------------------------------------- 1 | 2 | assembly 3 | 4 | jar 5 | 6 | false 7 | 8 | 9 | true 10 | runtime 11 | 12 | 13 | true 14 | provided 15 | 16 | 17 | -------------------------------------------------------------------------------- /kettle-engine-storm/src/main/assembly/for-remote-topology-assembly.xml: -------------------------------------------------------------------------------- 1 | 2 | for-remote-topology 3 | 4 | jar 5 | 6 | false 7 | 8 | 9 | true 10 | runtime 11 | 12 | 13 | -------------------------------------------------------------------------------- /kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/BaseSpoutOutputCollector.java: -------------------------------------------------------------------------------- 1 | /* 2 | * ***************************************************************************** 3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com 4 | * ***************************************************************************** 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with 8 | * the License. You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | * **************************************************************************** 17 | */ 18 | 19 | package org.pentaho.kettle.engines.storm; 20 | 21 | import backtype.storm.spout.SpoutOutputCollector; 22 | 23 | import java.util.List; 24 | import java.util.Set; 25 | import java.util.UUID; 26 | 27 | /** 28 | * Wraps an {@link SpoutOutputCollector} so pending messages may be tracked. A {@link org.pentaho.kettle.engines.storm.spout.KettleStepSpout} 29 | * relies on this to know when all the data it has emitted has been fully processed. 30 | */ 31 | public class BaseSpoutOutputCollector implements IKettleOutputCollector { 32 | private SpoutOutputCollector out; 33 | /** 34 | * The collection to add message ids to when emiting tuples. This should be 35 | * thread-safe. 36 | */ 37 | private Set pendingMessageIds; 38 | 39 | public BaseSpoutOutputCollector(SpoutOutputCollector out, Set pendingMessageIds) { 40 | if (out == null) { 41 | throw new NullPointerException("output collector must not be null"); 42 | } 43 | if (pendingMessageIds == null) { 44 | throw new NullPointerException("pending messages set must not be null"); 45 | } 46 | this.out = out; 47 | this.pendingMessageIds = pendingMessageIds; 48 | } 49 | 50 | @Override 51 | public List emit(List tuple) { 52 | // Generate a message Id so these tuples can be properly ACK'd when they've 53 | // been processed. 
We use message acknowledging to determine when all output 54 | // from a Spout has been processed. 55 | Object messageId = UUID.randomUUID(); 56 | List taskIds = out.emit(tuple, messageId); 57 | pendingMessageIds.add(messageId); 58 | return taskIds; 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/CappedValues.java: -------------------------------------------------------------------------------- 1 | /* 2 | * ***************************************************************************** 3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com 4 | * ***************************************************************************** 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with 8 | * the License. You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | * **************************************************************************** 17 | */ 18 | 19 | package org.pentaho.kettle.engines.storm; 20 | 21 | import backtype.storm.tuple.Values; 22 | 23 | /** 24 | * A convenience class for making tuples of values with at most {@code N} 25 | * values. 26 | */ 27 | @SuppressWarnings("serial") 28 | public class CappedValues extends Values { 29 | public CappedValues(int maxValues, Object... values) { 30 | if (maxValues < 1) { 31 | throw new IllegalArgumentException("max values must be > 0"); 32 | } 33 | 34 | int max = Math.min(values.length, maxValues); 35 | for (int i = 0; i < max; i++) { 36 | add(values[i]); 37 | } 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/CollectorRowListener.java: -------------------------------------------------------------------------------- 1 | /* 2 | * ***************************************************************************** 3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com 4 | * ***************************************************************************** 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with 8 | * the License. You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | * **************************************************************************** 17 | */ 18 | 19 | package org.pentaho.kettle.engines.storm; 20 | 21 | import org.pentaho.di.core.exception.KettleStepException; 22 | import org.pentaho.di.core.row.RowMetaInterface; 23 | import org.pentaho.di.trans.step.RowListener; 24 | import org.pentaho.di.trans.step.StepMetaDataCombi; 25 | 26 | /** 27 | * Listens for rows emitted from Kettle steps and passes them to an {@link IKettleOutputCollector} so they may be routed by Storm. 28 | */ 29 | public class CollectorRowListener implements RowListener { 30 | 31 | private KettleStormUtils utils = new KettleStormUtils(); 32 | 33 | private IKettleOutputCollector collector; 34 | private int numFields = 0; 35 | 36 | public CollectorRowListener(StepMetaDataCombi step, IKettleOutputCollector collector, int numFields) { 37 | if (step == null || collector == null) { 38 | throw new NullPointerException(); 39 | } 40 | if (numFields < 1) { 41 | throw new IllegalArgumentException("numFields must be > 0"); 42 | } 43 | this.collector = collector; 44 | this.numFields = numFields; 45 | } 46 | 47 | @Override 48 | public void errorRowWrittenEvent(RowMetaInterface rowMeta, Object[] out) throws KettleStepException { 49 | } 50 | 51 | @Override 52 | public void rowReadEvent(RowMetaInterface rowMeta, Object[] out) throws KettleStepException { 53 | } 54 | 55 | @Override 56 | public void rowWrittenEvent(RowMetaInterface rowMeta, Object[] out) throws KettleStepException { 57 | collector.emit(new CappedValues(numFields, utils.convertToRow(rowMeta, out))); 58 | } 59 | 60 | } -------------------------------------------------------------------------------- /kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/IKettleOutputCollector.java: -------------------------------------------------------------------------------- 1 | /* 2 | * ***************************************************************************** 3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com 4 | * ***************************************************************************** 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with 8 | * the License. You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | * **************************************************************************** 17 | */ 18 | 19 | package org.pentaho.kettle.engines.storm; 20 | 21 | import java.util.List; 22 | 23 | /** 24 | * The main API for emitting tuples from Kettle to Storm. 
25 | */ 26 | public interface IKettleOutputCollector { 27 | List emit(List tuple); 28 | } 29 | -------------------------------------------------------------------------------- /kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/KettleControlSignal.java: -------------------------------------------------------------------------------- 1 | /* 2 | * ***************************************************************************** 3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com 4 | * ***************************************************************************** 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with 8 | * the License. You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | * **************************************************************************** 17 | */ 18 | 19 | package org.pentaho.kettle.engines.storm; 20 | 21 | /** 22 | * Control signals are sent from component to their dependencies to signal state 23 | * changes. 24 | */ 25 | public enum KettleControlSignal { 26 | /** 27 | * Indicates a component is done processing. 28 | */ 29 | COMPLETE 30 | } 31 | -------------------------------------------------------------------------------- /kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/KettleStorm.java: -------------------------------------------------------------------------------- 1 | /* 2 | * ***************************************************************************** 3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com 4 | * ***************************************************************************** 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with 8 | * the License. You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | * **************************************************************************** 17 | */ 18 | 19 | package org.pentaho.kettle.engines.storm; 20 | 21 | import org.pentaho.di.core.exception.KettleException; 22 | import org.slf4j.Logger; 23 | import org.slf4j.LoggerFactory; 24 | 25 | import java.util.concurrent.TimeUnit; 26 | 27 | /** 28 | * 29 | */ 30 | public class KettleStorm { 31 | private static final Logger logger = LoggerFactory.getLogger(KettleStorm.class); 32 | 33 | public static void main(String[] args) throws Exception { 34 | if (args == null || args.length != 1) { 35 | throw new IllegalArgumentException("Must specify transformation file name"); 36 | } 37 | 38 | StormExecutionEngineConfig config = new StormExecutionEngineConfig(); 39 | config.setDebugMode(Boolean.valueOf(System.getProperty("kettle-storm-debug", "false"))); 40 | config.setLocalMode(Boolean.valueOf(System.getProperty("kettle-storm-local-mode", "false"))); 41 | config.setTopologyJar(System.getProperty("kettle-storm-topology-jar", StormExecutionEngineConfig.loadStormTopologyJarFromConfiguration())); 42 | config.setTransformationFile(args[0]); 43 | 44 | final StormExecutionEngine engine = new StormExecutionEngine(config); 45 | 46 | if (config.isLocalMode()) { 47 | logger.debug("Executing in local mode"); 48 | } 49 | 50 | engine.init(); 51 | engine.execute(); 52 | 53 | Runtime.getRuntime().addShutdownHook(new Thread() { 54 | @Override 55 | public void run() { 56 | logger.info("Stopping transformation"); 57 | try { 58 | engine.stop(); 59 | } catch (KettleException ex) { 60 | logger.error("Error stopping topology for Kettle transformation", ex); 61 | } 62 | } 63 | }); 64 | 65 | logger.info("Waiting for transformation to complete..."); 66 | logger.info("Press CTRL-C to kill the topology and exit."); 67 | 68 | try { 69 | do { 70 | // Wait until the transformation is complete 71 | } while (!engine.isComplete(100, TimeUnit.MILLISECONDS)); 72 | logger.debug("Transformation complete!"); 73 | } finally { 74 | engine.stop(); 75 | } 76 | } 77 | 78 | } 79 | -------------------------------------------------------------------------------- /kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/KettleStormUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * ***************************************************************************** 3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com 4 | * ***************************************************************************** 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with 8 | * the License. You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | * **************************************************************************** 17 | */ 18 | 19 | package org.pentaho.kettle.engines.storm; 20 | 21 | import java.io.IOException; 22 | import java.io.Serializable; 23 | import java.util.HashSet; 24 | import java.util.List; 25 | import java.util.Set; 26 | import java.util.UUID; 27 | 28 | import org.pentaho.di.core.Const; 29 | import org.pentaho.di.core.KettleEnvironment; 30 | import org.pentaho.di.core.RowSet; 31 | import org.pentaho.di.core.exception.KettleException; 32 | import org.pentaho.di.core.row.RowMetaInterface; 33 | import org.pentaho.di.core.row.ValueMetaInterface; 34 | import org.pentaho.di.trans.Trans; 35 | import org.pentaho.di.trans.TransConfiguration; 36 | import org.pentaho.di.trans.TransExecutionConfiguration; 37 | import org.pentaho.di.trans.TransMeta; 38 | import org.pentaho.di.trans.step.StepMetaDataCombi; 39 | import org.pentaho.di.trans.step.errorhandling.StreamInterface; 40 | import org.pentaho.kettle.engines.storm.bolt.KettleControlBolt; 41 | import org.pentaho.kettle.engines.storm.bolt.KettleStepBolt; 42 | import org.pentaho.kettle.engines.storm.signal.BasicSignalNotifier; 43 | import org.pentaho.kettle.engines.storm.spout.KettleStepSpout; 44 | import org.slf4j.Logger; 45 | import org.slf4j.LoggerFactory; 46 | 47 | import backtype.storm.Config; 48 | import backtype.storm.generated.StormTopology; 49 | import backtype.storm.topology.BoltDeclarer; 50 | import backtype.storm.topology.OutputFieldsDeclarer; 51 | import backtype.storm.topology.TopologyBuilder; 52 | import backtype.storm.tuple.Fields; 53 | 54 | /** 55 | * A collection of utility methods for working with Kettle and Storm. 56 | * 57 | * TODO refactor this into more meaningful components 58 | */ 59 | @SuppressWarnings("serial") 60 | public class KettleStormUtils implements Serializable { 61 | private static final Logger logger = LoggerFactory 62 | .getLogger(KettleStormUtils.class); 63 | 64 | private static final String KETTLE_TOPOLOGY_NAME = "kettle.topology.name"; 65 | 66 | /** 67 | * Create a topology from a transformation. 68 | * 69 | * @param conf Storm configuration to use to configure connection information. 70 | * @param meta Transformation meta to build topology from. 71 | * @return Storm topology capable of executing the Kettle transformation. 72 | * @throws KettleException Error loading the transformation details or initializing the kettle environment 73 | * @throws IOException Error generating the transformation XML from the meta. 
74 | */ 75 | public StormTopology createTopology(Config conf, TransMeta meta) throws KettleException, IOException { 76 | initKettleEnvironment(); 77 | TransConfiguration transConfig = new TransConfiguration(meta, 78 | new TransExecutionConfiguration()); 79 | String transXml = transConfig.getXML(); 80 | Trans trans = new Trans(meta); 81 | trans.prepareExecution(null); 82 | List steps = trans.getSteps(); 83 | 84 | String topologyName = generateTopologyName(meta.getName()); 85 | setTopologyName(conf, topologyName); 86 | 87 | TopologyBuilder builder = new TopologyBuilder(); 88 | 89 | Set leafSteps = collectLeafStepNames(trans); 90 | 91 | String controlBoltId = topologyName + "-control-bolt"; 92 | BasicSignalNotifier notifier = new BasicSignalNotifier(controlBoltId); 93 | BoltDeclarer controlBoltDeclarer = builder.setBolt(controlBoltId, new KettleControlBolt(topologyName, notifier, leafSteps)); 94 | for (StepMetaDataCombi step : steps) { 95 | step.step.init(step.meta, step.data); 96 | 97 | // The control bolt must receive all signal tuples from all leaf steps 98 | if (leafSteps.contains(step.step.getStepname())) { 99 | controlBoltDeclarer.allGrouping(step.step.getStepname(), "signal"); 100 | } 101 | 102 | if (isSpout(step)) { 103 | builder.setSpout(step.step.getStepname(), new KettleStepSpout( 104 | step.step.getStepname(), transXml, step), step.step.getStepMeta().getCopies()) 105 | .setMaxTaskParallelism(step.step.getStepMeta().getCopies()); 106 | } else { 107 | BoltDeclarer bd = builder.setBolt(step.step.getStepname(), 108 | new KettleStepBolt(step.step.getStepname(), transXml, 109 | step), step.step.getStepMeta().getCopies()) 110 | .setMaxTaskParallelism(step.step.getStepMeta().getCopies()); 111 | for (StreamInterface info : step.stepMeta.getStepMetaInterface().getStepIOMeta().getInfoStreams()) { 112 | StepMetaDataCombi infoStep = findStep(trans, 113 | info.getStepname()); 114 | bd.fieldsGrouping(info.getStepname(), getOutputFields(infoStep)); 115 | bd.allGrouping(info.getStepname(), "signal"); 116 | } 117 | for (RowSet input : step.step.getInputRowSets()) { 118 | StepMetaDataCombi inputStep = findStep(trans, 119 | input.getOriginStepName()); 120 | bd.fieldsGrouping(input.getOriginStepName(), 121 | getOutputFields(inputStep)); 122 | // All bolts must receive all signal tuples from all previous steps 123 | bd.allGrouping(input.getOriginStepName(), "signal"); 124 | } 125 | } 126 | } 127 | 128 | return builder.createTopology(); 129 | } 130 | 131 | /** 132 | * Find all steps that do not have output hops. 133 | * 134 | * @param trans 135 | * The transformation. 136 | * @return The set of all steps that do not have output hops. 137 | */ 138 | private Set collectLeafStepNames(Trans trans) { 139 | Set leafSteps = new HashSet(); 140 | for (StepMetaDataCombi step : trans.getSteps()) { 141 | if (isLeafStep(trans, step)) { 142 | leafSteps.add(step.step.getStepname()); 143 | } 144 | } 145 | return leafSteps; 146 | } 147 | 148 | private boolean isLeafStep(Trans trans, StepMetaDataCombi step) { 149 | return trans.getTransMeta().findNextSteps(step.stepMeta).isEmpty(); 150 | } 151 | 152 | /** 153 | * Finds a step by name within a transformation. 154 | * 155 | * @param trans 156 | * Transformation to search within. 157 | * @param stepName 158 | * Name of step to look up. 159 | * @return The first step found whose stepname matches the provided one. 
160 | */ 161 | private StepMetaDataCombi findStep(Trans trans, String stepName) { 162 | for (StepMetaDataCombi step : trans.getSteps()) { 163 | if (stepName.equals(step.step.getStepname())) { 164 | return step; 165 | } 166 | } 167 | throw new RuntimeException("Unable to find step with name " + stepName); 168 | } 169 | 170 | /** 171 | * Determines if the step should be converted to a Spout. A step should be 172 | * converted to a spout if it receives no input. 173 | * 174 | * @param step 175 | * @return 176 | */ 177 | private boolean isSpout(StepMetaDataCombi step) { 178 | return step.step.getInputRowSets().isEmpty(); 179 | } 180 | 181 | public void declareOutputFields(StepMetaDataCombi step, 182 | OutputFieldsDeclarer declarer) { 183 | declarer.declare(getOutputFields(step)); 184 | } 185 | 186 | /** 187 | * Determine the output row meta for this step. 188 | * 189 | * @param step Step to determine output rows for. 190 | * @return The output row meta for the step provided. 191 | */ 192 | private RowMetaInterface getOutputRowMeta(StepMetaDataCombi step) { 193 | try { 194 | return step.step.getTrans().getTransMeta() 195 | .getStepFields(step.step.getStepMeta()); 196 | } catch (KettleException ex) { 197 | throw new RuntimeException("Unable to get output fields from step " 198 | + step.step.getStepname()); 199 | } 200 | } 201 | 202 | /** 203 | * Returns the fields a step produces as output. 204 | * 205 | * @param step Step to determine output fields for. 206 | * @return The field layout the step will produce. 207 | */ 208 | public Fields getOutputFields(StepMetaDataCombi step) { 209 | String[] fieldNames = getOutputRowMeta(step).getFieldNames(); 210 | String[] outputFieldNames = new String[fieldNames.length]; 211 | for (int i = 0; i < fieldNames.length; i ++) { 212 | outputFieldNames[i] = step.step.getStepname() + "-" + fieldNames[i]; 213 | } 214 | return new Fields(outputFieldNames); 215 | } 216 | 217 | /** 218 | * Initialize the Kettle environment. 219 | * 220 | * @throws KettleException If an error is encountered during initialization 221 | */ 222 | public void initKettleEnvironment() throws KettleException { 223 | if (!KettleEnvironment.isInitialized()) { 224 | logger.debug("Initializing Kettle Environment..."); 225 | logger.debug("Kettle Home: " + Const.getKettleDirectory()); 226 | KettleEnvironment.init(); 227 | } 228 | } 229 | 230 | public StepMetaDataCombi getStep(String transXml, String stepName) throws KettleException { 231 | initKettleEnvironment(); 232 | TransConfiguration transConfiguration = TransConfiguration 233 | .fromXML(transXml); 234 | TransMeta transMeta = transConfiguration.getTransMeta(); 235 | Trans trans = new Trans(transMeta); 236 | trans.prepareExecution(null); 237 | transMeta.setUsingThreadPriorityManagment(false); 238 | trans.setRunning(true); // GO GO GO 239 | for (StepMetaDataCombi step : trans.getSteps()) { 240 | if (stepName.equals(step.step.getStepname())) { 241 | if (!step.step.init(step.meta, step.data)) { 242 | throw new RuntimeException("Unable to initialize step " 243 | + step.step.getStepname()); 244 | } 245 | for (RowSet rowSet : step.step.getInputRowSets()) { 246 | rowSet.setRowMeta(getOutputRowMeta(findStep(trans, 247 | rowSet.getOriginStepName()))); 248 | } 249 | return step; 250 | } 251 | } 252 | throw new RuntimeException("Unable to locate step: " + stepName); 253 | } 254 | 255 | /** 256 | * Convert a row from Kettle object to Java object. 257 | * 258 | * @param rowMeta Meta information about the row provided. 
259 | * @param tuple Row of data to convert. 260 | * @return Converted values based on the row meta given. 261 | */ 262 | public Object[] convertToRow(RowMetaInterface rowMeta, Object[] tuple) { 263 | for (int i = 0; i < tuple.length; i++) { 264 | try { 265 | if (tuple[i] != null) { 266 | ValueMetaInterface meta = rowMeta.getValueMeta(i); 267 | switch (meta.getType()) { 268 | case ValueMetaInterface.TYPE_STRING: 269 | tuple[i] = meta.getString(tuple[i]); 270 | break; 271 | case ValueMetaInterface.TYPE_NUMBER: 272 | tuple[i] = meta.getNumber(tuple[i]); 273 | break; 274 | case ValueMetaInterface.TYPE_INTEGER: 275 | tuple[i] = meta.getInteger(tuple[i]); 276 | break; 277 | case ValueMetaInterface.TYPE_DATE: 278 | tuple[i] = meta.getDate(tuple[i]); 279 | break; 280 | default: 281 | throw new IllegalArgumentException( 282 | "Unsupported data type: " 283 | + rowMeta.getValueMeta(i).getTypeDesc()); 284 | } 285 | } 286 | } catch (Exception ex) { 287 | throw new RuntimeException("unable to convert value: " 288 | + tuple[i], ex); 289 | } 290 | } 291 | 292 | return tuple; 293 | } 294 | 295 | /** 296 | * Generate a unique topology name. 297 | * 298 | * @param name Prefix for the topology name so its easily identifiable. 299 | * @return A unique topology name, prefixed with the name provided. 300 | */ 301 | private String generateTopologyName(String name) { 302 | return name + "-" + UUID.randomUUID().toString(); 303 | } 304 | 305 | /** 306 | * Set the topology name in a configuration so it can be retrieved by another 307 | * process later. 308 | * 309 | * @param conf Configuration to store topology name in. 310 | * @param name Topology name to set. 311 | */ 312 | private void setTopologyName(Config conf, String name) { 313 | conf.put(KETTLE_TOPOLOGY_NAME, name); 314 | } 315 | 316 | /** 317 | * Retrieve the topology name from a Storm configuration. 318 | * 319 | * @param conf Storm configuration used to create the topology from a Kettle 320 | * transformation. 321 | * @return The name of the topology created for a Kettle transformation with 322 | * the provided configuration. 323 | */ 324 | public String getTopologyName(Config conf) { 325 | return (String) conf.get(KETTLE_TOPOLOGY_NAME); 326 | } 327 | } 328 | -------------------------------------------------------------------------------- /kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/KettleTopologyBuilder.java: -------------------------------------------------------------------------------- 1 | package org.pentaho.kettle.engines.storm; 2 | 3 | import org.pentaho.di.core.exception.KettleException; 4 | import org.pentaho.di.trans.TransMeta; 5 | 6 | import backtype.storm.Config; 7 | import backtype.storm.generated.StormTopology; 8 | 9 | public interface KettleTopologyBuilder { 10 | /** 11 | * Build a topology capable of executing the provided transformation. 12 | * 13 | * @param conf 14 | * Storm configuration to use to configure connection 15 | * information. 16 | * @param trans 17 | * Transformation meta to build topology from. 18 | * @return Storm topology capable of executing the Kettle transformation. 
19 | * @throws KettleException 20 | * Error loading the transformation details or initializing the 21 | * kettle environment 22 | */ 23 | StormTopology build(Config config, TransMeta trans) throws KettleException; 24 | } 25 | -------------------------------------------------------------------------------- /kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/Notifier.java: -------------------------------------------------------------------------------- 1 | /* 2 | * ***************************************************************************** 3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com 4 | * ***************************************************************************** 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with 8 | * the License. You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | * **************************************************************************** 17 | */ 18 | 19 | package org.pentaho.kettle.engines.storm; 20 | 21 | import java.io.Serializable; 22 | import java.util.Map; 23 | 24 | /** 25 | * This provides a mechanism for signaling a state change. For example, by 26 | * sending a {@link KettleControlSignal#COMPLETE} a transformation can notify 27 | * interested parties it has completed. 28 | */ 29 | public interface Notifier extends Serializable { 30 | /** 31 | * Initialize this notifier. 32 | * 33 | * @param stormConf 34 | * The Storm configuration for this notifier. 35 | */ 36 | @SuppressWarnings("rawtypes") 37 | void init(Map stormConf); 38 | 39 | /** 40 | * Signals a state change. 41 | * 42 | * @param id 43 | * The identifier sending the message. 44 | * @param signal 45 | * The control signal. 46 | * @throws Exception 47 | * An error was encountered while sending notification messages. 48 | */ 49 | void notify(String id, KettleControlSignal signal) throws NotifierException; 50 | 51 | /** 52 | * Called when the component utilizing this notifier is being cleaned up. 53 | * There is no guarantee that cleanup will be called. 54 | */ 55 | void cleanup(); 56 | } 57 | -------------------------------------------------------------------------------- /kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/NotifierException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * ***************************************************************************** 3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com 4 | * ***************************************************************************** 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with 8 | * the License. You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | * **************************************************************************** 17 | */ 18 | 19 | package org.pentaho.kettle.engines.storm; 20 | 21 | /** 22 | * Indicates an error sending a notification. 23 | */ 24 | @SuppressWarnings("serial") 25 | public class NotifierException extends Exception { 26 | 27 | public NotifierException(String message, Throwable cause) { 28 | super(message, cause); 29 | } 30 | 31 | public NotifierException(String message) { 32 | super(message); 33 | } 34 | 35 | public NotifierException(Throwable cause) { 36 | super(cause); 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/StormExecutionEngine.java: -------------------------------------------------------------------------------- 1 | /* 2 | * ***************************************************************************** 3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com 4 | * ***************************************************************************** 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with 8 | * the License. You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | * **************************************************************************** 17 | */ 18 | 19 | package org.pentaho.kettle.engines.storm; 20 | 21 | import backtype.storm.Config; 22 | import backtype.storm.LocalCluster; 23 | import backtype.storm.StormSubmitter; 24 | import backtype.storm.contrib.signals.SignalListener; 25 | import backtype.storm.contrib.signals.StormSignalConnection; 26 | import backtype.storm.generated.NotAliveException; 27 | import backtype.storm.generated.StormTopology; 28 | import backtype.storm.utils.NimbusClient; 29 | import backtype.storm.utils.Utils; 30 | import org.apache.thrift7.TException; 31 | import org.pentaho.di.core.exception.KettleException; 32 | import org.pentaho.di.trans.TransMeta; 33 | import org.pentaho.kettle.engines.storm.signal.QuickCloseStormSignalConnectionFactory; 34 | import org.slf4j.Logger; 35 | import org.slf4j.LoggerFactory; 36 | 37 | import java.io.IOException; 38 | import java.util.Collections; 39 | import java.util.Map; 40 | import java.util.concurrent.CountDownLatch; 41 | import java.util.concurrent.TimeUnit; 42 | 43 | /** 44 | * An engine capable of processing data as defined by a Kettle transformation as a Storm topology. It provides a simple mechanism for 45 | * starting, polling for status, and stopping a Storm topology. 
46 | */ 47 | public class StormExecutionEngine { 48 | private static final Logger logger = LoggerFactory.getLogger(StormExecutionEngine.class); 49 | private static KettleStormUtils util = new KettleStormUtils(); 50 | private QuickCloseStormSignalConnectionFactory signalConnectionFactory = new QuickCloseStormSignalConnectionFactory(); 51 | 52 | private LocalCluster localCluster = null; 53 | 54 | private StormExecutionEngineConfig config; 55 | 56 | private TransMeta meta; 57 | 58 | private Config stormConfig; 59 | 60 | // Flag to indicate the engine is executing 61 | private volatile boolean running = false; 62 | // This is used to synchronize blocking for the transformation to complete 63 | private CountDownLatch transCompleteLatch; 64 | private String topologyName; 65 | 66 | public StormExecutionEngine(StormExecutionEngineConfig config) { 67 | if (config == null) { 68 | throw new NullPointerException("config must not be null"); 69 | } 70 | this.config = config; 71 | } 72 | 73 | /** 74 | * Prepare the engine to execute the transformation located at 75 | * {@link StormExecutionEngineConfig#getTransformationFile()}. 76 | * 77 | * @throws KettleException Error loading transformation 78 | */ 79 | public void init() throws KettleException { 80 | stormConfig = loadStormConfig(); 81 | util.initKettleEnvironment(); 82 | meta = new TransMeta(config.getTransformationFile()); 83 | setJarToUpload(config.getTopologyJar()); 84 | } 85 | 86 | /** 87 | * Execute the transformation as a Storm topology. 88 | * 89 | * @throws IOException Error generating the transformation XML from the meta. 90 | * @throws KettleException Error reading transformation settings or starting execution entirely. 91 | * @throws InterruptedException Thread was interrupted while waiting for the topology to 92 | * complete. {@link #stop()} should be called before propagating. 93 | * @throws Exception Generic exception was thrown while establishing a connection to 94 | * ZooKeeper. 95 | */ 96 | public synchronized void execute() throws KettleException, IOException, InterruptedException { 97 | StormTopology topology = util.createTopology(stormConfig, meta); 98 | 99 | topologyName = util.getTopologyName(stormConfig); 100 | 101 | transCompleteLatch = new CountDownLatch(1); 102 | // TODO Support more than one end step. Deserialize message and check for specific steps completing instead of just counting them. 103 | final StormSignalConnection signalConnection = signalConnectionFactory.createSignalConnection(topologyName, new SignalListener() { 104 | @Override 105 | public void onSignal(byte[] data) { 106 | // If anything is received for the topology name we consider it to mean the transformation is complete 107 | logger.info("Received transformation complete message"); 108 | transCompleteLatch.countDown(); 109 | } 110 | }); 111 | 112 | submitTopology(topologyName, stormConfig, topology); 113 | logger.info(String.format("Submitted transformation as topology '%s'\n", topologyName)); 114 | running = true; 115 | try { 116 | signalConnection.init(stormConfig); 117 | } catch (Exception ex) { 118 | try { 119 | stop(); 120 | } catch (KettleException e) { 121 | logger.warn("Error stopping topology after signal connection failure", e); 122 | } 123 | throw new KettleException("Unable to establish signal connection to ZooKeeper.", ex); 124 | } 125 | } 126 | 127 | /** 128 | * Return the topology name that was started as a result of executing this 129 | * engine. 
130 |    *
131 |    * @return The topology name used to execute the transformation provided to
132 |    *         this engine, or null if the engine has not been started.
133 |    */
134 |   public String getTopologyName() {
135 |     return topologyName;
136 |   }
137 | 
138 |   /**
139 |    * A blocking call to determine if the transformation is done executing.
140 |    *
141 |    * @param timeout the maximum time to wait
142 |    * @param unit the time unit of the timeout argument
143 |    * @return True if the topology this engine executed is complete
144 |    * @throws InterruptedException If the current thread is interrupted while waiting
145 |    * @throws IllegalStateException if the engine has not been started
146 |    */
147 |   public boolean isComplete(long timeout, TimeUnit unit) throws InterruptedException {
148 |     if (!running) {
149 |       throw new IllegalStateException("Engine not started");
150 |     }
151 |     return transCompleteLatch.await(timeout, unit);
152 |   }
153 | 
154 |   /**
155 |    * Stop the running transformation's topology in Storm.
156 |    *
157 |    * @throws KettleException If an error was encountered stopping the Storm topology.
158 |    */
159 |   public synchronized void stop() throws KettleException {
160 |     if (!running) {
161 |       // Not running, nothing to do here
162 |       return;
163 |     }
164 | 
165 |     try {
166 |       logger.debug("Attempting to kill topology: " + topologyName);
167 |       killTopology(stormConfig, topologyName);
168 |       logger.debug("Topology killed successfully");
169 |       running = false;
170 |     } catch (Exception ex) {
171 |       throw new KettleException("Unable to kill topology: " + topologyName, ex);
172 |     }
173 |   }
174 | 
175 |   /**
176 |    * Load the Storm {@link Config} by reading command line options and the Storm
177 |    * config files.
178 |    *
179 |    * @return Configuration with all possible configurations loaded from the
180 |    *         environment.
181 |    */
182 |   @SuppressWarnings("unchecked")
183 |   private Config loadStormConfig() {
184 |     final Config conf = new Config();
185 |     conf.setDebug(config.isDebugMode());
186 |     conf.putAll(Utils.readCommandLineOpts());
187 |     conf.putAll(Utils.readStormConfig());
188 | 
189 |     if (config.isLocalMode()) {
190 |       conf.put(Config.STORM_CLUSTER_MODE, "local");
191 |       conf.put(Config.STORM_ZOOKEEPER_SERVERS, Collections.singletonList("localhost"));
192 |       conf.put(Config.STORM_ZOOKEEPER_PORT, 2000);
193 |     }
194 | 
195 |     return conf;
196 |   }
197 | 
198 |   /**
199 |    * Storm needs to know what jar contains code to execute a topology. It keys
200 |    * off the "storm.jar" System property. We will set it if it's not already set
201 |    * to the provided jar path.
202 |    *
203 |    * @param jarPath Path to jar file to submit with topology. This should be a jar
204 |    *                containing all required resources to execute the transformation.
205 |    *                Plugins need not be included if they can be resolved from
206 |    *                $KETTLE_HOME/plugins.
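   * <p>
   * For reference, the same property can also be supplied when launching the JVM (the path below
   * is a placeholder):
   * <pre>
   * java -Dstorm.jar=path/to/kettle-engine-storm-assembly.jar ...
   * </pre>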
207 | */ 208 | private static void setJarToUpload(String jarPath) { 209 | String stormJar = System.getProperty("storm.jar", jarPath); 210 | System.setProperty("storm.jar", jarPath); 211 | logger.debug("Configured Storm topology jar as: {}", stormJar); 212 | } 213 | 214 | @SuppressWarnings("rawtypes") 215 | private void submitTopology(String name, Map stormConf, StormTopology topology) throws KettleException { 216 | if (config.isLocalMode()) { 217 | localCluster = new LocalCluster(); 218 | localCluster.submitTopology(name, stormConf, topology); 219 | } else { 220 | try { 221 | StormSubmitter.submitTopology(name, stormConf, topology); 222 | } catch (Exception ex) { 223 | throw new KettleException("Error submitting topology " + name, ex); 224 | } 225 | } 226 | } 227 | 228 | @SuppressWarnings("rawtypes") 229 | private void killTopology(Map conf, String name) throws NotAliveException, TException { 230 | if (config.isLocalMode()) { 231 | localCluster.killTopology(name); 232 | localCluster.shutdown(); 233 | } else { 234 | NimbusClient client = NimbusClient.getConfiguredClient(conf); 235 | client.getClient().killTopology(name); 236 | } 237 | } 238 | 239 | } 240 | -------------------------------------------------------------------------------- /kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/StormExecutionEngineConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * ***************************************************************************** 3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com 4 | * ***************************************************************************** 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with 8 | * the License. You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | * **************************************************************************** 17 | */ 18 | 19 | package org.pentaho.kettle.engines.storm; 20 | 21 | import java.io.IOException; 22 | import java.util.Properties; 23 | 24 | /** 25 | * Defines configuration and runtime settings for the 26 | * {@link StormExecutionEngine}. 27 | */ 28 | public class StormExecutionEngineConfig { 29 | /** 30 | * The jar to submit along with the topology. This should include everything Kettle needs to boot up and then load plugins from elsewhere. 31 | * By default, it will use the *-with-dependencies.jar generated with Maven from this project. See README.md for more information. 
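 * <p>
 * Sketch of wiring in the packaged default (assumes kettle-storm.properties is on the classpath,
 * as it is in the generated assembly):
 * <pre>
 * StormExecutionEngineConfig config = new StormExecutionEngineConfig();
 * config.setTopologyJar(StormExecutionEngineConfig.loadStormTopologyJarFromConfiguration());
 * </pre>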
32 | */ 33 | private String topologyJar; 34 | private String transformationFile; 35 | private boolean debugMode; 36 | private boolean localMode; 37 | 38 | public String getTopologyJar() { 39 | return topologyJar; 40 | } 41 | 42 | public void setTopologyJar(String topologyJar) { 43 | this.topologyJar = topologyJar; 44 | } 45 | 46 | public String getTransformationFile() { 47 | return transformationFile; 48 | } 49 | 50 | public void setTransformationFile(String transformationFile) { 51 | this.transformationFile = transformationFile; 52 | } 53 | 54 | public boolean isDebugMode() { 55 | return debugMode; 56 | } 57 | 58 | public void setDebugMode(boolean debugMode) { 59 | this.debugMode = debugMode; 60 | } 61 | 62 | public boolean isLocalMode() { 63 | return localMode; 64 | } 65 | 66 | public void setLocalMode(boolean localMode) { 67 | this.localMode = localMode; 68 | } 69 | 70 | public static String loadStormTopologyJarFromConfiguration() throws IOException { 71 | Properties p = new Properties(); 72 | p.load(StormExecutionEngineConfig.class.getResourceAsStream("/kettle-storm.properties")); 73 | return p.getProperty("kettle.topology.jar"); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/bolt/KettleControlBolt.java: -------------------------------------------------------------------------------- 1 | /* 2 | * ***************************************************************************** 3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com 4 | * ***************************************************************************** 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with 8 | * the License. You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | * **************************************************************************** 17 | */ 18 | 19 | package org.pentaho.kettle.engines.storm.bolt; 20 | 21 | import java.util.ArrayList; 22 | import java.util.HashMap; 23 | import java.util.List; 24 | import java.util.Map; 25 | import java.util.Set; 26 | 27 | import org.pentaho.kettle.engines.storm.KettleControlSignal; 28 | import org.pentaho.kettle.engines.storm.Notifier; 29 | import org.pentaho.kettle.engines.storm.StormExecutionEngine; 30 | import org.pentaho.kettle.engines.storm.signal.KettleSignal; 31 | import org.slf4j.Logger; 32 | import org.slf4j.LoggerFactory; 33 | 34 | import backtype.storm.task.OutputCollector; 35 | import backtype.storm.task.TopologyContext; 36 | import backtype.storm.topology.OutputFieldsDeclarer; 37 | import backtype.storm.topology.base.BaseRichBolt; 38 | import backtype.storm.tuple.Tuple; 39 | 40 | import com.google.common.base.Preconditions; 41 | import com.google.common.base.Strings; 42 | 43 | /** 44 | * This bolt aggregates all the final {@link KettleSignal}s from leaf bolts and 45 | * notifies {@link StormExecutionEngine} that the transformation has completed. 
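 * <p>
 * Construction sketch (the transformation name, topology name, and leaf step name below are
 * placeholders; the real wiring happens when the topology is built):
 * <pre>
 * Set&lt;String&gt; leafSteps = Collections.singleton("Text file output");
 * KettleControlBolt controlBolt = new KettleControlBolt(
 *     "my-transformation", new BasicSignalNotifier(topologyName), leafSteps);
 * </pre>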
46 | */
47 | @SuppressWarnings("serial")
48 | public class KettleControlBolt extends BaseRichBolt {
49 |   private static final Logger logger = LoggerFactory
50 |     .getLogger(KettleControlBolt.class);
51 | 
52 |   private String transformationName;
53 |   private Notifier notifier;
54 |   private OutputCollector collector;
55 |   private Set<String> leafSteps;
56 |   private Map<String, List<Integer>> componentToPendingTasks;
57 | 
58 |   /**
59 |    * Create a new control bolt to check for completion of the given steps.
60 |    *
61 |    * @param transformationName
62 |    *          The name of the transformation. This is the name of the resource
63 |    *          the notifier will signal when this bolt has received a complete
64 |    *          signal from all leaf steps.
65 |    * @param notifier
66 |    *          The notifier used to broadcast the transformation-complete signal.
67 |    * @param leafSteps
68 |    *          Set of all leaf steps that must complete before the
69 |    *          transformation is to be considered complete.
70 |    */
71 |   public KettleControlBolt(String transformationName, Notifier notifier,
72 |     Set<String> leafSteps) {
73 |     Preconditions.checkArgument(!Strings.isNullOrEmpty(transformationName));
74 |     Preconditions.checkNotNull(leafSteps);
75 |     Preconditions.checkArgument(!leafSteps.isEmpty(),
76 |       "At least 1 leaf step is expected");
77 |     this.transformationName = transformationName;
78 |     this.notifier = notifier;
79 |     this.leafSteps = leafSteps;
80 |   }
81 | 
82 |   @SuppressWarnings("rawtypes")
83 |   @Override
84 |   public void prepare(Map stormConf, TopologyContext context,
85 |     OutputCollector collector) {
86 |     this.collector = collector;
87 |     // Build the map of tasks that must complete for the transformation to have
88 |     // completed
89 |     componentToPendingTasks = new HashMap<String, List<Integer>>();
90 |     for (String componentId : leafSteps) {
91 |       List<Integer> tasks = context.getComponentTasks(componentId);
92 |       if (tasks == null || tasks.isEmpty()) {
93 |         throw new IllegalStateException("No tasks defined for leaf step " + componentId);
94 |       }
95 |       componentToPendingTasks.put(componentId,
96 |         new ArrayList<Integer>(tasks));
97 |     }
98 |     notifier.init(stormConf);
99 |   }
100 | 
101 |   @Override
102 |   public void execute(Tuple input) {
103 |     // We only ever expect signals to be routed to us.
104 |     try {
105 |       KettleSignal signal = (KettleSignal) input.getValue(0);
106 | 
107 |       logger.info("Received signal from " + signal.getComponentId() + ": "
108 |         + signal.getSignal());
109 | 
110 |       // Remove the pending task from the component's list
111 |       List<Integer> pendingTaskIds = componentToPendingTasks.get(signal
112 |         .getComponentId());
113 |       if (pendingTaskIds == null || !pendingTaskIds.remove(signal.getTaskId())) {
114 |         // TODO How can we fail the topology if this happens?
115 |         throw new IllegalStateException(
116 |           "Unexpected completion message received: componentId="
117 |             + signal.getComponentId() + ",taskId=" + signal.getTaskId()
118 |             + ".");
119 |       }
120 |       if (pendingTaskIds.isEmpty()) {
121 |         componentToPendingTasks.remove(signal.getComponentId());
122 |       }
123 |       if (componentToPendingTasks.isEmpty()) {
124 |         logger
125 |           .info("All leaf steps have completed. Sending transformation complete message.");
126 |         // Transformation is complete! Fire the signal.
127 |         notifier.notify(transformationName, /* not used */
128 |           KettleControlSignal.COMPLETE);
129 |       }
130 |       collector.ack(input);
131 |     } catch (Exception ex) {
132 |       logger.error("Error processing tuple: " + input, ex);
133 |       collector.fail(input);
134 |     }
135 |   }
136 | 
137 |   @Override
138 |   public void declareOutputFields(OutputFieldsDeclarer declarer) {
139 |     // We don't output anything.
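    // Completion is reported out-of-band through the Notifier (ZooKeeper via Storm Signals),
    // so this bolt is a sink from Storm's perspective and declares no output streams.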
140 | } 141 | 142 | @Override 143 | public void cleanup() { 144 | super.cleanup(); 145 | notifier.cleanup(); 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/bolt/KettleStepBolt.java: -------------------------------------------------------------------------------- 1 | /* 2 | * ***************************************************************************** 3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com 4 | * ***************************************************************************** 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with 8 | * the License. You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | * **************************************************************************** 17 | */ 18 | 19 | package org.pentaho.kettle.engines.storm.bolt; 20 | 21 | import java.util.Collections; 22 | import java.util.Deque; 23 | import java.util.LinkedList; 24 | import java.util.Map; 25 | 26 | import org.pentaho.di.core.RowSet; 27 | import org.pentaho.di.core.exception.KettleException; 28 | import org.pentaho.di.core.exception.KettleStepException; 29 | import org.pentaho.di.core.row.RowMetaInterface; 30 | import org.pentaho.di.trans.step.RowListener; 31 | import org.pentaho.di.trans.step.StepMetaDataCombi; 32 | import org.pentaho.di.trans.step.errorhandling.StreamInterface; 33 | import org.pentaho.kettle.engines.storm.CappedValues; 34 | import org.pentaho.kettle.engines.storm.KettleControlSignal; 35 | import org.pentaho.kettle.engines.storm.KettleStormUtils; 36 | import org.pentaho.kettle.engines.storm.signal.KettleSignal; 37 | import org.slf4j.Logger; 38 | import org.slf4j.LoggerFactory; 39 | 40 | import backtype.storm.task.OutputCollector; 41 | import backtype.storm.task.TopologyContext; 42 | import backtype.storm.topology.OutputFieldsDeclarer; 43 | import backtype.storm.topology.base.BaseRichBolt; 44 | import backtype.storm.tuple.Fields; 45 | import backtype.storm.tuple.Tuple; 46 | 47 | /** 48 | * A Kettle Step Bolt represents a Kettle step that receives input from at least one other Kettle step. This encapsulates the 49 | * logic required to receive input from Storm, process it, and emit any output from the step to be received by downstream bolts. 50 | */ 51 | @SuppressWarnings("serial") 52 | public class KettleStepBolt extends BaseRichBolt implements RowListener { 53 | private static final Logger logger = LoggerFactory 54 | .getLogger(KettleStepBolt.class); 55 | 56 | private KettleStormUtils utils = new KettleStormUtils(); 57 | 58 | private String componentId; 59 | private Integer taskId; 60 | 61 | private String transXml; 62 | private String stepName; 63 | 64 | private transient StepMetaDataCombi step; 65 | private OutputCollector collector; 66 | 67 | private boolean done; 68 | 69 | /** 70 | * A collection of tuples we've received. These are used to correlate output with input Tuples so message ack'ing properly groups output to the correct input. 
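 * <p>
 * Tuples are appended at the tail as they arrive and taken from the head as rows are consumed,
 * so the deque acts as a FIFO of inputs that have not yet been acked.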
71 | */ 72 | private transient Deque receivedTuples; 73 | /** 74 | * The tuple we're currently processing. This is to correlate output with input Tuples so message ack'ing properly groups output to the correct input. 75 | */ 76 | private transient Tuple currentTuple; 77 | 78 | public KettleStepBolt(String name, String transXml, StepMetaDataCombi step) { 79 | if (step == null) { 80 | throw new IllegalArgumentException( 81 | "Step Meta required to create a new Kettle Step Bolt"); 82 | } 83 | this.step = step; 84 | this.transXml = transXml; 85 | this.stepName = step.step.getStepname(); 86 | } 87 | 88 | private StepMetaDataCombi getStep() { 89 | if (step == null) { 90 | try { 91 | step = utils.getStep(transXml, stepName); 92 | } catch (KettleException e) { 93 | throw new IllegalStateException( 94 | "Error processing transformation for bolt for step: " 95 | + stepName, e); 96 | } 97 | 98 | step.step.addRowListener(this); 99 | } 100 | return step; 101 | } 102 | 103 | @Override 104 | public void prepare(@SuppressWarnings("rawtypes") Map conf, 105 | TopologyContext context, OutputCollector collector) { 106 | componentId = context.getThisComponentId(); 107 | taskId = context.getThisTaskId(); 108 | this.collector = collector; 109 | this.receivedTuples = new LinkedList<>(); 110 | } 111 | 112 | @Override 113 | public void execute(Tuple input) { 114 | logger.debug("{} bolt received {}", stepName, input); 115 | 116 | if ("signal".equals(input.getSourceStreamId())) { 117 | onSignal(input, (KettleSignal) input.getValue(0)); 118 | return; 119 | } 120 | 121 | try { 122 | // Cache the current tuple so we can anchor emitted values properly 123 | // This will not work for any step that batches records between calls to processRow() 124 | // TODO Make this work for all steps - we need a message id from Kettle to correlate tuple to message id. 125 | receivedTuples.addLast(input); 126 | injectRow(input); 127 | } catch (Exception ex) { 128 | throw new RuntimeException("Error converting tuple to Kettle row for step " + stepName, 129 | ex); 130 | } 131 | 132 | if (isInfoSource(input.getSourceComponent())) { 133 | // Immediately ack messages from info sources. We cannot determine how 134 | // they'll be used due to the lack of message identifiers in Kettle. 135 | // Assume these messages are ancillary to the input row sets messages. 136 | collector.ack(receivedTuples.removeLast()); 137 | } else { 138 | processRows(); 139 | } 140 | } 141 | 142 | private void injectRow(Tuple input) { 143 | RowSet rowSet = findRowSet(input.getSourceComponent()); 144 | logger.debug("Injecting row to rowSet: {}", input.getSourceComponent()); 145 | RowMetaInterface rowMeta = rowSet.getRowMeta(); 146 | rowSet.putRow(rowMeta, utils.convertToRow(rowMeta, input.getValues().toArray())); 147 | } 148 | 149 | private RowSet findRowSet(String stepName) { 150 | // Look through info streams first 151 | for (StreamInterface infoStream : getStep().stepMeta.getStepMetaInterface().getStepIOMeta().getInfoStreams()) { 152 | if (stepName.equals(infoStream.getStepname())) { 153 | return getStep().step.getTrans().findRowSet(infoStream.getStepname(), 0, this.stepName, 0); 154 | } 155 | } 156 | for (RowSet rs : getStep().step.getInputRowSets()) { 157 | if (stepName.equals(rs.getOriginStepName())) { 158 | return rs; 159 | } 160 | } 161 | throw new IllegalArgumentException(String.format("Could not locate row set for a step with the name '%s'", stepName)); 162 | } 163 | 164 | /** 165 | * Process a row for every received "input" (non-info) tuple. 
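 * <p>
 * The ack strategy: sample the pending input row count before and after each call to
 * {@code processRow()}; if the count changed, the head tuple is assumed to have been consumed
 * and is acked. If processing throws a {@code KettleException}, the head tuple is failed.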
166 | */ 167 | private void processRows() { 168 | if (!isInfoInputComplete()) { 169 | logger.debug("Info is not complete - not processing rows yet!"); 170 | // If we haven't received all rows for info streams do not call processRow as we'll block waiting for them. :( 171 | return; 172 | } 173 | logger.debug("Starting to process rows for {}. {} pending rows to process", stepName, receivedTuples.size()); 174 | try { 175 | do { 176 | currentTuple = receivedTuples.peekFirst(); 177 | logger.debug("Processing tuple: {}", currentTuple); 178 | try { 179 | // Keep track of how many rows we have before we start to process to 180 | // determine if processRow() actually consumed anything. 181 | long rowsRemaining = getPendingRowCount(); 182 | logger.debug("pending row count: {}", rowsRemaining); 183 | done = !getStep().step.processRow(step.meta, step.data); 184 | logger.debug("pending row count after processRow: ", getPendingRowCount()); 185 | if (getPendingRowCount() != rowsRemaining) { 186 | // Rows were consumed and ack 187 | receivedTuples.remove(); 188 | collector.ack(currentTuple); 189 | } 190 | } catch (KettleException e) { 191 | if (currentTuple != null) { 192 | receivedTuples.remove(); 193 | collector.fail(currentTuple); 194 | } 195 | throw new RuntimeException("Error processing a row for step " 196 | + stepName, e); 197 | } 198 | } while (!done && !receivedTuples.isEmpty()); 199 | } finally { 200 | if (done) { 201 | try { 202 | getStep().step.batchComplete(); 203 | } catch (KettleException ex) { 204 | logger.error("kettle exception completing batch for step " + stepName, ex); 205 | } 206 | getStep().step.dispose(step.meta, step.data); 207 | logger.debug("Step complete: {}", stepName); 208 | } 209 | } 210 | } 211 | 212 | @Override 213 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 214 | utils.declareOutputFields(step, declarer); 215 | declarer.declareStream("signal", new Fields("signal")); 216 | } 217 | 218 | @Override 219 | public void errorRowWrittenEvent(RowMetaInterface rowMeta, Object[] row) 220 | throws KettleStepException { 221 | } 222 | 223 | @Override 224 | public void rowReadEvent(RowMetaInterface rowMeta, Object[] row) 225 | throws KettleStepException { 226 | } 227 | 228 | @Override 229 | public void rowWrittenEvent(RowMetaInterface rowMeta, Object[] row) 230 | throws KettleStepException { 231 | CappedValues values = new CappedValues(rowMeta.getValueMetaList() 232 | .size(), row); 233 | if (!values.isEmpty()) { 234 | if (currentTuple == null) { 235 | // If the current tuple is null we've likely processed all received 236 | // tuples and are simply processing to get a state of "done". If any 237 | // rows are emited as part of that last dummy call to processRow this 238 | // will happen. 239 | StringBuilder sb = new StringBuilder(); 240 | for (Object o : row) { 241 | sb.append(o).append(" "); 242 | } 243 | logger.warn("Current tuple unknown for new output on bolt (" + stepName + "): " + row + ": " + sb); 244 | } 245 | collector.emit(currentTuple, values); 246 | } 247 | } 248 | 249 | /** 250 | * Process a received signal message. 251 | * 252 | * @param anchor 253 | * The incoming signal tuple to be used as an anchor for our signal 254 | * to guarantee a complete signal has been received by all downstream 255 | * systems. 256 | * @param signal 257 | * The received signal. 
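 * <p>
 * On a {@code COMPLETE} signal the sender's row set is marked done, any pending rows are drained
 * if all required input has arrived, and this step's own {@code COMPLETE} {@link KettleSignal}
 * is forwarded on the "signal" stream, acking the anchor on success and failing it on error so
 * the signal can be resent.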
258 |    */
259 |   public void onSignal(Tuple anchor, KettleSignal signal) {
260 |     logger.info("Signal received for step {}: {}", stepName, signal);
261 | 
262 |     switch (signal.getSignal()) {
263 |     case COMPLETE:
264 |       // Assume only one input for now...
265 |       logger.debug("Input is complete for bolt {}: {}", stepName, signal.getComponentId());
266 |       // Set the row set to "done"
267 |       RowSet rowSet = findRowSet(signal.getComponentId());
268 |       rowSet.setDone();
269 | 
270 |       // If all row sets (info and input) are complete then this step is completely done!
271 |       // We have to attempt to process a row for the step to realize it has nothing more to read.
272 |       // If all row sets are not complete but info input is and we have
273 |       // pending rows we should start to process them - we may have already
274 |       // received all input.
275 |       if (isInputComplete() || (isInfoInputComplete() && !receivedTuples.isEmpty())) {
276 |         if (!done) {
277 |           processRows();
278 |         }
279 |         try {
280 |           logger.info("Signaling complete for step " + stepName + " with taskId=" + taskId + ".");
281 |           collector.emit("signal", anchor, Collections.<Object> singletonList(new KettleSignal(componentId, taskId, KettleControlSignal.COMPLETE)));
282 |           // Acknowledge the received signal
283 |           collector.ack(anchor);
284 |         } catch (Exception e) {
285 |           logger.warn(stepName + ": Error notifying downstream steps of completion", e);
286 |           // Fail the received signal so it may be resent ASAP
287 |           collector.fail(anchor);
288 |         }
289 |       } else {
290 |         logger.debug("Input is not complete. Still waiting for rows...");
291 |       }
292 |       break;
293 |     default:
294 |       throw new IllegalArgumentException("Unsupported signal: " + signal.getSignal());
295 |     }
296 |   }
297 | 
298 |   /**
299 |    * Calculates how many rows are waiting to be processed across all input row sets.
300 |    *
301 |    * @return The number of rows waiting in all non-info input row sets.
302 |    */
303 |   private long getPendingRowCount() {
304 |     long pendingRowCount = 0L;
305 |     // InputRowSets does not return info stream row sets until they are ready. Then it returns them until the rows are consumed.
306 |     for (RowSet rs : getStep().step.getInputRowSets()) {
307 |       if (!isInfoSource(rs.getOriginStepName())) {
308 |         // Only include non-info row sets in this calculation since info rows will be fully consumed once the first row is processed.
309 |         logger.debug(rs.getName() + ": " + rs.size());
310 |         pendingRowCount += rs.size();
311 |       }
312 |     }
313 |     return pendingRowCount;
314 |   }
315 | 
316 |   /**
317 |    * Determines if a given step name is connected to the step for this bolt via an info stream.
318 |    *
319 |    * @param stepName The name of a step.
320 |    * @return True if {@code stepName} is connected to the step for this bolt via an info stream.
321 |    */
322 |   private boolean isInfoSource(String stepName) {
323 |     for (StreamInterface infoStream : getStep().stepMeta.getStepMetaInterface().getStepIOMeta().getInfoStreams()) {
324 |       if (infoStream.getStepname().equals(stepName)) {
325 |         return true;
326 |       }
327 |     }
328 |     return false;
329 |   }
330 | 
331 |   /**
332 |    * Determines whether all of this bolt's info streams have finished delivering rows.
333 |    *
334 |    * @return True if every info stream row set is done; false if this bolt is still waiting on info rows.
335 | */ 336 | private boolean isInfoInputComplete() { 337 | // Look through info streams first 338 | for (StreamInterface infoStream : getStep().stepMeta.getStepMetaInterface().getStepIOMeta().getInfoStreams()) { 339 | RowSet rs = getStep().step.getTrans().findRowSet(infoStream.getStepname(), 0, stepName, 0); 340 | if (!rs.isDone()) { 341 | return false; 342 | } 343 | } 344 | return true; 345 | } 346 | 347 | /** 348 | * Determines if this bolt is waiting for any additional input. 349 | * 350 | * @return True if this bolt is expecting more input. 351 | */ 352 | private boolean isInputComplete() { 353 | for (RowSet rs : getStep().step.getInputRowSets()) { 354 | if (!rs.isDone()) { 355 | return false; 356 | } 357 | } 358 | return isInfoInputComplete(); 359 | } 360 | } 361 | -------------------------------------------------------------------------------- /kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/signal/BasicSignalNotifier.java: -------------------------------------------------------------------------------- 1 | /* 2 | * ***************************************************************************** 3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com 4 | * ***************************************************************************** 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with 8 | * the License. You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | * **************************************************************************** 17 | */ 18 | 19 | package org.pentaho.kettle.engines.storm.signal; 20 | 21 | import java.util.Map; 22 | 23 | import org.pentaho.kettle.engines.storm.KettleControlSignal; 24 | import org.pentaho.kettle.engines.storm.Notifier; 25 | import org.pentaho.kettle.engines.storm.NotifierException; 26 | 27 | import backtype.storm.contrib.signals.SignalListener; 28 | import backtype.storm.contrib.signals.StormSignalConnection; 29 | 30 | import com.google.common.base.Preconditions; 31 | import com.google.common.base.Strings; 32 | 33 | /** 34 | * A notifier that uses ZooKeeper via Storm Signals to send notifications. This 35 | * notifier will ignore the specific signal provided to 36 | * {@link #notify(String, KettleControlSignal)} and instead always send an empty 37 | * message. 
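 * <p>
 * Usage sketch (names are placeholders and exception handling is omitted):
 * <pre>
 * Notifier notifier = new BasicSignalNotifier(topologyName);
 * notifier.init(stormConf);
 * notifier.notify(topologyName, KettleControlSignal.COMPLETE);
 * notifier.cleanup();
 * </pre>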
38 | */ 39 | @SuppressWarnings("serial") 40 | public class BasicSignalNotifier implements Notifier { 41 | 42 | private String id; 43 | private StormSignalConnection signalConnection; 44 | 45 | public BasicSignalNotifier(String name) { 46 | Preconditions.checkArgument(!Strings.isNullOrEmpty(name), 47 | "name cannot be null or empty"); 48 | this.id = name; 49 | } 50 | 51 | @SuppressWarnings("rawtypes") 52 | @Override 53 | public void init(Map stormConf) { 54 | // TODO Refactor this to use ZooKeeper directly 55 | signalConnection = new StormSignalConnection(id, new SignalListener() { 56 | @Override 57 | public void onSignal(byte[] data) { 58 | throw new IllegalStateException( 59 | "not expecting any signals to be sent to " + id); 60 | } 61 | }); 62 | try { 63 | signalConnection.init(stormConf); 64 | } catch (Exception ex) { 65 | throw new RuntimeException("Error creating signal connection", ex); 66 | } 67 | } 68 | 69 | /** 70 | * Send a simple empty message to the component with the given id. 71 | * 72 | * @param id 73 | * Component to notify. 74 | * @param signal 75 | * Not used. 76 | */ 77 | @Override 78 | public void notify(String id, KettleControlSignal signal) 79 | throws NotifierException { 80 | // Note: Signal value does not matter. The reception of any message 81 | // indicates transformation is complete. This is received by the 82 | // StormExecutionEngine. 83 | try { 84 | signalConnection.send(id, new byte[0]); 85 | } catch (Exception ex) { 86 | throw new NotifierException("Error notifying " + id + " with signal " 87 | + signal, ex); 88 | } 89 | } 90 | 91 | @Override 92 | public void cleanup() { 93 | signalConnection.close(); 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/signal/KettleSignal.java: -------------------------------------------------------------------------------- 1 | /* 2 | * ***************************************************************************** 3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com 4 | * ***************************************************************************** 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with 8 | * the License. You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | * **************************************************************************** 17 | */ 18 | 19 | package org.pentaho.kettle.engines.storm.signal; 20 | 21 | import org.pentaho.kettle.engines.storm.KettleControlSignal; 22 | 23 | import java.io.Serializable; 24 | 25 | /** 26 | * Represents a control message for a Kettle step. This is used to indicate 27 | * state changes between steps running as Spouts or Bolts within a Storm 28 | * topology. 29 | * 30 | * TODO Do we need the component and task ids here? Look into simply using the Tuple's. 
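 * <p>
 * Typical emission, simplified from the step spout/bolt (which also supply an anchor or a
 * message id):
 * <pre>
 * collector.emit("signal", Collections.&lt;Object&gt;singletonList(
 *     new KettleSignal(componentId, taskId, KettleControlSignal.COMPLETE)));
 * </pre>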
31 | */ 32 | @SuppressWarnings("serial") 33 | public class KettleSignal implements Serializable { 34 | private String componentId; 35 | private KettleControlSignal signal; 36 | private Integer taskId; 37 | 38 | public KettleSignal(String componentId, Integer taskId, 39 | KettleControlSignal signal) { 40 | this.componentId = componentId; 41 | this.taskId = taskId; 42 | this.signal = signal; 43 | } 44 | 45 | public String getComponentId() { 46 | return componentId; 47 | } 48 | 49 | public KettleControlSignal getSignal() { 50 | return signal; 51 | } 52 | 53 | @Override 54 | public String toString() { 55 | return "KettleSignal {componentId=" + componentId + ",taskId=" + taskId 56 | + ",signal=" + signal.name() + "}"; 57 | } 58 | 59 | public Integer getTaskId() { 60 | return taskId; 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/signal/QuickCloseStormSignalConnectionFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * ***************************************************************************** 3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com 4 | * ***************************************************************************** 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with 8 | * the License. You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | * **************************************************************************** 17 | */ 18 | 19 | package org.pentaho.kettle.engines.storm.signal; 20 | 21 | import backtype.storm.contrib.signals.SignalListener; 22 | import backtype.storm.contrib.signals.StormSignalConnection; 23 | 24 | /** 25 | * Generates {@link StormSignalConnection}s that close their connections after 26 | * receiving the first message. 27 | */ 28 | public class QuickCloseStormSignalConnectionFactory { 29 | /** 30 | * Closes the {@link StormSignalConnection} upon first signal. 
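 * <p>
 * The delegate listener runs first; the connection is closed in a finally block so it is
 * released even if the delegate throws.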
31 | */ 32 | private static class QuickCloseSignalListener implements SignalListener { 33 | private StormSignalConnection connection; 34 | private SignalListener listener; 35 | 36 | public QuickCloseSignalListener(SignalListener listener) { 37 | this.listener = listener; 38 | } 39 | 40 | public void setConnection(StormSignalConnection connection) { 41 | this.connection = connection; 42 | } 43 | 44 | @Override 45 | public void onSignal(byte[] data) { 46 | try { 47 | listener.onSignal(data); 48 | } finally { 49 | connection.close(); 50 | } 51 | } 52 | } 53 | 54 | public StormSignalConnection createSignalConnection(String name, SignalListener listener) { 55 | QuickCloseSignalListener l = new QuickCloseSignalListener(listener); 56 | StormSignalConnection connection = new StormSignalConnection(name, l); 57 | // Must set connection so it can be closed 58 | l.setConnection(connection); 59 | return connection; 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/signal/SignalClientFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * ***************************************************************************** 3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com 4 | * ***************************************************************************** 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with 8 | * the License. You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | * **************************************************************************** 17 | */ 18 | 19 | package org.pentaho.kettle.engines.storm.signal; 20 | 21 | import backtype.storm.contrib.signals.client.SignalClient; 22 | 23 | import java.io.Serializable; 24 | 25 | /** 26 | * Factory for constructing Signal Clients. 27 | */ 28 | public interface SignalClientFactory extends Serializable { 29 | /** 30 | * Create a client that listens for messages addressed to {@code name}. 31 | * 32 | * @param name Name of the client. 33 | * @return A ready to use Signal client. 34 | */ 35 | SignalClient createClient(String name); 36 | } 37 | -------------------------------------------------------------------------------- /kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/signal/SimpleSignalClientFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * ***************************************************************************** 3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com 4 | * ***************************************************************************** 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with 8 | * the License. 
You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | * **************************************************************************** 17 | */ 18 | 19 | package org.pentaho.kettle.engines.storm.signal; 20 | 21 | import backtype.storm.contrib.signals.client.SignalClient; 22 | 23 | /** 24 | * Creates {@link SignalClient}s for a known ZooKeeper instance. 25 | */ 26 | @SuppressWarnings("serial") 27 | public class SimpleSignalClientFactory implements SignalClientFactory { 28 | 29 | private String zkConnectionString; 30 | 31 | /** 32 | * Create a new factory that creates clients that use the provided ZooKeeper 33 | * connection string. 34 | * 35 | * @param zkConnectionString ZooKeeper connection string for clients to use when establishing 36 | * their connections 37 | */ 38 | public SimpleSignalClientFactory(String zkConnectionString) { 39 | this.zkConnectionString = zkConnectionString; 40 | } 41 | 42 | @Override 43 | public SignalClient createClient(String name) { 44 | return new SignalClient(zkConnectionString, name); 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /kettle-engine-storm/src/main/java/org/pentaho/kettle/engines/storm/spout/KettleStepSpout.java: -------------------------------------------------------------------------------- 1 | /* 2 | * ***************************************************************************** 3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com 4 | * ***************************************************************************** 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with 8 | * the License. You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | * **************************************************************************** 17 | */ 18 | 19 | package org.pentaho.kettle.engines.storm.spout; 20 | 21 | import java.util.Collections; 22 | import java.util.Map; 23 | import java.util.Set; 24 | import java.util.UUID; 25 | import java.util.concurrent.ConcurrentHashMap; 26 | 27 | import org.pentaho.di.core.exception.KettleException; 28 | import org.pentaho.di.trans.step.StepMetaDataCombi; 29 | import org.pentaho.kettle.engines.storm.BaseSpoutOutputCollector; 30 | import org.pentaho.kettle.engines.storm.CollectorRowListener; 31 | import org.pentaho.kettle.engines.storm.KettleControlSignal; 32 | import org.pentaho.kettle.engines.storm.KettleStormUtils; 33 | import org.pentaho.kettle.engines.storm.signal.KettleSignal; 34 | import org.slf4j.Logger; 35 | import org.slf4j.LoggerFactory; 36 | 37 | import backtype.storm.spout.SpoutOutputCollector; 38 | import backtype.storm.task.TopologyContext; 39 | import backtype.storm.topology.OutputFieldsDeclarer; 40 | import backtype.storm.topology.base.BaseRichSpout; 41 | import backtype.storm.tuple.Fields; 42 | 43 | /** 44 | * A Kettle Step Spout represents a Kettle step that produces records and specifically does not receive any input from other Kettle steps. 45 | * This encapsulates the logic to produce messages within Storm to be processed by downstream bolts. 46 | */ 47 | @SuppressWarnings("serial") 48 | public class KettleStepSpout extends BaseRichSpout { 49 | private static final Logger logger = LoggerFactory 50 | .getLogger(KettleStepSpout.class); 51 | private KettleStormUtils utils = new KettleStormUtils(); 52 | 53 | private String componentId; 54 | private Integer taskId; 55 | 56 | private String transXml; 57 | private String stepName; 58 | 59 | private transient StepMetaDataCombi step; 60 | 61 | private boolean done = false; 62 | 63 | private Object signalCompleteMessageId; 64 | 65 | /** 66 | * The set of pending messages we're waiting to be ack'd. This should be thread-safe. 
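 * <p>
 * The set is handed to the {@link BaseSpoutOutputCollector} wrapped around the collector (which
 * is expected to record each emitted message id) and is drained in {@link #ack(Object)} /
 * {@link #fail(Object)}; once the step reports no more rows and the set is empty, the spout
 * signals completion.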
67 | */ 68 | private Set pendingMessages; 69 | 70 | private SpoutOutputCollector collector; 71 | 72 | public KettleStepSpout(String name, String transXml, 73 | StepMetaDataCombi step) { 74 | if (transXml == null || step == null) { 75 | throw new NullPointerException(); 76 | } 77 | this.stepName = name; 78 | this.step = step; 79 | this.transXml = transXml; 80 | } 81 | 82 | @Override 83 | @SuppressWarnings("rawtypes") 84 | public void open(Map conf, TopologyContext context, 85 | SpoutOutputCollector collector) { 86 | componentId = context.getThisComponentId(); 87 | taskId = context.getThisTaskId(); 88 | this.collector = collector; 89 | try { 90 | this.step = utils.getStep(transXml, stepName); 91 | } catch (KettleException e) { 92 | throw new IllegalStateException( 93 | "Error processing transformation for spout for step: " 94 | + stepName, e); 95 | } 96 | 97 | if (this.step == null) { 98 | throw new IllegalStateException( 99 | "Step could not be found for spout: " + stepName); 100 | } 101 | 102 | pendingMessages = Collections.newSetFromMap(new ConcurrentHashMap(1000)); 103 | 104 | step.step.addRowListener(new CollectorRowListener(step, 105 | new BaseSpoutOutputCollector(collector, pendingMessages), utils.getOutputFields( 106 | step).size())); 107 | } 108 | 109 | @Override 110 | public void nextTuple() { 111 | if (!done) { 112 | try { 113 | done = !step.step.processRow(step.meta, step.data); 114 | } catch (KettleException e) { 115 | throw new RuntimeException("Error processing a row for step " 116 | + step.step.getStepname(), e); 117 | } 118 | } 119 | } 120 | 121 | @Override 122 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 123 | utils.declareOutputFields(step, declarer); 124 | declarer.declareStream("signal", new Fields("signal")); 125 | } 126 | 127 | @Override 128 | public void ack(Object msgId) { 129 | // Only handle completed row messages. If the ack'd message id is the signal 130 | // complete message then we're done! 131 | if (!msgId.equals(signalCompleteMessageId)) { 132 | handleCompleted(msgId); 133 | } 134 | } 135 | 136 | @Override 137 | public void fail(Object msgId) { 138 | if (msgId.equals(signalCompleteMessageId)) { 139 | logger.error("Error processing signal complete message. Resending..."); 140 | // Send the signal complete message again 141 | // TODO we should set a retry limit 142 | signalComplete(); 143 | } else { 144 | logger.error("Message failed processing: " + msgId); 145 | handleCompleted(msgId); 146 | } 147 | } 148 | 149 | private void handleCompleted(Object msgId) { 150 | // Message fully processed - remove it from our list 151 | if (!pendingMessages.remove(msgId)) { 152 | throw new IllegalStateException("Unexpected message id ack'd: " + msgId); 153 | } 154 | if (done && pendingMessages.isEmpty()) { 155 | step.step.dispose(step.meta, step.data); 156 | step.step.markStop(); 157 | signalComplete(); 158 | } 159 | } 160 | 161 | private void signalComplete() { 162 | logger.info("Signaling complete for step " + stepName + " with taskId=" + taskId + "."); 163 | try { 164 | signalCompleteMessageId = UUID.randomUUID(); 165 | collector.emit("signal", Collections. 
singletonList(new KettleSignal(componentId, taskId, KettleControlSignal.COMPLETE)), signalCompleteMessageId); 166 | } catch (Exception e) { 167 | logger.warn(stepName + ": Error notifying downstream steps", e); 168 | } 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /kettle-engine-storm/src/main/resources/ccnums.ktr: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ccnums 5 | 6 | 7 | 8 | Normal 9 | / 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | ID_BATCHYID_BATCHCHANNEL_IDYCHANNEL_IDTRANSNAMEYTRANSNAMESTATUSYSTATUSLINES_READYLINES_READLINES_WRITTENYLINES_WRITTENLINES_UPDATEDYLINES_UPDATEDLINES_INPUTYLINES_INPUTLINES_OUTPUTYLINES_OUTPUTLINES_REJECTEDYLINES_REJECTEDERRORSYERRORSSTARTDATEYSTARTDATEENDDATEYENDDATELOGDATEYLOGDATEDEPDATEYDEPDATEREPLAYDATEYREPLAYDATELOG_FIELDYLOG_FIELD 20 | 21 | 22 |
[Remainder of ccnums.ktr: a Kettle transformation definition in XML whose markup was lost when this dump was generated. Recoverable content: the transformation is named "ccnums" and chains three steps, Generate random credit card numbers (RandomCCNumberGenerator, generating 100 American Express numbers of length 15 into the fields cnumber/clength/ctype), Select values (SelectValues, operating on the clength field), and Text file output (TextFileOutput, writing comma-separated output to ccnums.txt). Hops: Generate random credit card numbers -> Select values -> Text file output.]
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/resources/kettle-storm.properties:
--------------------------------------------------------------------------------
1 | #
2 | # ******************************************************************************
3 | # Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
4 | # ******************************************************************************
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with
8 | # the License. You may obtain a copy of the License at
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | # *****************************************************************************
17 | #
18 | 
19 | kettle.topology.jar=${kettle.storm.topology.jar}
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/resources/stream-lookup.ktr:
--------------------------------------------------------------------------------
[stream-lookup.ktr: a Kettle transformation definition in XML whose markup was lost when this dump was generated. Recoverable content: the transformation is named "stream-lookup" and contains four steps, Raw data (DataGrid, field id with values 1-4), Reference (DataGrid, id/value pairs 1=One, 2=Two, 3=Three, 4=Four), Stream lookup (StreamLookup, looking up value by id against Reference), and Text file output (TextFileOutput, writing semicolon-separated UTF-8 UNIX-format output to stream-lookup-output.txt). Hops: Raw data -> Stream lookup, Reference -> Stream lookup, Stream lookup -> Text file output.]
--------------------------------------------------------------------------------
/kettle-engine-storm/src/main/resources/test.ktr:
--------------------------------------------------------------------------------
[test.ktr: a Kettle transformation definition in XML whose markup was lost when this dump was generated. Recoverable content: the transformation is named "test" and contains a single step, Generate random value (RandomValue), producing a field named "id" of type "random string"; it has no hops.]
40 | 41 | 0.0 42 | 0.0 43 | 44 | 10000 45 | 50 46 | 50 47 | N 48 | Y 49 | 50000 50 | Y 51 | 52 | N 53 | 1000 54 | 100 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | - 64 | 2012/11/07 11:07:54.689 65 | - 66 | 2012/11/07 11:07:54.689 67 | 68 | 69 | 70 | 71 | 72 | 73 | Generate random value 74 | RandomValue 75 | 76 | Y 77 | 1 78 | 79 | none 80 | 81 | 82 | 83 | 84 | id 85 | random string 86 | 87 | 88 | 89 | 90 | 81 91 | 49 92 | Y 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | N 101 | 102 | -------------------------------------------------------------------------------- /kettle-engine-storm/src/test/java/org/pentaho/kettle/engines/storm/bolt/KettleControlBoltTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * ***************************************************************************** 3 | * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com 4 | * ***************************************************************************** 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with 8 | * the License. You may obtain a copy of the License at 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | * **************************************************************************** 17 | */ 18 | 19 | package org.pentaho.kettle.engines.storm.bolt; 20 | 21 | import java.util.Collections; 22 | import java.util.List; 23 | import java.util.Map; 24 | 25 | import org.easymock.EasyMock; 26 | import org.easymock.IMocksControl; 27 | import org.junit.Before; 28 | import org.junit.Test; 29 | import org.pentaho.kettle.engines.storm.KettleControlSignal; 30 | import org.pentaho.kettle.engines.storm.Notifier; 31 | import org.pentaho.kettle.engines.storm.signal.KettleSignal; 32 | 33 | import com.google.common.collect.Lists; 34 | import com.google.common.collect.Sets; 35 | 36 | import backtype.storm.task.OutputCollector; 37 | import backtype.storm.task.TopologyContext; 38 | import backtype.storm.tuple.Tuple; 39 | 40 | public class KettleControlBoltTest { 41 | private static final String TRANS_NAME = "transformation 1"; 42 | private static final String STEP_1 = "step 1"; 43 | private static final String STEP_2 = "step 2"; 44 | private static final int TASK_ID_1 = 1723; 45 | private static final int TASK_ID_2 = 18; 46 | private static final KettleSignal STEP_1_COMPLETE = new KettleSignal(STEP_1, 47 | TASK_ID_1, KettleControlSignal.COMPLETE); 48 | private static final KettleSignal STEP_2_COMPLETE = new KettleSignal(STEP_2, 49 | TASK_ID_2, KettleControlSignal.COMPLETE); 50 | 51 | private IMocksControl control; 52 | private Notifier notifier; 53 | private TopologyContext context; 54 | private OutputCollector collector; 55 | 56 | @SuppressWarnings("rawtypes") 57 | @Before 58 | public void init() { 59 | control = EasyMock.createControl(); 60 | notifier = control.createMock(Notifier.class); 61 | notifier.init(EasyMock. 
<Map> anyObject()); 62 | EasyMock.expectLastCall().anyTimes(); 63 | collector = control.createMock(OutputCollector.class); 64 | context = control.createMock(TopologyContext.class); 65 | } 66 | 67 | @Test(expected = IllegalArgumentException.class) 68 | public void construct() { 69 | new KettleControlBolt(null, notifier, Collections.singleton("step")); 70 | } 71 | 72 | @Test(expected = IllegalStateException.class) 73 | public void prepare_no_tasks_for_leaf_step_null() { 74 | KettleControlBolt bolt = new KettleControlBolt(TRANS_NAME, notifier, 75 | Collections.singleton(STEP_1)); 76 | EasyMock.expect(context.getComponentTasks(STEP_1)).andReturn(null); 77 | 78 | control.replay(); 79 | bolt.prepare(Collections.emptyMap(), context, collector); 80 | } 81 | 82 | @Test(expected = IllegalStateException.class) 83 | public void prepare_no_tasks_for_leaf_step_empty() { 84 | KettleControlBolt bolt = new KettleControlBolt(TRANS_NAME, notifier, 85 | Collections.singleton(STEP_1)); 86 | EasyMock.expect(context.getComponentTasks(STEP_1)).andReturn( 87 | Collections.<Integer> emptyList()); 88 | 89 | control.replay(); 90 | bolt.prepare(Collections.emptyMap(), context, collector); 91 | } 92 | 93 | /** 94 | * Create a tuple for the given signal. 95 | * 96 | * @param signal 97 | * Signal to emit as a single value tuple. 98 | * @return The tuple. 99 | */ 100 | private Tuple createTupleForSignal(KettleSignal signal) { 101 | Tuple input = control.createMock(Tuple.class); 102 | EasyMock.expect(input.getValue(0)).andReturn(signal).anyTimes(); 103 | return input; 104 | } 105 | 106 | /** 107 | * Verify the last task to complete triggers the notifier. 108 | */ 109 | @Test 110 | public void execute_last_task() throws Exception { 111 | // Test set up 112 | KettleControlBolt bolt = new KettleControlBolt(TRANS_NAME, notifier, 113 | Collections.singleton(STEP_1)); 114 | List<Integer> taskIds = Collections.singletonList(TASK_ID_1); 115 | EasyMock.expect(context.getComponentTasks(STEP_1)).andReturn(taskIds); 116 | Tuple step1Complete = createTupleForSignal(STEP_1_COMPLETE); 117 | 118 | // Expect that our notifier is notified after receiving a complete signal 119 | // for our one and only leaf node 120 | notifier.notify(TRANS_NAME, KettleControlSignal.COMPLETE); 121 | EasyMock.expectLastCall(); 122 | 123 | // The tuple should be acknowledged 124 | collector.ack(step1Complete); 125 | EasyMock.expectLastCall(); 126 | 127 | control.replay(); 128 | bolt.prepare(Collections.emptyMap(), context, collector); 129 | bolt.execute(step1Complete); 130 | control.verify(); 131 | } 132 | 133 | /** 134 | * Verify notifications are not sent if there are pending steps. 135 | */ 136 | @Test 137 | public void execute_not_last_step() throws Exception { 138 | // Test set up 139 | KettleControlBolt bolt = new KettleControlBolt(TRANS_NAME, notifier, 140 | Sets.newHashSet(STEP_1, STEP_2)); 141 | EasyMock.expect(context.getComponentTasks(STEP_1)).andReturn( 142 | Collections.singletonList(TASK_ID_1)); 143 | EasyMock.expect(context.getComponentTasks(STEP_2)).andReturn( 144 | Collections.singletonList(TASK_ID_2)); 145 | Tuple step1Complete = createTupleForSignal(STEP_1_COMPLETE); 146 | 147 | // The tuple should be acknowledged 148 | collector.ack(step1Complete); 149 | EasyMock.expectLastCall(); 150 | 151 | control.replay(); 152 | bolt.prepare(Collections.emptyMap(), context, collector); 153 | bolt.execute(step1Complete); 154 | control.verify(); 155 | } 156 | 157 | /** 158 | * Verify notifications are sent after all leaf steps are complete.
159 | */ 160 | @Test 161 | public void execute_multiple_steps() throws Exception { 162 | // Test set up 163 | KettleControlBolt bolt = new KettleControlBolt(TRANS_NAME, notifier, 164 | Sets.newHashSet(STEP_1, STEP_2)); 165 | EasyMock.expect(context.getComponentTasks(STEP_1)).andReturn( 166 | Collections.singletonList(TASK_ID_1)); 167 | EasyMock.expect(context.getComponentTasks(STEP_2)).andReturn( 168 | Collections.singletonList(TASK_ID_2)); 169 | Tuple step1Complete = createTupleForSignal(STEP_1_COMPLETE); 170 | Tuple step2Complete = createTupleForSignal(STEP_2_COMPLETE); 171 | 172 | // The tuples should be acknowledged 173 | collector.ack(step1Complete); 174 | EasyMock.expectLastCall(); 175 | collector.ack(step2Complete); 176 | EasyMock.expectLastCall(); 177 | 178 | // Expect that our notifier is notified after receiving complete signals 179 | // for both leaf steps 180 | notifier.notify(TRANS_NAME, KettleControlSignal.COMPLETE); 181 | EasyMock.expectLastCall(); 182 | 183 | control.replay(); 184 | bolt.prepare(Collections.emptyMap(), context, collector); 185 | bolt.execute(step1Complete); 186 | bolt.execute(step2Complete); 187 | control.verify(); 188 | } 189 | 190 | /** 191 | * Verify notifications are sent after all copies of the leaf step have 192 | * completed. 193 | */ 194 | @Test 195 | public void execute_single_leaf_step_with_multiple_copies() throws Exception { 196 | // Test set up 197 | KettleControlBolt bolt = new KettleControlBolt(TRANS_NAME, notifier, 198 | Sets.newHashSet(STEP_1)); 199 | EasyMock.expect(context.getComponentTasks(STEP_1)).andReturn( 200 | Lists.newArrayList(TASK_ID_1, TASK_ID_2)); 201 | Tuple task1Complete = createTupleForSignal(new KettleSignal(STEP_1, 202 | TASK_ID_1, KettleControlSignal.COMPLETE)); 203 | Tuple task2Complete = createTupleForSignal(new KettleSignal(STEP_1, 204 | TASK_ID_2, KettleControlSignal.COMPLETE)); 205 | 206 | // The tuples should be acknowledged 207 | collector.ack(task1Complete); 208 | EasyMock.expectLastCall(); 209 | collector.ack(task2Complete); 210 | EasyMock.expectLastCall(); 211 | 212 | // Expect that our notifier is notified after receiving complete signals 213 | // from every task of our one and only leaf step 214 | notifier.notify(TRANS_NAME, KettleControlSignal.COMPLETE); 215 | EasyMock.expectLastCall(); 216 | 217 | control.replay(); 218 | bolt.prepare(Collections.emptyMap(), context, collector); 219 | bolt.execute(task1Complete); 220 | bolt.execute(task2Complete); 221 | control.verify(); 222 | } 223 | 224 | /** 225 | * Verify receiving a signal for a non-leaf step is a failure case. 226 | */ 227 | @Test 228 | public void execute_unexpected_signal() throws Exception { 229 | // Test set up 230 | KettleControlBolt bolt = new KettleControlBolt(TRANS_NAME, notifier, 231 | Sets.newHashSet(STEP_1)); 232 | EasyMock.expect(context.getComponentTasks(STEP_1)).andReturn( 233 | Collections.singletonList(TASK_ID_1)); 234 | Tuple unexpectedSignalTuple = createTupleForSignal(new KettleSignal( 235 | "unknown step", 1, KettleControlSignal.COMPLETE)); 236 | 237 | // The tuple should be failed since we're not expecting it.
238 | collector.fail(unexpectedSignalTuple); 239 | EasyMock.expectLastCall(); 240 | 241 | control.replay(); 242 | bolt.prepare(Collections.emptyMap(), context, collector); 243 | bolt.execute(unexpectedSignalTuple); 244 | control.verify(); 245 | } 246 | 247 | @Test 248 | public void cleanup() { 249 | KettleControlBolt bolt = new KettleControlBolt(TRANS_NAME, notifier, 250 | Collections.singleton(STEP_1)); 251 | 252 | notifier.cleanup(); 253 | EasyMock.expectLastCall(); 254 | 255 | control.replay(); 256 | bolt.cleanup(); 257 | control.verify(); 258 | } 259 | } 260 | -------------------------------------------------------------------------------- /kettle-engine-storm/src/test/resources/empty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pentaho/kettle-storm/c2e2bb70a38229468dab382f62708dad5e6249e1/kettle-engine-storm/src/test/resources/empty --------------------------------------------------------------------------------
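The KettleControlBoltTest cases above pin down the completion-tracking contract of KettleControlBolt: prepare() records every task id of every leaf step, each COMPLETE signal retires one pending task, the notifier fires exactly once when the final task reports in, and a signal from an unknown step fails the tuple instead of acknowledging it. The sketch below is a minimal, self-contained model of that bookkeeping in plain Java; it is illustrative only, independent of the Storm and Kettle classes in this repository, and every name in it (LeafStepCompletionTracker, register, complete) is hypothetical rather than the project's actual implementation.

```java
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/**
 * Illustrative model of the completion tracking the KettleControlBolt tests
 * expect. Hypothetical class and method names; not the project's code.
 */
public class LeafStepCompletionTracker {
  /** Outstanding task ids, keyed by leaf step name. */
  private final Map<String, Set<Integer>> pending = new HashMap<String, Set<Integer>>();

  /** Record the task ids of one leaf step, as prepare() does via getComponentTasks(). */
  public void register(String step, Iterable<Integer> taskIds) {
    Set<Integer> tasks = new HashSet<Integer>();
    for (Integer id : taskIds) {
      tasks.add(id);
    }
    if (tasks.isEmpty()) {
      // Mirrors the prepare_no_tasks_for_leaf_step_* failure cases.
      throw new IllegalStateException("No tasks found for leaf step: " + step);
    }
    pending.put(step, tasks);
  }

  /**
   * Record a COMPLETE signal for one task of one step.
   *
   * @return true when this was the last outstanding task of the last
   *         outstanding leaf step, i.e. the notifier should fire now.
   * @throws IllegalArgumentException for signals from unknown steps, the
   *         situation execute_unexpected_signal fails the tuple for.
   */
  public boolean complete(String step, int taskId) {
    Set<Integer> tasks = pending.get(step);
    if (tasks == null) {
      throw new IllegalArgumentException("Unexpected signal from step: " + step);
    }
    tasks.remove(taskId);
    if (tasks.isEmpty()) {
      pending.remove(step);
    }
    return pending.isEmpty();
  }
}
```

Driven the way execute_multiple_steps drives the bolt (register two leaf steps, then complete each of their tasks in turn), only the final complete() call returns true, which is the point at which the real bolt notifies that the transformation has finished.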