├── .classpath ├── .gitignore ├── LICENSE ├── NOTICE ├── README.md ├── img ├── IndexerV2Design.jpg ├── IndexerV2Design.pptx ├── Kafka_ES_Illustration.png └── Kafka_ES_Illustration_New.png ├── pom.xml ├── run_indexer.sh └── src ├── main ├── assemblies │ └── plugin.xml ├── java │ └── org │ │ └── elasticsearch │ │ └── kafka │ │ └── indexer │ │ ├── BasicIndexHandler.java │ │ ├── ConsumerConfig.java │ │ ├── FailedEventsLogger.java │ │ ├── IndexHandler.java │ │ ├── IndexerESException.java │ │ ├── KafkaClient.java │ │ ├── KafkaIndexerDriver.java │ │ ├── MessageHandler.java │ │ ├── jmx │ │ ├── IndexerJobStatusMBean.java │ │ ├── KafkaEsIndexerStatus.java │ │ └── KafkaEsIndexerStatusMXBean.java │ │ ├── jobs │ │ ├── IndexerJob.java │ │ ├── IndexerJobManager.java │ │ ├── IndexerJobStatus.java │ │ └── IndexerJobStatusEnum.java │ │ ├── mappers │ │ ├── AccessLogMapper.java │ │ └── KafkaMetaDataMapper.java │ │ └── messageHandlers │ │ ├── AccessLogMessageHandler.java │ │ └── RawMessageStringHandler.java └── resources │ ├── kafka-es-indexer.properties.template │ └── logback.xml.template └── test └── org └── elasticsearch └── kafka └── indexer └── jmx └── KafkaEsIndexerStatusTest.java /.classpath: -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /bin/ 2 | .project 3 | .settings/ 4 | /logs/ 5 | .idea 6 | .classpath 7 | 8 | *.iml 9 | kafkaESConsumerLocal.properties 10 | dependency-reduced-pom.xml 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types.
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | kafka-elasticsearch-standalone-consumer 2 | 3 | Licensed under Apache License, Version 2.0 4 | 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Welcome to the kafka-elasticsearch-standalone-consumer wiki! 2 | 3 | ## Architecture of the kafka-elasticsearch-standalone-consumer [indexer] 4 | 5 | ![](https://raw.githubusercontent.com/ppine7/kafka-elasticsearch-standalone-consumer/master/img/IndexerV2Design.jpg) 6 | 7 | 8 | # This project has moved to the repository below 9 | ### Please see https://github.com/BigDataDevs/kafka-elasticsearch-consumer 10 | 11 | # Introduction 12 | 13 | ### **Kafka Standalone Consumer [Indexer] will read messages from Kafka in batches, process them, and bulk-index them into ElasticSearch.** 14 | 15 | ### _As described in the illustration above, here is how the indexer works:_ 16 | 17 | * Kafka has a topic named, say, `Topic1` 18 | 19 | * Let's say `Topic1` has 5 partitions. 20 | 21 | * In the configuration file, kafka-es-indexer.properties, set the firstPartition=0 and lastPartition=4 properties 22 | 23 | * start the indexer application as described below 24 | 25 | * 5 threads will be started, one consumer thread for each of the partitions 26 | 27 | * when a new partition is added to the Kafka topic, the configuration has to be updated and the indexer application has to be restarted 28 | 29 | 30 | # How to use? 31 | 32 | ### Running as a standard Jar 33 | 34 | **1.** Download the code into a `$INDEXER_HOME` dir. 35 | 36 | **2.**
cp `$INDEXER_HOME`/src/main/resources/kafka-es-indexer.properties.template to /your/absolute/path/kafka-es-indexer.properties - update all relevant properties as explained in the comments 37 | 38 | **3.** cp `$INDEXER_HOME`/src/main/resources/logback.xml.template to /your/absolute/path/logback.xml 39 | 40 | specify the directory you want to store logs in: 41 | 42 | 43 | adjust values of max sizes and number of log files as needed 44 | 45 | **4.** build/create the app jar (make sure you have Maven installed): 46 | 47 | cd $INDEXER_HOME 48 | mvn clean package 49 | 50 | The kafka-es-indexer-2.0.jar will be created in the $INDEXER_HOME/bin. 51 | All dependencies will be placed into $INDEXER_HOME/bin/lib. 52 | All JAR dependencies are linked via kafka-es-indexer-2.0.jar manifest. 53 | 54 | **5.** edit your $INDEXER_HOME/run_indexer.sh script: 55 | -- make it executable if needed (chmod a+x $INDEXER_HOME/run_indexer.sh) 56 | -- update properties marked with "CHANGE FOR YOUR ENV" comments - according to your environment 57 | 58 | **6.** run the app [use JDK 1.8]: 59 | 60 | ./run_indexer.sh 61 | 62 | # Versions 63 | 64 | ### Kafka Version: 0.8.2.1 65 | 66 | ### ElasticSearch: > 1.5.1 67 | 68 | ### Scala Version for Kafka Build: 2.10.0 69 | 70 | # Configuration 71 | 72 | Indexer app configuration is specified in the kafka-es-indexer.properties file, which should be created from a provided template, kafka-es-indexer.properties.template. All properties are described in the template: 73 | 74 | [kafka-es-indexer.properties.template](https://github.com/ppine7/kafka-elasticsearch-standalone-consumer/blob/master/src/main/resources/kafka-es-indexer.properties.template) 75 | 76 | Logging properties are specified in the logback.xml file, which should be created from a provided template, logback.xml.template: 77 | 78 | [logback.xml.template](https://github.com/ppine7/kafka-elasticsearch-standalone-consumer/blob/master/src/main/resources/logback.xml.template) 79 | 80 | 81 | # Message Handler Class 82 | 83 | * `org.elasticsearch.kafka.indexer.MessageHandler` is an abstract class that already implements most of the functionality for reading data from Kafka and batch-indexing into ElasticSearch. It has one abstract method, `transformMessage()`, that can be overridden in concrete sub-classes to customize message transformation before posting into ES 84 | 85 | * `org.elasticsearch.kafka.indexer.messageHandlers.RawMessageStringHandler` is a simple concrete sub-class of the MessageHandler that sends messages into ES with no additional transformation, as is, in the 'UTF-8' format 86 | 87 | * Usually, it is effective to index the message in JSON format in ElasticSearch. This can be done using a Mapper class and transforming the message from Kafka by overriding/implementing the `transformMessage()` method. An example can be found here: `org.elasticsearch.kafka.indexer.messageHandlers.AccessLogMessageHandler`; see also the sketch at the end of the IndexHandler section below 88 | 89 | * _**Do remember to set the newly created message handler class in the `messageHandlerClass` property in the kafka-es-indexer.properties file.**_ 90 | 91 | # IndexHandler Interface and basic implementation 92 | 93 | * `org.elasticsearch.kafka.indexer.IndexHandler` is an interface that defines two methods: getIndexName(params) and getIndexType(params). 94 | 95 | * `org.elasticsearch.kafka.indexer.BasicIndexHandler` is a simple implementation of this interface that returns the indexName and indexType values as configured in the kafka-es-indexer.properties file. 96 | 97 | * one might want to create a custom implementation of IndexHandler if, for example, index name and type are not static for all incoming messages but depend on the event data - for example customerId, orderId, etc. In that case, pass all info that is required to perform that custom index determination logic as a Map of parameters into the getIndexName(params) and getIndexType(params) methods (or pass NULL if no such data is required) - see the sketches below 98 | 99 | * _**Do remember to set the index handler class in the `indexHandlerClass` property in the kafka-es-indexer.properties file. By default, BasicIndexHandler is used**_
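### Example: a custom message handler

Below is a minimal sketch of a custom message handler (the class name and the JSON layout are illustrative, not part of this project). It wraps each raw Kafka message into a small JSON document before indexing; a real implementation would use a proper JSON library instead of string concatenation:

```java
package org.elasticsearch.kafka.indexer.messageHandlers;

import org.elasticsearch.client.Client;
import org.elasticsearch.kafka.indexer.ConsumerConfig;
import org.elasticsearch.kafka.indexer.MessageHandler;

// Hypothetical example: wraps every message into {"offset":..., "message":"..."}
public class JsonWrapMessageHandler extends MessageHandler {

    public JsonWrapMessageHandler(Client client, ConsumerConfig config) throws Exception {
        super(client, config);
    }

    @Override
    public byte[] transformMessage(byte[] inputMessage, Long offset) throws Exception {
        String raw = new String(inputMessage, "UTF-8");
        // naive quote escaping - for illustration only
        String json = "{\"offset\":" + offset
                + ",\"message\":\"" + raw.replace("\"", "\\\"") + "\"}";
        return json.getBytes("UTF-8");
    }
}
```

To activate it, set `messageHandlerClass=org.elasticsearch.kafka.indexer.messageHandlers.JsonWrapMessageHandler` in kafka-es-indexer.properties.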
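### Example: a custom index handler

Similarly, a minimal sketch of a custom IndexHandler that routes events to a daily index (again, the class name is illustrative). Note that the indexer instantiates the handler reflectively, so it must expose a constructor that takes a `ConsumerConfig`:

```java
package org.elasticsearch.kafka.indexer;

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;

// Hypothetical example: appends the current date to the configured index name,
// e.g. kafkaConsumerIndex-2015.09.25
public class DailyIndexHandler implements IndexHandler {

    private final ConsumerConfig config;

    public DailyIndexHandler(ConsumerConfig config) {
        this.config = config;
    }

    @Override
    public String getIndexName(HashMap indexLookupProperties) {
        String day = new SimpleDateFormat("yyyy.MM.dd").format(new Date());
        return config.esIndex + "-" + day;
    }

    @Override
    public String getIndexType(HashMap indexLookupProperties) {
        return config.esIndexType;
    }
}
```

To activate it, set `indexHandlerClass=org.elasticsearch.kafka.indexer.DailyIndexHandler` in kafka-es-indexer.properties.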
100 | 101 | # License 102 | 103 | kafka-elasticsearch-standalone-consumer 104 | 105 | Licensed under the Apache License, Version 2.0 (the "License"); you may 106 | not use this file except in compliance with the License. You may obtain 107 | a copy of the License at 108 | 109 | http://www.apache.org/licenses/LICENSE-2.0 110 | 111 | Unless required by applicable law or agreed to in writing, 112 | software distributed under the License is distributed on an 113 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 114 | KIND, either express or implied. See the License for the 115 | specific language governing permissions and limitations 116 | under the License. 117 | 118 | # Contributors 119 | - [Krishna Raj](https://github.com/reachkrishnaraj) 120 | - [Marina Popova](https://github.com/ppine7) 121 | - [Dhyan](https://github.com/dhyan-yottaa) 122 | -------------------------------------------------------------------------------- /img/IndexerV2Design.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reachkrishnaraj/kafka-elasticsearch-standalone-consumer/20c8b3ca71b74bb8c636a744b53948318d0e1da7/img/IndexerV2Design.jpg -------------------------------------------------------------------------------- /img/IndexerV2Design.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reachkrishnaraj/kafka-elasticsearch-standalone-consumer/20c8b3ca71b74bb8c636a744b53948318d0e1da7/img/IndexerV2Design.pptx -------------------------------------------------------------------------------- /img/Kafka_ES_Illustration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reachkrishnaraj/kafka-elasticsearch-standalone-consumer/20c8b3ca71b74bb8c636a744b53948318d0e1da7/img/Kafka_ES_Illustration.png -------------------------------------------------------------------------------- /img/Kafka_ES_Illustration_New.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/reachkrishnaraj/kafka-elasticsearch-standalone-consumer/20c8b3ca71b74bb8c636a744b53948318d0e1da7/img/Kafka_ES_Illustration_New.png -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | kafka-es-indexer 5 | 4.0.0 6 | org.elasticsearch.kafka.consumer 7 | kafka-es-indexer 8 | ${buildId}.${buildNumber} 9 | jar 10 | Kafka standalone indexer for elasticsearch 11 | 2014 12 | 13 | 14 | The Apache Software License, Version 2.0 15 | http://www.apache.org/licenses/LICENSE-2.0.txt 16 | repo 17 | 18 | 19 | 20 | 2 21 | 0 22 | 1.5.1 23 | UTF-8 24 | 25 | 26 | 27 | 28 | org.elasticsearch 29 | elasticsearch 30 | ${elasticsearch.version} 31 | compile 32 | 33 | 34 |
org.apache.kafka 35 | kafka_2.10 36 | 0.8.2.1 37 | 38 | 39 | com.netflix.curator 40 | curator-framework 41 | 1.0.1 42 | 43 | 44 | org.codehaus.jackson 45 | jackson-mapper-asl 46 | 1.9.3 47 | 48 | 49 | org.scala-lang 50 | scala-library 51 | 2.10.0 52 | 53 | 54 | log4j 55 | log4j 56 | 1.2.16 57 | 58 | 59 | javax.mail 60 | mail 61 | 62 | 63 | javax.jms 64 | jms 65 | 66 | 67 | com.sun.jdmk 68 | jmxtools 69 | 70 | 71 | com.sun.jmx 72 | jmxri 73 | 74 | 75 | 76 | 77 | org.xerial.snappy 78 | snappy-java 79 | 1.0.4.1 80 | 81 | 82 | org.apache.zookeeper 83 | zookeeper 84 | 3.3.4 85 | 86 | 87 | log4j 88 | log4j 89 | 90 | 91 | jline 92 | jline 93 | 94 | 95 | 96 | 97 | ch.qos.logback 98 | logback-classic 99 | 1.1.3 100 | 101 | 102 | ch.qos.logback 103 | logback-core 104 | 1.1.3 105 | 106 | 107 | org.codehaus.janino 108 | janino 109 | 2.7.8 110 | 111 | 112 | junit 113 | junit 114 | 4.12 115 | 116 | 117 | org.mockito 118 | mockito-all 119 | 1.10.19 120 | 121 | 122 | 123 | bin 124 | ${project.build.directory}/classes 125 | 126 | 127 | org.apache.maven.plugins 128 | maven-dependency-plugin 129 | 2.8 130 | 131 | 132 | copy-dependencies 133 | package 134 | 135 | copy-dependencies 136 | 137 | 138 | ${project.build.directory}/lib/ 139 | 140 | 141 | 142 | 143 | 144 | org.apache.maven.plugins 145 | maven-compiler-plugin 146 | 2.3.2 147 | 148 | 1.8 149 | 1.8 150 | 151 | 152 | 153 | org.apache.maven.plugins 154 | maven-source-plugin 155 | 156 | 157 | attach-sources 158 | 159 | jar 160 | 161 | 162 | 163 | 164 | 165 | org.apache.maven.plugins 166 | maven-jar-plugin 167 | 2.6 168 | 169 | ${project.build.directory}/lib/ 170 | 171 | 172 | 173 | 174 | 175 | -------------------------------------------------------------------------------- /run_indexer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Setup variables 4 | # CHANGE FOR YOUR ENV: absolute path of the indexer installation dir 5 | INDEXER_HOME= 6 | 7 | # CHANGE FOR YOUR ENV: JDK 8 installation dir - you can skip it if your JAVA_HOME env variable is set 8 | JAVA_HOME=/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home 9 | 10 | # CHANGE FOR YOUR ENV: absolute path of the logback config file 11 | LOGBACK_CONFIG_FILE= 12 | 13 | # CHANGE FOR YOUR ENV: absolute path of the indexer properties file 14 | INDEXER_PROPERTIES_FILE= 15 | 16 | # DO NOT CHANGE ANYTHING BELOW THIS POINT (unless you know what you are doing :) )! 17 | echo "Starting Kafka ES Indexer app ..."
18 | echo "INDEXER_HOME=$INDEXER_HOME" 19 | echo "JAVA_HOME=$JAVA_HOME" 20 | echo "LOGBACK_CONFIG_FILE=$LOGBACK_CONFIG_FILE" 21 | echo "INDEXER_PROPERTIES_FILE=$INDEXER_PROPERTIES_FILE" 22 | 23 | # add all dependent jars to the classpath 24 | for file in $INDEXER_HOME/bin/lib/*.jar; 25 | do 26 | CLASS_PATH=$CLASS_PATH:$file 27 | done 28 | echo "CLASS_PATH=$CLASS_PATH" 29 | 30 | $JAVA_HOME/bin/java -Xmx1g -cp $CLASS_PATH -Dlogback.configurationFile=$LOGBACK_CONFIG_FILE org.elasticsearch.kafka.indexer.KafkaIndexerDriver $INDEXER_PROPERTIES_FILE 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /src/main/assemblies/plugin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | plugin 4 | 5 | zip 6 | 7 | false 8 | 9 | 10 | / 11 | true 12 | true 13 | 14 | org.elasticsearch:elasticsearch 15 | 16 | 17 | 18 | / 19 | true 20 | true 21 | 22 | org.apache.kafka:kafka 23 | com.netflix.curator:curator-framework 24 | org.scala-lang:scala-library 25 | org.apache.zookeeper:zookeeper 26 | net.sf.jopt-simple:jopt-simple 27 | com.github.sgroschupf:zkclient 28 | org.xerial.snappy:snappy-java 29 | org.codehaus.jackson:jackson-mapper-asl 30 | log4j:log4j 31 | com.timgroup:java-statsd-client 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/kafka/indexer/BasicIndexHandler.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.kafka.indexer; 2 | 3 | import java.util.HashMap; 4 | 5 | /** 6 | * Basic Index handler that returns ElasticSearch index name defined 7 | * in the configuration file as is 8 | * 9 | * @author marinapopova 10 | * 11 | */ 12 | public class BasicIndexHandler implements IndexHandler { 13 | 14 | private final ConsumerConfig config; 15 | private String indexName; 16 | private String indexType; 17 | 18 | public BasicIndexHandler(ConsumerConfig config) { 19 | this.config = config; 20 | indexName = config.esIndex; 21 | if (indexName == null || indexName.trim().length() < 1) 22 | indexName = DEFAULT_INDEX_NAME; 23 | indexType = config.esIndexType; 24 | if (indexType == null || indexType.trim().length() < 1) 25 | indexType = DEFAULT_INDEX_TYPE; 26 | 27 | } 28 | 29 | @Override 30 | public String getIndexName(HashMap indexLookupProperties) { 31 | return indexName; 32 | } 33 | 34 | @Override 35 | public String getIndexType(HashMap indexLookupProperties) { 36 | return indexType; 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/kafka/indexer/ConsumerConfig.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.kafka.indexer; 2 | 3 | import java.io.FileInputStream; 4 | import java.util.Properties; 5 | 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | 9 | public class ConsumerConfig { 10 | 11 | private static final Logger logger = LoggerFactory.getLogger(ConsumerConfig.class); 12 | private Properties prop = new Properties(); 13 | private final int kafkaFetchSizeBytesDefault = 10 * 1024 * 1024; 14 | 15 | // Kafka ZooKeeper's IP Address/HostName : port list 16 | public final String kafkaZookeeperList; 17 | // Zookeeper session timeout in MS 18 | public final int zkSessionTimeoutMs; 19 | // Zookeeper connection timeout in MS 20 | public final int zkConnectionTimeoutMs; 21 | // Zookeeper number of 
retries when creating a curator client 22 | public final int zkCuratorRetryTimes; 23 | // Zookeeper: time in ms between re-tries when creating a Curator 24 | public final int zkCuratorRetryDelayMs; 25 | // Full class path and name for the concrete message handler class 26 | public final String messageHandlerClass; 27 | // Full class name of a custom IndexHandler implementation class 28 | public final String indexHandlerClass; 29 | // Kafka Broker's IP Address/HostName : port list 30 | public final String kafkaBrokersList; 31 | // Kafka Topic from which the message has to be processed 32 | public final String topic; 33 | // the below two parameters define the range of partitions to be processed by this app 34 | // first partition in the Kafka Topic from which the messages have to be processed 35 | public final short firstPartition; 36 | // last partition in the Kafka Topic from which the messages have to be processed 37 | public final short lastPartition; 38 | // Option for where message fetching should start in Kafka 39 | // Values can be: CUSTOM/EARLIEST/LATEST/RESTART. 40 | // If 'CUSTOM' is set, then 'startOffset' has to be set as an int value 41 | public String startOffsetFrom; 42 | // int value of the offset from where the message processing should happen 43 | public final int startOffset; 44 | // Name of the Kafka Consumer Group 45 | public final String consumerGroupName; 46 | // SimpleConsumer socket bufferSize 47 | public final int kafkaSimpleConsumerBufferSizeBytes; 48 | // SimpleConsumer socket timeout in MS 49 | public final int kafkaSimpleConsumerSocketTimeoutMs; 50 | // FetchRequest's minBytes value 51 | public final int kafkaFetchSizeMinBytes; 52 | // Preferred Message Encoding to process the message before posting it to ElasticSearch 53 | public final String messageEncoding; 54 | // Name of the ElasticSearch Cluster 55 | public final String esClusterName; 56 | // ElasticSearch host:port list 57 | public final String esHostPortList; 58 | // IndexName in ElasticSearch to which the processed Message has to be posted 59 | public final String esIndex; 60 | // IndexType in ElasticSearch to which the processed Message has to be posted 61 | public final String esIndexType; 62 | // flag to enable/disable performance metrics reporting 63 | public boolean isPerfReportingEnabled; 64 | // number of times to try to re-init Kafka connections/consumer if read/write to Kafka fails 65 | public final int numberOfReinitAttempts; 66 | // sleep time in ms between Kafka re-init attempts 67 | public final int kafkaReinitSleepTimeMs; 68 | // sleep time in ms between attempts to index data into ES again 69 | public final int esIndexingRetrySleepTimeMs; 70 | // number of times to try to index data into ES if ES cluster is not reachable 71 | public final int numberOfEsIndexingRetryAttempts; 72 | 73 | // Log property file for the consumer instance 74 | public final String logPropertyFile; 75 | 76 | // determines whether the consumer will post to ElasticSearch or not: 77 | // If set to true, the consumer will read events from Kafka and transform them, 78 | // but will not post to ElasticSearch 79 | public final String isDryRun; 80 | 81 | // Wait time in ms between consumer job rounds 82 | public final int consumerSleepBetweenFetchsMs; 83 | 84 | // wait time in seconds before force-stopping the Consumer Job 85 | public final int timeLimitToStopConsumerJob = 10; 86 | // timeout in seconds before force-stopping the Indexer app and all indexer jobs 87 | public final int appStopTimeoutSeconds; 88 | 89 |
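// For reference, a hypothetical kafka-es-indexer.properties fragment showing the
// property keys these fields are read from (the values shown are just the built-in
// defaults used in the constructor below, except the topic, which has an empty default):
//
//   kafkaZookeeperList=localhost:2181
//   kafkaBrokersList=localhost:9092
//   topic=my_topic
//   firstPartition=0
//   lastPartition=3
//   startOffsetFrom=LATEST
//   consumerGroupName=ESKafkaConsumerClient
//   messageHandlerClass=org.elasticsearch.kafka.indexer.messageHandlers.RawMessageStringHandler
//   indexHandlerClass=org.elasticsearch.kafka.indexer.BasicIndexHandler
//   esHostPortList=localhost:9300
//   esIndex=kafkaConsumerIndex
//   esIndexType=kafka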
public String getStartOffsetFrom() { 90 | return startOffsetFrom; 91 | } 92 | 93 | public void setStartOffsetFrom(String startOffsetFrom) { 94 | this.startOffsetFrom = startOffsetFrom; 95 | } 96 | 97 | public ConsumerConfig(String configFile) throws Exception { 98 | try { 99 | logger.info("configFile : " + configFile); 100 | prop.load(new FileInputStream(configFile)); 101 | 102 | logger.info("Properties : " + prop); 103 | } catch (Exception e) { 104 | logger.error("Error reading/loading configFile: " + e.getMessage(), e); 105 | throw e; 106 | } 107 | 108 | kafkaZookeeperList = prop.getProperty("kafkaZookeeperList", "localhost:2181"); 109 | zkSessionTimeoutMs = Integer.parseInt(prop.getProperty("zkSessionTimeoutMs", "10000")); 110 | zkConnectionTimeoutMs = Integer.parseInt(prop.getProperty("zkConnectionTimeoutMs", "15000")); 111 | zkCuratorRetryTimes = Integer.parseInt(prop.getProperty("zkCuratorRetryTimes", "3")); 112 | zkCuratorRetryDelayMs = Integer.parseInt(prop.getProperty("zkCuratorRetryDelayMs", "2000")); 113 | 114 | messageHandlerClass = prop.getProperty("messageHandlerClass", 115 | "org.elasticsearch.kafka.indexer.messageHandlers.RawMessageStringHandler"); 116 | indexHandlerClass = prop.getProperty("indexHandlerClass", 117 | "org.elasticsearch.kafka.indexer.BasicIndexHandler"); 118 | kafkaBrokersList = prop.getProperty("kafkaBrokersList", "localhost:9092"); 119 | topic = prop.getProperty("topic", ""); 120 | firstPartition = Short.parseShort(prop.getProperty("firstPartition", "0")); 121 | lastPartition = Short.parseShort(prop.getProperty("lastPartition", "3")); 122 | startOffsetFrom = prop.getProperty("startOffsetFrom", "LATEST"); 123 | startOffset = Integer.parseInt(prop.getProperty("startOffset", "0")); 124 | consumerGroupName = prop.getProperty("consumerGroupName", "ESKafkaConsumerClient"); 125 | kafkaFetchSizeMinBytes = Integer.parseInt(prop.getProperty( 126 | "kafkaFetchSizeMinBytes", 127 | String.valueOf(kafkaFetchSizeBytesDefault))); 128 | kafkaSimpleConsumerBufferSizeBytes = Integer.parseInt(prop.getProperty( 129 | "kafkaSimpleConsumerBufferSizeBytes", 130 | String.valueOf(kafkaFetchSizeBytesDefault))); 131 | kafkaSimpleConsumerSocketTimeoutMs = Integer.parseInt(prop.getProperty( 132 | "kafkaSimpleConsumerSocketTimeoutMs", "10000")); 133 | messageEncoding = prop.getProperty("messageEncoding", "UTF-8"); 134 | 135 | esClusterName = prop.getProperty("esClusterName", ""); 136 | esHostPortList = prop.getProperty("esHostPortList", "localhost:9300"); 137 | esIndex = prop.getProperty("esIndex", "kafkaConsumerIndex"); 138 | esIndexType = prop.getProperty("esIndexType", "kafka"); 139 | isPerfReportingEnabled = Boolean.parseBoolean(prop.getProperty( 140 | "isPerfReportingEnabled", "false")); 141 | logPropertyFile = prop.getProperty("logPropertyFile", 142 | "log4j.properties"); 143 | isDryRun = prop.getProperty("isDryRun", "false"); 144 | consumerSleepBetweenFetchsMs = Integer.parseInt(prop.getProperty( 145 | "consumerSleepBetweenFetchsMs", "25")); 146 | appStopTimeoutSeconds = Integer.parseInt(prop.getProperty( 147 | "appStopTimeoutSeconds", "10")); 148 | numberOfReinitAttempts = Integer.parseInt(prop.getProperty( 149 | "numberOfReinitAttempts", "2")); 150 | kafkaReinitSleepTimeMs = Integer.parseInt(prop.getProperty( 151 | "kafkaReinitSleepTimeMs", "1000")); 152 | esIndexingRetrySleepTimeMs = Integer.parseInt(prop.getProperty( 153 | "esIndexingRetrySleepTimeMs", "1000")); 154 | numberOfEsIndexingRetryAttempts = Integer.parseInt(prop.getProperty( 155 |
"numberOfEsIndexingRetryAttempts", "2")); 156 | logger.info("Config reading complete !"); 157 | } 158 | 159 | public boolean isPerfReportingEnabled() { 160 | return isPerfReportingEnabled; 161 | } 162 | 163 | public Properties getProperties() { 164 | return prop; 165 | } 166 | 167 | public int getNumberOfReinitAttempts() { 168 | return numberOfReinitAttempts; 169 | } 170 | 171 | public int getKafkaReinitSleepTimeMs() { 172 | return kafkaReinitSleepTimeMs; 173 | } 174 | 175 | public String getEsIndexType() { 176 | return esIndexType; 177 | } 178 | 179 | public int getEsIndexingRetrySleepTimeMs() { 180 | return esIndexingRetrySleepTimeMs; 181 | } 182 | 183 | public int getNumberOfEsIndexingRetryAttempts() { 184 | return numberOfEsIndexingRetryAttempts; 185 | } 186 | 187 | } 188 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/kafka/indexer/FailedEventsLogger.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.kafka.indexer; 2 | 3 | import org.slf4j.Logger; 4 | import org.slf4j.LoggerFactory; 5 | 6 | public class FailedEventsLogger { 7 | 8 | private static final Logger logger = LoggerFactory.getLogger(FailedEventsLogger.class); 9 | 10 | public static void logFailedEvent(String errorMsg, String event){ 11 | logger.error("General Error Processing Event: ERROR: {}, EVENT: {}", errorMsg, event); 12 | } 13 | 14 | public static void logFailedToPostToESEvent(String restResponse, String errorMsg){ 15 | logger.error("Error posting event to ES: REST response: {}, ERROR: {}", restResponse, errorMsg); 16 | } 17 | 18 | public static void logFailedToTransformEvent(long offset, String errorMsg, String event){ 19 | logger.error("Error transforming event: OFFSET: {}, ERROR: {}, EVENT: {}", 20 | offset, errorMsg, event); 21 | } 22 | public static void logFailedEvent(long startOffset,long endOffset, int partition ,String errorMsg, String event){ 23 | logger.error("Error transforming event: OFFSET: {} --> {} PARTITION: {},EVENT: {},ERROR: {} ", 24 | startOffset,endOffset, partition,event,errorMsg); 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/kafka/indexer/IndexHandler.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.kafka.indexer; 2 | 3 | import java.util.HashMap; 4 | 5 | /** 6 | * Basic interface for ElasticSearch Index name lookup 7 | * 8 | * getIndexName() and getIndexType() methods should be implemented as needed - to use custom logic 9 | * to determine which index to send the data to, based on business-specific 10 | * criteria 11 | * 12 | * @author marinapopova 13 | * 14 | */ 15 | public interface IndexHandler { 16 | 17 | // default index name, if not specified/calculated otherwise 18 | public static final String DEFAULT_INDEX_NAME = "test_index"; 19 | // default index type, if not specified/calculated otherwise 20 | public static final String DEFAULT_INDEX_TYPE = "test_index_type"; 21 | 22 | public String getIndexName (HashMap indexLookupProperties); 23 | public String getIndexType (HashMap indexLookupProperties); 24 | 25 | } 26 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/kafka/indexer/IndexerESException.java: -------------------------------------------------------------------------------- 1 | /** 2 | * @author marinapopova 3 | * Sep 25, 2015 4 | 
*/ 5 | package org.elasticsearch.kafka.indexer; 6 | 7 | public class IndexerESException extends Exception { 8 | 9 | /** 10 | * 11 | */ 12 | public IndexerESException() { 13 | // TODO Auto-generated constructor stub 14 | } 15 | 16 | /** 17 | * @param message 18 | */ 19 | public IndexerESException(String message) { 20 | super(message); 21 | // TODO Auto-generated constructor stub 22 | } 23 | 24 | /** 25 | * @param cause 26 | */ 27 | public IndexerESException(Throwable cause) { 28 | super(cause); 29 | // TODO Auto-generated constructor stub 30 | } 31 | 32 | /** 33 | * @param message 34 | * @param cause 35 | */ 36 | public IndexerESException(String message, Throwable cause) { 37 | super(message, cause); 38 | // TODO Auto-generated constructor stub 39 | } 40 | 41 | /** 42 | * @param message 43 | * @param cause 44 | * @param enableSuppression 45 | * @param writableStackTrace 46 | */ 47 | public IndexerESException(String message, Throwable cause, 48 | boolean enableSuppression, boolean writableStackTrace) { 49 | super(message, cause, enableSuppression, writableStackTrace); 50 | // TODO Auto-generated constructor stub 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/kafka/indexer/KafkaClient.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.kafka.indexer; 2 | 3 | import com.netflix.curator.framework.CuratorFramework; 4 | import com.netflix.curator.framework.CuratorFrameworkFactory; 5 | import com.netflix.curator.retry.RetryNTimes; 6 | import kafka.api.FetchRequest; 7 | import kafka.api.FetchRequestBuilder; 8 | import kafka.api.OffsetRequest; 9 | import kafka.api.PartitionOffsetRequestInfo; 10 | import kafka.common.OffsetAndMetadata; 11 | import kafka.common.TopicAndPartition; 12 | import kafka.javaapi.*; 13 | import kafka.javaapi.consumer.SimpleConsumer; 14 | import kafka.javaapi.message.ByteBufferMessageSet; 15 | import org.slf4j.Logger; 16 | import org.slf4j.LoggerFactory; 17 | 18 | import java.util.ArrayList; 19 | import java.util.HashMap; 20 | import java.util.List; 21 | import java.util.Map; 22 | 23 | public class KafkaClient { 24 | 25 | 26 | private static final Logger logger = LoggerFactory.getLogger(KafkaClient.class); 27 | private CuratorFramework curator; 28 | private SimpleConsumer simpleConsumer; 29 | private String kafkaClientId; 30 | private String topic; 31 | private final int partition; 32 | private String leaderBrokerHost; 33 | private int leaderBrokerPort; 34 | private String leaderBrokerURL; 35 | private final ConsumerConfig consumerConfig; 36 | private String[] kafkaBrokersArray; 37 | 38 | 39 | public KafkaClient(final ConsumerConfig config, String kafkaClientId, int partition) throws Exception { 40 | logger.info("Instantiating KafkaClient"); 41 | this.consumerConfig = config; 42 | 43 | this.topic = config.topic; 44 | this.kafkaClientId = kafkaClientId; 45 | this.partition = partition; 46 | kafkaBrokersArray = config.kafkaBrokersList.trim().split(","); 47 | logger.info("### KafkaClient Config: ###"); 48 | logger.info("kafkaZookeeperList: {}", config.kafkaZookeeperList); 49 | logger.info("kafkaBrokersList: {}", config.kafkaBrokersList); 50 | logger.info("kafkaClientId: {}", kafkaClientId); 51 | logger.info("topic: {}", topic); 52 | logger.info("partition: {}", partition); 53 | connectToZooKeeper(); 54 | findLeader(); 55 | initConsumer(); 56 | 57 | } 58 | 59 | public void connectToZooKeeper() throws Exception { 60 | try { 61 | 
curator = CuratorFrameworkFactory.newClient(consumerConfig.kafkaZookeeperList, 62 | consumerConfig.zkSessionTimeoutMs, consumerConfig.zkConnectionTimeoutMs, 63 | new RetryNTimes(consumerConfig.zkCuratorRetryTimes, consumerConfig.zkCuratorRetryDelayMs)); 64 | curator.start(); 65 | logger.info("Connected to Kafka Zookeeper successfully"); 66 | } catch (Exception e) { 67 | logger.error("Failed to connect to Zookeeper: " + e.getMessage()); 68 | throw e; 69 | } 70 | } 71 | 72 | public void initConsumer() throws Exception{ 73 | try{ 74 | this.simpleConsumer = new SimpleConsumer( 75 | leaderBrokerHost, leaderBrokerPort, 76 | consumerConfig.kafkaSimpleConsumerSocketTimeoutMs, 77 | consumerConfig.kafkaSimpleConsumerBufferSizeBytes, 78 | kafkaClientId); 79 | logger.info("Initialized Kafka Consumer successfully for partition {}", partition); 80 | } 81 | catch(Exception e){ 82 | logger.error("Failed to initialize Kafka Consumer: " + e.getMessage()); 83 | throw e; 84 | } 85 | } 86 | 87 | public short saveOffsetInKafka(long offset, short errorCode) throws Exception{ 88 | logger.debug("Starting to save the Offset value to Kafka: offset={}, errorCode={} for partition {}", 89 | offset, errorCode, partition); 90 | short versionID = 0; 91 | int correlationId = 0; 92 | try{ 93 | TopicAndPartition tp = new TopicAndPartition(topic, partition); 94 | OffsetAndMetadata offsetMetaAndErr = new OffsetAndMetadata( 95 | offset, OffsetAndMetadata.NoMetadata(), errorCode); 96 | Map<TopicAndPartition, OffsetAndMetadata> mapForCommitOffset = new HashMap<>(); 97 | mapForCommitOffset.put(tp, offsetMetaAndErr); 98 | kafka.javaapi.OffsetCommitRequest offsetCommitReq = new kafka.javaapi.OffsetCommitRequest( 99 | kafkaClientId, mapForCommitOffset, correlationId, kafkaClientId, versionID); 100 | OffsetCommitResponse offsetCommitResp = simpleConsumer.commitOffsets(offsetCommitReq); 101 | logger.debug("Completed Offset commit for partition {}.
OffsetCommitResponse ErrorCode = {} Returning to caller ", partition, offsetCommitResp.errors().get(tp)); 102 | return (Short) offsetCommitResp.errors().get(tp); 103 | } 104 | catch(Exception e){ 105 | logger.error("Error when committing Offset to Kafka: " + e.getMessage(), e); 106 | throw e; 107 | } 108 | } 109 | 110 | public PartitionMetadata findLeader() throws Exception { 111 | logger.info("Looking for Kafka leader broker for partition {}...", partition); 112 | PartitionMetadata leaderPartitionMetaData = null; 113 | // try to find leader META info, trying each broker until the leader is found - 114 | // in case some of the leaders are down 115 | for (int i=0; i topics = new ArrayList(); 160 | topics.add(this.topic); 161 | TopicMetadataRequest req = new TopicMetadataRequest(topics); 162 | kafka.javaapi.TopicMetadataResponse resp = leadFindConsumer.send(req); 163 | 164 | List<TopicMetadata> metaData = resp.topicsMetadata(); 165 | for (TopicMetadata item : metaData) { 166 | for (PartitionMetadata part : item.partitionsMetadata()) { 167 | if (part.partitionId() == partition) { 168 | leaderPartitionMetadata = part; 169 | logger.info("Found leader for partition {} using Kafka Broker={}:{}, topic={}; leader broker URL: {}:{}", 170 | partition, kafkaBrokerHost, kafkaBrokerPortStr, topic, 171 | leaderPartitionMetadata.leader().host(), leaderPartitionMetadata.leader().port()); 172 | break; 173 | } 174 | } 175 | // we found the leader - get out of this loop as well 176 | if (leaderPartitionMetadata != null) 177 | break; 178 | } 179 | } catch (Exception e) { 180 | logger.warn("Failed to find leader for partition {} using Kafka Broker={}, topic={}; Error: {}", 181 | partition, kafkaBrokerHost, topic, e.getMessage()); 182 | } finally { 183 | if (leadFindConsumer != null){ 184 | leadFindConsumer.close(); 185 | logger.debug("Closed the leadFindConsumer connection"); 186 | } 187 | } 188 | return leaderPartitionMetadata; 189 | } 190 | 191 | public long getLastestOffset() throws Exception { 192 | logger.debug("Getting LatestOffset for topic={}, partition={}, kafkaGroupId={}", 193 | topic, partition, kafkaClientId); 194 | long latestOffset = getOffset(topic, partition, OffsetRequest.LatestTime(), kafkaClientId); 195 | logger.debug("LatestOffset={} for partition {}", latestOffset, partition); 196 | return latestOffset; 197 | } 198 | 199 | public long getEarliestOffset() throws Exception { 200 | logger.debug("Getting EarliestOffset for topic={}, partition={}, kafkaGroupId={}", 201 | topic, partition, kafkaClientId); 202 | long earliestOffset = this.getOffset(topic, partition, OffsetRequest.EarliestTime(), kafkaClientId); 203 | logger.debug("earliestOffset={} for partition {}", earliestOffset, partition); 204 | return earliestOffset; 205 | } 206 | 207 | private long getOffset(String topic, int partition, long whichTime, String clientName) throws Exception { 208 | try{ 209 | TopicAndPartition topicAndPartition = new TopicAndPartition(topic, 210 | partition); 211 | Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo = new HashMap<>(); 212 | requestInfo.put(topicAndPartition, new PartitionOffsetRequestInfo( 213 | whichTime, 1)); 214 | kafka.javaapi.OffsetRequest request = new kafka.javaapi.OffsetRequest( 215 | requestInfo, kafka.api.OffsetRequest.CurrentVersion(), 216 | clientName); 217 | OffsetResponse response = simpleConsumer.getOffsetsBefore(request); 218 | 219 | if (response.hasError()) { 220 | logger.error("Error fetching offsets from Kafka.
Reason: {} for partition {}", response.errorCode(topic, partition), partition); 221 | throw new Exception("Error fetching offsets from Kafka. Reason: " + response.errorCode(topic, partition) + " for partition " + partition); 222 | } 223 | long[] offsets = response.offsets(topic, partition); 224 | return offsets[0]; 225 | } 226 | catch(Exception e){ 227 | logger.error("Exception when trying to get the Offset for partition {}", partition, e); 228 | throw e; 229 | } 230 | } 231 | 232 | public long fetchCurrentOffsetFromKafka() throws Exception{ 233 | short versionID = 0; 234 | int correlationId = 0; 235 | try{ 236 | List<TopicAndPartition> topicPartitionList = new ArrayList<>(); 237 | TopicAndPartition myTopicAndPartition = new TopicAndPartition(topic, partition); 238 | topicPartitionList.add(myTopicAndPartition); 239 | OffsetFetchRequest offsetFetchReq = new OffsetFetchRequest( 240 | kafkaClientId, topicPartitionList, versionID, correlationId, kafkaClientId); 241 | OffsetFetchResponse offsetFetchResponse = simpleConsumer.fetchOffsets(offsetFetchReq); 242 | long currentOffset = offsetFetchResponse.offsets().get(myTopicAndPartition).offset(); 243 | //logger.info("Fetched Kafka's currentOffset = " + currentOffset); 244 | return currentOffset; 245 | } 246 | catch(Exception e){ 247 | logger.error("Error when fetching current offset from Kafka for partition {}", partition, e); 248 | throw e; 249 | } 250 | } 251 | 252 | 253 | public FetchResponse getMessagesFromKafka(long offset) throws Exception { 254 | logger.debug("Starting getMessagesFromKafka() ..."); 255 | try{ 256 | FetchRequest req = new FetchRequestBuilder() 257 | .clientId(kafkaClientId) 258 | .addFetch(topic, partition, offset, consumerConfig.kafkaFetchSizeMinBytes) 259 | .build(); 260 | FetchResponse fetchResponse = simpleConsumer.fetch(req); 261 | return fetchResponse; 262 | } 263 | catch(Exception e){ 264 | logger.error("Exception fetching messages from Kafka for partition {}", partition, e); 265 | throw e; 266 | } 267 | } 268 | 269 | public void close() { 270 | curator.close(); 271 | logger.info("Curator/Zookeeper connection closed"); 272 | } 273 | 274 | } 275 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/kafka/indexer/KafkaIndexerDriver.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.kafka.indexer; 2 | 3 | import org.elasticsearch.kafka.indexer.jmx.KafkaEsIndexerStatus; 4 | import org.elasticsearch.kafka.indexer.jmx.KafkaEsIndexerStatusMXBean; 5 | import org.elasticsearch.kafka.indexer.jobs.IndexerJobManager; 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | 9 | import javax.management.MBeanServer; 10 | import javax.management.ObjectName; 11 | import java.lang.management.ManagementFactory; 12 | 13 | public class KafkaIndexerDriver { 14 | 15 | boolean stopped = false; 16 | public IndexerJobManager indexerJobManager = null; 17 | private boolean isConsumeJobInProgress = false; 18 | private ConsumerConfig kafkaConsumerConfig; 19 | private static final Logger logger = LoggerFactory.getLogger(KafkaIndexerDriver.class); 20 | private static final String KAFKA_CONSUMER_SHUTDOWN_THREAD = "kafka-indexer-shutdown-thread"; 21 | 22 | public KafkaIndexerDriver(){ 23 | } 24 | 25 | public void init(String[] args) throws Exception { 26 | logger.info("Initializing Kafka ES Indexer, arguments passed to the Driver: "); 27 | for(String arg : args){ 28 | logger.info(arg); 29 | } 30 | kafkaConsumerConfig
= new ConsumerConfig(args[0]); 31 | logger.info("Created kafka consumer config OK"); 32 | indexerJobManager = new IndexerJobManager(kafkaConsumerConfig); 33 | 34 | logger.info("Registering KafkaEsIndexerStatus MBean: "); 35 | MBeanServer mbs = ManagementFactory.getPlatformMBeanServer(); 36 | ObjectName name = new ObjectName("org.elasticsearch.kafka.indexer:type=KafkaEsIndexerStatus"); 37 | KafkaEsIndexerStatusMXBean hc = new KafkaEsIndexerStatus(indexerJobManager); 38 | mbs.registerMBean(hc, name); 39 | 40 | 41 | } 42 | 43 | 44 | public void start() throws Exception { 45 | indexerJobManager.startAll(); 46 | } 47 | 48 | public void stop() throws Exception { 49 | logger.info("Received the stop signal, trying to stop all indexer jobs..."); 50 | stopped = true; 51 | 52 | indexerJobManager.stop(); 53 | // TODO check if we still need the forced/timed-out shutdown 54 | /* 55 | LocalDateTime stopTime= LocalDateTime.now(); 56 | while(isConsumeJobInProgress){ 57 | logger.info(".... Waiting for in-progress Consumer Job to complete ..."); 58 | Thread.sleep(1000); 59 | LocalDateTime dateTime2= LocalDateTime.now(); 60 | if (java.time.Duration.between(stopTime, dateTime2).getSeconds() > kafkaConsumerConfig.timeLimitToStopConsumerJob){ 61 | logger.info(".... Consumer Job not responding for " + kafkaConsumerConfig.timeLimitToStopConsumerJob +" seconds - stopping the job"); 62 | break; 63 | } 64 | 65 | } 66 | logger.info("Completed waiting for in-progress Consumer Job to finish - stopping the job"); 67 | try{ 68 | kafkaConsumerJob.stopKafkaClient(); 69 | } 70 | catch(Exception e){ 71 | logger.error("********** Exception when trying to stop the Consumer Job: " + 72 | e.getMessage(), e); 73 | e.printStackTrace(); 74 | } 75 | /* */ 76 | 77 | logger.info("Stopped all indexer jobs OK"); 78 | } 79 | 80 | public static void main(String[] args) { 81 | KafkaIndexerDriver driver = new KafkaIndexerDriver(); 82 | 83 | Runtime.getRuntime().addShutdownHook(new Thread(KAFKA_CONSUMER_SHUTDOWN_THREAD) { 84 | public void run() { 85 | logger.info("Running Shutdown Hook ....
"); 86 | try { 87 | driver.stop(); 88 | } catch (Exception e) { 89 | logger.error("Error stopping the Consumer from the ShutdownHook: " + e.getMessage()); 90 | } 91 | } 92 | }); 93 | 94 | try { 95 | driver.init(args); 96 | driver.start(); 97 | } catch (Exception e) { 98 | logger.error("Exception from main() - exiting: " + e.getMessage()); 99 | } 100 | 101 | 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/kafka/indexer/MessageHandler.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.kafka.indexer; 2 | 3 | import java.lang.reflect.InvocationTargetException; 4 | import java.nio.ByteBuffer; 5 | import java.util.Iterator; 6 | 7 | import kafka.message.Message; 8 | import kafka.message.MessageAndOffset; 9 | 10 | import org.elasticsearch.ElasticsearchException; 11 | import org.elasticsearch.action.bulk.BulkItemResponse; 12 | import org.elasticsearch.action.bulk.BulkRequestBuilder; 13 | import org.elasticsearch.action.bulk.BulkResponse; 14 | import org.elasticsearch.client.Client; 15 | import org.slf4j.Logger; 16 | import org.slf4j.LoggerFactory; 17 | 18 | 19 | public abstract class MessageHandler { 20 | 21 | private static final Logger logger = LoggerFactory.getLogger(MessageHandler.class); 22 | private Client esClient; 23 | private ConsumerConfig config; 24 | private BulkRequestBuilder bulkRequestBuilder; 25 | private IndexHandler indexHandler; 26 | 27 | public MessageHandler(Client client,ConsumerConfig config) throws Exception{ 28 | this.esClient = client; 29 | this.config = config; 30 | this.bulkRequestBuilder = null; 31 | // instantiate specified in the config IndexHandler class 32 | try { 33 | indexHandler = (IndexHandler) Class 34 | .forName(config.indexHandlerClass) 35 | .getConstructor(ConsumerConfig.class) 36 | .newInstance(config); 37 | logger.info("Created IndexHandler: ", config.indexHandlerClass); 38 | } catch (InstantiationException | IllegalAccessException 39 | | IllegalArgumentException | InvocationTargetException 40 | | NoSuchMethodException | SecurityException 41 | | ClassNotFoundException e) { 42 | logger.error("Exception creating IndexHandler: " + e.getMessage(), e); 43 | throw e; 44 | } 45 | logger.info("Created Message Handler"); 46 | } 47 | 48 | public Client getEsClient() { 49 | return esClient; 50 | } 51 | 52 | public ConsumerConfig getConfig() { 53 | return config; 54 | } 55 | 56 | public BulkRequestBuilder getBuildReqBuilder() { 57 | return bulkRequestBuilder; 58 | } 59 | 60 | public void setBuildReqBuilder(BulkRequestBuilder bulkReqBuilder) { 61 | this.bulkRequestBuilder = bulkReqBuilder; 62 | } 63 | 64 | 65 | public boolean postToElasticSearch() throws Exception { 66 | BulkResponse bulkResponse = null; 67 | BulkItemResponse bulkItemResp = null; 68 | //Nothing/NoMessages to post to ElasticSearch 69 | if(bulkRequestBuilder.numberOfActions() <= 0){ 70 | logger.warn("No messages to post to ElasticSearch - returning"); 71 | return true; 72 | } 73 | try{ 74 | bulkResponse = bulkRequestBuilder.execute().actionGet(); 75 | } 76 | catch(ElasticsearchException e){ 77 | logger.error("Failed to post messages to ElasticSearch: " + e.getMessage(), e); 78 | throw e; 79 | } 80 | logger.debug("Time to post messages to ElasticSearch: {} ms", bulkResponse.getTookInMillis()); 81 | if(bulkResponse.hasFailures()){ 82 | logger.error("Bulk Message Post to ElasticSearch has errors: {}", 83 | bulkResponse.buildFailureMessage()); 84 | int 
failedCount = 0; 85 | Iterator bulkRespItr = bulkResponse.iterator(); 86 | //TODO research if there is a way to get all failed messages without iterating over 87 | // ALL messages in this bulk post request 88 | while (bulkRespItr.hasNext()){ 89 | bulkItemResp = bulkRespItr.next(); 90 | if (bulkItemResp.isFailed()) { 91 | failedCount++; 92 | String errorMessage = bulkItemResp.getFailure().getMessage(); 93 | String restResponse = bulkItemResp.getFailure().getStatus().name(); 94 | logger.error("Failed Message #{}, REST response:{}; errorMessage:{}", 95 | failedCount, restResponse, errorMessage); 96 | // TODO: there does not seem to be a way to get the actual failed event 97 | // until it is possible - do not log anything into the failed events log file 98 | //FailedEventsLogger.logFailedToPostToESEvent(restResponse, errorMessage); 99 | } 100 | } 101 | logger.info("# of failed to post messages to ElasticSearch: {} ", failedCount); 102 | return false; 103 | } 104 | logger.info("Bulk Post to ElasticSearch finished OK"); 105 | bulkRequestBuilder = null; 106 | return true; 107 | } 108 | 109 | public abstract byte[] transformMessage(byte[] inputMessage, Long offset) throws Exception; 110 | 111 | public long prepareForPostToElasticSearch(Iterator messageAndOffsetIterator){ 112 | bulkRequestBuilder = esClient.prepareBulk(); 113 | int numProcessedMessages = 0; 114 | int numMessagesInBatch = 0; 115 | long offsetOfNextBatch = 0; 116 | while(messageAndOffsetIterator.hasNext()) { 117 | numMessagesInBatch++; 118 | MessageAndOffset messageAndOffset = messageAndOffsetIterator.next(); 119 | offsetOfNextBatch = messageAndOffset.nextOffset(); 120 | Message message = messageAndOffset.message(); 121 | ByteBuffer payload = message.payload(); 122 | byte[] bytesMessage = new byte[payload.limit()]; 123 | payload.get(bytesMessage); 124 | byte[] transformedMessage; 125 | try { 126 | transformedMessage = this.transformMessage(bytesMessage, messageAndOffset.offset()); 127 | } catch (Exception e) { 128 | String msgStr = new String(bytesMessage); 129 | logger.error("ERROR transforming message at offset={} - skipping it: {}", 130 | messageAndOffset.offset(), msgStr, e); 131 | FailedEventsLogger.logFailedToTransformEvent( 132 | messageAndOffset.offset(), e.getMessage(), msgStr); 133 | continue; 134 | } 135 | this.getBuildReqBuilder().add( 136 | esClient.prepareIndex( 137 | indexHandler.getIndexName(null), indexHandler.getIndexType(null)) 138 | .setSource(transformedMessage) 139 | ); 140 | numProcessedMessages++; 141 | } 142 | logger.info("Total # of messages in this batch: {}; " + 143 | "# of successfully transformed and added to Index messages: {}; offsetOfNextBatch: {}", 144 | numMessagesInBatch, numProcessedMessages, offsetOfNextBatch); 145 | return offsetOfNextBatch; 146 | } 147 | 148 | public IndexHandler getIndexHandler() { 149 | return indexHandler; 150 | } 151 | 152 | 153 | } 154 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/kafka/indexer/jmx/IndexerJobStatusMBean.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.kafka.indexer.jmx; 2 | 3 | import org.elasticsearch.kafka.indexer.jobs.IndexerJobStatusEnum; 4 | 5 | public interface IndexerJobStatusMBean { 6 | long getLastCommittedOffset(); 7 | IndexerJobStatusEnum getJobStatus(); 8 | int getPartition(); 9 | } 10 | -------------------------------------------------------------------------------- 
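The JMX status beans defined in this package can be inspected with any JMX client (jconsole, VisualVM, etc.). Below is a minimal programmatic sketch, assuming the indexer JVM was started with remote JMX enabled on port 9999 (a hypothetical setup - run_indexer.sh does not configure this):

```java
import javax.management.JMX;
import javax.management.MBeanServerConnection;
import javax.management.ObjectName;
import javax.management.remote.JMXConnector;
import javax.management.remote.JMXConnectorFactory;
import javax.management.remote.JMXServiceURL;

import org.elasticsearch.kafka.indexer.jmx.KafkaEsIndexerStatusMXBean;

public class IndexerStatusClient {
    public static void main(String[] args) throws Exception {
        // hypothetical endpoint; requires the com.sun.management.jmxremote.* flags on the indexer JVM
        JMXServiceURL url = new JMXServiceURL("service:jmx:rmi:///jndi/rmi://localhost:9999/jmxrmi");
        JMXConnector connector = JMXConnectorFactory.connect(url);
        try {
            MBeanServerConnection conn = connector.getMBeanServerConnection();
            // the name under which KafkaIndexerDriver.init() registers the status bean
            ObjectName name = new ObjectName("org.elasticsearch.kafka.indexer:type=KafkaEsIndexerStatus");
            KafkaEsIndexerStatusMXBean status =
                    JMX.newMXBeanProxy(conn, name, KafkaEsIndexerStatusMXBean.class);
            System.out.println("alive=" + status.isAlive()
                    + ", failedJobs=" + status.getCountOfFailedJobs());
        } finally {
            connector.close();
        }
    }
}
```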
-------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/kafka/indexer/jmx/KafkaEsIndexerStatus.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.kafka.indexer.jmx; 2 | 3 | import java.util.List; 4 | 5 | import org.elasticsearch.kafka.indexer.jobs.IndexerJobManager; 6 | import org.elasticsearch.kafka.indexer.jobs.IndexerJobStatus; 7 | import org.elasticsearch.kafka.indexer.jobs.IndexerJobStatusEnum; 8 | 9 | public class KafkaEsIndexerStatus implements KafkaEsIndexerStatusMXBean { 10 | 11 | protected IndexerJobManager indexerJobManager; 12 | private int failedJobs; 13 | private int cancelledJobs; 14 | private int stoppedJobs; 15 | private int hangingJobs; 16 | 17 | public KafkaEsIndexerStatus(IndexerJobManager indexerJobManager) { 18 | this.indexerJobManager = indexerJobManager; 19 | } 20 | 21 | public boolean isAlive() { 22 | return true; 23 | } 24 | 25 | public List<IndexerJobStatus> getStatuses() { 26 | return indexerJobManager.getJobStatuses(); 27 | } 28 | 29 | public int getCountOfFailedJobs() { 30 | failedJobs = 0; 31 | for (IndexerJobStatus jobStatus : indexerJobManager.getJobStatuses()) { 32 | if (jobStatus.getJobStatus().equals(IndexerJobStatusEnum.Failed)){ 33 | failedJobs++; 34 | } 35 | } 36 | return failedJobs; 37 | } 38 | 39 | public int getCountOfStoppedJobs() { 40 | stoppedJobs = 0; 41 | for (IndexerJobStatus jobStatus : indexerJobManager.getJobStatuses()) { 42 | if (jobStatus.getJobStatus().equals(IndexerJobStatusEnum.Stopped)){ 43 | stoppedJobs++; 44 | } 45 | } 46 | return stoppedJobs; 47 | } 48 | 49 | public int getCountOfHangingJobs() { 50 | hangingJobs = 0; 51 | for (IndexerJobStatus jobStatus : indexerJobManager.getJobStatuses()) { 52 | if (jobStatus.getJobStatus().equals(IndexerJobStatusEnum.Hanging)){ 53 | hangingJobs++; 54 | } 55 | } 56 | return hangingJobs; 57 | } 58 | 59 | public int getCountOfCancelledJobs() { 60 | cancelledJobs = 0; 61 | for (IndexerJobStatus jobStatus : indexerJobManager.getJobStatuses()) { 62 | if (jobStatus.getJobStatus().equals(IndexerJobStatusEnum.Cancelled)){ 63 | cancelledJobs++; 64 | } 65 | } 66 | return cancelledJobs; 67 | } 68 | 69 | } 70 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/kafka/indexer/jmx/KafkaEsIndexerStatusMXBean.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.kafka.indexer.jmx; 2 | 3 | import java.util.List; 4 | 5 | import org.elasticsearch.kafka.indexer.jobs.IndexerJobStatus; 6 | 7 | public interface KafkaEsIndexerStatusMXBean { 8 | boolean isAlive(); 9 | List<IndexerJobStatus> getStatuses(); 10 | int getCountOfFailedJobs(); 11 | int getCountOfCancelledJobs(); 12 | int getCountOfStoppedJobs(); 13 | int getCountOfHangingJobs(); 14 | 15 | } 16 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/kafka/indexer/jobs/IndexerJob.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.kafka.indexer.jobs; 2 | 3 | import java.util.concurrent.Callable; 4 | 5 | import org.elasticsearch.ElasticsearchException; 6 | import org.elasticsearch.client.transport.NoNodeAvailableException; 7 | import org.elasticsearch.client.transport.TransportClient; 8 | import org.elasticsearch.common.settings.ImmutableSettings; 9 | import org.elasticsearch.common.settings.Settings; 10 | import org.elasticsearch.common.transport.InetSocketTransportAddress; 11 | import
org.elasticsearch.kafka.indexer.ConsumerConfig; 12 | import org.elasticsearch.kafka.indexer.FailedEventsLogger; 13 | import org.elasticsearch.kafka.indexer.IndexerESException; 14 | import org.elasticsearch.kafka.indexer.KafkaClient; 15 | import org.elasticsearch.kafka.indexer.MessageHandler; 16 | import org.slf4j.Logger; 17 | import org.slf4j.LoggerFactory; 18 | 19 | import kafka.common.ErrorMapping; 20 | import kafka.javaapi.FetchResponse; 21 | import kafka.javaapi.message.ByteBufferMessageSet; 22 | 23 | public class IndexerJob implements Callable<IndexerJobStatus> { 24 | 25 | private static final Logger logger = LoggerFactory.getLogger(IndexerJob.class); 26 | private ConsumerConfig consumerConfig; 27 | private MessageHandler msgHandler; 28 | private TransportClient esClient; 29 | public KafkaClient kafkaConsumerClient; 30 | private long offsetForThisRound; 31 | private long nextOffsetToProcess; 32 | private boolean isStartingFirstTime; 33 | private ByteBufferMessageSet byteBufferMsgSet = null; 34 | private FetchResponse fetchResponse = null; 35 | private final String currentTopic; 36 | private final int currentPartition; 37 | 38 | private int kafkaReinitSleepTimeMs; 39 | private int numberOfReinitAttempts; 40 | private int esIndexingRetrySleepTimeMs; 41 | private int numberOfEsIndexingRetryAttempts; 42 | private IndexerJobStatus indexerJobStatus; 43 | private volatile boolean shutdownRequested = false; 44 | private boolean isDryRun = false; 45 | 46 | public IndexerJob(ConsumerConfig config, int partition) throws Exception { 47 | this.consumerConfig = config; 48 | this.currentPartition = partition; 49 | this.currentTopic = config.topic; 50 | indexerJobStatus = new IndexerJobStatus(-1L, IndexerJobStatusEnum.Created, partition); 51 | isStartingFirstTime = true; 52 | isDryRun = Boolean.parseBoolean(config.isDryRun); 53 | kafkaReinitSleepTimeMs = config.getKafkaReinitSleepTimeMs(); 54 | numberOfReinitAttempts = config.getNumberOfReinitAttempts(); 55 | esIndexingRetrySleepTimeMs = config.getEsIndexingRetrySleepTimeMs(); 56 | numberOfEsIndexingRetryAttempts = config.getNumberOfEsIndexingRetryAttempts(); 57 | initElasticSearch(); 58 | initKafka(); 59 | createMessageHandler(); 60 | indexerJobStatus.setJobStatus(IndexerJobStatusEnum.Initialized); 61 | } 62 | 63 | void initKafka() throws Exception { 64 | logger.info("Initializing Kafka for partition {}...",currentPartition); 65 | String consumerGroupName = consumerConfig.consumerGroupName; 66 | if (consumerGroupName.isEmpty()) { 67 | consumerGroupName = "es_indexer_" + currentTopic + "_" + currentPartition; 68 | logger.info("ConsumerGroupName was empty, set it to {} for partition {}", consumerGroupName,currentPartition); 69 | } 70 | String kafkaClientId = consumerGroupName + "_" + currentPartition; 71 | logger.info("kafkaClientId={} for partition {}", kafkaClientId,currentPartition); 72 | kafkaConsumerClient = new KafkaClient(consumerConfig, kafkaClientId, currentPartition); 73 | logger.info("Kafka client created and initialized OK for partition {}",currentPartition); 74 | } 75 | 76 | private void initElasticSearch() throws Exception { 77 | String[] esHostPortList = consumerConfig.esHostPortList.trim().split(","); 78 | logger.info("Initializing ElasticSearch...
hostPortList={}, esClusterName={} for partition {}", 79 | consumerConfig.esHostPortList, consumerConfig.esClusterName,currentPartition); 80 | 81 | // TODO add validation of host:port syntax - to avoid Runtime exceptions 82 | try { 83 | Settings settings = ImmutableSettings.settingsBuilder() 84 | .put("cluster.name", consumerConfig.esClusterName) 85 | .build(); 86 | esClient = new TransportClient(settings); 87 | for (String eachHostPort : esHostPortList) { 88 | logger.info("adding [{}] to TransportClient for partition {}... ", eachHostPort,currentPartition); 89 | esClient.addTransportAddress( 90 | new InetSocketTransportAddress( 91 | eachHostPort.split(":")[0].trim(), 92 | Integer.parseInt(eachHostPort.split(":")[1].trim()) 93 | ) 94 | ); 95 | } 96 | logger.info("ElasticSearch Client created and intialized OK for partition {}",currentPartition); 97 | } catch (Exception e) { 98 | logger.error("Exception when trying to connect and create ElasticSearch Client. Throwing the error. Error Message is::" 99 | + e.getMessage()); 100 | throw e; 101 | } 102 | } 103 | 104 | void reInitKafka() throws Exception { 105 | for (int i = 0; i < numberOfReinitAttempts; i++) { 106 | try { 107 | logger.info("Re-initializing Kafka for partition {}, try # {}", 108 | currentPartition, i); 109 | kafkaConsumerClient.close(); 110 | logger.info( 111 | "Kafka client closed for partition {}. Will sleep for {} ms to allow kafka to stabilize", 112 | currentPartition, kafkaReinitSleepTimeMs); 113 | Thread.sleep(kafkaReinitSleepTimeMs); 114 | logger.info("Connecting to zookeeper again for partition {}", 115 | currentPartition); 116 | kafkaConsumerClient.connectToZooKeeper(); 117 | kafkaConsumerClient.findLeader(); 118 | kafkaConsumerClient.initConsumer(); 119 | logger.info(".. trying to get offsets info for partition {} ... 
", currentPartition); 120 | this.checkKafkaOffsets(); 121 | logger.info("Kafka Reintialization for partition {} finished OK", 122 | currentPartition); 123 | return; 124 | } catch (Exception e) { 125 | if (i < numberOfReinitAttempts) { 126 | logger.info("Re-initializing Kafka for partition {}, try # {} - still failing with Exception", 127 | currentPartition, i); 128 | } else { 129 | // if we got here - we failed to re-init Kafka after numberOfTries attempts - throw an exception out 130 | logger.info("Kafka Re-initialization failed for partition {} after {} attempts - throwing exception: " 131 | + e.getMessage(), currentPartition, numberOfReinitAttempts); 132 | throw e; 133 | } 134 | } 135 | } 136 | } 137 | 138 | private void createMessageHandler() throws Exception { 139 | try { 140 | logger.info("MessageHandler Class given in config is {} for partition {}", consumerConfig.messageHandlerClass,currentPartition); 141 | msgHandler = (MessageHandler) Class 142 | .forName(consumerConfig.messageHandlerClass) 143 | .getConstructor(TransportClient.class, ConsumerConfig.class) 144 | .newInstance(esClient, consumerConfig); 145 | logger.debug("Created and initialized MessageHandler: {} for partition {}", consumerConfig.messageHandlerClass,currentPartition); 146 | } catch (Exception e) { 147 | logger.error("Exception creating MessageHandler class for partition {}: ",currentPartition, e); 148 | throw e; 149 | } 150 | } 151 | 152 | // a hook to be used by the Manager app to request a graceful shutdown of the job 153 | public void requestShutdown() { 154 | shutdownRequested = true; 155 | } 156 | 157 | public void checkKafkaOffsets() throws Exception { 158 | try { 159 | long currentOffset = kafkaConsumerClient.fetchCurrentOffsetFromKafka(); 160 | long earliestOffset = kafkaConsumerClient.getEarliestOffset(); 161 | long latestOffset = kafkaConsumerClient.getLastestOffset(); 162 | logger.info("Kafka offsets: currentOffset={}; earliestOffset={}; latestOffset={} for partition {}", 163 | currentOffset, earliestOffset, latestOffset,currentPartition); 164 | } catch (Exception e) { 165 | logger.error("Exception from checkKafkaOffsets(): for partition {}" ,currentPartition, e); 166 | throw e; 167 | } 168 | 169 | } 170 | 171 | private void computeOffset() throws Exception { 172 | logger.info("**** Computing Kafka offset *** for partition {}",currentPartition); 173 | logger.info("startOffsetFrom={} for partition {}", consumerConfig.startOffsetFrom,currentPartition); 174 | if (consumerConfig.startOffsetFrom.equalsIgnoreCase("CUSTOM")) { 175 | if (consumerConfig.startOffset >= 0) { 176 | offsetForThisRound = consumerConfig.startOffset; 177 | } else { 178 | throw new Exception( 179 | "Custom start offset for topic [" + currentTopic + "], partition [" + 180 | currentPartition + "] is < 0, which is not an acceptable value - please provide a valid offset; exiting"); 181 | } 182 | } else if (consumerConfig.startOffsetFrom.equalsIgnoreCase("EARLIEST")) { 183 | this.offsetForThisRound = kafkaConsumerClient.getEarliestOffset(); 184 | } else if (consumerConfig.startOffsetFrom.equalsIgnoreCase("LATEST")) { 185 | offsetForThisRound = kafkaConsumerClient.getLastestOffset(); 186 | } else if (consumerConfig.startOffsetFrom.equalsIgnoreCase("RESTART")) { 187 | logger.info("Restarting from where the Offset is left for topic {}, for partition {}",currentTopic,currentPartition); 188 | offsetForThisRound = kafkaConsumerClient.fetchCurrentOffsetFromKafka(); 189 | if (offsetForThisRound == -1) 190 | { 191 | // if this is the first time this 
client tried to read - offset might be -1 192 | // [ TODO figure out all cases when this could happen] 193 | // try to get the Earliest offset and read from there - it may lead 194 | // to processing events that may have already been processed - but it is safer than 195 | // starting from the Latest offset in case not all events were processed before 196 | offsetForThisRound = kafkaConsumerClient.getEarliestOffset(); 197 | logger.info("offsetForThisRound is set to the EarliestOffset since currentOffset is -1; offsetForThisRound={} for partition {}", 198 | offsetForThisRound,currentPartition); 199 | // also store this as the CurrentOffset to Kafka - to avoid the multiple cycles through 200 | // this logic in the case no events are coming to the topic for a long time and 201 | // we always get currentOffset as -1 from Kafka 202 | try { 203 | kafkaConsumerClient.saveOffsetInKafka( offsetForThisRound, ErrorMapping.NoError()); 204 | } catch (Exception e) { 205 | logger.error("Failed to commit the offset in Kafka, exiting for partition {}: " ,currentPartition, e); 206 | throw new Exception("Failed to commit the offset in Kafka, exiting: " + e.getMessage(), e); 207 | } 208 | 209 | } else { 210 | logger.info("offsetForThisRound is set to the CurrentOffset: {} for partition {}", offsetForThisRound,currentPartition); 211 | } 212 | } 213 | long earliestOffset = kafkaConsumerClient.getEarliestOffset(); 214 | logger.info("EarliestOffset for partition {} is {}", currentPartition, earliestOffset); 215 | // check for a corner case when the computed offset (either current or custom) 216 | // is less than the Earliest offset - which could happen if some messages were 217 | // cleaned up from the topic/partition due to retention policy 218 | if (offsetForThisRound < earliestOffset){ 219 | logger.warn("WARNING: computed offset (either current or custom) = {} is less than EarliestOffset = {}" + 220 | "; setting offsetForThisRound to the EarliestOffset for partition {}", offsetForThisRound, earliestOffset,currentPartition); 221 | offsetForThisRound = earliestOffset; 222 | try { 223 | kafkaConsumerClient.saveOffsetInKafka( offsetForThisRound, ErrorMapping.NoError()); 224 | } catch (Exception e) { 225 | logger.error("Failed to commit the offset in Kafka, exiting for partition {} " ,currentPartition, e); 226 | throw new Exception("Failed to commit the offset in Kafka, exiting: " + e.getMessage(), e); 227 | } 228 | } 229 | logger.info("Resulting offsetForThisRound = {} for partition {}", offsetForThisRound,currentPartition); 230 | } 231 | 232 | public IndexerJobStatus call() { 233 | indexerJobStatus.setJobStatus(IndexerJobStatusEnum.Started); 234 | while(!shutdownRequested){ 235 | try{ 236 | // check if there was a request to stop this thread - stop processing if so 237 | if (Thread.currentThread().isInterrupted()){ 238 | // preserve interruption state of the thread 239 | Thread.currentThread().interrupt(); 240 | throw new InterruptedException( 241 | "Caught interrupted event in IndexerJob for partition=" + currentPartition + " - stopping"); 242 | } 243 | logger.debug("******* Starting a new batch of events from Kafka for partition {} ...", currentPartition); 244 | 245 | processBatch(); 246 | indexerJobStatus.setJobStatus(IndexerJobStatusEnum.InProgress); 247 | // sleep for the configured time between fetches; the property is already in ms 248 | // TODO improve sleep pattern 249 | Thread.sleep(consumerConfig.consumerSleepBetweenFetchsMs); 250 | logger.debug("Completed a round of indexing into ES for partition {}",currentPartition); 251 | } catch
(IndexerESException e) { 252 | indexerJobStatus.setJobStatus(IndexerJobStatusEnum.Failed); 253 | stopClients(); 254 | break; 255 | } catch (InterruptedException e) { 256 | indexerJobStatus.setJobStatus(IndexerJobStatusEnum.Stopped); 257 | stopClients(); 258 | break; 259 | } catch (Exception e){ 260 | logger.error("Exception when starting a new round of kafka Indexer job for partition {} - will try to re-init Kafka " , 261 | currentPartition, e); 262 | // try to re-init Kafka connection first - in case the leader for this partition 263 | // has changed due to a Kafka node restart and/or leader re-election 264 | try { 265 | this.reInitKafka(); 266 | } catch (Exception e2) { 267 | // we still failed - do not keep going anymore - stop and fix the issue manually, 268 | // then restart the consumer again; It is better to monitor the job externally 269 | // via Zabbix or the likes - rather than keep failing [potentially] forever 270 | logger.error("Exception when starting a new round of kafka Indexer job, partition {}, exiting: " 271 | + e2.getMessage(), currentPartition); 272 | indexerJobStatus.setJobStatus(IndexerJobStatusEnum.Failed); 273 | stopClients(); 274 | break; 275 | } 276 | } 277 | } 278 | logger.warn("******* Indexing job was stopped, indexerJobStatus={} - exiting", indexerJobStatus); 279 | return indexerJobStatus; 280 | } 281 | 282 | 283 | 284 | public void processBatch() throws Exception { 285 | //checkKafkaOffsets(); 286 | long jobStartTime = 0L; 287 | if (consumerConfig.isPerfReportingEnabled) 288 | jobStartTime = System.currentTimeMillis(); 289 | if (!isStartingFirstTime) { 290 | // do not read offset from Kafka after each run - we just stored it there 291 | // If this is the only thread that is processing data from this partition - 292 | // we can rely on the in-memory nextOffsetToProcess variable 293 | offsetForThisRound = nextOffsetToProcess; 294 | } else { 295 | indexerJobStatus.setJobStatus(IndexerJobStatusEnum.InProgress); 296 | // if this is the first time we run the Consumer - get it from Kafka 297 | try { 298 | computeOffset(); 299 | } catch (Exception e) { 300 | logger.error("Exception getting Kafka offsets for partition {}, will try to re-init Kafka ", 301 | currentPartition, e); 302 | reInitKafka(); 303 | // if re-initialization was successful - return and let the next job run try to 304 | // pick up from where it left off before 305 | // if it failed - an exception will be thrown out of the reInitKafka() 306 | return; 307 | } 308 | // mark this as not first time startup anymore - since we already saved correct offset 309 | // to Kafka, and to avoid going through the logic of figuring out the initial offset 310 | // every round if it so happens that there were no events from Kafka for a long time 311 | isStartingFirstTime = false; 312 | nextOffsetToProcess = offsetForThisRound; 313 | } 314 | indexerJobStatus.setLastCommittedOffset(offsetForThisRound); 315 | 316 | try{ 317 | fetchResponse = kafkaConsumerClient.getMessagesFromKafka(offsetForThisRound); 318 | } catch (Exception e){ 319 | // try re-process this batch 320 | reInitKafka(); 321 | return; 322 | } 323 | if (consumerConfig.isPerfReportingEnabled) { 324 | long timeAfterKafkaFetch = System.currentTimeMillis(); 325 | logger.debug("Fetched the response from Kafka.
Approx time taken is {} ms for partition {}", 326 | (timeAfterKafaFetch - jobStartTime),currentPartition); 327 | } 328 | if (fetchResponse.hasError()) { 329 | // Do things according to the error code 330 | // TODO figure out what job status we should use - PartialFailure?? and when to clean it up? 331 | handleError(); 332 | return; 333 | } 334 | 335 | // TODO harden the byteBufferMessageSEt life-cycle - make local var 336 | byteBufferMsgSet = fetchResponse.messageSet(currentTopic, currentPartition); 337 | if (consumerConfig.isPerfReportingEnabled) { 338 | long timeAfterKafkaFetch = System.currentTimeMillis(); 339 | logger.debug("Completed MsgSet fetch from Kafka. Approx time taken is {} ms for partition {}", 340 | (timeAfterKafkaFetch - jobStartTime) ,currentPartition); 341 | } 342 | if (byteBufferMsgSet.validBytes() <= 0) { 343 | logger.debug("No events were read from Kafka - finishing this round of reads from Kafka for partition {}",currentPartition); 344 | // TODO re-review this logic 345 | long latestOffset = kafkaConsumerClient.getLastestOffset(); 346 | if (latestOffset != offsetForThisRound) { 347 | logger.warn("latestOffset [{}] is not the same as the current offsetForThisRound for this run [{}]" + 348 | " - committing latestOffset to Kafka for partition {}", latestOffset, offsetForThisRound,currentPartition); 349 | try { 350 | kafkaConsumerClient.saveOffsetInKafka( 351 | latestOffset, 352 | fetchResponse.errorCode(consumerConfig.topic, currentPartition)); 353 | } catch (Exception e) { 354 | // throw an exception as this will break reading messages in the next round 355 | logger.error("Failed to commit the offset in Kafka - exiting for partition {} ",currentPartition, e); 356 | throw e; 357 | } 358 | } 359 | return; 360 | } 361 | logger.debug("Starting to prepare for post to ElasticSearch for partition {}",currentPartition); 362 | //Need to save nextOffsetToProcess in temporary field, 363 | //and save it after successful execution of indexIntoESWithRetries method 364 | long proposedNextOffsetToProcess = msgHandler.prepareForPostToElasticSearch(byteBufferMsgSet.iterator()); 365 | 366 | if (consumerConfig.isPerfReportingEnabled) { 367 | long timeAtPrepareES = System.currentTimeMillis(); 368 | logger.debug("Completed preparing for post to ElasticSearch. 
Approx time taken: {}ms for partition {}", 369 | (timeAtPrepareES - jobStartTime),currentPartition ); 370 | } 371 | if (isDryRun) { 372 | logger.info("**** This is a dry run, NOT committing the offset in Kafka nor posting to ES for partition {}****",currentPartition); 373 | return; 374 | } 375 | 376 | try { 377 | this.indexIntoESWithRetries(); 378 | } catch (IndexerESException e) { 379 | // re-process batch 380 | return; 381 | } 382 | 383 | nextOffsetToProcess = proposedNextOffsetToProcess; 384 | 385 | if (consumerConfig.isPerfReportingEnabled) { 386 | long timeAftEsPost = System.currentTimeMillis(); 387 | logger.debug("Approx time to post of ElasticSearch: {} ms for partition {}", 388 | (timeAftEsPost - jobStartTime),currentPartition); 389 | } 390 | logger.info("Commiting offset: {} for partition {}", nextOffsetToProcess,currentPartition); 391 | // TODO optimize getting of the fetchResponse.errorCode - in some cases there is no error, 392 | // so no need to call the API every time 393 | try { 394 | kafkaConsumerClient.saveOffsetInKafka( 395 | nextOffsetToProcess, fetchResponse.errorCode( 396 | consumerConfig.topic, currentPartition)); 397 | } catch (Exception e) { 398 | logger.error("Failed to commit the Offset in Kafka after processing and posting to ES for partition {}: ",currentPartition, e); 399 | logger.info("Trying to reInitialize Kafka and commit the offset again for partition {}...",currentPartition); 400 | try { 401 | reInitKafka(); 402 | logger.info("Attempting to commit the offset after reInitializing Kafka now.."); 403 | kafkaConsumerClient.saveOffsetInKafka( 404 | nextOffsetToProcess, fetchResponse.errorCode( 405 | consumerConfig.topic, 406 | currentPartition)); 407 | } catch (Exception e2) { 408 | logger.error("Failed to commit the Offset in Kafka even after reInitializing Kafka - exiting for partition {}: " ,currentPartition, e2); 409 | // there is no point in continuing - as we will keep re-processing events 410 | // from the old offset. 
Throw an exception and exit; 411 | // manually fix Kafka/Zookeeper env and re-start from a 412 | // desired, possibly custom, offset 413 | throw e2; 414 | } 415 | } 416 | 417 | if (consumerConfig.isPerfReportingEnabled) { 418 | long timeAtEndOfJob = System.currentTimeMillis(); 419 | logger.info("*** This round of ConsumerJob took about {} ms for partition {} ", 420 | (timeAtEndOfJob - jobStartTime),currentPartition); 421 | } 422 | logger.info("*** Finished current round of ConsumerJob, processed messages with offsets [{}-{}] for partition {} ****", 423 | offsetForThisRound, nextOffsetToProcess,currentPartition); 424 | this.byteBufferMsgSet = null; 425 | this.fetchResponse = null; 426 | } 427 | 428 | private void reInitElasticSearch() throws InterruptedException, IndexerESException { 429 | for (int i=1; i<=numberOfEsIndexingRetryAttempts; i++ ){ 430 | Thread.sleep(esIndexingRetrySleepTimeMs); 431 | logger.warn("Retrying connect to ES and re-process batch, partition {}, try# {}", 432 | currentPartition, i); 433 | try { 434 | this.initElasticSearch(); 435 | // we succeeded - get out of the loop 436 | break; 437 | } catch (Exception e2) { 438 | if (i < numberOfEsIndexingRetryAttempts) { 439 | // do not fail the job yet - will re-try to connect to ES again 440 | logger.warn("Re-connecting to ES failed, partition {}, try# {} - will re-try", 441 | currentPartition, i); 442 | } else { 443 | // all re-connect attempts are exhausted - mark this job as Failed, 444 | // throw the exception out and let this indexer job stop 445 | logger.error("Re-connecting to ES failed, partition {}, after {} tries - giving up", 446 | currentPartition, numberOfEsIndexingRetryAttempts); 447 | indexerJobStatus.setJobStatus(IndexerJobStatusEnum.Failed); 448 | throw new IndexerESException(); 449 | } 450 | } 451 | } 452 | } 453 | 454 | private void indexIntoESWithRetries() throws IndexerESException, Exception { 455 | try { 456 | logger.info("posting the messages to ElasticSearch for partition {}...", 457 | currentPartition); 458 | msgHandler.postToElasticSearch(); 459 | } catch (NoNodeAvailableException e) { 460 | // ES cluster is unreachable or down; re-connect via reInitElasticSearch() 461 | // and throw IndexerESException out so that the current batch is 462 | // re-processed on the next round; if re-connecting fails as well - 463 | // this job is marked as Failed and stops 464 | 465 | logger.error("Error posting messages to ElasticSearch for offset {}-->{} " + 466 | " in partition {}: NoNodeAvailableException - ES cluster is unreachable, will retry to connect after sleeping for {}ms", 467 | offsetForThisRound, nextOffsetToProcess-1, esIndexingRetrySleepTimeMs, currentPartition, e); 468 | 469 | reInitElasticSearch(); 470 | //throws Exception to re-process current batch 471 | throw new IndexerESException(); 472 | 473 | } catch (ElasticsearchException e) { 474 | // we are assuming that other exceptions are data-specific 475 | // - continue and commit the offset, 476 | // but be aware that ALL messages from this batch are NOT indexed into ES 477 | logger.error("Error posting messages to ElasticSearch for offset {}-->{} in partition {} - skipping them: ", 478 | offsetForThisRound, nextOffsetToProcess-1, currentPartition, e); 479 | FailedEventsLogger.logFailedEvent(offsetForThisRound, nextOffsetToProcess - 1, currentPartition, e.getDetailedMessage(), null); 480 | } 481 | 482 | } 483 | 484 | 485 | public void handleError() throws Exception { 486 | // Do things according to the error code 487 | short errorCode = fetchResponse.errorCode( 488 | consumerConfig.topic, currentPartition); 489 | logger.error("Error fetching events from Kafka - handling it. Error code: {} for partition {}" 490 | ,errorCode, currentPartition); 491 | if (errorCode == ErrorMapping.BrokerNotAvailableCode()) { 492 | logger.error("BrokerNotAvailableCode error happened when fetching message from Kafka. ReInitiating Kafka Client for partition {}",currentPartition); 493 | reInitKafka(); 494 | } else if (errorCode == ErrorMapping.InvalidFetchSizeCode()) { 495 | logger.error("InvalidFetchSizeCode error happened when fetching message from Kafka. ReInitiating Kafka Client for partition {}",currentPartition); 496 | reInitKafka(); 497 | } else if (errorCode == ErrorMapping.InvalidMessageCode()) { 498 | logger.error("InvalidMessageCode error happened when fetching message from Kafka, not handling it. Returning for partition {}",currentPartition); 499 | } else if (errorCode == ErrorMapping.LeaderNotAvailableCode()) { 500 | logger.error("LeaderNotAvailableCode error happened when fetching message from Kafka.
ReInitiating Kafka Client for partition {}",currentPartition); 501 | reInitKafka(); 502 | } else if (errorCode == ErrorMapping.MessageSizeTooLargeCode()) { 503 | logger.error("MessageSizeTooLargeCode error happened when fetching message from Kafka, not handling it. Returning for partition {}",currentPartition); 504 | } else if (errorCode == ErrorMapping.NotLeaderForPartitionCode()) { 505 | logger.error("NotLeaderForPartitionCode error happened when fetching message from Kafka, not handling it. ReInitiating Kafka Client for partition {}",currentPartition); 506 | reInitKafka(); 507 | } else if (errorCode == ErrorMapping.OffsetMetadataTooLargeCode()) { 508 | logger.error("OffsetMetadataTooLargeCode error happened when fetching message from Kafka, not handling it. Returning for partition {}",currentPartition); 509 | } else if (errorCode == ErrorMapping.OffsetOutOfRangeCode()) { 510 | logger.error("OffsetOutOfRangeCode error fetching messages for partition={}, offsetForThisRound={}", 511 | currentPartition, offsetForThisRound); 512 | long earliestOffset = kafkaConsumerClient.getEarliestOffset(); 513 | // The most likely reason for this error is that the consumer is trying to read events from an offset 514 | // that has already expired from the Kafka topic due to retention period; 515 | // In that case the only course of action is to start processing events from the EARLIEST available offset 516 | logger.info("OffsetOutOfRangeCode error: setting offset for partition {} to the EARLIEST possible offset: {}", 517 | currentPartition, earliestOffset); 518 | nextOffsetToProcess = earliestOffset; 519 | try { 520 | kafkaConsumerClient.saveOffsetInKafka(earliestOffset, errorCode); 521 | } catch (Exception e) { 522 | // throw an exception as this will break reading messages in the next round 523 | // TODO verify that the IndexerJob is stopped cleanly in this case 524 | logger.error("Failed to commit offset in Kafka after OffsetOutOfRangeCode - exiting for partition {} ", currentPartition, e); 525 | throw e; 526 | } 527 | return; 528 | 529 | } else if (errorCode == ErrorMapping.ReplicaNotAvailableCode()) { 530 | logger.error("ReplicaNotAvailableCode error happened when fetching message from Kafka - re-init-ing Kafka... for partition {}",currentPartition); 531 | reInitKafka(); 532 | return; 533 | 534 | } else if (errorCode == ErrorMapping.RequestTimedOutCode()) { 535 | logger.error("RequestTimedOutCode error happened when fetching message from Kafka - re-init-ing Kafka... for partition {}",currentPartition); 536 | reInitKafka(); 537 | return; 538 | 539 | } else if (errorCode == ErrorMapping.StaleControllerEpochCode()) { 540 | logger.error("StaleControllerEpochCode error happened when fetching message from Kafka, not handling it. Returning for partition {}",currentPartition); 541 | return; 542 | 543 | } else if (errorCode == ErrorMapping.StaleLeaderEpochCode()) { 544 | logger.error("StaleLeaderEpochCode error happened when fetching message from Kafka, not handling it. Returning for partition {}",currentPartition); 545 | return; 546 | 547 | } else if (errorCode == ErrorMapping.UnknownCode()) { 548 | logger.error("UnknownCode error happened when fetching message from Kafka - re-init-ing Kafka... 
for partition {}",currentPartition); 549 | reInitKafka(); 550 | return; 551 | 552 | } else if (errorCode == ErrorMapping.UnknownTopicOrPartitionCode()) { 553 | logger.error("UnknownTopicOrPartitionCode error happened when fetching message from Kafka - re-init-ing Kafka...for partition {}",currentPartition); 554 | reInitKafka(); 555 | return; 556 | 557 | } 558 | 559 | } 560 | public void stopClients() { 561 | logger.info("About to stop ES client for topic {}, partition {}", 562 | currentTopic, currentPartition); 563 | if (esClient != null) 564 | esClient.close(); 565 | logger.info("About to stop Kafka client for topic {}, partition {}", 566 | currentTopic, currentPartition); 567 | if (kafkaConsumerClient != null) 568 | kafkaConsumerClient.close(); 569 | logger.info("Stopped Kafka and ES clients for topic {}, partition {}", 570 | currentTopic, currentPartition); 571 | } 572 | 573 | public IndexerJobStatus getIndexerJobStatus() { 574 | return indexerJobStatus; 575 | } 576 | 577 | } 578 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/kafka/indexer/jobs/IndexerJobManager.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.kafka.indexer.jobs; 2 | 3 | import com.google.common.util.concurrent.ThreadFactoryBuilder; 4 | import org.elasticsearch.kafka.indexer.ConsumerConfig; 5 | import org.slf4j.Logger; 6 | import org.slf4j.LoggerFactory; 7 | 8 | import java.util.ArrayList; 9 | import java.util.List; 10 | import java.util.concurrent.*; 11 | 12 | public class IndexerJobManager { 13 | 14 | private static final Logger logger = LoggerFactory.getLogger(IndexerJobManager.class); 15 | private static final String KAFKA_CONSUMER_STREAM_POOL_NAME_FORMAT = "kafka-indexer-consumer-thread-%d"; 16 | private ConsumerConfig consumerConfig; 17 | private ExecutorService executorService; 18 | private int numOfPartitions; 19 | private int firstPartition; 20 | private int lastPartition; 21 | // Map of of indexer jobs for all partitions 22 | private ConcurrentHashMap indexerJobs; 23 | // List of > futures of all submitted indexer jobs for all partitions 24 | private List> indexerJobFutures; 25 | 26 | public IndexerJobManager(ConsumerConfig config) throws Exception { 27 | this.consumerConfig = config; 28 | firstPartition = config.firstPartition; 29 | lastPartition = config.lastPartition; 30 | numOfPartitions = lastPartition - firstPartition + 1; 31 | if (numOfPartitions <= 0) { 32 | logger.error("ERROR in configuration: number of partitions is <= 0"); 33 | throw new Exception("ERROR in configuration: number of partitions is <= 0"); 34 | } 35 | logger.info("ConsumerJobManager is starting, servicing partitions: [{}-{}]", 36 | firstPartition, lastPartition); 37 | } 38 | 39 | public void startAll() throws Exception { 40 | ThreadFactory threadFactory = new ThreadFactoryBuilder().setNameFormat(KAFKA_CONSUMER_STREAM_POOL_NAME_FORMAT).build(); 41 | executorService = Executors.newFixedThreadPool(numOfPartitions,threadFactory); 42 | indexerJobs = new ConcurrentHashMap<>(); 43 | // create as many IndexerJobs as there are partitions in the events topic 44 | // first create all jobs without starting them - to make sure they can init all resources OK 45 | try { 46 | for (int partition=firstPartition; partition<=lastPartition; partition++){ 47 | logger.info("Creating IndexerJob for partition={}", partition); 48 | IndexerJob pIndexerJob = new IndexerJob(consumerConfig, partition); 49 | 
indexerJobs.put(partition, pIndexerJob); 50 | } 51 | } catch (Exception e) { 52 | logger.error("ERROR: Failure creating a consumer job, exiting: ", e); 53 | // if any job startup fails - abort; 54 | throw e; 55 | } 56 | // now start them all; note that invokeAll() blocks until all jobs complete - 57 | // i.e. until they are stopped, cancelled, or fail 58 | indexerJobFutures = executorService.invokeAll(indexerJobs.values()); 59 | } 60 | 61 | public List<IndexerJobStatus> getJobStatuses(){ 62 | List<IndexerJobStatus> indexerJobStatuses = new ArrayList<>(); 63 | for (IndexerJob indexerJob: indexerJobs.values()){ 64 | indexerJobStatuses.add(indexerJob.getIndexerJobStatus()); 65 | } 66 | return indexerJobStatuses; 67 | } 68 | 69 | public void stop() { 70 | logger.info("About to stop all consumer jobs ..."); 71 | // ask each job to shut down gracefully, then stop the thread pool 72 | if (indexerJobs != null) { 73 | for (IndexerJob indexerJob : indexerJobs.values()) { 74 | indexerJob.requestShutdown(); 75 | } 76 | } 77 | if (executorService != null && !executorService.isTerminated()) { 78 | try { 79 | executorService.shutdown(); 80 | executorService.awaitTermination(consumerConfig.appStopTimeoutSeconds, TimeUnit.SECONDS); 81 | } catch (InterruptedException e) { 82 | logger.error("ERROR: failed to stop all consumer jobs due to InterruptedException: ", e); 83 | } 84 | } 85 | logger.info("Stop() finished OK"); 86 | } 87 | 88 | } 89 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/kafka/indexer/jobs/IndexerJobStatus.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.kafka.indexer.jobs; 2 | 3 | import org.elasticsearch.kafka.indexer.jmx.IndexerJobStatusMBean; 4 | 5 | public class IndexerJobStatus implements IndexerJobStatusMBean{ 6 | 7 | private long lastCommittedOffset; 8 | private IndexerJobStatusEnum jobStatus; 9 | private final int partition; 10 | 11 | public IndexerJobStatus(long lastCommittedOffset, 12 | IndexerJobStatusEnum jobStatus, int partition) { 13 | super(); 14 | this.lastCommittedOffset = lastCommittedOffset; 15 | this.jobStatus = jobStatus; 16 | this.partition = partition; 17 | } 18 | 19 | public long getLastCommittedOffset() { 20 | return lastCommittedOffset; 21 | } 22 | 23 | public void setLastCommittedOffset(long lastCommittedOffset) { 24 | this.lastCommittedOffset = lastCommittedOffset; 25 | } 26 | 27 | public IndexerJobStatusEnum getJobStatus() { 28 | return jobStatus; 29 | } 30 | 31 | public void setJobStatus(IndexerJobStatusEnum jobStatus) { 32 | this.jobStatus = jobStatus; 33 | } 34 | 35 | public int getPartition() { 36 | return partition; 37 | } 38 | 39 | @Override 40 | public String toString() { 41 | StringBuilder sb = new StringBuilder(); 42 | sb.append("[IndexerJobStatus: {"); 43 | sb.append("partition=" + partition); 44 | sb.append(", lastCommittedOffset=" + lastCommittedOffset); 45 | sb.append(", jobStatus=" + jobStatus.name()); 46 | sb.append("}]"); 47 | return sb.toString(); 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/kafka/indexer/jobs/IndexerJobStatusEnum.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.kafka.indexer.jobs; 2 | 3 | public enum IndexerJobStatusEnum { 4 | 5 | Created, 6 | Initialized, 7 | Started, 8 | InProgress, 9 | Hanging, 10 | Stopped, 11 | Cancelled, 12 | Failed 13 | 14 | } 15 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/kafka/indexer/mappers/AccessLogMapper.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.kafka.indexer.mappers; 2 | 3 | public class AccessLogMapper { 4 | 5 | private KafkaMetaDataMapper
kafkaMetaData = new KafkaMetaDataMapper(); 6 | private String ip; 7 | private String protocol; 8 | private String method; 9 | private String url; 10 | private String payLoad; 11 | private String sessionID; 12 | private String timeStamp; 13 | private Integer responseTime; 14 | private Integer responseCode; 15 | private String hostName; 16 | private String serverName; 17 | private String serverAndInstance; 18 | private String instance; 19 | private String sourceIpAndPort; 20 | private String ajpThreadName; 21 | private String rawMessage; 22 | 23 | public KafkaMetaDataMapper getKafkaMetaData() { 24 | return kafkaMetaData; 25 | } 26 | 27 | public void setKafkaMetaData(KafkaMetaDataMapper kafkaMetaData) { 28 | this.kafkaMetaData = kafkaMetaData; 29 | } 30 | 31 | public String getIp() { 32 | return ip; 33 | } 34 | 35 | public void setIp(String ip) { 36 | this.ip = ip; 37 | } 38 | 39 | public String getProtocol() { 40 | return protocol; 41 | } 42 | 43 | public void setProtocol(String protocol) { 44 | this.protocol = protocol; 45 | } 46 | 47 | public String getMethod() { 48 | return method; 49 | } 50 | 51 | public void setMethod(String method) { 52 | this.method = method; 53 | } 54 | 55 | public String getUrl() { 56 | return url; 57 | } 58 | 59 | public void setUrl(String url) { 60 | this.url = url; 61 | } 62 | 63 | public String getPayLoad() { 64 | return payLoad; 65 | } 66 | 67 | public void setPayLoad(String payLoad) { 68 | this.payLoad = payLoad; 69 | } 70 | 71 | public String getSessionID() { 72 | return sessionID; 73 | } 74 | 75 | public void setSessionID(String sessionID) { 76 | this.sessionID = sessionID; 77 | } 78 | 79 | public String getTimeStamp() { 80 | return timeStamp; 81 | } 82 | 83 | public void setTimeStamp(String timeStamp) { 84 | this.timeStamp = timeStamp; 85 | } 86 | 87 | public Integer getResponseTime() { 88 | return responseTime; 89 | } 90 | 91 | public void setResponseTime(Integer responseTime) { 92 | this.responseTime = responseTime; 93 | } 94 | 95 | public Integer getResponseCode() { 96 | return responseCode; 97 | } 98 | 99 | public void setResponseCode(Integer responseCode) { 100 | this.responseCode = responseCode; 101 | } 102 | 103 | public String getHostName() { 104 | return hostName; 105 | } 106 | 107 | public void setHostName(String hostName) { 108 | this.hostName = hostName; 109 | } 110 | 111 | public String getServerName() { 112 | return serverName; 113 | } 114 | 115 | public void setServerName(String serverName) { 116 | this.serverName = serverName; 117 | } 118 | 119 | public String getInstance() { 120 | return instance; 121 | } 122 | 123 | public void setInstance(String instance) { 124 | this.instance = instance; 125 | } 126 | 127 | public String getSourceIpAndPort() { 128 | return sourceIpAndPort; 129 | } 130 | 131 | public void setSourceIpAndPort(String sourceIpAndPort) { 132 | this.sourceIpAndPort = sourceIpAndPort; 133 | } 134 | 135 | public String getServerAndInstance() { 136 | return serverAndInstance; 137 | } 138 | 139 | public void setServerAndInstance(String serverAndInstance) { 140 | this.serverAndInstance = serverAndInstance; 141 | } 142 | 143 | public String getRawMessage() { 144 | return rawMessage; 145 | } 146 | 147 | public void setRawMessage(String rawMessage) { 148 | this.rawMessage = rawMessage; 149 | } 150 | 151 | public String getAjpThreadName() { 152 | return ajpThreadName; 153 | } 154 | 155 | public void setAjpThreadName(String ajpThreadName) { 156 | this.ajpThreadName = ajpThreadName; 157 | } 158 | 159 | 160 | 161 | } 162 | 
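For context (a sketch, not code from this repo): AccessLogMapper and the KafkaMetaDataMapper below are plain beans that AccessLogMessageHandler serializes with Jackson's ObjectMapper (org.codehaus.jackson) before indexing into ElasticSearch. A minimal illustration of that serialization, with made-up field values:

import org.codehaus.jackson.map.ObjectMapper;
import org.elasticsearch.kafka.indexer.mappers.AccessLogMapper;

public class AccessLogMapperJsonSketch {
    public static void main(String[] args) throws Exception {
        AccessLogMapper accessLog = new AccessLogMapper();
        // illustrative values only
        accessLog.setIp("10.1.2.3");
        accessLog.setMethod("GET");
        accessLog.setUrl("/some/url");
        accessLog.setResponseCode(200);
        accessLog.getKafkaMetaData().setTopic("my_log_topic");
        accessLog.getKafkaMetaData().setOffset(42L);
        // each getter becomes a JSON field, e.g. {"ip":"10.1.2.3","method":"GET",...}
        String json = new ObjectMapper().writeValueAsString(accessLog);
        System.out.println(json);
    }
}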
-------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/kafka/indexer/mappers/KafkaMetaDataMapper.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.kafka.indexer.mappers; 2 | 3 | public class KafkaMetaDataMapper { 4 | 5 | private String topic; 6 | private String consumerGroupName; 7 | private short partition; 8 | private long offset; 9 | 10 | public String getTopic() { 11 | return topic; 12 | } 13 | 14 | public void setTopic(String topic) { 15 | this.topic = topic; 16 | } 17 | 18 | public String getConsumerGroupName() { 19 | return consumerGroupName; 20 | } 21 | 22 | public void setConsumerGroupName(String consumerGroupName) { 23 | this.consumerGroupName = consumerGroupName; 24 | } 25 | 26 | public short getPartition() { 27 | return partition; 28 | } 29 | 30 | public void setPartition(short partition) { 31 | this.partition = partition; 32 | } 33 | 34 | public long getOffset() { 35 | return offset; 36 | } 37 | 38 | public void setOffset(long offset) { 39 | this.offset = offset; 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/kafka/indexer/messageHandlers/AccessLogMessageHandler.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.kafka.indexer.messageHandlers; 2 | 3 | import java.text.SimpleDateFormat; 4 | import java.util.Date; 5 | import java.util.TimeZone; 6 | 7 | import org.codehaus.jackson.map.ObjectMapper; 8 | import org.elasticsearch.client.transport.TransportClient; 9 | import org.elasticsearch.kafka.indexer.ConsumerConfig; 10 | import org.elasticsearch.kafka.indexer.MessageHandler; 11 | import org.elasticsearch.kafka.indexer.mappers.AccessLogMapper; 12 | import org.slf4j.Logger; 13 | import org.slf4j.LoggerFactory; 14 | 15 | 16 | public class AccessLogMessageHandler extends MessageHandler { 17 | 18 | private static final Logger logger = LoggerFactory.getLogger(AccessLogMessageHandler.class); 19 | private final String actualDateFormat = "dd/MMM/yyyy:HH:mm:ss"; // 24-hour clock, as in access log timestamps 20 | private final String expectedDateFormat = "yyyy-MM-dd'T'HH:mm:ss.SSSZ"; 21 | private final String actualTimeZone = "Europe/London"; 22 | private final String expectedTimeZone = "Europe/London"; 23 | 24 | private ObjectMapper mapper = new ObjectMapper(); 25 | private AccessLogMapper accessLogMsgObj = new AccessLogMapper(); 26 | 27 | // note: these per-message temporaries are instance fields, which makes this handler stateful 28 | private String[] splittedMsg = null; 29 | private SimpleDateFormat actualFormat = null; 30 | private SimpleDateFormat expectedFormat = null; 31 | private String[] dateString = null; 32 | private Date date = null; 33 | private String[] serverAndInstance = null; 34 | 35 | public AccessLogMessageHandler(TransportClient client, ConsumerConfig config) throws Exception{ 36 | super(client, config); 37 | logger.info("Initialized AccessLogMessageHandler"); 38 | } 39 | 40 | @Override 41 | public byte[] transformMessage(byte[] inputMessage, Long offset) throws Exception{ 42 | String outputMessageStr = this.convertToJson(new String(inputMessage, "UTF-8"), offset); 43 | return outputMessageStr.getBytes(); 44 | } 45 | 46 | public String convertToJson(String rawMsg, Long offset) throws Exception{ 47 | this.splittedMsg = rawMsg.split("\\|"); 48 | for(int i=0; i < this.splittedMsg.length; i++){ 49 | this.splittedMsg[i] = this.splittedMsg[i].trim(); 50 | } 51 | this.accessLogMsgObj = new
AccessLogMapper(); 52 | accessLogMsgObj.setRawMessage(rawMsg); 53 | accessLogMsgObj.getKafkaMetaData().setOffset(offset); 54 | accessLogMsgObj.getKafkaMetaData().setTopic(this.getConfig().topic); 55 | accessLogMsgObj.getKafkaMetaData().setConsumerGroupName(this.getConfig().consumerGroupName); 56 | accessLogMsgObj.getKafkaMetaData().setPartition((short) this.getConfig().firstPartition); 57 | accessLogMsgObj.setIp(splittedMsg[0].trim()); 58 | accessLogMsgObj.setProtocol(splittedMsg[1].trim()); 59 | 60 | if(splittedMsg[5].toUpperCase().contains("GET")){ 61 | accessLogMsgObj.setIp(splittedMsg[3].trim()); 62 | accessLogMsgObj.setProtocol(splittedMsg[4].trim()); 63 | 64 | accessLogMsgObj.setMethod(splittedMsg[5].trim()); 65 | accessLogMsgObj.setPayLoad(splittedMsg[6].trim()); 66 | accessLogMsgObj.setResponseCode(Integer.parseInt(splittedMsg[8].trim())); 67 | accessLogMsgObj.setSessionID(splittedMsg[9].trim()); 68 | this.serverAndInstance = splittedMsg[9].split("\\.")[1].split("-"); 69 | 70 | accessLogMsgObj.setServerName(serverAndInstance[0].trim()); 71 | accessLogMsgObj.setInstance(serverAndInstance[1].trim()); 72 | accessLogMsgObj.setServerAndInstance(serverAndInstance[0].trim() + "_" + serverAndInstance[1].trim()); 73 | 74 | accessLogMsgObj.setHostName(splittedMsg[12].split(" ")[0].trim()); 75 | accessLogMsgObj.setResponseTime(Integer.parseInt(splittedMsg[13].trim())); 76 | accessLogMsgObj.setUrl(splittedMsg[11].trim()); 77 | accessLogMsgObj.setAjpThreadName(splittedMsg[14].trim()); 78 | accessLogMsgObj.setSourceIpAndPort(null); 79 | 80 | this.actualFormat = new SimpleDateFormat(actualDateFormat); 81 | this.actualFormat.setTimeZone(TimeZone.getTimeZone(actualTimeZone)); 82 | 83 | this.expectedFormat = new SimpleDateFormat(expectedDateFormat); 84 | this.expectedFormat.setTimeZone(TimeZone.getTimeZone(expectedTimeZone)); 85 | this.dateString = splittedMsg[0].split(" "); 86 | this.date = actualFormat.parse(dateString[0].trim().replaceAll("\\[", "").trim()); 87 | accessLogMsgObj.setTimeStamp(expectedFormat.format(date)); 88 | } 89 | 90 | if(splittedMsg[5].toUpperCase().contains("POST")){ 91 | accessLogMsgObj.setIp(splittedMsg[3].trim()); 92 | accessLogMsgObj.setProtocol(splittedMsg[4].trim()); 93 | 94 | accessLogMsgObj.setMethod(splittedMsg[5].trim()); 95 | if(!splittedMsg[6].trim().isEmpty()){ 96 | accessLogMsgObj.setPayLoad(splittedMsg[6].trim()); 97 | } 98 | accessLogMsgObj.setResponseCode(Integer.parseInt(splittedMsg[8].trim())); 99 | accessLogMsgObj.setSessionID(splittedMsg[9].trim()); 100 | this.serverAndInstance = splittedMsg[9].split("\\.")[1].split("-"); 101 | 102 | accessLogMsgObj.setServerName(serverAndInstance[0].trim()); 103 | accessLogMsgObj.setInstance(serverAndInstance[1].trim()); 104 | accessLogMsgObj.setServerAndInstance(serverAndInstance[0].trim() + "_" + serverAndInstance[1].trim()); 105 | 106 | accessLogMsgObj.setHostName(splittedMsg[12].trim().split(" ")[0]); 107 | accessLogMsgObj.setResponseTime(Integer.parseInt(splittedMsg[13].trim())); 108 | accessLogMsgObj.setUrl(splittedMsg[11].trim()); 109 | accessLogMsgObj.setAjpThreadName(splittedMsg[14].trim()); 110 | accessLogMsgObj.setSourceIpAndPort(null); 111 | 112 | actualFormat = new SimpleDateFormat(actualDateFormat); 113 | actualFormat.setTimeZone(TimeZone.getTimeZone(actualTimeZone)); 114 | 115 | expectedFormat = new SimpleDateFormat(expectedDateFormat); 116 | expectedFormat.setTimeZone(TimeZone.getTimeZone(expectedTimeZone)); 117 | 118 | this.dateString = splittedMsg[0].split(" "); 119 | this.date =
actualFormat.parse(dateString[0].trim().replaceAll("\\[", "").trim()); 120 | accessLogMsgObj.setTimeStamp(expectedFormat.format(date)); 121 | } 122 | 123 | // release the temporary references 124 | this.splittedMsg = null; 125 | this.actualFormat = null; 126 | this.expectedFormat = null; 127 | this.dateString = null; 128 | this.date = null; 129 | this.serverAndInstance = null; 130 | 131 | return mapper.writeValueAsString(accessLogMsgObj); 132 | } 133 | 134 | } 135 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/kafka/indexer/messageHandlers/RawMessageStringHandler.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.kafka.indexer.messageHandlers; 2 | 3 | import org.elasticsearch.client.transport.TransportClient; 4 | import org.elasticsearch.kafka.indexer.ConsumerConfig; 5 | import org.elasticsearch.kafka.indexer.MessageHandler; 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | 9 | public class RawMessageStringHandler extends MessageHandler { 10 | 11 | private static final Logger logger = LoggerFactory.getLogger(RawMessageStringHandler.class); 12 | 13 | public RawMessageStringHandler(TransportClient client, ConsumerConfig config) throws Exception{ 14 | super(client, config); 15 | logger.info("Initialized RawMessageStringHandler"); 16 | } 17 | 18 | @Override 19 | public byte[] transformMessage(byte[] inputMessage, Long offset) throws Exception{ 20 | byte[] outputMessage; 21 | // do necessary transformation here 22 | // in the simplest case - post as is 23 | outputMessage = inputMessage; 24 | return outputMessage; 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /src/main/resources/kafka-es-indexer.properties.template: -------------------------------------------------------------------------------- 1 | ### Zookeeper properties #################################### 2 | # Zookeeper's host:port list: <host1>:<port1>,…,<hostN>:<portN> 3 | # default value: localhost:2181, if not specified 4 | kafkaZookeeperList=localhost:2181 5 | 6 | # Zookeeper session timeout in MS 7 | zkSessionTimeoutMs=10000 8 | 9 | # Zookeeper connection timeout in MS 10 | zkConnectionTimeoutMs=15000 11 | 12 | # Zookeeper number of retries when creating a curator client 13 | zkCuratorRetryTimes=3 14 | 15 | # Zookeeper: time in ms between re-tries when creating a Curator 16 | zkCuratorRetryDelayMs=2000 17 | 18 | ### Kafka properties #################################### 19 | # Kafka Brokers host:port list: <host1>:<port1>,…,<hostN>:<portN> 20 | # default: localhost:9092 21 | kafkaBrokersList=localhost:9092 22 | 23 | # Kafka Consumer group name prefix - 24 | # each indexer job will have a clientId = consumerGroupName + "_" + partitionNumber 25 | # default: kafka_es_indexer 26 | consumerGroupName=kafka_es_indexer 27 | 28 | # Kafka Topic from which messages are to be processed 29 | # mandatory property, no default value specified.
30 | topic=my_log_topic 31 | 32 | ## below two properties define a range of partitions to be processed by this application; 33 | ## each partition will be processed by an IndexerJob in a separate Thread, so this also 34 | ## defines the number of indexer threads created by the app - please set memory requirements accordingly 35 | 36 | # first partition in the Kafka Topic (defined by the 'topic' property) to process messages from 37 | # default: 0 38 | firstPartition=0 39 | 40 | # last partition in the Kafka Topic (defined by the 'topic' property) to process messages from 41 | # no default - is required 42 | lastPartition=3 43 | 44 | # Offset option from where the message fetching should happen in Kafka 45 | # Values can be: CUSTOM / EARLIEST / LATEST / RESTART. 46 | # CUSTOM: Messages from the offset specified by the 'startOffset' property will be read. If 'CUSTOM' is set, then 'startOffset' has to be set to an integer value 47 | # EARLIEST: Messages from the earliest available offset in Kafka will be read 48 | # LATEST: Messages from the latest available offset in Kafka will be read 49 | # RESTART: Message reading will happen from the offset where the last cycle of reading by this client has stopped 50 | # Default: "EARLIEST" 51 | startOffsetFrom=RESTART 52 | 53 | # integer value of the offset from where the message processing should happen. Use this property in conjunction with 'startOffsetFrom=CUSTOM' 54 | # mandatory property when 'startOffsetFrom' is set to 'CUSTOM', no default value specified. 55 | startOffset=0 56 | 57 | # Kafka FetchRequest's minBytes value 58 | # Default: 31457280 bytes, i.e. (10 * 1024 * 1024 * 3) 59 | # Set it to ~4MB and slowly ramp it up based on your heap memory. 60 | # setting this value to more than 31457280 may cause errors from the batch indexing call to ES 61 | # and in some cases this causes the indexer job to hang 62 | kafkaFetchSizeMinBytes=31457280 63 | 64 | # Kafka SimpleConsumer socket bufferSize 65 | kafkaSimpleConsumerBufferSizeBytes=31457280 66 | # Kafka SimpleConsumer socket timeout in MS 67 | kafkaSimpleConsumerSocketTimeoutMs=10000 68 | 69 | # timeout in seconds before force-stopping the Indexer app and all indexer jobs 70 | appStopTimeoutSeconds=10 71 | 72 | # number of times to try to re-init Kafka connections/consumer if read/write to Kafka fails 73 | numberOfReinitTries=2 74 | # sleep time in ms between Kafka re-init attempts 75 | kafkaReinitSleepTimeMs=10000 76 | 77 | ### ElasticSearch properties #################################### 78 | # ElasticSearch Host and Port List for all the nodes 79 | # Example: esHostPortList=machine_1_ip:9300,machine_2_ip:9300 80 | esHostPortList=localhost:9300 81 | 82 | # Name of the ElasticSearch Cluster that messages will be posted to; 83 | # Tip: It's not a good idea to use the default name "ElasticSearch" as your cluster name.
84 | esClusterName=KafkaESCluster 85 | 86 | # ES Index Name that messages will be posted/indexed to; this can be customized by using a custom IndexHandler implementation class 87 | # Default: "kafkaESIndex" 88 | esIndex=kafkaESIndex 89 | 90 | # ES Index Type that messages will be posted/indexed to; this can be customized by using a custom IndexHandler implementation class 91 | # Default: "kafkaESType" 92 | esIndexType=kafkaESType 93 | 94 | # Fully qualified name of a concrete message handler class 95 | # Default: "org.elasticsearch.kafka.consumer.RawMessageStringHandler" 96 | # Custom class should be extended from the org.elasticsearch.kafka.consumer.MessageHandler class 97 | messageHandlerClass=org.elasticsearch.kafka.indexer.messageHandlers.RawMessageStringHandler 98 | 99 | # Fully qualified name of a custom IndexHandler implementation class 100 | # Default: org.elasticsearch.kafka.consumer.BasicIndexHandler 101 | indexHandlerClass=org.elasticsearch.kafka.indexer.BasicIndexHandler 102 | 103 | 104 | # Preferred message encoding to process the message before posting it to ElasticSearch. 105 | # Default: "UTF-8" 106 | messageEncoding=UTF-8 107 | 108 | # Dry runs will not post to ElasticSearch and won't commit the offset to Kafka 109 | isDryRun=false 110 | 111 | # Time in ms for the consumer to sleep between each round of reading events from Kafka 112 | consumerSleepBetweenFetchsMs=10 113 | # number of times to try to re-connect to ES when performing batch indexing, if connection to ES fails 114 | numberOfEsIndexingRetryAttempts=2 115 | # sleep time in ms between attempts to connect to ES 116 | esIndexingRetrySleepTimeMs=10000 117 | 118 | # flag to enable/disable performance timings reporting; Set it to true when performance/load-testing your app, set it to 'false' when deploying in production as it will affect performance of the app; defaults to 'false' 119 | isPerfReportingEnabled=false 120 |
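Putting the template together (an illustrative sketch, not a file from this repo): a minimal kafka-es-indexer.properties for a local two-partition setup. Every key is documented in the template above; all values are examples.

# hypothetical minimal config: local ZooKeeper/Kafka/ES, partitions 0-1 of "my_log_topic"
kafkaZookeeperList=localhost:2181
kafkaBrokersList=localhost:9092
consumerGroupName=kafka_es_indexer
topic=my_log_topic
firstPartition=0
lastPartition=1
startOffsetFrom=RESTART
esHostPortList=localhost:9300
esClusterName=KafkaESCluster
messageHandlerClass=org.elasticsearch.kafka.indexer.messageHandlers.RawMessageStringHandler
indexHandlerClass=org.elasticsearch.kafka.indexer.BasicIndexHandler
# start with a dry run to verify connectivity without indexing or committing offsets
isDryRun=true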
-------------------------------------------------------------------------------- /src/main/resources/logback.xml.template: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <!-- properties expected to be set per environment: LOG_DIR, 3 | MAX_HISTORY_MAIN_LOG, MAX_SIZE_MAIN_LOG, 4 | MAX_HISTORY_FAILED_LOG, MAX_SIZE_FAILED_LOG --> 5 | <configuration> 6 | 7 | <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender"> 8 | <encoder> 9 | <pattern>%d{dd/MMM/yyyy:HH:mm:ss:SSS Z} [%thread] %-5level %logger{36} - %msg%n</pattern> 10 | </encoder> 11 | </appender> 12 | 13 | <appender name="MAIN" class="ch.qos.logback.core.rolling.RollingFileAppender"> 14 | <file>${LOG_DIR}/kafka_es_indexer.log</file> 15 | <append>true</append> 16 | <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy"> 17 | <fileNamePattern>${LOG_DIR}/kafka_es_indexer-%d{yyyy-MM-dd}.%i.log.gz</fileNamePattern> 18 | <maxHistory>${MAX_HISTORY_MAIN_LOG}</maxHistory> 19 | <timeBasedFileNamingAndTriggeringPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedFNATP"> 20 | <maxFileSize>${MAX_SIZE_MAIN_LOG}</maxFileSize> 21 | </timeBasedFileNamingAndTriggeringPolicy> 22 | </rollingPolicy> 23 | <encoder> 24 | <pattern>%d{yyyy-MM-dd HH:mm:ss:SSS,SSS} [%thread] %-5level %logger{36} - %msg%n</pattern> 25 | </encoder> 26 | </appender> 27 | 28 | <appender name="FAILED_EVENTS" class="ch.qos.logback.core.rolling.RollingFileAppender"> 29 | <file>${LOG_DIR}/failed_es_events.log</file> 30 | <append>true</append> 31 | <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy"> 32 | <fileNamePattern>${LOG_DIR}/failed_es_events-%d{yyyy-MM-dd}.%i.log</fileNamePattern> 33 | <maxHistory>${MAX_HISTORY_FAILED_LOG}</maxHistory> 34 | <timeBasedFileNamingAndTriggeringPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedFNATP"> 35 | <maxFileSize>${MAX_SIZE_FAILED_LOG}</maxFileSize> 36 | </timeBasedFileNamingAndTriggeringPolicy> 37 | </rollingPolicy> 38 | <encoder> 39 | <pattern>[%d{dd/MMM/yyyy:HH:mm:ss:SSS Z}] %msg%n</pattern> 40 | </encoder> 41 | </appender> 42 | 43 | <logger name="org.elasticsearch.kafka.indexer.FailedEventsLogger" level="INFO" additivity="false"> 44 | <appender-ref ref="FAILED_EVENTS" /> 45 | </logger> 46 | 47 | <root level="INFO"> 48 | <appender-ref ref="MAIN" /> 49 | <appender-ref ref="STDOUT" /> 50 | </root> 51 | </configuration> 52 | -------------------------------------------------------------------------------- /src/test/org/elasticsearch/kafka/indexer/jmx/KafkaEsIndexerStatusTest.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.kafka.indexer.jmx; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | import static org.mockito.Mockito.when; 5 | 6 | import java.util.ArrayList; 7 | import java.util.List; 8 | 9 | import org.elasticsearch.kafka.indexer.jobs.IndexerJobManager; 10 | import org.elasticsearch.kafka.indexer.jobs.IndexerJobStatus; 11 | import org.elasticsearch.kafka.indexer.jobs.IndexerJobStatusEnum; 12 | import org.junit.Before; 13 | import org.junit.Test; 14 | import org.junit.runner.RunWith; 15 | import org.mockito.Mock; 16 | import org.mockito.runners.MockitoJUnitRunner; 17 | 18 | 19 | @RunWith(MockitoJUnitRunner.class) 20 | public class KafkaEsIndexerStatusTest { 21 | 22 | List<IndexerJobStatus> statuses = new ArrayList<>(); 23 | 24 | KafkaEsIndexerStatus kafkaEsIndexerStatus; 25 | 26 | @Mock 27 | private IndexerJobManager jobManager; 28 | 29 | @Before 30 | public void init(){ 31 | statuses.add(new IndexerJobStatus(123, IndexerJobStatusEnum.Failed, 1)); 32 | statuses.add(new IndexerJobStatus(124, IndexerJobStatusEnum.Cancelled, 2)); 33 | statuses.add(new IndexerJobStatus(125, IndexerJobStatusEnum.Stopped, 3)); 34 | statuses.add(new IndexerJobStatus(126, IndexerJobStatusEnum.Started, 4)); 35 | statuses.add(new IndexerJobStatus(127, IndexerJobStatusEnum.Failed, 5)); 36 | 37 | when(jobManager.getJobStatuses()).thenReturn(statuses); 38 | kafkaEsIndexerStatus = new KafkaEsIndexerStatus(jobManager); 39 | } 40 | 41 | @Test 42 | public void getStatuses(){ 43 | assertEquals(statuses, kafkaEsIndexerStatus.getStatuses()); 44 | } 45 | 46 | @Test 47 | public void getCountOfFailedJobs(){ 48 | assertEquals(2, kafkaEsIndexerStatus.getCountOfFailedJobs()); 49 | } 50 | 51 | @Test 52 | public void getCountOfCancelledJobs(){ 53 | assertEquals(1, kafkaEsIndexerStatus.getCountOfCancelledJobs()); 54 | } 55 | 56 | @Test 57 | public void getCountOfStoppedJobs(){ 58 | assertEquals(1, kafkaEsIndexerStatus.getCountOfStoppedJobs()); 59 | } 60 | 61 | } 62 | --------------------------------------------------------------------------------
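Closing note (a sketch, not code from this repo): the main extension point of the indexer is a custom MessageHandler. The hypothetical class below shows the two things a custom handler must provide: a (TransportClient, ConsumerConfig) constructor - IndexerJob.createMessageHandler() instantiates the configured class reflectively with exactly that signature - and a transformMessage() override.

package org.elasticsearch.kafka.indexer.messageHandlers;

import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.kafka.indexer.ConsumerConfig;
import org.elasticsearch.kafka.indexer.MessageHandler;

// hypothetical handler: wraps each raw Kafka message into a small JSON envelope
// that records the Kafka offset next to the payload
public class JsonEnvelopeMessageHandler extends MessageHandler {

    public JsonEnvelopeMessageHandler(TransportClient client, ConsumerConfig config) throws Exception {
        super(client, config);
    }

    @Override
    public byte[] transformMessage(byte[] inputMessage, Long offset) throws Exception {
        // naive backslash/quote escaping only; production code should use a real JSON library
        String payload = new String(inputMessage, "UTF-8")
                .replace("\\", "\\\\").replace("\"", "\\\"");
        String json = "{\"offset\":" + offset + ",\"message\":\"" + payload + "\"}";
        return json.getBytes("UTF-8");
    }
}

Such a handler would be enabled by setting messageHandlerClass=org.elasticsearch.kafka.indexer.messageHandlers.JsonEnvelopeMessageHandler in kafka-es-indexer.properties.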