├── NativeDeflate ├── kaboom-deflate.png ├── lib │ └── README.md ├── com_blackberry_bdp_kaboom_FastBoomWriter.h ├── README.md └── NativeDeflate.c ├── .gitignore ├── src ├── deb │ └── control │ │ ├── control │ │ └── preinst ├── test │ ├── resources │ │ └── log4j.properties │ └── java │ │ ├── TimeBasedHdfsOutputPathTest.java │ │ └── com │ │ └── blackberry │ │ └── bdp │ │ └── kaboom │ │ ├── VersionParserTest.java │ │ ├── PriParserTest.java │ │ ├── EvenLoadBalancerTest.java │ │ ├── FastBoomWriterTest.java │ │ ├── LocalZkServer.java │ │ ├── WorkerTest.java │ │ └── TimestampParserTest.java ├── main │ ├── boom.avsc │ └── java │ │ └── com │ │ └── blackberry │ │ └── bdp │ │ ├── kaboom │ │ ├── exception │ │ │ ├── NotAssignedException.java │ │ │ └── LockNotAcquiredException.java │ │ ├── IntParser.java │ │ ├── VersionParser.java │ │ ├── KerberosUser.java │ │ ├── LocalLoadBalancer.java │ │ ├── PriParser.java │ │ ├── timestamps │ │ │ ├── UniqueTimestampFinder.java │ │ │ └── TimestampWorker.java │ │ ├── EvenLoadBalancer.java │ │ ├── AsyncAssignee.java │ │ ├── Leader.java │ │ ├── TimestampParser.java │ │ ├── KaBoom.java │ │ ├── TimeBasedHdfsOutputPath.java │ │ └── Authenticator.java │ │ ├── simplekaboom │ │ ├── ResetAllOffsets.java │ │ ├── SimpleKaBoom.java │ │ └── SimpleWorker.java │ │ └── cli │ │ └── AssignmentStats.java └── rpm │ └── preinst ├── conf ├── log4j.properties.sample ├── kaboom-env.sh.sample └── kaboom.properties.sample ├── LICENSE ├── init └── kaboom.conf ├── docs ├── sdedit_uml │ ├── README.MD │ └── kaboom_lb_sequence.txt ├── Ready_Flag_Logic.md └── changes.md ├── kaboom-0.8.0.md ├── bin └── kaboom └── dependency-reduced-pom.xml /NativeDeflate/kaboom-deflate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/blackberry/KaBoom/HEAD/NativeDeflate/kaboom-deflate.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .classpath 2 | .project 3 | .settings/ 4 | target/ 5 | NativeDeflate/lib/libNativeDeflate.so 6 | nb-configuration.xml 7 | -------------------------------------------------------------------------------- /NativeDeflate/lib/README.md: -------------------------------------------------------------------------------- 1 | # Lib 2 | 3 | Ensure your build produces libNativeDeflate.so here if you wish for the shared object to be included in the RPM/DEB package's installation. 
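For reference, a build along these lines has worked (a sketch only; the compiler flags and include paths are assumptions that vary by system, and a JDK plus the zlib development headers are required):

```bash
# Hypothetical invocation from within NativeDeflate/lib: compiles the JNI
# source one directory up and drops the shared object here so the package
# build can pick it up.
gcc -shared -fPIC -O2 \
    -I"$JAVA_HOME/include" -I"$JAVA_HOME/include/linux" \
    ../NativeDeflate.c -lz -o libNativeDeflate.so
```

The JNI entry point the library must export is declared in ../com_blackberry_bdp_kaboom_FastBoomWriter.h, and conf/kaboom-env.sh.sample points -Djava.library.path at /opt/kaboom/lib, so the JVM can resolve the shared object from there at runtime.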
4 | 
--------------------------------------------------------------------------------
/src/deb/control/control:
--------------------------------------------------------------------------------
1 | Package: kaboom
2 | Version: [[version]]
3 | Section: misc
4 | Priority: optional
5 | Architecture: all
6 | Maintainer: Dave Ariens 
7 | Description: Daemon that consumes data from Kafka using Krackle and writes to per-topic HDFS paths
--------------------------------------------------------------------------------
/src/test/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | log4j.rootLogger=INFO, stdout
2 | 
3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender
4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
5 | 
6 | log4j.appender.stdout.layout.ConversionPattern=%d %5p [%t] (%F:%L) - %m%n
--------------------------------------------------------------------------------
/conf/log4j.properties.sample:
--------------------------------------------------------------------------------
1 | kaboom.logs.dir=/var/log/kaboom
2 | log4j.rootLogger=INFO, kaboomAppender
3 | 
4 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender
5 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
6 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n
7 | 
8 | log4j.appender.kaboomAppender=org.apache.log4j.DailyRollingFileAppender
9 | log4j.appender.kaboomAppender.DatePattern='.'yyyy-MM-dd-HH
10 | log4j.appender.kaboomAppender.File=${kaboom.logs.dir}/server.log
11 | log4j.appender.kaboomAppender.layout=org.apache.log4j.PatternLayout
12 | log4j.appender.kaboomAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
13 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014 BlackBerry, Limited.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
--------------------------------------------------------------------------------
/src/main/boom.avsc:
--------------------------------------------------------------------------------
1 | {
2 | "namespace": "com.blackberry.bdp.containers.boom",
3 | "type": "record",
4 | "name": "LogBlock",
5 | "fields": [
6 | { "name": "second", "type": "long" },
7 | { "name": "createTime", "type": "long" },
8 | { "name": "blockNumber", "type": "long" },
9 | { "name": "logLines", "type": {
10 | "type": "array",
11 | "items": {
12 | "type": "record",
13 | "name": "MessageWithMillis",
14 | "fields": [
15 | { "name": "ms", "type": "long" },
16 | { "name": "message", "type": "string" }
17 | ]
18 | }
19 | }}
20 | ]
21 | }
22 | 
--------------------------------------------------------------------------------
/NativeDeflate/com_blackberry_bdp_kaboom_FastBoomWriter.h:
--------------------------------------------------------------------------------
1 | /* DO NOT EDIT THIS FILE - it is machine generated */
2 | #include <jni.h>
3 | /* Header for class com_blackberry_bdp_kaboom_FastBoomWriter */
4 | 
5 | #ifndef _Included_com_blackberry_bdp_kaboom_FastBoomWriter
6 | #define _Included_com_blackberry_bdp_kaboom_FastBoomWriter
7 | #ifdef __cplusplus
8 | extern "C" {
9 | #endif
10 | /*
11 | * Class: com_blackberry_bdp_kaboom_FastBoomWriter
12 | * Method: compress
13 | * Signature: ([BII)[B
14 | */
15 | JNIEXPORT jbyteArray JNICALL Java_com_blackberry_bdp_kaboom_FastBoomWriter_compress
16 | (JNIEnv *, jobject, jbyteArray, jint, jint);
17 | 
18 | #ifdef __cplusplus
19 | }
20 | #endif
21 | #endif
22 | 
--------------------------------------------------------------------------------
/init/kaboom.conf:
--------------------------------------------------------------------------------
1 | #
2 | #
3 | # This is the KaBoom upstart file
4 | #
5 | #
6 | 
7 | description "kaboom log processing daemon"
8 | 
9 | start on net-device-up IFACE=eth0
10 | stop on runlevel [!2345]
11 | 
12 | env enabled=1
13 | 
14 | setuid kaboom
15 | setgid kaboom
16 | 
17 | limit nofile 65535 65535
18 | 
19 | respawn
20 | respawn limit 10 60
21 | 
22 | env KABOOM_CONF_DIR=/opt/kaboom/config
23 | 
24 | pre-start script
25 | mkdir -p /var/run/kaboom
26 | end script
27 | 
28 | 
29 | script
30 | set -x
31 | [ "$enabled" = "1" ] || [ "$force_start" = "1" ] || exit 0
32 | . $KABOOM_CONF_DIR/kaboom-env.sh
33 | exec $JAVA $JAVA_OPTS -cp "$CLASSPATH" com.blackberry.bdp.kaboom.KaBoom >$LOGDIR/server.out-`date +"%Y-%m-%d-%H-%M-%S"`
34 | end script
35 | 
--------------------------------------------------------------------------------
/docs/sdedit_uml/README.MD:
--------------------------------------------------------------------------------
1 | Quick Sequence Diagram Editor
2 | =============================
3 | 
4 | Tool's Homepage
5 | ---------------
6 | 
7 | Click [here](http://sdedit.sourceforge.net/).
8 | 
9 | This tool makes creating UML sequence diagrams a piece of cake. It's easier than any WYSIWYG editor, where you constantly have to keep moving message calls and lines all over the place, and God forbid you ever want to redraw something with drastic changes. This tool lets me use a simple text syntax and then render PNGs/vector graphics for publication.
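For a taste of the syntax, a minimal diagram (purely illustrative names) declares its objects and then lists one message per line, in the same style as kaboom_lb_sequence.txt in this directory:

    worker:worker "Worker"
    zk:zk "ZooKeeper"

    worker:offset=zk.getData("/kaboom/assignments")
    worker:zk.setData("/kaboom/assignments", offset)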
10 | 11 | Running It 12 | ---------- 13 | 14 | Dunno what works for others, but this worked for me: 15 | 16 | /home/dariens/Downloads/jdk1.7.0_67/bin/java -Djava.awt.headless=false -jar /home/dariens/Downloads/sdedit-4.01.jar 17 | -------------------------------------------------------------------------------- /src/deb/control/preinst: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Remove any libraries from previous versions 4 | 5 | if [[ "x$kaboom_uid" == "x" ]] 6 | then 7 | getent group kaboom >/dev/null || groupadd -r kaboom 8 | getent passwd kaboom >/dev/null || useradd -r -g kaboom -m -s /bin/bash -c "User for kaboom." kaboom 9 | else 10 | getent group kaboom >/dev/null || groupadd -g $kaboom_uid -r kaboom 11 | getent passwd kaboom >/dev/null || useradd -u $kaboom_uid -r -g kaboom -m -s /bin/bash -c "User for kaboom." kaboom 12 | fi 13 | 14 | if [[ -e "/opt/kaboom/lib" ]] 15 | then 16 | rm /opt/kaboom/lib/* 17 | chown -R kaboom.kaboom /opt/kaboom/lib 18 | fi 19 | 20 | if [[ -e "/var/log/kaboom" ]] 21 | then 22 | chown -R kaboom.kaboom /var/log/kaboom 23 | fi 24 | 25 | if [[ -e "/opt/kaboom/config" ]] 26 | then 27 | chown -R kaboom.kaboom /opt/kaboom/config 28 | fi -------------------------------------------------------------------------------- /src/test/java/TimeBasedHdfsOutputPathTest.java: -------------------------------------------------------------------------------- 1 | 2 | import org.junit.Test; 3 | import org.slf4j.Logger; 4 | import org.slf4j.LoggerFactory; 5 | 6 | /* 7 | * To change this license header, choose License Headers in Project Properties. 8 | * To change this template file, choose Tools | Templates 9 | * and open the template in the editor. 10 | */ 11 | 12 | 13 | public class TimeBasedHdfsOutputPathTest { 14 | 15 | private static final Logger LOG = LoggerFactory.getLogger(TimeBasedHdfsOutputPathTest.class); 16 | 17 | @Test 18 | public void testComparingNullOnPrivateClassPrimative() throws Exception { 19 | Foo foo1 = new Foo(); 20 | Foo foo2 = new Foo(); 21 | foo2.val = System.currentTimeMillis(); 22 | LOG.info("Comparing foo1.val={} to foo2.val={}", foo1.val, foo2.val); 23 | } 24 | 25 | private class Foo { 26 | private long val; 27 | } 28 | } 29 | 30 | -------------------------------------------------------------------------------- /src/main/java/com/blackberry/bdp/kaboom/exception/NotAssignedException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 BlackBerry Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.blackberry.bdp.kaboom.exception; 17 | 18 | public class NotAssignedException extends Exception{ 19 | public NotAssignedException(String message) { 20 | super(message); 21 | } 22 | } -------------------------------------------------------------------------------- /src/main/java/com/blackberry/bdp/kaboom/exception/LockNotAcquiredException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 BlackBerry Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.blackberry.bdp.kaboom.exception; 17 | 18 | public class LockNotAcquiredException extends Exception{ 19 | public LockNotAcquiredException(String message) { 20 | super(message); 21 | } 22 | } -------------------------------------------------------------------------------- /src/rpm/preinst: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Remove any libraries from previous versions 4 | 5 | if [[ "x$kaboom_uid" == "x" ]] 6 | then 7 | getent group kaboom >/dev/null || groupadd -r kaboom 8 | getent passwd kaboom >/dev/null || useradd -r -g kaboom -m -s /bin/bash -c "User for kaboom." kaboom 9 | else 10 | getent group kaboom >/dev/null || groupadd -g $kaboom_uid -r kaboom 11 | getent passwd kaboom >/dev/null || useradd -u $kaboom_uid -r -g kaboom -m -s /bin/bash -c "User for kaboom." kaboom 12 | fi 13 | 14 | if [[ -e "/opt/kaboom/lib" ]] 15 | then 16 | rm /opt/kaboom/lib/* 17 | chown -R kaboom.kaboom /opt/kaboom/lib 18 | fi 19 | 20 | if [[ -e "/var/run/kaboom" ]] 21 | then 22 | chown -R kaboom.kaboom /var/run/kaboom 23 | fi 24 | 25 | if [[ -e "/var/log/kaboom" ]] 26 | then 27 | chown -R kaboom.kaboom /var/log/kaboom 28 | fi 29 | 30 | if [[ -e "/opt/kaboom/config" ]] 31 | then 32 | chown -R kaboom.kaboom /opt/kaboom/config 33 | fi -------------------------------------------------------------------------------- /src/main/java/com/blackberry/bdp/kaboom/IntParser.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014 BlackBerry, Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */
16 | 
17 | package com.blackberry.bdp.kaboom;
18 | 
19 | public class IntParser {
20 | private int intFromBytesPos;
21 | private int intFromBytesReturn;
22 | 
23 | public int intFromBytes(byte[] b, int start, int length) {
24 | intFromBytesReturn = 0;
25 | intFromBytesPos = start;
26 | while (intFromBytesPos < start + length) {
27 | intFromBytesReturn = 10 * intFromBytesReturn + (b[intFromBytesPos] - '0');
28 | intFromBytesPos++;
29 | }
30 | return intFromBytesReturn;
31 | }
32 | }
33 | 
--------------------------------------------------------------------------------
/NativeDeflate/README.md:
--------------------------------------------------------------------------------
1 | # KaBoom - libNativeDeflate, what's this all about?
2 | 
3 | KaBoom 0.7.3 introduced offloading compression to a native system shared object. Earlier versions used Java's java.util.zip.Deflater to compress the Avro boom files. This deflation is the most expensive operation that KaBoom performs and has a large impact on performance.
4 | 
5 | ![alt tag](kaboom-deflate.png)
6 | 
7 | The above shows roughly a 28% increase in performance when the deflate algorithm is implemented in C, as opposed to Java.
8 | 
9 | ## Features
10 | * Uses the [Curator Framework](http://curator.apache.org/) for [Apache Zookeeper](http://zookeeper.apache.org/) to distribute work amongst multiple servers
11 | * Supports writing to secured Hadoop clusters via Kerberos based secure impersonation (conveniently pulled from [Flume](http://flume.apache.org/))
12 | * Recovers from Kafka server failures (even when newly elected leaders weren't in-sync when elected)
13 | * Supports consuming with either GZIP or Snappy compression
14 | * Configurable: Each topic can be configured with a unique HDFS path template with date/time variable substitution
15 | * Supports flagging timestamp template HDFS directories as 'Ready' when all of a topic's partitions' messages have been written for a given hour
16 | 
17 | ## Author(s)
18 | * [Dave Ariens]() (current maintainer)
19 | 
20 | ## Building
21 | 
22 | ## Configuring
23 | 
--------------------------------------------------------------------------------
/conf/kaboom-env.sh.sample:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | JAVA=`which java`
4 | BASEDIR=/opt/kaboom
5 | BINDIR="$BASEDIR/bin"
6 | LIBDIR="$BASEDIR/lib"
7 | LOGDIR="/var/log/kaboom"
8 | CONFIGDIR="$BASEDIR/config"
9 | JMXPORT=9580
10 | LOG4JPROPERTIES=$CONFIGDIR/log4j.properties
11 | PIDBASE=/var/run/kaboom
12 | KABOOM_USER=kaboom
13 | 
14 | JAVA_OPTS=""
15 | JAVA_OPTS="$JAVA_OPTS -server"
16 | JAVA_OPTS="$JAVA_OPTS -Xms5G -Xmx5G"
17 | JAVA_OPTS="$JAVA_OPTS -XX:+UseParNewGC -XX:+UseConcMarkSweepGC"
18 | JAVA_OPTS="$JAVA_OPTS -XX:+UseCMSInitiatingOccupancyOnly -XX:+CMSConcurrentMTEnabled -XX:+CMSScavengeBeforeRemark"
19 | JAVA_OPTS="$JAVA_OPTS -XX:CMSInitiatingOccupancyFraction=30"
20 | 
21 | JAVA_OPTS="$JAVA_OPTS -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintTenuringDistribution"
22 | JAVA_OPTS="$JAVA_OPTS -Xloggc:$LOGDIR/gc.log -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M"
23 | 
24 | JAVA_OPTS="$JAVA_OPTS -Djava.awt.headless=true"
25 | JAVA_OPTS="$JAVA_OPTS -Dcom.sun.management.jmxremote"
26 | JAVA_OPTS="$JAVA_OPTS -Dcom.sun.management.jmxremote.authenticate=false"
27 | JAVA_OPTS="$JAVA_OPTS -Dcom.sun.management.jmxremote.ssl=false"
28 | JAVA_OPTS="$JAVA_OPTS -Dcom.sun.management.jmxremote.port=$JMXPORT"
29 | 
30 | JAVA_OPTS="$JAVA_OPTS
-Dlog4j.configuration=file:$LOG4JPROPERTIES" 31 | 32 | JAVA_OPTS="$JAVA_OPTS -Dkaboom.logs.dir=$LOGDIR" 33 | JAVA_OPTS="$JAVA_OPTS -Djava.library.path=$LIBDIR" 34 | 35 | 36 | CLASSPATH=$CONFIGDIR:/etc/hadoop/conf:$LIBDIR/* -------------------------------------------------------------------------------- /conf/kaboom.properties.sample: -------------------------------------------------------------------------------- 1 | ###################### 2 | # KaBoom Configuration 3 | ###################### 4 | 5 | kaboom.id=1001 6 | hadooop.fs.uri=hdfs://hadoop.company.com 7 | #kaboom.weighting= - the default is number of cores 8 | kerberos.keytab=/opt/kaboom/config/kaboom.keytab 9 | kerberos.principal=principal@AD0.COMPANY 10 | #kaboom.hostname= - the default is the system's hostname 11 | zookeeper.connection.string=r3k1.kafka.company.com:2181,r3k2.kafka.company.com:2181,r3k3.kafka.company.com:2181/KaBoomDev 12 | kafka.zookeeper.connection.string=r3k1.kafka.company.com:2181,r3k2.kafka.company.com:2181,r3k3.kafka.company.com:2181 13 | #kaboom.load.balancer.type=even - this is the default 14 | #kafka.zk.root.path= - the default is none/empty string 15 | #kafka.zk.root.path.brokers=brokers/ids - this is the default 16 | #kaboom.zk.root.path=/kaboom - this is the default 17 | #kaboom.zk.root.path.topic.configs=/kaboom/topics - this is the default 18 | #kaboom.zk.root.path.clients=/kaboom/clients - this is the default 19 | #kaboom.zk.root.path.partition.assignments=/kaboom/assignments - this is the default 20 | #kaboom.zk.root.path.flag.assignments=/kaboom/flag-assignments - this is the default 21 | #kaboom.runningConfig.zkPath=/kaboom/config - this is the default 22 | #kaboom.zk.path.leader.clientId=/kaboom/leader - this is the default 23 | 24 | ######################## 25 | # Consumer Configuration 26 | ######################## 27 | 28 | metadata.broker.list=r3k1.kafka.company.com:9092,r3k2.kafka.company.com:9092,r3k3.kafka.company.com:9092 29 | fetch.message.max.bytes=10485760 30 | fetch.wait.max.ms=5000 31 | #fetch.min.bytes=1 - this is the default 32 | socket.receive.buffer.bytes=10485760 33 | auto.offset.reset=smallest 34 | #socket.timeout.seconds=30000 - this is the default -------------------------------------------------------------------------------- /src/test/java/com/blackberry/bdp/kaboom/VersionParserTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014 BlackBerry, Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.blackberry.bdp.kaboom; 18 | 19 | import static org.junit.Assert.*; 20 | 21 | import org.junit.Test; 22 | 23 | public class VersionParserTest { 24 | @Test 25 | public void testSkipPri() { 26 | VersionParser ver = new VersionParser(); 27 | 28 | byte[] msg; 29 | 30 | // Test all good values 31 | for (int i = 1; i <= 999; i++) { 32 | msg = (i + " This is a test.").getBytes(); 33 | assertTrue(ver.parseVersion(msg, 0, msg.length)); 34 | assertEquals(("" + i).length(), ver.getVersionLength()); 35 | 36 | msg = ("<123>" + i + " This is a test.").getBytes(); 37 | assertTrue(ver.parseVersion(msg, 5, msg.length)); 38 | assertEquals(("" + i).length(), ver.getVersionLength()); 39 | } 40 | 41 | // Various bad values 42 | assertFalse(ver.parseVersion("12This is a test".getBytes(), 0, 15)); 43 | assertFalse(ver.parseVersion("1234 This is a test".getBytes(), 0, 15)); 44 | 45 | assertFalse(ver.parseVersion("01 This is a test".getBytes(), 0, 15)); 46 | assertFalse(ver.parseVersion("0 This is a test".getBytes(), 0, 15)); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/com/blackberry/bdp/kaboom/VersionParser.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014 BlackBerry, Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.blackberry.bdp.kaboom; 18 | 19 | import org.slf4j.Logger; 20 | import org.slf4j.LoggerFactory; 21 | 22 | public class VersionParser { 23 | private static final Logger LOG = LoggerFactory 24 | .getLogger(VersionParser.class); 25 | 26 | private IntParser intParser = new IntParser(); 27 | 28 | private int version; 29 | private int versionLength; 30 | 31 | public boolean parseVersion(byte[] bytes, int pos, int length) { 32 | // Look for a number, up to 3 digits, followed by a space. If it's there, 33 | // return true. 
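// (This matches the VERSION field of RFC 5424 syslog: one to three
// digits with no leading zero, followed by a space, hence the
// '1'-'9' test on the first byte below.)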
34 | try { 35 | if (bytes[pos] >= '1' && bytes[pos] <= '9') { 36 | if (bytes[pos + 1] == ' ') { 37 | version = intParser.intFromBytes(bytes, pos, 1); 38 | versionLength = 1; 39 | return true; 40 | } 41 | 42 | if (bytes[pos + 1] >= '0' && bytes[pos + 1] <= '9') { 43 | if (bytes[pos + 2] == ' ') { 44 | version = intParser.intFromBytes(bytes, pos, 2); 45 | versionLength = 2; 46 | return true; 47 | } 48 | 49 | if (bytes[pos + 2] >= '0' && bytes[pos + 2] <= '9' 50 | && bytes[pos + 3] == ' ') { 51 | version = intParser.intFromBytes(bytes, pos, 3); 52 | versionLength = 3; 53 | return true; 54 | } 55 | } 56 | } 57 | } catch (Throwable t) { 58 | LOG.error("Error parsing version.", t); 59 | return false; 60 | } 61 | return false; 62 | } 63 | 64 | public int getVersion() { 65 | return version; 66 | } 67 | 68 | public int getVersionLength() { 69 | return versionLength; 70 | } 71 | 72 | } 73 | -------------------------------------------------------------------------------- /src/test/java/com/blackberry/bdp/kaboom/PriParserTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014 BlackBerry, Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.blackberry.bdp.kaboom; 18 | 19 | import static org.junit.Assert.*; 20 | import org.junit.Rule; 21 | 22 | import org.junit.Test; 23 | import org.junit.rules.ExpectedException; 24 | 25 | public class PriParserTest { 26 | 27 | @Rule 28 | public final ExpectedException exception = ExpectedException.none(); 29 | 30 | @Test 31 | public void testSkipPri() throws Exception { 32 | PriParser pri = new PriParser(); 33 | 34 | // Test all good values 35 | for (int i = 0; i <= 191; i++) { 36 | byte[] msg = ("<" + i + ">This is a test.").getBytes(); 37 | assertTrue(pri.parsePri(msg, 0, msg.length)); 38 | assertEquals(("" + i).length() + 2, pri.getPriLength()); 39 | } 40 | 41 | // Various bad values 42 | assertFalse(pri.parsePri("This is a test".getBytes(), 0, 15)); 43 | assertFalse(pri.parsePri("<>This is a test".getBytes(), 0, 15)); 44 | 45 | assertFalse(pri.parsePri("<192>This is a test".getBytes(), 0, 15)); 46 | assertFalse(pri.parsePri("<200>This is a test".getBytes(), 0, 15)); 47 | assertFalse(pri.parsePri("<01>This is a test".getBytes(), 0, 15)); 48 | 49 | exception.expect(IndexOutOfBoundsException.class); 50 | assertFalse(pri.parsePri("<0".getBytes(), 0, 2)); 51 | 52 | assertFalse(pri.parsePri("<1".getBytes(), 0, 2)); 53 | assertFalse(pri.parsePri("<2".getBytes(), 0, 2)); 54 | assertFalse(pri.parsePri("<10".getBytes(), 0, 3)); 55 | assertFalse(pri.parsePri("<20".getBytes(), 0, 3)); 56 | assertFalse(pri.parsePri("<100".getBytes(), 0, 4)); 57 | assertFalse(pri.parsePri("<191".getBytes(), 0, 4)); 58 | } 59 | 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/com/blackberry/bdp/simplekaboom/ResetAllOffsets.java: 
--------------------------------------------------------------------------------
1 | package com.blackberry.bdp.simplekaboom;
2 | 
3 | import org.apache.curator.RetryPolicy;
4 | import org.apache.curator.framework.CuratorFramework;
5 | import org.apache.curator.framework.CuratorFrameworkFactory;
6 | import org.apache.curator.retry.ExponentialBackoffRetry;
7 | import org.apache.zookeeper.CreateMode;
8 | import com.blackberry.bdp.common.conversion.Converter;
9 | import static com.blackberry.bdp.common.conversion.Converter.getBytes;
10 | import java.util.Arrays;
11 | 
12 | public class ResetAllOffsets {
13 | 
14 | private static CuratorFramework curator;
15 | 
16 | public static void main(String[] args) {
17 | try {
18 | if (args.length < 2) {
19 | System.out.println("Usage: java " + ResetAllOffsets.class + " <zookeeper connection string> <topic root>");
20 | System.exit(1);
21 | }
22 | String zookeeperConnectionString = args[0];
23 | String topicRoot = args[1];
24 | createCurator(zookeeperConnectionString);
25 | for (String topic : curator.getChildren().forPath(topicRoot)) {
26 | for (String partition : curator.getChildren().forPath(topicRoot + "/" + topic)) {
27 | String offsetPath = String.format("%s/%s/%s", topicRoot, topic, partition);
28 | try {
29 | if (curator.checkExists().forPath(offsetPath) != null) {
30 | long offset = Converter.longFromBytes(curator.getData().forPath(offsetPath));
31 | curator.setData().forPath(offsetPath, getBytes(0L));
32 | System.out.println("Reset " + offsetPath + "=" + offset + " to zero");
33 | } else {
34 | System.out.println("Missing offset at " + offsetPath);
35 | }
36 | } catch (Throwable t) {
37 | System.out.printf("error: %s\n", t.getMessage());
38 | t.printStackTrace();
39 | }
40 | }
41 | }
42 | } catch (Throwable t) {
43 | System.out.printf("error: %s\n", t.getMessage());
44 | t.printStackTrace();
45 | }
46 | 
47 | }
48 | 
49 | private static void createCurator(String zookeeperConnectionString) {
50 | String[] connStringAndPrefix = zookeeperConnectionString.split("/", 2);
51 | 
52 | RetryPolicy retryPolicy = new ExponentialBackoffRetry(1000, 3);
53 | 
54 | ResetAllOffsets.curator = CuratorFrameworkFactory.builder()
55 | .namespace(connStringAndPrefix[1])
56 | .connectString(connStringAndPrefix[0]).retryPolicy(retryPolicy)
57 | .build();
58 | 
59 | ResetAllOffsets.curator.start();
60 | }
61 | 
62 | }
63 | 
--------------------------------------------------------------------------------
/src/main/java/com/blackberry/bdp/kaboom/KerberosUser.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014 BlackBerry, Limited.
3 | *
4 | * The contents of this class were taken from the Apache Flume project.
5 | *
6 | * Licensed to the Apache Software Foundation (ASF) under one or more
7 | * contributor license agreements. See the NOTICE file distributed with this
8 | * work for additional information regarding copyright ownership. The ASF
9 | * licenses this file to you under the Apache License, Version 2.0 (the
10 | * "License"); you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
17 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
18 | * License for the specific language governing permissions and limitations under
19 | * the License.
20 | */
21 | package com.blackberry.bdp.kaboom;
22 | 
23 | /**
24 | * Simple Pair class used to define a unique (principal, keyTab) combination.
25 | */
26 | public class KerberosUser {
27 | 
28 | private final String principal;
29 | private final String keyTab;
30 | 
31 | public KerberosUser(String principal, String keyTab) {
32 | this.principal = principal;
33 | this.keyTab = keyTab;
34 | }
35 | 
36 | public String getPrincipal() {
37 | return principal;
38 | }
39 | 
40 | public String getKeyTab() {
41 | return keyTab;
42 | }
43 | 
44 | @Override
45 | public boolean equals(Object obj) {
46 | if (obj == null) {
47 | return false;
48 | }
49 | if (getClass() != obj.getClass()) {
50 | return false;
51 | }
52 | final KerberosUser other = (KerberosUser) obj;
53 | if ((this.principal == null) ? (other.principal != null) : !this.principal
54 | .equals(other.principal)) {
55 | return false;
56 | }
57 | if ((this.keyTab == null) ? (other.keyTab != null) : !this.keyTab
58 | .equals(other.keyTab)) {
59 | return false;
60 | }
61 | return true;
62 | }
63 | 
64 | @Override
65 | public int hashCode() {
66 | int hash = 7;
67 | hash = 41 * hash + (this.principal != null ? this.principal.hashCode() : 0);
68 | hash = 41 * hash + (this.keyTab != null ? this.keyTab.hashCode() : 0);
69 | return hash;
70 | }
71 | 
72 | @Override
73 | public String toString() {
74 | return "{ principal: " + principal + ", keytab: " + keyTab + " }";
75 | }
76 | }
--------------------------------------------------------------------------------
/NativeDeflate/NativeDeflate.c:
--------------------------------------------------------------------------------
1 | #include <jni.h>
2 | #include <stdio.h>
3 | #include <stdlib.h>
4 | #include <string.h>
5 | #include <assert.h>
6 | #include "zlib.h"
7 | #include "com_blackberry_bdp_kaboom_FastBoomWriter.h"
8 | 
9 | #define CHUNK 16384
10 | 
11 | JNIEXPORT jbyteArray JNICALL Java_com_blackberry_bdp_kaboom_FastBoomWriter_compress
12 | (JNIEnv *env, jobject thisObj, jbyteArray bytesIn, jint length, jint compressionLevel)
13 | {
14 | jboolean isCopy;
15 | unsigned char* istream = (unsigned char*)(*env)->GetByteArrayElements(env, bytesIn, &isCopy);
16 | unsigned char* ostream = malloc(length);
17 | 
18 | //printf("strlen(istream): %lu\n", strlen(istream));
19 | 
20 | z_stream defstream;
21 | 
22 | defstream.zalloc = Z_NULL;
23 | defstream.zfree = Z_NULL;
24 | defstream.opaque = Z_NULL;
25 | 
26 | deflateInit2(&defstream,
27 | compressionLevel,
28 | Z_DEFLATED,
29 | -15,
30 | 8,
31 | Z_DEFAULT_STRATEGY);
32 | 
33 | unsigned long chunksRead = 0;
34 | unsigned long bytesRead = 0;
35 | unsigned char* in;
36 | unsigned char* out;
37 | int flush = Z_NO_FLUSH;
38 | int have;
39 | 
40 | do
41 | {
42 | int readAmount = CHUNK;
43 | 
44 | if (bytesRead + CHUNK > length)
45 | {
46 | readAmount = length - bytesRead;
47 | flush = Z_FINISH;
48 | }
49 | 
50 | in = &istream[CHUNK * chunksRead];
51 | defstream.next_in = (Bytef*) in;
52 | defstream.avail_in = readAmount;
53 | 
54 | do
55 | {
56 | out = &ostream[defstream.total_out];
57 | defstream.next_out = (Bytef*) out;
58 | defstream.avail_out = readAmount;
59 | deflate(&defstream, flush);
60 | have = readAmount - defstream.avail_out;
61 | //printf("have: %i\tavail_out: %i\n", have, defstream.avail_out);
62 | } while (defstream.avail_out == 0);
63 | 
64 | bytesRead+= readAmount;
65 | chunksRead++;
66 | 
67 | /*
68 | 
69 | printf("chunk number: %lu\tread amount: %i\tbytes read: %lu\ttotal_in: %lu\t total_out: %lu\n",
70 | chunksRead,
71 | readAmount,
72 | bytesRead,
73 | defstream.total_in,
74 | defstream.total_out);
75 | 
76 | */
77 | 
78 | } while (bytesRead < length);
79 | 
80 | (void)deflateEnd(&defstream);
81 | 
82 | //printf("strlen(ostream): %lu\n", strlen(ostream));
83 | 
84 | (*env)->ReleaseByteArrayElements(env, bytesIn, istream, JNI_ABORT);
85 | jbyteArray bytesOut = (*env)->NewByteArray(env, defstream.total_out);
86 | (*env)->SetByteArrayRegion(env, bytesOut, 0, defstream.total_out, (jbyte*)ostream);
87 | 
88 | free(ostream);
89 | 
90 | return bytesOut;
91 | }
--------------------------------------------------------------------------------
/src/test/java/com/blackberry/bdp/kaboom/EvenLoadBalancerTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015 BlackBerry, Limited.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package com.blackberry.bdp.kaboom;
17 | 
18 | import com.blackberry.bdp.kaboom.api.KaBoomClient;
19 | import java.nio.charset.Charset;
20 | import org.apache.curator.RetryPolicy;
21 | import org.apache.curator.framework.CuratorFramework;
22 | import org.apache.curator.framework.CuratorFrameworkFactory;
23 | import org.apache.curator.retry.ExponentialBackoffRetry;
24 | import org.apache.zookeeper.CreateMode;
25 | import org.junit.AfterClass;
26 | import org.junit.Assert;
27 | import org.junit.BeforeClass;
28 | import org.junit.Test;
29 | import org.slf4j.Logger;
30 | import org.slf4j.LoggerFactory;
31 | 
32 | public class EvenLoadBalancerTest {
33 | 
34 | private static final Logger LOG = LoggerFactory.getLogger(EvenLoadBalancerTest.class);
35 | private static CuratorFramework curator;
36 | private static LocalZkServer zk;
37 | protected static final Charset UTF8 = Charset.forName("UTF-8");
38 | 
39 | private static CuratorFramework buildCuratorFramework() {
40 | String connectionString = "localhost:21818";
41 | RetryPolicy retryPolicy = new ExponentialBackoffRetry(1000, 3);
42 | LOG.info("attempting to connect to ZK with connection string {}", connectionString);
43 | CuratorFramework newCurator = CuratorFrameworkFactory.newClient(connectionString, retryPolicy);
44 | newCurator.start();
45 | return newCurator;
46 | }
47 | 
48 | @BeforeClass
49 | public static void setup() throws Exception {
50 | zk = new LocalZkServer();
51 | curator = buildCuratorFramework();
52 | }
53 | 
54 | @AfterClass
55 | public static void cleanup() throws Exception {
56 | curator.close();
57 | zk.shutdown();
58 | }
59 | 
60 | @Test
61 | public void testAssignmentConversion() throws Exception {
62 | KaBoomClient client = new KaBoomClient();
63 | client.setId(1001);
64 | 
65 | String zkPath = "/assignment";
66 | curator.create().withMode(CreateMode.PERSISTENT).forPath(zkPath, String.valueOf(client.getId()).getBytes(UTF8));
67 | 
68 | String assignmentFound = new String(curator.getData().forPath(zkPath), UTF8);
69 | int assignedClientId = Integer.parseInt(assignmentFound);
70 | Assert.assertEquals(assignedClientId, client.getId());
71 | LOG.info("Testing assignment conversion and {} equals {}", assignedClientId, client.getId());
72 | }
73 | 
74 | }
75 | 
--------------------------------------------------------------------------------
/src/test/java/com/blackberry/bdp/kaboom/FastBoomWriterTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2014 BlackBerry, Limited.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | 
17 | package com.blackberry.bdp.kaboom;
18 | 
19 | import com.blackberry.bdp.common.jmx.MetricRegistrySingleton;
20 | import com.codahale.metrics.Timer;
21 | import java.io.FileOutputStream;
22 | import java.io.IOException;
23 | import java.nio.charset.Charset;
24 | import java.util.ArrayList;
25 | import java.util.List;
26 | import java.util.Random;
27 | import org.apache.hadoop.fs.FSDataOutputStream;
28 | import org.apache.hadoop.fs.FileSystem;
29 | 
30 | import org.junit.Test;
31 | import org.slf4j.Logger;
32 | import org.slf4j.LoggerFactory;
33 | 
34 | public class FastBoomWriterTest {
35 | @SuppressWarnings("unused")
36 | private static final Logger LOG = LoggerFactory
37 | .getLogger(FastBoomWriterTest.class);
38 | private static final Charset UTF8 = Charset.forName("UTF8");
39 | private static final Random rand = new Random();
40 | private final Timer timer = MetricRegistrySingleton.getInstance().getMetricsRegistry().timer("boom writes");
41 | 
42 | @Test
43 | public void testWriteFile() throws IOException, Exception {
44 | 
45 | /*
46 | FileSystem.Statistics fsDataStats = null;
47 | KaboomConfiguration config = new KaboomConfiguration(null);
48 | FileOutputStream out = new FileOutputStream("/tmp/test2.bm");
49 | FSDataOutputStream fsDataOut = new FSDataOutputStream(out, fsDataStats);
50 | FastBoomWriter writer = new FastBoomWriter(fsDataOut, "topic1", 0, config);
51 | 
52 | 
53 | byte[] message = "This is a test. Let's make the line a bit longer by writing some stuff here."
54 | .getBytes(UTF8);
55 | 
56 | writer.writeLine(1397268894000L, message, 0, message.length);
57 | 
58 | writer.close();
59 | */
60 | }
61 | 
62 | @Test
63 | public void testWriteBigFile() throws IOException, Exception {
64 | /*
65 | FileSystem.Statistics fsDataStats = null;
66 | KaboomConfiguration config = new KaboomConfiguration(null);
67 | FileOutputStream out = new FileOutputStream("/tmp/test2.bm");
68 | FSDataOutputStream fsDataOut = new FSDataOutputStream(out, fsDataStats);
69 | FastBoomWriter writer = new FastBoomWriter(fsDataOut, "topic1", 0, config);
70 | 
71 | byte[] chars = "abc".getBytes(UTF8);
72 | List<byte[]> messages = new ArrayList<>();
73 | for (int i = 0; i < 151; i++) {
74 | StringBuilder sb = new StringBuilder("This is a test. ");
75 | int extra = rand.nextInt() % 500;
76 | for (int j = 0; j < extra; j++) {
77 | sb.append(chars[rand.nextInt(chars.length)]);
78 | }
79 | messages.add(sb.toString().getBytes(UTF8));
80 | }
81 | 
82 | byte[] message;
83 | for (int i = 0; i < 100000; i++) {
84 | message = messages.get(i % messages.size());
85 | writer.writeLine(System.currentTimeMillis(), message, 0, message.length);
86 | }
87 | 
88 | writer.close();
89 | */
90 | }
91 | }
--------------------------------------------------------------------------------
/src/main/java/com/blackberry/bdp/kaboom/LocalLoadBalancer.java:
--------------------------------------------------------------------------------
1 | /*
2 | * To change this license header, choose License Headers in Project Properties.
3 | * To change this template file, choose Tools | Templates
4 | * and open the template in the editor.
5 | */
6 | package com.blackberry.bdp.kaboom;
7 | 
8 | import static com.blackberry.bdp.common.conversion.Converter.intFromBytes;
9 | import static com.blackberry.bdp.kaboom.Leader.UTF8;
10 | import com.blackberry.bdp.kaboom.api.KaBoomClient;
11 | import com.blackberry.bdp.kaboom.api.KaBoomPartition;
12 | import com.blackberry.bdp.kaboom.api.KaBoomTopic;
13 | import com.blackberry.bdp.krackle.meta.Broker;
14 | import java.util.HashMap;
15 | import java.util.List;
16 | import org.apache.zookeeper.CreateMode;
17 | import org.slf4j.Logger;
18 | import org.slf4j.LoggerFactory;
19 | 
20 | /**
21 | * Assigns each partition to the client running on its leader's host
22 | */
23 | public class LocalLoadBalancer extends Leader {
24 | 
25 | private static final Logger LOG = LoggerFactory.getLogger(LocalLoadBalancer.class);
26 | 
27 | public LocalLoadBalancer(StartupConfig config) {
28 | super(config);
29 | }
30 | 
31 | /**
32 | * Unassigns non-local assignments and assigns unassigned partitions to the local client
33 | * @param kaboomClients
34 | * @param kaboomTopics
35 | * @throws java.lang.Exception
36 | */
37 | @Override
38 | protected void run_balancer(
39 | List<KaBoomClient> kaboomClients,
40 | List<KaBoomTopic> kaboomTopics) throws Exception {
41 | 
42 | HashMap<String, KaBoomPartition> idToPartition = new HashMap<>();
43 | for (KaBoomTopic topic : kaboomTopics) {
44 | for (KaBoomPartition partition : topic.getPartitions()) {
45 | idToPartition.put(partition.getTopicPartitionString(), partition);
46 | }
47 | }
48 | 
49 | HashMap<String, KaBoomClient> hostToKaBoomClient = new HashMap<>();
50 | HashMap<Integer, KaBoomClient> idToKaBoomClient = new HashMap<>();
51 | for (KaBoomClient client : kaboomClients) {
52 | hostToKaBoomClient.put(client.getHostname(), client);
53 | idToKaBoomClient.put(client.getId(), client);
54 | }
55 | 
56 | // Delete any assignments whose KaBoom client isn't local
57 | try {
58 | for (String partitionId : curator.getChildren().forPath(config.getZkRootPathPartitionAssignments())) {
59 | String assignmentZkPath = String.format("%s/%s", config.getZkRootPathPartitionAssignments(), partitionId);
60 | KaBoomClient assignedClient = idToKaBoomClient.get(intFromBytes(curator.getData().forPath(assignmentZkPath)));
61 | Broker leader = idToPartition.get(partitionId).getKafkaPartition().getLeader();
62 | if (!leader.getHost().equals(assignedClient.getHostname())) {
63 | curator.delete().forPath(assignmentZkPath);
64 | LOG.info("Non-local assignment {} to client {} (hostname: {}) deleted because leader's hostname is {}",
65 | assignmentZkPath, assignedClient.getId(), assignedClient.getHostname(), leader.getHost());
66 | }
67 | }
68 | } catch (Exception e) {
69 | LOG.error("There was a problem pruning non-local assignments", e);
70 | 
} 71 | 72 | for (KaBoomPartition partition : KaBoomPartition.unassignedPartitions(kaboomTopics)) { 73 | String leaderHostname = partition.getKafkaPartition().getLeader().getHost(); 74 | KaBoomClient localClient = hostToKaBoomClient.get(leaderHostname); 75 | if (localClient != null) { 76 | String zkPath = String.format("%s/%s", config.getZkRootPathPartitionAssignments(), 77 | partition.getTopicPartitionString()); 78 | curator.create().withMode(CreateMode.PERSISTENT).forPath(zkPath, 79 | String.valueOf(localClient.getId()).getBytes(UTF8)); 80 | partition.setAssignedClient(localClient); 81 | localClient.getAssignedPartitions().add(partition); 82 | 83 | } 84 | 85 | } 86 | } 87 | 88 | } 89 | -------------------------------------------------------------------------------- /src/main/java/com/blackberry/bdp/kaboom/PriParser.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014 BlackBerry, Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.blackberry.bdp.kaboom; 17 | 18 | import org.slf4j.Logger; 19 | import org.slf4j.LoggerFactory; 20 | 21 | public class PriParser { 22 | 23 | private static final Logger LOG = LoggerFactory.getLogger(PriParser.class); 24 | 25 | private IntParser intParser = new IntParser(); 26 | 27 | private int priLength; 28 | private int pri; 29 | 30 | public int getPriLength() { 31 | return priLength; 32 | } 33 | 34 | public int getPri() { 35 | return pri; 36 | } 37 | 38 | public int getFacility() { 39 | return pri / 8; 40 | } 41 | 42 | public int getPriority() { 43 | return pri % 8; 44 | } 45 | 46 | private int digit1; 47 | 48 | public boolean parsePri(byte[] bytes, int pos, int length) throws Exception{ 49 | 50 | if (bytes[pos] == '<') { 51 | // Looks promising! 
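// (A syslog PRI is '<' + (facility * 8 + severity) + '>' with values
// 0-191 and no leading zeros, so "<0>" is the only valid PRI whose
// number begins with '0'.)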
52 | 
53 | digit1 = bytes[pos + 1];
54 | if (digit1 == '0') {
55 | // This is only valid if the whole thing is "<0>"
56 | if (bytes[pos + 2] == '>') {
57 | pri = 0;
58 | priLength = 3;
59 | return true;
60 | 
61 | } else {
62 | // Invalid
63 | return false;
64 | }
65 | } else {
66 | if (digit1 >= '2' && digit1 <= '9') {
67 | // The max value is 191, so this is only valid for 1 or 2 digit
68 | // results
69 | if (bytes[pos + 2] == '>') {
70 | // one digit
71 | pri = intParser.intFromBytes(bytes, pos + 1, 1);
72 | priLength = 3;
73 | return true;
74 | } else {
75 | if (bytes[pos + 2] >= '0' && bytes[pos + 2] <= '9'
76 | && bytes[pos + 3] == '>') {
77 | // two digit
78 | pri = intParser.intFromBytes(bytes, pos + 1, 2);
79 | priLength = 4;
80 | return true;
81 | } else {
82 | // Invalid
83 | return false;
84 | }
85 | }
86 | } else {
87 | if (digit1 == '1') {
88 | if (bytes[pos + 2] == '>') {
89 | // one digit
90 | pri = intParser.intFromBytes(bytes, pos + 1, 1);
91 | priLength = 3;
92 | return true;
93 | } else {
94 | if (bytes[pos + 2] >= '0' && bytes[pos + 2] <= '9'
95 | && bytes[pos + 3] == '>') {
96 | // two digit
97 | pri = intParser.intFromBytes(bytes, pos + 1, 2);
98 | priLength = 4;
99 | return true;
100 | } else {
101 | if (bytes[pos + 2] >= '0' && bytes[pos + 2] <= '8'
102 | && bytes[pos + 3] >= '0' && bytes[pos + 3] <= '9'
103 | && bytes[pos + 4] == '>') {
104 | // Three digits starting with 18
105 | pri = intParser.intFromBytes(bytes, pos + 1, 3);
106 | priLength = 5;
107 | return true;
108 | } else {
109 | if (bytes[pos + 2] == '9' && bytes[pos + 3] >= '0'
110 | && bytes[pos + 3] <= '1' && bytes[pos + 4] == '>') {
111 | // 190 or 191
112 | pri = intParser.intFromBytes(bytes, pos + 1, 3);
113 | priLength = 5;
114 | return true;
115 | } else {
116 | // Invalid
117 | return false;
118 | }
119 | }
120 | }
121 | }
122 | }
123 | }
124 | }
125 | }
126 | 
127 | return false;
128 | }
129 | 
130 | }
131 | 
--------------------------------------------------------------------------------
/docs/sdedit_uml/kaboom_lb_sequence.txt:
--------------------------------------------------------------------------------
1 | kaboom:kaboom[a] "KaBoom"
2 | /lb:LeaderSelectorListenerAdapter "loadBalancer"
3 | su:su "StateUtils"
4 | zk:zk "ZooKeeper"
5 | kafka:kafka "Kafka"
6 | /rfw:rfw "ReadyFlagWriter"
7 | 
8 | kaboom:kaboom.initialize()
9 | kaboom:lb.new()
10 | lb:lb.takeLeadership(curatorFramework)
11 | lb:
12 | [c:loop:_while(true)_//_instance_is_leader]
13 | lb:
14 | lb:partitionToHost = HashMap<String, String>
15 | lb:hostToPartition = HashMap<String, List<String>>
16 | lb:clients = HashMap
17 | lb:clientToPartitions = HashMap<String, List<String>>
18 | lb:partitionToClient = HashMap
19 | lb:topics = ArrayList()
20 | lb:
21 | lb:su.readTopicsFromZooKeeper(topics)
22 | su:topics=zk.getData("/brokers/topics")
23 | lb:partitionToHostsMap=su.getPartitionHosts(topics, partitionToHost, hostToPartition)
24 | su:metaData=kafka.getPartitionMetaData(topics)
25 | su:HashMap<String, List<String>>
26 | lb:clients=su.getActiveClients()
27 | su:clients=zk.getData("/kaboom/clients")
28 | [c:loop:_for(partition:partitionToHost)]
29 | lb:client=zk.getData("/kaboom/assignments/" + partition)
30 | [c:alt clients.containsKey(client) // client connected]
31 | lb:partitionToClient\.put(partition, client);
32 | lb:partitions = clientToPartitions\.get(client)
33 | lb:partitions\.add(partition)
34 | --else // client not connected
35 | lb:zk.delete("/kaboom/assignments/" + partition);
36 | [/c]
37 | [/c]
38 | lb:su.calculateLoad(partitionToHost, clients, clientToPartitions)
39 | su:totalPartitions = partitionToHost\.size()
40 | su:totalWeight = 0
41 | *1 lb
42 | // no return since objects modified within method
43 | *1
44 | [c:loop:_for(client:clients)]
45 | su:totalWeight += client\.getLoad()
46 | [/c]
47 | [c:loop:_for(client:clients)]
48 | su:client\.setTargetLoad(totalPartitions * (1\.0 * info\.getWeight() / totalWeight))
49 | su:partitions = clientToPartitions\.get(client)
50 | su:client\.setLoad(partitions\.size())
51 | [/c]
52 | [c:loop:_for(client:clients)]
53 | [c:if:_load_>_targetLoad while load > targetLoad]
54 | lb:partitionToDelete = null
55 | *2 lb
56 | - create two arrays for storing partitions (local, remote)
57 | - loop through client's partitions, add to appropriate array
58 | - if remote.size > 0 partitionToDelete = random remote
59 | - else partitionToDelete = random local
60 | - remove from appropriate array
61 | *2
62 | lb:client\.setLoad(client\.getLoad() - 1)
63 | lb:zk.delete("/kaboom/assignments/" + partitionToDelete)
64 | [/c]
65 | [/c]
66 | lb:comparator = new Comparator() // to sort by percent load
67 | lb:sortedClients = new ArrayList
68 | lb:sortedClients\.addAll(clients\.keySet())
69 | [c:loop:_for(partition:partitionToHost)]
70 | lb:// Skip if the partition is assigned
71 | lb:Collections\.sort(sortedClients, comparator);
72 | lb:chosenClient = null
73 | [c:loop:_for(client:sortedClients)]
74 | *3 lb
75 | Find clients for unassigned partitions:
76 | - Prefer local clients under target load
77 | - Assign first (least loaded) when
78 | *3
79 | [c:alt client.getLoad() > client.getTargetLoad()]
80 | lb:chosenClient = sortedClients\.get(0);
81 | lb:break
82 | --client.getHostname() == partitionToHost(partition)
83 | lb:chosenClient = client;
84 | lb:break
85 | --else
86 | lb:continue
87 | [/c]
88 | [/c]
89 | lb:if (chosenClient == null) then chosenClient = sortedClients\.get(0);
90 | lb:zk.create("/kaboom/assignments/" + partition, chosenClient)
91 | lb:partitions = clientToPartitions\.get(chosenClient)
92 | lb:partitions\.add(partition)
93 | lb:partitionToClient\.put(partition, chosenClient)
94 | lb:chosenClient\.setLoad(chosenClient\.getLoad() + 1)
95 | [/c]
96 | [c:alt readyFlagThread == null || !readyFlagThread.isAlive()]
97 | lb:readyFlagWriter=rfw.new()
98 | lb:readyFlagThread = new Thread(readyFlagWriter)
99 | lb:readyFlagThread\.start();
100 | [/c]
101 | lb:Thread\.sleep(10 * 60 * 1000)
102 | 
103 | [/c]
104 | 
--------------------------------------------------------------------------------
/kaboom-0.8.0.md:
--------------------------------------------------------------------------------
1 | # KaBoom 0.8.0
2 | 
3 | This release contains the most significant updates to KaBoom we have introduced in a single version bump. Chief among them is the migration of all running configuration parameters and topic configurations to ZooKeeper. The remaining configuration continues to be read in via a property file. The running configuration and topic configuration are stored at zk://<zookeeper connection string>/kaboom/config.
4 | 
5 | ## Startup versus Running Configurations
6 | 
7 | Startup configuration changes require a KaBoom service restart to be loaded, whereas the running configuration is reloaded by KaBoom as changes are made in ZooKeeper. Updated running configuration values are then used as they are accessed by KaBoom. For example, you can change the number of HDFS replicas stored for boom files in Hadoop; however, this will not affect any open or previously closed files, only files created after the new configuration has been loaded (replicas are specified only at file creation time, from a filesystem object).
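Because the running configuration lives in a single znode, a value can be changed on a live cluster with any ZooKeeper client. A minimal sketch using the Curator API that KaBoom already depends on (the connection string is illustrative, and the naive string replacement stands in for proper JSON editing):

```
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.retry.ExponentialBackoffRetry;

public class UpdateRunningConfig {
    public static void main(String[] args) throws Exception {
        CuratorFramework curator = CuratorFrameworkFactory.newClient(
            "r3k1.kafka.log82.bblabs:2181/KaBoomDev",
            new ExponentialBackoffRetry(1000, 3));
        curator.start();
        // Read the current running config, bump one field, write it back;
        // KaBoom reloads the znode as changes are made in ZooKeeper.
        String json = new String(curator.getData().forPath("/kaboom/config"), "UTF-8");
        String updated = json.replace("boomFileReplicas: 3", "boomFileReplicas: 4");
        curator.setData().forPath("/kaboom/config", updated.getBytes("UTF-8"));
        curator.close();
    }
}
```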
8 | 
9 | ## Topic Configurations
10 | 
11 | Unlike running configurations, which are reloaded instantly, topic configuration updates trigger a graceful shutdown of all workers assigned to a partition of the topic (boom files closed; offsets and offset timestamps stored in ZK). The KaBoom client will then detect and restart any gracefully shut down workers. Workers load their topic configuration when they are launched.
12 | 
13 | 
14 | ## Example Topic Configuration
15 | 
16 | The topic configurations are stored at zk://<zookeeper connection string>/kaboom/topics/<topic id>, as:
17 | 
18 | ```
19 | {
20 | version: 1,
21 | id: "devtest-test3",
22 | hdfsRootDir: "/service/82/devtest/logs/%y%M%d/%H/devtest-test3",
23 | proxyUser: "dariens",
24 | defaultDirectory: "data",
25 | filterSet: [ ]
26 | }
27 | ```
28 | 
29 | Note: The empty filterSet array is reserved for future, to-be-implemented use cases.
30 | 
31 | ## Startup Configuration
32 | 
33 | Example startup configuration (property file based):
34 | ```
35 | ######################
36 | # KaBoom Configuration
37 | ######################
38 | 
39 | kaboom.id=1001
40 | hadooop.fs.uri=hdfs://hadoop.log82.bblabs
41 | #kaboom.weighting= - the default is number of cores
42 | kerberos.keytab=/opt/kaboom/config/kaboom.keytab
43 | kerberos.principal=flume@AD0.BBLABS
44 | #kaboom.hostname= - the default is the system's hostname
45 | zookeeper.connection.string=r3k1.kafka.log82.bblabs:2181,r3k2.kafka.log82.bblabs:2181,r3k3.kafka.log82.bblabs:2181/KaBoomDev
46 | kafka.zookeeper.connection.string=r3k1.kafka.log82.bblabs:2181,r3k2.kafka.log82.bblabs:2181,r3k3.kafka.log82.bblabs:2181
47 | #kaboom.load.balancer.type=even - this is the default
48 | #kaboom.runningConfig.zkPath=/kaboom/config - this is the default
49 | 
50 | ########################
51 | # Consumer Configuration
52 | ########################
53 | 
54 | metadata.broker.list=r3k1.kafka.log82.bblabs:9092,r3k2.kafka.log82.bblabs:9092,r3k3.kafka.log82.bblabs:9092
55 | fetch.message.max.bytes=10485760
56 | fetch.wait.max.ms=5000
57 | #fetch.min.bytes=1 - this is the default
58 | socket.receive.buffer.bytes=10485760
59 | auto.offset.reset=smallest
60 | #socket.timeout.seconds=30000 - this is the default
61 | ```
62 | 
63 | ## Running Configuration
64 | 
65 | Here is an example running configuration stored at zk://<zookeeper connection string>/<namespace>/kaboom/config:
66 | 
67 | ```
68 | {
69 | version: 8,
70 | allowOffsetOverrides: true,
71 | sinkToHighWatermark: true,
72 | useTempOpenFileDirectory: false,
73 | useNativeCompression: false,
74 | readyFlagPrevHoursCheck: 24,
75 | leaderSleepDurationMs: 600001,
76 | compressionLevel: 6,
77 | boomFileBufferSize: 16384,
78 | boomFileReplicas: 3,
79 | boomFileBlocksize: 268435456,
80 | boomFileTmpPrefix: "_tmp_",
81 | periodicHdfsFlushInterval: 30000,
82 | kaboomServerSleepDurationMs: 10000,
83 | fileCloseGraceTimeAfterExpiredMs: 30000,
84 | forcedZkOffsetTsUpdateMs: 600000,
85 | kafkaReadyFlagFilename: "_READY",
86 | maxOpenBoomFilesPerPartition: 5,
87 | workerSprintDurationSeconds: 3600,
88 | propagateReadyFlags: true,
89 | propagateReadyFlagFrequency: 600000,
90 | propateReadyFlagDelayBetweenPathsMs: 10
91 | }
92 | ```
93 | 
94 | 
--------------------------------------------------------------------------------
/src/main/java/com/blackberry/bdp/kaboom/timestamps/UniqueTimestampFinder.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014 BlackBerry, Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 9 | */ 10 | package com.blackberry.bdp.kaboom.timestamps; 11 | 12 | import com.blackberry.bdp.kaboom.StartupConfig; 13 | import com.blackberry.bdp.kaboom.api.RunningConfig; 14 | import java.io.IOException; 15 | 16 | import java.nio.charset.Charset; 17 | import org.apache.hadoop.fs.FileSystem; 18 | import org.slf4j.Logger; 19 | import org.slf4j.LoggerFactory; 20 | 21 | import static org.kohsuke.args4j.ExampleMode.ALL; 22 | import org.kohsuke.args4j.CmdLineException; 23 | import org.kohsuke.args4j.CmdLineParser; 24 | import org.kohsuke.args4j.Option; 25 | import org.kohsuke.args4j.spi.IntOptionHandler; 26 | import org.kohsuke.args4j.spi.LongOptionHandler; 27 | 28 | public class UniqueTimestampFinder { 29 | 30 | private final Object fsLock = new Object(); 31 | private static final Logger LOG = LoggerFactory.getLogger(UniqueTimestampFinder.class); 32 | private static final Charset UTF8 = Charset.forName("UTF-8"); 33 | boolean shutdown = false; 34 | private TimestampWorker worker; 35 | private StartupConfig startupConfig; 36 | private RunningConfig runningConfig; 37 | 38 | @Option(name = "-topic", usage = "The topic to consume", metaVar = "") 39 | private String topic; 40 | 41 | @Option(name = "-partition", usage = "The partition to consume", handler = IntOptionHandler.class, metaVar = "") 42 | private Integer partition; 43 | 44 | @Option(name = "-startOffset", usage = "The offset to start consuming from", handler = LongOptionHandler.class, metaVar = "") 45 | private Long startOffset; 46 | 47 | @Option(name = "-endOffset", usage = "The last offset to consume and end on", handler = LongOptionHandler.class, metaVar = "") 48 | private Long endOffset; 49 | 50 | public UniqueTimestampFinder() throws Exception { 51 | } 52 | 53 | public static void main(String[] args) throws Exception, IOException { 54 | LOG.info("****************************************************"); 55 | LOG.info("*** Unique Timestamp Finder Thingy ***"); 56 | LOG.info("****************************************************"); 57 | new UniqueTimestampFinder().run(args); 58 | } 59 | 60 | private void run(String[] args) throws Exception { 61 | CmdLineParser parser = new CmdLineParser(this); 62 | try { 63 | parser.parseArgument(args); 64 | if (topic == null 65 | || partition == null 66 | || startOffset == null 67 | || endOffset == null) { 68 | throw new CmdLineException(parser, "There was a missing required command line argument"); 69 | } 70 | 71 | LOG.info("Topic: {}", topic); 72 | LOG.info("Partition: {}", partition); 73 | LOG.info("Start offset: {}", startOffset); 74 | LOG.info("End offset: {}", endOffset); 75 | 76 | } catch (CmdLineException e) { 77 | System.err.println(e.getMessage()); 78 | parser.printUsage(System.err); 79 | System.err.println(); 80 | System.err.println(" Usage: 
java " + this.getClass() + " " + parser.printExample(ALL)); 81 | return; 82 | } 83 | 84 | LOG.info("Consuming topic {}, starting at {} ending on {} and writing {} as user {}", 85 | this.topic, 86 | this.startOffset, 87 | this.endOffset); 88 | 89 | try { 90 | startupConfig = new StartupConfig(); 91 | runningConfig = startupConfig.getRunningConfig(); 92 | startupConfig.logConfiguraton(); 93 | } catch (Exception e) { 94 | LOG.error("an error occured while building configuration objects: ", e); 95 | throw e; 96 | } 97 | 98 | Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() { 99 | @Override 100 | public void run() { 101 | worker.stop(); 102 | try { 103 | //FileSystem.get(startupConfig.getHadoopConfiguration()).close(); 104 | } catch (Throwable t) { 105 | LOG.error("Error closing Hadoop filesystem", t); 106 | } 107 | startupConfig.getKaBoomCurator().close(); 108 | } 109 | 110 | })); 111 | 112 | try { 113 | worker = new TimestampWorker(startupConfig, topic, partition, startOffset, endOffset); 114 | } catch (Exception e) { 115 | LOG.error("An error occured setting up our simple worker: ", e); 116 | } 117 | 118 | try { 119 | worker.run(); 120 | LOG.info("All finished"); 121 | } catch (Exception e) { 122 | LOG.error("There was an error while the simple worker was running, deleting all output files"); 123 | } 124 | } 125 | 126 | } 127 | -------------------------------------------------------------------------------- /src/test/java/com/blackberry/bdp/kaboom/LocalZkServer.java: -------------------------------------------------------------------------------- 1 | package com.blackberry.bdp.kaboom; 2 | 3 | /** 4 | * Copyright 2014 BlackBerry, Limited. 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | 20 | 21 | import java.io.File; 22 | import java.io.IOException; 23 | import java.lang.reflect.Constructor; 24 | import java.lang.reflect.InvocationTargetException; 25 | import java.lang.reflect.Method; 26 | import java.net.InetSocketAddress; 27 | 28 | import org.apache.commons.io.FileUtils; 29 | import org.apache.zookeeper.server.ZooKeeperServer; 30 | import org.slf4j.Logger; 31 | import org.slf4j.LoggerFactory; 32 | 33 | public class LocalZkServer { 34 | private static final Logger LOG = LoggerFactory 35 | .getLogger(LocalZkServer.class); 36 | 37 | private final int clientPort = 21818; // non-standard 38 | private final int numConnections = 5000; 39 | private final int tickTime = 2000; 40 | 41 | private Class factoryClass; 42 | private Object standaloneServerFactory; 43 | private File dir; 44 | 45 | private ZooKeeperServer server; 46 | 47 | public LocalZkServer() throws InstantiationException, IllegalAccessException, 48 | SecurityException, NoSuchMethodException, IllegalArgumentException, 49 | InvocationTargetException, ClassNotFoundException, IOException { 50 | String dataDirectory = System.getProperty("java.io.tmpdir"); 51 | 52 | dir = new File(dataDirectory, "zookeeper").getAbsoluteFile(); 53 | 54 | while (dir.exists()) { 55 | LOG.info("deleting {}", dir); 56 | FileUtils.deleteDirectory(dir); 57 | } 58 | 59 | server = new ZooKeeperServer(dir, dir, tickTime); 60 | 61 | // The class that we need changed name between CDH3 and CDH4, so let's 62 | // check 63 | // for the right version here. 64 | try { 65 | factoryClass = Class 66 | .forName("org.apache.zookeeper.server.NIOServerCnxnFactory"); 67 | 68 | standaloneServerFactory = factoryClass.newInstance(); 69 | Method configure = factoryClass.getMethod("configure", 70 | InetSocketAddress.class, Integer.TYPE); 71 | configure.invoke(standaloneServerFactory, new InetSocketAddress( 72 | clientPort), numConnections); 73 | Method startup = factoryClass.getMethod("startup", ZooKeeperServer.class); 74 | startup.invoke(standaloneServerFactory, server); 75 | 76 | } catch (ClassNotFoundException e) { 77 | LOG.info("Did not find NIOServerCnxnFactory"); 78 | try { 79 | factoryClass = Class 80 | .forName("org.apache.zookeeper.server.NIOServerCnxn$Factory"); 81 | 82 | Constructor constructor = factoryClass.getConstructor( 83 | InetSocketAddress.class, Integer.TYPE); 84 | standaloneServerFactory = constructor.newInstance( 85 | new InetSocketAddress(clientPort), numConnections); 86 | Method startup = factoryClass.getMethod("startup", 87 | ZooKeeperServer.class); 88 | startup.invoke(standaloneServerFactory, server); 89 | 90 | } catch (ClassNotFoundException e1) { 91 | LOG.info("Did not find NIOServerCnxn.Factory"); 92 | throw new ClassNotFoundException( 93 | "Can't find NIOServerCnxnFactory or NIOServerCnxn.Factory"); 94 | } 95 | } 96 | } 97 | 98 | public void shutdown() throws IllegalArgumentException, 99 | IllegalAccessException, InvocationTargetException, SecurityException, 100 | NoSuchMethodException, IOException { 101 | server.shutdown(); 102 | 103 | Method shutdown = factoryClass.getMethod("shutdown", new Class[] {}); 104 | shutdown.invoke(standaloneServerFactory, new Object[] {}); 105 | 106 | while (dir.exists()) { 107 | LOG.info("deleting {}", dir); 108 | FileUtils.deleteDirectory(dir); 109 | } 110 | } 111 | 112 | public Class getFactoryClass() { 113 | return factoryClass; 114 | } 115 | 116 | public void setFactoryClass(Class factoryClass) { 117 | this.factoryClass = factoryClass; 118 | } 119 | 120 | public Object 
getStandaloneServerFactory() { 121 | return standaloneServerFactory; 122 | } 123 | 124 | public void setStandaloneServerFactory(Object standaloneServerFactory) { 125 | this.standaloneServerFactory = standaloneServerFactory; 126 | } 127 | 128 | public File getDir() { 129 | return dir; 130 | } 131 | 132 | public int getClientport() { 133 | return clientPort; 134 | } 135 | 136 | public int getNumconnections() { 137 | return numConnections; 138 | } 139 | 140 | public int getTicktime() { 141 | return tickTime; 142 | } 143 | 144 | public ZooKeeperServer getServer() { 145 | return server; 146 | } 147 | 148 | } 149 | -------------------------------------------------------------------------------- /bin/kaboom: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # /etc/rc.d/init.d/kaboom 4 | # 5 | # Starts the kaboom daemon 6 | # 7 | # chkconfig: - 95 5 8 | # description: Collects logs, and writes them to Kafka. 9 | ### BEGIN INIT INFO 10 | # Provides: kaboom 11 | # Required-Start: $local_fs $remote_fs $network 12 | # Required-Stop: $local_fs $remote_fs $network 13 | # Default-Start: 14 | # Default-Stop: 15 | # Description: Start the kaboom service 16 | ### END INIT INFO 17 | 18 | setup() { 19 | if [ "x$CONFIGDIR" == "x" ] 20 | then 21 | CONFIGDIR=/opt/kaboom/config 22 | fi 23 | . $CONFIGDIR/kaboom-env.sh 24 | 25 | PROG="kaboom" 26 | 27 | OS=unknown 28 | 29 | if [ -e "/etc/SuSE-release" ] 30 | then 31 | OS=suse 32 | elif [ -e "/etc/redhat-release" ] 33 | then 34 | OS=redhat 35 | else 36 | echo "Could not determine OS." 37 | fi 38 | 39 | # Source function library. 40 | [ "$OS" == "redhat" ] && . /etc/init.d/functions 41 | [ "$OS" == "suse" ] && . /etc/rc.status 42 | 43 | RETVAL=0 44 | } 45 | 46 | start() { 47 | setup 48 | 49 | # Check if kaboom is already running 50 | PIDFILE="$PIDBASE/kaboom.pid" 51 | if [ -f $PIDFILE ] 52 | then 53 | PID=`head -1 $PIDFILE` 54 | if [ -e /proc/$PID ] 55 | then 56 | echo "$PROG is already running (PID $PID)" 57 | return 1 58 | else 59 | rm -f $PIDFILE 60 | fi 61 | fi 62 | 63 | echo -n $"Starting $PROG: " 64 | # kdestroy first to ensure that we're not logged in as anyone else. 65 | # Shouldn't be, if this is a dedicated user. 66 | # Also, ignore the message we get when the crentials cache is empty. 67 | /usr/bin/kdestroy 2>&1 | grep -v 'kdestroy: No credentials cache found while destroying cache' 68 | . $CONFIGDIR/kaboom-env.sh 69 | 70 | nohup $JAVA $JAVA_OPTS -classpath "$CLASSPATH" com.blackberry.bdp.kaboom.KaBoom $CONFIGDIR/kaboom.properties >$LOGDIR/server.out 2>&1 & 71 | 72 | RETVAL=$? 73 | PID=$! 74 | 75 | if [ $RETVAL -eq 0 ] 76 | then 77 | [ "$OS" == "redhat" ] && success 78 | [ "$OS" == "suse" ] && echo -n $rc_done 79 | [ "$OS" == "unknown" ] && echo -n "... done" 80 | echo $PID > $PIDFILE 81 | else 82 | failure 83 | fi 84 | echo 85 | } 86 | 87 | stop() { 88 | setup 89 | 90 | echo -n $"Stopping $PROG: " 91 | PIDFILE="$PIDBASE/kaboom.pid" 92 | 93 | if [ -f $PIDFILE ] 94 | then 95 | PID=`head -1 $PIDFILE` 96 | if [ -e /proc/$PID ] 97 | then 98 | kill $PID 99 | for i in `seq 1 60` 100 | do 101 | sleep 1 102 | 103 | if [ ! -e /proc/$PID ] 104 | then 105 | rm -f $PIDFILE 106 | [ "$OS" == "redhat" ] && success 107 | [ "$OS" == "suse" ] && echo -n $rc_done 108 | [ "$OS" == "unknown" ] && echo -n "... 
done" 109 | RETVAL=0 110 | break 111 | fi 112 | done 113 | 114 | if [ -e /proc/$PID ] 115 | then 116 | echo -n "Trying kill -9 " 117 | kill -9 $PID 118 | 119 | for i in `seq 1 60` 120 | do 121 | sleep 1 122 | 123 | if [ ! -e /proc/$PID ] 124 | then 125 | rm -f $PIDFILE 126 | [ "$OS" == "redhat" ] && success 127 | [ "$OS" == "suse" ] && echo -n $rc_done 128 | [ "$OS" == "unknown" ] && echo -n "... done" 129 | RETVAL=0 130 | break 131 | fi 132 | done 133 | fi 134 | 135 | if [ -e /proc/$PID ] 136 | then 137 | echo "Could not kill " 138 | [ "$OS" == "redhat" ] && failure 139 | [ "$OS" == "suse" ] && echo -n $rc_failed 140 | [ "$OS" == "unknown" ] && echo -n "... failed" 141 | RETVAL=1 142 | fi 143 | 144 | else 145 | echo -n "PID $PID is not running " 146 | rm -f $PIDFILE 147 | [ "$OS" == "redhat" ] && success 148 | [ "$OS" == "suse" ] && echo -n $rc_done 149 | [ "$OS" == "unknown" ] && echo -n "... done" 150 | RETVAL=0 151 | fi 152 | 153 | else 154 | echo -n "Could not find $PIDFILE" 155 | [ "$OS" == "redhat" ] && failure 156 | [ "$OS" == "suse" ] && echo -n $rc_failed 157 | [ "$OS" == "unknown" ] && echo -n "... failed" 158 | RETVAL=1 159 | 160 | fi 161 | 162 | echo 163 | } 164 | 165 | restart() { 166 | stop 167 | start 168 | } 169 | 170 | _status() { 171 | setup 172 | 173 | PIDFILE="$PIDBASE/kaboom.pid" 174 | status -p $PIDFILE $PROG 175 | } 176 | 177 | # make functions available under su 178 | export -f setup 179 | export -f start 180 | export -f stop 181 | export -f restart 182 | export -f _status 183 | 184 | setup 185 | case "$1" in 186 | start) 187 | if [ "x$KABOOM_USER" != "x" ] 188 | then 189 | su $KABOOM_USER -c start 190 | service epagent restart 191 | else 192 | start 193 | fi 194 | ;; 195 | stop) 196 | if [ "x$KABOOM_USER" != "x" ] 197 | then 198 | su $KABOOM_USER -c stop 199 | else 200 | stop 201 | fi 202 | ;; 203 | restart) 204 | if [ "x$KABOOM_USER" != "x" ] 205 | then 206 | su $KABOOM_USER -c restart 207 | service epagent restart 208 | else 209 | restart 210 | fi 211 | ;; 212 | status) 213 | if [ "x$KABOOM_USER" != "x" ] 214 | then 215 | su $KABOOM_USER -c _status 216 | else 217 | _status 218 | fi 219 | ;; 220 | *) 221 | echo $"Usage: $0 {start|stop|restart|status}" 222 | exit 1 223 | esac 224 | 225 | exit $? 226 | -------------------------------------------------------------------------------- /src/test/java/com/blackberry/bdp/kaboom/WorkerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 BlackBerry, Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.blackberry.bdp.kaboom; 17 | 18 | import org.apache.curator.RetryPolicy; 19 | import org.apache.curator.framework.CuratorFramework; 20 | import org.apache.curator.framework.CuratorFrameworkFactory; 21 | import org.apache.curator.retry.ExponentialBackoffRetry; 22 | import org.apache.zookeeper.CreateMode; 23 | import org.junit.AfterClass; 24 | import org.junit.Assert; 25 | import org.junit.BeforeClass; 26 | import org.junit.Test; 27 | import org.slf4j.Logger; 28 | import org.slf4j.LoggerFactory; 29 | 30 | import com.blackberry.bdp.common.conversion.Converter; 31 | import java.util.concurrent.TimeUnit; 32 | import org.apache.curator.framework.recipes.locks.InterProcessMutex; 33 | import org.apache.curator.framework.state.ConnectionState; 34 | import org.apache.curator.framework.state.ConnectionStateListener; 35 | 36 | public class WorkerTest { 37 | 38 | private static final Logger LOG = LoggerFactory.getLogger(WorkerTest.class); 39 | private static CuratorFramework curator; 40 | private static LocalZkServer zk; 41 | 42 | private static CuratorFramework buildCuratorFramework() { 43 | String connectionString = "localhost:21818"; 44 | RetryPolicy retryPolicy = new ExponentialBackoffRetry(1000, 3); 45 | LOG.info("attempting to connect to ZK with connection string {}", "localhost:21818"); 46 | CuratorFramework newCurator = CuratorFrameworkFactory.newClient(connectionString, retryPolicy); 47 | newCurator.start(); 48 | return newCurator; 49 | } 50 | 51 | @BeforeClass 52 | public static void setup() throws Exception { 53 | zk = new LocalZkServer(); 54 | curator = buildCuratorFramework(); 55 | } 56 | 57 | @AfterClass 58 | public static void cleanup() throws Exception { 59 | curator.close(); 60 | zk.shutdown(); 61 | } 62 | 63 | @Test 64 | public void testStoringOffsetZero() throws Exception { 65 | String zkPath = "/zero"; 66 | long value1 = 0; 67 | curator.create().withMode(CreateMode.PERSISTENT).forPath(zkPath, Converter.getBytes(value1)); 68 | long value2 = Converter.longFromBytes(curator.getData().forPath(zkPath)); 69 | Assert.assertEquals(value1, value2); 70 | LOG.info("Testing conversion of zero {} equals {}", value1, value2); 71 | } 72 | 73 | @Test(expected = NullPointerException.class) 74 | public void testStoringOffsetNull() throws Exception { 75 | String zkPath = "/zero"; 76 | Long value1 = null; 77 | curator.create().withMode(CreateMode.PERSISTENT).forPath(zkPath, Converter.getBytes(value1)); 78 | Long value2 = Converter.longFromBytes(curator.getData().forPath(zkPath)); 79 | Assert.assertEquals(value1, value2); 80 | LOG.info("Testing conversion of null {} equals {}", value1, value2); 81 | } 82 | 83 | public void testStoringOffsetUnitialized() throws Exception { 84 | String zkPath = "/zero"; 85 | long value1 = 1; 86 | curator.create().withMode(CreateMode.PERSISTENT).forPath(zkPath, Converter.getBytes(value1)); 87 | long value2 = Converter.longFromBytes(curator.getData().forPath(zkPath)); 88 | Assert.assertEquals(value1, value2); 89 | LOG.info("Testing conversion of null {} equals {}", value1, value2); 90 | } 91 | 92 | @Test 93 | public void testCuratoringLocking() throws Exception { 94 | String zkPath = "/node/lock"; 95 | 96 | SynchronousWorker sw1 = new SynchronousWorker(zkPath, 2); 97 | Thread t1 = new Thread(sw1); 98 | 99 | SynchronousWorker sw2 = new SynchronousWorker(zkPath, 10); 100 | Thread t2 = new Thread(sw2); 101 | 102 | t1.start(); 103 | Thread.sleep(50); 104 | t2.start(); 105 | t1.join(); 106 | t2.join(); 107 | 108 | Assert.assertEquals(sw1.lockAcquired, 
true); 109 | Assert.assertEquals(sw2.lockAcquired, false); 110 | } 111 | 112 | private class SynchronousWorker implements Runnable { 113 | 114 | protected String zkPath; 115 | protected long waitTime; 116 | private final InterProcessMutex lock; 117 | protected boolean lockAcquired = false; 118 | private boolean paused = false; 119 | 120 | protected SynchronousWorker(String zkPath, long waitTime) { 121 | this.zkPath = zkPath; 122 | this.waitTime = waitTime; 123 | this.lock = new InterProcessMutex(curator, zkPath); 124 | } 125 | 126 | private void abort() { 127 | } 128 | 129 | 130 | @Override 131 | public void run() { 132 | curator.getConnectionStateListenable().addListener(new ConnectionStateListener() { 133 | @Override 134 | public void stateChanged(CuratorFramework client, ConnectionState newState) { 135 | if (newState == ConnectionState.SUSPENDED) { 136 | paused = true; 137 | } else if (newState == ConnectionState.RECONNECTED) { 138 | paused = false; 139 | } else if (newState == ConnectionState.LOST) { 140 | abort(); 141 | } 142 | } 143 | }); 144 | 145 | LOG.info("Thread {} trying to obtain lock on {} (waiting up to {} seconds)...", 146 | Thread.currentThread().getName(), zkPath, waitTime); 147 | try { 148 | lockAcquired = lock.acquire(waitTime, TimeUnit.SECONDS); 149 | if (lockAcquired) { 150 | LOG.info("{} holds the lock", Thread.currentThread().getName()); 151 | } else { 152 | LOG.error("{} failed to obtain lock after waiting {} seconds", Thread.currentThread().getName(), waitTime); 153 | } 154 | } catch (Exception e) { 155 | LOG.error("{} failed to obtain lock", Thread.currentThread().getName(), e); 156 | } 157 | } 158 | 159 | } 160 | 161 | } 162 | -------------------------------------------------------------------------------- /src/test/java/com/blackberry/bdp/kaboom/TimestampParserTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014 BlackBerry, Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.blackberry.bdp.kaboom; 18 | 19 | import static org.junit.Assert.*; 20 | 21 | import java.io.UnsupportedEncodingException; 22 | import java.util.Calendar; 23 | import java.util.TimeZone; 24 | 25 | import org.junit.Test; 26 | 27 | public class TimestampParserTest { 28 | 29 | @Test 30 | public void testNonZeroOffet() throws UnsupportedEncodingException { 31 | String testString = "12345678902014-06-02T17:56:12.219+0000 this is a test"; 32 | TimestampParser tsp = new TimestampParser(); 33 | tsp.parse(testString.getBytes("UTF-8"), 10, testString.length() - 10); 34 | assertEquals(TimestampParser.NO_ERROR, tsp.getError()); 35 | assertEquals(1401731772219L, tsp.getTimestamp()); 36 | } 37 | 38 | @Test 39 | public void testParser() { 40 | // we need the current year for some of this 41 | Calendar c = Calendar.getInstance(TimeZone.getTimeZone("UTC")); 42 | c.setTimeInMillis(System.currentTimeMillis()); 43 | int currentYear = c.get(Calendar.YEAR); 44 | 45 | TimestampParser tsp = new TimestampParser(); 46 | String timestamp; 47 | 48 | // Start off easy 49 | timestamp = "2014-05-07T17:05:08.123 This is a test"; 50 | tsp.parse(timestamp.getBytes(), 0, timestamp.length()); 51 | assertEquals(TimestampParser.NO_ERROR, tsp.getError()); 52 | assertEquals(1399482308123L, tsp.getTimestamp()); 53 | assertEquals(23, tsp.getLength()); 54 | 55 | timestamp = "2014-05-07T17:05:08.123000 This is a test"; 56 | tsp.parse(timestamp.getBytes(), 0, timestamp.length()); 57 | assertEquals(TimestampParser.NO_ERROR, tsp.getError()); 58 | assertEquals(1399482308123L, tsp.getTimestamp()); 59 | assertEquals(26, tsp.getLength()); 60 | 61 | timestamp = "2014-05-07T17:05:08.12 This is a test"; 62 | tsp.parse(timestamp.getBytes(), 0, timestamp.length()); 63 | assertEquals(TimestampParser.NO_ERROR, tsp.getError()); 64 | assertEquals(1399482308120L, tsp.getTimestamp()); 65 | assertEquals(22, tsp.getLength()); 66 | 67 | timestamp = "2014-05-07T17:05:08.1 This is a test"; 68 | tsp.parse(timestamp.getBytes(), 0, timestamp.length()); 69 | assertEquals(TimestampParser.NO_ERROR, tsp.getError()); 70 | assertEquals(1399482308100L, tsp.getTimestamp()); 71 | assertEquals(21, tsp.getLength()); 72 | 73 | timestamp = "2014-05-07T17:05:08. 
This is a test"; 74 | tsp.parse(timestamp.getBytes(), 0, timestamp.length()); 75 | assertEquals(TimestampParser.NO_ERROR, tsp.getError()); 76 | assertEquals(1399482308000L, tsp.getTimestamp()); 77 | assertEquals(20, tsp.getLength()); 78 | 79 | timestamp = "2014-05-07T17:05:08 This is a test"; 80 | tsp.parse(timestamp.getBytes(), 0, timestamp.length()); 81 | assertEquals(TimestampParser.NO_ERROR, tsp.getError()); 82 | assertEquals(1399482308000L, tsp.getTimestamp()); 83 | assertEquals(19, tsp.getLength()); 84 | 85 | timestamp = "May 07 17:05:08.123 This is a test"; 86 | tsp.parse(timestamp.getBytes(), 0, timestamp.length()); 87 | assertEquals(TimestampParser.NO_ERROR, tsp.getError()); 88 | Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC")); 89 | cal.setTimeInMillis(1399482308123L); 90 | cal.set(Calendar.YEAR, currentYear); 91 | assertEquals(cal.getTimeInMillis(), tsp.getTimestamp()); 92 | assertEquals(19, tsp.getLength()); 93 | 94 | //IPGBD-3830 Test Whitespace padded days 95 | timestamp = "May 7 17:05:08.123 This is a test"; 96 | tsp.parse(timestamp.getBytes(), 0, timestamp.length()); 97 | assertEquals(TimestampParser.NO_ERROR, tsp.getError()); 98 | Calendar cal2 = Calendar.getInstance(TimeZone.getTimeZone("UTC")); 99 | cal2.setTimeInMillis(1399482308123L); 100 | cal2.set(Calendar.YEAR, currentYear); 101 | assertEquals(cal2.getTimeInMillis(), tsp.getTimestamp()); 102 | assertEquals(19, tsp.getLength()); 103 | 104 | timestamp = "<1>1 2014-05-07T17:05:08 This is a test"; 105 | tsp.parse(timestamp.getBytes(), 5, timestamp.length()); 106 | assertEquals(TimestampParser.NO_ERROR, tsp.getError()); 107 | assertEquals(1399482308000L, tsp.getTimestamp()); 108 | assertEquals(19, tsp.getLength()); 109 | 110 | timestamp = "2014-05-07T17:05:08Z This is a test"; 111 | tsp.parse(timestamp.getBytes(), 0, timestamp.length()); 112 | assertEquals(TimestampParser.NO_ERROR, tsp.getError()); 113 | assertEquals(1399482308000L, tsp.getTimestamp()); 114 | assertEquals(20, tsp.getLength()); 115 | 116 | timestamp = "2014-05-07T17:05:08+01 This is a test"; 117 | tsp.parse(timestamp.getBytes(), 0, timestamp.length()); 118 | assertEquals(TimestampParser.NO_ERROR, tsp.getError()); 119 | assertEquals(1399478708000L, tsp.getTimestamp()); 120 | assertEquals(22, tsp.getLength()); 121 | 122 | timestamp = "2014-05-07T17:05:08+0130 This is a test"; 123 | tsp.parse(timestamp.getBytes(), 0, timestamp.length()); 124 | assertEquals(TimestampParser.NO_ERROR, tsp.getError()); 125 | assertEquals(1399476908000L, tsp.getTimestamp()); 126 | assertEquals(24, tsp.getLength()); 127 | 128 | timestamp = "2014-05-07T17:05:08+01:30 This is a test"; 129 | tsp.parse(timestamp.getBytes(), 0, timestamp.length()); 130 | assertEquals(TimestampParser.NO_ERROR, tsp.getError()); 131 | assertEquals(1399476908000L, tsp.getTimestamp()); 132 | assertEquals(25, tsp.getLength()); 133 | 134 | timestamp = "2014-05-07T17:05:08-01:30 This is a test"; 135 | tsp.parse(timestamp.getBytes(), 0, timestamp.length()); 136 | assertEquals(TimestampParser.NO_ERROR, tsp.getError()); 137 | assertEquals(1399487708000L, tsp.getTimestamp()); 138 | assertEquals(25, tsp.getLength()); 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /src/main/java/com/blackberry/bdp/kaboom/EvenLoadBalancer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 BlackBerry Limited. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.blackberry.bdp.kaboom; 17 | 18 | import com.blackberry.bdp.kaboom.api.KaBoomClient; 19 | import com.blackberry.bdp.kaboom.api.KaBoomPartition; 20 | import com.blackberry.bdp.kaboom.api.KaBoomTopic; 21 | import com.blackberry.bdp.krackle.meta.MetaData; 22 | import java.util.ArrayList; 23 | import java.util.Collections; 24 | import java.util.Comparator; 25 | import java.util.List; 26 | import org.apache.zookeeper.CreateMode; 27 | import org.slf4j.Logger; 28 | import org.slf4j.LoggerFactory; 29 | 30 | /** 31 | * Assigns partitions based on a weighted workload. The default 32 | * weighting is based on how many cores each client has. There will 33 | * be a preference for local work while the client is under-loaded. 34 | * 35 | * Work is assigned to the least loaded client once all clients are over-loaded. 36 | */ 37 | public class EvenLoadBalancer extends Leader { 38 | 39 | private static final Logger LOG = LoggerFactory.getLogger(EvenLoadBalancer.class); 40 | 41 | public EvenLoadBalancer(StartupConfig config) { 42 | super(config); 43 | LOG.info("The even load balancer has been instantiated"); 44 | } 45 | 46 | /** 47 | * Evenly distributes partitions across nodes, preferring local assignments 48 | * @param kaboomClients 49 | * @param kaboomTopics 50 | * @throws Exception 51 | */ 52 | @Override 53 | protected void run_balancer( 54 | List<KaBoomClient> kaboomClients, 55 | List<KaBoomTopic> kaboomTopics) throws Exception { 56 | 57 | // Overloaded? 58 | for (KaBoomClient client : kaboomClients) { 59 | LOG.info("Client {} has {} assigned partitions", client.getId(), client.getAssignedPartitions().size()); 60 | if (!client.tooManyAssignedPartitions()) { 61 | continue; 62 | } 63 | LOG.info("KaBoom client {} is overloaded and needs to shed assignments", client.getId()); 64 | // Build up our lists of local and remote partitions... 
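// Shedding remote partitions first preserves data locality: a local
// assignment consumes from a partition whose leader broker runs on the
// same host, so locally-assigned work is only given up once there are
// no remote assignments left to drop.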
65 | List<KaBoomPartition> localPartitions = new ArrayList<>(); 66 | List<KaBoomPartition> remotePartitions = new ArrayList<>(); 67 | for (KaBoomPartition partition : client.getAssignedPartitions()) { 68 | if (partition.getKafkaPartition().getLeader().getHost().equals( 69 | client.getHostname())) { 70 | localPartitions.add(partition); 71 | } else { 72 | remotePartitions.add(partition); 73 | } 74 | } 75 | // ...then unassign partitions, remote ones first until we're even 76 | while (client.tooManyAssignedPartitions()) { 77 | KaBoomPartition partitionToDelete; 78 | LOG.info("client {} target partition load is {} and assigned partition count is {}", 79 | client.getId(), client.getTargetPartitionLoad(), client.getAssignedPartitions().size()); 80 | if (remotePartitions.size() > 0) { 81 | partitionToDelete = remotePartitions.remove(rand.nextInt(remotePartitions.size())); 82 | } else { 83 | partitionToDelete = localPartitions.remove(rand.nextInt(localPartitions.size())); 84 | } 85 | String deletePath = config.zkPathPartitionAssignment(partitionToDelete.getTopicPartitionString()); 86 | try { 87 | curator.delete().forPath(deletePath); 88 | client.getAssignedPartitions().remove(partitionToDelete); 89 | partitionToDelete.setAssignedClient(null); 90 | LOG.info("Deleted assignment {} to relieve overloaded client {}", 91 | deletePath, client.getId()); 92 | } catch (Exception e) { 93 | LOG.error("Failed to delete assignment {} to relieve overloaded client {}", 94 | deletePath, client.getId(), e); 95 | } 96 | } 97 | } 98 | 99 | // Sorts the clients based on their assigned partition load 100 | Comparator<KaBoomClient> leastLoadedSorter = new Comparator<KaBoomClient>() { 101 | @Override 102 | public int compare(KaBoomClient clientA, KaBoomClient clientB) { 103 | double valA = clientA.getAssignedPartitions().size() / clientA.getTargetPartitionLoad(); 104 | double valB = clientB.getAssignedPartitions().size() / clientB.getTargetPartitionLoad(); 105 | if (valA == valB) { 106 | return 0; 107 | } else { 108 | if (valA > valB) { 109 | return 1; 110 | } else { 111 | return -1; 112 | } 113 | } 114 | } 115 | }; 116 | 117 | // Find clients for any unassigned partitions 118 | List<KaBoomPartition> unassignedPartitions = KaBoomPartition.unassignedPartitions(kaboomTopics); 119 | LOG.info("[even load balancer] there are {} unassigned partitions", unassignedPartitions.size()); 120 | for (KaBoomPartition partition : unassignedPartitions) { 121 | Collections.sort(kaboomClients, leastLoadedSorter); 122 | // Grab the least loaded client in case we cannot find an underloaded local client 123 | KaBoomClient chosenClient = kaboomClients.get(0); 124 | for (KaBoomClient client : kaboomClients) { 125 | if (client.canTakeAnotherAssignment() && client.hasLocalPartition(partition)) { 126 | chosenClient = client; 127 | break; 128 | } 129 | } 130 | String zkPath = config.zkPathPartitionAssignment(partition.getTopicPartitionString()); 131 | try { 132 | if (curator.checkExists().forPath(zkPath) != null) { 133 | curator.setData().forPath(zkPath, 134 | String.valueOf(chosenClient.getId()).getBytes(UTF8)); 135 | } else { 136 | curator.create().withMode(CreateMode.PERSISTENT).forPath(zkPath, 137 | String.valueOf(chosenClient.getId()).getBytes(UTF8)); 138 | } 139 | partition.setAssignedClient(chosenClient); 140 | chosenClient.getAssignedPartitions().add(partition); 141 | LOG.info("Assigned {} to client ID {}", partition.getTopicPartitionString(), chosenClient.getId()); 142 | } catch (Exception e) { 143 | LOG.error("Failed to create assignment {} for {}", zkPath, chosenClient.getId(), e); 144 | } 145 | } 146 | } 147 | 148 | 
} 149 | -------------------------------------------------------------------------------- /src/main/java/com/blackberry/bdp/kaboom/AsyncAssignee.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 BlackBerry Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.blackberry.bdp.kaboom; 17 | 18 | import com.blackberry.bdp.kaboom.exception.LockNotAcquiredException; 19 | import com.blackberry.bdp.kaboom.exception.NotAssignedException; 20 | import java.util.Arrays; 21 | import java.util.concurrent.TimeUnit; 22 | import org.apache.curator.framework.CuratorFramework; 23 | import org.apache.curator.framework.recipes.cache.NodeCache; 24 | import org.apache.curator.framework.recipes.cache.NodeCacheListener; 25 | import org.apache.curator.framework.recipes.locks.InterProcessMutex; 26 | import org.apache.curator.framework.state.ConnectionState; 27 | import org.apache.curator.framework.state.ConnectionStateListener; 28 | import org.apache.zookeeper.KeeperException.NoNodeException; 29 | import org.slf4j.Logger; 30 | import org.slf4j.LoggerFactory; 31 | 32 | /** 33 | * It would have been swell to have the lock created and acquired in the 34 | * constructor, thus reducing the implementer's responsibility to only calling 35 | * super(), but the lock is tied to the thread, and the instantiation of the 36 | * implementing object always happens within the parent thread that creates it. 37 | * Instead, we'll have the lock provided during aquireAssignment(), and the 38 | * best we can do is check whether we're assigned during instantiation. 
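 *
 * Typical lifecycle, as implemented below: the constructor verifies the
 * assignment exists; the worker thread then calls aquireAssignment() to take
 * the inter-process lock and begin watching both the assignment znode and
 * the ZK connection state; releaseAssignment() closes those watches and
 * releases the lock.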
39 | */ 40 | 41 | public abstract class AsyncAssignee implements Runnable { 42 | 43 | private static final Logger LOG = LoggerFactory.getLogger(AsyncAssignee.class); 44 | 45 | protected boolean paused = false; 46 | protected final CuratorFramework curator; 47 | 48 | private final String zkAssignmentPath; 49 | private final byte[] assigneeBytes; 50 | private final String workerName; 51 | private final static String lockRoot = "/_LOCKS_"; 52 | private final long waitTimeMs; 53 | private InterProcessMutex lock; 54 | 55 | protected abstract void stop(); 56 | protected abstract void abort(); 57 | 58 | private NodeCache assignmentNodeCache; 59 | private ConnectionStateListener connectionListener; 60 | 61 | protected AsyncAssignee(CuratorFramework curator, 62 | String workerName, 63 | byte[] assigneeBytes, 64 | String zkAssignmentPath, 65 | long waitTimeMs) throws Exception { 66 | 67 | this.curator = curator; 68 | this.workerName = workerName; 69 | this.assigneeBytes = assigneeBytes; 70 | this.zkAssignmentPath = zkAssignmentPath; 71 | this.waitTimeMs = waitTimeMs; 72 | 73 | if (!isAssigned()) 74 | throw new NotAssignedException( 75 | String.format("%s cannot set up worker when not assigned to %s", 76 | workerName, zkAssignmentPath)); 77 | } 78 | 79 | public void aquireAssignment() throws Exception { 80 | this.lock = new InterProcessMutex(curator, zkPathToLock()); 81 | if (!isAssigned()) 82 | throw new NotAssignedException( 83 | String.format("%s will not attempt to acquire lock when not assigned to %s", 84 | workerName, zkAssignmentPath)); 85 | 86 | LOG.debug("Worker {} trying to obtain lock on {} (waiting up to {} ms)...", 87 | workerName, zkAssignmentPath, waitTimeMs); 88 | 89 | if (lock.acquire(waitTimeMs, TimeUnit.MILLISECONDS)) { 90 | watchAssignment(); 91 | watchConnection(); 92 | LOG.info("{} now holds the lock on {}", 93 | workerName, zkAssignmentPath); 94 | } else { 95 | throw new LockNotAcquiredException(String.format( 96 | "%s failed to obtain lock on %s after waiting %d ms", 97 | workerName, zkAssignmentPath, waitTimeMs)); 98 | } 99 | } 100 | 101 | protected void releaseAssignment() { 102 | try { 103 | if (assignmentNodeCache != null) { 104 | assignmentNodeCache.close(); 105 | LOG.info("assignment node cache closed"); 106 | } else { 107 | LOG.warn("assignment node cache listener is null and cannot be closed"); 108 | } 109 | } catch (Exception e) { 110 | LOG.error("Failed to close off assignment node cache: ", e); 111 | } 112 | 113 | try { 114 | if (connectionListener != null) { 115 | curator.getConnectionStateListenable().removeListener(connectionListener); 116 | LOG.info("removed the connection state listener on the curator connection"); 117 | } else { 118 | LOG.warn("connection state listener is null and cannot be removed"); 119 | } 120 | } catch (Exception e) { 121 | LOG.error("Failed to remove the connection state listener on the curator connection: ", e); 122 | } 123 | 124 | try { 125 | lock.release(); 126 | LOG.info("released lock on {}", zkPathToLock()); 127 | } catch (IllegalMonitorStateException imse) { 128 | LOG.error("Failed to release lock on {}: {}", zkPathToLock(), imse.getMessage()); 129 | } catch (Exception e) { 130 | LOG.error("unknown error trying to release the lock {}: ", zkPathToLock(), e); 131 | } 132 | } 133 | 134 | private boolean isAssigned() throws Exception { 135 | try { 136 | return Arrays.equals(curator.getData().forPath(zkAssignmentPath), assigneeBytes); 137 | } catch (NoNodeException nne) { 138 | return false; 139 | } 140 | } 141 | 142 | private void 
watchConnection() { 143 | connectionListener = new ConnectionStateListener() { 144 | @Override 145 | public void stateChanged(CuratorFramework client, ConnectionState newState) { 146 | if (newState == ConnectionState.SUSPENDED) { 147 | paused = true; 148 | LOG.warn("Worker {} paused during suspended ZK connection", workerName); 149 | } else { 150 | if (newState == ConnectionState.RECONNECTED) { 151 | try { 152 | if (isAssigned()) { 153 | paused = false; 154 | LOG.info("Worker {} unpaused after ZK reconnected", workerName); 155 | } else { 156 | LOG.warn("Worker {} no longer assigned {} after ZK reconnected", workerName, zkAssignmentPath); 157 | stop(); 158 | } 159 | } catch (Exception ex) { 160 | LOG.error("Worker {} cannot determine if still assigned {} after connection reconnected, stopping", 161 | workerName, 162 | zkAssignmentPath, 163 | ex); 164 | stop(); 165 | } 166 | } else { 167 | if (newState == ConnectionState.LOST) { 168 | LOG.error("Worker {} lost connection to ZK, aborting assignment", workerName); 169 | abort(); 170 | } 171 | } 172 | } 173 | } 174 | }; 175 | curator.getConnectionStateListenable().addListener(connectionListener); 176 | } 177 | 178 | private void watchAssignment() throws Exception { 179 | assignmentNodeCache = new NodeCache(curator, zkAssignmentPath); 180 | assignmentNodeCache.getListenable().addListener(new NodeCacheListener() { 181 | @Override 182 | public void nodeChanged() throws Exception { 183 | if (!isAssigned()) { 184 | LOG.info("{} is no longer assigned to {}", workerName, zkAssignmentPath); 185 | stop(); 186 | } 187 | } 188 | }); 189 | assignmentNodeCache.start(); 190 | } 191 | 192 | public final String zkPathToLock() { 193 | return String.format("%s%s", lockRoot, zkAssignmentPath); 194 | } 195 | 196 | } -------------------------------------------------------------------------------- /src/main/java/com/blackberry/bdp/cli/AssignmentStats.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 BlackBerry Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.blackberry.bdp.cli; 17 | 18 | import com.blackberry.bdp.common.props.Parser; 19 | import com.blackberry.bdp.kaboom.StartupConfig; 20 | import com.blackberry.bdp.kaboom.api.KaBoomClient; 21 | import com.blackberry.bdp.kaboom.api.KaBoomPartition; 22 | import com.blackberry.bdp.kaboom.api.KaBoomTopic; 23 | import com.blackberry.bdp.krackle.meta.MetaData; 24 | import java.nio.charset.Charset; 25 | import java.text.DecimalFormat; 26 | import java.util.ArrayList; 27 | import java.util.HashMap; 28 | import java.util.List; 29 | import java.util.Properties; 30 | import org.slf4j.Logger; 31 | import org.slf4j.LoggerFactory; 32 | 33 | public class AssignmentStats { 34 | 35 | private static final Logger LOG = LoggerFactory.getLogger(AssignmentStats.class); 36 | protected static final Charset UTF8 = Charset.forName("UTF-8"); 37 | private StartupConfig config; 38 | private MetaData kafkaMetaData; 39 | private List<KaBoomClient> kaboomClients; 40 | private List<KaBoomTopic> kaboomTopics; 41 | 42 | private final HashMap<Integer, KaBoomClient> idToKaBoomClient = new HashMap<>(); 43 | private final HashMap<String, KaBoomTopic> nameToKaBoomTopic = new HashMap<>(); 44 | private int totalPartitions; 45 | 46 | public static void main(String[] args) { 47 | AssignmentStats instance = new AssignmentStats(); 48 | instance.report(); 49 | } 50 | 51 | private AssignmentStats() { 52 | try { 53 | Properties props = StartupConfig.getProperties(); 54 | Parser propsParser = new Parser(props); 55 | if (propsParser.parseBoolean("configuration.authority.zk", false)) { 56 | // TODO: ZK 57 | } else { 58 | LOG.info("Configuration authority is file based"); 59 | config = new StartupConfig(props); 60 | } 61 | 62 | config.logConfiguraton(); 63 | } catch (Exception e) { 64 | LOG.error("an error occurred while building configuration object: ", e); 65 | System.exit(1); 66 | } 67 | 68 | } 69 | 70 | private void refreshMetadata() throws Exception { 71 | idToKaBoomClient.clear(); 72 | nameToKaBoomTopic.clear(); 73 | kafkaMetaData = MetaData.getMetaData(config.getKafkaSeedBrokers(), "kaboom"); 74 | kaboomClients = KaBoomClient.getAll(KaBoomClient.class, config.getKaBoomCurator(), config.getZkRootPathClients()); 75 | kaboomTopics = KaBoomTopic.getAll(kaboomClients, 76 | kafkaMetaData, 77 | config.getKaBoomCurator(), 78 | config.getZkRootPathTopicConfigs(), 79 | config.getZkRootPathPartitionAssignments(), 80 | config.getZkRootPathFlagAssignments()); 81 | 82 | for (KaBoomClient kaboomClient : kaboomClients) { 83 | idToKaBoomClient.put(kaboomClient.getId(), kaboomClient); 84 | } 85 | 86 | for (KaBoomTopic kaboomTopic : kaboomTopics) { 87 | nameToKaBoomTopic.put(kaboomTopic.getKafkaTopic().getName(), kaboomTopic); 88 | } 89 | totalPartitions = KaBoomTopic.getTotalPartitonCount(kaboomTopics); 90 | 91 | LOG.info("metadata refreshed for {} partitions", totalPartitions); 92 | } 93 | 94 | private void updatePartitonMap(HashMap<String, HashMap<String, List<String>>> counts, 95 | String host, 96 | String identifier, 97 | KaBoomPartition partition) { 98 | HashMap<String, List<String>> hostCounts = counts.get(host); 99 | if (hostCounts == null) { 100 | hostCounts = new HashMap<>(); 101 | counts.put(host, hostCounts); 102 | } 103 | List<String> partitions = hostCounts.get(identifier); 104 | if (partitions == null) { 105 | partitions = new ArrayList<>(); 106 | partitions.add(partition.getTopicPartitionString()); 107 | hostCounts.put(identifier, partitions); 108 | } else { 109 | partitions.add(partition.getTopicPartitionString()); 110 | hostCounts.put(identifier, partitions); 111 | } 112 | 113 | } 114 | 115 | private void report() { 116 | try { 117 | int unassigned 
= 0; 118 | refreshMetadata(); 119 | HashMap<String, List<String>> remoteSenders = new HashMap<>(); 120 | HashMap<String, String> partitionIdToBroker = new HashMap<>(); 121 | HashMap<String, HashMap<String, List<String>>> counts = new HashMap<>(); 122 | for (KaBoomTopic topic : kaboomTopics) { 123 | for (KaBoomPartition partition : topic.getPartitions()) { 124 | partitionIdToBroker.put(partition.getTopicPartitionString(), 125 | partition.getKafkaPartition().getLeader().getHost()); 126 | if (partition.getAssignedClient() != null) { 127 | String kaboomHost = partition.getAssignedClient().getHostname(); 128 | System.out.println(String.format("partition %s is assigned to %s", 129 | partition.getTopicPartitionString(), kaboomHost)); 130 | if (kaboomHost.equals(partition.getKafkaPartition().getLeader().getHost())) { 131 | updatePartitonMap(counts, kaboomHost, "local", partition); 132 | } else { 133 | updatePartitonMap(counts, kaboomHost, "remote", partition); 134 | String ownerHost = partition.getKafkaPartition().getLeader().getHost(); 135 | List<String> remoteSendersPartitions = remoteSenders.get(ownerHost); 136 | if (remoteSendersPartitions == null) { 137 | remoteSendersPartitions = new ArrayList<>(); 138 | } 139 | remoteSendersPartitions.add(partition.getTopicPartitionString()); 140 | remoteSenders.put(ownerHost, remoteSendersPartitions); 141 | } 142 | } else { 143 | unassigned++; 144 | System.out.println(String.format("partition %s is not assigned", partition.getTopicPartitionString())); 145 | } 146 | } 147 | } 148 | DecimalFormat df = new DecimalFormat("0.0000"); 149 | int totalLocal = 0; 150 | int totalRemote = 0; 151 | for (String hostname : counts.keySet()) { 152 | List<String> remotePartitions = counts.get(hostname).get("remote") != null 153 | ? counts.get(hostname).get("remote") : new ArrayList<String>(); 154 | List<String> localPartitions = counts.get(hostname).get("local") != null 155 | ? counts.get(hostname).get("local") : new ArrayList<String>(); 156 | totalRemote += remotePartitions.size(); 157 | totalLocal += localPartitions.size(); 158 | int remoteProducers = remoteSenders.get(hostname) != null ? remoteSenders.get(hostname).size() : 0; 159 | System.out.println(String.format("host: %s consuming: %d local / %d remote (%s percent local), producing: %d", 160 | hostname, 161 | localPartitions.size(), 162 | remotePartitions.size(), 163 | df.format(100.0 * ((float) localPartitions.size() / ((float) localPartitions.size() + (float) remotePartitions.size()))), 164 | remoteProducers) 165 | ); 166 | for (String partitionId : remotePartitions) { 167 | System.out.println(String.format("\tremote partition %s lead broker is %s: ", 168 | partitionId, 169 | partitionIdToBroker.get(partitionId))); 170 | } 171 | 172 | } 173 | System.out.println(String.format("totals: %d total local, %d total remote (%s total percent local)", 174 | totalLocal, 175 | totalRemote, 176 | df.format(100.0 * ((float) totalLocal / ((float) totalLocal + (float) totalRemote))))); 177 | 178 | System.out.println(String.format("there are %s unassigned partitions", unassigned)); 179 | 180 | } catch (Exception e) { 181 | LOG.error("There was a problem generating the assignment stats report", e); 182 | } 183 | 184 | } 185 | 186 | } 187 | -------------------------------------------------------------------------------- /src/main/java/com/blackberry/bdp/simplekaboom/SimpleKaBoom.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014 BlackBerry, Limited. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 9 | */ 10 | package com.blackberry.bdp.simplekaboom; 11 | 12 | import com.blackberry.bdp.kaboom.Authenticator; 13 | import com.blackberry.bdp.kaboom.FastBoomWriter; 14 | import com.blackberry.bdp.kaboom.StartupConfig; 15 | import com.blackberry.bdp.kaboom.api.RunningConfig; 16 | import java.io.IOException; 17 | 18 | import java.nio.charset.Charset; 19 | import java.security.PrivilegedExceptionAction; 20 | import org.apache.hadoop.fs.FileSystem; 21 | import org.slf4j.Logger; 22 | import org.slf4j.LoggerFactory; 23 | import org.apache.hadoop.hdfs.client.HdfsDataOutputStream; 24 | import org.apache.hadoop.fs.Path; 25 | 26 | import static org.kohsuke.args4j.ExampleMode.ALL; 27 | import org.kohsuke.args4j.CmdLineException; 28 | import org.kohsuke.args4j.CmdLineParser; 29 | import org.kohsuke.args4j.Option; 30 | import org.kohsuke.args4j.spi.IntOptionHandler; 31 | import org.kohsuke.args4j.spi.LongOptionHandler; 32 | 33 | public class SimpleKaBoom { 34 | 35 | private final Object fsLock = new Object(); 36 | private static final Logger LOG = LoggerFactory.getLogger(SimpleKaBoom.class); 37 | private static final Charset UTF8 = Charset.forName("UTF-8"); 38 | boolean shutdown = false; 39 | private SimpleWorker worker; 40 | private StartupConfig startupConfig; 41 | private RunningConfig runningConfig; 42 | FileSystem fs; 43 | HdfsDataOutputStream hdfsDataOut; 44 | FastBoomWriter boomWriter; 45 | 46 | @Option(name = "-proxyUser", usage = "The user to create the boom file as", metaVar = "") 47 | private String proxyUser; 48 | 49 | @Option(name = "-topic", usage = "The topic to consume", metaVar = "") 50 | private String topic; 51 | 52 | @Option(name = "-partition", usage = "The partition to consume", handler = IntOptionHandler.class, metaVar = "") 53 | private Integer partition; 54 | 55 | @Option(name = "-startOffset", usage = "The offset to start consuming from", handler = LongOptionHandler.class, metaVar = "") 56 | private Long startOffset; 57 | 58 | @Option(name = "-endOffset", usage = "The last offset to consume and end on", handler = LongOptionHandler.class, metaVar = "") 59 | private Long endOffset; 60 | 61 | @Option(name = "-boomFile", usage = "The absolute path to the boom file to write", metaVar = "") 62 | private String boomFile; 63 | 64 | public SimpleKaBoom() throws Exception { 65 | } 66 | 67 | public static void main(String[] args) throws Exception, IOException { 68 | LOG.info("**********************************************"); 69 | LOG.info("*** SIMPLE KABOOM CONSUMER ***"); 70 | LOG.info("**********************************************"); 71 | new SimpleKaBoom().run(args); 72 | } 73 | 74 | private void run(String[] args) throws Exception { 75 | CmdLineParser parser = new CmdLineParser(this); 76 | try { 77 | parser.parseArgument(args); 78 | if (proxyUser == null 79 | || topic == null 80 | || partition == null 81 | || startOffset == null 82 | || endOffset == null 83 | || boomFile == null) { 84 | throw new CmdLineException(parser, "There was a 
missing required command line argument"); 85 | } 86 | 87 | LOG.info("Proxy user: {}", proxyUser); 88 | LOG.info("Topic: {}", topic); 89 | LOG.info("Partition: {}", partition); 90 | LOG.info("Start offset: {}", startOffset); 91 | LOG.info("End offset: {}", endOffset); 92 | LOG.info("Boom file: {}", boomFile); 93 | 94 | } catch (CmdLineException e) { 95 | System.err.println(e.getMessage()); 96 | parser.printUsage(System.err); 97 | System.err.println(); 98 | System.err.println(" Usage: java " + this.getClass() + " " + parser.printExample(ALL)); 99 | return; 100 | } 101 | 102 | LOG.info("Consuming topic {}, starting at {} ending on {} and writing {} as user {}", 103 | this.topic, 104 | this.startOffset, 105 | this.endOffset, 106 | this.boomFile, 107 | this.proxyUser); 108 | 109 | try { 110 | startupConfig = new StartupConfig(); 111 | runningConfig = startupConfig.getRunningConfig(); 112 | startupConfig.logConfiguraton(); 113 | } catch (Exception e) { 114 | LOG.error("an error occurred while building configuration objects: ", e); 115 | throw e; 116 | } 117 | 118 | Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() { 119 | @Override 120 | public void run() { 121 | worker.stop(); 122 | try { 123 | FileSystem.get(startupConfig.getHadoopConfiguration()).close(); 124 | } catch (Throwable t) { 125 | LOG.error("Error closing Hadoop filesystem", t); 126 | } 127 | startupConfig.getKaBoomCurator().close(); 128 | } 129 | 130 | })); 131 | 132 | try { 133 | fs = startupConfig.authenticatedFsForProxyUser(proxyUser); 134 | if (fs == null) { 135 | LOG.warn("There is no topic configured with the proxy user {} within the KaBoom configuration file", proxyUser); 136 | LOG.info("Attempting to obtain an authenticated file system manually for {}", proxyUser); 137 | try { 138 | LOG.info("Attempting to create file system {} for {}", startupConfig.getHadoopUrlPath(), proxyUser); 139 | Authenticator.getInstance().runPrivileged(proxyUser, new PrivilegedExceptionAction<Void>() { 140 | @Override 141 | public Void run() throws Exception { 142 | synchronized (fsLock) { 143 | try { 144 | fs = startupConfig.getHadoopUrlPath().getFileSystem(startupConfig.getHadoopConfiguration()); 145 | LOG.info("Filesystem object instantiated for {}", proxyUser); 146 | } catch (Exception e) { 147 | LOG.error("Error getting file system {} for proxy user {}", startupConfig.getHadoopUrlPath(), proxyUser, e); 148 | } 149 | } 150 | return null; 151 | } 152 | }); 153 | } catch (IOException | InterruptedException e) { 154 | LOG.error("Error creating file system.", e); 155 | return; 156 | } 157 | } 158 | if (fs == null) { 159 | LOG.error("Cannot proceed without a file system"); 160 | return; 161 | } 162 | if (fs.exists(new Path(boomFile))) { 163 | fs.delete(new Path(boomFile), true); 164 | LOG.info("Deleted {} as it already exists", boomFile); 165 | } 166 | hdfsDataOut = (HdfsDataOutputStream) fs.create( 167 | new Path(boomFile), 168 | startupConfig.getBoomFilePerms(), 169 | false, 170 | runningConfig.getBoomFileBufferSize(), 171 | runningConfig.getBoomFileReplicas(), 172 | runningConfig.getBoomFileBlocksize(), 173 | null); 174 | boomWriter = new FastBoomWriter( 175 | hdfsDataOut, 176 | topic, 177 | partition, 178 | startupConfig); 179 | boomWriter.setPeriodicHdfsFlushInterval(runningConfig.getPeriodicHdfsFlushInterval()); 180 | boomWriter.setUseNativeCompression(runningConfig.getUseNativeCompression()); 181 | worker = new SimpleWorker(startupConfig, topic, partition, startOffset, endOffset, boomWriter); 182 | } catch (Exception e) { 183 | LOG.error("An error 
occurred setting up the simple worker: ", e); 184 | } 185 | 186 | try { 187 | worker.run(); 188 | hdfsDataOut.close(); 189 | LOG.info("All finished"); 190 | } catch (Exception e) { 191 | LOG.error("There was an error while the simple worker was running, deleting all output files", e); 192 | hdfsDataOut.close(); 193 | fs.delete(new Path(boomFile), true); 194 | LOG.info("Deleted {}", boomFile); 195 | } 196 | } 197 | 198 | } 199 | -------------------------------------------------------------------------------- /src/main/java/com/blackberry/bdp/kaboom/Leader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 BlackBerry Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.blackberry.bdp.kaboom; 17 | 18 | import java.nio.charset.Charset; 19 | import java.util.HashMap; 20 | import java.util.Random; 21 | 22 | import org.apache.curator.framework.CuratorFramework; 23 | import org.apache.curator.framework.recipes.leader.LeaderSelectorListenerAdapter; 24 | 25 | import org.slf4j.Logger; 26 | import org.slf4j.LoggerFactory; 27 | 28 | import com.blackberry.bdp.kaboom.api.KaBoomTopic; 29 | import com.blackberry.bdp.common.zk.ZkUtils; 30 | import com.blackberry.bdp.kaboom.api.KaBoomClient; 31 | import com.blackberry.bdp.krackle.meta.MetaData; 32 | import java.util.List; 33 | import java.util.regex.Matcher; 34 | import java.util.regex.Pattern; 35 | import org.apache.zookeeper.CreateMode; 36 | 37 | public abstract class Leader extends LeaderSelectorListenerAdapter { 38 | 39 | private static final Logger LOG = LoggerFactory.getLogger(Leader.class); 40 | protected static final Charset UTF8 = Charset.forName("UTF-8"); 41 | protected static final Random rand = new Random(); 42 | 43 | final protected StartupConfig config; 44 | protected CuratorFramework curator; 45 | 46 | private MetaData kafkaMetaData; 47 | private List<KaBoomClient> kaboomClients; 48 | private List<KaBoomTopic> kaboomTopics; 49 | 50 | private final HashMap<Integer, KaBoomClient> idToKaBoomClient = new HashMap<>(); 51 | private final HashMap<String, KaBoomTopic> nameToKaBoomTopic = new HashMap<>(); 52 | 53 | private int totalPartitions; 54 | private int totalWeight; 55 | 56 | public Leader(StartupConfig config) { 57 | this.config = config; 58 | } 59 | 60 | protected abstract void run_balancer( 61 | List<KaBoomClient> kaboomClients, 62 | List<KaBoomTopic> kaboomTopics) 63 | throws Exception; 64 | 65 | private void deleteAssignment(String reason, String zkPath) throws Exception { 66 | curator.delete().forPath(zkPath); 67 | LOG.info("Assignment {} deleted {}", zkPath, reason); 68 | } 69 | 70 | private void refreshMetadata() throws Exception { 71 | idToKaBoomClient.clear(); 72 | nameToKaBoomTopic.clear(); 73 | 74 | kafkaMetaData = MetaData.getMetaData(config.getKafkaSeedBrokers(), "kaboom"); 75 | 76 | kaboomClients = KaBoomClient.getAll(KaBoomClient.class, curator, config.getZkRootPathClients()); 77 | 78 | kaboomTopics = KaBoomTopic.getAll(kaboomClients, 79 | kafkaMetaData, 80 | config.getKaBoomCurator(), 81 | 
config.getZkRootPathTopicConfigs(), 82 | config.getZkRootPathPartitionAssignments(), 83 | config.getZkRootPathFlagAssignments()); 84 | 85 | totalPartitions = KaBoomTopic.getTotalPartitonCount(kaboomTopics); 86 | totalWeight = 0; 87 | 88 | for (KaBoomClient kaboomClient : kaboomClients) { 89 | totalWeight += kaboomClient.getWeight(); 90 | idToKaBoomClient.put(kaboomClient.getId(), kaboomClient); 91 | } 92 | 93 | for (KaBoomTopic kaboomTopic : kaboomTopics) { 94 | nameToKaBoomTopic.put(kaboomTopic.getKafkaTopic().getName(), kaboomTopic); 95 | } 96 | 97 | LOG.info("metadata refreshed => total weight {}, number of partitions: {}", totalWeight, totalPartitions); 98 | } 99 | 100 | private void pauseOnFirstDisconnectedAssignee() throws Exception { 101 | for (String partitionId : curator.getChildren().forPath( 102 | config.getZkRootPathPartitionAssignments())) { 103 | long sleepTime = config.getRunningConfig().getLeaderNodeDisconnectionWaittimeSeconds(); 104 | try { 105 | String assignedClientId = new String(curator.getData().forPath( 106 | String.format("%s/%s", 107 | config.getZkRootPathPartitionAssignments(), partitionId)), UTF8); 108 | if (!idToKaBoomClient.containsKey(Integer.parseInt(assignedClientId))) { 109 | LOG.warn("disconnected client detected, forcing {} second sleep", 110 | sleepTime); 111 | Thread.sleep(sleepTime * 1000); 112 | refreshMetadata(); 113 | return; 114 | } 115 | } catch (Exception e) { 116 | LOG.error("error while checking assigned client IDs: ", e); 117 | } 118 | } 119 | 120 | } 121 | 122 | @Override 123 | public void takeLeadership(CuratorFramework curator) throws Exception { 124 | this.curator = curator; 125 | ZkUtils.writeToPath(curator, config.getZkPathLeaderClientId(), config.getKaboomId(), true, CreateMode.EPHEMERAL); 126 | LOG.info("KaBoom client ID {} is the new leader, entering the {} ms calm down", 127 | config.getKaboomId(), config.getRunningConfig().getNewLeaderCalmDownDelay()); 128 | Thread.sleep(config.getRunningConfig().getNewLeaderCalmDownDelay()); 129 | 130 | while (true) { 131 | 132 | refreshMetadata(); 133 | pauseOnFirstDisconnectedAssignee(); 134 | 135 | // Delete any assignments if the kaboom client isn't connected or the topic is not configured 136 | try { 137 | for (String partitionId : curator.getChildren().forPath(config.getZkRootPathPartitionAssignments())) { 138 | try { 139 | Pattern topicPartitionPattern = Pattern.compile("^(.*)-(\\d+)$"); 140 | Matcher m = topicPartitionPattern.matcher(partitionId); 141 | if (m.matches()) { 142 | String assignmentZkPath = String.format("%s/%s", config.getZkRootPathPartitionAssignments(), partitionId); 143 | String clientId = new String(curator.getData().forPath(assignmentZkPath), UTF8); 144 | String topicName = m.group(1); 145 | int partitionNum = Integer.parseInt(m.group(2)); 146 | int assignedClientId = Integer.parseInt(clientId); 147 | 148 | // Check for all the reasons to delete an invalid assignment 149 | 150 | if (!nameToKaBoomTopic.containsKey(topicName)) { 151 | deleteAssignment("because of missing topic configuration", assignmentZkPath); 152 | } else if (!idToKaBoomClient.containsKey(assignedClientId)) { 153 | deleteAssignment(String.format("because client %s is not connected", assignedClientId), 154 | assignmentZkPath); 155 | } else if (kafkaMetaData.getTopic(topicName).getPartition(partitionNum) == null) { 156 | deleteAssignment(String.format("because %s is not a valid Kafka partition", partitionId), 157 | assignmentZkPath); 158 | } else { 159 | idToKaBoomClient.get(assignedClientId).getAssignedPartitions().add( 
160 | nameToKaBoomTopic.get(topicName).getKaBoomPartition(partitionNum)); 161 | LOG.info("Pre-balance: found {} assigned to {}", partitionId, assignedClientId); 162 | } 163 | } 164 | } catch (Exception e) { 165 | LOG.error("There was a problem pruning the assignments of unsupported topic {}", partitionId, e); 166 | } 167 | } 168 | } catch (Exception e) { 169 | LOG.error("There was a problem pruning the assignments of unsupported topics", e); 170 | } 171 | 172 | /** 173 | * By now we have cleaned up invalid partition assignments, 174 | * and we know our total weight and partition count as well 175 | * as how much work each client is currently being assigned, 176 | * so we can calculate each client's target load 177 | */ 178 | for (KaBoomClient kaboomClient : kaboomClients) { 179 | kaboomClient.calculateTargetPartitionLoad(totalPartitions, totalWeight); 180 | } 181 | 182 | // With that done we can call the balance method... 183 | try { 184 | run_balancer(kaboomClients, kaboomTopics); 185 | } catch (Exception e) { 186 | LOG.error("The load balancer raised an exception: ", e); 187 | } 188 | 189 | Thread.sleep(config.getRunningConfig().getLeaderSleepDurationMs()); 190 | } 191 | } 192 | 193 | } 194 | -------------------------------------------------------------------------------- /docs/Ready_Flag_Logic.md: -------------------------------------------------------------------------------- 1 | # Ready Flags 2 | 3 | ## Introduction 4 | 5 | KaBoom parses the date and time from each message it consumes from Kafka. The HDFS directories that it writes boom files within are based on a path template (`TimeBasedHdfsOutputPath`) that can contain date and time symbols. With a typical configuration this allows KaBoom to create boom files in a path that would look something similar to this: 6 | 7 | `hdfs://hadoop.company.com/logs/<yyyy>-<MM>-<dd>/<HH>/<topic>/data/` 8 | 9 | This allows workflows based on time periods to easily watch the HDFS file system and kick off jobs that then read boom files knowing they contain messages pertaining to the date/time parts of the path. 10 | 11 | However, when there are multiple partitions per Kafka topic it's hard to know when these files are ready for consumption. If your workflow or job assumes that all the data for the respective time period exists then it will need an indicator that KaBoom is finished writing boom files for a specific hour. In addition, it's nice to know when KaBoom is finished with an entire day (i.e. all the messages for the hours of that day have been written). 12 | 13 | ## KaBoom 0.8.2 Workers - Starting Their Shifts 14 | 15 | KaBoom assigns partitions to clients, which start a worker for each partition. A partition belongs to a given topic, so for discussion consider a partition to really be a combination of topic and partition. 16 | 17 | When a worker is first created and begins work on a particular partition it starts a shift. Shifts keep track of two important pieces of information: their `currentOffset` and the maximum observed timestamp of a message during the shift (`maxTimestamp`). 18 | 19 | For the offset, when the first shift is created it looks into ZooKeeper and grabs the offset that it needs to start consuming at. If no offset is found, it starts at 0. If that offset is out of range, then the behavior of the startup configuration property `auto.offset.reset` determines whether it starts consuming from the latest (most recent) or earliest (oldest) offset. However, we'll just assume that the first shift created finds an offset in ZK and that it's within a valid range for the partition. 20 | 21 | The `maxTimestamp` of each shift starts with `maxTimestamp = -1`. 22 | 23 | Shifts have a duration (let's assume it's one hour) and calculate their start/end times based on when that shift should have started (had it started on time). For example, if KaBoom starts/restarts at 5:52pm, it determines that the start time would have been 5:00pm and the end time would have been 6:00pm (the actual equation is `ts - ts % duration` where `ts` represents `System.currentTimeMillis()`). 24 | 25 | ## KaBoom 0.8.2 - Worker Shift Numbers 26 | 27 | Shifts are numbered: the first shift of a worker is `#1` and each subsequent shift that gets started increments that counter by 1. 28 | 29 | ## KaBoom 0.8.2 - Message Consume Loop 30 | 31 | When a worker consumes a message, it writes the message to a boom file received from `TimeBasedHdfsOutputPath` that is associated with the shift number that it was created for. The worker then compares the message's timestamp to the current shift's `maxTimestamp`. If it's greater, then `maxTimestamp` is set to the timestamp of the current message. This logic is contained in a loop that doesn't break until one of the worker's `stop()` or `abort()` methods is called. 32 | 33 | At the top of this loop and before a message is consumed the worker checks whether `currentShift.isOver()`. If the current shift is over then it assigns `currentShift` to `previousShift` and then sets `currentShift` to a new shift created with a starting offset of `previousShift.getOffset()` and a shift number of `previousShift.shiftNumber + 1` (see the sketch below). 34 | 
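The windowing and rollover described above can be made concrete with a small sketch. This is illustrative only (the class and member names are assumptions, not the actual worker source); it shows the `ts - ts % duration` alignment and how rollover chains shift numbers and offsets:

```java
// Illustrative sketch of shift windows and rollover; not KaBoom source.
public class ShiftSketch {

	static final long DURATION_MS = 60 * 60 * 1000L; // assume one-hour shifts

	int shiftNumber;        // first shift is #1
	long startOffset;       // Kafka offset the shift begins consuming at
	long startTime;         // aligned to when the shift *should* have started
	long endTime;
	long maxTimestamp = -1; // no message observed yet

	ShiftSketch(int shiftNumber, long startOffset, long ts) {
		this.shiftNumber = shiftNumber;
		this.startOffset = startOffset;
		this.startTime = ts - ts % DURATION_MS; // e.g. 5:52pm becomes 5:00pm
		this.endTime = startTime + DURATION_MS; // ...and the end is 6:00pm
	}

	boolean isOver() {
		return System.currentTimeMillis() >= endTime;
	}

	// At the top of the consume loop: when the current shift is over, the
	// next shift starts at the offset this one reached, number bumped by 1.
	ShiftSketch nextShift(long currentOffset) {
		return new ShiftSketch(shiftNumber + 1, currentOffset, System.currentTimeMillis());
	}
}
```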
35 | If the current shift isn't over, it checks whether `previousShift != null`, and if there's one still hanging around it checks `previousShift.isTimeToFinish()`. It's time to finish when the shift's end time is more than the running configuration option `fileCloseGraceTimeAfterExpiredMs` (default: 30000) in the past. If it's time to finish the previous shift the worker calls `previousShift.finish(true)`. This call instructs the worker's `TimeBasedHdfsOutputPath` to close off all boom files associated with `previousShift.shiftNumber`. After a boom file is successfully closed off, `TimeBasedHdfsOutputPath` has no record of it anymore. The `true` indicates that after successfully closing all boom files created during the shift, the metadata (`offset`/`maxTimestamp`) should be updated in ZooKeeper. If an exception is thrown while the previous shift is finishing, then the message consume loop exits, and the worker calls `abort()`. Once the call to `previousShift.finish(true)` completes, the worker sets `previousShift = null`, making the current shift the only shift object instantiated within the worker. 36 | 37 | ## Persisting Metadata 38 | 39 | When a shift persists its metadata, it's important to understand what that entails. The offset is easy: it is always the next offset that the worker is going to attempt to consume. Offsets are always incremented by 1 after each message is consumed from Kafka (normally, leader failovers aside) and have nothing to do with the concept of time. 40 | 41 | The `maxTimestamp`, however, is a little different. If `maxTimestamp == -1` then the timestamp that gets stored is `shift.endTime`. This ensures that if there are no messages for KaBoom to consume, KaBoom will still be able to reliably indicate that the hour's (shift duration) boom files are ready for processing by external/downstream workflows (at least for that partition; there are likely more partitions in the topic). 42 | 43 | ## KaBoom 0.8.2 - Shutting Down 44 | 45 | Once `stop()` or `abort()` has been called, the next message consume loop iteration will break. The only other way for the message consume loop to break is if an exception occurs, in which case `abort()` is called. 46 | 47 | Immediately following the message consume loop, `shutdown()` is called. This method performs some housekeeping (removing gauge metrics, etc.) and then determines if it's gracefully shutting down or aborting. 48 | 49 | Graceful shutdowns check if `previousShift != null` and, if so, call `previousShift.finish()`. The lack of the `true` boolean parameter indicates that the previous shift shouldn't persist its metadata upon successful closure of its boom files. It then calls `currentShift.finish(true)`, which does store the `offset` and `maxTimestamp` to ZK. 50 | 51 | Aborting instructs the worker's `TimeBasedHdfsOutputPath` to delete all open/unfinished boom files regardless of associated shift. 52 | 53 | ## Why This Matters 54 | 55 | With the above design implemented, ZooKeeper is guaranteed to contain metadata for each partition that gets updated hourly. The metadata updates exist 100% independent of the timestamps within the messages that KaBoom receives. KaBoom will only ever persist an offset for a partition once the previous hour's boom files have been closed off successfully. It will remember the latest timestamp that it ever parsed from a message within that hour, and reliably store that timestamp. The rule for what gets stored is sketched below. 56 | 
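A minimal sketch of that persistence rule (the helper name `timestampToPersist` is hypothetical; the real worker persists through its shift objects and ZooKeeper):

```java
// Illustrative only: what gets stored in ZK when a shift finishes.
public class ShiftMetadataSketch {

	// offset: the next offset the worker will attempt to consume.
	// maxTimestamp: -1 when no message was observed during the shift.
	static long timestampToPersist(long maxTimestamp, long shiftEndTime) {
		// An idle hour still advances the stored timestamp, so downstream
		// workflows can tell the hour is complete even with zero messages.
		return maxTimestamp == -1 ? shiftEndTime : maxTimestamp;
	}

	public static void main(String[] args) {
		long shiftEndTime = 1420070400000L; // some top-of-hour boundary
		System.out.println(timestampToPersist(-1, shiftEndTime));             // idle shift
		System.out.println(timestampToPersist(1420067450000L, shiftEndTime)); // busy shift
	}
}
```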
57 | ## Ready Flag Writer 58 | 59 | The `ReadyFlagWriter` grabs all the topics that KaBoom is configured for, and then determines the earliest of the stored `maxTimestamps` across all of a topic's partitions. It then starts at the top of the previous hour and checks: 60 | 61 | * A) Whether there's a `_READY` flag in that hour already 62 | * B) Whether the timestamp of the top of the hour is < `earliestMaxTimestamp` 63 | 64 | Only if a ready flag doesn't already exist AND all the partitions have `maxTimestamp`s greater (i.e. later) than the top of that hour will it write the `_READY` flag (the decision is sketched below). 65 | 66 | This flag gets written into the `hdfs://hadoop.company.com/logs/<yyyy>-<MM>-<dd>/<HH>/<topic>/data` directory AND to the `hdfs://hadoop.company.com/logs/<yyyy>-<MM>-<dd>/<HH>/<topic>` directory. 67 | 68 | This is a per-topic flag that is written into a specific hourly directory. 69 | 70 | The hourly directory is created if one doesn't exist. 71 | 
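The per-hour check reduces to a single predicate. A sketch under assumed names (`hourStartMs` is the timestamp of the top of the hour being examined; this is not the actual `ReadyFlagWriter` source):

```java
// Illustrative sketch of the ReadyFlagWriter decision; names are assumptions.
public class ReadyFlagDecisionSketch {

	static boolean shouldWriteReadyFlag(boolean flagAlreadyExists,
		 long hourStartMs, long earliestMaxTimestamp) {
		// Write only when no flag exists yet AND every partition's stored
		// maxTimestamp is later than the top of the hour in question.
		return !flagAlreadyExists && hourStartMs < earliestMaxTimestamp;
	}

	public static void main(String[] args) {
		long hourStart = 1420070400000L;
		System.out.println(shouldWriteReadyFlag(false, hourStart, hourStart + 3600000L)); // true
		System.out.println(shouldWriteReadyFlag(true, hourStart, hourStart + 3600000L));  // false
	}
}
```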
72 | ## Ready Flag Propagation 73 | 74 | Topics often share a common HDFS root directory in their `TimeBasedHdfsOutputPath`, so KaBoom first gathers a unique list of topics with common HDFS output paths and then assigns those topics for `_READY` flag propagation evenly amongst all connected KaBoom clients. 75 | 76 | A topic-specific propagator thread is spawned every 10 minutes (if an earlier one isn't still running) and it performs a depth-first traversal of the topic's HDFS root directory. 77 | 78 | If the path is an hourly directory (i.e. `hdfs://hadoop.company.com/logs/<yyyy>-<MM>-<dd>/<HH>/`) it then checks all child directories (topics, in our example) for a `_READY` flag. If they all have one it creates `hdfs://hadoop.company.com/logs/<yyyy>-<MM>-<dd>/<HH>/_READY`. 79 | 80 | If the path is a daily directory (i.e. `hdfs://hadoop.company.com/logs/<yyyy>-<MM>-<dd>`) it then checks all the child directories (hours, in our example) for a `_READY` flag. If they all have one it creates `hdfs://hadoop.company.com/logs/<yyyy>-<MM>-<dd>/_READY`. The aggregation rule is sketched below.
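The aggregation rule at each level (hourly or daily) is the same: a directory earns its own `_READY` flag once every child directory already has one. A sketch against the Hadoop `FileSystem` API (illustrative only, not the actual propagator source):

```java
import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Illustrative sketch of _READY flag aggregation; not KaBoom source.
public class ReadyFlagPropagationSketch {

	static final String READY_FLAG = "_READY";

	// Writes dir/_READY once every child directory contains a _READY flag.
	// Returns true only when a new aggregate flag was created.
	static boolean propagate(FileSystem fs, Path dir) throws IOException {
		Path aggregateFlag = new Path(dir, READY_FLAG);
		if (fs.exists(aggregateFlag)) {
			return false; // this level is already flagged
		}
		for (FileStatus child : fs.listStatus(dir)) {
			if (child.isDirectory()
				 && !fs.exists(new Path(child.getPath(), READY_FLAG))) {
				return false; // at least one child isn't finished yet
			}
		}
		return fs.createNewFile(aggregateFlag);
	}
}
```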
-------------------------------------------------------------------------------- /src/main/java/com/blackberry/bdp/simplekaboom/SimpleWorker.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014 BlackBerry, Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 9 | */ 10 | package com.blackberry.bdp.simplekaboom; 11 | 12 | import com.blackberry.bdp.kaboom.*; 13 | import java.net.InetAddress; 14 | import java.net.UnknownHostException; 15 | 16 | import org.slf4j.Logger; 17 | import org.slf4j.LoggerFactory; 18 | 19 | import com.blackberry.bdp.common.conversion.Converter; 20 | import com.blackberry.bdp.common.jmx.MetricRegistrySingleton; 21 | import com.blackberry.bdp.krackle.consumer.BrokerUnavailableException; 22 | import com.blackberry.bdp.krackle.consumer.Consumer; 23 | import java.io.IOException; 24 | 25 | public class SimpleWorker { 26 | 27 | private static final Logger LOG = LoggerFactory.getLogger(SimpleWorker.class); 28 | 29 | private final String partitionId; 30 | private Consumer consumer; 31 | private long offset; 32 | private long timestamp; 33 | private final StartupConfig startupConfig; 34 | private final String topic; 35 | private final int partition; 36 | private final long startOffset; 37 | private final long endOffset; 38 | private String hostname; 39 | private long startTime; 40 | private long messagesWritten = 0; 41 | private boolean stopping = false; 42 | private final FastBoomWriter boomWriter; 43 | 44 | public SimpleWorker(StartupConfig startupConfig, 45 | String topic, 46 | int partition, 47 | long startOffset, 48 | long endOffset, 49 | FastBoomWriter boomWriter) throws Exception { 50 | this.startupConfig = startupConfig; 51 | this.topic = topic; 52 | this.partition = partition; 53 | this.startOffset = startOffset; 54 | this.endOffset = endOffset; 55 | this.messagesWritten = 0; 56 | this.boomWriter = boomWriter; 57 | 58 | offset = startOffset; 59 | partitionId = topic + "-" + partition; 60 | 61 | LOG.info("[{}] Created simple worker.", partitionId); 62 | //this.run(); 63 | } 64 | 65 | public void run() throws Exception { 66 | try { 67 | try { 68 | hostname = InetAddress.getLocalHost().getCanonicalHostName(); 69 | } catch (UnknownHostException e) { 70 | LOG.error("[{}] Can't determine local hostname", partitionId); 71 | hostname = "unknown.host"; 72 | } 73 | 74 | String clientId = "kaboom-" + hostname; 75 | 76 | consumer = new Consumer(startupConfig.getConsumerConfiguration(), clientId, topic, partition, startOffset, MetricRegistrySingleton.getInstance().getMetricsRegistry()); 77 | 78 | LOG.info("[{}] Created simple worker. 
Starting at offset {}.", partitionId, startOffset); 79 | 80 | byte[] bytes = new byte[1024 * 1024]; 81 | int length; 82 | byte version; 83 | int pos; 84 | PriParser pri = new PriParser(); 85 | VersionParser ver = new VersionParser(); 86 | TimestampParser tsp = new TimestampParser(); 87 | 88 | while (stopping == false) { 89 | try { 90 | if (offset > endOffset) { 91 | LOG.info("[{}] offset {} is past end offset of {}, stopping", partitionId, offset, endOffset); 92 | stop(); 93 | continue; 94 | } 95 | if (Thread.interrupted()) { 96 | throw new Exception("This simple worker has been interrupted"); 97 | } 98 | 99 | length = consumer.getMessage(bytes, 0, bytes.length); 100 | 101 | if (length == -1) { 102 | continue; 103 | } 104 | 105 | // offset always refers to the next offset we expect to 106 | // since we just called consumer.getMessage() let's see 107 | // if the offset of the last message is what we expected 108 | // and handle the fun edge cases when it's not 109 | if (offset != consumer.getLastOffset()) { 110 | long highWatermark = consumer.getHighWaterMark(); 111 | 112 | if (offset > consumer.getLastOffset()) { 113 | if (offset < highWatermark) { 114 | /* 115 | * When using SNAPPY compression in Krackle's consumer there will be messages received 116 | * that are in the snappy block that are from earlier than our requested offset. When this 117 | * happens the consumer will continue to send us messages from within that block so we 118 | * should just be patient until the offsets are from where we want. 119 | */ 120 | continue; 121 | } else { 122 | if (offset > highWatermark) { 123 | throw new Exception(String.format("[%s] offset %d is greater than high watermark %d", partitionId, offset, highWatermark)); 124 | } 125 | } 126 | } else { 127 | String error = String.format("[%s] Offset anomaly! Expected: %d, Got %d, Consumer high watermark %d, latest %d, earliest %d", 128 | partitionId, 129 | offset, 130 | consumer.getLastOffset(), 131 | consumer.getHighWaterMark(), 132 | consumer.getLatestOffset(), 133 | consumer.getEarliestOffset()); 134 | throw new Exception(error); 135 | } 136 | } 137 | 138 | offset = consumer.getNextOffset(); 139 | 140 | // (byte) 0xFE: -2 141 | // (byte) 0x00: 0 142 | // (byte) 0xFF: -1 143 | // Check for version 144 | if (bytes[0] == (byte) 0xFE) { 145 | version = bytes[1]; 146 | 147 | if (version == (byte) 0x00) { 148 | // Version 0 has a timestamp in the front, so we can skip that for now. Come back if we need it. 149 | pos = 10; 150 | } else { 151 | LOG.warn("[{}] Unrecognized encoding version: {}", partitionId, version); 152 | pos = 0; 153 | } 154 | } else { 155 | // version -1 is a raw log 156 | version = (byte) 0xFF; 157 | pos = 0; 158 | } 159 | 160 | // Optional PRI at the start of the line. 161 | if (pri.parsePri(bytes, pos, length)) { 162 | pos += pri.getPriLength(); 163 | } 164 | 165 | // On the off chance that someone is following RFC5424 and has 166 | // inserted a version in the log line. 167 | if (ver.parseVersion(bytes, pos, length - pos)) { 168 | // Skip the length of the version and the following space. 
169 | pos += ver.getVersionLength() + 1; 170 | } 171 | 172 | tsp.parse(bytes, pos, length - pos); 173 | 174 | if (tsp.getError() == TimestampParser.NO_ERROR) { 175 | timestamp = tsp.getTimestamp(); 176 | // Move position to the end of the timestamp 177 | pos += tsp.getLength(); 178 | 179 | // mbruce: occasionally we get a line that is truncated partway through the timestamp, 180 | // however we still have the rest of the last message in the byte buffer and parsing the 181 | // timestamp will push us past the end of the line 182 | if (pos > length) { 183 | LOG.error("Error: parsing timestamp went beyond the length of the message"); 184 | continue; 185 | } 186 | // If the next char is a space, skip that too. 187 | if (pos < length && bytes[pos] == ' ') { 188 | pos++; 189 | } 190 | } else { 191 | if (version == (byte) 0x00) { 192 | LOG.debug("[{}] Failed to parse timestamp. Using stored timestamp", partitionId); 193 | timestamp = Converter.longFromBytes(bytes, 2); 194 | } else { 195 | LOG.error("[{}] Error parsing timestamp.", partitionId); 196 | timestamp = System.currentTimeMillis(); 197 | } 198 | } 199 | 200 | if ((length - pos) < 0) { 201 | LOG.info("[{}] Skipping message as length - pos < 0: timestamp: {}, pos: {}, length: {}", partitionId, timestamp, pos, length); 202 | continue; 203 | } 204 | 205 | boomWriter.writeLine(timestamp, bytes, pos, length - pos); 206 | messagesWritten++; 207 | 208 | } catch (Exception e) { 209 | LOG.error("[{}] Error processing message: ", partitionId, e); 210 | throw e; 211 | } 212 | } 213 | 214 | LOG.info("[{}] Simple KaBoom client shutting down and closing all output files.", partitionId); 215 | 216 | boomWriter.close(); 217 | 218 | LOG.info("[{}] Worker stopped. (Read {} lines. Next offset is {})", partitionId, messagesWritten, offset); 219 | } catch (BrokerUnavailableException | IOException e) { 220 | LOG.error("[{}] An exception occurred while setting up this worker thread, giving up and returning => error: {}", partitionId, e); 221 | } 222 | } 223 | 224 | public void stop() { 225 | LOG.info("[{}] Stop request received", partitionId); 226 | stopping = true; 227 | } 228 | 229 | public long getMsgWrittenPerSec() { 230 | return messagesWritten / ((System.currentTimeMillis() - startTime) / 1000); 231 | } 232 | } 233 | -------------------------------------------------------------------------------- /src/main/java/com/blackberry/bdp/kaboom/TimestampParser.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014 BlackBerry, Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package com.blackberry.bdp.kaboom; 18 | 19 | import java.text.ParseException; 20 | import java.util.Calendar; 21 | import java.util.TimeZone; 22 | 23 | import org.slf4j.Logger; 24 | import org.slf4j.LoggerFactory; 25 | 26 | public class TimestampParser { 27 | private static final Logger LOG = LoggerFactory 28 | .getLogger(TimestampParser.class); 29 | 30 | public static final int NO_ERROR = 0; 31 | public static final int ERROR = 1; 32 | 33 | private int error = NO_ERROR; 34 | 35 | private IntParser intParser = new IntParser(); 36 | 37 | private byte[] bytes; 38 | private int pos; 39 | private int limit; 40 | private int length; 41 | private Calendar cal; 42 | private final TimeZone UTC = TimeZone.getTimeZone("UTC"); 43 | 44 | public TimestampParser() { 45 | cal = Calendar.getInstance(UTC); 46 | } 47 | 48 | public void parse(byte[] bytes, int i, int length) { 49 | try { 50 | cal.clear(); 51 | this.bytes = bytes; 52 | pos = i; 53 | limit = i + length; 54 | error = NO_ERROR; 55 | 56 | if (length == 0) { 57 | LOG.error("Can't parse date from zero length byte array."); 58 | error = ERROR; 59 | return; 60 | } 61 | 62 | // If it starts with a number assume YYYY MM DD HH MM SS[.ffffff][TZ] with arbitrary separators. 63 | if (bytes[pos] >= '0' && bytes[pos] <= '9') { 64 | parseYyyyMmDd(); 65 | 66 | } 67 | // If it starts with a month name assume MMM DD HH MM SS[.ffffff][TZ] with arbitrary separators 68 | else if (bytes[pos] >= 'A' && bytes[pos] <= 'S') { 69 | parseMmmDd(); 70 | 71 | } 72 | // If we get a bad log line, we have an error and print the position and line 73 | else { 74 | LOG.debug("Can't parse timestamp @ position [{}] for line: {}", pos, new String(bytes, pos, length)); 75 | error = ERROR; 76 | return; 77 | } 78 | 79 | advance(); 80 | parseHour(); 81 | advance(); 82 | parseMinute(); 83 | advance(); 84 | parseSeconds(); 85 | parseFractions(); 86 | parseTZ(); 87 | 88 | this.length = pos - i; 89 | } catch (Throwable t) { 90 | LOG.debug("Error parsing timestamp.", t); 91 | error = ERROR; 92 | } 93 | } 94 | 95 | int tzDir; 96 | int tzHour; 97 | int tzMinute; 98 | 99 | private void parseTZ() { 100 | // Either Z, or +/-HH[[:]MM] 101 | if (bytes[pos] == 'Z') { 102 | pos++; 103 | return; 104 | } 105 | 106 | if (bytes[pos] == '-') { 107 | tzDir = 1; // a '-HH:MM' offset means we add it back to reach UTC 108 | pos++; 109 | } else if (bytes[pos] == '+') { 110 | tzDir = -1; // a '+HH:MM' offset means we subtract it to reach UTC 111 | pos++; 112 | } else { 113 | return; 114 | } 115 | 116 | if (bytes[pos] == '0') { 117 | tzHour = intParser.intFromBytes(bytes, pos + 1, 1); 118 | cal.add(Calendar.HOUR_OF_DAY, tzDir * tzHour); 119 | pos += 2; 120 | } else { 121 | tzHour = intParser.intFromBytes(bytes, pos, 2); 122 | cal.add(Calendar.HOUR_OF_DAY, tzDir * tzHour); 123 | pos += 2; 124 | } 125 | 126 | if (bytes[pos] == ':') { 127 | pos++; 128 | } 129 | 130 | if (bytes[pos] == '0') { 131 | tzMinute = intParser.intFromBytes(bytes, pos + 1, 1); 132 | cal.add(Calendar.MINUTE, tzDir * tzMinute); 133 | pos += 2; 134 | } else if (bytes[pos] >= '0' && bytes[pos] <= '9') { 135 | tzMinute = intParser.intFromBytes(bytes, pos, 2); 136 | cal.add(Calendar.MINUTE, tzDir * tzMinute); 137 | pos += 2; 138 | } 139 | } 140 | 141 | int fracStart; 142 | int fracLength; 143 | 144 | private void parseFractions() { 145 | if (bytes[pos] == '.') { 146 | pos++; 147 | fracStart = pos; 148 | while (pos < limit && bytes[pos] >= '0' && bytes[pos] <= '9') { 149 | pos++; 150 | } 151 | fracLength = pos - fracStart; 152 | if (fracLength == 0) { 153 | // dot and no numbers? I'll allow it. 
154 | cal.set(Calendar.MILLISECOND, 0); 155 | } else if (fracLength == 1) { 156 | cal.set(Calendar.MILLISECOND, 157 | 100 * intParser.intFromBytes(bytes, fracStart, fracLength)); 158 | } else if (fracLength == 2) { 159 | cal.set(Calendar.MILLISECOND, 160 | 10 * intParser.intFromBytes(bytes, fracStart, fracLength)); 161 | } else { 162 | // three or more digits, just read the first three 163 | cal.set(Calendar.MILLISECOND, 164 | intParser.intFromBytes(bytes, fracStart, 3)); 165 | } 166 | } 167 | } 168 | 169 | private void parseSeconds() { 170 | // two digits, zero padded. 171 | if (bytes[pos] == '0') { 172 | cal.set(Calendar.SECOND, intParser.intFromBytes(bytes, pos + 1, 1)); 173 | } else { 174 | cal.set(Calendar.SECOND, intParser.intFromBytes(bytes, pos, 2)); 175 | } 176 | pos += 2; 177 | } 178 | 179 | private void parseMinute() { 180 | // two digits, zero padded. 181 | if (bytes[pos] == '0') { 182 | cal.set(Calendar.MINUTE, intParser.intFromBytes(bytes, pos + 1, 1)); 183 | } else { 184 | cal.set(Calendar.MINUTE, intParser.intFromBytes(bytes, pos, 2)); 185 | } 186 | pos += 2; 187 | } 188 | 189 | private void parseHour() { 190 | // two digits, zero padded. 191 | if (bytes[pos] == '0') { 192 | cal.set(Calendar.HOUR_OF_DAY, intParser.intFromBytes(bytes, pos + 1, 1)); 193 | } else { 194 | cal.set(Calendar.HOUR_OF_DAY, intParser.intFromBytes(bytes, pos, 2)); 195 | } 196 | pos += 2; 197 | } 198 | 199 | private int areEqualPos; 200 | 201 | // Compare two byte array slices 202 | private boolean areEqual(byte[] b1, int p1, byte[] b2, int p2, int length) { 203 | areEqualPos = 0; 204 | while (areEqualPos < length) { 205 | if (b1[p1 + areEqualPos] != b2[p2 + areEqualPos]) { 206 | return false; 207 | } 208 | areEqualPos++; 209 | } 210 | return true; 211 | } 212 | 213 | private byte b; 214 | 215 | private void advance() throws ParseException { 216 | while (true) { 217 | b = bytes[pos]; 218 | if (b >= '0' && b <= '9') { 219 | break; 220 | } 221 | if (b == ' ' || b == '-' || b == ':' || b == 'T') { 222 | pos++; 223 | } else { 224 | throw new ParseException("Bad separator in timestamp: " 225 | + new String(bytes, pos, 1), pos); 226 | } 227 | } 228 | } 229 | 230 | private void parseMmmDd() throws ParseException { 231 | assumeYear(); 232 | parseMmm(); 233 | //advance(); 234 | pos++; 235 | parseDd(); 236 | } 237 | 238 | private void parseDd() { 239 | // Choices for day include two digits, space padded or zero padded. 
240 | if (bytes[pos] == ' ' || bytes[pos] == '0') { 241 | cal.set(Calendar.DAY_OF_MONTH, intParser.intFromBytes(bytes, pos + 1, 1)); 242 | } else { 243 | cal.set(Calendar.DAY_OF_MONTH, intParser.intFromBytes(bytes, pos, 2)); 244 | } 245 | pos += 2; 246 | } 247 | 248 | private static final byte[][] months = new byte[][] { "Jan".getBytes(), 249 | "Feb".getBytes(), "Mar".getBytes(), "Apr".getBytes(), "May".getBytes(), 250 | "Jun".getBytes(), "Jul".getBytes(), "Aug".getBytes(), "Sep".getBytes(), 251 | "Oct".getBytes(), "Nov".getBytes(), "Dec".getBytes() }; 252 | 253 | private int parseMmmPos; 254 | 255 | private void parseMmm() throws ParseException { 256 | for (parseMmmPos = 0; parseMmmPos < months.length; parseMmmPos++) { 257 | if (areEqual(months[parseMmmPos], 0, bytes, pos, 3)) { 258 | pos += 3; 259 | cal.set(Calendar.MONTH, parseMmmPos); 260 | return; 261 | } 262 | } 263 | throw new ParseException("Unrecognized month string.", pos); 264 | } 265 | 266 | private void parseYyyyMmDd() throws ParseException { 267 | parseYyyy(); 268 | advance(); 269 | parseMm(); 270 | advance(); 271 | parseDd(); 272 | 273 | } 274 | 275 | private void parseMm() { 276 | // two digits, zero padded. 277 | if (bytes[pos] == '0') { 278 | cal.set(Calendar.MONTH, intParser.intFromBytes(bytes, pos + 1, 1) - 1); 279 | } else { 280 | cal.set(Calendar.MONTH, intParser.intFromBytes(bytes, pos, 2) - 1); 281 | } 282 | pos += 2; 283 | } 284 | 285 | private void parseYyyy() { 286 | // Hopefully we won't have to deal with zero padded years. 287 | cal.set(Calendar.YEAR, intParser.intFromBytes(bytes, pos, 4)); 288 | pos += 4; 289 | } 290 | 291 | private void assumeYear() { 292 | cal.set(Calendar.YEAR, getCurrentCal().get(Calendar.YEAR)); 293 | } 294 | 295 | private Calendar currentCal = Calendar.getInstance(UTC); 296 | 297 | private Calendar getCurrentCal() { 298 | currentCal.setTimeInMillis(System.currentTimeMillis()); 299 | return currentCal; 300 | } 301 | 302 | public int getError() { 303 | return error; 304 | } 305 | 306 | public long getTimestamp() { 307 | return cal.getTimeInMillis(); 308 | } 309 | 310 | public int getLength() { 311 | return length; 312 | } 313 | 314 | } -------------------------------------------------------------------------------- /src/main/java/com/blackberry/bdp/kaboom/timestamps/TimestampWorker.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014 BlackBerry, Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
9 | */ 10 | package com.blackberry.bdp.kaboom.timestamps; 11 | 12 | import com.blackberry.bdp.kaboom.*; 13 | import java.net.InetAddress; 14 | import java.net.UnknownHostException; 15 | 16 | import org.slf4j.Logger; 17 | import org.slf4j.LoggerFactory; 18 | 19 | import com.blackberry.bdp.common.conversion.Converter; 20 | import com.blackberry.bdp.common.jmx.MetricRegistrySingleton; 21 | import com.blackberry.bdp.krackle.consumer.BrokerUnavailableException; 22 | import com.blackberry.bdp.krackle.consumer.Consumer; 23 | import java.io.IOException; 24 | import java.nio.charset.Charset; 25 | import java.util.HashMap; 26 | 27 | public class TimestampWorker { 28 | 29 | private static final Logger LOG = LoggerFactory.getLogger(TimestampWorker.class); 30 | 31 | private final String partitionId; 32 | private Consumer consumer; 33 | private long offset; 34 | private long timestamp; 35 | private final StartupConfig startupConfig; 36 | private final String topic; 37 | private final int partition; 38 | private final long startOffset; 39 | private final long endOffset; 40 | private String hostname; 41 | private long startTime; 42 | private long messagesConsumed = 0; 43 | private boolean stopping = false; 44 | String parsedDate; 45 | String template = "%y-%M-%d-%H"; 46 | long thisOffset; 47 | byte[] bytes = new byte[1024 * 1024]; 48 | int length; 49 | private final Charset UTF8 = Charset.forName("UTF-8"); 50 | int pos; 51 | 52 | public TimestampWorker(StartupConfig startupConfig, 53 | String topic, 54 | int partition, 55 | long startOffset, 56 | long endOffset) throws Exception { 57 | this.startupConfig = startupConfig; 58 | this.topic = topic; 59 | this.partition = partition; 60 | this.startOffset = startOffset; 61 | this.endOffset = endOffset; 62 | this.messagesConsumed = 0; 63 | 64 | offset = startOffset; 65 | partitionId = topic + "-" + partition; 66 | 67 | LOG.info("[{}] Created timestamp worker.", partitionId); 68 | //this.run(); 69 | } 70 | 71 | public void run() throws Exception { 72 | try { 73 | try { 74 | hostname = InetAddress.getLocalHost().getCanonicalHostName(); 75 | } catch (UnknownHostException e) { 76 | LOG.error("[{}] Can't determine local hostname", partitionId); 77 | hostname = "unknown.host"; 78 | } 79 | 80 | String clientId = "kaboom-" + hostname; 81 | 82 | consumer = new Consumer(startupConfig.getConsumerConfiguration(), clientId, topic, partition, startOffset, MetricRegistrySingleton.getInstance().getMetricsRegistry()); 83 | 84 | LOG.info("[{}] Created timestamp worker. 
Starting at offset {}.", partitionId, startOffset); 85 | 86 | byte version; 87 | PriParser pri = new PriParser(); 88 | VersionParser ver = new VersionParser(); 89 | TimestampParser tsp = new TimestampParser(); 90 | 91 | HashMap msgCounters = new HashMap<>(); 92 | HashMap injectedCounters = new HashMap<>(); 93 | long tsParseErrors = 0; 94 | 95 | while (stopping == false) { 96 | try { 97 | timestamp = 0; 98 | if (offset > endOffset) { 99 | LOG.info("[{}] offset {} is past end offset of {}, stopping", partitionId, offset, endOffset); 100 | stop(); 101 | continue; 102 | } 103 | if (Thread.interrupted()) { 104 | throw new Exception("This simple worker has been interrupted"); 105 | } 106 | 107 | length = consumer.getMessage(bytes, 0, bytes.length); 108 | 109 | if (length == -1) { 110 | continue; 111 | } 112 | 113 | // offset always refers to the next offset we expect to 114 | // since we just called consumer.getMessage() let's see 115 | // if the offset of the last message is what we expected 116 | // and handle the fun edge cases when it's not 117 | if (offset != consumer.getLastOffset()) { 118 | long highWatermark = consumer.getHighWaterMark(); 119 | 120 | if (offset > consumer.getLastOffset()) { 121 | if (offset < highWatermark) { 122 | /* 123 | * When using SNAPPY compression in Krackle's consumer there will be messages received 124 | * that are in the snappy block that are from earlier than our requested offset. When this 125 | * happens the consumer will continue to send us messages from within that block so we 126 | * should just be patient until the offsets are from where we want. 127 | */ 128 | continue; 129 | } else { 130 | if (offset > highWatermark) { 131 | throw new Exception(String.format("[%s] offset %d is greater than high watermark %d", partitionId, offset, highWatermark)); 132 | } 133 | } 134 | } else { 135 | String error = String.format("[%s] Offset anomaly! Expected: %d, Got %d, Consumer high watermark %d, latest %d, earliest %d", 136 | partitionId, 137 | offset, 138 | consumer.getLastOffset(), 139 | consumer.getHighWaterMark(), 140 | consumer.getLatestOffset(), 141 | consumer.getEarliestOffset()); 142 | throw new Exception(error); 143 | } 144 | } 145 | 146 | thisOffset = offset; 147 | offset = consumer.getNextOffset(); 148 | 149 | // (byte) 0xFE: -2 150 | // (byte) 0x00: 0 151 | // (byte) 0xFF: -1 152 | // Check for version 153 | if (bytes[0] == (byte) 0xFE) { 154 | version = bytes[1]; 155 | 156 | if (version == (byte) 0x00) { 157 | // Version 0 has a timestamp in the front, so we can skip that for now. Come back if we need it. 158 | pos = 10; 159 | } else { 160 | LOG.warn("[{}] Unrecognized encoding version: {}", partitionId, version); 161 | pos = 0; 162 | } 163 | } else { 164 | // version -1 is a raw log 165 | version = (byte) 0xFF; 166 | pos = 0; 167 | } 168 | 169 | // Optional PRI at the start of the line. 170 | if (pri.parsePri(bytes, pos, length)) { 171 | pos += pri.getPriLength(); 172 | } 173 | 174 | // On the off chance that someone is following RFC5424 and has 175 | // inserted a version in the log line. 176 | if (ver.parseVersion(bytes, pos, length - pos)) { 177 | // Skip the length of the version and the following space. 
178 | pos += ver.getVersionLength() + 1; 179 | } 180 | 181 | tsp.parse(bytes, pos, length - pos); 182 | 183 | if (tsp.getError() == TimestampParser.NO_ERROR) { 184 | timestamp = tsp.getTimestamp(); 185 | count(msgCounters, Converter.timestampTemplateBuilder(timestamp, template)); 186 | } else { 187 | if (version == (byte) 0x00) { 188 | LOG.debug("[{}] Failed to parse timestamp. Using stored timestamp", partitionId); 189 | timestamp = Converter.longFromBytes(bytes, 2); 190 | count(injectedCounters, Converter.timestampTemplateBuilder(timestamp, template)); 191 | tsParseErrors++; 192 | } else { 193 | LOG.debug("[{}] Error parsing timestamp.", partitionId); 194 | } 195 | } 196 | 197 | if ((length - pos) < 0) { 198 | LOG.info("[{}] Skipping message as length - pos < 0: timestamp: {}, pos: {}, length: {}", partitionId, timestamp, pos, length); 199 | continue; 200 | } 201 | 202 | } catch (Exception e) { 203 | LOG.error("[{}] Error processing message: ", partitionId, e); 204 | throw e; 205 | } 206 | } 207 | 208 | LOG.info("total counts for parsed timestamps within messages"); 209 | for (String parsed : msgCounters.keySet()) { 210 | long total = msgCounters.get(parsed); 211 | LOG.info("parsed from messages: string: {}, total:{}", parsed, total); 212 | } 213 | 214 | LOG.info("total counts for parsed timestamps from what klogger injected"); 215 | for (String parsed : injectedCounters.keySet()) { 216 | long total = injectedCounters.get(parsed); 217 | LOG.info("parsed from klogger's injected timestamp: {}, total:{}", parsed, total); 218 | } 219 | LOG.info("there were {} total timestamp parsing errors", tsParseErrors); 220 | 221 | LOG.info("[{}] Timestamp Counter Thingy version of KaBoom client shutting down.", partitionId); 222 | 223 | LOG.info("[{}] Worker stopped. (Read {} lines. Next offset is {})", partitionId, messagesConsumed, offset); 224 | } catch (BrokerUnavailableException | IOException e) { 225 | LOG.error("[{}] An exception occurred while setting up this worker thread, giving up and returning => error: {}", partitionId, e); 226 | } 227 | } 228 | 229 | private void count(HashMap<String, Long> counter, String parsedDate) { 230 | long val; 231 | if (counter.containsKey(parsedDate)) { 232 | val = counter.get(parsedDate); 233 | val++; 234 | counter.put(parsedDate, val); 235 | } else { 236 | LOG.info("offset {} introduced date {} with message: {}" , thisOffset, parsedDate, new String(bytes, pos, length - pos, UTF8)); 237 | val = 1; 238 | counter.put(parsedDate, val); 239 | } 240 | } 241 | 242 | public void stop() { 243 | LOG.info("[{}] Stop request received", partitionId); 244 | stopping = true; 245 | } 246 | 247 | public long getMsgWrittenPerSec() { 248 | return messagesConsumed / ((System.currentTimeMillis() - startTime) / 1000); 249 | } 250 | } 251 | -------------------------------------------------------------------------------- /src/main/java/com/blackberry/bdp/kaboom/KaBoom.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 BlackBerry Limited. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.blackberry.bdp.kaboom; 17 | 18 | import com.blackberry.bdp.common.jmx.MetricRegistrySingleton; 19 | import com.blackberry.bdp.common.props.Parser; 20 | import com.blackberry.bdp.kaboom.api.KaBoomClient; 21 | import com.codahale.metrics.Meter; 22 | import java.nio.charset.Charset; 23 | import java.util.regex.Matcher; 24 | import java.util.regex.Pattern; 25 | import org.apache.curator.framework.recipes.leader.LeaderSelector; 26 | import org.apache.hadoop.fs.FileSystem; 27 | import org.apache.zookeeper.CreateMode; 28 | import org.slf4j.Logger; 29 | import org.slf4j.LoggerFactory; 30 | import java.util.HashMap; 31 | import java.util.Iterator; 32 | import java.util.Map; 33 | import java.util.Properties; 34 | 35 | public class KaBoom { 36 | 37 | private static final Logger LOG = LoggerFactory.getLogger(KaBoom.class); 38 | private static final Charset UTF8 = Charset.forName("UTF-8"); 39 | boolean shutdown = false; 40 | private StartupConfig config; 41 | private KaBoomClient client; 42 | private final static Object serverLock = new Object(); 43 | private Meter deadWorkerMeter; 44 | private Meter gracefulWorkerShutdownMeter; 45 | 46 | public static void main(String[] args) throws Exception { 47 | 48 | MetricRegistrySingleton.getInstance().enableJmx(); 49 | 50 | LOG.info("*******************************************"); 51 | LOG.info("*** KABOOM SERVER START ***"); 52 | LOG.info("*******************************************"); 53 | 54 | new KaBoom().run(); 55 | } 56 | 57 | public KaBoom() throws Exception { 58 | } 59 | 60 | private void run() throws Exception { 61 | if (Boolean.parseBoolean(System.getProperty("metrics.to.console", "false").trim())) { 62 | MetricRegistrySingleton.getInstance().enableConsole(); 63 | } 64 | 65 | deadWorkerMeter = MetricRegistrySingleton.getInstance(). 66 | getMetricsRegistry().meter("kaboom:total:dead workers"); 67 | 68 | gracefulWorkerShutdownMeter = MetricRegistrySingleton.getInstance(). 
69 | getMetricsRegistry().meter("kaboom:total:gracefully shutdown workers"); 70 | 71 | try { 72 | Properties props = StartupConfig.getProperties(); 73 | Parser propsParser = new Parser(props); 74 | 75 | if (propsParser.parseBoolean("configuration.authority.zk", false)) { 76 | // TODO: ZK 77 | } else { 78 | LOG.info("Configuration authority is file based"); 79 | config = new StartupConfig(props); 80 | } 81 | 82 | config.logConfiguraton(); 83 | } catch (Exception e) { 84 | LOG.error("an error occurred while building configuration object: ", e); 85 | throw e; 86 | } 87 | 88 | // Ensure that the required zk paths exist 89 | for (String path : new String[]{config.getZkPathLeaderClientId(), 90 | config.getZkRootPathClients(), 91 | config.getZkRootPathPartitionAssignments(), 92 | config.getZkRootPathFlagAssignments()}) { 93 | if (config.getKaBoomCurator().checkExists().forPath(path) == null) { 94 | try { 95 | LOG.warn("the path {} was not found in ZK and needs to be created", path); 96 | config.getKaBoomCurator().create().creatingParentsIfNeeded().withMode(CreateMode.PERSISTENT).forPath(path); 97 | LOG.warn("path {} created in ZK", path); 98 | } catch (Exception e) { 99 | LOG.error("Error creating ZooKeeper node {} ", path, e); 100 | } 101 | } else { 102 | LOG.info("required path {} already exists in zookeeper", path); 103 | } 104 | } 105 | 106 | // Register our existence 107 | { 108 | client = new KaBoomClient(config.getKaBoomCurator(), 109 | String.format("%s/%s", config.getZkRootPathClients(), config.getKaboomId())); 110 | client.setId(config.getKaboomId()); 111 | client.setMode(CreateMode.EPHEMERAL); 112 | client.setHostname(config.getHostname()); 113 | client.setWeight(config.getWeight()); 114 | client.save(); 115 | } 116 | 117 | // Instantiate our load balancer 118 | Leader loadBalancer = null; 119 | if (config.getLoadBalancerType().equals("even")) { 120 | loadBalancer = new EvenLoadBalancer(config); 121 | } else { 122 | if (config.getLoadBalancerType().equals("local")) { 123 | loadBalancer = new LocalLoadBalancer(config); 124 | } 125 | } 126 | 127 | // Start leader election thread 128 | final LeaderSelector leaderSelector = new LeaderSelector(config.getKaBoomCurator(), 129 | config.getZkPathLeaderClientId(), loadBalancer); 130 | leaderSelector.autoRequeue(); 131 | leaderSelector.start(); 132 | 133 | final Map<String, Worker> partitionToWorkerMap = new HashMap<>(); 134 | final Map<String, Thread> partitionToThreadsMap = new HashMap<>(); 135 | 136 | { 137 | Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() { 138 | @Override 139 | public void run() { 140 | // Request shutdown and wait for the lock 141 | shutdown(); 142 | synchronized (serverLock) { 143 | // Stop all workers 144 | Iterator<Map.Entry<String, Worker>> workers = partitionToWorkerMap.entrySet().iterator(); 145 | while (workers.hasNext()) { 146 | Map.Entry<String, Worker> entry = workers.next(); 147 | entry.getValue().stop(); 148 | } 149 | 150 | // Wait for threads to finish 151 | Iterator<Map.Entry<String, Thread>> threads = partitionToThreadsMap.entrySet().iterator(); 152 | while (threads.hasNext()) { 153 | Map.Entry<String, Thread> entry = threads.next(); 154 | try { 155 | entry.getValue().join(); 156 | } catch (InterruptedException ie) { 157 | LOG.error("Interrupted waiting for thread to finish", ie); 158 | } 159 | } 160 | 161 | try { 162 | FileSystem.get(config.getHadoopConfiguration()).close(); 163 | } catch (Throwable t) { 164 | LOG.error("Error closing Hadoop filesystem", t); 165 | } 166 | try { 167 | config.getKaBoomCurator().delete().forPath("/kaboom/clients/" + config.getKaboomId()); 168 | } catch (Exception e) { 169 | 
LOG.error("Error deleting /kaboom/clients/{}", config.getKaboomId(), e); 170 | } 171 | leaderSelector.close(); 172 | config.getKaBoomCurator().close(); 173 | } 174 | } 175 | })); 176 | } 177 | 178 | Pattern topicPartitionPattern = Pattern.compile("^(.*)-(\\d+)$"); 179 | 180 | while (shutdown == false) { 181 | synchronized (serverLock) { 182 | // Get all my assignments and create a worker if there's anything not already being worked 183 | Map validWorkingPartitions = new HashMap<>(); 184 | for (String partitionId : client.getAssignments(config.getKaBoomCurator(), config.getZkRootPathPartitionAssignments())) { 185 | if (partitionToWorkerMap.containsKey(partitionId)) { 186 | if (false == partitionToThreadsMap.get(partitionId).isAlive()) { 187 | if (false == partitionToWorkerMap.get(partitionId).isAborting()) { 188 | LOG.info("worker thead for {} found to have been shutdown gracefully", partitionId); 189 | gracefulWorkerShutdownMeter.mark(); 190 | } else { 191 | LOG.error("worker thead for {} found dead (removed thread/worker objects)", partitionId); 192 | deadWorkerMeter.mark(); 193 | } 194 | validWorkingPartitions.remove(partitionId); 195 | partitionToWorkerMap.remove(partitionId); 196 | partitionToThreadsMap.remove(partitionId); 197 | } else { 198 | validWorkingPartitions.put(partitionId, true); 199 | } 200 | } else { 201 | LOG.info("KaBoom clientId {} assigned to partitonId {} and a worker doesn't exist", config.getKaboomId(), partitionId); 202 | Matcher m = topicPartitionPattern.matcher(partitionId); 203 | if (m.matches()) { 204 | String topic = m.group(1); 205 | int partition = Integer.parseInt(m.group(2)); 206 | try { 207 | Worker worker = new Worker(config, topic, partition); 208 | partitionToWorkerMap.put(partitionId, worker); 209 | partitionToThreadsMap.put(partitionId, new Thread(worker)); 210 | partitionToThreadsMap.get(partitionId).start(); 211 | LOG.info("KaBoom clientId {} assigned to partitonId {} and a new worker has been started", 212 | config.getKaboomId(), partitionId); 213 | validWorkingPartitions.put(partitionId, true); 214 | } catch (Exception e) { 215 | LOG.error("failed to create new worker for {}-{}", topic, partition, e); 216 | } 217 | } else { 218 | LOG.error("Could not get topic and partition from node name. 
({})", partitionId); 219 | } 220 | } 221 | } 222 | 223 | Iterator> iter = partitionToWorkerMap.entrySet().iterator(); 224 | while (iter.hasNext()) { 225 | Map.Entry entry = iter.next(); 226 | Worker worker = entry.getValue(); 227 | if (!validWorkingPartitions.containsKey(worker.getPartitionId())) { 228 | worker.stop(); 229 | LOG.info("Worker currently assigned to {} is no longer valid has been instructed to stop working", worker.getPartitionId()); 230 | } 231 | if (worker.pinged()) { 232 | if (!worker.getPong()) { 233 | LOG.error("[{}] has not responded from being pinged, aborting", worker.getPartitionId()); 234 | synchronized(worker.getZkLock()) { 235 | worker.abort(); 236 | partitionToThreadsMap.get(worker.getPartitionId()).interrupt(); 237 | iter.remove(); 238 | } 239 | } else { 240 | worker.ping(); 241 | } 242 | } else { 243 | worker.ping(); 244 | } 245 | } 246 | } 247 | Thread.sleep(config.getRunningConfig().getKaboomServerSleepDurationMs()); 248 | } 249 | } 250 | 251 | public void shutdown() { 252 | shutdown = true; 253 | } 254 | 255 | } 256 | -------------------------------------------------------------------------------- /docs/changes.md: -------------------------------------------------------------------------------- 1 | # KaBoom Changes 2 | 3 | ## 0.9.1-HF2 4 | 5 | * KABOOM-47: Release 0.9.1-HF2 (pull in formal Krackle 0.9.2 release) 6 | 7 | ## 0.9.1-HF1 8 | 9 | * KABOOM-46: Do not share consumer configurations 10 | 11 | ## 0.9.1 12 | 13 | * KABOOM-44: Upgrade Hadoop Client to 2.7.2 and Login via Keytab 14 | 15 | ## 0.9.0 16 | 17 | * KABOOM-3: Add JMX metric: Failed Block Writes 18 | * KABOOM-31: expose bytes written to HDFS via JMX 19 | * KABOOM-35: add a warning/error message in kaboom when /etc/hadoop/conf is not found or not defined in the classpath 20 | * KABOOM-36: Expose login context name as a confguration option 21 | * KABOOM-39: deprecate kerberos configs in kaboom.properties for kaboom versions >= 0.9.0 22 | * KABOOM-40: Create KaBoom temp dir on package install and set snappy temp dir to use it 23 | * KABOOM-41: Update Krackle dependency to 0.9.0 and remove -SNAPSHOT tag of BDP Common Dependency 24 | 25 | ## 0.8.4-HF2 26 | 27 | * KABOOM-37: Old timestamps are not being treated as skewed 28 | 29 | ## 0.8.4-HF1 30 | 31 | * KABOOM-33: Provide Better Resiliency around ZK Corruption 32 | 33 | ## 0.8.4 34 | 35 | Tickets: 36 | 37 | * KABOOM-26 Fix Overburdening Assignment (Even Load Balancer will not assign a partition to a node if that would make the node over-burdened). 
EvenLoadBalancer ensures clients meet the criteria of canTakeAnotherAssignment() and hasLocalPartition(KaBoomPartition partition) before assigning an unassigned partition 38 | * KABOOM-27 Implement Skewed Timestamp (Old/Future Parsed Dates) Handling 39 | * KABOOM-28 Improve Assignment and Lock Error Handling 40 | 41 | Features: 42 | 43 | * TimeBasedHdfsOutputPath: New method skewed() returns true if the boom file's date directory is too far into the past/future based on new running configuration options 44 | * TimeBasedHdfsOutputPath: OutputFile private class overwrites the boom filename, date directory, and data directory according to new running configuration options 45 | 46 | New Metrics: 47 | 48 | * kaboom:total:skewed time boom files // the total number of skewed boom files for the node 49 | * `kaboom:partitions:<partition>:skewed time boom files` // the total number of skewed boom files for the partition 50 | 51 | ## 0.8.3 (never released outside of labs) 52 | 53 | * kaboom-api dependency now on version 0.8.4 54 | * AsyncAssignee: moved node cache listener on assignment path to attribute and closes on release lock 55 | * AsyncAssignee: moved connection state listener to attribute and removes on release lock 56 | * TimeBasedHdfsOutputPath now waits for existing open files to close, waiting nodeOpenFileWaittimeMs between isFileClosed() checks 57 | * TimeBasedHdfsOutputPath gives up and deletes existing open files if not closed after nodeOpenFileForceDeleteSeconds 58 | * TimeBasedHdfsOutputPath now has an instance of the Worker as an attribute and can respond to pings when it's busy 59 | * Leader has a new method called refreshMetadata() that clears all convenience mappings and refreshes all metadata 60 | * Leader has a new method called pauseOnFirstDisconnectedAssignee() that iterates through all assignments and waits leaderNodeDisconnectionWaittimeSeconds on the first disconnected assigned node before calling refreshMetadata() and returning 61 | * Leader's main loop now calls refreshMetadata() and then pauseOnFirstDisconnectedAssignee() 62 | * Leader worst case running time increased by at least leaderNodeDisconnectionWaittimeSeconds 63 | 64 | 65 | ## 0.8.2-HF4 66 | 67 | * Updates the kaboom-api dependency to 0.8.3 (which resolves leaking Kafka simple consumer sockets) 68 | * Exposes the startup config's node cache listener as an accessible attribute 69 | * Removes unnecessary and unused mapping of proxy user to file system 70 | 71 | ## 0.8.2-HF3 72 | 73 | * Fixes KABOOM-20 - Maps Used by Leader Not Emptied 74 | * Fixes KABOOM-21 - Ensure Exceptions Are Not Swallowed And Abort Accordingly 75 | * Fixes KABOOM-22 - Bug in Closing LRU Boom File Closing Most Recently Used v2 76 | 77 | ## 0.8.2-HF2 78 | 79 | * Fixes IPGBD-4245/KABOOM-18 - Bug closing LRU Output File 80 | 81 | 82 | ## 0.8.2-HF1 83 | 84 | * Moves the node cache listener to an attribute of worker and closes off in the finally block executed after the run 85 | 86 | ## 0.8.2: 87 | 88 | * TODO: Re-write the change log 89 | 90 | ## 0.8.1 91 | 92 | * New metric, kaboom:total:gracefully restarted workers 93 | 94 | ## 0.8.0 95 | 96 | * Refactored configuration into startup/running, running configuration migrated to ZK 97 | * Introduction of worker sprints 98 | * Intended to be managed via Kontroller API and web interface 99 | 100 | ## 0.7.16-HF1 101 | 102 | * Improves exception handling and reduces file corruption when closing boom files to ensure that any problems closing the file result in the worker aborting the file. 
103 | 104 | ## 0.7.16 105 | 106 | * Adds a new stand-alone utility to write a boom file for a specific partition, start offset and end offset to a specific destination 107 | * Adds a new meter metric: kaboom:total:dead workers 108 | 109 | ## 0.7.15-HF1 110 | 111 | * Fixes IPGBD-3830 [Kaboom] Bug when handling zero padded date field 112 | 113 | ## 0.7.15 114 | 115 | * Ensures that only topics with unique HDFS root paths are examined 116 | * New configuration option: kaboom.propagate.ready.flags.delay.ms (long, how long to wait between paths, default 0) can be used to ease the burden on the name nodes if there's a massive amount of HDFS path traversal required 117 | 118 | ## 0.7.14 119 | 120 | * Moved the READY flag propagation to KaBoom (from an older to-be-deprecated project) 121 | * New JMX metric: `kaboom:topic:<topic>:flag propagator timer` (timer for recursive HDFS directory traversal) 122 | * New JMX metric: `kaboom:topic:<topic>:flag propagator paths checked` (meter for number of paths checked) 123 | * New JMX metric: `kaboom:topic:<topic>:flags written` (meter for number of flags created) 124 | * New configuration option: kaboom.propagate.ready.flags (boolean, default false) 125 | * New configuration option: kaboom.propagate.ready.flags.frequency (long, how often in ms to spawn propagator thread, default 10 * 60 * 1000) 126 | * Supports bdp-common 0.0.6 which provides all logging and monitoring deps 127 | * Instruments log4j2 with io.dropwizard.metrics 128 | * Adds a new API 129 | 130 | ## 0.7.13 131 | 132 | * Added kafkaOffset argument to FastBoomWriter.writeLine() 133 | * Added a lastKafkaOffset to FastBoomWriter 134 | * FastBoomWriter now keeps track of lastKafkaOffset and lastMessageTimestamp 135 | * Changed the periodicCloseExpiredPoll in TimeBasedHdfsOutputPath to store the expired boom file's lastKafkaOffset and lastMessageTimestamp instead of the worker's current values 136 | * Added a new global configuration property max.open.boom.files.per.partition (default: 5) to limit the number of open boom files per partition 137 | * Modified TimeBasedHdfsOutputPath to close off the oldest FastBoomWriter when the number of open boom files is greater than max.open.boom.files.per.partition, and update zookeeper with lastKafkaOffset and lastMessageTimestamp 138 | 139 | ## 0.7.12-HF4 140 | 141 | * ReadyFlagWriter now doesn't check for the existence of the data directory when it's writing flags 142 | 143 | ## 0.7.12-HF3 144 | 145 | * New configuration option boom.file.expired.grace.time.ms (default: 30 * 1000, thirty seconds): The time after a TimeBasedHdfsOutputPath output file expires before it's closed via the periodic file close interval triggered upon each message consumed from Kafka. This option is introduced to make the previously hard coded value configurable and uses the same default value as the previous hard coded value. 146 | * New configuration option forced.zk.offsetTimestamp.update.time.ms (default: 10 * 60 * 1000, ten minutes): Ensure that very quiet partitions are updating their offset timestamp in ZK even when they are not receiving any messages. If the last received message was during the previous hour and it's been more than this amount of milliseconds then write the start of the hour's timestamp into ZK for the partition (providing it hasn't already been stored for the current hour). 147 | * New configuration option kaboom.server.sleep.duration.ms (default: 10 * 1000, ten seconds): Exposes a configuration option for a previously hard coded property value with the same default value. 
151 | ## 0.7.12 152 | 153 | * Fixes a bug that threw an NPE when there was no work assigned to a client and the load balancer tried to check whether it was overworked 154 | 155 | ## 0.7.11 156 | 157 | * Adds an additional Kafka ready flag in the topic root 158 | * Logs a warning if the load balancer is still running after leader.sleep.duration.ms 159 | 160 | ## 0.7.10 161 | 162 | * Adds a new optional configuration option (String) kaboom.kafkaReady.flag.filename, default=_KAFKA_READY 163 | 164 | ## 0.7.9 165 | 166 | * Fixes a ReadyFlagWriter bug that would write flags to the current hour 167 | * Adds logging around the maxTimestamps that are stored in ZK 168 | * Depends on common-utils-dev 0.0.5 (to get the new ZK get/set utility) 169 | 170 | ## 0.7.8 171 | 172 | * Adds a total compression ratio histogram 173 | 174 | ## 0.7.7 175 | 176 | * Adds new optional configuration option (Short): kaboom.deflate.compression.level, default=6 177 | * Adds new optional configuration option (Short): topic..compression.level 178 | * Adds new histogram metric for compression ratio 179 | 180 | ## 0.7.6 181 | 182 | * Resolves a hostname bug in the LocalLoadBalancer 183 | 184 | ## 0.7.5 185 | 186 | * Improved ReadyFlagWriter logic: previous versions were buggy and had too many operations in their hourly loop instead of in the topic loop 187 | * Removes checks on the _READY flag in ReadyFlagWriter (_READY flags from LogDriver's LogMaintenance are deprecated by KaBoom 0.7.1 and later) 188 | * Fixes bugs related to concurrent access on shared TimeBasedHdfsOutputPath objects (each worker now instantiates its own) 189 | * Reduced INFO-level log messages throughout to limit logs to more important messages 190 | * Separates the pre-install script for DEB and RPM (as DEBs don't require the /var/run/kaboom directory) 191 | * Adds new optional configuration option (Long): leader.sleep.duration.ms, default=10 * 60 * 1000 192 | * Adds new optional configuration option (String) kaboom.load.balancer.type, default=even 193 | 194 | ## 0.7.4 195 | 196 | * Abstracts load balancing and adds two implementations: even and local 197 | 198 | ## 0.7.3 199 | 200 | * Adds native compression 201 | 202 | ## 0.7.2 203 | 204 | * Bumps the Krackle dependency to 0.7.10 so the consumer's broker socket has the keep-alive flag set 205 | 206 | ## 0.7.1 207 | 208 | * New timer metrics for HDFS flush time for topic-partition, topic, and total per server 209 | * New meter metrics for boom writes for topic-partition, topic, and total per server 210 | * Adds new required configuration option: hadoop.fs.uri 211 | * Adds new required topic configuration for HDFS root 
directory (string): topic..hdfsRootDir 212 | * Supports multiple numbered template based HDFS output paths per topic 213 | * Topic HDFS output paths are now configurable to be left open for specific durations 214 | * Adds new optional configuration option (boolean): kaboom.useTempOpenFileDirectory 215 | * Adds new optional configuration option (Integer): boom.file.buffer.size, default=16384 216 | * Adds new optional configuration option (Short): boom.file.replicas, default=3 217 | * Adds new optional configuration option (Long): boom.file.block.size=268435456 218 | * Adds new optional configuration option (String): boom.file.temp.prefix, default=_tmp_ 219 | * Adds new optional configuration option (Long): boom.file.flush.interval, default=30000 220 | * Adds new optional configuration option (Long): boom.file.close.expired.interval, default=60000 221 | 222 | ## 0.7.0 223 | 224 | * Deprecates all CDH-specific content, configuration, and project files 225 | * New dependency on Krackle 0.7.7 for configuring socket timeouts 226 | * New KaboomConfiguration class that encapsulates all the configuration 227 | * New accessor methods for instantiating CuratorFramework objects 228 | * Project builds produce an RPM artifact 229 | * Fixes synchronization on non-final workersLock object (used when instantiating metrics) 230 | * Removes unused imports 231 | * Worker.java, int length; byte version; int pos; are no longer initialized with default values that are never used 232 | * New method: private Map getTopicPathsFromProps(Properties props) 233 | * New method: private Map getTopicProxyUsersFromProps(Properties props) 234 | * new method: private Properties getProperties() 235 | 236 | ## 0.6.10 237 | 238 | * Re-formats source for Kaboom and Worker class 239 | * Adds offset overrides feature for single partitions to be set to specific offsets in ZK 240 | * Adds feature and configuration property to sink to lower offsets when offsets surpass the high watermark 241 | * Re-writes the offset handling code for when last offsets do not match expected offset 242 | * Adds new dependency to the new com.blackberry.common.props that simplifies parsing property files and will eventually be enhanced with ZK support 243 | -------------------------------------------------------------------------------- /dependency-reduced-pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4.0.0 4 | com.blackberry.bdp.kaboom 5 | kaboom 6 | kaboom 7 | 0.8.1 8 | http://blackberry.com 9 | 10 | scm:git:git@gitlab.rim.net:ipgbd-software/kaboom.git 11 | scm:git:git@gitlab.rim.net:ipgbd-software/kaboom.git 12 | https://gitlab.rim.net/ipgbd-software/kaboom 13 | 14 | 15 | 16 | 17 | 18 | org.eclipse.m2e 19 | lifecycle-mapping 20 | 1.0.0 21 | 22 | 23 | 24 | 25 | 26 | org.apache.maven.plugins 27 | maven-dependency-plugin 28 | [2.1,) 29 | 30 | copy-dependencies 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | maven-compiler-plugin 46 | 3.0 47 | 48 | 1.7 49 | 1.7 50 | 51 | 52 | 53 | maven-dependency-plugin 54 | 55 | 56 | install 57 | 58 | copy-dependencies 59 | 60 | 61 | ${project.build.directory}/lib 62 | runtime 63 | slf4j-log4j12,log4j 64 | 65 | 66 | 67 | 68 | 69 | org.codehaus.mojo 70 | buildnumber-maven-plugin 71 | 1.3 72 | 73 | 74 | validate 75 | 76 | create 77 | 78 | 79 | 80 | 81 | false 82 | false 83 | 84 | 85 | 86 | maven-jar-plugin 87 | 2.5 88 | 89 | 90 | 91 | true 92 | 93 | 94 | ${buildNumber} 95 | ${scmBranch} 96 | 97 | 98 | 99 | 100 | 101 | org.codehaus.mojo 102 
| native-maven-plugin 103 | 1.0-alpha-8 104 | true 105 | 106 | com.blackberry.bdp.kaboom.FastBoomWriter 107 | 108 | 109 | com.blackberry.bdp.kaboom.FastBoomWriter 110 | com.blackberry.bdp.kaboom.FastBoomWriter.h 111 | 112 | 113 | 114 | 115 | 116 | maven-antrun-plugin 117 | 1.8 118 | 119 | 120 | process-classes 121 | 122 | run 123 | 124 | 125 | true 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | org.vafer 142 | jdeb 143 | 1.3 144 | 145 | 146 | install 147 | 148 | jdeb 149 | 150 | 151 | true 152 | src/deb/control 153 | true 154 | 155 | 156 | template 157 | 158 | /opt/kaboom/config 159 | /opt/kaboom/lib 160 | /opt/kaboom/src 161 | /opt/kaboom/file_positions_cache 162 | /var/log/kaboom 163 | /var/run/kaboom 164 | 165 | 166 | perm 167 | 755 168 | kaboom 169 | kaboom 170 | 171 | 172 | 173 | file 174 | target/${project.build.finalName}.jar 175 | /opt/kaboom/lib/${project.build.finalName}.jar 176 | 177 | perm 178 | 644 179 | root 180 | root 181 | 182 | 183 | 184 | directory 185 | target/lib 186 | *.jar 187 | 188 | perm 189 | /opt/kaboom/lib 190 | 644 191 | root 192 | root 193 | 194 | 195 | 196 | files 197 | 198 | ${project.basedir}/conf/kaboom.properties.sample 199 | ${project.basedir}/conf/kaboom-env.sh.sample 200 | ${project.basedir}/conf/log4j2.xml.sample 201 | 202 | /opt/kaboom/config 203 | 204 | 205 | ${project.basedir}/init/kaboom.conf 206 | /etc/init/kaboom.conf 207 | file 208 | 209 | 210 | link 211 | /etc/init.d/kaboom 212 | /lib/init/upstart-job 213 | true 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | org.codehaus.mojo 222 | rpm-maven-plugin 223 | 2.1 224 | 225 | 226 | 227 | rpm 228 | 229 | 230 | 231 | 232 | 233 | _tmppath /tmp 234 | 235 | 2014, BlackBerry, Limited. 236 | true 237 | Application/Hadoop 238 | 239 | shadow-utils 240 | 241 | 755 242 | 644 243 | kaboom 244 | kaboom 245 | 246 | 247 | /opt/kaboom/lib 248 | 249 | 250 | 251 | 252 | /opt/kaboom/config 253 | 254 | 255 | ${project.basedir}/conf 256 | 257 | * 258 | 259 | 260 | 261 | 262 | 263 | /opt/kaboom/bin 264 | 744 265 | 266 | 267 | ${project.basedir}/bin 268 | 269 | * 270 | 271 | 272 | 273 | 274 | 275 | /var/run/kaboom 276 | 277 | 278 | /var/log/kaboom 279 | 280 | 281 | 282 | src/rpm/preinst 283 | 284 | 285 | 286 | 287 | maven-shade-plugin 288 | 2.3 289 | 290 | 291 | package 292 | 293 | shade 294 | 295 | 296 | 297 | 298 | 299 | com.blackberry.bdp.kaboom.KaBoom 300 | 301 | 302 | 303 | 304 | 305 | 306 | true 307 | 308 | 309 | *:* 310 | 311 | META-INF/*.SF 312 | META-INF/*.DSA 313 | META-INF/*.RSA 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | always 325 | warn 326 | 327 | 328 | false 329 | never 330 | fail 331 | 332 | HDPReleases 333 | HDP Releases 334 | http://repo.hortonworks.com/content/repositories/releases/ 335 | 336 | 337 | 338 | 339 | junit 340 | junit 341 | 4.11 342 | test 343 | 344 | 345 | hamcrest-core 346 | org.hamcrest 347 | 348 | 349 | 350 | 351 | 352 | UTF-8 353 | 354 | 355 | 356 | -------------------------------------------------------------------------------- /src/main/java/com/blackberry/bdp/kaboom/TimeBasedHdfsOutputPath.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2014 BlackBerry, Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.blackberry.bdp.kaboom; 17 | 18 | import com.blackberry.bdp.common.conversion.Converter; 19 | import com.blackberry.bdp.common.jmx.MetricRegistrySingleton; 20 | import com.blackberry.bdp.kaboom.api.KaBoomTopicConfig; 21 | import com.codahale.metrics.Meter; 22 | import java.io.IOException; 23 | import java.text.SimpleDateFormat; 24 | import java.util.Date; 25 | import java.util.HashMap; 26 | import java.util.Iterator; 27 | import java.util.Map; 28 | import java.util.Map.Entry; 29 | 30 | import org.apache.hadoop.fs.FileSystem; 31 | import org.apache.hadoop.fs.Path; 32 | import org.apache.hadoop.hdfs.DistributedFileSystem; 33 | import org.apache.hadoop.hdfs.client.HdfsDataOutputStream; 34 | import org.slf4j.Logger; 35 | import org.slf4j.LoggerFactory; 36 | 37 | /** 38 | * 39 | * @author dariens 40 | */ 41 | public class TimeBasedHdfsOutputPath { 42 | 43 | private static final Logger LOG = LoggerFactory.getLogger(TimeBasedHdfsOutputPath.class); 44 | 45 | private final StartupConfig config; 46 | private final KaBoomTopicConfig topicConfig; 47 | private final String topic; 48 | private final int partition; 49 | private final FileSystem fileSystem; 50 | private final String partitionId; 51 | private Worker worker; 52 | 53 | private final Map<Long, OutputFile> outputFileMap = new HashMap<>(); 54 | 55 | public TimeBasedHdfsOutputPath(StartupConfig kaboomConfig, 56 | KaBoomTopicConfig topicConfig, 57 | int partition) 58 | throws IOException, InterruptedException { 59 | this.config = kaboomConfig; 60 | this.topicConfig = topicConfig; 61 | this.partition = partition; 62 | this.topic = topicConfig.getId(); 63 | this.fileSystem = config.authenticatedFsForProxyUser(topicConfig.getProxyUser()); 64 | this.partitionId = String.format("%s-%d", topic, partition); 65 | } 66 | 67 | public FastBoomWriter getBoomWriter(long shiftNumber, long ts, String filename) throws IOException, Exception { 68 | long requestedStartTime = ts - ts % (this.config.getRunningConfig().getWorkerShiftDurationSeconds() * 1000); 69 | OutputFile requestedOutputFile = outputFileMap.get(requestedStartTime); 70 | if (requestedOutputFile == null) { 71 | requestedOutputFile = new OutputFile(shiftNumber, filename, requestedStartTime); 72 | outputFileMap.put(requestedStartTime, requestedOutputFile); 73 | if (outputFileMap.size() > config.getRunningConfig().getMaxOpenBoomFilesPerPartition()) { 74 | long oldestTs = getOldestLastUsedTimestamp(); 75 | try { 76 | OutputFile oldestOutputFile = outputFileMap.get(oldestTs); 77 | if (oldestOutputFile == null) { 78 | throw new Exception("Attempt at finding LRU output file returned null"); 79 | } 80 | oldestOutputFile.close(); 81 | LOG.info("[{}] Over max open boom file limit ({}/{}) closing LRU boom file: {}", 82 | partitionId, 83 | outputFileMap.size(), 84 | config.getRunningConfig().getMaxOpenBoomFilesPerPartition(), 85 | oldestOutputFile.openFilePath); 86 | outputFileMap.remove(oldestTs); 87 | } catch (Exception e) { 88 | LOG.error("[{}] Failed to close off oldest boom writer: ", partitionId, e); 89 | throw e; 90 | } 91 | } 92 | }
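		// Worked example of the bucket rounding above (comment added for clarity,
		// not in the original source): with a worker shift duration of 3600 seconds,
		// shiftMs = 3,600,000, so a message timestamp ts = 7,230,000 (00:02:00:30 into
		// the epoch) yields requestedStartTime = ts - ts % shiftMs = 7,200,000. All
		// messages in the same shift-aligned window therefore share one OutputFile,
		// and the map holds at most maxOpenBoomFilesPerPartition such buckets before
		// the least recently used one is closed by the eviction block above.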
93 | requestedOutputFile.lastUsedTimestamp = System.currentTimeMillis(); 94 | return requestedOutputFile.getBoomWriter(); 95 | } 96 | 97 | private long getOldestLastUsedTimestamp() { 98 | long oldestTs = outputFileMap.entrySet().iterator().next().getValue().lastUsedTimestamp; 99 | long outputFileStartTime = outputFileMap.entrySet().iterator().next().getKey(); 100 | for (Entry<Long, OutputFile> entry : outputFileMap.entrySet()) { 101 | if (entry.getValue().lastUsedTimestamp < oldestTs) { 102 | oldestTs = entry.getValue().lastUsedTimestamp; 103 | // We actually need the entry's key, which represents the output file's start time 104 | outputFileStartTime = entry.getKey(); 105 | } 106 | } 107 | return outputFileStartTime; 108 | } 109 | 110 | public void abortAll() { 111 | for (Map.Entry<Long, OutputFile> entry : outputFileMap.entrySet()) { 112 | entry.getValue().abort(); 113 | } 114 | } 115 | 116 | public void closeAll() throws IOException { 117 | for (Map.Entry<Long, OutputFile> entry : outputFileMap.entrySet()) { 118 | entry.getValue().close(); 119 | } 120 | } 121 | 122 | public void closeOffShift(long shiftNumber) throws Exception { 123 | Iterator<Map.Entry<Long, OutputFile>> iter = outputFileMap.entrySet().iterator(); 124 | while (iter.hasNext()) { 125 | Map.Entry<Long, OutputFile> entry = iter.next(); 126 | if (entry.getValue().shiftNumber == shiftNumber) { 127 | try { 128 | entry.getValue().close(); 129 | LOG.info("[{}] Shift #{} file closed: {} ({} files still open)", 130 | partitionId, 131 | shiftNumber, 132 | entry.getValue().openFilePath, 133 | outputFileMap.size()); 134 | iter.remove(); 135 | } catch (IOException | IllegalArgumentException e) { 136 | LOG.error("Error closing output path {}", this, e); 137 | throw e; 138 | } 139 | } 140 | } 141 | } 142 | 143 | /** 144 | * @return the partition 145 | */ 146 | public int getPartition() { 147 | return partition; 148 | } 149 | 150 | /** 151 | * @param worker the worker to set 152 | */ 153 | public void setWorker(Worker worker) { 154 | this.worker = worker; 155 | } 156 | 157 | private String dateString(Long ts) { 158 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm"); 159 | // DateFormat.format(Object) accepts a Number as epoch milliseconds 160 | String strDate = sdf.format(ts); 161 | return strDate; 162 | } 163 | 164 | private class OutputFile { 165 | 166 | private String dir; 167 | private String openFileDirectory; 168 | private String filename; 169 | private Path finalPath; 170 | private Path openFilePath; 171 | private FastBoomWriter boomWriter; 172 | private HdfsDataOutputStream hdfsDataOut; 173 | private long startTime; 174 | private Boolean useTempOpenFileDir; 175 | private long lastUsedTimestamp = System.currentTimeMillis(); 176 | private final long shiftNumber; 177 | private String dataDirectory; 178 | private Meter skewedTsBoomFilesTotal; 179 | private Meter skewedTsBoomFilesTopic; 180 | 181 | public OutputFile(long shiftNumber, String filename, Long startTime) throws Exception { 182 | this.shiftNumber = shiftNumber; 183 | this.filename = filename; 184 | this.startTime = startTime; 185 | this.useTempOpenFileDir = config.getRunningConfig().getUseTempOpenFileDirectory(); 186 | this.dataDirectory = topicConfig.getDefaultDirectory(); 187 | 188 | this.skewedTsBoomFilesTotal = MetricRegistrySingleton.getInstance().getMetricsRegistry() 189 | .meter("kaboom:total:skewed time boom files"); 190 | 191 | this.skewedTsBoomFilesTopic = MetricRegistrySingleton.getInstance().getMetricsRegistry() 192 | .meter("kaboom:partitions:" + partitionId + ":skewed time boom files"); 193 | 
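			// Comment added for clarity (not in the original source): when this
			// file's bucket start time is skewed too far into the past or future,
			// the running config may prepend a marker prefix to the filename,
			// redirect the file to an alternate data directory, and/or force the
			// date directory to "now"; each occurrence marks the two meters above.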
194 | if (skewed()) { 195 | if (config.getRunningConfig().getSkewedTsBoomFilenamePrefix() != null) 196 | filename = config.getRunningConfig().getSkewedTsBoomFilenamePrefix() 197 | + filename; 198 | 199 | if (config.getRunningConfig().getSkewedTsDataDir() != null) 200 | dataDirectory = config.getRunningConfig().getSkewedTsDataDir(); 201 | 202 | if (config.getRunningConfig().isSkewedTsDateDirToNow()) 203 | startTime = System.currentTimeMillis(); 204 | 205 | skewedTsBoomFilesTotal.mark(); 206 | skewedTsBoomFilesTopic.mark(); 207 | } 208 | 209 | dir = Converter.timestampTemplateBuilder(startTime, 210 | String.format("%s/%s", topicConfig.getHdfsRootDir(), dataDirectory)); 211 | finalPath = new Path(dir + "/" + filename); 212 | openFilePath = finalPath; 213 | 214 | if (useTempOpenFileDir) { 215 | // while open, the file lives in a temporary directory beneath the final directory 216 | openFileDirectory = String.format("%s/%s%s", dir, config.getRunningConfig().getBoomFileTmpPrefix(), this.filename); 217 | openFilePath = new Path(openFileDirectory + "/" + filename); 218 | } 219 | 220 | try { 221 | if (fileSystem.exists(openFilePath)) { 222 | long startWaitTime = System.currentTimeMillis(); 223 | DistributedFileSystem dfs = (DistributedFileSystem) fileSystem; 224 | if (!dfs.isFileClosed(openFilePath)) { 225 | LOG.warn("[{}] open file: waiting up to {} seconds for the file " 226 | + "to close, checking every {} ms: {}", 227 | partitionId, 228 | config.getRunningConfig().getNodeOpenFileForceDeleteSeconds(), 229 | config.getRunningConfig().getNodeOpenFileWaittimeMs(), 230 | openFilePath); 231 | 232 | } 233 | while (!dfs.isFileClosed(openFilePath)) { 234 | if (System.currentTimeMillis() - startWaitTime 235 | > (config.getRunningConfig().getNodeOpenFileForceDeleteSeconds() * 1000)) { 236 | LOG.warn("[{}] max wait time ({} seconds) elapsed for file close on {}", 237 | partitionId, 238 | config.getRunningConfig().getNodeOpenFileForceDeleteSeconds(), 239 | openFilePath); 240 | break; 241 | } 242 | Thread.sleep(config.getRunningConfig().getNodeOpenFileWaittimeMs()); 243 | if (worker.pinged()) 244 | worker.setPong(true); 245 | } 246 | 247 | fileSystem.delete(openFilePath, false); 248 | LOG.info("[{}] removed file from HDFS because it already existed: {}", 249 | partitionId, 250 | openFilePath); 251 | } 252 | 253 | hdfsDataOut = (HdfsDataOutputStream) fileSystem.create( 254 | openFilePath, 255 | config.getBoomFilePerms(), 256 | false, 257 | config.getRunningConfig().getBoomFileBufferSize(), 258 | config.getRunningConfig().getBoomFileReplicas(), 259 | config.getRunningConfig().getBoomFileBlocksize(), 260 | null); 261 | 262 | boomWriter = new FastBoomWriter( 263 | hdfsDataOut, 264 | topic, 265 | partition, 266 | config); 267 | 268 | if (config.getRunningConfig().getUseNativeCompression()) { 269 | boomWriter.loadNativeDeflateLib(); 270 | } 271 | 272 | LOG.info("[{}] FastBoomWriter created {}", partitionId, openFilePath); 273 | 274 | } catch (IOException | InterruptedException e) { 275 | LOG.error("[{}] Error creating file {}: ", partitionId, openFilePath, e); 276 | throw e; 277 | } 278 | } 279 | 280 | private boolean skewed() { 281 | if (config.getRunningConfig().getSkewedTsSecondsFuture() != null) { 282 | long futureThreshold = System.currentTimeMillis() 283 | + (config.getRunningConfig().getSkewedTsSecondsFuture() * 1000); 284 | if (startTime > futureThreshold) { 285 | LOG.info("[{}] skewed timestamp {} beyond future date {}", 286 | partitionId, 287 | dateString(startTime), 288 | dateString(futureThreshold)); 289 | return true; 290 | } 291 | } 292 | 293 | if (config.getRunningConfig().getSkewedTsSecondsPast() != null) { 294 | long pastThreshold = 
System.currentTimeMillis() - 295 | (config.getRunningConfig().getSkewedTsSecondsPast() * 1000); 296 | if (startTime < pastThreshold) { 297 | LOG.info("[{}] skewed timestamp {} beyond past date {}", 298 | partitionId, 299 | dateString(startTime), 300 | dateString(pastThreshold)); 301 | return true; 302 | } 303 | 304 | } 305 | return false; 306 | } 307 | 308 | public void abort() { 309 | LOG.info("Aborting output file: {}", openFilePath); 310 | 311 | try { 312 | boomWriter.close(); 313 | } catch (IOException e) { 314 | LOG.error("[{}] Error closing boom writer: {}", partitionId, openFilePath, e); 315 | } 316 | 317 | try { 318 | hdfsDataOut.close(); 319 | } catch (IOException e) { 320 | LOG.error("[{}] Error closing boom writer output file: {}", partitionId, openFilePath, e); 321 | } 322 | 323 | try { 324 | if (useTempOpenFileDir) { 325 | fileSystem.delete(new Path(openFileDirectory), true); 326 | LOG.info("[{}] Deleted temp open file directory: {}", partitionId, openFileDirectory); 327 | } else { 328 | fileSystem.delete(openFilePath, true); 329 | LOG.info("[{}] Deleted open file: {}", partitionId, openFilePath); 330 | } 331 | } catch (IOException e) { 332 | LOG.error("[{}] Error deleting open file: {}", partitionId, openFilePath, e); 333 | } 334 | } 335 | 336 | public void close() throws IOException, IllegalArgumentException { 337 | LOG.info("[{}] Closing {}", partitionId, openFilePath); 338 | try { 339 | boomWriter.close(); 340 | LOG.info("[{}] Boom writer closed for {}", partitionId, openFilePath); 341 | 342 | hdfsDataOut.close(); 343 | LOG.info("[{}] Output stream closed for {}", partitionId, openFilePath); 344 | 345 | if (useTempOpenFileDir) { 346 | fileSystem.rename(openFilePath, finalPath); 347 | LOG.info("[{}] moved {} to {}", partitionId, openFilePath, finalPath); 348 | 349 | fileSystem.delete(new Path(openFileDirectory), true); 350 | LOG.info("[{}] Deleted temp open file directory: {}", partitionId, openFileDirectory); 351 | } 352 | } catch (IOException ioe) { 353 | LOG.error("[{}] Error closing up boomWriter {}:", partitionId, openFilePath, ioe); 354 | throw ioe; 355 | } 356 | } 357 | 358 | public Long getStartTime() { 359 | return startTime; 360 | } 361 | 362 | public FastBoomWriter getBoomWriter() { 363 | return boomWriter; 364 | } 365 | 366 | } 367 | 368 | } 369 | -------------------------------------------------------------------------------- /src/main/java/com/blackberry/bdp/kaboom/Authenticator.java: -------------------------------------------------------------------------------- 1 | /** Copyright 2014 BlackBerry, Limited. 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package com.blackberry.bdp.kaboom; 17 | 18 | import java.io.IOException; 19 | import java.security.PrivilegedExceptionAction; 20 | import java.util.HashMap; 21 | import java.util.Map; 22 | import java.util.concurrent.atomic.AtomicReference; 23 | 24 | import org.apache.hadoop.security.SecurityUtil; 25 | import org.apache.hadoop.security.UserGroupInformation; 26 | import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; 27 | import org.slf4j.Logger; 28 | import org.slf4j.LoggerFactory; 29 | 30 | import com.google.common.base.Preconditions; 31 | 32 | public class Authenticator { 33 | private static final Logger LOG = LoggerFactory 34 | .getLogger(Authenticator.class); 35 | 36 | private String kerbConfPrincipal; 37 | private String kerbKeytab; 38 | /** 39 | * Singleton credential manager that manages static credentials for the entire 40 | * JVM 41 | */ 42 | private static final AtomicReference<KerberosUser> staticLogin = new AtomicReference<>(); 43 | 44 | private final Map<String, UGIState> proxyUserMap; 45 | private final Object lock = new Object(); 46 | 47 | private final long reauthenticationRetryInterval = 10000; 48 | 49 | private Authenticator() { 50 | proxyUserMap = new HashMap<>(); 51 | } 52 | 53 | private static class SingletonHolder { 54 | public static final Authenticator INSTANCE = new Authenticator(); 55 | } 56 | 57 | public static Authenticator getInstance() { 58 | return SingletonHolder.INSTANCE; 59 | } 60 | 61 | public String getKerbConfPrincipal() { 62 | return kerbConfPrincipal; 63 | } 64 | 65 | public void setKerbConfPrincipal(String kerbConfPrincipal) { 66 | this.kerbConfPrincipal = kerbConfPrincipal; 67 | } 68 | 69 | public String getKerbKeytab() { 70 | return kerbKeytab; 71 | } 72 | 73 | public void setKerbKeytab(String kerbKeytab) { 74 | this.kerbKeytab = kerbKeytab; 75 | } 76 | 77 | /* 78 | * The following methods were taken from the Apache Flume project, and are 79 | * used under license. 80 | * 81 | * Licensed to the Apache Software Foundation (ASF) under one or more 82 | * contributor license agreements. See the NOTICE file distributed with this 83 | * work for additional information regarding copyright ownership. The ASF 84 | * licenses this file to you under the Apache License, Version 2.0 (the 85 | * "License"); you may not use this file except in compliance with the 86 | * License. You may obtain a copy of the License at 87 | * 88 | * http://www.apache.org/licenses/LICENSE-2.0 89 | * 90 | * Unless required by applicable law or agreed to in writing, software 91 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 92 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 93 | * License for the specific language governing permissions and limitations 94 | * under the License.
95 | */ 96 | private boolean authenticate(String proxyUserName) { 97 | UserGroupInformation proxyTicket; 98 | 99 | // logic for kerberos login 100 | boolean useSecurity = UserGroupInformation.isSecurityEnabled(); 101 | 102 | LOG.info("Hadoop Security enabled: " + useSecurity); 103 | 104 | if (useSecurity) { 105 | // sanity checking 106 | if (kerbConfPrincipal.isEmpty()) { 107 | LOG.error("Hadoop running in secure mode, but Flume config doesn't " 108 | + "specify a principal to use for Kerberos auth."); 109 | return false; 110 | } 111 | if (kerbKeytab.isEmpty()) { 112 | LOG.error("Hadoop running in secure mode, but Flume config doesn't " 113 | + "specify a keytab to use for Kerberos auth."); 114 | return false; 115 | } 116 | 117 | String principal; 118 | try { 119 | // resolves _HOST pattern using standard Hadoop search/replace 120 | // via DNS lookup when 2nd argument is empty 121 | principal = SecurityUtil.getServerPrincipal(kerbConfPrincipal, ""); 122 | } catch (IOException e) { 123 | LOG.error("Host lookup error resolving kerberos principal (" 124 | + kerbConfPrincipal + "). Exception follows.", e); 125 | return false; 126 | } 127 | 128 | Preconditions.checkNotNull(principal, "Principal must not be null"); 129 | KerberosUser prevUser = staticLogin.get(); 130 | KerberosUser newUser = new KerberosUser(principal, kerbKeytab); 131 | 132 | // be cruel and unusual when user tries to login as multiple principals 133 | // this isn't really valid with a reconfigure but this should be rare 134 | // enough to warrant a restart of the agent JVM 135 | // TODO: find a way to interrogate the entire current config state, 136 | // since we don't have to be unnecessarily protective if they switch all 137 | // HDFS sinks to use a different principal all at once. 138 | Preconditions.checkState(prevUser == null || prevUser.equals(newUser), 139 | "Cannot use multiple kerberos principals in the same agent. " 140 | + " Must restart agent to use new principal or keytab. " 141 | + "Previous = %s, New = %s", prevUser, newUser); 142 | 143 | // attempt to use cached credential if the user is the same 144 | // this is polite and should avoid flooding the KDC with auth requests 145 | UserGroupInformation curUser = null; 146 | if (prevUser != null && prevUser.equals(newUser)) { 147 | try { 148 | LOG.info("Attempting login as {} with cached credentials", prevUser.getPrincipal()); 149 | curUser = UserGroupInformation.getLoginUser(); 150 | } catch (IOException e) { 151 | LOG.warn("User unexpectedly had no active login. Continuing with " 152 | + "authentication", e); 153 | } 154 | } 155 | 156 | if (curUser == null || !curUser.getUserName().equals(principal)) { 157 | try { 158 | // static login 159 | curUser = kerberosLogin(this, principal, kerbKeytab); 160 | LOG.info("Current user obtained from Kerberos login {}", curUser.getUserName()); 161 | } catch (IOException e) { 162 | LOG.error("Authentication or file read error while attempting to " 163 | + "login as kerberos principal (" + principal + ") using " 164 | + "keytab (" + kerbKeytab + "). 
Exception follows.", e); 165 | return false; 166 | } 167 | } else { 168 | LOG.debug("{}: Using existing principal login: {}", this, curUser); 169 | } 170 | 171 | try { 172 | if (UserGroupInformation.getLoginUser().isFromKeytab() == false) 173 | { 174 | LOG.warn("Using a keytab for authentication is {}", UserGroupInformation.getLoginUser().isFromKeytab()); 175 | LOG.warn("curUser.isFromKeytab(): {}", curUser.isFromKeytab()); 176 | LOG.warn("UserGroupInformation.getCurrentUser().isLoginKeytabBased(): {}", UserGroupInformation.getCurrentUser().isLoginKeytabBased()); 177 | LOG.warn("UserGroupInformation.isLoginKeytabBased(): {}", UserGroupInformation.isLoginKeytabBased()); 178 | LOG.warn("curUser.getAuthenticationMethod(): {}", curUser.getAuthenticationMethod()); 179 | //System.exit(1); 180 | } 181 | } catch (IOException e) { 182 | LOG.error("Failed to get login user.", e); 183 | System.exit(1); 184 | } 185 | 186 | // we supposedly got through this unscathed... so store the static user 187 | staticLogin.set(newUser); 188 | } 189 | 190 | // hadoop impersonation works with or without kerberos security 191 | proxyTicket = null; 192 | if (!proxyUserName.isEmpty()) { 193 | try { 194 | proxyTicket = UserGroupInformation.createProxyUser(proxyUserName, 195 | UserGroupInformation.getLoginUser()); 196 | } catch (IOException e) { 197 | LOG.error("Unable to login as proxy user. Exception follows.", e); 198 | return false; 199 | } 200 | } 201 | 202 | UserGroupInformation ugi = null; 203 | if (proxyTicket != null) { 204 | ugi = proxyTicket; 205 | } else if (useSecurity) { 206 | try { 207 | ugi = UserGroupInformation.getLoginUser(); 208 | } catch (IOException e) { 209 | LOG.error("Unexpected error: Unable to get authenticated user after " 210 | + "apparent successful login! Exception follows.", e); 211 | return false; 212 | } 213 | } 214 | 215 | if (ugi != null) { 216 | // dump login information 217 | AuthenticationMethod authMethod = ugi.getAuthenticationMethod(); 218 | LOG.info("Auth method: {}", authMethod); 219 | LOG.info(" User name: {}", ugi.getUserName()); 220 | LOG.info(" Using keytab: {}", ugi.isFromKeytab()); 221 | if (authMethod == AuthenticationMethod.PROXY) { 222 | UserGroupInformation superUser; 223 | try { 224 | superUser = UserGroupInformation.getLoginUser(); 225 | LOG.info(" Superuser auth: {}", superUser.getAuthenticationMethod()); 226 | LOG.info(" Superuser name: {}", superUser.getUserName()); 227 | LOG.info(" Superuser using keytab: {}", superUser.isFromKeytab()); 228 | } catch (IOException e) { 229 | LOG.error("Unexpected error: unknown superuser impersonating proxy.", 230 | e); 231 | return false; 232 | } 233 | } 234 | 235 | LOG.info("Logged in as user {}", ugi.getUserName()); 236 | 237 | UGIState state = new UGIState(); 238 | state.ugi = proxyTicket; 239 | state.lastAuthenticated = System.currentTimeMillis(); 240 | proxyUserMap.put(proxyUserName, state); 241 | 242 | return true; 243 | } 244 | 245 | return true; 246 | } 247 | 248 | /** 249 | * Static synchronized method for static Kerberos login.
250 | * Static synchronized due to a thundering herd problem when multiple Sinks 251 | * attempt to log in using the same principal at the same time with the 252 | * intention of impersonating different users (or even the same user). If this 253 | * is not controlled, MIT Kerberos v5 believes it is seeing a replay attack 254 | * and it returns: Request is a replay (34) - 255 | * PROCESS_TGS. In addition, since the underlying Hadoop APIs we 256 | * are using for impersonation are static, we define this method as static as 257 | * well. 258 | * 259 | * @param principal 260 | * Fully-qualified principal to use for authentication. 261 | * @param keytab 262 | * Location of keytab file containing credentials for principal. 263 | * @return Logged-in user 264 | * @throws IOException 265 | * if login fails. 266 | */ 267 | private synchronized UserGroupInformation kerberosLogin( 268 | Authenticator authenticator, String principal, String keytab) 269 | throws IOException { 270 | 271 | // if we are the 2nd user thru the lock, the login should already be 272 | // available statically if login was successful 273 | UserGroupInformation curUser = null; 274 | try { 275 | curUser = UserGroupInformation.getLoginUser(); 276 | } catch (IOException e) { 277 | // not a big deal but this shouldn't typically happen because it will 278 | // generally fall back to the UNIX user 279 | LOG.debug("Unable to get login user before Kerberos auth attempt.", e); 280 | } 281 | 282 | // we already have logged in successfully 283 | if (curUser != null && curUser.getUserName().equals(principal)) { 284 | LOG.debug("{}: Using existing principal ({}): {}", new Object[] { 285 | authenticator, principal, curUser }); 286 | 287 | // no principal found 288 | } else { 289 | 290 | LOG.info("{}: Attempting kerberos login as principal ({}) from keytab " 291 | + "file ({})", new Object[] { authenticator, principal, keytab }); 292 | 293 | // attempt static kerberos login 294 | UserGroupInformation.loginUserFromKeytab(principal, keytab); 295 | curUser = UserGroupInformation.getLoginUser(); 296 | } 297 | 298 | return curUser; 299 | } 300 | 301 | private void reauthenticate(String proxyUser) { 302 | try { 303 | Thread.sleep(reauthenticationRetryInterval); 304 | } catch (InterruptedException e) { 305 | // do nothing. 306 | } 307 | 308 | synchronized (lock) { 309 | UGIState state = proxyUserMap.get(proxyUser); 310 | if (state == null 311 | || System.currentTimeMillis() - state.lastAuthenticated < reauthenticationRetryInterval) { 312 | authenticate(proxyUser); 313 | 314 | } 315 | } 316 | } 317 | 318 | /** 319 | * Allow methods to act as another user (typically used for HDFS Kerberos) 320 | * 321 | * @param <T> the return type of the privileged action 322 | * @param action the privileged action to run as the proxy user 323 | * @return the result of the action 324 | * @throws IOException 325 | * @throws InterruptedException 326 | */ 327 | public <T> T runPrivileged(final String proxyUser, 328 | final PrivilegedExceptionAction<T> action) throws IOException, 329 | InterruptedException { 330 | 331 | UGIState state = null; 332 | synchronized (lock) { 333 | state = proxyUserMap.get(proxyUser); 334 | if (state == null) { 335 | authenticate(proxyUser); 336 | state = proxyUserMap.get(proxyUser); 337 | } 338 | } 339 | 340 | UserGroupInformation proxyTicket = state.ugi; 341 | 342 | if (proxyTicket != null) { 343 | LOG.debug("Using proxy ticket {}", proxyTicket); 344 | try { 345 | return proxyTicket.doAs(action); 346 | } catch (IOException e) { 347 | LOG.error( 348 | "Caught IO exception while performing a privileged action. Reauthenticating.", 349 | e); 350 | reauthenticate(proxyUser); 351 | throw e; 352 | } catch (InterruptedException e) { 353 | LOG.error( 354 | "Caught interrupted exception while performing a privileged action. Reauthenticating.", 355 | e); 356 | reauthenticate(proxyUser); 357 | throw e; 358 | } 359 | } else { 360 | try { 361 | return action.run(); 362 | } catch (IOException ex) { 363 | throw ex; 364 | } catch (InterruptedException ex) { 365 | throw ex; 366 | } catch (RuntimeException ex) { 367 | throw ex; 368 | } catch (Exception ex) { 369 | throw new RuntimeException("Unexpected exception.", ex); 370 | } 371 | } 372 | }
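	// Hedged usage sketch (comment only, not in the original source): callers wrap
	// HDFS work in a PrivilegedExceptionAction and run it as a topic's proxy user.
	// The proxy user name and the action body below are illustrative:
	//
	//   FileSystem fs = Authenticator.getInstance().runPrivileged("someProxyUser",
	//       new PrivilegedExceptionAction<FileSystem>() {
	//           @Override
	//           public FileSystem run() throws Exception {
	//               return FileSystem.get(new Configuration());
	//           }
	//       });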
373 | 374 | private class UGIState { 375 | public UserGroupInformation ugi = null; 376 | public long lastAuthenticated = 0L; 377 | } 378 | 379 | } 380 | --------------------------------------------------------------------------------