├── CHANGES ├── VERSION ├── tests ├── Makefile ├── kafkawriter │ └── show-plugin.bro ├── Scripts │ └── get-bro-env └── btest.cfg ├── scripts ├── Kafka │ └── KafkaWriter │ │ ├── __load__.bro │ │ └── logs-to-kafka.bro ├── __load__.bro └── init.bro ├── src ├── Plugin.h ├── kafkawriter.bif ├── Plugin.cc ├── RoundRobinPartitioner.h ├── RoundRobinPartitioner.cc ├── AddingJson.h ├── Kafka.h ├── AddingJson.cc └── Kafka.cc ├── configure.plugin ├── Makefile ├── CMakeLists.txt ├── cmake └── FindLibRDKafka.cmake ├── COPYING ├── configure └── README /CHANGES: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 0.1 2 | -------------------------------------------------------------------------------- /tests/Makefile: -------------------------------------------------------------------------------- 1 | 2 | test: 3 | @btest 4 | -------------------------------------------------------------------------------- /tests/kafkawriter/show-plugin.bro: -------------------------------------------------------------------------------- 1 | # @TEST-EXEC: bro -NN Kafka::KafkaWriter >output 2 | # @TEST-EXEC: btest-diff output 3 | -------------------------------------------------------------------------------- /scripts/Kafka/KafkaWriter/__load__.bro: -------------------------------------------------------------------------------- 1 | # 2 | # This is loaded when a user activates the plugin. Include scripts here that should be 3 | # loaded automatically at that point. 4 | # 5 | 6 | -------------------------------------------------------------------------------- /scripts/__load__.bro: -------------------------------------------------------------------------------- 1 | # 2 | # This is loaded unconditionally at Bro startup. Include scripts here that should 3 | # always be loaded. 4 | # 5 | # Normally, that will be only code that initializes built-in elements. Load 6 | # your standard scripts in 7 | # scripts///__load__.bro instead. 8 | # 9 | 10 | @load ./init.bro -------------------------------------------------------------------------------- /src/Plugin.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef BRO_PLUGIN_KAFKA_KAFKAWRITER 3 | #define BRO_PLUGIN_KAFKA_KAFKAWRITER 4 | 5 | #include 6 | 7 | namespace plugin { 8 | namespace Kafka_KafkaWriter { 9 | 10 | class Plugin : public ::plugin::Plugin 11 | { 12 | protected: 13 | // Overridden from plugin::Plugin. 14 | virtual plugin::Configuration Configure(); 15 | }; 16 | 17 | extern Plugin plugin; 18 | 19 | } 20 | } 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /configure.plugin: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Hooks to add custom options to the configure script. 
4 | # 5 | 6 | plugin_usage() 7 | { 8 | cat < 2 | 3 | class RoundRobinPartitionerCallback : public RdKafka::PartitionerCb { 4 | //private: 5 | // int32_t partition_num; 6 | 7 | 8 | public: 9 | int32_t partitioner_cb(const RdKafka::Topic *topic, 10 | const std::string *key, 11 | int32_t partition_count, 12 | void *msg_opaque); 13 | 14 | int32_t partitioner_cb(const RdKafka::Topic *topic, 15 | const void *keydata, 16 | size_t keylen, 17 | int32_t partition_cnt, 18 | void *msg_opaque); 19 | 20 | 21 | RoundRobinPartitionerCallback(); 22 | }; 23 | -------------------------------------------------------------------------------- /tests/Scripts/get-bro-env: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # 3 | # BTest helper for getting values for Bro-related environment variables. 4 | 5 | base=`dirname $0` 6 | bro=`cat ${base}/../../build/CMakeCache.txt | grep BRO_DIST | cut -d = -f 2` 7 | 8 | if [ "$1" = "brobase" ]; then 9 | echo ${bro} 10 | elif [ "$1" = "bropath" ]; then 11 | ${bro}/build/bro-path-dev 12 | elif [ "$1" = "bro_plugin_path" ]; then 13 | ( cd ${base}/../.. && pwd ) 14 | elif [ "$1" = "bro_seed_file" ]; then 15 | echo ${bro}/testing/btest/random.seed 16 | elif [ "$1" = "path" ]; then 17 | echo ${bro}/build/src:${bro}/aux/btest:${base}/:${bro}/aux/bro-cut:$PATH 18 | else 19 | echo "usage: `basename $0` " >&2 20 | exit 1 21 | fi 22 | -------------------------------------------------------------------------------- /tests/btest.cfg: -------------------------------------------------------------------------------- 1 | [btest] 2 | TestDirs = kafkawriter 3 | TmpDir = %(testbase)s/.tmp 4 | BaselineDir = %(testbase)s/Baseline 5 | IgnoreDirs = .svn CVS .tmp 6 | IgnoreFiles = *.tmp *.swp #* *.trace .DS_Store 7 | 8 | [environment] 9 | BROBASE=`%(testbase)s/Scripts/get-bro-env brobase` 10 | BROPATH=`%(testbase)s/Scripts/get-bro-env bropath` 11 | BRO_PLUGIN_PATH=`%(testbase)s/Scripts/get-bro-env bro_plugin_path` 12 | BRO_SEED_FILE=`%(testbase)s/Scripts/get-bro-env bro_seed_file` 13 | PATH=`%(testbase)s/Scripts/get-bro-env path` 14 | TZ=UTC 15 | LC_ALL=C 16 | TRACES=%(testbase)s/Traces 17 | TMPDIR=%(testbase)s/.tmp 18 | BRO_TRACES=`%(testbase)s/Scripts/get-bro-env brobase`/testing/btest/Traces 19 | TEST_DIFF_CANONIFIER=`%(testbase)s/Scripts/get-bro-env brobase`/testing/scripts/diff-canonifier 20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # Convenience Makefile providing a few common top-level targets. 
3 | # 4 | 5 | cmake_build_dir=build 6 | arch=`uname -s | tr A-Z a-z`-`uname -m` 7 | 8 | all: build-it 9 | 10 | build-it: 11 | @test -e $(cmake_build_dir)/config.status || ./configure 12 | -@test -e $(cmake_build_dir)/CMakeCache.txt && \ 13 | test $(cmake_build_dir)/CMakeCache.txt -ot `cat $(cmake_build_dir)/CMakeCache.txt | grep BRO_DIST | cut -d '=' -f 2`/build/CMakeCache.txt && \ 14 | echo Updating stale CMake cache && \ 15 | touch $(cmake_build_dir)/CMakeCache.txt 16 | 17 | ( cd $(cmake_build_dir) && make ) 18 | 19 | install: 20 | ( cd $(cmake_build_dir) && make install ) 21 | 22 | clean: 23 | ( cd $(cmake_build_dir) && make clean ) 24 | 25 | distclean: 26 | rm -rf $(cmake_build_dir) 27 | 28 | test: 29 | make -C tests 30 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH}) 2 | 3 | cmake_minimum_required(VERSION 2.8) 4 | 5 | project(Plugin) 6 | 7 | include(BroPlugin) 8 | 9 | find_package(LibRDKafka) 10 | 11 | if ( LIBRDKAFKA_FOUND ) 12 | include_directories(BEFORE ${LibRDKafka_INCLUDE_DIR}) 13 | 14 | bro_plugin_begin(Kafka KafkaWriter) 15 | bro_plugin_cc(src/AddingJson.cc) 16 | bro_plugin_cc(src/Kafka.cc) 17 | bro_plugin_cc(src/Plugin.cc) 18 | bro_plugin_bif(src/kafkawriter.bif) 19 | bro_plugin_dist_files(README CHANGES COPYING VERSION) 20 | bro_plugin_link_library(${LibRDKafka_LIBRARIES}) 21 | bro_plugin_link_library(${LibRDKafka_C_LIBRARIES}) 22 | bro_plugin_end() 23 | message(STATUS "LibRDKafka prefix : ${LibRDKafka_ROOT_DIR}") 24 | message(STATUS "LibRDKafka Library : ${LibRDKafka_LIBRARIES}") 25 | else () 26 | message(FATAL_ERROR "LibRDKafka not found.") 27 | endif () 28 | -------------------------------------------------------------------------------- /src/RoundRobinPartitioner.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "RoundRobinPartitioner.h" 6 | #include 7 | 8 | using namespace RdKafka; 9 | 10 | 11 | RoundRobinPartitionerCallback::RoundRobinPartitionerCallback(){ 12 | srand(time(NULL)); 13 | } 14 | 15 | int32_t RoundRobinPartitionerCallback::partitioner_cb(const RdKafka::Topic *topic, 16 | const std::string *key, 17 | int32_t partition_count, 18 | void *msg_opaque) { 19 | // dead simple partitioner...simply chooses random partition 20 | return ((int32_t)rand()) % partition_count; 21 | } 22 | 23 | int32_t RoundRobinPartitionerCallback::partitioner_cb(const RdKafka::Topic *topic, 24 | const void *keydata, 25 | size_t keylen, 26 | int32_t partition_count, 27 | void *msg_opaque) 28 | { 29 | return ((int32_t)rand()) % partition_count; 30 | } 31 | 32 | -------------------------------------------------------------------------------- /scripts/Kafka/KafkaWriter/logs-to-kafka.bro: -------------------------------------------------------------------------------- 1 | module KafkaLogger; 2 | 3 | export { 4 | # redefine this in your script to identify the logs 5 | # that should be sent up to bro. 6 | # for example: 7 | # 8 | # redef KafkaLogger::logs_to_send = set(HTTP::LOG, Conn::Log, DNS::LOG); 9 | # 10 | # that will send the HTTP, Conn, and DNS logs up to Kafka. 
11 | # 12 | const logs_to_send: set[Log::ID] &redef; 13 | } 14 | 15 | event bro_init() &priority=-5 16 | { 17 | 18 | for (stream_id in Log::active_streams) 19 | { 20 | if (stream_id !in logs_to_send){ 21 | next; 22 | } 23 | # note: the filter name is different for each log, to cause Bro to instantiate 24 | # a new Writer instance for each log. The bro folks might want me 25 | # to do this with a single writer instance, but the mechanics of 26 | # modifying the field names and adding fields made it quite complicated, 27 | # so I opted to make one log writer per bro log going to Kafka. 28 | # This means there will be multiple connections from bro to the kafka 29 | # server, one per log file. 30 | local streamString = fmt("%s", stream_id); 31 | local pathname = fmt("%s", KafkaLogger::log_names[streamString]); 32 | local filter: Log::Filter = [$name = fmt("kafka-%s",stream_id), 33 | $writer = Log::WRITER_KAFKAWRITER, 34 | $path = pathname 35 | ]; 36 | Log::add_filter(stream_id, filter); 37 | 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /cmake/FindLibRDKafka.cmake: -------------------------------------------------------------------------------- 1 | # - Try to find LibRDKafka headers and libraries. 2 | # 3 | # Usage of this module as follows: 4 | # 5 | # find_package(LibRDKafka) 6 | # 7 | # Variables used by this module, they can change the default behaviour and need 8 | # to be set before calling find_package: 9 | # 10 | # LibRDKafka_ROOT_DIR Set this variable to the root installation of 11 | # LibRDKafka if the module has problems finding 12 | # the proper installation path. 13 | # 14 | # Variables defined by this module: 15 | # 16 | # LIBRDKAFKA_FOUND System has LibRDKafka libs/headers 17 | # LibRDKafka_LIBRARIES The LibRDKafka libraries 18 | # LibRDKafka_INCLUDE_DIR The location of LibRDKafka headers 19 | 20 | find_path(LibRDKafka_ROOT_DIR 21 | NAMES include/librdkafka/rdkafkacpp.h 22 | ) 23 | 24 | find_library(LibRDKafka_LIBRARIES 25 | NAMES rdkafka++ 26 | HINTS ${LibRDKafka_ROOT_DIR}/lib 27 | ) 28 | 29 | find_library(LibRDKafka_C_LIBRARIES 30 | NAMES rdkafka 31 | HINTS ${LibRDKafka_ROOT_DIR}/lib 32 | ) 33 | 34 | find_path(LibRDKafka_INCLUDE_DIR 35 | NAMES librdkafka/rdkafkacpp.h 36 | HINTS ${LibRDKafka_ROOT_DIR}/include 37 | ) 38 | 39 | include(FindPackageHandleStandardArgs) 40 | find_package_handle_standard_args(LibRDKafka DEFAULT_MSG 41 | LibRDKafka_LIBRARIES 42 | LibRDKafka_C_LIBRARIES 43 | LibRDKafka_INCLUDE_DIR 44 | ) 45 | 46 | mark_as_advanced( 47 | LibRDKafka_ROOT_DIR 48 | LibRDKafka_LIBRARIES 49 | LibRDKafka_C_LIBRARIES 50 | LibRDKafka_INCLUDE_DIR 51 | ) 52 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2015 by the United States Government/National Institutes of Health 3 | 4 | Written by Aaron Gee-Clough 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | (1) Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | (2) Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in 14 | the documentation and/or other materials provided with the 15 | distribution. 
16 | 17 | (3) Neither the name of the National Institutes of Health, the US Government, 18 | nor the names of contributors may be used to endorse or promote products 19 | derived from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 25 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | POSSIBILITY OF SUCH DAMAGE. 32 | -------------------------------------------------------------------------------- /src/AddingJson.h: -------------------------------------------------------------------------------- 1 | // See the file "COPYING" in the main distribution directory for copyright. 2 | #ifndef THREADING_FORMATTERS_ADDINGJSON_H 3 | #define THREADING_FORMATTERS_ADDINGJSON_H 4 | 5 | 6 | #include "threading/Formatter.h" 7 | #include "threading/SerialTypes.h" 8 | 9 | /** 10 | * A thread-safe class for converting values into a JSON representation 11 | * and vice versa. 12 | * 13 | * Modified to add fields to the JSON stream, depending on user-supplied 14 | * criteria. 15 | */ 16 | 17 | 18 | namespace threading { namespace formatter { 19 | 20 | class AddingJSON : public Formatter { 21 | public: 22 | enum TimeFormat { 23 | TS_EPOCH, // Doubles that represents seconds from the UNIX epoch. 24 | TS_ISO8601, // ISO 8601 defined human readable timestamp format. 25 | TS_MILLIS // Milliseconds from the UNIX epoch. Some consumers need this (e.g., elasticsearch). 26 | }; 27 | 28 | AddingJSON(threading::MsgThread* t, TimeFormat tf, char* argsensor_name, char* argtype_name, bool logstash_format_timestamps); 29 | ~AddingJSON(); 30 | 31 | bool AddingDescribe(ODesc* desc, threading::Value* val, const string& name="") const; 32 | bool Describe(ODesc* desc, threading::Value* val, const string& name = "") const; 33 | bool Describe(ODesc* desc, int num_fields, const threading::Field* const * fields, 34 | threading::Value** vals) const; 35 | threading::Value* ParseValue(const string& s, const string& name, TypeTag type, TypeTag subtype = TYPE_ERROR) const; 36 | 37 | void SurroundingBraces(bool use_braces); 38 | 39 | private: 40 | TimeFormat timestamps; 41 | bool surrounding_braces; 42 | const char* sensor_name; 43 | const char* type_name; 44 | bool logstash_timestamps; 45 | }; 46 | 47 | }} 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /src/Kafka.h: -------------------------------------------------------------------------------- 1 | // See the file "COPYING" in the main distribution directory for copyright. 2 | // 3 | // Log writer for writing to a Kafka broker 4 | // 5 | // This is experimental code that is not yet ready for production usage. 6 | // 7 | // check on these ifndef. Having this in both here and plugin.h causes 8 | // compile problems. 
9 | //#ifndef BRO_PLUGIN_KAFKA_KAFKAWRITER 10 | //#define BRO_PLUGIN_KAFKA_KAFKAWRITER 11 | 12 | #include "logging/WriterBackend.h" 13 | #include "logging/WriterFrontend.h" 14 | #include "threading/formatters/JSON.h" 15 | #include 16 | #include "Type.h" 17 | #include "AddingJson.h" 18 | #include 19 | 20 | namespace logging { namespace writer { 21 | 22 | class KafkaWriter : public WriterBackend { 23 | public: 24 | KafkaWriter(WriterFrontend* frontend); 25 | ~KafkaWriter(); 26 | 27 | static string LogExt(); 28 | static WriterBackend* Instantiate(WriterFrontend* frontend) 29 | {return new KafkaWriter(frontend); } 30 | 31 | 32 | protected: 33 | 34 | virtual bool DoInit(const WriterInfo& info, int num_fields, const threading::Field* const* fields); 35 | virtual bool DoWrite(int num_fields, const threading::Field* const* fields, threading::Value** vals); 36 | virtual bool DoSetBuf(bool enabled); 37 | virtual bool DoRotate(const char* rotated_path, double open, double close, bool terminating); 38 | virtual bool DoFlush(double network_time); 39 | virtual bool DoFinish(double network_time); 40 | virtual bool DoHeartbeat(double network_time, double current_time); 41 | 42 | 43 | private: 44 | 45 | threading::Field** MakeFields(const threading::Field* const* fields, int num_fields, std::string path); 46 | long transfer_timeout; 47 | 48 | // kafka configurations 49 | char* broker_name; 50 | int broker_name_len; 51 | char* topic_name; 52 | int topic_name_len; 53 | char* client_id; 54 | int client_id_len; 55 | char* compression_codec; 56 | int compression_codec_len; 57 | char* security_protocol; 58 | int security_protocol_len; 59 | char* kerberos_service_name; 60 | int kerberos_service_name_len; 61 | char* kerberos_keytab; 62 | int kerberos_keytab_len; 63 | char* kerberos_principal; 64 | int kerberos_principal_len; 65 | RdKafka::Conf *conf; 66 | RdKafka::Conf *tconf; 67 | RdKafka::Producer *producer; 68 | RdKafka::Topic *topic; 69 | uint64 batch_size; 70 | 71 | 72 | //varible to hold renamed fields 73 | threading::Field** fixed_fields; 74 | 75 | // formatter to turn fields/values into json. 76 | threading::formatter::AddingJSON* json_formatter; 77 | 78 | }; 79 | 80 | } 81 | } 82 | 83 | //#endif 84 | -------------------------------------------------------------------------------- /configure: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Wrapper for viewing/setting options that the plugin's CMake 4 | # scripts will recognize. 5 | # 6 | # Don't edit this. Edit configure.plugin to add plugin-specific options. 7 | # 8 | 9 | set -e 10 | command="$0 $*" 11 | 12 | if [ -e `dirname $0`/configure.plugin ]; then 13 | # Include custom additions. 14 | . `dirname $0`/configure.plugin 15 | fi 16 | 17 | # Check for `cmake` command. 
18 | type cmake > /dev/null 2>&1 || { 19 | echo "\ 20 | This package requires CMake, please install it first, then you may 21 | use this configure script to access CMake equivalent functionality.\ 22 | " >&2; 23 | exit 1; 24 | } 25 | 26 | usage() { 27 | 28 | cat 1>&2 </dev/null 2>&1; then 37 | plugin_usage 1>&2 38 | fi 39 | 40 | echo 41 | 42 | exit 1 43 | } 44 | 45 | # Function to append a CMake cache entry definition to the 46 | # CMakeCacheEntries variable 47 | # $1 is the cache entry variable name 48 | # $2 is the cache entry variable type 49 | # $3 is the cache entry variable value 50 | append_cache_entry () { 51 | CMakeCacheEntries="$CMakeCacheEntries -D $1:$2=$3" 52 | } 53 | 54 | # set defaults 55 | builddir=build 56 | brodist=`cd ../../.. && pwd` 57 | installroot="default" 58 | CMakeCacheEntries="" 59 | 60 | while [ $# -ne 0 ]; do 61 | case "$1" in 62 | -*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;; 63 | *) optarg= ;; 64 | esac 65 | 66 | case "$1" in 67 | --help|-h) 68 | usage 69 | ;; 70 | 71 | --bro-dist=*) 72 | brodist=`cd $optarg && pwd` 73 | ;; 74 | 75 | --install-root=*) 76 | installroot=$optarg 77 | ;; 78 | 79 | *) 80 | if type plugin_option >/dev/null 2>&1; then 81 | plugin_option $1 && shift && continue; 82 | fi 83 | 84 | echo "Invalid option '$1'. Try $0 --help to see available options." 85 | exit 1 86 | ;; 87 | esac 88 | shift 89 | done 90 | 91 | if [ ! -e "$brodist/bro-path-dev.in" ]; then 92 | echo "Cannot determine Bro source directory, use --bro-dist=DIR." 93 | exit 1 94 | fi 95 | 96 | append_cache_entry BRO_DIST PATH $brodist 97 | append_cache_entry CMAKE_MODULE_PATH PATH $brodist/cmake 98 | 99 | if [ "$installroot" != "default" ]; then 100 | mkdir -p $installroot 101 | append_cache_entry BRO_PLUGIN_INSTALL_ROOT PATH $installroot 102 | fi 103 | 104 | echo "Build Directory : $builddir" 105 | echo "Bro Source Directory : $brodist" 106 | 107 | mkdir -p $builddir 108 | cd $builddir 109 | 110 | cmake $CMakeCacheEntries .. 111 | 112 | echo "# This is the command used to configure this build" > config.status 113 | echo $command >> config.status 114 | chmod u+x config.status 115 | -------------------------------------------------------------------------------- /scripts/init.bro: -------------------------------------------------------------------------------- 1 | module KafkaLogger; 2 | 3 | export { 4 | 5 | # Logstash requires quotes around timestamp values, even if they 6 | # are doubles/floats. If you're consuming this data via logstash 7 | # you will want this to be set to "T". If you're consuming 8 | # these logs with something else, that's expecting timestamps 9 | # to be raw double values, set this to "F". 10 | const logstash_style_timestamp = F &redef; 11 | 12 | # the name of the kafka topic to connect to & send messages to. 13 | const topic_name = "logs" &redef; 14 | 15 | # the name of the kafka broker to send messages to. 16 | # This is a comma-separated list of kafka brokers. 17 | # format: hostname1:port,hostname2:port 18 | # no spaces. 19 | const broker_name = "localhost" &redef; 20 | 21 | # this is the name to add to each message to identify 22 | # this sensor in the logs. It will be added to a "sensor" 23 | # field in the submitted json. 24 | const sensor_name = "brosensor" &redef; 25 | 26 | # This is used internally by Kafka to trace requests and 27 | # errors. Kafka advises setting it to identify the 28 | # application making the requests/produces. 29 | const client_id = "bro" &redef; 30 | 31 | # codec to use. 0=none, 1=gzip, 2=snappy. 
32 | const compression_codec = "0" &redef; 33 | 34 | # controls for how often to send messages up to 35 | # kafka. Default is either every 500 messages, 36 | # every 10MB of buffer size, or every 10 seconds. 37 | # Any of these conditions hitting will cause 38 | # the produce up to Kafka. 39 | # if more than 10,000 messages are queued up, 40 | # the kafka producer will begin dropping messages. 41 | # max_batch_interval is the maximun number of milliseconds 42 | # that kafka will wait before sending logs. 43 | const default_batch_size = "500" &redef; 44 | const max_batch_size = "10000" &redef; 45 | const max_batch_interval = "1000" &redef; 46 | 47 | 48 | # These are the names of the log files that will be sent in the 49 | # json as the "type" field. These are also the keys used in the 50 | # column_rename variable above. 51 | const log_names = table(["AppStats::LOG"] = "app_stats", 52 | ["Barnyard2::LOG"] = "barnyard2", 53 | ["CaptureLoss::LOG"] = "capture_loss", 54 | ["Cluster::LOG"] = "cluster", 55 | ["Communication::LOG"] = "communication", 56 | ["Conn::LOG"] = "conn", 57 | ["DHCP::LOG"] = "dhcp", 58 | ["DNP3::LOG"] = "dnps", 59 | ["DNS::LOG"] = "dns", 60 | ["DPD::LOG"] = "dpd", 61 | ["Files::LOG"] = "files", 62 | ["FTP::LOG"] = "ftp", 63 | ["HTTP::LOG"] = "http", 64 | ["Intel::LOG"] = "intel", 65 | ["IRC::LOG"] = "irc", 66 | ["KRB::LOG"] = "kerberos", 67 | ["Known::CERTS_LOG"] = "known_certs", 68 | ["Known::DEVICES_LOG"] = "known_devices", 69 | ["Known::HOSTS_LOG"] = "known_hosts", 70 | ["Known::MODBUS_LOG"] = "known_modbus", 71 | ["Known::SERVICES_LOG"] = "known_services", 72 | ["LoadedScripts::LOG"] = "loaded_scripts", 73 | ["Modbus::LOG"] = "modbus", 74 | ["Modbus::REGISTER_CHANGE_LOG"] = "modbus_register_change_log", 75 | ["mysql::LOG"] = "mysql", 76 | ["Notice::LOG"] = "notice", 77 | ["Notice::ALARM_LOG"] = "notice_alarm", 78 | ["PacketFilter::LOG"] = "packet_filter", 79 | ["PE::LOG"] = "pe", 80 | ["RADIUS::LOG"] = "radius", 81 | ["RDP::LOG"] = "rdp", 82 | ["Reporter::LOG"] = "reporter", 83 | ["Signatures::LOG"] = "signatures", 84 | ["SIP::LOG"] = "sip", 85 | ["SMTP::LOG"] = "smtp", 86 | ["SNMP::LOG"] = "snmp", 87 | ["SOCKS::LOG"] = "socks", 88 | ["Software::LOG"] = "software", 89 | ["SSH::LOG"] = "ssh", 90 | ["SSL::LOG"] = "ssl", 91 | ["Stats::LOG"] = "stats", 92 | ["Syslog::LOG"] = "syslog", 93 | ["Traceroute::LOG"] = "traceroute", 94 | ["Tunnel::LOG"] = "tunnel", 95 | ["Unified2::LOG"] = "unified2", 96 | ["Weird::LOG"] = "weird", 97 | ["X509::LOG"] = "x509" 98 | ) &redef; 99 | const security_protocol = "NONE" &redef; 100 | const kerberos_service_name = "bro" &redef; 101 | const kerberos_principal = "kafka/kafkabroker@domain.com" &redef; 102 | const kerberos_keytab = "/etc/security/keytabs/bro.keytab" &redef; 103 | 104 | } 105 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | 2 | Kafka::KafkaWriter 3 | ================================= 4 | 5 | Kafka Writer plugin for Bro. This plugin will connect to a Kafka server, and will send JSON-format 6 | logs to the topic specified. 7 | 8 | Aside from sending bro logs to Kafka, it will also add two fields to each JSON message, and rename 9 | 5 of the fields from the bro native names. 10 | 11 | The added fields are: 12 | a "sensor" name as specified in the config, used to identify which bro server originated this log. 13 | and a "type" field, populated with the log type (dns, ssl, conn, etc). 
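For illustration only, a DNS record shipped by this plugin might look roughly like the sample below. The values are invented for this example, the exact field set depends on the log, and the renamed fields shown here are described in the next paragraph:

    {"timestamp":1452531600000,"uid":"CHhAvVGS1DHFjwGM9","source_ip":"192.0.2.10","source_port":53344,"dest_ip":"198.51.100.7","dest_port":53,"proto":"udp","query":"example.com","sensor":"brosensor","type":"dns"}
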
14 | 15 | The renamed fields are: 16 | "ts" will become "timestamp", 17 | "id.orig_h" becomes "source_ip", 18 | "id.orig_p" becomes "source_port", 19 | "id.resp_h" becomes "dest_ip", and 20 | "id.resp_p" becomes "dest_port". 21 | 22 | 23 | Users can override the sensor name as a standard redef in their config. Changing the type field names can 24 | be done by editing the init.bro script for the KafkaLogger (though I don't recommend it). I would like 25 | to make the field renaming configurable in the future, but had problems with the bro table structure 26 | in C++, so those are hard-coded in the C++ for the moment. 27 | 28 | (Note: timestamps are sent as millisecond timestamps, i.e., a UNIX epoch timestamp multiplied by 1000 29 | to include the milliseconds.) 30 | 31 | Configuration 32 | ================================ 33 | In your bro policy, include the logs-to-kafka bro script, and define the particular logs you want 34 | to send to Kafka, like this: 35 | 36 | @load Kafka/KafkaWriter/logs-to-kafka 37 | redef KafkaLogger::logs_to_send = set(HTTP::LOG, Conn::LOG, DNS::LOG, SMTP::LOG); 38 | 39 | See the Kafka init.bro file for the full list of logs you can flag to be sent to Kafka. (That file 40 | also contains the clean names used for each log in the "type" field.) 41 | 42 | You can also redefine how the bro client will connect to kafka with the following variables: 43 | 44 | redef KafkaLogger::topic_name = "logs"; 45 | which Kafka topic to send logs to. All logs from bro will be sent to this topic. 46 | 47 | 48 | redef KafkaLogger::broker_name = "hostname:port,hostname:port"; 49 | the name of the kafka broker to send messages to. 50 | This is a comma-separated list of kafka brokers. 51 | format: hostname1:port,hostname2:port 52 | no spaces. 53 | 54 | 55 | redef KafkaLogger::sensor_name = "brosensor"; 56 | The name to add to each message to identify 57 | this sensor in the logs. It will be added to a "sensor" 58 | field in the submitted json. 59 | 60 | 61 | redef KafkaLogger::client_id = "bro"; 62 | This is used internally by Kafka to trace requests and 63 | errors. Kafka advises setting it to identify the 64 | application making the requests/produces. The log name 65 | will be appended to this in the Kafka logs. 66 | 67 | 68 | redef KafkaLogger::compression_codec = "0"; 69 | Kafka can compress messages on the wire. This specifies 70 | which compression codec to use. 0=none, 1=gzip, 2=snappy. 71 | 72 | 73 | redef KafkaLogger::max_batch_size = "1000"; 74 | redef KafkaLogger::max_batch_interval = "10000"; 75 | How often to send logs: max_batch_size is the maximum number of queued messages, and max_batch_interval is the maximum number of milliseconds to wait before sending. 76 | 77 | If you use the Kerberised version of kafka, you'll need to switch KafkaLogger::security_protocol from PLAINTEXT to SASL_PLAINTEXT or SASL_SSL, 78 | and make sure you've set up the keytab properly; see 79 | https://github.com/edenhill/librdkafka/wiki/Using-SASL-with-librdkafka for more info on how to do 80 | that. You will also need a version of librdkafka built with SSL and Kerberos support. 81 | 82 | redef KafkaLogger::security_protocol = "SASL_PLAINTEXT"; 83 | 84 | redef KafkaLogger::kerberos_service_name = "kafka"; 85 | Needs to be redefined to match the service name for Kafka. 86 | 87 | redef KafkaLogger::kerberos_keytab = "/etc/security/keytabs/bro.keytab"; 88 | Needs to be redefined if your keytab file is stored elsewhere. 89 | 90 | redef KafkaLogger::kerberos_principal = "kafka/kafkabroker@domain.com"; 91 | Note: the hostname here must exactly match the hostname part of the broker's principal. 
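As a rough end-to-end sketch (not taken from this repository; the hostnames, principal, and topic are placeholders), a local.bro that ships a few logs to a Kerberised broker might contain something like:

    # Send HTTP, Conn, and DNS logs to the "bro_logs" topic on a SASL_PLAINTEXT cluster.
    @load Kafka/KafkaWriter/logs-to-kafka
    redef KafkaLogger::logs_to_send = set(HTTP::LOG, Conn::LOG, DNS::LOG);
    redef KafkaLogger::topic_name = "bro_logs";
    redef KafkaLogger::broker_name = "kafka1.example.com:9092,kafka2.example.com:9092";
    redef KafkaLogger::sensor_name = "sensor01";
    redef KafkaLogger::security_protocol = "SASL_PLAINTEXT";
    redef KafkaLogger::kerberos_service_name = "kafka";
    redef KafkaLogger::kerberos_keytab = "/etc/security/keytabs/bro.keytab";
    redef KafkaLogger::kerberos_principal = "kafka/kafka1.example.com@EXAMPLE.COM";

Since all of these options are declared &redef in init.bro, changing them only requires editing your policy; nothing needs to be rebuilt.
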
92 | 93 | If after setting up you get an error about "no common mechs", you probably need to install additional SASL modules; on Debian and derivatives, 94 | ''' 95 | apt-get install libsasl2-modules-gssapi-heimdal 96 | ''' 97 | should do the trick. 98 | 99 | Lastly, if you are consuming the events with logstash, you will need to set 100 | redef KafkaLogger::logstash_style_timestamp = T; 101 | This will set quotes around the "timestamp" field. Logstash is apparently 102 | looking for the timestamp to be a string, even if you configure it to be 103 | looking for unix timestamp values. (BTW, you will want to configure 104 | logstash with a filter that says: 105 | filter { date { match => ["timestamp", "UNIX_MS"] } } 106 | in order to get it to parse the timestamps properly.) 107 | 108 | Requirements 109 | ================================ 110 | You will need to install the librdkafka library independently of this plugin. The build will 111 | attempt to find the librdkafka libraries, but if you put them somewhere unusual, you will need to tell 112 | configure where to find them with the --with-librdkafka option. 113 | 114 | 115 | Notes & thank yous 116 | =============================== 117 | Much of this plugin is based on earlier code from Kurt Grutzmacher, who wrote a bro 2.2 logger for kafka. 118 | Also, many thanks to Robin Sommer for answering my beginner-level questions as I started this process. 119 | 120 | Lastly, this plugin will create a new connection from Bro to kafka for each log type to be sent to 121 | kafka. This may cause a lot of connections. At the moment, the plugin does not try to re-connect if 122 | it loses all connectivity to kafka, but the librdkafka library should handle failover between brokers 123 | transparently. 124 | -------------------------------------------------------------------------------- /src/AddingJson.cc: -------------------------------------------------------------------------------- 1 | // See the file "COPYING" in the main distribution directory for copyright. 2 | 3 | #include "bro-config.h" 4 | 5 | #ifndef __STDC_LIMIT_MACROS 6 | #define __STDC_LIMIT_MACROS 7 | #endif 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "BroString.h" 15 | #include "AddingJson.h" 16 | 17 | using threading::Value; 18 | using threading::Field; 19 | 20 | using namespace threading::formatter; 21 | 22 | AddingJSON::AddingJSON(MsgThread* t, 23 | TimeFormat tf, 24 | char* argsensor_name, 25 | char* argtype_name, 26 | bool logstash_format_timestamps) : Formatter(t), surrounding_braces(true) 27 | { 28 | timestamps = tf; 29 | sensor_name = argsensor_name; 30 | type_name = argtype_name; 31 | logstash_timestamps = logstash_format_timestamps; 32 | } 33 | 34 | AddingJSON::~AddingJSON() 35 | { 36 | } 37 | 38 | bool AddingJSON::Describe(ODesc* desc, 39 | int num_fields, 40 | const Field* const * fields, 41 | Value** vals) const 42 | { 43 | if ( surrounding_braces ) 44 | desc->AddRaw("{"); 45 | 46 | for ( int i = 0; i < num_fields; i++ ) 47 | { 48 | const u_char* bytes = desc->Bytes(); 49 | int len = desc->Len(); 50 | 51 | if ( i > 0 && len > 0 && bytes[len-1] != ',' && vals[i]->present ) 52 | desc->AddRaw(","); 53 | 54 | if ( ! 
Describe(desc, vals[i], fields[i]->name) ) 55 | return false; 56 | } 57 | // add sensor_name 58 | desc->AddRaw(",\"", 2); 59 | desc->AddRaw("sensor", 6); 60 | desc->AddRaw("\":\"", 3); 61 | desc->AddRaw((const char*)sensor_name, strlen(sensor_name)); 62 | desc->AddRaw("\",",2); 63 | 64 | // add log type 65 | desc->AddRaw("\"", 1); 66 | desc->AddRaw("type", 4); 67 | desc->AddRaw("\":\"", 3); 68 | desc->AddRaw((const char*)type_name, strlen(type_name)); 69 | desc->AddRaw("\"", 1); 70 | 71 | if ( surrounding_braces ) 72 | desc->AddRaw("}"); 73 | 74 | return true; 75 | } 76 | 77 | bool AddingJSON::Describe(ODesc* desc, Value* val, const string& name) const 78 | { 79 | if ( ! val->present ) 80 | return true; 81 | 82 | if ( name.size() ) 83 | { 84 | desc->AddRaw("\"", 1); 85 | desc->Add(name); 86 | desc->AddRaw("\":", 2); 87 | } 88 | 89 | switch ( val->type ) 90 | { 91 | case TYPE_BOOL: 92 | desc->AddRaw(val->val.int_val == 0 ? "false" : "true"); 93 | break; 94 | 95 | case TYPE_INT: 96 | desc->Add(val->val.int_val); 97 | break; 98 | 99 | case TYPE_COUNT: 100 | case TYPE_COUNTER: 101 | { 102 | // JSON doesn't support unsigned 64bit ints. 103 | if ( val->val.uint_val >= INT64_MAX ) 104 | { 105 | GetThread()->Error(GetThread()->Fmt("count value too large for JSON: %" PRIu64, val->val.uint_val)); 106 | desc->AddRaw("null", 4); 107 | } 108 | else 109 | desc->Add(val->val.uint_val); 110 | break; 111 | } 112 | 113 | case TYPE_PORT: 114 | desc->Add(val->val.port_val.port); 115 | break; 116 | 117 | case TYPE_SUBNET: 118 | desc->AddRaw("\"", 1); 119 | desc->Add(Render(val->val.subnet_val)); 120 | desc->AddRaw("\"", 1); 121 | break; 122 | 123 | case TYPE_ADDR: 124 | desc->AddRaw("\"", 1); 125 | desc->Add(Render(val->val.addr_val)); 126 | desc->AddRaw("\"", 1); 127 | break; 128 | 129 | case TYPE_DOUBLE: 130 | case TYPE_INTERVAL: 131 | desc->Add(val->val.double_val); 132 | break; 133 | 134 | case TYPE_TIME: 135 | { 136 | if ( timestamps == TS_ISO8601 ) 137 | { 138 | char buffer[40]; 139 | char buffer2[40]; 140 | time_t t = time_t(val->val.double_val); 141 | if ( strftime(buffer, sizeof(buffer), "%Y-%m-%dT%H:%M:%S", gmtime(&t)) > 0 ) 142 | { 143 | double integ; 144 | double frac = modf(val->val.double_val, &integ); 145 | snprintf(buffer2, sizeof(buffer2), "%s.%06.0fZ", buffer, frac * 1000000); 146 | desc->AddRaw("\"", 1); 147 | desc->Add(buffer2); 148 | desc->AddRaw("\"", 1); 149 | } 150 | else 151 | { 152 | GetThread()->Error(GetThread()->Fmt("strftime error for JSON: %" PRIu64)); 153 | 154 | } 155 | } 156 | else if ( timestamps == TS_EPOCH ) 157 | { 158 | if (logstash_timestamps){ 159 | desc->AddRaw("\"", 1); 160 | } 161 | 162 | desc->Add(val->val.double_val); 163 | 164 | if (logstash_timestamps){ 165 | desc->AddRaw("\"", 1); 166 | } 167 | } 168 | else if ( timestamps == TS_MILLIS ) 169 | { 170 | // ElasticSearch uses milliseconds for timestamps and json only 171 | // supports signed ints (uints can be too large). 
172 | uint64_t ts = (uint64_t) (val->val.double_val * 1000); 173 | if ( ts < INT64_MAX ) 174 | if (logstash_timestamps){ 175 | desc->AddRaw("\"", 1); 176 | } 177 | 178 | desc->Add(ts); 179 | 180 | if (logstash_timestamps){ 181 | desc->AddRaw("\"", 1); 182 | } 183 | else 184 | { 185 | GetThread()->Error(GetThread()->Fmt("time value too large for JSON milliseconds: %" PRIu64, ts)); 186 | desc->AddRaw("null", 4); 187 | } 188 | } 189 | 190 | break; 191 | } 192 | 193 | case TYPE_ENUM: 194 | case TYPE_STRING: 195 | case TYPE_FILE: 196 | case TYPE_FUNC: 197 | { 198 | desc->AddRaw("\"", 1); 199 | 200 | for ( int i = 0; i < val->val.string_val.length; ++i ) 201 | { 202 | char c = val->val.string_val.data[i]; 203 | 204 | // 2byte Unicode escape special characters. 205 | if ( c < 32 || c > 126 || c == '\n' || c == '"' || c == '\'' || c == '\\' || c == '&' ) 206 | { 207 | desc->AddRaw("\\u00", 4); 208 | char hex[2] = {'0', '0'}; 209 | bytetohex(c, hex); 210 | desc->AddRaw(hex, 1); 211 | desc->AddRaw(hex + 1, 1); 212 | } 213 | else 214 | desc->AddRaw(&c, 1); 215 | } 216 | 217 | desc->AddRaw("\"", 1); 218 | break; 219 | } 220 | 221 | case TYPE_TABLE: 222 | { 223 | desc->AddRaw("[", 1); 224 | 225 | for ( int j = 0; j < val->val.set_val.size; j++ ) 226 | { 227 | if ( j > 0 ) 228 | desc->AddRaw(",", 1); 229 | 230 | Describe(desc, val->val.set_val.vals[j]); 231 | } 232 | 233 | desc->AddRaw("]", 1); 234 | break; 235 | } 236 | 237 | case TYPE_VECTOR: 238 | { 239 | desc->AddRaw("[", 1); 240 | 241 | for ( int j = 0; j < val->val.vector_val.size; j++ ) 242 | { 243 | if ( j > 0 ) 244 | desc->AddRaw(",", 1); 245 | Describe(desc, val->val.vector_val.vals[j]); 246 | } 247 | 248 | desc->AddRaw("]", 1); 249 | break; 250 | } 251 | 252 | default: 253 | return false; 254 | } 255 | 256 | return true; 257 | } 258 | 259 | threading::Value* AddingJSON::ParseValue(const string& s, const string& name, TypeTag type, TypeTag subtype) const 260 | { 261 | GetThread()->Error("JSON formatter does not support parsing yet."); 262 | return NULL; 263 | } 264 | 265 | void AddingJSON::SurroundingBraces(bool use_braces) 266 | { 267 | surrounding_braces = use_braces; 268 | } 269 | -------------------------------------------------------------------------------- /src/Kafka.cc: -------------------------------------------------------------------------------- 1 | #include "bro-config.h" 2 | 3 | #include "util.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "BroString.h" 10 | #include "threading/SerialTypes.h" 11 | #include "Kafka.h" 12 | #include "kafkawriter.bif.h" 13 | 14 | using namespace logging; 15 | using namespace writer; 16 | using threading::Value; 17 | using threading::Field; 18 | using namespace RdKafka; 19 | 20 | 21 | class RandomPartitionerCallback : public RdKafka::PartitionerCb { 22 | private: 23 | unsigned int seed; 24 | 25 | public: 26 | int32_t partitioner_cb(const RdKafka::Topic *topic, const std::string *key, 27 | int32_t partition_count, void *msg_opaque) 28 | { 29 | return (int32_t)rand_r(&seed) % partition_count; 30 | } 31 | 32 | RandomPartitionerCallback(){ 33 | seed = time(NULL); 34 | } 35 | }; 36 | 37 | 38 | KafkaWriter::KafkaWriter(WriterFrontend* frontend) : WriterBackend(frontend) 39 | { 40 | 41 | json_formatter = 0; 42 | //srand(time(NULL)); 43 | producer = NULL; 44 | topic = NULL; 45 | 46 | 47 | // initialize kafka variables... 
48 | broker_name_len = BifConst::KafkaLogger::broker_name->Len(); 49 | broker_name = new char[broker_name_len + 1]; 50 | memcpy(broker_name, BifConst::KafkaLogger::broker_name->Bytes(), broker_name_len); 51 | broker_name[broker_name_len] = 0; 52 | 53 | topic_name_len = BifConst::KafkaLogger::topic_name->Len(); 54 | topic_name = new char[topic_name_len + 1]; 55 | memcpy(topic_name, BifConst::KafkaLogger::topic_name->Bytes(), topic_name_len); 56 | topic_name[topic_name_len] = 0; 57 | 58 | compression_codec_len = BifConst::KafkaLogger::compression_codec->Len(); 59 | compression_codec = new char[compression_codec_len + 1]; 60 | memcpy(compression_codec, BifConst::KafkaLogger::compression_codec->Bytes(), compression_codec_len); 61 | compression_codec[compression_codec_len] = 0; 62 | 63 | // initialize varibles used to store extra data appended to every message 64 | // (sensor name and log type) 65 | int sensor_name_len = BifConst::KafkaLogger::sensor_name->Len(); 66 | char* sensor_name = new char[sensor_name_len + 1]; 67 | memcpy(sensor_name, BifConst::KafkaLogger::sensor_name->Bytes(), sensor_name_len); 68 | sensor_name[sensor_name_len] = 0; 69 | 70 | security_protocol_len = BifConst::KafkaLogger::security_protocol->Len(); 71 | security_protocol = new char[security_protocol_len + 1]; 72 | memcpy(security_protocol, BifConst::KafkaLogger::security_protocol->Bytes(), security_protocol_len); 73 | security_protocol[security_protocol_len] = 0; 74 | 75 | kerberos_service_name_len = BifConst::KafkaLogger::kerberos_service_name->Len(); 76 | kerberos_service_name = new char[kerberos_service_name_len + 1]; 77 | memcpy(kerberos_service_name, BifConst::KafkaLogger::kerberos_service_name->Bytes(), kerberos_service_name_len); 78 | kerberos_service_name[kerberos_service_name_len] = 0; 79 | 80 | kerberos_keytab_len = BifConst::KafkaLogger::kerberos_keytab->Len(); 81 | kerberos_keytab = new char[kerberos_keytab_len + 1]; 82 | memcpy(kerberos_keytab, BifConst::KafkaLogger::kerberos_keytab->Bytes(), kerberos_keytab_len); 83 | kerberos_keytab[kerberos_keytab_len] = 0; 84 | 85 | kerberos_principal_len = BifConst::KafkaLogger::kerberos_principal->Len(); 86 | kerberos_principal = new char[kerberos_principal_len + 1]; 87 | memcpy(kerberos_principal, BifConst::KafkaLogger::kerberos_principal->Bytes(), kerberos_principal_len); 88 | kerberos_principal[kerberos_principal_len] = 0; 89 | 90 | int type_name_len = strlen(Info().path); 91 | char* type_name = new char[type_name_len + 1]; 92 | memcpy(type_name, Info().path, type_name_len); 93 | type_name[type_name_len] = 0; 94 | 95 | client_id_len = BifConst::KafkaLogger::client_id->Len() + strlen(Info().path) + 1; 96 | client_id = new char[client_id_len + 1]; 97 | memcpy(client_id, BifConst::KafkaLogger::client_id->Bytes(), client_id_len); 98 | strcat(client_id, "-"); 99 | strcat(client_id, type_name); 100 | client_id[client_id_len] = 0; 101 | 102 | json_formatter = new threading::formatter::AddingJSON(this, 103 | threading::formatter::AddingJSON::TS_MILLIS, 104 | sensor_name, 105 | type_name, 106 | BifConst::KafkaLogger::logstash_style_timestamp 107 | ); 108 | } 109 | 110 | KafkaWriter::~KafkaWriter() 111 | { 112 | delete [] broker_name; 113 | delete [] topic_name; 114 | delete [] client_id; 115 | delete [] compression_codec; 116 | delete [] fixed_fields; 117 | // I think I need to shut down the connection to the producer before deleting 118 | // these variables. Also, if I just blindly delete these two, bro segfaults when 119 | // shutting down. 
Confess I don't understand what's happening here. 120 | //delete producer; 121 | //delete topic; 122 | delete json_formatter; 123 | } 124 | 125 | 126 | threading::Field** KafkaWriter::MakeFields(const threading::Field* const* fields, int num_fields, std::string path){ 127 | // create the renamed fields, based on user-supplied config. 128 | threading::Field** newFields = (threading::Field**)malloc(sizeof(threading::Field*) * (num_fields)); 129 | 130 | // what I'd like to do is 131 | // first, grab the rename table for just this log 132 | // The config will have a table of table of strings. 133 | // the first table key is the name of the log (dns, http, etc) 134 | // the internal table key is the column name, the internal table value 135 | // will be the name to change that to. 136 | // loop over the existing fields, look up the field name in the rename table. 137 | // if it exists, create a new field entry with the new name, otherwise, 138 | // copy the existing field name in to the new field list. 139 | // 140 | // However, I can't get the bro TableVar Lookup to return anything 141 | // even for tables that I know have data in them. I'm clearly doing 142 | // something wrong. So, hardcode the renames in the interest of 143 | // getting something done. 144 | // 145 | // Also, need to remove "."s from names for ElasticSearch. 146 | // 147 | for (int i = 0; i < num_fields; i++){ 148 | std::string newName; 149 | 150 | if (strcmp(fields[i]->name, "ts") == 0) 151 | { 152 | newName = "timestamp"; 153 | } 154 | else if (strcmp(fields[i]->name, "id.orig_h") == 0) 155 | { 156 | newName = "source_ip"; 157 | } 158 | else if (strcmp(fields[i]->name, "id.orig_p") == 0) 159 | { 160 | newName = "source_port"; 161 | } 162 | else if (strcmp(fields[i]->name, "id.resp_h") == 0) 163 | { 164 | newName = "dest_ip"; 165 | } 166 | else if (strcmp(fields[i]->name,"id.resp_p") == 0) 167 | { 168 | newName = "dest_port"; 169 | } 170 | else if (strcmp(fields[i]->name, "seen.indicator") == 0) 171 | { 172 | newName = "indicator"; 173 | } 174 | else if (strcmp(fields[i]->name, "seen.indicator_type") == 0) 175 | { 176 | newName = "indicator_type"; 177 | } 178 | else if (strcmp(fields[i]->name, "seen.where") == 0) 179 | { 180 | newName = "seen_where"; 181 | } 182 | else if (strcmp(fields[i]->name, "seen.node") == 0) 183 | { 184 | newName = "seen_node"; 185 | } 186 | else if (strcmp(fields[i]->name, "data_channel.orig_h") == 0) //ftp 187 | { 188 | newName = "data_channel_source_port"; 189 | } 190 | else if (strcmp(fields[i]->name, "data_channel.passive") == 0) 191 | { 192 | newName = "data_channel_passive"; 193 | } 194 | else if (strcmp(fields[i]->name, "data_channel.resp_h") == 0) 195 | { 196 | newName = "data_channel_dest_ip"; 197 | } 198 | else if (strcmp(fields[i]->name, "data_channel.resp_p") == 0) 199 | { 200 | newName = "data_channel_dest_port"; 201 | } 202 | else if (strcmp(fields[i]->name, "bound.host") == 0) //socks 203 | { 204 | newName = "bound_host"; 205 | } 206 | else if (strcmp(fields[i]->name, "bounds.host") == 0) 207 | { 208 | newName = "bounds_host"; 209 | } 210 | else if (strcmp(fields[i]->name, "bound.name") == 0) 211 | { 212 | newName = "bound_name"; 213 | } 214 | 215 | if (newName.empty()){ 216 | newFields[i] = new threading::Field(fields[i]->name, 217 | fields[i]->secondary_name, 218 | fields[i]->type, 219 | fields[i]->subtype, 220 | true); 221 | } 222 | else { 223 | newFields[i]= new threading::Field(newName.c_str(), 224 | fields[i]->secondary_name, 225 | fields[i]->type, 226 | fields[i]->subtype, 227 
| true); 228 | } 229 | 230 | } 231 | 232 | return newFields; 233 | } 234 | 235 | bool KafkaWriter::DoInit(const WriterInfo& info, int num_fields, const threading::Field* const* fields) 236 | { 237 | conf = RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL); 238 | tconf = RdKafka::Conf::create(RdKafka::Conf::CONF_TOPIC); 239 | // note: hard-coding to use the random partitioner right now... 240 | RandomPartitionerCallback* part_cb = new RandomPartitionerCallback(); 241 | std::string errstr; 242 | 243 | // set up kafka connection (get brokers, set partitioner, etc) 244 | if (conf->set("metadata.broker.list", broker_name, errstr) != RdKafka::Conf::CONF_OK){ 245 | reporter->Error("Failed to set metatdata.broker.list: %s", errstr.c_str()); 246 | return false; 247 | } 248 | int default_batch_size_len = BifConst::KafkaLogger::default_batch_size->Len(); 249 | char* default_batch_size = new char[default_batch_size_len + 1]; 250 | memcpy(default_batch_size, BifConst::KafkaLogger::default_batch_size->Bytes(), default_batch_size_len); 251 | default_batch_size[default_batch_size_len] = 0; 252 | 253 | int max_batch_size_len = BifConst::KafkaLogger::max_batch_size->Len(); 254 | char* max_batch_size = new char[max_batch_size_len + 1]; 255 | memcpy(max_batch_size, BifConst::KafkaLogger::max_batch_size->Bytes(), max_batch_size_len); 256 | max_batch_size[max_batch_size_len] = 0; 257 | 258 | int max_batch_interval_len = BifConst::KafkaLogger::max_batch_interval->Len(); 259 | char* max_batch_interval = new char[max_batch_interval_len + 1]; 260 | memcpy(max_batch_interval, BifConst::KafkaLogger::max_batch_interval->Bytes(), max_batch_interval_len); 261 | max_batch_interval[max_batch_interval_len] = 0; 262 | 263 | conf->set("compression.codec", compression_codec, errstr); 264 | conf->set("client.id", client_id, errstr); 265 | conf->set("batch.num.messages", default_batch_size, errstr); 266 | conf->set("queue.buffering.max.messages", max_batch_size, errstr); 267 | conf->set("queue.buffering.max.ms", max_batch_interval, errstr); 268 | conf->set("producer.type", "async", errstr); 269 | 270 | if (strcmp("SASL_SSL", security_protocol) == 0) { 271 | //if ssl is supported without kerberos then this will have 272 | // to change to enable SSL but I'm not sure it make sense 273 | conf->set("security.protocol", security_protocol, errstr); 274 | } 275 | 276 | if ( (strcmp("SASL_SSL", security_protocol) ==0 ) || (strcmp("SASL_PLAINTEXT", security_protocol) ==0 ) ){ 277 | //SASL is enabled and we need to setup the kerberos options. 278 | 279 | conf->set("sasl.kerberos.service.name", kerberos_service_name, errstr); 280 | conf->set("sasl.kerberos.keytab", kerberos_keytab, errstr); 281 | conf->set("sasl.kerberos.principal", kerberos_principal, errstr); 282 | } 283 | 284 | if (tconf->set("partitioner_cb", part_cb, errstr) != RdKafka::Conf::CONF_OK){ 285 | reporter->Error("failed to set partitioner for Kafka. %s", errstr.c_str()); 286 | } 287 | 288 | producer = RdKafka::Producer::create(conf, errstr); 289 | if (!producer) { 290 | reporter->Error("Failed to create producer: %s", errstr.c_str()); 291 | return false; 292 | } 293 | topic = RdKafka::Topic::create(producer, topic_name, tconf, errstr); 294 | 295 | if (!topic) { 296 | reporter->Error("Failed to create topic."); 297 | return false; 298 | } 299 | 300 | // set up lookups and renamed fields. 
301 | fixed_fields = MakeFields(fields, num_fields, Info().path); 302 | 303 | return true; 304 | } 305 | 306 | 307 | bool KafkaWriter::DoWrite(int num_fields, const Field* const * fields, Value** vals) 308 | { 309 | ODesc buffer; 310 | 311 | // this may look silly, but as of this writing, Kafka's default 312 | // partitioning is poor. if you do not supply a key, kafka will never 313 | // call your partition function, even if one is specified in the config. 314 | // What it will do instead is choose a partition at random when it starts 315 | // up, and send everything to that partition. So, you need to supply a 316 | // partition key if you want your partitioner to be used 317 | const std::string partition_key = "this is a key to trigger partitioning."; 318 | 319 | 320 | buffer.Clear(); 321 | 322 | json_formatter->Describe(&buffer, num_fields, fixed_fields, vals); 323 | const char* bytes = (const char*)buffer.Bytes(); 324 | std::string errstr; 325 | 326 | // actually send the data to Kafka. 327 | RdKafka::ErrorCode resp = producer->produce(topic, 328 | RdKafka::Topic::PARTITION_UA, 329 | RdKafka::Producer::MSG_COPY /* Copy payload */, 330 | const_cast(bytes), 331 | strlen(bytes), 332 | &partition_key, 333 | NULL); 334 | if (resp != RdKafka::ERR_NO_ERROR) { 335 | errstr = RdKafka::err2str(resp); 336 | reporter->Error("Produce failed: %s", errstr.c_str()); 337 | reporter->Error("failed line: %s", bytes); 338 | } 339 | 340 | // Note: this bit here means that even if the send to kafka fails, we're just going to 341 | // drop the messages. Such is life. 342 | producer->poll(0); 343 | 344 | return true; 345 | } 346 | 347 | 348 | 349 | bool KafkaWriter::DoSetBuf(bool enabled) 350 | { 351 | // Nothing to do. 352 | return true; 353 | } 354 | 355 | bool KafkaWriter::DoFlush(double network_time) 356 | { 357 | // Nothing to do. 358 | return true; 359 | } 360 | 361 | bool KafkaWriter::DoFinish(double network_time) 362 | { 363 | RdKafka::wait_destroyed(5000); 364 | return true; 365 | } 366 | 367 | bool KafkaWriter::DoHeartbeat(double network_time, double current_time) 368 | { 369 | //nothing to do...all timing handled inside Kafka. 370 | return true; 371 | } 372 | 373 | bool KafkaWriter::DoRotate(const char* rotated_path, double open, double close, bool terminating) 374 | { 375 | // Nothing to do. 376 | FinishedRotation(); 377 | return true; 378 | } 379 | --------------------------------------------------------------------------------