├── CHANGES ├── VERSION ├── tests ├── Makefile ├── kafkawriter │ └── show-plugin.bro ├── Scripts │ └── get-bro-env └── btest.cfg ├── scripts ├── Kafka │ └── KafkaWriter │ │ ├── __load__.bro │ │ └── logs-to-kafka.bro ├── __load__.bro └── init.bro ├── src ├── Plugin.h ├── kafkawriter.bif ├── Plugin.cc ├── RoundRobinPartitioner.h ├── RoundRobinPartitioner.cc ├── AddingJson.h ├── Kafka.h ├── AddingJson.cc └── Kafka.cc ├── configure.plugin ├── Makefile ├── CMakeLists.txt ├── cmake └── FindLibRDKafka.cmake ├── COPYING ├── configure └── README /CHANGES: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 0.1 2 | -------------------------------------------------------------------------------- /tests/Makefile: -------------------------------------------------------------------------------- 1 | 2 | test: 3 | @btest 4 | -------------------------------------------------------------------------------- /tests/kafkawriter/show-plugin.bro: -------------------------------------------------------------------------------- 1 | # @TEST-EXEC: bro -NN Kafka::KafkaWriter >output 2 | # @TEST-EXEC: btest-diff output 3 | -------------------------------------------------------------------------------- /scripts/Kafka/KafkaWriter/__load__.bro: -------------------------------------------------------------------------------- 1 | # 2 | # This is loaded when a user activates the plugin. Include scripts here that should be 3 | # loaded automatically at that point. 4 | # 5 | 6 | -------------------------------------------------------------------------------- /scripts/__load__.bro: -------------------------------------------------------------------------------- 1 | # 2 | # This is loaded unconditionally at Bro startup. Include scripts here that should 3 | # always be loaded. 4 | # 5 | # Normally, that will be only code that initializes built-in elements. Load 6 | # your standard scripts in 7 | # scripts///__load__.bro instead. 8 | # 9 | 10 | @load ./init.bro -------------------------------------------------------------------------------- /src/Plugin.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef BRO_PLUGIN_KAFKA_KAFKAWRITER 3 | #define BRO_PLUGIN_KAFKA_KAFKAWRITER 4 | 5 | #include 6 | 7 | namespace plugin { 8 | namespace Kafka_KafkaWriter { 9 | 10 | class Plugin : public ::plugin::Plugin 11 | { 12 | protected: 13 | // Overridden from plugin::Plugin. 14 | virtual plugin::Configuration Configure(); 15 | }; 16 | 17 | extern Plugin plugin; 18 | 19 | } 20 | } 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /configure.plugin: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Hooks to add custom options to the configure script. 
4 | # 5 | 6 | plugin_usage() 7 | { 8 | cat < 2 | 3 | class RoundRobinPartitionerCallback : public RdKafka::PartitionerCb { 4 | //private: 5 | // int32_t partition_num; 6 | 7 | 8 | public: 9 | int32_t partitioner_cb(const RdKafka::Topic *topic, 10 | const std::string *key, 11 | int32_t partition_count, 12 | void *msg_opaque); 13 | 14 | int32_t partitioner_cb(const RdKafka::Topic *topic, 15 | const void *keydata, 16 | size_t keylen, 17 | int32_t partition_cnt, 18 | void *msg_opaque); 19 | 20 | 21 | RoundRobinPartitionerCallback(); 22 | }; 23 | -------------------------------------------------------------------------------- /tests/Scripts/get-bro-env: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # 3 | # BTest helper for getting values for Bro-related environment variables. 4 | 5 | base=`dirname $0` 6 | bro=`cat ${base}/../../build/CMakeCache.txt | grep BRO_DIST | cut -d = -f 2` 7 | 8 | if [ "$1" = "brobase" ]; then 9 | echo ${bro} 10 | elif [ "$1" = "bropath" ]; then 11 | ${bro}/build/bro-path-dev 12 | elif [ "$1" = "bro_plugin_path" ]; then 13 | ( cd ${base}/../.. && pwd ) 14 | elif [ "$1" = "bro_seed_file" ]; then 15 | echo ${bro}/testing/btest/random.seed 16 | elif [ "$1" = "path" ]; then 17 | echo ${bro}/build/src:${bro}/aux/btest:${base}/:${bro}/aux/bro-cut:$PATH 18 | else 19 | echo "usage: `basename $0` " >&2 20 | exit 1 21 | fi 22 | -------------------------------------------------------------------------------- /tests/btest.cfg: -------------------------------------------------------------------------------- 1 | [btest] 2 | TestDirs = kafkawriter 3 | TmpDir = %(testbase)s/.tmp 4 | BaselineDir = %(testbase)s/Baseline 5 | IgnoreDirs = .svn CVS .tmp 6 | IgnoreFiles = *.tmp *.swp #* *.trace .DS_Store 7 | 8 | [environment] 9 | BROBASE=`%(testbase)s/Scripts/get-bro-env brobase` 10 | BROPATH=`%(testbase)s/Scripts/get-bro-env bropath` 11 | BRO_PLUGIN_PATH=`%(testbase)s/Scripts/get-bro-env bro_plugin_path` 12 | BRO_SEED_FILE=`%(testbase)s/Scripts/get-bro-env bro_seed_file` 13 | PATH=`%(testbase)s/Scripts/get-bro-env path` 14 | TZ=UTC 15 | LC_ALL=C 16 | TRACES=%(testbase)s/Traces 17 | TMPDIR=%(testbase)s/.tmp 18 | BRO_TRACES=`%(testbase)s/Scripts/get-bro-env brobase`/testing/btest/Traces 19 | TEST_DIFF_CANONIFIER=`%(testbase)s/Scripts/get-bro-env brobase`/testing/scripts/diff-canonifier 20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # Convenience Makefile providing a few common top-level targets. 
3 | # 4 | 5 | cmake_build_dir=build 6 | arch=`uname -s | tr A-Z a-z`-`uname -m` 7 | 8 | all: build-it 9 | 10 | build-it: 11 | @test -e $(cmake_build_dir)/config.status || ./configure 12 | -@test -e $(cmake_build_dir)/CMakeCache.txt && \ 13 | test $(cmake_build_dir)/CMakeCache.txt -ot `cat $(cmake_build_dir)/CMakeCache.txt | grep BRO_DIST | cut -d '=' -f 2`/build/CMakeCache.txt && \ 14 | echo Updating stale CMake cache && \ 15 | touch $(cmake_build_dir)/CMakeCache.txt 16 | 17 | ( cd $(cmake_build_dir) && make ) 18 | 19 | install: 20 | ( cd $(cmake_build_dir) && make install ) 21 | 22 | clean: 23 | ( cd $(cmake_build_dir) && make clean ) 24 | 25 | distclean: 26 | rm -rf $(cmake_build_dir) 27 | 28 | test: 29 | make -C tests 30 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH}) 2 | 3 | cmake_minimum_required(VERSION 2.8) 4 | 5 | project(Plugin) 6 | 7 | include(BroPlugin) 8 | 9 | find_package(LibRDKafka) 10 | 11 | if ( LIBRDKAFKA_FOUND ) 12 | include_directories(BEFORE ${LibRDKafka_INCLUDE_DIR}) 13 | 14 | bro_plugin_begin(Kafka KafkaWriter) 15 | bro_plugin_cc(src/AddingJson.cc) 16 | bro_plugin_cc(src/Kafka.cc) 17 | bro_plugin_cc(src/Plugin.cc) 18 | bro_plugin_bif(src/kafkawriter.bif) 19 | bro_plugin_dist_files(README CHANGES COPYING VERSION) 20 | bro_plugin_link_library(${LibRDKafka_LIBRARIES}) 21 | bro_plugin_link_library(${LibRDKafka_C_LIBRARIES}) 22 | bro_plugin_end() 23 | message(STATUS "LibRDKafka prefix : ${LibRDKafka_ROOT_DIR}") 24 | message(STATUS "LibRDKafka Library : ${LibRDKafka_LIBRARIES}") 25 | else () 26 | message(FATAL_ERROR "LibRDKafka not found.") 27 | endif () 28 | -------------------------------------------------------------------------------- /src/RoundRobinPartitioner.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "RoundRobinPartitioner.h" 6 | #include 7 | 8 | using namespace RdKafka; 9 | 10 | 11 | RoundRobinPartitionerCallback::RoundRobinPartitionerCallback(){ 12 | srand(time(NULL)); 13 | } 14 | 15 | int32_t RoundRobinPartitionerCallback::partitioner_cb(const RdKafka::Topic *topic, 16 | const std::string *key, 17 | int32_t partition_count, 18 | void *msg_opaque) { 19 | // dead simple partitioner...simply chooses random partition 20 | return ((int32_t)rand()) % partition_count; 21 | } 22 | 23 | int32_t RoundRobinPartitionerCallback::partitioner_cb(const RdKafka::Topic *topic, 24 | const void *keydata, 25 | size_t keylen, 26 | int32_t partition_count, 27 | void *msg_opaque) 28 | { 29 | return ((int32_t)rand()) % partition_count; 30 | } 31 | 32 | -------------------------------------------------------------------------------- /scripts/Kafka/KafkaWriter/logs-to-kafka.bro: -------------------------------------------------------------------------------- 1 | module KafkaLogger; 2 | 3 | export { 4 | # redefine this in your script to identify the logs 5 | # that should be sent up to bro. 6 | # for example: 7 | # 8 | # redef KafkaLogger::logs_to_send = set(HTTP::LOG, Conn::Log, DNS::LOG); 9 | # 10 | # that will send the HTTP, Conn, and DNS logs up to Kafka. 
11 | # 12 | const logs_to_send: set[Log::ID] &redef; 13 | } 14 | 15 | event bro_init() &priority=-5 16 | { 17 | 18 | for (stream_id in Log::active_streams) 19 | { 20 | if (stream_id !in logs_to_send){ 21 | next; 22 | } 23 | # note: the filter name is different for each log, to cause Bro to instantiate 24 | # a new Writer instance for each log. The bro folks might want me 25 | # to do this with a single writer instance, but the mechanics of 26 | # modifying the field names and adding fields made it quite complicated, 27 | # so I opted to make one log writer per bro log going to Kafka. 28 | # This means there will be multiple connections from bro to the kafka 29 | # server, one per log file. 30 | local streamString = fmt("%s", stream_id); 31 | local pathname = fmt("%s", KafkaLogger::log_names[streamString]); 32 | local filter: Log::Filter = [$name = fmt("kafka-%s",stream_id), 33 | $writer = Log::WRITER_KAFKAWRITER, 34 | $path = pathname 35 | ]; 36 | Log::add_filter(stream_id, filter); 37 | 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /cmake/FindLibRDKafka.cmake: -------------------------------------------------------------------------------- 1 | # - Try to find LibRDKafka headers and libraries. 2 | # 3 | # Usage of this module as follows: 4 | # 5 | # find_package(LibRDKafka) 6 | # 7 | # Variables used by this module, they can change the default behaviour and need 8 | # to be set before calling find_package: 9 | # 10 | # LibRDKafka_ROOT_DIR Set this variable to the root installation of 11 | # LibRDKafka if the module has problems finding 12 | # the proper installation path. 13 | # 14 | # Variables defined by this module: 15 | # 16 | # LIBRDKAFKA_FOUND System has LibRDKafka libs/headers 17 | # LibRDKafka_LIBRARIES The LibRDKafka libraries 18 | # LibRDKafka_INCLUDE_DIR The location of LibRDKafka headers 19 | 20 | find_path(LibRDKafka_ROOT_DIR 21 | NAMES include/librdkafka/rdkafkacpp.h 22 | ) 23 | 24 | find_library(LibRDKafka_LIBRARIES 25 | NAMES rdkafka++ 26 | HINTS ${LibRDKafka_ROOT_DIR}/lib 27 | ) 28 | 29 | find_library(LibRDKafka_C_LIBRARIES 30 | NAMES rdkafka 31 | HINTS ${LibRDKafka_ROOT_DIR}/lib 32 | ) 33 | 34 | find_path(LibRDKafka_INCLUDE_DIR 35 | NAMES librdkafka/rdkafkacpp.h 36 | HINTS ${LibRDKafka_ROOT_DIR}/include 37 | ) 38 | 39 | include(FindPackageHandleStandardArgs) 40 | find_package_handle_standard_args(LibRDKafka DEFAULT_MSG 41 | LibRDKafka_LIBRARIES 42 | LibRDKafka_C_LIBRARIES 43 | LibRDKafka_INCLUDE_DIR 44 | ) 45 | 46 | mark_as_advanced( 47 | LibRDKafka_ROOT_DIR 48 | LibRDKafka_LIBRARIES 49 | LibRDKafka_C_LIBRARIES 50 | LibRDKafka_INCLUDE_DIR 51 | ) 52 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2015 by the United States Government/National Institutes of Health 3 | 4 | Written by Aaron Gee-Clough 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | (1) Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | (2) Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in 14 | the documentation and/or other materials provided with the 15 | distribution. 
16 | 17 | (3) Neither the name of the National Institutes of Health, the US Government, 18 | nor the names of contributors may be used to endorse or promote products 19 | derived from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 25 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | POSSIBILITY OF SUCH DAMAGE. 32 | -------------------------------------------------------------------------------- /src/AddingJson.h: -------------------------------------------------------------------------------- 1 | // See the file "COPYING" in the main distribution directory for copyright. 2 | #ifndef THREADING_FORMATTERS_ADDINGJSON_H 3 | #define THREADING_FORMATTERS_ADDINGJSON_H 4 | 5 | 6 | #include "threading/Formatter.h" 7 | #include "threading/SerialTypes.h" 8 | 9 | /** 10 | * A thread-safe class for converting values into a JSON representation 11 | * and vice versa. 12 | * 13 | * Modified to add fields to the JSON stream, depending on user-supplied 14 | * criteria. 15 | */ 16 | 17 | 18 | namespace threading { namespace formatter { 19 | 20 | class AddingJSON : public Formatter { 21 | public: 22 | enum TimeFormat { 23 | TS_EPOCH, // Doubles that represents seconds from the UNIX epoch. 24 | TS_ISO8601, // ISO 8601 defined human readable timestamp format. 25 | TS_MILLIS // Milliseconds from the UNIX epoch. Some consumers need this (e.g., elasticsearch). 26 | }; 27 | 28 | AddingJSON(threading::MsgThread* t, TimeFormat tf, char* argsensor_name, char* argtype_name, bool logstash_format_timestamps); 29 | ~AddingJSON(); 30 | 31 | bool AddingDescribe(ODesc* desc, threading::Value* val, const string& name="") const; 32 | bool Describe(ODesc* desc, threading::Value* val, const string& name = "") const; 33 | bool Describe(ODesc* desc, int num_fields, const threading::Field* const * fields, 34 | threading::Value** vals) const; 35 | threading::Value* ParseValue(const string& s, const string& name, TypeTag type, TypeTag subtype = TYPE_ERROR) const; 36 | 37 | void SurroundingBraces(bool use_braces); 38 | 39 | private: 40 | TimeFormat timestamps; 41 | bool surrounding_braces; 42 | const char* sensor_name; 43 | const char* type_name; 44 | bool logstash_timestamps; 45 | }; 46 | 47 | }} 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /src/Kafka.h: -------------------------------------------------------------------------------- 1 | // See the file "COPYING" in the main distribution directory for copyright. 2 | // 3 | // Log writer for writing to a Kafka broker 4 | // 5 | // This is experimental code that is not yet ready for production usage. 6 | // 7 | // check on these ifndef. Having this in both here and plugin.h causes 8 | // compile problems. 
9 | //#ifndef BRO_PLUGIN_KAFKA_KAFKAWRITER 10 | //#define BRO_PLUGIN_KAFKA_KAFKAWRITER 11 | 12 | #include "logging/WriterBackend.h" 13 | #include "logging/WriterFrontend.h" 14 | #include "threading/formatters/JSON.h" 15 | #include 16 | #include "Type.h" 17 | #include "AddingJson.h" 18 | #include 19 | 20 | namespace logging { namespace writer { 21 | 22 | class KafkaWriter : public WriterBackend { 23 | public: 24 | KafkaWriter(WriterFrontend* frontend); 25 | ~KafkaWriter(); 26 | 27 | static string LogExt(); 28 | static WriterBackend* Instantiate(WriterFrontend* frontend) 29 | {return new KafkaWriter(frontend); } 30 | 31 | 32 | protected: 33 | 34 | virtual bool DoInit(const WriterInfo& info, int num_fields, const threading::Field* const* fields); 35 | virtual bool DoWrite(int num_fields, const threading::Field* const* fields, threading::Value** vals); 36 | virtual bool DoSetBuf(bool enabled); 37 | virtual bool DoRotate(const char* rotated_path, double open, double close, bool terminating); 38 | virtual bool DoFlush(double network_time); 39 | virtual bool DoFinish(double network_time); 40 | virtual bool DoHeartbeat(double network_time, double current_time); 41 | 42 | 43 | private: 44 | 45 | threading::Field** MakeFields(const threading::Field* const* fields, int num_fields, std::string path); 46 | long transfer_timeout; 47 | 48 | // kafka configurations 49 | char* broker_name; 50 | int broker_name_len; 51 | char* topic_name; 52 | int topic_name_len; 53 | char* client_id; 54 | int client_id_len; 55 | char* compression_codec; 56 | int compression_codec_len; 57 | char* security_protocol; 58 | int security_protocol_len; 59 | char* kerberos_service_name; 60 | int kerberos_service_name_len; 61 | char* kerberos_keytab; 62 | int kerberos_keytab_len; 63 | char* kerberos_principal; 64 | int kerberos_principal_len; 65 | RdKafka::Conf *conf; 66 | RdKafka::Conf *tconf; 67 | RdKafka::Producer *producer; 68 | RdKafka::Topic *topic; 69 | uint64 batch_size; 70 | 71 | 72 | //varible to hold renamed fields 73 | threading::Field** fixed_fields; 74 | 75 | // formatter to turn fields/values into json. 76 | threading::formatter::AddingJSON* json_formatter; 77 | 78 | }; 79 | 80 | } 81 | } 82 | 83 | //#endif 84 | -------------------------------------------------------------------------------- /configure: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Wrapper for viewing/setting options that the plugin's CMake 4 | # scripts will recognize. 5 | # 6 | # Don't edit this. Edit configure.plugin to add plugin-specific options. 7 | # 8 | 9 | set -e 10 | command="$0 $*" 11 | 12 | if [ -e `dirname $0`/configure.plugin ]; then 13 | # Include custom additions. 14 | . `dirname $0`/configure.plugin 15 | fi 16 | 17 | # Check for `cmake` command. 
18 | type cmake > /dev/null 2>&1 || { 19 | echo "\ 20 | This package requires CMake, please install it first, then you may 21 | use this configure script to access CMake equivalent functionality.\ 22 | " >&2; 23 | exit 1; 24 | } 25 | 26 | usage() { 27 | 28 | cat 1>&2 </dev/null 2>&1; then 37 | plugin_usage 1>&2 38 | fi 39 | 40 | echo 41 | 42 | exit 1 43 | } 44 | 45 | # Function to append a CMake cache entry definition to the 46 | # CMakeCacheEntries variable 47 | # $1 is the cache entry variable name 48 | # $2 is the cache entry variable type 49 | # $3 is the cache entry variable value 50 | append_cache_entry () { 51 | CMakeCacheEntries="$CMakeCacheEntries -D $1:$2=$3" 52 | } 53 | 54 | # set defaults 55 | builddir=build 56 | brodist=`cd ../../.. && pwd` 57 | installroot="default" 58 | CMakeCacheEntries="" 59 | 60 | while [ $# -ne 0 ]; do 61 | case "$1" in 62 | -*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;; 63 | *) optarg= ;; 64 | esac 65 | 66 | case "$1" in 67 | --help|-h) 68 | usage 69 | ;; 70 | 71 | --bro-dist=*) 72 | brodist=`cd $optarg && pwd` 73 | ;; 74 | 75 | --install-root=*) 76 | installroot=$optarg 77 | ;; 78 | 79 | *) 80 | if type plugin_option >/dev/null 2>&1; then 81 | plugin_option $1 && shift && continue; 82 | fi 83 | 84 | echo "Invalid option '$1'. Try $0 --help to see available options." 85 | exit 1 86 | ;; 87 | esac 88 | shift 89 | done 90 | 91 | if [ ! -e "$brodist/bro-path-dev.in" ]; then 92 | echo "Cannot determine Bro source directory, use --bro-dist=DIR." 93 | exit 1 94 | fi 95 | 96 | append_cache_entry BRO_DIST PATH $brodist 97 | append_cache_entry CMAKE_MODULE_PATH PATH $brodist/cmake 98 | 99 | if [ "$installroot" != "default" ]; then 100 | mkdir -p $installroot 101 | append_cache_entry BRO_PLUGIN_INSTALL_ROOT PATH $installroot 102 | fi 103 | 104 | echo "Build Directory : $builddir" 105 | echo "Bro Source Directory : $brodist" 106 | 107 | mkdir -p $builddir 108 | cd $builddir 109 | 110 | cmake $CMakeCacheEntries .. 111 | 112 | echo "# This is the command used to configure this build" > config.status 113 | echo $command >> config.status 114 | chmod u+x config.status 115 | -------------------------------------------------------------------------------- /scripts/init.bro: -------------------------------------------------------------------------------- 1 | module KafkaLogger; 2 | 3 | export { 4 | 5 | # Logstash requires quotes around timestamp values, even if they 6 | # are doubles/floats. If you're consuming this data via logstash 7 | # you will want this to be set to "T". If you're consuming 8 | # these logs with something else, that's expecting timestamps 9 | # to be raw double values, set this to "F". 10 | const logstash_style_timestamp = F &redef; 11 | 12 | # the name of the kafka topic to connect to & send messages to. 13 | const topic_name = "logs" &redef; 14 | 15 | # the name of the kafka broker to send messages to. 16 | # This is a comma-separated list of kafka brokers. 17 | # format: hostname1:port,hostname2:port 18 | # no spaces. 19 | const broker_name = "localhost" &redef; 20 | 21 | # this is the name to add to each message to identify 22 | # this sensor in the logs. It will be added to a "sensor" 23 | # field in the submitted json. 24 | const sensor_name = "brosensor" &redef; 25 | 26 | # This is used internally by Kafka to trace requests and 27 | # errors. Kafka advises setting it to identify the 28 | # application making the requests/produces. 29 | const client_id = "bro" &redef; 30 | 31 | # codec to use. 0=none, 1=gzip, 2=snappy. 
32 | const compression_codec = "0" &redef; 33 | 34 | # controls for how often to send messages up to 35 | # kafka. Default is either every 500 messages, 36 | # every 10MB of buffer size, or every 10 seconds. 37 | # Any of these conditions hitting will cause 38 | # the produce up to Kafka. 39 | # if more than 10,000 messages are queued up, 40 | # the kafka producer will begin dropping messages. 41 | # max_batch_interval is the maximun number of milliseconds 42 | # that kafka will wait before sending logs. 43 | const default_batch_size = "500" &redef; 44 | const max_batch_size = "10000" &redef; 45 | const max_batch_interval = "1000" &redef; 46 | 47 | 48 | # These are the names of the log files that will be sent in the 49 | # json as the "type" field. These are also the keys used in the 50 | # column_rename variable above. 51 | const log_names = table(["AppStats::LOG"] = "app_stats", 52 | ["Barnyard2::LOG"] = "barnyard2", 53 | ["CaptureLoss::LOG"] = "capture_loss", 54 | ["Cluster::LOG"] = "cluster", 55 | ["Communication::LOG"] = "communication", 56 | ["Conn::LOG"] = "conn", 57 | ["DHCP::LOG"] = "dhcp", 58 | ["DNP3::LOG"] = "dnps", 59 | ["DNS::LOG"] = "dns", 60 | ["DPD::LOG"] = "dpd", 61 | ["Files::LOG"] = "files", 62 | ["FTP::LOG"] = "ftp", 63 | ["HTTP::LOG"] = "http", 64 | ["Intel::LOG"] = "intel", 65 | ["IRC::LOG"] = "irc", 66 | ["KRB::LOG"] = "kerberos", 67 | ["Known::CERTS_LOG"] = "known_certs", 68 | ["Known::DEVICES_LOG"] = "known_devices", 69 | ["Known::HOSTS_LOG"] = "known_hosts", 70 | ["Known::MODBUS_LOG"] = "known_modbus", 71 | ["Known::SERVICES_LOG"] = "known_services", 72 | ["LoadedScripts::LOG"] = "loaded_scripts", 73 | ["Modbus::LOG"] = "modbus", 74 | ["Modbus::REGISTER_CHANGE_LOG"] = "modbus_register_change_log", 75 | ["mysql::LOG"] = "mysql", 76 | ["Notice::LOG"] = "notice", 77 | ["Notice::ALARM_LOG"] = "notice_alarm", 78 | ["PacketFilter::LOG"] = "packet_filter", 79 | ["PE::LOG"] = "pe", 80 | ["RADIUS::LOG"] = "radius", 81 | ["RDP::LOG"] = "rdp", 82 | ["Reporter::LOG"] = "reporter", 83 | ["Signatures::LOG"] = "signatures", 84 | ["SIP::LOG"] = "sip", 85 | ["SMTP::LOG"] = "smtp", 86 | ["SNMP::LOG"] = "snmp", 87 | ["SOCKS::LOG"] = "socks", 88 | ["Software::LOG"] = "software", 89 | ["SSH::LOG"] = "ssh", 90 | ["SSL::LOG"] = "ssl", 91 | ["Stats::LOG"] = "stats", 92 | ["Syslog::LOG"] = "syslog", 93 | ["Traceroute::LOG"] = "traceroute", 94 | ["Tunnel::LOG"] = "tunnel", 95 | ["Unified2::LOG"] = "unified2", 96 | ["Weird::LOG"] = "weird", 97 | ["X509::LOG"] = "x509" 98 | ) &redef; 99 | const security_protocol = "NONE" &redef; 100 | const kerberos_service_name = "bro" &redef; 101 | const kerberos_principal = "kafka/kafkabroker@domain.com" &redef; 102 | const kerberos_keytab = "/etc/security/keytabs/bro.keytab" &redef; 103 | 104 | } 105 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | 2 | Kafka::KafkaWriter 3 | ================================= 4 | 5 | Kafka Writer plugin for Bro. This plugin will connect to a Kafka server, and will send JSON-format 6 | logs to the topic specified. 7 | 8 | Aside from sending bro logs to Kafka, it will also add two fields to each JSON message, and rename 9 | 5 of the fields from the bro native names. 10 | 11 | The added fields are: 12 | a "sensor" name as specified in the config, used to identify which bro server originated this log. 13 | and a "type" field, populated with the log type (dns, ssl, conn, etc). 
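For illustration only, a DNS record shipped by this plugin might look roughly like the sample below. The values are invented for this example, the exact field set depends on the log, and the renamed fields shown here are described in the next paragraph:

    {"timestamp":1452531600000,"uid":"CHhAvVGS1DHFjwGM9","source_ip":"192.0.2.10","source_port":53344,"dest_ip":"198.51.100.7","dest_port":53,"proto":"udp","query":"example.com","sensor":"brosensor","type":"dns"}
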
14 | 15 | The renamed fields are: 16 | "ts" will become "timestamp", 17 | "id.orig_h" becomes "source_ip", 18 | "id.orig_p" becomes "source_port", 19 | "id.resp_h" becomes "dest_ip", and 20 | "id.resp_p" becomes "dest_port". 21 | 22 | 23 | Users can override the sensor name as a standard redef in their config. Changing the type field names can 24 | be done by editing the init.bro script for the KafkaLogger (though I don't recommend it). I would like 25 | to make the field renaming configurable in the future, but had problems with the bro table structure 26 | in C++, so those are hard-coded in the C++ for the moment. 27 | 28 | (Note: timestamps are sent as millisecond timestamps, i.e., a UNIX epoch timestamp multiplied by 1000 29 | to include the milliseconds.) 30 | 31 | Configuration 32 | ================================ 33 | In your bro policy, include the logs-to-kafka bro script, and define the particular logs you want 34 | to send to Kafka, like this: 35 | 36 | @load Kafka/KafkaWriter/logs-to-kafka 37 | redef KafkaLogger::logs_to_send = set(HTTP::LOG, Conn::LOG, DNS::LOG, SMTP::LOG); 38 | 39 | See the Kafka init.bro file for the full list of logs you can flag to be sent to Kafka. (That file 40 | also contains the clean names used for each log in the "type" field.) 41 | 42 | You can also redefine how the bro client will connect to kafka with the following variables: 43 | 44 | redef KafkaLogger::topic_name = "logs"; 45 | which Kafka topic to send logs to. All logs from bro will be sent to this topic. 46 | 47 | 48 | redef KafkaLogger::broker_name = "hostname:port,hostname:port"; 49 | the name of the kafka broker to send messages to. 50 | This is a comma-separated list of kafka brokers. 51 | format: hostname1:port,hostname2:port 52 | no spaces. 53 | 54 | 55 | redef KafkaLogger::sensor_name = "brosensor"; 56 | The name to add to each message to identify 57 | this sensor in the logs. It will be added to a "sensor" 58 | field in the submitted json. 59 | 60 | 61 | redef KafkaLogger::client_id = "bro"; 62 | This is used internally by Kafka to trace requests and 63 | errors. Kafka advises setting it to identify the 64 | application making the requests/produces. The log name 65 | will be appended to this in the Kafka logs. 66 | 67 | 68 | redef KafkaLogger::compression_codec = "0"; 69 | Kafka can compress messages on the wire. This specifies 70 | which compression codec to use. 0=none, 1=gzip, 2=snappy. 71 | 72 | 73 | redef KafkaLogger::max_batch_size = "1000"; 74 | redef KafkaLogger::max_batch_interval = "10000"; 75 | How often to send logs: max_batch_size is the maximum number of queued messages, and max_batch_interval is the maximum number of milliseconds to wait before sending. 76 | 77 | If you use the Kerberised version of kafka, you'll need to switch KafkaLogger::security_protocol from PLAINTEXT to SASL_PLAINTEXT or SASL_SSL, 78 | and make sure you've set up the keytab properly; see 79 | https://github.com/edenhill/librdkafka/wiki/Using-SASL-with-librdkafka for more info on how to do 80 | that. You will also need a version of librdkafka built with SSL and Kerberos support. 81 | 82 | redef KafkaLogger::security_protocol = "SASL_PLAINTEXT"; 83 | 84 | redef KafkaLogger::kerberos_service_name = "kafka"; 85 | Needs to be redefined to match the service name for Kafka. 86 | 87 | redef KafkaLogger::kerberos_keytab = "/etc/security/keytabs/bro.keytab"; 88 | Needs to be redefined if your keytab file is stored elsewhere. 89 | 90 | redef KafkaLogger::kerberos_principal = "kafka/kafkabroker@domain.com"; 91 | Note: the hostname here must exactly match the hostname part of the broker's principal. 
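As a rough end-to-end sketch (not taken from this repository; the hostnames, principal, and topic are placeholders), a local.bro that ships a few logs to a Kerberised broker might contain something like:

    # Send HTTP, Conn, and DNS logs to the "bro_logs" topic on a SASL_PLAINTEXT cluster.
    @load Kafka/KafkaWriter/logs-to-kafka
    redef KafkaLogger::logs_to_send = set(HTTP::LOG, Conn::LOG, DNS::LOG);
    redef KafkaLogger::topic_name = "bro_logs";
    redef KafkaLogger::broker_name = "kafka1.example.com:9092,kafka2.example.com:9092";
    redef KafkaLogger::sensor_name = "sensor01";
    redef KafkaLogger::security_protocol = "SASL_PLAINTEXT";
    redef KafkaLogger::kerberos_service_name = "kafka";
    redef KafkaLogger::kerberos_keytab = "/etc/security/keytabs/bro.keytab";
    redef KafkaLogger::kerberos_principal = "kafka/kafka1.example.com@EXAMPLE.COM";

Since all of these options are declared &redef in init.bro, changing them only requires editing your policy; nothing needs to be rebuilt.
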
92 | 93 | If after setting up you get an error about "no common mechs", you probably need to install additional SASL modules; on Debian and derivatives, 94 | ''' 95 | apt-get install libsasl2-modules-gssapi-heimdal 96 | ''' 97 | should do the trick. 98 | 99 | Lastly, if you are consuming the events with logstash, you will need to set 100 | redef KafkaLogger::logstash_style_timestamp = T; 101 | This will set quotes around the "timestamp" field. Logstash is apparently 102 | looking for the timestamp to be a string, even if you configure it to be 103 | looking for unix timestamp values. (BTW, you will want to configure 104 | logstash with a filter that says: 105 | filter { date { match => ["timestamp", "UNIX_MS"] } } 106 | in order to get it to parse the timestamps properly.) 107 | 108 | Requirements 109 | ================================ 110 | You will need to install the librdkafka library independently of this plugin. The build will 111 | attempt to find the librdkafka libraries, but if you put them somewhere unusual, you will need to tell 112 | configure where to find them with the --with-librdkafka option. 113 | 114 | 115 | Notes & thank yous 116 | =============================== 117 | Much of this plugin is based on earlier code from Kurt Grutzmacher, who wrote a bro 2.2 logger for kafka. 118 | Also, many thanks to Robin Sommer for answering my beginner-level questions as I started this process. 119 | 120 | Lastly, this plugin will create a new connection from Bro to kafka for each log type to be sent to 121 | kafka. This may cause a lot of connections. At the moment, the plugin does not try to re-connect if 122 | it loses all connectivity to kafka, but the librdkafka library should handle failover between brokers 123 | transparently. 124 | -------------------------------------------------------------------------------- /src/AddingJson.cc: -------------------------------------------------------------------------------- 1 | // See the file "COPYING" in the main distribution directory for copyright. 2 | 3 | #include "bro-config.h" 4 | 5 | #ifndef __STDC_LIMIT_MACROS 6 | #define __STDC_LIMIT_MACROS 7 | #endif 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "BroString.h" 15 | #include "AddingJson.h" 16 | 17 | using threading::Value; 18 | using threading::Field; 19 | 20 | using namespace threading::formatter; 21 | 22 | AddingJSON::AddingJSON(MsgThread* t, 23 | TimeFormat tf, 24 | char* argsensor_name, 25 | char* argtype_name, 26 | bool logstash_format_timestamps) : Formatter(t), surrounding_braces(true) 27 | { 28 | timestamps = tf; 29 | sensor_name = argsensor_name; 30 | type_name = argtype_name; 31 | logstash_timestamps = logstash_format_timestamps; 32 | } 33 | 34 | AddingJSON::~AddingJSON() 35 | { 36 | } 37 | 38 | bool AddingJSON::Describe(ODesc* desc, 39 | int num_fields, 40 | const Field* const * fields, 41 | Value** vals) const 42 | { 43 | if ( surrounding_braces ) 44 | desc->AddRaw("{"); 45 | 46 | for ( int i = 0; i < num_fields; i++ ) 47 | { 48 | const u_char* bytes = desc->Bytes(); 49 | int len = desc->Len(); 50 | 51 | if ( i > 0 && len > 0 && bytes[len-1] != ',' && vals[i]->present ) 52 | desc->AddRaw(","); 53 | 54 | if ( ! 
Describe(desc, vals[i], fields[i]->name) ) 55 | return false; 56 | } 57 | // add sensor_name 58 | desc->AddRaw(",\"", 2); 59 | desc->AddRaw("sensor", 6); 60 | desc->AddRaw("\":\"", 3); 61 | desc->AddRaw((const char*)sensor_name, strlen(sensor_name)); 62 | desc->AddRaw("\",",2); 63 | 64 | // add log type 65 | desc->AddRaw("\"", 1); 66 | desc->AddRaw("type", 4); 67 | desc->AddRaw("\":\"", 3); 68 | desc->AddRaw((const char*)type_name, strlen(type_name)); 69 | desc->AddRaw("\"", 1); 70 | 71 | if ( surrounding_braces ) 72 | desc->AddRaw("}"); 73 | 74 | return true; 75 | } 76 | 77 | bool AddingJSON::Describe(ODesc* desc, Value* val, const string& name) const 78 | { 79 | if ( ! val->present ) 80 | return true; 81 | 82 | if ( name.size() ) 83 | { 84 | desc->AddRaw("\"", 1); 85 | desc->Add(name); 86 | desc->AddRaw("\":", 2); 87 | } 88 | 89 | switch ( val->type ) 90 | { 91 | case TYPE_BOOL: 92 | desc->AddRaw(val->val.int_val == 0 ? "false" : "true"); 93 | break; 94 | 95 | case TYPE_INT: 96 | desc->Add(val->val.int_val); 97 | break; 98 | 99 | case TYPE_COUNT: 100 | case TYPE_COUNTER: 101 | { 102 | // JSON doesn't support unsigned 64bit ints. 103 | if ( val->val.uint_val >= INT64_MAX ) 104 | { 105 | GetThread()->Error(GetThread()->Fmt("count value too large for JSON: %" PRIu64, val->val.uint_val)); 106 | desc->AddRaw("null", 4); 107 | } 108 | else 109 | desc->Add(val->val.uint_val); 110 | break; 111 | } 112 | 113 | case TYPE_PORT: 114 | desc->Add(val->val.port_val.port); 115 | break; 116 | 117 | case TYPE_SUBNET: 118 | desc->AddRaw("\"", 1); 119 | desc->Add(Render(val->val.subnet_val)); 120 | desc->AddRaw("\"", 1); 121 | break; 122 | 123 | case TYPE_ADDR: 124 | desc->AddRaw("\"", 1); 125 | desc->Add(Render(val->val.addr_val)); 126 | desc->AddRaw("\"", 1); 127 | break; 128 | 129 | case TYPE_DOUBLE: 130 | case TYPE_INTERVAL: 131 | desc->Add(val->val.double_val); 132 | break; 133 | 134 | case TYPE_TIME: 135 | { 136 | if ( timestamps == TS_ISO8601 ) 137 | { 138 | char buffer[40]; 139 | char buffer2[40]; 140 | time_t t = time_t(val->val.double_val); 141 | if ( strftime(buffer, sizeof(buffer), "%Y-%m-%dT%H:%M:%S", gmtime(&t)) > 0 ) 142 | { 143 | double integ; 144 | double frac = modf(val->val.double_val, &integ); 145 | snprintf(buffer2, sizeof(buffer2), "%s.%06.0fZ", buffer, frac * 1000000); 146 | desc->AddRaw("\"", 1); 147 | desc->Add(buffer2); 148 | desc->AddRaw("\"", 1); 149 | } 150 | else 151 | { 152 | GetThread()->Error(GetThread()->Fmt("strftime error for JSON: %" PRIu64)); 153 | 154 | } 155 | } 156 | else if ( timestamps == TS_EPOCH ) 157 | { 158 | if (logstash_timestamps){ 159 | desc->AddRaw("\"", 1); 160 | } 161 | 162 | desc->Add(val->val.double_val); 163 | 164 | if (logstash_timestamps){ 165 | desc->AddRaw("\"", 1); 166 | } 167 | } 168 | else if ( timestamps == TS_MILLIS ) 169 | { 170 | // ElasticSearch uses milliseconds for timestamps and json only 171 | // supports signed ints (uints can be too large). 
172 | uint64_t ts = (uint64_t) (val->val.double_val * 1000); 173 | if ( ts < INT64_MAX ) 174 | if (logstash_timestamps){ 175 | desc->AddRaw("\"", 1); 176 | } 177 | 178 | desc->Add(ts); 179 | 180 | if (logstash_timestamps){ 181 | desc->AddRaw("\"", 1); 182 | } 183 | else 184 | { 185 | GetThread()->Error(GetThread()->Fmt("time value too large for JSON milliseconds: %" PRIu64, ts)); 186 | desc->AddRaw("null", 4); 187 | } 188 | } 189 | 190 | break; 191 | } 192 | 193 | case TYPE_ENUM: 194 | case TYPE_STRING: 195 | case TYPE_FILE: 196 | case TYPE_FUNC: 197 | { 198 | desc->AddRaw("\"", 1); 199 | 200 | for ( int i = 0; i < val->val.string_val.length; ++i ) 201 | { 202 | char c = val->val.string_val.data[i]; 203 | 204 | // 2byte Unicode escape special characters. 205 | if ( c < 32 || c > 126 || c == '\n' || c == '"' || c == '\'' || c == '\\' || c == '&' ) 206 | { 207 | desc->AddRaw("\\u00", 4); 208 | char hex[2] = {'0', '0'}; 209 | bytetohex(c, hex); 210 | desc->AddRaw(hex, 1); 211 | desc->AddRaw(hex + 1, 1); 212 | } 213 | else 214 | desc->AddRaw(&c, 1); 215 | } 216 | 217 | desc->AddRaw("\"", 1); 218 | break; 219 | } 220 | 221 | case TYPE_TABLE: 222 | { 223 | desc->AddRaw("[", 1); 224 | 225 | for ( int j = 0; j < val->val.set_val.size; j++ ) 226 | { 227 | if ( j > 0 ) 228 | desc->AddRaw(",", 1); 229 | 230 | Describe(desc, val->val.set_val.vals[j]); 231 | } 232 | 233 | desc->AddRaw("]", 1); 234 | break; 235 | } 236 | 237 | case TYPE_VECTOR: 238 | { 239 | desc->AddRaw("[", 1); 240 | 241 | for ( int j = 0; j < val->val.vector_val.size; j++ ) 242 | { 243 | if ( j > 0 ) 244 | desc->AddRaw(",", 1); 245 | Describe(desc, val->val.vector_val.vals[j]); 246 | } 247 | 248 | desc->AddRaw("]", 1); 249 | break; 250 | } 251 | 252 | default: 253 | return false; 254 | } 255 | 256 | return true; 257 | } 258 | 259 | threading::Value* AddingJSON::ParseValue(const string& s, const string& name, TypeTag type, TypeTag subtype) const 260 | { 261 | GetThread()->Error("JSON formatter does not support parsing yet."); 262 | return NULL; 263 | } 264 | 265 | void AddingJSON::SurroundingBraces(bool use_braces) 266 | { 267 | surrounding_braces = use_braces; 268 | } 269 | -------------------------------------------------------------------------------- /src/Kafka.cc: -------------------------------------------------------------------------------- 1 | #include "bro-config.h" 2 | 3 | #include "util.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "BroString.h" 10 | #include "threading/SerialTypes.h" 11 | #include "Kafka.h" 12 | #include "kafkawriter.bif.h" 13 | 14 | using namespace logging; 15 | using namespace writer; 16 | using threading::Value; 17 | using threading::Field; 18 | using namespace RdKafka; 19 | 20 | 21 | class RandomPartitionerCallback : public RdKafka::PartitionerCb { 22 | private: 23 | unsigned int seed; 24 | 25 | public: 26 | int32_t partitioner_cb(const RdKafka::Topic *topic, const std::string *key, 27 | int32_t partition_count, void *msg_opaque) 28 | { 29 | return (int32_t)rand_r(&seed) % partition_count; 30 | } 31 | 32 | RandomPartitionerCallback(){ 33 | seed = time(NULL); 34 | } 35 | }; 36 | 37 | 38 | KafkaWriter::KafkaWriter(WriterFrontend* frontend) : WriterBackend(frontend) 39 | { 40 | 41 | json_formatter = 0; 42 | //srand(time(NULL)); 43 | producer = NULL; 44 | topic = NULL; 45 | 46 | 47 | // initialize kafka variables... 
48 | broker_name_len = BifConst::KafkaLogger::broker_name->Len(); 49 | broker_name = new char[broker_name_len + 1]; 50 | memcpy(broker_name, BifConst::KafkaLogger::broker_name->Bytes(), broker_name_len); 51 | broker_name[broker_name_len] = 0; 52 | 53 | topic_name_len = BifConst::KafkaLogger::topic_name->Len(); 54 | topic_name = new char[topic_name_len + 1]; 55 | memcpy(topic_name, BifConst::KafkaLogger::topic_name->Bytes(), topic_name_len); 56 | topic_name[topic_name_len] = 0; 57 | 58 | compression_codec_len = BifConst::KafkaLogger::compression_codec->Len(); 59 | compression_codec = new char[compression_codec_len + 1]; 60 | memcpy(compression_codec, BifConst::KafkaLogger::compression_codec->Bytes(), compression_codec_len); 61 | compression_codec[compression_codec_len] = 0; 62 | 63 | // initialize varibles used to store extra data appended to every message 64 | // (sensor name and log type) 65 | int sensor_name_len = BifConst::KafkaLogger::sensor_name->Len(); 66 | char* sensor_name = new char[sensor_name_len + 1]; 67 | memcpy(sensor_name, BifConst::KafkaLogger::sensor_name->Bytes(), sensor_name_len); 68 | sensor_name[sensor_name_len] = 0; 69 | 70 | security_protocol_len = BifConst::KafkaLogger::security_protocol->Len(); 71 | security_protocol = new char[security_protocol_len + 1]; 72 | memcpy(security_protocol, BifConst::KafkaLogger::security_protocol->Bytes(), security_protocol_len); 73 | security_protocol[security_protocol_len] = 0; 74 | 75 | kerberos_service_name_len = BifConst::KafkaLogger::kerberos_service_name->Len(); 76 | kerberos_service_name = new char[kerberos_service_name_len + 1]; 77 | memcpy(kerberos_service_name, BifConst::KafkaLogger::kerberos_service_name->Bytes(), kerberos_service_name_len); 78 | kerberos_service_name[kerberos_service_name_len] = 0; 79 | 80 | kerberos_keytab_len = BifConst::KafkaLogger::kerberos_keytab->Len(); 81 | kerberos_keytab = new char[kerberos_keytab_len + 1]; 82 | memcpy(kerberos_keytab, BifConst::KafkaLogger::kerberos_keytab->Bytes(), kerberos_keytab_len); 83 | kerberos_keytab[kerberos_keytab_len] = 0; 84 | 85 | kerberos_principal_len = BifConst::KafkaLogger::kerberos_principal->Len(); 86 | kerberos_principal = new char[kerberos_principal_len + 1]; 87 | memcpy(kerberos_principal, BifConst::KafkaLogger::kerberos_principal->Bytes(), kerberos_principal_len); 88 | kerberos_principal[kerberos_principal_len] = 0; 89 | 90 | int type_name_len = strlen(Info().path); 91 | char* type_name = new char[type_name_len + 1]; 92 | memcpy(type_name, Info().path, type_name_len); 93 | type_name[type_name_len] = 0; 94 | 95 | client_id_len = BifConst::KafkaLogger::client_id->Len() + strlen(Info().path) + 1; 96 | client_id = new char[client_id_len + 1]; 97 | memcpy(client_id, BifConst::KafkaLogger::client_id->Bytes(), client_id_len); 98 | strcat(client_id, "-"); 99 | strcat(client_id, type_name); 100 | client_id[client_id_len] = 0; 101 | 102 | json_formatter = new threading::formatter::AddingJSON(this, 103 | threading::formatter::AddingJSON::TS_MILLIS, 104 | sensor_name, 105 | type_name, 106 | BifConst::KafkaLogger::logstash_style_timestamp 107 | ); 108 | } 109 | 110 | KafkaWriter::~KafkaWriter() 111 | { 112 | delete [] broker_name; 113 | delete [] topic_name; 114 | delete [] client_id; 115 | delete [] compression_codec; 116 | delete [] fixed_fields; 117 | // I think I need to shut down the connection to the producer before deleting 118 | // these variables. Also, if I just blindly delete these two, bro segfaults when 119 | // shutting down. 
Confess I don't understand what's happening here. 120 | //delete producer; 121 | //delete topic; 122 | delete json_formatter; 123 | } 124 | 125 | 126 | threading::Field** KafkaWriter::MakeFields(const threading::Field* const* fields, int num_fields, std::string path){ 127 | // create the renamed fields, based on user-supplied config. 128 | threading::Field** newFields = (threading::Field**)malloc(sizeof(threading::Field*) * (num_fields)); 129 | 130 | // what I'd like to do is 131 | // first, grab the rename table for just this log 132 | // The config will have a table of table of strings. 133 | // the first table key is the name of the log (dns, http, etc) 134 | // the internal table key is the column name, the internal table value 135 | // will be the name to change that to. 136 | // loop over the existing fields, look up the field name in the rename table. 137 | // if it exists, create a new field entry with the new name, otherwise, 138 | // copy the existing field name in to the new field list. 139 | // 140 | // However, I can't get the bro TableVar Lookup to return anything 141 | // even for tables that I know have data in them. I'm clearly doing 142 | // something wrong. So, hardcode the renames in the interest of 143 | // getting something done. 144 | // 145 | // Also, need to remove "."s from names for ElasticSearch. 146 | // 147 | for (int i = 0; i < num_fields; i++){ 148 | std::string newName; 149 | 150 | if (strcmp(fields[i]->name, "ts") == 0) 151 | { 152 | newName = "timestamp"; 153 | } 154 | else if (strcmp(fields[i]->name, "id.orig_h") == 0) 155 | { 156 | newName = "source_ip"; 157 | } 158 | else if (strcmp(fields[i]->name, "id.orig_p") == 0) 159 | { 160 | newName = "source_port"; 161 | } 162 | else if (strcmp(fields[i]->name, "id.resp_h") == 0) 163 | { 164 | newName = "dest_ip"; 165 | } 166 | else if (strcmp(fields[i]->name,"id.resp_p") == 0) 167 | { 168 | newName = "dest_port"; 169 | } 170 | else if (strcmp(fields[i]->name, "seen.indicator") == 0) 171 | { 172 | newName = "indicator"; 173 | } 174 | else if (strcmp(fields[i]->name, "seen.indicator_type") == 0) 175 | { 176 | newName = "indicator_type"; 177 | } 178 | else if (strcmp(fields[i]->name, "seen.where") == 0) 179 | { 180 | newName = "seen_where"; 181 | } 182 | else if (strcmp(fields[i]->name, "seen.node") == 0) 183 | { 184 | newName = "seen_node"; 185 | } 186 | else if (strcmp(fields[i]->name, "data_channel.orig_h") == 0) //ftp 187 | { 188 | newName = "data_channel_source_port"; 189 | } 190 | else if (strcmp(fields[i]->name, "data_channel.passive") == 0) 191 | { 192 | newName = "data_channel_passive"; 193 | } 194 | else if (strcmp(fields[i]->name, "data_channel.resp_h") == 0) 195 | { 196 | newName = "data_channel_dest_ip"; 197 | } 198 | else if (strcmp(fields[i]->name, "data_channel.resp_p") == 0) 199 | { 200 | newName = "data_channel_dest_port"; 201 | } 202 | else if (strcmp(fields[i]->name, "bound.host") == 0) //socks 203 | { 204 | newName = "bound_host"; 205 | } 206 | else if (strcmp(fields[i]->name, "bounds.host") == 0) 207 | { 208 | newName = "bounds_host"; 209 | } 210 | else if (strcmp(fields[i]->name, "bound.name") == 0) 211 | { 212 | newName = "bound_name"; 213 | } 214 | 215 | if (newName.empty()){ 216 | newFields[i] = new threading::Field(fields[i]->name, 217 | fields[i]->secondary_name, 218 | fields[i]->type, 219 | fields[i]->subtype, 220 | true); 221 | } 222 | else { 223 | newFields[i]= new threading::Field(newName.c_str(), 224 | fields[i]->secondary_name, 225 | fields[i]->type, 226 | fields[i]->subtype, 227 
| true); 228 | } 229 | 230 | } 231 | 232 | return newFields; 233 | } 234 | 235 | bool KafkaWriter::DoInit(const WriterInfo& info, int num_fields, const threading::Field* const* fields) 236 | { 237 | conf = RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL); 238 | tconf = RdKafka::Conf::create(RdKafka::Conf::CONF_TOPIC); 239 | // note: hard-coding to use the random partitioner right now... 240 | RandomPartitionerCallback* part_cb = new RandomPartitionerCallback(); 241 | std::string errstr; 242 | 243 | // set up kafka connection (get brokers, set partitioner, etc) 244 | if (conf->set("metadata.broker.list", broker_name, errstr) != RdKafka::Conf::CONF_OK){ 245 | reporter->Error("Failed to set metatdata.broker.list: %s", errstr.c_str()); 246 | return false; 247 | } 248 | int default_batch_size_len = BifConst::KafkaLogger::default_batch_size->Len(); 249 | char* default_batch_size = new char[default_batch_size_len + 1]; 250 | memcpy(default_batch_size, BifConst::KafkaLogger::default_batch_size->Bytes(), default_batch_size_len); 251 | default_batch_size[default_batch_size_len] = 0; 252 | 253 | int max_batch_size_len = BifConst::KafkaLogger::max_batch_size->Len(); 254 | char* max_batch_size = new char[max_batch_size_len + 1]; 255 | memcpy(max_batch_size, BifConst::KafkaLogger::max_batch_size->Bytes(), max_batch_size_len); 256 | max_batch_size[max_batch_size_len] = 0; 257 | 258 | int max_batch_interval_len = BifConst::KafkaLogger::max_batch_interval->Len(); 259 | char* max_batch_interval = new char[max_batch_interval_len + 1]; 260 | memcpy(max_batch_interval, BifConst::KafkaLogger::max_batch_interval->Bytes(), max_batch_interval_len); 261 | max_batch_interval[max_batch_interval_len] = 0; 262 | 263 | conf->set("compression.codec", compression_codec, errstr); 264 | conf->set("client.id", client_id, errstr); 265 | conf->set("batch.num.messages", default_batch_size, errstr); 266 | conf->set("queue.buffering.max.messages", max_batch_size, errstr); 267 | conf->set("queue.buffering.max.ms", max_batch_interval, errstr); 268 | conf->set("producer.type", "async", errstr); 269 | 270 | if (strcmp("SASL_SSL", security_protocol) == 0) { 271 | //if ssl is supported without kerberos then this will have 272 | // to change to enable SSL but I'm not sure it make sense 273 | conf->set("security.protocol", security_protocol, errstr); 274 | } 275 | 276 | if ( (strcmp("SASL_SSL", security_protocol) ==0 ) || (strcmp("SASL_PLAINTEXT", security_protocol) ==0 ) ){ 277 | //SASL is enabled and we need to setup the kerberos options. 278 | 279 | conf->set("sasl.kerberos.service.name", kerberos_service_name, errstr); 280 | conf->set("sasl.kerberos.keytab", kerberos_keytab, errstr); 281 | conf->set("sasl.kerberos.principal", kerberos_principal, errstr); 282 | } 283 | 284 | if (tconf->set("partitioner_cb", part_cb, errstr) != RdKafka::Conf::CONF_OK){ 285 | reporter->Error("failed to set partitioner for Kafka. %s", errstr.c_str()); 286 | } 287 | 288 | producer = RdKafka::Producer::create(conf, errstr); 289 | if (!producer) { 290 | reporter->Error("Failed to create producer: %s", errstr.c_str()); 291 | return false; 292 | } 293 | topic = RdKafka::Topic::create(producer, topic_name, tconf, errstr); 294 | 295 | if (!topic) { 296 | reporter->Error("Failed to create topic."); 297 | return false; 298 | } 299 | 300 | // set up lookups and renamed fields. 
301 | fixed_fields = MakeFields(fields, num_fields, Info().path); 302 | 303 | return true; 304 | } 305 | 306 | 307 | bool KafkaWriter::DoWrite(int num_fields, const Field* const * fields, Value** vals) 308 | { 309 | ODesc buffer; 310 | 311 | // this may look silly, but as of this writing, Kafka's default 312 | // partitioning is poor. if you do not supply a key, kafka will never 313 | // call your partition function, even if one is specified in the config. 314 | // What it will do instead is choose a partition at random when it starts 315 | // up, and send everything to that partition. So, you need to supply a 316 | // partition key if you want your partitioner to be used 317 | const std::string partition_key = "this is a key to trigger partitioning."; 318 | 319 | 320 | buffer.Clear(); 321 | 322 | json_formatter->Describe(&buffer, num_fields, fixed_fields, vals); 323 | const char* bytes = (const char*)buffer.Bytes(); 324 | std::string errstr; 325 | 326 | // actually send the data to Kafka. 327 | RdKafka::ErrorCode resp = producer->produce(topic, 328 | RdKafka::Topic::PARTITION_UA, 329 | RdKafka::Producer::MSG_COPY /* Copy payload */, 330 | const_cast(bytes), 331 | strlen(bytes), 332 | &partition_key, 333 | NULL); 334 | if (resp != RdKafka::ERR_NO_ERROR) { 335 | errstr = RdKafka::err2str(resp); 336 | reporter->Error("Produce failed: %s", errstr.c_str()); 337 | reporter->Error("failed line: %s", bytes); 338 | } 339 | 340 | // Note: this bit here means that even if the send to kafka fails, we're just going to 341 | // drop the messages. Such is life. 342 | producer->poll(0); 343 | 344 | return true; 345 | } 346 | 347 | 348 | 349 | bool KafkaWriter::DoSetBuf(bool enabled) 350 | { 351 | // Nothing to do. 352 | return true; 353 | } 354 | 355 | bool KafkaWriter::DoFlush(double network_time) 356 | { 357 | // Nothing to do. 358 | return true; 359 | } 360 | 361 | bool KafkaWriter::DoFinish(double network_time) 362 | { 363 | RdKafka::wait_destroyed(5000); 364 | return true; 365 | } 366 | 367 | bool KafkaWriter::DoHeartbeat(double network_time, double current_time) 368 | { 369 | //nothing to do...all timing handled inside Kafka. 370 | return true; 371 | } 372 | 373 | bool KafkaWriter::DoRotate(const char* rotated_path, double open, double close, bool terminating) 374 | { 375 | // Nothing to do. 376 | FinishedRotation(); 377 | return true; 378 | } 379 | --------------------------------------------------------------------------------