├── .gitignore
├── blackboxtest
├── tail2es
│ ├── nginx_json.log
│ ├── nginx_log.log
│ ├── basic.lua
│ ├── indexdoc.lua
│ └── main.lua
├── loadtest
│ ├── linecopy.lua
│ └── main.lua
├── tail2kafka
│ ├── filter.lua
│ ├── basic.lua
│ ├── grep.lua
│ ├── basic2.lua
│ ├── match.lua
│ ├── transform.lua
│ ├── aggregate.lua
│ └── main.lua
├── kafka2file
│ ├── test_rotate.lua
│ └── nginx.lua
├── blackbox_test.sh
├── loadtest.sh
└── kafka_service_unavailable_test.sh
├── src
├── bitshelper.h
├── metrics.h
├── cmdnotify.h
├── runstatus.h
├── inotifyctx.h
├── filerecord.h
├── fileoff.h
├── gnuatomic.h
├── kafkactx.h
├── util.cc
├── common.h
├── taskqueue.cc
├── uint64offset.h
├── sys.h
├── luafunction.h
├── util.h
├── filereader.h
├── taskqueue.h
├── cmdnotify.cc
├── metrics.cc
├── fileoff.cc
├── esctx.h
├── unittesthelper.h
├── luactx.h
├── sys.cc
├── transform.h
├── cnfctx.h
├── cnfctx.cc
├── inotifyctx.cc
├── kafka2file.cc
├── tail2kafka.cc
├── common.cc
├── tail2es_unittest.cc
├── kafkactx.cc
└── kafka2file_unittest.cc
├── scripts
├── tail2kafka.cron
├── tail2kafka.config
├── makerpm
├── catnull.sh
├── tail2kafka.init
└── auto-upgrade.sh
├── tools
└── es_clean.example
├── hyperscan.sh
├── mix
├── speedlimit.cc
├── config-kafka
├── kafka.config.template
└── ckeeper
├── doc
├── tail2kafka-cluster.org
└── tail2kafka-config.org
├── README.org
├── consumer
├── de.cgi
├── httpdata.pl
├── de
│ ├── cgi
│ │ └── de.profile.cgi
│ └── index.html
└── cqlexec.cc
├── tail2kafka.spec
└── Makefile
/.gitignore: -------------------------------------------------------------------------------- 1 | GPATH 2 | GRTAGS 3 | GTAGS 4 | .deps 5 | build 6 | -------------------------------------------------------------------------------- /blackboxtest/tail2es/nginx_json.log: -------------------------------------------------------------------------------- 1 | "{\"receiver\":\"bb_up\"}\n" -------------------------------------------------------------------------------- /blackboxtest/tail2es/nginx_log.log: -------------------------------------------------------------------------------- 1 | {\x22y\x22:\x22\x5Cufffd\x5Cufffd\x22} -------------------------------------------------------------------------------- /blackboxtest/loadtest/linecopy.lua: -------------------------------------------------------------------------------- 1 | file = "BIGLOG" 2 | topic = "biglog" 3 | autocreat = true 4 | autoparti = false 5 | rawcopy = false 6 | -------------------------------------------------------------------------------- /blackboxtest/tail2es/basic.lua: -------------------------------------------------------------------------------- 1 | file = "logs/basic.log" 2 | es_index = "#1_%F" 3 | es_doc = "#3/NGINX" 4 | autocreat = true 5 | startpos = "LOG_START" 6 | -------------------------------------------------------------------------------- /blackboxtest/tail2kafka/filter.lua: -------------------------------------------------------------------------------- 1 | file = "logs/filter.log" 2 | topic = "filter" 3 | timeidx = 4 4 | filter = {4, 5, 6, -1} 5 | autocreat = true 6 | -------------------------------------------------------------------------------- /blackboxtest/tail2es/indexdoc.lua: -------------------------------------------------------------------------------- 1 | file = "logs/indexdoc.log" 2 | autocreat = true 3 | startpos = "LOG_START" 4 | indexdoc = function(line) 5 | return "indexdoc", line 6 | end 7 | -------------------------------------------------------------------------------- /blackboxtest/kafka2file/test_rotate.lua:
-------------------------------------------------------------------------------- 1 | -- time_local, request must exist 2 | informat = { "time_local", "request" } 3 | 4 | delete_request_field = true 5 | time_local_format = "iso8601" 6 | -------------------------------------------------------------------------------- /blackboxtest/tail2kafka/basic.lua: -------------------------------------------------------------------------------- 1 | file = "logs/basic.log" 2 | topic = "basic" 3 | autocreat = true 4 | autoparti = true 5 | startpos = "LOG_START" 6 | -- autonl = false 7 | -- rawcopy = true 8 | -------------------------------------------------------------------------------- /blackboxtest/tail2kafka/grep.lua: -------------------------------------------------------------------------------- 1 | file = "logs/grep.log" 2 | topic = "grep" 3 | autocreat = true 4 | grep = function(fields) 5 | return {'[' .. fields[4] .. '] "' .. fields[5] .. '"', fields[6], fields[table.maxn(fields)]} 6 | end 7 | -------------------------------------------------------------------------------- /src/bitshelper.h: -------------------------------------------------------------------------------- 1 | #ifndef _BITS_HELPER_H_ 2 | #define _BITS_HELPER_H_ 3 | 4 | #define bits_set(flags, bit) (flags) |= (bit) 5 | #define bits_clear(flags, bit) (flags) &= ~(bit) 6 | #define bits_test(flags, bit) ((flags) & (bit)) 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /blackboxtest/tail2kafka/basic2.lua: -------------------------------------------------------------------------------- 1 | file = "logs/basic.%Y-%m-%d_%H-%M.log" 2 | fileWithTimeFormat = true 3 | topic = "basic2" 4 | autocreat = true 5 | fileOwner = "nobody:nobody" 6 | autoparti = true 7 | partition = -100 8 | startpos = "LOG_START" 9 | -------------------------------------------------------------------------------- /blackboxtest/tail2kafka/match.lua: -------------------------------------------------------------------------------- 1 | file = "logs/match.log" 2 | topic = "match" 3 | autocreat = true 4 | autoparti = true 5 | startpos = "LOG_START" 6 | 7 | match = { 8 | pattern = "\\[\\d{2}\\]" 9 | } 10 | 11 | -- print(match.pattern) 12 | -------------------------------------------------------------------------------- /blackboxtest/tail2kafka/transform.lua: -------------------------------------------------------------------------------- 1 | file = "logs/transform.log" 2 | topic = "transform" 3 | autocreat = true 4 | transform = function(line) 5 | local s = string.sub(line, 1, 7); 6 | if s == "[error]" then return line 7 | else return nil end 8 | end 9 | -------------------------------------------------------------------------------- /blackboxtest/tail2es/main.lua: -------------------------------------------------------------------------------- 1 | -- remove the trailing newline 2 | hostshell = "hostname" 3 | pidfile = "/var/run/tail2kafka.pid" 4 | polllimit = 50 5 | es_nodes = {"127.0.0.1:9200"} 6 | es_max_conns = 50 7 | 8 | rotatedelay = 10 9 | -- optional 10 | pingbackurl = "http://localhost/pingback/tail2kafka" 11 | -------------------------------------------------------------------------------- /scripts/tail2kafka.cron: -------------------------------------------------------------------------------- 1 | 30 0 * * * root find /var/log/tail2kafka -type f -mtime +10 -delete 2 | * * * * * root /usr/local/bin/tail2kafka-auto-upgrade.sh config >/dev/null 2>&1 3 | */10 * * * * root sleep $(($RANDOM\%300)); 
/usr/local/bin/tail2kafka-auto-upgrade.sh rpm >/dev/null 2>&1 4 | 5 | # WARN: NL is necessary for cron -------------------------------------------------------------------------------- /scripts/tail2kafka.config: -------------------------------------------------------------------------------- 1 | # configuration file for the tail2kafka service 2 | 3 | ETCDIR=/etc/tail2kafka 4 | PIDFILE=/var/run/tail2kafka.pid 5 | BIN=/usr/local/bin/tail2kafka 6 | 7 | #PRODUCT=test.tail2kafka 8 | #CONFIGURL=http://localhost/download 9 | #RPMURL=http://localhost/download/rpm 10 | #HOSTID=$(hostname) 11 | LIBDIR=/var/lib/tail2kafka 12 | #PINGBACKURL=http://localhost/pingback/tail2kafka 13 | -------------------------------------------------------------------------------- /src/metrics.h: -------------------------------------------------------------------------------- 1 | #ifndef _METRICS_H_ 2 | #define _METRICS_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include "taskqueue.h" 8 | 9 | namespace util { 10 | 11 | class Metrics { 12 | public: 13 | static bool create(const char *pingbackUrl, char *errbuf); 14 | static void pingback(const char *event, const char *fmt, ...); 15 | 16 | private: 17 | Metrics() {} 18 | static void destroy(); 19 | static Metrics *metrics_; 20 | 21 | private: 22 | std::string pingbackUrl_; 23 | TaskQueue tq_; 24 | 25 | CURL *curl_; 26 | }; 27 | 28 | } // namespace util 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /src/cmdnotify.h: -------------------------------------------------------------------------------- 1 | #ifndef _CMD_NOTIFY_H_ 2 | #define _CMD_NOTIFY_H_ 3 | 4 | #include 5 | #include 6 | 7 | class CmdNotify { 8 | public: 9 | CmdNotify(const char *cmd, const char *wdir, const char *topic, int partition) 10 | : cmd_(cmd), wdir_(wdir), topic_(topic), partition_(partition) {} 11 | bool exec(const char *file, const char *oriFile = 0, time_t timestamp = -1, uint64_t size = -1, const char *md5 = 0); 12 | 13 | private: 14 | char * const *buildEnv(const char *file, const char *oriFile, time_t timestamp, uint64_t size, const char *md5); 15 | 16 | private: 17 | const char *cmd_; 18 | const char *wdir_; 19 | const char *topic_; 20 | int partition_; 21 | }; 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /tools/es_clean.example: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ES="http://127.0.0.1:9200" 4 | 5 | TMPF=$1 6 | if [ "$TMPF" = "" ]; then 7 | TMPF=$(mktemp /tmp/es_clean.XXXXXX) 8 | curl -Ss -f "$ES/_cat/indices?local=true" > $TMPF 9 | fi 10 | 11 | EXPIRE_DAY=10 12 | 13 | NOW=$(date +%s) 14 | 15 | IFS='' 16 | while read line; do 17 | INDICE=$(echo $line | awk -e '{print $3}') 18 | 19 | PARTS=() 20 | IFS='_' read -ra PARTS <<< "$INDICE" 21 | SUFFIX=${PARTS[-1]} 22 | 23 | EXPIRE=0 24 | TS=$(date --date $SUFFIX +%s) 25 | (test $?
!= 0 || test $(($TS + 86400 * $EXPIRE_DAY)) -le $NOW) && EXPIRE=1 26 | 27 | test $EXPIRE == 1 && { 28 | echo "DELETE $ES/$INDICE" 29 | curl -Ss -f -X DELETE "$ES/$INDICE" 30 | sleep 5 31 | } 32 | done < $TMPF 33 | 34 | rm -f /tmp/es_clean.* 35 | -------------------------------------------------------------------------------- /hyperscan.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd .deps 4 | test -f ragel-6.10.tar.gz || { 5 | curl -LO http://www.colm.net/files/ragel/ragel-6.10.tar.gz 6 | tar xzvf ragel-6.10.tar.gz 7 | cd ragel-6.10 8 | ./configure && make && make install 9 | cd - 10 | } 11 | 12 | test -f boost_1_73_0.tar.gz || { 13 | curl -LO https://dl.bintray.com/boostorg/release/1.73.0/source/boost_1_73_0.tar.gz 14 | tar xzvf boost_1_73_0.tar.gz 15 | } 16 | 17 | test -f hyperscan-5.2.1.tar.gz || { 18 | curl -L https://github.com/intel/hyperscan/archive/v5.2.1.tar.gz -o hyperscan-5.2.1.tar.gz 19 | tar xzvf hyperscan-5.2.1.tar.gz 20 | ln -sf boost_1_73_0/boost/ hyperscan-5.2.1/include/boost 21 | mkdir hyperscan-5.2.1/build 22 | cd hyperscan-5.2.1/build 23 | cmake .. && make && make install 24 | cd - 25 | } 26 | -------------------------------------------------------------------------------- /blackboxtest/loadtest/main.lua: -------------------------------------------------------------------------------- 1 | -- remove the trailing newline 2 | hostshell = "hostname" 3 | pidfile = "/var/run/tail2kafka.pid" 4 | partition = 0 5 | polllimit = 50 6 | brokers = "localhost:9092" 7 | 8 | rotatedelay = 10 9 | -- optional 10 | pingbackurl = "http://localhost/pingback/tail2kafka" 11 | 12 | kafka_global = { 13 | ["client.id"] = "tail2kafka", 14 | ["broker.version.fallback"] = "0.8.2.1", 15 | ["compression.codec"] = "snappy", 16 | ["max.in.flight"] = 10000, 17 | ["queue.buffering.max.messages"] = 100000, -- default 100000 18 | ["queue.buffering.max.kbytes"] = 512000, -- default 1048576 19 | ["message.send.max.retries"] = "10", 20 | ["statistics.interval.ms"] = "60000", 21 | } 22 | 23 | kafka_topic = { 24 | ["request.required.acks"] = _ACK_, 25 | ["message.timeout.ms"] = 0, -- infinite 26 | } 27 | -------------------------------------------------------------------------------- /blackboxtest/tail2kafka/aggregate.lua: -------------------------------------------------------------------------------- 1 | file = "logs/aggregate.log" 2 | topic = "aggregate" 3 | autosplit = true 4 | timeidx = 4 5 | withhost = true 6 | withtime = true 7 | autocreat = true 8 | pkey = "yuntu" 9 | 10 | aggregate = function(fields) 11 | local n = table.getn(fields) 12 | if n < 16 then return nil end 13 | 14 | local reqt = tonumber(fields[11]); 15 | if not reqt then return nil end 16 | 17 | local status = "status_" .. 
fields[9] 18 | local size = fields[10] 19 | local appid = fields[n]; 20 | 21 | if reqt <= 0.1 then reqt = "reqt<0.1" 22 | elseif reqt <= 0.3 then reqt = "reqt<0.3" 23 | elseif reqt <= 0.5 then reqt = "reqt<0.5" 24 | elseif reqt <= 1 then reqt = "reqt<1" 25 | else reqt = "reqt_show" end 26 | 27 | local tbl = {size = tonumber(size)}; 28 | tbl[status] = 1 29 | tbl[reqt] = 1 30 | 31 | return appid, tbl 32 | end 33 | -------------------------------------------------------------------------------- /src/runstatus.h: -------------------------------------------------------------------------------- 1 | #ifndef _RUN_STATUS_H_ 2 | #define _RUN_STATUS_H_ 3 | 4 | #include 5 | 6 | class RunStatus { 7 | public: 8 | enum Want {WAIT, START1, START2, RELOAD, REOPEN, STOP, IGNORE}; 9 | static RunStatus *create() { 10 | RunStatus *runStatus = new RunStatus; 11 | runStatus->want_ = START1; 12 | return runStatus; 13 | } 14 | 15 | Want get() const { 16 | return want_; 17 | } 18 | 19 | const char *status() const { 20 | switch (want_) { 21 | case WAIT: return "wait"; 22 | case START1: return "start1"; 23 | case START2: return "start2"; 24 | case RELOAD: return "reload"; 25 | case REOPEN: return "reopen"; 26 | case STOP: return "stop"; 27 | default: assert(0); 28 | } 29 | } 30 | 31 | void set(Want want) { 32 | want_ = want; 33 | } 34 | 35 | private: 36 | RunStatus() {} 37 | Want want_; 38 | }; 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /src/inotifyctx.h: -------------------------------------------------------------------------------- 1 | #ifndef _INOTIFY_CTX_H_ 2 | #define _INOTIFY_CTX_H_ 3 | 4 | #include 5 | #include "runstatus.h" 6 | 7 | class LuaCtx; 8 | class CnfCtx; 9 | 10 | class InotifyCtx { 11 | template friend class UNITTEST_HELPER; 12 | public: 13 | InotifyCtx(CnfCtx *cnf) : cnf_(cnf), wfd_(-1) {} 14 | ~InotifyCtx(); 15 | 16 | bool init(); 17 | void loop(); 18 | 19 | private: 20 | LuaCtx *getLuaCtx(int wd) { 21 | std::map::iterator pos = fdToCtx_.find(wd); 22 | return pos != fdToCtx_.end() ? 
pos->second : 0; 23 | } 24 | 25 | bool addWatch(LuaCtx *ctx, bool strict); 26 | void tryReWatch(bool remedy); 27 | void tagRotate(LuaCtx *ctx, int wd); 28 | void globalCheck(); 29 | 30 | void flowControl(RunStatus *runStatus, bool remedy); 31 | 32 | private: 33 | CnfCtx *cnf_; 34 | 35 | int wfd_; 36 | std::map fdToCtx_; 37 | }; 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /blackboxtest/tail2kafka/main.lua: -------------------------------------------------------------------------------- 1 | -- remove the trailing newline 2 | hostshell = "hostname" 3 | pidfile = "/var/run/tail2kafka.pid" 4 | partition = 0 5 | polllimit = 50 6 | brokers = "localhost:9092" 7 | 8 | rotatedelay = 10 9 | -- optional 10 | pingbackurl = "http://localhost/pingback/tail2kafka" 11 | 12 | kafka_global = { 13 | ["client.id"] = "tail2kafka", 14 | ["compression.codec"] = "snappy", 15 | ["message.max.bytes"] = 8000000, -- 8M 16 | ["max.in.flight"] = 10000, 17 | ["queue.buffering.max.messages"] = 100000, -- default 100000 18 | ["queue.buffering.max.kbytes"] = 512000, -- default 1048576 19 | ["queue.buffering.max.ms"] = 0, 20 | ["batch.num.messages"] = 10000, 21 | ["message.send.max.retries"] = "10", 22 | ["statistics.interval.ms"] = "60000", 23 | } 24 | 25 | kafka_topic = { 26 | ["request.required.acks"] = 1, 27 | ["message.timeout.ms"] = 0, -- infinite 28 | } 29 | -------------------------------------------------------------------------------- /src/filerecord.h: -------------------------------------------------------------------------------- 1 | #ifndef _FILE_RECORD_H_ 2 | #define _FILE_RECORD_H_ 3 | 4 | #include 5 | #include 6 | 7 | class LuaCtx; 8 | 9 | struct FileRecord { 10 | LuaCtx *ctx; 11 | ino_t inode; 12 | off_t off; 13 | 14 | const std::string *esIndex; 15 | const std::string *data; 16 | 17 | static FileRecord *create(ino_t inode_, off_t off_, const std::string *data_) { 18 | return create(inode_, off_, 0, data_); 19 | } 20 | 21 | static FileRecord *create(ino_t inode_, off_t off_, const std::string *esIndex_, 22 | const std::string *data_) { 23 | FileRecord *record = new FileRecord; 24 | record->inode = inode_; 25 | record->off = off_; 26 | 27 | record->esIndex = esIndex_; 28 | record->data = data_; 29 | return record; 30 | } 31 | 32 | static void destroy(FileRecord *record) { 33 | if (record->esIndex) delete record->esIndex; 34 | 35 | delete record->data; 36 | delete record; 37 | } 38 | }; 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /src/fileoff.h: -------------------------------------------------------------------------------- 1 | #ifndef _FILE_OFF_H_ 2 | #define _FILE_OFF_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | class CnfCtx; 11 | 12 | struct FileOffRecord { 13 | ino_t inode; 14 | off_t off; 15 | 16 | FileOffRecord() {} 17 | FileOffRecord(ino_t inode_, off_t off_) : inode(inode_), off(off_) {} 18 | 19 | // char file[FileOff::MAX_FILENAME_LENGTH]; 20 | }; 21 | 22 | class FileOff { 23 | template friend class UNITTEST_HELPER; 24 | public: 25 | static const size_t MAX_FILENAME_LENGTH; 26 | 27 | FileOff(); 28 | ~FileOff(); 29 | 30 | bool init(CnfCtx *cnf, char *errbuf); 31 | bool reinit(); 32 | off_t getOff(ino_t inode) const; 33 | bool setOff(ino_t inode, off_t off); 34 | 35 | private: 36 | bool loadFromFile(char *errbuf); 37 | void deleteMallocPtr(); 38 | 39 | private: 40 | CnfCtx *cnf_; 41 | std::string file_; 42 | void *addr_; 43 | size_t length_; 44 | 
std::map map_; 45 | }; 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /src/gnuatomic.h: -------------------------------------------------------------------------------- 1 | #ifndef _GNUATOMIC_H_ 2 | #define _GNUATOMIC_H_ 3 | 4 | #include 5 | 6 | namespace util { 7 | 8 | template 9 | IntegralType atomic_get(IntegralType *ptr) { 10 | return __sync_add_and_fetch(ptr, 0); 11 | } 12 | 13 | template 14 | IntegralType atomic_set(IntegralType *ptr, int val) { 15 | return __sync_lock_test_and_set(ptr, val); 16 | } 17 | 18 | template 19 | IntegralType atomic_inc(IntegralType *ptr, int val = 1) { 20 | return __sync_add_and_fetch(ptr, val); 21 | } 22 | 23 | template 24 | IntegralType atomic_dec(IntegralType *ptr, int val = 1) { 25 | return __sync_sub_and_fetch(ptr, val); 26 | } 27 | 28 | template 29 | IntegralType atomic_and(IntegralType *ptr, uint64_t val) { 30 | return __sync_and_and_fetch(ptr, val); 31 | } 32 | 33 | template 34 | IntegralType atomic_or(IntegralType *ptr, uint64_t val) { 35 | return __sync_or_and_fetch(ptr, val); 36 | } 37 | 38 | } // namespace util 39 | #endif 40 | -------------------------------------------------------------------------------- /scripts/makerpm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | SPECFILE=tail2kafka.spec 5 | 6 | RPMNAM=$(grep "Name:" $SPECFILE | awk '{print $2}') 7 | RPMVER=$(grep "Version:" $SPECFILE | awk '{print $2}') 8 | RPMREL=$(grep "Release:" $SPECFILE | awk '{print $2}') 9 | 10 | TMPDIR=/tmp/$RPMNAM-$RPMVER 11 | rm -rf $TMPDIR && mkdir $TMPDIR && mkdir $TMPDIR/.deps 12 | 13 | cp Makefile src scripts blackboxtest $TMPDIR -a 14 | cp .deps/*.a $TMPDIR/.deps 15 | 16 | RPMROOT=/usr/src/redhat 17 | test -d $RPMROOT || RPMROOT="$HOME/rpmbuild" 18 | mkdir -p $RPMROOT/SOURCES/ 19 | 20 | cd /tmp 21 | tar czf $RPMNAM-$RPMVER.tar.gz $RPMNAM-$RPMVER 22 | mv -f $RPMNAM-$RPMVER.tar.gz $RPMROOT/SOURCES/ 23 | cd - 24 | 25 | OSARCH=$(uname -r | grep -o 'el.*') 26 | OS=$(echo $OSARCH | cut -d'.' -f1) 27 | ARCH=$(echo $OSARCH | cut -d'.' 
-f2) 28 | test "$ARCH" != "" || ARCH="x86_64" 29 | 30 | rpmbuild -bb $SPECFILE 31 | mv $RPMROOT/RPMS/$ARCH/$RPMNAM-$RPMVER-$RPMREL.$ARCH.rpm $RPMNAM-$RPMVER-$RPMREL.$OS.$ARCH.rpm 32 | mv $RPMROOT/RPMS/$ARCH/$RPMNAM-debuginfo-$RPMVER-$RPMREL.$ARCH.rpm $RPMNAM-debuginfo-$RPMVER-$RPMREL.$OS.$ARCH.rpm 33 | -------------------------------------------------------------------------------- /src/kafkactx.h: -------------------------------------------------------------------------------- 1 | #ifndef _KAFKACTX_H_ 2 | #define _KAFKACTX_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "filerecord.h" 10 | class CnfCtx; 11 | class LuaCtx; 12 | 13 | class KafkaCtx { 14 | template friend class UNITTEST_HELPER; 15 | public: 16 | KafkaCtx() : rk_(0), nrkt_(0), rkts_(0), errors_(0) {} 17 | ~KafkaCtx(); 18 | bool init(CnfCtx *cnf, char *errbuf); 19 | bool produce(std::vector *datas); 20 | void poll(int timeout) { rd_kafka_poll(rk_, timeout); } 21 | bool ping(LuaCtx *ctx); 22 | 23 | private: 24 | CnfCtx *cnf_; 25 | 26 | rd_kafka_t *rk_; 27 | size_t nrkt_; 28 | rd_kafka_topic_t **rkts_; 29 | int *errors_; 30 | 31 | static void error_cb(rd_kafka_t *, int, const char *, void *); 32 | 33 | bool initKafka(const char *brokers, const std::map &gcnf, char *errbuf); 34 | rd_kafka_topic_t *initKafkaTopic(LuaCtx *ctx, const std::map &tcnf, char *errbuf); 35 | bool produce(FileRecord *data); 36 | }; 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /scripts/catnull.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | 3 | if [ "$NOTIFY_TOPIC" = "" ]; then 4 | echo "NOTIFY_TOPIC is required" 5 | exit 1 6 | fi 7 | 8 | if [ "$NOTIFY_FILE" = "" ]; then 9 | echo "NOTIFY_FILE is required" 10 | exit 1 11 | fi 12 | 13 | if [ "$NOTIFY_FILESIZE" = "" ]; then 14 | echo "NOTIFY_FILESIZE is required" 15 | exit 1 16 | fi 17 | 18 | test -f /etc/sysconfig/tail2kafka && source /etc/sysconfig/tail2kafka 19 | 20 | if [ "$NOTIFY_ORIFILE" != "" ] && [ "$PINGBACKURL" != "" ] && [ "$NOTIFY_FILEMD5" != "" ]; then 21 | curl -Ss "$PINGBACKURL?event=CATNULL&file=$NOTIFY_ORIFILE&size=$NOTIFY_FILESIZE&md5=$NOTIFY_FILEMD5&topic=$NOTIFY_TOPIC" 22 | fi 23 | 24 | DT=$(date +%F_%H-%M-%S) 25 | 26 | DIR=$(dirname $NOTIFY_FILE) 27 | for f in $(ls $DIR/* -t | tail -n +7); do 28 | size=$(stat -c '%s' $f) 29 | echo $DT rm $f size $size 30 | rm -f $f 31 | done 32 | 33 | if [ "$BLACKBOXTEST_OUTFILE" != "" ]; then 34 | printenv | grep NOTIFY_ > $BLACKBOXTEST_OUTFILE 35 | fi 36 | 37 | if [ "$BLACKBOXTEST_OUTFILE_TPL" != "" ]; then 38 | ORIFILE=$(basename $NOTIFY_ORIFILE) 39 | printenv | grep NOTIFY_ > $BLACKBOXTEST_OUTFILE_TPL.$ORIFILE 40 | fi 41 | -------------------------------------------------------------------------------- /src/util.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "util.h" 3 | 4 | namespace util { 5 | 6 | bool split(const char *str, char sp, std::vector *list) 7 | { 8 | int n = 0; 9 | for (const char *p = str; /* */; ++p) { 10 | if (!*p || *p == sp) { 11 | list->push_back(n); 12 | if (!*p) break; 13 | n = 0; 14 | } else if (*p >= '0' && *p <= '9') { 15 | n = n * 10 + *p - '0'; 16 | } else { 17 | return false; 18 | } 19 | } 20 | return true; 21 | } 22 | 23 | std::string trim(const std::string &str, bool left, bool right, const char *space) 24 | { 25 | std::string s; 26 | size_t start = 0, end = str.size(); 27 | for (size_t i = 0; left && i < str.size(); 
++i) { 28 | if (!strchr(space, str[i])) { 29 | start = i; 30 | break; 31 | } 32 | } 33 | 34 | for (size_t i = str.size(); right && i > start; --i) { 35 | if (!strchr(space, str[i-1])) { 36 | end = i; 37 | break; 38 | } 39 | } 40 | 41 | if (start < end) return str.substr(start, end - start); 42 | else return ""; 43 | } 44 | 45 | std::string &replace(std::string *s, char o, char n) 46 | { 47 | for (size_t i = 0, end = s->size(); i < end; ++i) { 48 | if (s->at(i) == o) (*s)[i] = n; 49 | } 50 | return *s; 51 | } 52 | 53 | } // namespace util 54 | -------------------------------------------------------------------------------- /blackboxtest/kafka2file/nginx.lua: -------------------------------------------------------------------------------- 1 | -- time_local, request must exist 2 | informat = { 3 | "ip", "-", "#remote_user", "time_local", "request", 4 | "status", "#body_bytes_sent", "request_time", "#http_referer", 5 | "#http_user_agent", "#http_x_forwarded_for", 6 | } 7 | 8 | delete_request_field = true 9 | time_local_format = "iso8601" 10 | 11 | -- format time_local to iso8601 12 | 13 | -- request: GET /pingback/storage?event=UPLOAD&hdfs_src=/pathtosrc&hdfs_dst=/hdfspath HTTP/1.1 14 | -- auto add request_method GET 15 | -- auto add request_uri /pingback/storage 16 | -- auto add request_qs TABLE, event=UPLOAD, hdfs_src=/pathtosrc, hdfs_dst=/hdfspath 17 | 18 | -- move method/uri/field in querystring to fields 19 | request_map = { 20 | ["uri"] = "__uri__", 21 | ["querystring"] = "__query__", 22 | 23 | ["ip"] = "ip", -- ip in querystring has a higher priority 24 | ["event"] = "event", 25 | } 26 | 27 | request_type = { 28 | ["status"] = "i", 29 | ["request_time"] = 'f', 30 | ["uri"] = {"prefix", "/host"} 31 | } 32 | 33 | -- if transform_param_fields is not nil, pass the selected fields to transform 34 | -- the third value transform return must be nil 35 | -- transform_param_fields = {} 36 | 37 | -- return timestamp, type, field-table 38 | transform = function(fields) 39 | return time, nil, fields 40 | end 41 | -------------------------------------------------------------------------------- /mix/speedlimit.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | static void microsleep(float ms) 9 | { 10 | struct timespec spec = {0, ms * 1000}; 11 | nanosleep(&spec, NULL); 12 | } 13 | 14 | int main(int argc, char *argv[]) 15 | { 16 | if (argc != 2) { 17 | fprintf(stderr, "%s limit(MB)\n", argv[0]); 18 | return EXIT_FAILURE; 19 | } 20 | 21 | int limit = atoi(argv[1]); 22 | if (limit > 500) { 23 | fprintf(stderr, "limit must <= 500\n"); 24 | return EXIT_FAILURE; 25 | } 26 | limit *= 1024 * 1024; 27 | 28 | size_t N = 1024 * 32; 29 | float micros = 1000 * 1000 / (limit/N + 1); 30 | int phase = 0; 31 | int total = 0; 32 | time_t start = time(0); 33 | 34 | char buffer[N]; 35 | ssize_t nn; 36 | while ((nn = read(STDIN_FILENO, buffer, N)) > 0) { 37 | ssize_t left = nn; 38 | while (left > 0) { 39 | ssize_t nw = write(STDOUT_FILENO, buffer + nn - left, left); 40 | assert(nw > 0); 41 | left -= nw; 42 | } 43 | 44 | phase += nn; 45 | total += nn; 46 | if (phase * 5 > limit) { 47 | time_t end = time(0); 48 | if (total > (end - start) * limit) { 49 | micros *= 1.1; 50 | } else { 51 | if (micros != 0) micros /= 1.1; 52 | } 53 | phase = 0; 54 | } 55 | microsleep(micros); 56 | } 57 | 58 | assert(nn >= 0); 59 | 60 | return EXIT_SUCCESS; 61 | } 62 | 
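A minimal usage sketch for the speedlimit helper above: it copies stdin to stdout while throttling throughput to the MB/s value given as its only argument (at most 500). The build command and the big.log / logs/basic.log paths are illustrative assumptions, not taken from the repo's Makefile or tests:

g++ -O2 -o speedlimit mix/speedlimit.cc
# replay a large log into a file watched by tail2kafka at roughly 30 MB/s
./speedlimit 30 < big.log >> logs/basic.log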
-------------------------------------------------------------------------------- /src/common.h: -------------------------------------------------------------------------------- 1 | #ifndef _COMMON_H_ 2 | #define _COMMON_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | static const int UNSET_INT = INT_MAX; 14 | 15 | struct LuaCtx; 16 | typedef std::vector LuaCtxPtrList; 17 | 18 | #define MAX_ERR_LEN 512 19 | 20 | bool shell(const char *cmd, std::string *output, char *errbuf); 21 | bool hostAddr(const std::string &host, uint32_t *addr, char *errbuf); 22 | void split(const char *line, size_t nline, std::vector *items); 23 | void splitn(const char *line, size_t nline, std::vector *items, 24 | int limit = -1, char delimiter = ' '); 25 | bool timeLocalToIso8601(const std::string &t, std::string *iso, time_t *timestamp = 0); 26 | bool parseIso8601(const std::string &t, time_t *timestamp); 27 | 28 | inline time_t mktime(int year, int mon, int day, int hour, int min, int sec) 29 | { 30 | struct tm tm; 31 | tm.tm_sec = sec; 32 | tm.tm_min = min; 33 | tm.tm_hour = hour; 34 | tm.tm_mday = day; 35 | tm.tm_mon = mon-1; 36 | tm.tm_year = year-1900; 37 | 38 | return mktime(&tm); 39 | } 40 | 41 | bool parseRequest(const char *ptr, std::string *method, std::string *path, std::map *query); 42 | 43 | inline int absidx(int idx, size_t total) 44 | { 45 | assert(total != 0); 46 | return idx > 0 ? idx-1 : total + idx; 47 | } 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /src/taskqueue.cc: -------------------------------------------------------------------------------- 1 | #include "taskqueue.h" 2 | 3 | using namespace util; 4 | 5 | TaskQueue::Task::~Task() {} 6 | 7 | TaskQueue::TaskQueue(const std::string &nam) 8 | : name_(nam), quit_(true) 9 | { 10 | pthread_mutex_init(&mutex_, 0); 11 | pthread_cond_init(&cond_, 0); 12 | } 13 | 14 | TaskQueue::~TaskQueue() 15 | { 16 | if (!tids_.empty()) stop(); 17 | 18 | pthread_mutex_destroy(&mutex_); 19 | pthread_cond_destroy(&cond_); 20 | } 21 | 22 | void *TaskQueue::run(void *ctx) 23 | { 24 | TaskQueue *tq = (TaskQueue *) ctx; 25 | tq->run(); 26 | return 0; 27 | } 28 | 29 | void TaskQueue::run() 30 | { 31 | quit_ = false; 32 | 33 | while (true) { 34 | pthread_mutex_lock(&mutex_); 35 | if (tasks_.empty()) { 36 | if (!quit_) { 37 | pthread_cond_wait(&cond_, &mutex_); 38 | } else { 39 | pthread_mutex_unlock(&mutex_); 40 | break; 41 | } 42 | } 43 | 44 | Task *task = tasks_.front(); 45 | tasks_.pop(); 46 | pthread_mutex_unlock(&mutex_); 47 | 48 | if (task == (Task *) 0) { 49 | quit_ = true; 50 | break; 51 | } else if (task == (Task *) 0x01) { 52 | quit_ = true; 53 | continue; 54 | } 55 | 56 | if (task->doIt()) { 57 | delete task; 58 | } else { 59 | if (task->canRetry()) { 60 | task->incRetry(); 61 | 62 | pthread_mutex_lock(&mutex_); 63 | tasks_.push(task); 64 | pthread_mutex_unlock(&mutex_); 65 | } else { 66 | delete task; 67 | } 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/uint64offset.h: -------------------------------------------------------------------------------- 1 | #ifndef _UINT64OFFSET_H_ 2 | #define _UINT64OFFSET_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | class Offset { 12 | public: 13 | Offset(uint64_t defaultOffset) : fd_(-1), offset_(defaultOffset) {} 14 | ~Offset() { if (fd_ != -1) close(fd_); } 15 | 16 | bool 
init(const char *path, char *errbuf) { 17 | bool rc = true; 18 | struct stat st; 19 | if (stat(path, &st) == 0) { 20 | fd_ = open(path, O_RDWR, 0644); 21 | ssize_t nn = pread(fd_, &offset_, sizeof(offset_), 0); 22 | if (nn == 0) { 23 | sprintf(errbuf, "%s empty offset file, use default %lu", path, offset_); 24 | } else if (nn < 0) { 25 | sprintf(errbuf, "%s pread() error %d:%s", path, errno, strerror(errno)); 26 | rc = false; 27 | } 28 | } else if (errno == ENOENT) { 29 | fd_ = open(path, O_CREAT | O_WRONLY, 0644); 30 | if (fd_ == -1) { 31 | sprintf(errbuf, "open %s error, %d:%s", path, errno, strerror(errno)); 32 | rc = false; 33 | } else { 34 | sprintf(errbuf, "%s first create, use default %lu", path, offset_); 35 | } 36 | } else { 37 | sprintf(errbuf, "%s stat error, %d:%s", path, errno, strerror(errno)); 38 | rc = false; 39 | } 40 | 41 | return rc; 42 | } 43 | 44 | uint64_t get() const { return offset_; } 45 | 46 | void update(uint64_t offset) { 47 | offset_ = offset; 48 | pwrite(fd_, &offset, sizeof(offset), 0); 49 | } 50 | 51 | private: 52 | int fd_; 53 | uint64_t offset_; 54 | }; 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /doc/tail2kafka-cluster.org: -------------------------------------------------------------------------------- 1 | * 为什么要有集群部署 2 | =tail2kafka= 是采集数据的客户端,肯定部署在很多机器上,这些机器甚至不是大数据团队能控制的,但是大数据团队却要对tail2kafka的运行情况负责,例如tail2kafka是否有崩溃,tail2kafka有急需修复的BUG。为此需要有机制管理这些客户端。集群部署主要关注三个问题,tail2kafka配置管理,tail2kafka升级,tail2kafka运行数据。 3 | 4 | * 配置中心 5 | rpm方式安装后,会自动配置一个cron,用来自动升级配置,升级tail2kafka自身。为此需要一个配置中心。 6 | 7 | ** 相关配置 8 | 假设配置中心域名是 ~configcenter~ ,销售部门有客户数据需要上传,给此类数据定义ID ~sales.client~ 。配置文件的位置是 ~/etc/sysconfig/tail2kafka~ 9 | 10 | 1. 配置 ~RPMURL="http://configcenter/tail2kafka/rpm"~ 和 ~CONFIGURL="http://configcenter/tail2kafka/config"~ 11 | 2. 配置产品ID ~PRODUCT="sales.client"~ 12 | 3. 配置机器ID,例如 ~HOSTID=$(hostname)~ 13 | 14 | ** 配置中心目录结构 15 | #+BEGIN_EXAMPLE 16 | tail2kafka/ 17 | ├── config 18 | │   └── sales.client 19 | │   ├── access_log.lua 20 | │   ├── main.lua 21 | │   ├── meta 22 | │   ├── sales.client-0.0.1.tar.gz 23 | │   └── tail2kafka.config 24 | └── rpm 25 | ├── tail2kafka-2.0.0-10.x86_64.rpm 26 | ├── tail2kafka-2.0.0-11.x86_64.rpm 27 | └── version 28 | #+END_EXAMPLE 29 | 30 | 更新配置版本命令 ~PRODUCT=sales.client; VER=0.0.1; mkdir $PRODUCT-$VER; cp *.lua $PRODUCT-$VER; tar czf $PRODUCT-$VER.tar.gz $PRODUCT-$VER && rm -rf $PRODUCT-$VER; MD5=$(md5sum $PRODUCT-$VER.tar.gz | cut -d' ' -f1); echo "$VER-$MD5" > meta~ 31 | 32 | rpm的version文件样例,这里 =HOSTID= 包含web01的rpm版本是2.0.0-11,其它机器的版本是2.0.0-10。这里没有区分rpm的os版本,如果有多个os版本,需要用不同目录区分下。 33 | #+BEGIN_EXAMPLE 34 | web01=2.0.0-11 35 | *=2.0.0-10 36 | #+END_EXAMPLE 37 | 38 | * 回调数据收集 39 | 1. 在 ~main.lua~ 配置回调地址 ~pingbackurl="http://configcenter/tail2kafka/pingback"~ ,tail2kafka 的运行信息回调到这个地址 40 | 2. 
在 ~/etc/sysconfig/tail2kafka~ 中配置回调地址 ~PINGBACKURL="http://configcenter/tail2kafka/pingback"~ ,tail2kafka-auto-upgrade.sh 的运行信息会回调到这个地址 41 | -------------------------------------------------------------------------------- /src/sys.h: -------------------------------------------------------------------------------- 1 | #ifndef _SYS_H_ 2 | #define _SYS_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include "runstatus.h" 8 | 9 | namespace sys { 10 | 11 | inline void millisleep(int ms) 12 | { 13 | struct timespec spec = {0, ms * 1000 * 1000}; 14 | nanosleep(&spec, 0); 15 | } 16 | 17 | inline std::string timeFormat(time_t time, const char *format, int len = -1) 18 | { 19 | struct tm ltm; 20 | localtime_r(&time, <m); 21 | 22 | if (len > 0) { 23 | char *buffer = new char[len + 64]; 24 | int n = strftime(buffer, len + 64, format, <m); 25 | std::string s(buffer, n); 26 | delete[] buffer; 27 | return s; 28 | } else { 29 | char buffer[64]; 30 | int n = strftime(buffer, 64, format, <m); 31 | return std::string(buffer, n); 32 | } 33 | } 34 | 35 | class SignalHelper { 36 | public: 37 | static RunStatus *runStatusPtr; 38 | static size_t signoCount; 39 | static int *signosPtr; 40 | static RunStatus::Want *wantsPtr; 41 | 42 | SignalHelper(char *errbuf) : errbuf_(errbuf) {} 43 | 44 | bool signal(RunStatus *runStatus, int num, int *signos, RunStatus::Want *wants); 45 | 46 | // last argument must less than 0 47 | bool block(int signo, ...); 48 | int suspend(int signo, ...); 49 | bool setmask(int signo, ...); 50 | 51 | private: 52 | char *errbuf_; 53 | }; 54 | 55 | bool endsWith(const char *haystack, const char *needle); 56 | bool readdir(const char *dir, const char *suffix, std::vector *files, char *errbuf); 57 | bool isdir(const char *dir, char *errbuf); 58 | 59 | bool file2vector(const char *file, std::vector *files, size_t start = 0, size_t end = -1); 60 | 61 | bool initSingleton(const char *pidfile, char *errbuf); 62 | 63 | } // sys 64 | #endif 65 | -------------------------------------------------------------------------------- /README.org: -------------------------------------------------------------------------------- 1 | * 简介 2 | tail2kafka 是一个小工具,用于把文件内容实时 *逐行* 发送到 [[https://kafka.apache.org/][kafka]], 类似于linux ~tail~ 命令实时把文件输出到屏幕上。在把文件行发送到kafka前,可以对文件行做一些 ~编程工作~ ,这里用到了 [[https://www.lua.org/][lua]]。 3 | 4 | 编译后的tail2kafka只有一个可执行文件,没有任何依赖,使用起来非常简单 5 | 6 | * 编译安装启动 7 | ** 准备依赖 8 | ~make get-deps~ 下载依赖库。在编译完成后,依赖库被静态链接到tail2kafka,所以tail2kafka仅有一个可执行文件,仅复制这一个文件到目标机器,就完成部署。 9 | 10 | ** 编译安装 11 | 编译 ~make~ 安装 ~make install~ ,默认路径是 ~/usr/local/bin~ 。如果需要更改安装路径 ~make install INSTALLDIR=/installpath~ 12 | 13 | 也可以使用 =./scripts/makerpm= 打rpm包 14 | 15 | ** 启动 16 | 启动 ~tail2kafka /etc/tail2kafka~ 读取 =/etc/tail2kafka= 目录下的lua文件,并启动。 17 | 18 | 停止 ~kill $(cat /var/run/tail2kafka.pid)~ 19 | 20 | 重新加载配置 ~kill -HUP $(cat /var/run/tail2kafka.pid)~ 21 | 22 | ** 架构 23 | =tail2kafka= 启动后,有两个进程,子进程完成实际工作,父进程负责重新加载配置和子进程存活检测。 24 | 25 | * 数据分区和完整性 26 | =tail2kafka= 支持三种分区方式,固定分区、根据机器IP分区和随机分区。 27 | 28 | 固定分区是指定一个固定分区,根据机器IP分区是对IP做hash自动计算出分区,两者的共同点是文件的数据总是发送到同一个分区,这两种方式可以保证绝对不丢数据。因为文件本身就是一个队列,kafka的一个分区也是一个队列, =队列= 之间复制保证数据完整性。缺点是分区不均衡,且分区数量受机器数量限制,为了克服这个限制可以在消费时做二次分区。 29 | 30 | 随机分区是随机选择一个分区发送数据,一个文件的数据发送到多个分区,相当于一个 =队列= 的数据复制到多个 =队列= ,没办法维护队列之间的映射关系,也就没法记录已发送数据在文件里的offset,进程重启时没法保证数据不丢失。好处是分区均衡。 31 | 32 | * 配置 33 | 参考 [[./doc/tail2kafka-config.org][tail2kafka 配置]] 34 | 35 | * 内置集群部署支持 36 | 参考 [[./doc/tail2kafka-cluster.org][tail2kafka 集群]] 37 | 38 | * 性能 39 | 虚拟机测试供参考,测试脚本在 ~blackboxtest/loadtest.sh~ 40 | 41 | | 数据大小 | 
平均每行大小 | ACK | 行/ 秒 | 字节/ 秒 | 42 | |----------+--------------+-----+--------+----------| 43 | | 4G | 2048 | 1 | 15267 | 31M | 44 | | 4G | 2048 | 0 | 25641 | 52M | 45 | | 2G | 1024 | 1 | 30769 | 32M | 46 | | 2G | 1024 | 0 | 40816 | 42M | 47 | | 1G | 512 | 1 | 58823 | 31M | 48 | | 1G | 512 | 0 | 90909 | 48M | 49 | | 512M | 256 | 1 | 117647 | 32M | 50 | | 512M | 256 | 0 | 117647 | 32M | 51 | 52 | * 限制 53 | - 需要文件使用 =\n= 作为换行符 54 | - 一行最长2M,不建议使用很长的行,可能触发librdkafka的BUG 55 | -------------------------------------------------------------------------------- /consumer/de.cgi: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use strict; 4 | use warnings; 5 | use Time::Local; 6 | use FindBin qw($Bin); 7 | use LWP; 8 | use JSON::PP qw(decode_json); 9 | 10 | # QUERY_STRING="topic=yuntu-app-tair&id=yuntu&start=2015-04-21T16:00:00&end=2015-04-21T16:30:00" /var/www/cgi-bin/de.cgi 11 | 12 | my $Cmd = "$Bin/cassandra2aggregate"; 13 | my $Ca = "10.134.72.118"; 14 | 15 | my $query = $ENV{QUERY_STRING}; 16 | $query =~ s/%(.{2})/chr(hex($1))/ge; 17 | 18 | my %param = map {my ($k, $v) = split "=", $_; $k => $v} split "&", $query; 19 | 20 | print "Content-Type: text/event-stream\n"; 21 | print "Cache-Control: no-cache\n"; 22 | print "Access-Control-Allow-Origin: *\n"; 23 | print "\n"; 24 | 25 | print "retry: 10000\n"; 26 | 27 | if ($param{end} eq "forever") { 28 | my ($time, $unit) = str2time($param{start}); 29 | 30 | while (1) { 31 | while ($time + ($unit == 1 ? 5 : 60) > time) { 32 | sleep($unit); 33 | } 34 | my $start = time2str($time, $unit); 35 | my $cmd = join(" ", $Cmd, $Ca, $param{topic}, $param{id}, $start, $start, "asc", "all"); 36 | my $out = `$cmd`; 37 | print scalar localtime, " ", $cmd, "\n"; 38 | print $out unless ($out eq ""); 39 | $time += $unit; 40 | } 41 | } else { 42 | my $cmd = join(" ", $Cmd, $Ca, 43 | $param{topic}, $param{id}, $param{start}, $param{end}, 44 | $param{order} || "desc", $param{mode} || "all"); 45 | exec($cmd); 46 | } 47 | 48 | sub time2str { 49 | my ($time, $unit) = @_; 50 | my @v = localtime($time); 51 | if ($unit == 1) { 52 | return sprintf("%04d-%02d-%02dT%02d:%02d:%02d", $v[5]+1900, $v[4]+1, $v[3], $v[2], $v[1], $v[0]); 53 | } else { 54 | return sprintf("%04d-%02d-%02dT%02d:%02d", $v[5]+1900, $v[4]+1, $v[3], $v[2], $v[1]); 55 | } 56 | } 57 | 58 | sub str2time { 59 | my $s = shift; 60 | $s =~ /^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2}))?/; 61 | my $unit = 1; 62 | my $second; 63 | if (! defined $7) { 64 | $unit = 60; 65 | $second = 0; 66 | } else { 67 | $second = $7; 68 | } 69 | return (timelocal($second, $5, $4, $3, $2-1, $1-1900), $unit); 70 | } 71 | -------------------------------------------------------------------------------- /scripts/tail2kafka.init: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # chkconfig: - 58 74 4 | 5 | ETCDIR=/etc/tail2kafka 6 | PIDFILE=/var/run/tail2kafka.pid 7 | BIN=/usr/local/bin/tail2kafka 8 | PNAME=$(basename $BIN) 9 | 10 | CFG=/etc/sysconfig/tail2kafka 11 | test -f $CFG && source $CFG 12 | 13 | proc_exists() { 14 | if [ -f $PIDFILE ]; then 15 | PID=$(cat $PIDFILE) 16 | if [ -d "/proc/$PID" ]; then 17 | EXE=$(readlink "/proc/$PID/exe" | awk '{print $1}') 18 | if test -f "$EXE"; then 19 | if [ "$EXE" == "$BIN" ]; then 20 | return 0 21 | fi 22 | else 23 | return 0 24 | fi 25 | fi 26 | fi 27 | return 1 28 | } 29 | 30 | proc_start() { 31 | $BIN $ETCDIR 32 | for ((i=0; i<3; ++i)); do 33 | proc_exists 34 | if [ $? 
== 0 ]; then 35 | echo "Start $PNAME success." 36 | exit 0 37 | fi 38 | sleep 1 39 | done 40 | echo "Start $PNAME failed." 41 | exit 1 42 | } 43 | 44 | proc_stop() { 45 | proc_exists 46 | if [ $? == 0 ]; then 47 | kill $(cat $PIDFILE) 48 | fi 49 | echo "Stop $PNAME ok." 50 | exit 0 51 | } 52 | 53 | proc_reload() { 54 | proc_exists 55 | if [ $? == 0 ]; then 56 | kill -HUP $(cat $PIDFILE) 57 | echo "reload $PNAME ok." 58 | else 59 | echo "$PNAME is not running" 60 | fi 61 | } 62 | 63 | proc_restart() { 64 | proc_exists 65 | if [ $? == 0 ]; then 66 | kill $(cat $PIDFILE) 67 | fi 68 | echo "Stop $PNAME ok." 69 | proc_start 70 | } 71 | 72 | proc_status() { 73 | proc_exists 74 | if [ $? == 0 ]; then 75 | echo "$PNAME is running." 76 | exit 0 77 | fi 78 | echo "$PNAME is stoped." 79 | exit 1 80 | } 81 | 82 | case "$1" in 83 | start) 84 | proc_start 85 | ;; 86 | stop) 87 | proc_stop 88 | ;; 89 | reload) 90 | proc_reload 91 | ;; 92 | restart) 93 | proc_restart 94 | ;; 95 | status) 96 | proc_status 97 | ;; 98 | *) 99 | echo "Usage: $0 {start|stop|reload|restart|status}" >&2 100 | exit 1 101 | ;; 102 | esac 103 | 104 | exit 0 105 | -------------------------------------------------------------------------------- /consumer/httpdata.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use strict; 4 | use warnings; 5 | use Time::Local; 6 | use FindBin qw($Bin); 7 | use LWP; 8 | use JSON::PP qw(decode_json encode_json); 9 | 10 | my $api = shift or usage(); 11 | my $start = shift || yesterday(); 12 | my $end = shift || $start; 13 | 14 | $start = str2time($start); 15 | $end = str2time($end); 16 | while ($start <= $end) { 17 | my $day = time2str($start); 18 | my $rsp = LWP::UserAgent->new()->get("$api?date=$day"); 19 | $rsp->is_success or die "$api?date=$day error"; 20 | my $json = decode_json($rsp->content); 21 | 22 | my %total = ( 23 | appid => "total", 24 | reqnum => 0, 25 | qps => 0, 26 | disk => 0, 27 | ); 28 | 29 | $day = time2str2($start); 30 | 31 | foreach my $o (@$json) { 32 | delete $o->{date}; 33 | delete $o->{id}; 34 | delete $o->{ori_appid}; 35 | delete $o->{ol_appid}; 36 | 37 | my $data = encode_json({cluster => $o}); 38 | print "cluster $day ", $o->{appid}, " $data\n"; 39 | 40 | $total{qps} += $o->{qps}; 41 | $total{reqnum} += $o->{reqnum}; 42 | $total{disk} += $o->{disk}; 43 | 44 | if (exists $o->{bw}) { 45 | $total{bw} = (exists $total{bw}) ? $total{bw} + $o->{bw} : $o->{bw}; 46 | } 47 | if (exists $o->{bwcdn}) { 48 | $total{bwcdn} = (exists $total{bwcdn}) ? 
$total{bwcdn} + $o->{bwcdn} : $o->{bwcdn}; 49 | } 50 | } 51 | my $data = encode_json({cluster => \%total}); 52 | print "cluster $day total $data\n"; 53 | 54 | $start += 86400; 55 | } 56 | 57 | sub time2str { 58 | my $time = shift; 59 | my @v = localtime($time); 60 | return sprintf('%04d%02d%02d', $v[5]+1900, $v[4]+1, $v[3]); 61 | } 62 | 63 | sub time2str2 { 64 | my $time = shift; 65 | my @v = localtime($time); 66 | return sprintf('%04d-%02d-%02d', $v[5]+1900, $v[4]+1, $v[3]); 67 | } 68 | 69 | sub str2time { 70 | my $s = shift; 71 | $s =~ /^(\d{4})-(\d{2})-(\d{2})/; 72 | return timelocal(0, 0, 0, $3, $2-1, $1-1900); 73 | } 74 | 75 | sub yesterday { 76 | my @v = localtime(time - 86400); 77 | return sprintf('%04d-%02d-%02d', $v[5]+1900, $v[4]+1, $v[3]); 78 | } 79 | 80 | sub usage { 81 | print "$0 api [start] [end]\n"; 82 | exit(0); 83 | } 84 | -------------------------------------------------------------------------------- /src/luafunction.h: -------------------------------------------------------------------------------- 1 | #ifndef _LUAFUNCTION_H_ 2 | #define _LUAFUNCTION_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "luahelper.h" 9 | #include "luactx.h" 10 | #include "filerecord.h" 11 | 12 | class RegexFun; 13 | 14 | class LuaFunction { 15 | template friend class UNITTEST_HELPER; 16 | public: 17 | enum Type { FILTER, GREP, TRANSFORM, AGGREGATE, INDEXDOC, KAFKAPLAIN, ESPLAIN, NIL }; 18 | 19 | static LuaFunction *create(LuaCtx *ctx, LuaHelper *helper, Type defType); 20 | int process(off_t off, const char *line, size_t nline, std::vector *records); 21 | int serializeCache(std::vector *records); 22 | 23 | Type getType() const { return type_; } 24 | size_t extraSize() const { return extraSize_; } 25 | 26 | private: 27 | static const char *typeToString(Type type); 28 | 29 | LuaFunction(LuaCtx *ctx) : ctx_(ctx), helper_(0), type_(NIL) {} 30 | void init(LuaHelper *helper, const std::string &funName, Type type) { 31 | helper_ = helper; 32 | funName_ = funName; 33 | type_ = type; 34 | } 35 | 36 | int filter(off_t off, const std::vector &fields, std::vector *records); 37 | int grep(off_t off, const std::vector &fields, std::vector *records); 38 | int transform(off_t off, const char *line, size_t nline, std::vector *records); 39 | int aggregate(const std::vector &fields, std::vector *records); 40 | int kafkaPlain(off_t off, const char *line, size_t nline, std::vector *records); 41 | 42 | int indexdoc(off_t off, const char *line, size_t nline, std::vector *records); 43 | int esPlain(off_t off, const char *line, size_t nline, std::vector *records); 44 | 45 | static void transformEsDocNginxLog(const std::string &src, std::string *dst); 46 | static void transformEsDocNginxJson(const std::string &src, std::string *dst); 47 | 48 | private: 49 | LuaCtx *ctx_; 50 | LuaHelper *helper_; 51 | std::string funName_; 52 | Type type_; 53 | size_t extraSize_; 54 | 55 | std::vector filters_; 56 | RegexFun *matchFun_; 57 | 58 | std::string lasttime_; 59 | std::map > aggregateCache_; 60 | }; 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /src/util.h: -------------------------------------------------------------------------------- 1 | #ifndef _UTIL_H_ 2 | #define _UTIL_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace util { 10 | 11 | bool split(const char *str, char sp, std::vector *list); 12 | 13 | template 14 | std::string join(Iterator begin, Iterator end, char sp) 15 | { 16 | bool first = true; 17 | std::string s; 18 
| for (Iterator ite = begin; ite != end; ++ite) { 19 | if (!first) s.append(1, sp); 20 | s.append(*ite); 21 | first = false; 22 | } 23 | return s; 24 | } 25 | 26 | template 27 | bool hexToInt(const char *ptr, T *val) 28 | { 29 | *val = 0; 30 | for (int i = 0; i < 2; ++i) { 31 | int v; 32 | if (ptr[i] >= '0' && ptr[i] <= '9') v = ptr[i] - '0'; 33 | else if (ptr[i] >= 'a' && ptr[i] <= 'f') v = ptr[i] - 'a' + 10; 34 | else if (ptr[i] >= 'A' && ptr[i] <= 'F') v = ptr[i] - 'A' + 10; 35 | else return false; 36 | 37 | *val = *val * 16 + v; 38 | } 39 | return true; 40 | } 41 | 42 | #define HEXMAP "0123456789abcdef" 43 | inline const char *binToHex(const unsigned char *bin, size_t len, char *buffer) 44 | { 45 | const static char *hexmap = HEXMAP; 46 | char *ptr = buffer; 47 | for (size_t i = 0; i < len; ++i) { 48 | *ptr++ = hexmap[(bin[i] >> 4)]; 49 | *ptr++ = hexmap[bin[i] & 0x0F]; 50 | } 51 | *ptr = '\0'; 52 | return buffer; 53 | } 54 | 55 | inline int toInt(const char *ptr, size_t maxlen = -1) 56 | { 57 | int i = 0; 58 | size_t len = 0; 59 | while (len++ < maxlen && *ptr) { 60 | i = i * 10 + *ptr - '0'; 61 | ++ptr; 62 | } 63 | return i; 64 | } 65 | 66 | inline long toLong(const char *ptr, size_t maxlen = -1) 67 | { 68 | long l = 0; 69 | size_t len = 0; 70 | while (len++ < maxlen && *ptr) { 71 | l = l * 10 + *ptr - '0'; 72 | ++ptr; 73 | } 74 | return l; 75 | } 76 | 77 | template 78 | inline std::string toStr(IntType i, int len = -1, char padding = '0') 79 | { 80 | std::string s; 81 | do { 82 | s.append(1, i%10 + '0'); 83 | i /= 10; 84 | } while (i); 85 | 86 | if (len > 0 && (int) s.size() < len) s.append(len - (int) s.size(), padding); 87 | 88 | std::reverse(s.begin(), s.end()); 89 | return s; 90 | } 91 | 92 | std::string trim(const std::string &str, bool left = true, bool right = true, const char *space = " \t\n"); 93 | std::string &replace(std::string *s, char o, char n); 94 | 95 | } // namespace util 96 | 97 | #endif 98 | -------------------------------------------------------------------------------- /src/filereader.h: -------------------------------------------------------------------------------- 1 | #ifndef _FILE_READER_H_ 2 | #define _FILE_READER_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "filerecord.h" 11 | class LuaCtx; 12 | class FileOffRecord; 13 | 14 | enum FileInotifyStatus { 15 | FILE_MOVED = 0x0001, 16 | FILE_CREATED = 0x0002, 17 | FILE_ICHANGE = 0x0004, 18 | FILE_TRUNCATED = 0x0008, 19 | FILE_DELETED = 0x0010, 20 | }; 21 | 22 | class FileReader { 23 | template friend class UNITTEST_HELPER; 24 | public: 25 | enum StartPosition { LOG_START, LOG_END, START, END, NIL }; 26 | static StartPosition stringToStartPosition(const char *); 27 | 28 | FileReader(LuaCtx *ctx); 29 | ~FileReader(); 30 | 31 | void init(FileReader *reader) { 32 | parent_ = reader; 33 | } 34 | 35 | bool init(char *errbuf); 36 | 37 | bool eof() const { return eof_; } 38 | 39 | void tagRotate(int action, const char *newFile); 40 | bool remove(); 41 | 42 | bool tryReinit(); 43 | bool tail2kafka(StartPosition pos = NIL, const struct stat *stPtr = 0, std::string *rawData = 0); 44 | bool checkCache(); 45 | 46 | void initFileOffRecord(FileOffRecord * fileOffRecord); 47 | void updateFileOffRecord(const FileRecord *record); 48 | 49 | private: 50 | void propagateTailContent(size_t size); 51 | void propagateProcessLines(ino_t inode, off_t *off); 52 | void processLines(ino_t inode, off_t *off); 53 | int processLine(off_t off, char *line, size_t nline, std::vector *records); 54 | 
bool sendLines(ino_t inode, std::vector *records); 55 | 56 | bool openFile(struct stat *st, char *errbuf = 0); 57 | bool setStartPosition(off_t fileSize, char *errbuf); 58 | bool setStartPositionEnd(off_t fileSize, char *errbuf); 59 | 60 | std::string *buildFileStartRecord(time_t now); 61 | std::string *buildFileEndRecord(time_t now, off_t size, const char *oldFileName); 62 | void propagateRawData(const std::string *data); 63 | 64 | private: 65 | int fd_; 66 | off_t size_; 67 | ino_t inode_; 68 | uint32_t flags_; 69 | bool eof_; 70 | 71 | FileOffRecord *fileOffRecord_; 72 | 73 | size_t line_; 74 | size_t dline_; // send line 75 | off_t dsize_; // send size 76 | 77 | MD5_CTX md5Ctx_; 78 | std::string md5_; 79 | 80 | FileReader *parent_; 81 | 82 | char *buffer_; 83 | size_t npos_; 84 | LuaCtx *ctx_; 85 | }; 86 | 87 | #endif 88 | -------------------------------------------------------------------------------- /src/taskqueue.h: -------------------------------------------------------------------------------- 1 | #ifndef _TASK_QUEUE_H_ 2 | #define _TASK_QUEUE_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace util { 13 | 14 | class TaskQueue { 15 | public: 16 | class Task { 17 | protected: 18 | int retry_; 19 | int maxRetry_; 20 | public: 21 | Task(int maxRetry = 1) : retry_(0), maxRetry_(maxRetry) {} 22 | /* if return true, finish 23 | * if return false, try again 24 | */ 25 | void incRetry() { 26 | ++retry_; 27 | } 28 | 29 | bool canRetry() { 30 | return retry_+1 < maxRetry_; 31 | } 32 | 33 | virtual bool doIt() = 0; 34 | virtual ~Task(); 35 | }; 36 | 37 | TaskQueue(const std::string &name = ""); 38 | ~TaskQueue(); 39 | 40 | const std::string &name() const { 41 | return name_; 42 | } 43 | 44 | bool submit(Task *task) { 45 | pthread_mutex_lock(&mutex_); 46 | if (!quit_) tasks_.push(task); 47 | pthread_mutex_unlock(&mutex_); 48 | pthread_cond_signal(&cond_); 49 | return !quit_; 50 | } 51 | 52 | typedef std::vector TaskArray; 53 | bool submit(TaskArray *tasks) { 54 | for (TaskArray::iterator ite = tasks->begin(); ite != tasks->end(); ++ite) { 55 | if (!submit(*ite)) return false; 56 | } 57 | } 58 | 59 | static void* run(void *ctx); 60 | 61 | bool start(char *errbuf, size_t nthread = 1) { 62 | bool ret = true; 63 | for (size_t i = 0; ret && i < nthread; ++i) { 64 | pthread_t tid; 65 | int rc = pthread_create(&tid, 0, run, this); 66 | if (rc != 0) { 67 | snprintf(errbuf, 1024, "pthread_create error %s", strerror(rc)); 68 | ret = false; 69 | } else { 70 | tids_.push_back(tid); 71 | } 72 | } 73 | 74 | if (!ret) stop(true); 75 | else quit_ = false; 76 | 77 | return ret; 78 | } 79 | 80 | void run(); 81 | 82 | void stop(bool force = false) { 83 | submit(static_cast(force ? 0 : (void *) 0x01)); 84 | 85 | for (std::vector::iterator ite = tids_.begin(); ite != tids_.end(); ++ite) { 86 | pthread_join(*ite, 0); 87 | } 88 | tids_.clear(); 89 | 90 | assert(force || tasks_.empty()); 91 | } 92 | 93 | private: 94 | std::string name_; 95 | bool quit_; 96 | std::queue tasks_; 97 | std::vector tids_; 98 | 99 | pthread_mutex_t mutex_; 100 | pthread_cond_t cond_; 101 | }; 102 | 103 | } // namespace util 104 | 105 | #endif 106 | -------------------------------------------------------------------------------- /mix/config-kafka: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | my %opts = getopt(); 7 | 8 | open(IN, "<". 
$opts{'template'}); 9 | my @lines = ; 10 | close(IN); 11 | 12 | foreach my $line (@lines) { 13 | $line =~ s/_BROKER_ID_/${opts{'broker-id'}}/; 14 | $line =~ s/_HOST_NAME_/${opts{'host-name'}}/; 15 | $line =~ s/_LOG_DIRS_/${opts{'log-dirs'}}/; 16 | $line =~ s/_LOG_RETENTION_HOURS_/${opts{'log-retention-hours'}}/; 17 | $line =~ s/_NUM_PARTITIONS_/${opts{'num-partitions'}}/; 18 | $line =~ s/_DEFAULT_REPLICATION_FACTOR_/${opts{'default-replication-factor'}}/; 19 | $line =~ s/_ZOOKEEPER_CONNECT_/${opts{'zookeeper-connect'}}/; 20 | print $line; 21 | } 22 | 23 | sub help { 24 | print "config-kafka \n"; 25 | print " --broker-id=id default last ip segment\n"; 26 | print " --host-name=eth1\n"; 27 | print " --log-dirs=dirs\n"; 28 | print " --log-retention-hours=48\n"; 29 | print " --num-partitions=1\n"; 30 | print " --default-replication-factor=1\n"; 31 | print " --zookeeper-connect=zk\n"; 32 | print " --template=file template config file\n"; 33 | exit(0); 34 | } 35 | 36 | sub getopt { 37 | my %opt = ( 38 | 'broker-id' => undef, 39 | 'host-name' => 'eth1', 40 | 'log-dirs' => undef, 41 | 'log-retention-hours' => 48, 42 | 'num-partitions' => 1, 43 | 'default-replication-factor' => 1, 44 | 'zookeeper-connect' => undef, 45 | 'template' => undef, 46 | ); 47 | foreach my $arg (@ARGV) { 48 | if ($arg =~ /--([^=]+)=(.+)/) { 49 | unless (exists $opt{$1}) { 50 | print "unknow option $1\n"; 51 | help(); 52 | } 53 | $opt{$1} = $2; 54 | } 55 | } 56 | 57 | if ($opt{'host-name'} =~ /^eth/) { 58 | my $cmd = "/sbin/ip addr show ". $opt{'host-name'}; 59 | my $out = `$cmd`; 60 | unless ($out =~ /inet (\d+\.\d+\.\d+\.(\d+))/g) { 61 | print "unknow host inter ". $opt{'host-name'}, "\n"; 62 | help(); 63 | } 64 | $opt{'host-name'} = $1; 65 | unless (defined $opt{'broker-id'}) { 66 | $opt{'broker-id'} = $2; 67 | } 68 | } 69 | unless (defined $opt{'broker-id'}) { 70 | print "broker-id required\n"; 71 | help(); 72 | } 73 | unless (defined $opt{'log-dirs'}) { 74 | print "log-dirs required\n"; 75 | help(); 76 | } 77 | unless (defined $opt{'zookeeper-connect'}) { 78 | print "zookeeper-connect required\n"; 79 | help(); 80 | } 81 | unless (defined $opt{'template'}) { 82 | print "template required\n"; 83 | help(); 84 | } 85 | unless (-f $opt{'template'}) { 86 | print "template must be a file\n"; 87 | help(); 88 | } 89 | return %opt; 90 | } 91 | -------------------------------------------------------------------------------- /consumer/de/cgi/de.profile.cgi: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use strict; 4 | use warnings; 5 | use JSON::PP qw(encode_json); 6 | use Data::Dumper; 7 | 8 | my $DIR = "/search/de.profile"; 9 | 10 | if ($ENV{REQUEST_METHOD} eq "POST" || $ENV{REQUEST_METHOD} eq "PUT") { 11 | process_put(); 12 | } else { 13 | process_get(); 14 | } 15 | 16 | sub process_put { 17 | my $user; 18 | my $cookie = $ENV{HTTP_COOKIE}; 19 | if ($cookie && $cookie =~ /user=([^\s]+)/) { 20 | $user = $1; 21 | } 22 | if ($user && $user =~ /^[a-zA-Z0-9]+$/) { 23 | my $c = join("", <>); 24 | wfile("$DIR/$user.js", $c); 25 | print "Content-Type: text/plain\n"; 26 | print "\nOK"; 27 | } else { 28 | print "Status: 400\n"; 29 | print "Content-Type: text/plain\n"; 30 | print "\n"; 31 | } 32 | } 33 | 34 | sub process_get { 35 | my $query = $ENV{QUERY_STRING}; 36 | $query =~ s/%(.{2})/chr(hex($1))/ge; 37 | my %param = map {my ($k, $v) = split "=", $_; $k => $v} split "&", $query; 38 | 39 | my $user = $param{user}; 40 | unless ($user) { 41 | my $cookie = $ENV{HTTP_COOKIE}; 42 | 
if ($cookie && $cookie =~ /user=([^\s]+)/) { 43 | $user = $1; 44 | } 45 | } 46 | 47 | my $result; 48 | if ($user && $user =~ /^[a-zA-Z0-9_.-]+$/) { 49 | my $file = "$DIR/$user.js"; 50 | if (exists $param{'topic'} && exists $param{'id'} 51 | && exists $param{'attr'}) { 52 | $result = encode_json({ 53 | topic => $param{'topic'}, id => $param{'id'}, 54 | attr => [split /,/, $param{'attr'}], host => [split /,/, ($param{'host'} || "cluster")], 55 | attrs => {}, 56 | }); 57 | wfile($file, $result); 58 | } elsif (-f $file) { 59 | open(my $fh, "<$DIR/$user.js"); 60 | $result = join("", <$fh>); 61 | close($fh); 62 | } 63 | } 64 | 65 | if ($result) { 66 | my $gmt = gmt(time() + 86400 * 30); 67 | print "Content-Type: text/javascript\n"; 68 | print "Set-Cookie: user=$user Expires=$gmt Secure; HttpOnly\n"; 69 | print "\n"; 70 | print $result; 71 | } else { 72 | print "Status: 400\n"; 73 | print "Content-Type: text/plain\n"; 74 | print "\n"; 75 | print "Usage: ?user=_user_&topic=_topic_&id=_id_&attr=_attr1_,_attr2_[&host=_host_]\n"; 76 | } 77 | } 78 | 79 | sub wfile { 80 | my ($f, $c) = @_; 81 | open(my $fh, ">$f"); 82 | print $fh $c; 83 | close($fh); 84 | } 85 | 86 | sub gmt { 87 | my $t = shift || time(); 88 | my ($sec, $min, $hour, $mday, $mon, $year, $wday) = gmtime($t); 89 | $wday = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"]->[$wday]; 90 | $mon = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]->[$mon]; 91 | $year += 1900; 92 | return "$wday, $mday $mon $year $hour:$min:$sec GMT"; 93 | } 94 | -------------------------------------------------------------------------------- /src/cmdnotify.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "logger.h" 8 | #include "cmdnotify.h" 9 | 10 | #define MAX_ENVP_NUM 511 11 | extern char **environ; 12 | 13 | char * const *CmdNotify::buildEnv(const char *file, const char *oriFile, time_t timestamp, uint64_t size, const char *md5) 14 | { 15 | int i = 0; 16 | static char *envp[MAX_ENVP_NUM+1]; 17 | 18 | static char topicPtr[128]; 19 | snprintf(topicPtr, 128, "NOTIFY_TOPIC=%s", topic_); 20 | envp[i++] = topicPtr; 21 | 22 | static char partitionPtr[128]; 23 | snprintf(partitionPtr, 128, "NOTIFY_PARTITION=%d", partition_); 24 | envp[i++] = partitionPtr; 25 | 26 | static char filePtr[1024]; 27 | snprintf(filePtr, 1024, "NOTIFY_FILE=%s", file); 28 | envp[i++] = filePtr; 29 | 30 | static char oriFilePtr[1024]; 31 | if (oriFile) { 32 | snprintf(oriFilePtr, 1024, "NOTIFY_ORIFILE=%s", oriFile); 33 | envp[i++] = oriFilePtr; 34 | } 35 | 36 | static char timestampPtr[64]; 37 | if (timestamp != (time_t) -1) { 38 | snprintf(timestampPtr, 64, "NOTIFY_TIMESTAMP=%ld", timestamp); 39 | envp[i++] = timestampPtr; 40 | } 41 | 42 | static char sizePtr[64]; 43 | if (size != (uint64_t) -1) { 44 | snprintf(sizePtr, 64, "NOTIFY_FILESIZE=%lu", size); 45 | envp[i++] = sizePtr; 46 | } 47 | 48 | static char md5Ptr[64]; 49 | if (md5) { 50 | snprintf(md5Ptr, 64, "NOTIFY_FILEMD5=%s", md5); 51 | envp[i++] = md5Ptr; 52 | } 53 | 54 | for (int j = 0; i < MAX_ENVP_NUM && environ[j]; ++j) envp[i++] = environ[j]; 55 | 56 | envp[i] = 0; 57 | return envp; 58 | } 59 | 60 | bool CmdNotify::exec(const char *file, const char *oriFile, time_t timestamp, uint64_t size, const char *md5) 61 | { 62 | if (!cmd_) return false; 63 | 64 | pid_t pid = fork(); 65 | if (pid == 0) { 66 | char log[2048]; 67 | snprintf(log, 2048, "%s/%s.%d.notify.log", wdir_, 
topic_, partition_); 68 | int fd = open(log, O_CREAT | O_WRONLY | O_APPEND, 0666); 69 | if (fd != -1) { 70 | dup2(fd, STDOUT_FILENO); 71 | dup2(fd, STDERR_FILENO); 72 | } 73 | 74 | char * const argv[] = { (char *) cmd_, NULL }; 75 | char * const *envp = buildEnv(file, oriFile, timestamp, size, md5); 76 | 77 | std::string buffer; 78 | for (int i = 0; envp[i]; ++i) buffer.append(envp[i]).append(1, ' '); 79 | log_info(0, "exec cmd %s with env %s", cmd_, buffer.c_str()); 80 | 81 | if (execve(cmd_, argv, envp) == -1) { 82 | log_fatal(errno, "exec cmd %s with env %s error", cmd_, buffer.c_str()); 83 | } 84 | exit(0); 85 | } else if (pid > 0) { 86 | return true; 87 | } else { 88 | log_fatal(errno, "exec cmd %s fork() error", cmd_); 89 | return false; 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/metrics.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "logger.h" 7 | #include "taskqueue.h" 8 | #include "metrics.h" 9 | using namespace util; 10 | 11 | class PingbackTask : public TaskQueue::Task { 12 | public: 13 | PingbackTask(CURL *curl, const char *url) 14 | : TaskQueue::Task(3), curl_(curl), url_(url) { 15 | } 16 | bool doIt(); 17 | ~PingbackTask(); 18 | 19 | private: 20 | CURL *curl_; 21 | const char *url_; 22 | }; 23 | 24 | // black hole 25 | static size_t curlWriteCallback(void *, size_t size, size_t nmemb, void *) 26 | { 27 | return size * nmemb; 28 | } 29 | 30 | 31 | bool PingbackTask::doIt() 32 | { 33 | curl_easy_reset(curl_); 34 | curl_easy_setopt(curl_, CURLOPT_URL, url_); 35 | curl_easy_setopt(curl_, CURLOPT_NOPROGRESS, 1L); 36 | #ifdef CURLOPT_TCP_KEEPALIVE 37 | curl_easy_setopt(curl_, CURLOPT_TCP_KEEPALIVE, 1L); 38 | #endif 39 | curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, curlWriteCallback); 40 | 41 | CURLcode rc = curl_easy_perform(curl_); 42 | if (rc != CURLE_OK) { 43 | log_error(0, "pingback %s error %s", url_, curl_easy_strerror(rc)); 44 | return false; 45 | } 46 | 47 | long status = 0; 48 | curl_easy_getinfo(curl_, CURLINFO_RESPONSE_CODE, &status); 49 | log_info(0, "pingback %s status %ld", url_, status); 50 | 51 | return true; 52 | } 53 | 54 | PingbackTask::~PingbackTask() { 55 | delete []url_; 56 | } 57 | 58 | Metrics *Metrics::metrics_ = 0; 59 | 60 | bool Metrics::create(const char *pingbackUrl, char *errbuf) 61 | { 62 | if (metrics_ != 0) return true; 63 | 64 | std::auto_ptr metrics(new Metrics()); 65 | bool rc = false; 66 | do { 67 | metrics->curl_ = 0; 68 | if (pingbackUrl) { 69 | metrics->pingbackUrl_ = pingbackUrl; 70 | metrics->curl_ = curl_easy_init(); 71 | if (!metrics->curl_) break; 72 | } 73 | 74 | if (!metrics->tq_.start(errbuf)) break; 75 | rc = true; 76 | } while (0); 77 | 78 | metrics_ = metrics.release(); 79 | if (!rc) destroy(); 80 | 81 | atexit(Metrics::destroy); 82 | return true; 83 | } 84 | 85 | void Metrics::destroy() 86 | { 87 | if (metrics_ == 0) return; 88 | 89 | if (metrics_->curl_) curl_easy_cleanup(metrics_->curl_); 90 | metrics_->tq_.stop(true); 91 | 92 | delete metrics_; 93 | metrics_ = 0; 94 | } 95 | 96 | #define URLN 8192 97 | void Metrics::pingback(const char *event, const char *fmt, ...) 
98 | { 99 | if (metrics_ == 0 || metrics_->curl_ == 0) return; 100 | 101 | char *url = new char[URLN]; 102 | int n = snprintf(url, URLN, "%s?event=%s&", metrics_->pingbackUrl_.c_str(), event); 103 | 104 | va_list ap; 105 | va_start(ap, fmt); 106 | n += vsnprintf(url + n, URLN - n, fmt, ap); 107 | va_end(ap); 108 | 109 | metrics_->tq_.submit(new PingbackTask(metrics_->curl_, url)); 110 | } 111 | -------------------------------------------------------------------------------- /src/fileoff.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "cnfctx.h" 11 | #include "luactx.h" 12 | #include "filereader.h" 13 | #include "fileoff.h" 14 | 15 | const size_t FileOff::MAX_FILENAME_LENGTH = 256; 16 | 17 | FileOff::FileOff() 18 | { 19 | cnf_ = 0; 20 | addr_ = MAP_FAILED; 21 | } 22 | 23 | FileOff::~FileOff() 24 | { 25 | if (addr_ != MAP_FAILED) munmap(addr_, length_); 26 | } 27 | 28 | bool FileOff::loadFromFile(char *errbuf) 29 | { 30 | FILE *fp = fopen(file_.c_str(), "r"); 31 | if (!fp) { 32 | if (errno != ENOENT) { 33 | snprintf(errbuf, MAX_ERR_LEN, "FileOff load error %s", strerror(errno)); 34 | return false; 35 | } else { 36 | return true; 37 | } 38 | } 39 | FileOffRecord record; 40 | while (fread(&record, sizeof(record), 1, fp) == 1) { 41 | if (record.inode == 0 && record.off == 0) continue; 42 | map_.insert(std::make_pair(record.inode, record.off)); 43 | } 44 | 45 | fclose(fp); 46 | return true; 47 | } 48 | 49 | bool FileOff::init(CnfCtx *cnf, char *errbuf) 50 | { 51 | cnf_ = cnf; 52 | addr_ = MAP_FAILED; 53 | file_ = cnf->libdir() + "/fileoff"; 54 | 55 | if (!loadFromFile(errbuf)) return false; 56 | return true; 57 | } 58 | 59 | bool FileOff::reinit() 60 | { 61 | length_ = sizeof(FileOffRecord) * cnf_->getLuaCtxs().size(); 62 | 63 | if (addr_ != MAP_FAILED) { 64 | munmap(addr_, length_); 65 | addr_ = MAP_FAILED; 66 | } 67 | 68 | int fd = open(file_.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644); 69 | if (fd == -1) { 70 | snprintf(cnf_->errbuf(), MAX_ERR_LEN, "open %s error %s", file_.c_str(), strerror(errno)); 71 | return false; 72 | } 73 | 74 | if (ftruncate(fd, length_) == -1) { 75 | snprintf(cnf_->errbuf(), MAX_ERR_LEN, "ftruncate %s error %s", file_.c_str(), strerror(errno)); 76 | close(fd); 77 | return false; 78 | } 79 | 80 | addr_ = mmap(NULL, length_, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); 81 | if (addr_ == MAP_FAILED) { 82 | snprintf(cnf_->errbuf(), MAX_ERR_LEN, "mmap %s error %s", file_.c_str(), strerror(errno)); 83 | close(fd); 84 | return false; 85 | } 86 | 87 | char *ptr = (char *) addr_; 88 | for (std::vector::iterator ite = cnf_->getLuaCtxs().begin(); ite != cnf_->getLuaCtxs().end(); ++ite) { 89 | (*ite)->getFileReader()->initFileOffRecord((FileOffRecord *) ptr); 90 | ptr += sizeof(FileOffRecord); 91 | } 92 | memset(ptr, 0x00, (length_ - (ptr - (char *) addr_))); 93 | return true; 94 | } 95 | 96 | off_t FileOff::getOff(ino_t inode) const 97 | { 98 | std::map::const_iterator pos = map_.find(inode); 99 | if (pos == map_.end()) return -1; 100 | else return pos->second; 101 | return (off_t) -1; 102 | } 103 | 104 | bool FileOff::setOff(ino_t inode, off_t off) 105 | { 106 | std::map::iterator pos = map_.find(inode); 107 | if (pos == map_.end()) return false; 108 | 109 | pos->second = off; 110 | return true; 111 | } 112 | -------------------------------------------------------------------------------- /blackboxtest/blackbox_test.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BIN="${BASH_SOURCE[0]}" 4 | BINDIR=$(readlink -e $(dirname $BIN)) 5 | 6 | CFGDIR="$BINDIR/tail2kafka" 7 | PIDF=/var/run/tail2kafka.pid 8 | LIBDIR=/var/lib/tail2kafka 9 | BUILDDIR=$BINDIR/../build 10 | 11 | if [ ! -d $CFGDIR ]; then 12 | echo "$CFGDIR NOT FOUND" 13 | echo "disable autoparti" 14 | echo "main.lua partition=0" 15 | echo "main.lua pidfile=$PIDF" 16 | exit 1 17 | fi 18 | 19 | # delete.topic.enable=true 20 | test -f $BINDIR/../ENV.sh && source $BINDIR/../ENV.sh 21 | KAFKAHOME=${KAFKAHOME:-"/opt/kafka"} 22 | KAFKASERVER=${KAFKASERVER:-"localhost:9092"} 23 | HOSTNAME=${HOSTNAME:-$(hostname)} 24 | 25 | echo "WARN: YOU MUST KILL tail2kafka and kafka2file first, both may create topic automatic" 26 | 27 | T2KDIR=logs 28 | K2FDIR=kafka2filedir 29 | TOPICS="basic basic2 filter grep aggregate transform match" 30 | 31 | echo "kill tail2kafka" 32 | (test -f $PIDF && test -d /proc/$(cat $PIDF)) && kill $(cat $PIDF); sleep 2 33 | echo "kill kafka2file" 34 | for TOPIC in $TOPICS; do 35 | K2FPID=$K2FDIR/$TOPIC.0.lock 36 | (test -f $K2FPID && test -d /proc/$(cat $K2FPID)) && kill $(cat $K2FPID); sleep 2; kill -9 $(cat $K2FPID 2>/dev/null) 2>/dev/null 37 | done 38 | 39 | find $T2KDIR -type f -name "*.log" -delete 40 | 41 | cd $KAFKAHOME 42 | for TOPIC in $TOPICS; do 43 | bin/kafka-topics.sh --bootstrap-server $KAFKASERVER --delete --topic $TOPIC 44 | done 45 | bin/kafka-topics.sh --bootstrap-server $KAFKASERVER --list | egrep "$(echo $TOPICS | tr ' ' '|')" && { 46 | echo "$LINENO delete kafka topic error" 47 | exit 1 48 | } 49 | for TOPIC in $TOPICS; do 50 | bin/kafka-topics.sh --bootstrap-server $KAFKASERVER --create --replication-factor 1 --partitions 1 --topic $TOPIC 51 | done 52 | 53 | cd - 54 | 55 | OLDFILE=$K2FDIR/basic/${HOSTNAME}_basic.log.old 56 | test -d $K2FDIR || mkdir $K2FDIR 57 | rm -f $K2FDIR/basic.0.offset $OLDFILE 58 | 59 | export BLACKBOXTEST_OUTFILE=$K2FDIR/catnull 60 | 61 | K2FPID=$K2FDIR/basic.0.lock 62 | $BUILDDIR/kafka2file $KAFKASERVER basic 0 offset-end $K2FDIR $BUILDDIR/../scripts/catnull.sh & 63 | sleep 5 64 | if [ ! -f $K2FPID ] || [ ! -d /proc/$(cat $K2FPID) ]; then 65 | echo "start kafka2file failed" 66 | exit 1 67 | fi 68 | 69 | rm -rf $LIBDIR/*.history && rm -rf $LIBDIR/*.current 70 | cp $CFGDIR/main.lua $CFGDIR/main.lua.backup 71 | sed -i -E "s|localhost:9092|$KAFKASERVER|g" $CFGDIR/main.lua 72 | $BUILDDIR/tail2kafka $CFGDIR; sleep 2 73 | mv $CFGDIR/main.lua.backup $CFGDIR/main.lua 74 | 75 | if [ ! -f $PIDF ] || [ ! -d /proc/$(cat $PIDF) ]; then 76 | echo "start tail2kafka failed" 77 | exit 1; 78 | fi 79 | 80 | 81 | sleep 1 82 | export KAFKASERVER 83 | $BUILDDIR/tail2kafka_blackbox 84 | 85 | echo "WAIT kafka2file ... "; sleep 20 86 | 87 | if [ ! 
-f $OLDFILE ]; then 88 | echo "kafka2file rotate error, expect $OLDFILE" 89 | exit 1 90 | fi 91 | 92 | NUM=$(wc -l $OLDFILE | cut -d' ' -f 1) 93 | if [ "$NUM" != 200 ]; then 94 | echo "line of $OLDFILE is $NUM should be 200" 95 | exit 1 96 | fi 97 | 98 | source $BLACKBOXTEST_OUTFILE 99 | if [ "$NOTIFY_TOPIC" = "" ] || [ $NOTIFY_TOPIC != "basic" ]; then 100 | echo "NOTIFY_TOPIC $NOTIFY_TOPIC != basic" 101 | exit 1 102 | fi 103 | 104 | if [ "$NOTIFY_ORIFILE" = "" ]; then 105 | echo "NOTIFY_ORIFIL is not set" 106 | exit 1 107 | fi 108 | 109 | if [ "$NOTIFY_FILE" = "" ]; then 110 | echo "NOTIFY_FILE is not set" 111 | exit 1 112 | fi 113 | 114 | if [ "$NOTIFY_FILESIZE" = "" ]; then 115 | echo "NOTIFY_FILESIZE is not set" 116 | exit 1 117 | fi 118 | 119 | if [ "$NOTIFY_FILEMD5" = "" ]; then 120 | echo "NOTIFY_FILEMD5 is not set" 121 | exit 1 122 | fi 123 | 124 | MD5FILE=$(md5sum $NOTIFY_FILE | cut -d' ' -f1) 125 | MD5ORIFILE=$(md5sum $NOTIFY_ORIFILE | cut -d' ' -f1) 126 | if [ "$MD5FILE" != "$MD5ORIFILE" ]; then 127 | echo "$NOTIFY_FILE and $NOTIFY_ORIFILE are not the same md5" 128 | exit 1 129 | fi 130 | 131 | echo "$0 test ok" 132 | -------------------------------------------------------------------------------- /tail2kafka.spec: -------------------------------------------------------------------------------- 1 | Name: tail2kafka 2 | Version: 2.3.2 3 | Release: 2 4 | Summary: stream file data to kafka/stream kafka data to file 5 | Group: tail2kafka 6 | License: Apache2 7 | Source0: tail2kafka-2.3.2.tar.gz 8 | BuildRoot: /var/tmp/tail2kafka 9 | BuildRequires: libcurl-devel >= 7.19.7 10 | BuildRequires: openssl-devel >= 1.0.1e-30 11 | Requires: libcurl >= 7.19.7 12 | Requires: openssl >= 1.0.1e-30 13 | AutoReqProv: no 14 | 15 | %description 16 | use inotify stream file data to kafka 17 | consume kafka data to file 18 | 19 | %prep 20 | %setup -q 21 | 22 | %build 23 | make clean 24 | make 25 | 26 | %install 27 | mkdir -p $RPM_BUILD_ROOT/usr/local/bin 28 | cp build/tail2kafka $RPM_BUILD_ROOT/usr/local/bin 29 | cp build/kafka2file $RPM_BUILD_ROOT/usr/local/bin 30 | cp scripts/auto-upgrade.sh $RPM_BUILD_ROOT/usr/local/bin/tail2kafka-auto-upgrade.sh 31 | 32 | mkdir -p $RPM_BUILD_ROOT/etc/cron.d 33 | cp scripts/tail2kafka.cron $RPM_BUILD_ROOT/etc/cron.d/tail2kafka 34 | 35 | mkdir -p $RPM_BUILD_ROOT/etc/rc.d/init.d 36 | cp scripts/tail2kafka.init $RPM_BUILD_ROOT/etc/rc.d/init.d/tail2kafka 37 | 38 | mkdir -p $RPM_BUILD_ROOT/etc/sysconfig 39 | cp scripts/tail2kafka.config $RPM_BUILD_ROOT/etc/sysconfig/tail2kafka 40 | 41 | mkdir -p $RPM_BUILD_ROOT/etc/tail2kafka 42 | mkdir -p $RPM_BUILD_ROOT/usr/share/tail2kafka/etc 43 | cp blackboxtest/tail2kafka/*.lua $RPM_BUILD_ROOT/usr/share/tail2kafka/etc 44 | 45 | mkdir -p $RPM_BUILD_ROOT/etc/kafka2file 46 | mkdir -p $RPM_BUILD_ROOT/usr/share/kafka2file/etc 47 | cp blackboxtest/kafka2file/*.lua $RPM_BUILD_ROOT/usr/share/kafka2file/etc 48 | 49 | mkdir -p $RPM_BUILD_ROOT/var/lib/tail2kafka 50 | mkdir -p $RPM_BUILD_ROOT/var/log/tail2kafka 51 | 52 | %files 53 | %defattr(-,root,root) 54 | /usr/local/bin 55 | /usr/share/tail2kafka/etc 56 | /usr/share/kafka2file/etc 57 | 58 | /etc/tail2kafka 59 | /etc/kafka2file 60 | /etc/rc.d/init.d 61 | /etc/cron.d 62 | 63 | /var/lib/tail2kafka 64 | /var/log/tail2kafka 65 | 66 | %config(noreplace) /etc/sysconfig/tail2kafka 67 | 68 | %clean 69 | rm -rf $RPM_BUILD_ROOT 70 | 71 | %post 72 | ln -sf ../init.d/tail2kafka /etc/rc.d/rc3.d/S88tail2kafka 73 | 74 | %changelog 75 | * Thu Jul 16 2020 zzyongx -2.3.1-1 76 | - Bugfix: truncate file 77 | 
78 | * Thu May 28 2020 zzyongx -2.3.0-1 79 | - Changes: use hyperscan filter log 80 | - Changes: specify partiion parameter when calling rd_kafka_produce_batch 81 | 82 | * Wed Apr 1 2020 zzyongx -2.2.2-1 83 | - Bugfix: multi file tail to one kafka cause coredump 84 | 85 | * Mon Mar 18 2019 zzyongx -2.1.2-1 86 | - Changes: tail2es add flow control 87 | - Changes: tail2es use multi-thread 88 | 89 | * Thu Feb 21 2019 zzyongx -2.1.1-1 90 | - Feature: tail2es add basic auth 91 | - Bugfix: make cleantransformEsDocNginxLog 92 | 93 | * Mon Jan 28 2019 zzyongx -2.1.0-1 94 | - Feature: add tail2es 95 | 96 | * Wed Apr 18 2018 zzyongx -2.0.0-19 97 | - bugfix: turn off withhost flag leading to a dead cycle 98 | 99 | * Wed Apr 11 2018 zzyongx -2.0.0-18 100 | - bugfix: reset cnfctx when respawn 101 | - bugfix: when rotate, record the file corresponding th fd, so as not to lose data when the process crashs 102 | - changes: exit when trap in KafkaCtx::produce 103 | - changes: support random partitioner 104 | 105 | * Sun Apr 8 2018 zzyongx -2.0.0-15 106 | - bugfix: checkRotate should use tail2kafka(NIL)\'s return value 107 | 108 | * Wed Apr 4 2018 zzyongx -2.0.0-14 109 | - changes: refactor kafka block 110 | - bugfix: tagRotate 111 | 112 | * Tue Feb 27 2018 zzyongx -2.0.0-10 113 | - bugfix: history file may lost 114 | 115 | * Thu Feb 22 2018 zzyongx -2.0.0-9 116 | - Feature: support md5 checksum 117 | 118 | * Wed Feb 14 2018 zzyongx -2.0.0-8 119 | - Feature: first release 120 | -------------------------------------------------------------------------------- /blackboxtest/loadtest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BINDIR=$(readlink -e $(dirname "${BASH_SOURCE[0]}")) 4 | CFGDIR="$BINDIR/loadtest" 5 | PIDF=/var/run/tail2kafka.pid 6 | LIBDIR=/var/lib/tail2kafka 7 | 8 | DATADIR=${DATADIR:-$HOME} 9 | T2KDIR=$DATADIR/data 10 | K2FDIR=$DATADIR/data/kafka2file 11 | 12 | test -f $BINDIR/../ENV.sh && source $BINDIR/../ENV.sh 13 | KAFKASERVER=${KAFKASERVER:-"localhost:9092"} 14 | ACK=${ACK:-1} 15 | cp $CFGDIR/main.lua $CFGDIR/main.lua.backup 16 | cp $CFGDIR/linecopy.lua $CFGDIR/linecopy.lua.backup 17 | sed -i -E "s|localhost:9092|$KAFKASERVER|g" $CFGDIR/main.lua 18 | sed -i -E "s|_ACK_|$ACK|g" $CFGDIR/main.lua 19 | sed -i -E "s|BIGLOG|$T2KDIR/big.log|g" $CFGDIR/linecopy.lua 20 | 21 | mkdir -p $K2FDIR 22 | find $K2FDIR -type f -delete 23 | 24 | gen_bigdata() 25 | { 26 | local name="$1" 27 | local loop="$2" 28 | local size="$3" 29 | local f="$4" 30 | perl -e "\$name='$name';\$loop=$loop;\$size=$size;" -e 'for $i (1 .. 
$loop) {print "$name $i ", "f" x int(rand($size)), "\n";}' >$f 31 | } 32 | 33 | (test -f $PIDF && test -d /proc/$(cat $PIDF)) && kill $(cat $PIDF); sleep 2 34 | TOPIC=biglog 35 | K2FPID=$K2FDIR/$TOPIC.0.lock 36 | (test -f $K2FPID && test -d /proc/$(cat $K2FPID)) && kill $(cat $K2FPID); sleep 2; kill -9 $(cat $K2FPID 2>/dev/null) 2>/dev/null 37 | 38 | N=${LOADTEST_N:-500000} 39 | SIZE=0 40 | 41 | # prepare history file 42 | rm -f $LIBDIR/{biglog.history,biglog.current} 43 | for suffix in 2 1; do 44 | gen_bigdata "history.$suffix" $N 4096 $T2KDIR/big.log.history.$suffix 45 | echo "$T2KDIR/big.log.history.$suffix" >> $LIBDIR/biglog.history 46 | SIZE=$((SIZE + $(stat -c %s $T2KDIR/big.log.history.$suffix))) 47 | done 48 | 49 | echo "history file" 50 | cat $LIBDIR/biglog.history 51 | 52 | export BLACKBOXTEST_OUTFILE_TPL=$K2FDIR/catnull 53 | 54 | find $K2FDIR -type f -delete 55 | $BINDIR/../build/kafka2file $KAFKASERVER $TOPIC 0 offset-end $K2FDIR $BINDIR/../scripts/catnull.sh & 56 | sleep 1 57 | if [ ! -f $K2FPID ] || [ ! -d /proc/$(cat $K2FPID) ]; then 58 | echo "start kafka2file failed" 59 | exit 1 60 | fi 61 | 62 | START=$(date +%s) 63 | $BINDIR/../build/tail2kafka $CFGDIR; sleep 2 64 | if [ ! -f $PIDF ] || [ ! -d /proc/$(cat $PIDF) ]; then 65 | echo "start tail2kafka failed" 66 | exit 1 67 | fi 68 | mv $CFGDIR/main.lua.backup $CFGDIR/main.lua 69 | mv $CFGDIR/linecopy.lua.backup $CFGDIR/linecopy.lua 70 | 71 | gen_bigdata "current" $N 4096 $T2KDIR/big.log 72 | SIZE=$((SIZE + $(stat -c %s $T2KDIR/big.log))) 73 | mv $T2KDIR/big.log $T2KDIR/big.log.2 74 | 75 | echo "$(date) current move to ${T2KDIR}/big.log.2" 76 | 77 | CHILDPID=$(pgrep -P $(cat $PIDF)) 78 | while [ true ]; do 79 | FOPEN=$(ls -l /proc/$CHILDPID/fd | grep big.log | wc -l) 80 | if [ "$FOPEN" = 1 ] && ls -l /proc/$CHILDPID/fd | grep -qP 'big.log$'; then 81 | break 82 | else 83 | sleep 1 84 | fi 85 | done 86 | 87 | SPAN=$(($(date +%s) - START - 2)) 88 | echo "tail size $SIZE, time consumption ${SPAN}s" 89 | 90 | for f in "big.log.history.2" "big.log.history.1" "big.log.2"; do 91 | test -f $BLACKBOXTEST_OUTFILE_TPL.$f || { 92 | echo "$f catnull notfound" 93 | exit 1 94 | } 95 | 96 | source $BLACKBOXTEST_OUTFILE_TPL.$f 97 | MD5ORIFILE=$(md5sum $T2KDIR/$f | cut -d' ' -f1) 98 | if [ $MD5ORIFILE != "$NOTIFY_FILEMD5" ]; then 99 | echo "$f md5error, expect $MD5ORIFILE, get $NOTIFY_FILEMD5" 100 | exit 1 101 | fi 102 | done 103 | 104 | N=2000000 105 | for block in 4096 2048 1024 512; do 106 | START=$(date +%s) 107 | 108 | gen_bigdata "current" $N $block $T2KDIR/big.log 109 | 110 | sleep 5 111 | echo "$(date) current move to /search/odin/data/big.log.$block" 112 | mv $T2KDIR/big.log $T2KDIR/big.log.$block 113 | 114 | ROTATESPAN=$(($(date +%s) - START - 2)) 115 | 116 | while [ true ]; do 117 | if [ -f $BLACKBOXTEST_OUTFILE_TPL.big.log.$block ]; then 118 | T2KSPAN=$(($(date +%s) - START - 2)) 119 | READSPAN=$(/usr/bin/time -f %e cp /root/data/big.log.$block /dev/null 2>&1) # put it in before md5 to avoid read cache 120 | 121 | source $BLACKBOXTEST_OUTFILE_TPL.big.log.$block 122 | if [ $(md5sum $T2KDIR/big.log.$block | cut -d' ' -f1) != "$NOTIFY_FILEMD5" ]; then 123 | echo "big.log.1 md5error" 124 | exit 1 125 | fi 126 | break 127 | else 128 | sleep 1 129 | fi 130 | done 131 | 132 | SIZE=$(stat -c %s $T2KDIR/big.log.$block) 133 | echo -n "tail size $SIZE, line $N, generation time consumption ${ROTATESPAN}s, " 134 | echo "read time consumption ${READSPAN}s, tail2kafka time consumption in ${T2KSPAN}s" 135 | done 136 | 
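# Example invocation (a sketch, not part of the repository): loadtest.sh itself reads
# the environment overrides KAFKASERVER, ACK, LOADTEST_N and DATADIR; the broker address
# and counts below are illustrative values only.
#
#   KAFKASERVER=10.1.2.3:9092 ACK=1 LOADTEST_N=100000 DATADIR=/data ./blackboxtest/loadtest.sh
#
# The run prints the generated size and the tail2kafka time for each big.log.* round, and
# exits non-zero if any NOTIFY_FILEMD5 reported by the notify hook differs from the source file.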
-------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CXX = g++ 3 | INSTALL = install 4 | LDFLAGS = -lcurl -lrt -ldl -lpthread -lz -lcrypto 5 | DEPSDIR = ".deps" 6 | ARLIBS = $(DEPSDIR)/librdkafka.a $(DEPSDIR)/libluajit-5.1.a $(DEPSDIR)/libjsoncpp.a $(DEPSDIR)/libhs.a 7 | CFLAGS += -I/usr/local/include/luajit-2.0 $(PARAM_CFLAGS) 8 | PREDEF += $(PARAM_PREDEF) 9 | WARN = -Werror -Wall -Wshadow -Wextra -Wno-comment -Wno-deprecated-declarations -Wno-format-truncation -Wno-format-overflow -Wno-literal-suffix 10 | 11 | ifeq ($(DEBUG), 1) 12 | CFLAGS += -O0 -g 13 | else 14 | CFLAGS += -O2 -g 15 | endif 16 | 17 | ifndef ($(INSTALLDIR)) 18 | INSTALLDIR = /usr/local 19 | endif 20 | 21 | VPATH = .:./libs 22 | BUILDDIR = build 23 | 24 | OBJ = $(BUILDDIR)/common.o $(BUILDDIR)/cnfctx.o $(BUILDDIR)/luactx.o $(BUILDDIR)/transform.o \ 25 | $(BUILDDIR)/filereader.o $(BUILDDIR)/inotifyctx.o $(BUILDDIR)/fileoff.o $(BUILDDIR)/cmdnotify.o \ 26 | $(BUILDDIR)/luafunction.o $(BUILDDIR)/kafkactx.o $(BUILDDIR)/sys.o $(BUILDDIR)/util.o \ 27 | $(BUILDDIR)/esctx.o $(BUILDDIR)/metrics.o $(BUILDDIR)/taskqueue.o 28 | 29 | default: configure tail2kafka kafka2file tail2kafka_unittest tail2es_unittest kafka2file_unittest 30 | @echo finished 31 | 32 | tail2kafka: $(BUILDDIR)/tail2kafka.o $(OBJ) 33 | $(CXX) $(CFLAGS) -o $(BUILDDIR)/$@ $^ $(ARLIBS) $(LDFLAGS) 34 | 35 | tail2kafka_unittest: $(BUILDDIR)/tail2kafka_unittest.o $(OBJ) 36 | $(CXX) $(CFLAGS) -o $(BUILDDIR)/$@ $^ $(ARLIBS) $(LDFLAGS) 37 | 38 | tail2es_unittest: $(BUILDDIR)/tail2es_unittest.o $(OBJ) 39 | $(CXX) $(CFLAGS) -o $(BUILDDIR)/$@ $^ $(ARLIBS) $(LDFLAGS) 40 | 41 | kafka2file_unittest: $(BUILDDIR)/kafka2file_unittest.o $(OBJ) 42 | $(CXX) $(CFLAGS) -o $(BUILDDIR)/$@ $^ $(ARLIBS) $(LDFLAGS) 43 | 44 | kafka2file: $(BUILDDIR)/kafka2file.o $(OBJ) 45 | $(CXX) $(CFLAGS) -o $(BUILDDIR)/$@ $^ $(ARLIBS) $(LDFLAGS) 46 | 47 | speedlimit: $(BUILDDIR)/mix/speedlimit.o 48 | $(CXX) $(CFLAGS) -o $(BUILDDIR)/$@ $^ 49 | 50 | .PHONY: get-deps 51 | get-deps: 52 | @mkdir -p $(DEPSDIR) 53 | 54 | @echo "compile jsoncpp" && \ 55 | cd $(DEPSDIR) && \ 56 | (test -f 0.10.4.tar.gz || wget https://github.com/open-source-parsers/jsoncpp/archive/0.10.4.tar.gz) && \ 57 | rm -rf jsoncpp-0.10.4 && tar xzf 0.10.4.tar.gz && \ 58 | mkdir -p jsoncpp-0.10.4/build && cd jsoncpp-0.10.4/build && \ 59 | cmake -DCMAKE_CXX_FLAGS="-Wno-deprecated-declarations -Wno-implicit-fallthrough -Wno-shift-negative-value" -DCMAKE_BUILD_TYPE=debug -DBUILD_STATIC_LIBS=ON -DBUILD_SHARED_LIBS=OFF \ 60 | -DARCHIVE_INSTALL_DIR=../.. -G "Unix Makefiles" .. 
&& make install 61 | 62 | @echo "compile librdkafka" && \ 63 | cd $(DEPSDIR) && \ 64 | (test -f v0.11.3.tar.gz || wget https://github.com/edenhill/librdkafka/archive/v0.11.3.tar.gz) && \ 65 | rm -rf librdkafka-0.11.3 && tar xzf v0.11.3.tar.gz && cd librdkafka-0.11.3 && \ 66 | ./configure --disable-ssl --disable-sasl && make -j2 && make install 67 | cp /usr/local/lib/librdkafka.a $(DEPSDIR) 68 | 69 | @echo "compile libluajit" && \ 70 | cd $(DEPSDIR) && \ 71 | (test -f LuaJIT-2.0.4.tar.gz || wget http://luajit.org/download/LuaJIT-2.0.4.tar.gz) && \ 72 | rm -rf LuaJIT-2.0.4 && tar xzf LuaJIT-2.0.4.tar.gz && cd LuaJIT-2.0.4 && \ 73 | make -j2 && make install 74 | cp /usr/local/lib/libluajit-5.1.a $(DEPSDIR) 75 | 76 | .PHONY: configure 77 | configure: 78 | @mkdir -p $(BUILDDIR) 79 | @mkdir -p $(BUILDDIR)/mix 80 | @ls -l $(ARLIBS) >/dev/null || (echo "make get-deps first" && exit 2) 81 | 82 | .PHONY: debug 83 | debug: 84 | make DEBUG=1 85 | 86 | $(BUILDDIR)/%.o: src/%.cc 87 | $(CXX) -o $@ $(WARN) $(CXXWARN) $(CFLAGS) $(PREDEF) -c $< 88 | 89 | $(BUILDDIR)/mix/%.o: mix/%.cc 90 | $(CXX) -o $@ $(WARN) $(CXXWARN) $(CFLAGS) $(PREDEF) -c $< 91 | 92 | tail2kafka_blackbox: $(BUILDDIR)/tail2kafka_blackbox.o 93 | $(CXX) $(CFLAGS) -o $(BUILDDIR)/$@ $^ $(ARLIBS) $(LDFLAGS) 94 | 95 | .PHONY: test 96 | test: 97 | mkdir -p logs kafka2filedir 98 | 99 | @echo "unit test" 100 | find logs -type f -name "*.log" -delete 101 | find kafka2filedir -type f -delete 102 | make clean && make PARAM_PREDEF="-D_DEBUG_" DEBUG=1 103 | $(BUILDDIR)/tail2kafka_unittest 104 | $(BUILDDIR)/tail2es_unittest 105 | $(BUILDDIR)/kafka2file_unittest 106 | 107 | @echo "blackbox test" 108 | make clean && make PARAM_PREDEF="-D_DEBUG_" && make tail2kafka_blackbox 109 | ./blackboxtest/blackbox_test.sh 110 | 111 | .PHONY: install 112 | install: 113 | $(INSTALL) -D tail2kafka $(RPM_BUILD_ROOT)$(INSTALLDIR)/bin 114 | mkdir -p $(RPM_BUILD_ROOT)/etc/tail2kafka 115 | mkdir -p $(RPM_BUILD_ROOT)/var/lib/ 116 | 117 | .PHONY: clean 118 | clean: 119 | rm -rf $(BUILDDIR)/* 120 | -------------------------------------------------------------------------------- /consumer/de/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Dashboard Everything 5 | 6 | 7 | 8 | 9 | 11 | 12 | 13 |
[index.html markup not recoverable; only the page's visible text remains:]
If you first use this app, input all; If not, input you user name is enough, If this does not work, try to input all.
system attr
custom attr
You can Edit exists attr, get New attr, OR just define one
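The dashboard above stores and loads its per-user profile through consumer/de/cgi/de.profile.cgi (shown earlier). A hedged usage sketch -- the host name and parameter values are placeholders, only the query parameters themselves come from the CGI:

  # create or overwrite a profile; the CGI answers with the stored JSON and sets the user cookie
  curl 'http://dashboard.example/cgi/de.profile.cgi?user=alice&topic=nginx&id=web01&attr=status,rt&host=cluster'
  # a later call with only the user (or just the cookie) returns the saved profile
  curl 'http://dashboard.example/cgi/de.profile.cgi?user=alice'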
108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /src/esctx.h: -------------------------------------------------------------------------------- 1 | #ifndef _ESCTX_H_ 2 | #define _ESCTX_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | #include "gnuatomic.h" 11 | #include "filerecord.h" 12 | class CnfCtx; 13 | 14 | #define MAX_HTTP_HEADER_LEN 8192 15 | 16 | enum EventStatus { 17 | UNINIT, ESTABLISHING, WRITING, READING, IDLE 18 | }; 19 | 20 | inline const char *eventStatusToString(EventStatus status) 21 | { 22 | switch (status) { 23 | case UNINIT: return "uninit"; 24 | case ESTABLISHING: return "establishing"; 25 | case WRITING: return "writing"; 26 | case READING: return "reading"; 27 | case IDLE: return "idle"; 28 | default: return "unknow"; 29 | } 30 | } 31 | 32 | enum HttpRespWant { 33 | STATUS_LINE, HEADER, HEADER_NAME, HEADER_VALUE, 34 | BODY, BODY_CHUNK_LEN, BODY_CHUNK_CONTENT, RESP_EOF, 35 | }; 36 | 37 | class EsUrlManager; 38 | 39 | class EsUrl { 40 | template friend class UNITTEST_HELPER; 41 | public: 42 | EsUrl(const std::vector &nodes, int idx, EsUrlManager *mgr) 43 | : pool_(true), status_(UNINIT), fd_(-1), urlManager_(mgr), keepalive_(0), 44 | idx_(idx), nodes_(nodes), node_(nodes_[idx_]), record_(0) {} 45 | 46 | ~EsUrl() { 47 | if (fd_ > 0) close(fd_); 48 | } 49 | 50 | bool idle() const { 51 | return status_ == IDLE; 52 | } 53 | 54 | void reinit(FileRecord *record, bool move = false); 55 | bool onEvent(int pfd); 56 | bool onTimeout(int pfd, time_t now); 57 | bool onError(int pfd, const char *error); 58 | 59 | bool pool(bool p) { 60 | assert(record_ == 0 && (status_ == IDLE || status_ == UNINIT)); 61 | 62 | bool r = pool_; 63 | pool_ = p; 64 | return r; 65 | } 66 | 67 | private: 68 | void initHttpResponseStatusLine(const char *eof); 69 | void initHttpResponseHeader(const char *eof); 70 | void initHttpResponseBody(const char *eof); 71 | bool initHttpResponse(const char *eof); 72 | 73 | int initIOV(struct iovec *iov); 74 | 75 | bool doConnect(int pfd, char *errbuf); 76 | bool doConnectFinish(int pfd, char *errbuf); 77 | bool doRequest(int pfd, char *errbuf); 78 | bool doResponse(int pfd, char *errbuf); 79 | 80 | void destroy(int pfd); 81 | 82 | private: 83 | bool pool_; 84 | EventStatus status_; 85 | int fd_; 86 | time_t activeTime_; 87 | size_t timeoutRetry_; 88 | EsUrlManager *urlManager_; 89 | int keepalive_; 90 | 91 | int idx_; 92 | std::vector nodes_; 93 | std::string node_; 94 | 95 | FileRecord *record_; 96 | 97 | std::string url_; 98 | char header_[MAX_HTTP_HEADER_LEN]; 99 | int nheader_; 100 | const char *body_; 101 | int nbody_; 102 | int offset_; 103 | 104 | bool esError_; 105 | 106 | HttpRespWant respWant_; 107 | int respCode_; 108 | int wantLen_; 109 | int chunkLen_; 110 | char *resp_; 111 | std::string respBody_; 112 | }; 113 | 114 | class EsUrlManager { 115 | public: 116 | EsUrlManager(const std::vector &nodes, int capacity) 117 | : active_(0), capacity_(capacity), nodes_(nodes) { 118 | 119 | for (size_t i = 0; i < capacity_; ++i) { 120 | EsUrl *url = new EsUrl(nodes, i % nodes.size(), this); 121 | urls_.push_back(url); 122 | holder_.push_back(url); 123 | } 124 | nodes_ = nodes; 125 | } 126 | 127 | ~EsUrlManager() { 128 | for (std::list::iterator ite = holder_.begin(); 129 | ite != holder_.end(); ++ite) { 130 | delete *ite; 131 | } 132 | } 133 | 134 | EsUrl *get(bool *pool = 0); 135 | bool release(EsUrl *url); 136 | 137 | size_t load() const { 138 | size_t *ptr = 
const_cast(&active_); 139 | return util::atomic_get(ptr); 140 | } 141 | 142 | private: 143 | size_t active_; 144 | size_t capacity_; 145 | std::vector nodes_; 146 | 147 | std::vector urls_; 148 | std::list holder_; 149 | }; 150 | 151 | class EsSender { 152 | template friend class UNITTEST_HELPER; 153 | public: 154 | EsSender() 155 | : epfd_(-1), pipeRead_(-1), pipeWrite_(-1), events_(0), 156 | urlManager_(0), running_(false) {} 157 | 158 | ~EsSender(); 159 | 160 | bool init(CnfCtx *cnf, size_t capacity); 161 | void eventLoop(); 162 | bool produce(FileRecord *record); 163 | 164 | private: 165 | size_t consume(int pfd, bool once); 166 | bool flowControl(bool block, size_t cn); 167 | 168 | private: 169 | CnfCtx *cnf_; 170 | 171 | std::vector nodes_; 172 | std::string userpass_; 173 | 174 | int epfd_; 175 | 176 | int pipeRead_; 177 | int pipeWrite_; 178 | 179 | struct epoll_event *events_; 180 | std::list urls_; 181 | 182 | size_t capacity_; 183 | EsUrlManager *urlManager_; 184 | 185 | volatile bool running_; 186 | pthread_t tid_; 187 | }; 188 | 189 | class EsCtx { 190 | template friend class UNITTEST_HELPER; 191 | public: 192 | ~EsCtx(); 193 | bool init(CnfCtx *cnf); 194 | bool produce(std::vector *datas); 195 | 196 | private: 197 | CnfCtx *cnf_; 198 | 199 | size_t lastSenderIndex_; 200 | std::vector esSenders_; 201 | 202 | volatile bool running_; 203 | }; 204 | 205 | #endif 206 | -------------------------------------------------------------------------------- /src/unittesthelper.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define COLOR_RED "\x1b[31m" 8 | #define COLOR_GREEN "\x1b[32m" 9 | #define COLOR_RESET "\x1b[0m" 10 | 11 | template 12 | struct UNITTEST_HELPER { 13 | void call() {} 14 | }; 15 | 16 | #define BTOS(b) ((b) ? "TRUE" : "FALSE") 17 | #define PTRS(s) ((s).c_str()) 18 | 19 | #define SAFE_DELETE(ptr) do { delete ptr; (ptr) = 0; } while (0) 20 | 21 | #define CHECK_IMPL(r, name, fmt, arg...) do { \ 22 | if ((r)) break; \ 23 | fprintf(stderr, "%s@%-5d %s -> ["COLOR_RED fmt COLOR_RESET"]\n", \ 24 | name, __LINE__, #r, ##arg); \ 25 | assert((r)); \ 26 | } while(0) 27 | 28 | #define check(r, fmt, arg...) CHECK_IMPL(r, __FILE__, fmt, ##arg) 29 | #define checkx(r, fmt, arg...) 
CHECK_IMPL(r, TEST_NAME_, fmt, ##arg) 30 | 31 | inline 32 | void *env_safe_get(const std::map &map, 33 | const std::string &key, void *def) { 34 | std::map::const_iterator pos = map.find(key); 35 | if (pos == map.end()) { 36 | if (def) return def; 37 | fprintf(stderr, COLOR_RED "ENV %s notfound" COLOR_RESET, key.c_str()); 38 | exit(1); 39 | } else { 40 | return pos->second; 41 | } 42 | } 43 | 44 | #define UNITTEST_INIT() static std::map UNITTEST_ENV 45 | #define ENV_SET(key, value) UNITTEST_ENV[(key)] = (value) 46 | #define ENV_GET(key, type) (type) (env_safe_get(UNITTEST_ENV, (key), 0)) 47 | 48 | #define DEFINE(func) struct TEST_##func {}; \ 49 | template<> struct UNITTEST_HELPER { \ 50 | UNITTEST_HELPER(const char *name) : TEST_NAME_(name) {} \ 51 | void call(); const char *TEST_NAME_; }; \ 52 | void UNITTEST_HELPER::call() 53 | 54 | #define TEST_IMPL(func, name, t) do { \ 55 | UNITTEST_HELPER test_##func(name); test_##func.call(); \ 56 | if (t) printf("TEST %-60s ["COLOR_GREEN"OK"COLOR_RESET"]\n", name); \ 57 | } while(0) 58 | 59 | #define DO(func) TEST_IMPL(func, #func, false) 60 | #define TEST(func) TEST_IMPL(func, #func, true) 61 | #define TESTX(func, name) TEST_IMPL(func, name, true) 62 | 63 | typedef void (*TEST_RUN_FUNC_PROTO)(); 64 | 65 | #define TEST_RUN(name) static void TEST_RUN_##name(); \ 66 | static TEST_RUN_FUNC_PROTO name = TEST_RUN_##name; \ 67 | void TEST_RUN_##name() 68 | 69 | #define UNITTEST_RUN(...) do { \ 70 | TEST_RUN_FUNC_PROTO funcs[] = { __VA_ARGS__, 0 }; \ 71 | const char *gdbUnitTestEnv = getenv("GDB_UNITTEST"); \ 72 | if (gdbUnitTestEnv && strcmp(gdbUnitTestEnv, "1") == 0) { \ 73 | for (int i = 0; funcs[i]; ++i) funcs[i](); \ 74 | break; \ 75 | } \ 76 | pid_t pid = fork(); \ 77 | if (pid == 0) { \ 78 | for (int i = 0; funcs[i]; ++i) funcs[i](); \ 79 | exit(0); \ 80 | } \ 81 | int status; \ 82 | wait(&status); \ 83 | if (WIFSIGNALED(status)) { \ 84 | char core[32]; \ 85 | snprintf(core, 32, "core.%d", (int) pid); \ 86 | char cmd[256]; \ 87 | snprintf(cmd, 256, "test -f %s && (echo -e 'bt\nquit\n' | gdb $(readlink /proc/%d/exe) -c %s)", \ 88 | core, (int) getpid(), core); \ 89 | system(cmd); \ 90 | } \ 91 | } while(0) 92 | 93 | #define BASH(cmd, status) do { \ 94 | pid_t pid = fork(); \ 95 | int fd[2]; \ 96 | pipe(fd); \ 97 | dup2(fd[0], STDIN_FILENO); \ 98 | if (pid == 0) { \ 99 | close(fd[1]); \ 100 | const char *argv[2] = {"/bin/bash", 0}; \ 101 | execv(argv[0], (char * const *) argv); \ 102 | exit(127); \ 103 | } \ 104 | close(fd[0]); \ 105 | write(fd[1], cmd, strlen(cmd)); \ 106 | wait(&status); \ 107 | if (WIFEXITED(status)) status = WEXITSTATUS(status); \ 108 | else if (WIFSIGNALED(status)) status = WTERMSIG(status); \ 109 | } while (0) 110 | -------------------------------------------------------------------------------- /src/luactx.h: -------------------------------------------------------------------------------- 1 | #ifndef _LUACTX_H_ 2 | #define _LUACTX_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "sys.h" 12 | #include "luafunction.h" 13 | #include "cnfctx.h" 14 | 15 | class FileReader; 16 | 17 | #define PARTITIONER_RANDOM -100 18 | 19 | #define ESDOC_DATAFORMAT_NGINX_JSON 1 20 | #define ESDOC_DATAFORMAT_NGINX_LOG 2 21 | #define ESDOC_DATAFORMAT_JSON 3 22 | 23 | class LuaCtx { 24 | template friend class UNITTEST_HELPER; 25 | public: 26 | static LuaCtx *loadFile(CnfCtx *cnf, const char *file); 27 | ~LuaCtx(); 28 | 29 | bool parseEsIndexDoc(const std::string &esIndex, const std::string 
&esDoc, char errbuf[]); 30 | void es(std::string *esIndex, bool *esIndexWithTimeFormat, int *esIndexPos, 31 | int *esDocPos, int *esDocDataFormat) const { 32 | *esIndex = esIndex_; 33 | *esIndexWithTimeFormat = esIndexWithTimeFormat_; 34 | *esIndexPos = esIndexPos_; 35 | *esDocPos = esDocPos_; 36 | *esDocDataFormat = esDocDataFormat_; 37 | } 38 | 39 | bool testFile(const char *luaFile, char *errbuf); 40 | bool loadHistoryFile(); 41 | 42 | bool initFileReader(FileReader *reader, char *errbuf); 43 | FileReader *getFileReader() { return fileReader_; } 44 | 45 | void setRktId(int id) { rktId_ = id; } 46 | int rktId() { return rktId_; } 47 | 48 | void setNext(LuaCtx* nxt) { next_ = nxt; } 49 | LuaCtx *next() { return next_; } 50 | 51 | CnfCtx *cnf() { return cnf_; } 52 | 53 | bool copyRawRequired() const { 54 | #ifdef DISABLE_COPYRAW 55 | return false; 56 | #else 57 | return function_->getType() == LuaFunction::KAFKAPLAIN && cnf_->getPollLimit() && rawcopy_; 58 | #endif 59 | } 60 | 61 | int getPartitioner() const { 62 | if (partition_ == PARTITIONER_RANDOM) return partition_; 63 | else return -1; 64 | } 65 | 66 | int getPartition(uint32_t pc) const { 67 | if (partition_ < 0) { 68 | if (autoparti_) { 69 | return (ntohl(addr_) & 0xff) % pc; 70 | } else { 71 | return cnf_->partition(); 72 | } 73 | } else { 74 | return partition_; 75 | } 76 | } 77 | 78 | int rktPartition() const { 79 | return rktPartition_; 80 | } 81 | 82 | void rktSetPartition(int pc) { 83 | rktPartition_ = pc; 84 | } 85 | 86 | bool withhost() const { return withhost_; } 87 | bool withtime() const { return withtime_; } 88 | int timeidx() const { return timeidx_; } 89 | bool autonl() const { return autonl_; } 90 | bool md5sum() const { return md5sum_; } 91 | const std::string &pkey() const { return pkey_; } 92 | 93 | const char *getStartPosition() const { return startPosition_.c_str(); } 94 | const std::string &host() const { return cnf_->host(); } 95 | 96 | bool fileWithTimeFormat() const { return fileWithTimeFormat_; } 97 | 98 | bool getTimeFormatFile(std::string *timeFormatFile) const { 99 | if (fileWithTimeFormat_) { 100 | std::string f = sys::timeFormat(cnf_->fasttime(), file_.c_str(), file_.size()); 101 | if (f != timeFormatFile_) { 102 | timeFormatFile->assign(f); 103 | return true; 104 | } 105 | } 106 | return false; 107 | } 108 | 109 | void setTimeFormatFile(const std::string &timeFormatFile) { 110 | if (fileWithTimeFormat_) timeFormatFile_ = timeFormatFile; 111 | } 112 | 113 | const std::string &file() const { 114 | return fileWithTimeFormat_ ? timeFormatFile_ : file_; 115 | } 116 | 117 | const std::string & datafile() const { 118 | if (!fqueue_.empty()) return fqueue_.front(); 119 | else return fileWithTimeFormat_ ? timeFormatFile_ : file_; 120 | } 121 | 122 | bool addHistoryFile(const std::string &historyFile); 123 | bool removeHistoryFile(); 124 | 125 | const std::string &topic() const { return topic_; } 126 | LuaFunction *function() const { return function_; } 127 | 128 | bool autocreat() const { return autocreat_; } 129 | const char *fileOwner() const { return autocreat_ && !fileOwner_.empty() ? 
fileOwner_.c_str() : 0; } 130 | int uid() const { return uid_; } 131 | int gid() const { return gid_; } 132 | 133 | int holdFd() const { return holdFd_; } 134 | void holdFd(int fd) { holdFd_ = fd; } 135 | 136 | private: 137 | LuaCtx(); 138 | 139 | private: 140 | /* default set to topic_, it must be unique 141 | * if multi file write to one topic_, manual set fileAlias 142 | */ 143 | std::string fileAlias_; 144 | 145 | bool autocreat_; 146 | std::string fileOwner_; 147 | uid_t uid_; 148 | gid_t gid_; 149 | 150 | std::string file_; 151 | std::string topic_; 152 | 153 | std::string esIndex_; 154 | int esIndexWithTimeFormat_; 155 | int esIndexPos_; 156 | int esDocPos_; 157 | int esDocDataFormat_; 158 | 159 | bool withhost_; 160 | bool withtime_; 161 | int timeidx_; 162 | bool autonl_; 163 | std::string pkey_; 164 | 165 | bool fileWithTimeFormat_; 166 | std::string timeFormatFile_; 167 | std::deque fqueue_; 168 | 169 | uint32_t addr_; 170 | bool autoparti_; 171 | int partition_; 172 | bool rawcopy_; 173 | bool md5sum_; 174 | 175 | LuaFunction *function_; 176 | std::string startPosition_; 177 | FileReader *fileReader_; 178 | 179 | LuaCtx *next_; 180 | CnfCtx *cnf_; 181 | LuaHelper *helper_; 182 | 183 | size_t rktId_; 184 | int rktPartition_; 185 | int holdFd_; 186 | }; 187 | 188 | #endif 189 | -------------------------------------------------------------------------------- /src/sys.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "sys.h" 16 | 17 | #define MAX_ERR_LEN 512 18 | 19 | namespace sys { 20 | RunStatus *SignalHelper::runStatusPtr = 0; 21 | int *SignalHelper::signosPtr = 0; 22 | RunStatus::Want *SignalHelper::wantsPtr = 0; 23 | size_t SignalHelper::signoCount = 0; 24 | 25 | void sigHandle(int signo) 26 | { 27 | for (size_t i = 0; i < SignalHelper::signoCount; ++i) { 28 | if (signo == SignalHelper::signosPtr[i]) { 29 | SignalHelper::runStatusPtr->set(SignalHelper::wantsPtr[i]); 30 | } 31 | } 32 | } 33 | 34 | bool SignalHelper::signal(RunStatus *runStatus, int count, int *signos, RunStatus::Want *wants) 35 | { 36 | if (signosPtr) delete[] signosPtr; 37 | if (wantsPtr) delete[] wantsPtr; 38 | 39 | signosPtr = new int[count]; 40 | wantsPtr = new RunStatus::Want[count]; 41 | signoCount = count; 42 | runStatusPtr = runStatus; 43 | 44 | struct sigaction sa; 45 | for (int i = 0; i < count; ++i) { 46 | memset(&sa, 0x00, sizeof(sa)); 47 | if (wants[i] == RunStatus::IGNORE) { 48 | sa.sa_handler = SIG_IGN; 49 | } else { 50 | sa.sa_handler = sigHandle; 51 | } 52 | sigemptyset(&sa.sa_mask); 53 | if (sigaction(signos[i], &sa, NULL) == -1) { 54 | if (errbuf_) snprintf(errbuf_, MAX_ERR_LEN, "sigaction error %d:%s", errno, strerror(errno)); 55 | return false; 56 | } 57 | signosPtr[i] = signos[i]; 58 | wantsPtr[i] = wants[i]; 59 | } 60 | return true; 61 | } 62 | 63 | #define SIGSET_INIT(set) do { \ 64 | sigemptyset(&set); \ 65 | va_list valist; \ 66 | va_start(valist, signo); \ 67 | while (signo > 0) { \ 68 | sigaddset(&set, signo); \ 69 | signo = va_arg(valist, int); \ 70 | } \ 71 | va_end(valist); \ 72 | } while (0) 73 | 74 | bool SignalHelper::block(int signo, ...) 
75 | { 76 | sigset_t set; 77 | SIGSET_INIT(set); 78 | 79 | if (sigprocmask(SIG_BLOCK, &set, NULL) == -1) { 80 | if (errbuf_) snprintf(errbuf_, MAX_ERR_LEN, "sigprocmask block failed, %s", strerror(errno)); 81 | return false; 82 | } 83 | return true; 84 | } 85 | 86 | int SignalHelper::suspend(int signo, ...) 87 | { 88 | sigset_t set; 89 | SIGSET_INIT(set); 90 | return sigsuspend(&set); 91 | } 92 | 93 | 94 | bool SignalHelper::setmask(int signo, ...) 95 | { 96 | sigset_t set; 97 | SIGSET_INIT(set); 98 | 99 | if (sigprocmask(SIG_SETMASK, &set, NULL) == -1) { 100 | if (errbuf_) snprintf(errbuf_, MAX_ERR_LEN, "sigprocmask set failed, %s", strerror(errno)); 101 | return false; 102 | } 103 | return true; 104 | } 105 | 106 | /* pidfile may stale, this's not a perfect method */ 107 | bool initSingleton(const char *pidfile, char *errbuf) 108 | { 109 | int fd = open(pidfile, O_CREAT | O_WRONLY, 0644); 110 | if (lockf(fd, F_TLOCK, 0) == 0) { 111 | ftruncate(fd, 0); 112 | char buffer[32]; 113 | int len = snprintf(buffer, 32, "%d", getpid()); 114 | write(fd, buffer, len); 115 | return true; 116 | } else { 117 | if (errbuf) snprintf(errbuf, MAX_ERR_LEN, "lock %s failed", pidfile); 118 | return false; 119 | } 120 | } 121 | 122 | bool endsWith(const char *haystack, const char *needle) 123 | { 124 | size_t haystackLen = strlen(haystack); 125 | size_t needleLen = strlen(needle); 126 | 127 | size_t i; 128 | for (i = 0; i < haystackLen && i < needleLen; ++i) { 129 | if (haystack[haystackLen-1-i] != needle[needleLen-1-i]) return false; 130 | } 131 | return i == needleLen && i <= haystackLen; 132 | } 133 | 134 | bool readdir(const char *dir, const char *suffix, std::vector *files, char *errbuf) 135 | { 136 | DIR *dh = opendir(dir); 137 | if (!dh) { 138 | if (errbuf) snprintf(errbuf, MAX_ERR_LEN, "could not opendir %s", dir); 139 | return false; 140 | } 141 | 142 | static const size_t N = 1024; 143 | char fullpath[N]; 144 | 145 | struct dirent *ent; 146 | while ((ent = readdir(dh))) { 147 | if (suffix && !endsWith(ent->d_name, suffix)) continue; 148 | snprintf(fullpath, N, "%s/%s", dir, ent->d_name); 149 | files->push_back(fullpath); 150 | } 151 | 152 | closedir(dh); 153 | return true; 154 | } 155 | 156 | bool isdir(const char *dir, char *errbuf) 157 | { 158 | struct stat st; 159 | if (stat(dir, &st) != 0) { 160 | snprintf(errbuf, MAX_ERR_LEN, "stat %s error %s", dir, strerror(errno)); 161 | return false; 162 | } 163 | 164 | if (S_ISDIR(st.st_mode)) { 165 | return true; 166 | } else { 167 | snprintf(errbuf, MAX_ERR_LEN, "%s is not directory", dir); 168 | return false; 169 | } 170 | } 171 | 172 | bool file2vector(const char *file, std::vector *lines, size_t start, size_t size) 173 | { 174 | FILE *fp = fopen(file, "r"); 175 | if (!fp) return false; 176 | 177 | char buffer[8192]; 178 | size_t line = 0; 179 | 180 | while (fgets(buffer, 8192, fp)) { 181 | if (line >= start && line - start < size) { 182 | size_t len = strlen(buffer); 183 | if (buffer[len-1] == '\n') { 184 | lines->push_back(std::string(buffer, len-1)); 185 | line++; 186 | } else { 187 | fclose(fp); 188 | return false; 189 | } 190 | } 191 | } 192 | 193 | fclose(fp); 194 | return true; 195 | } 196 | 197 | } // sys 198 | -------------------------------------------------------------------------------- /scripts/auto-upgrade.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SYSCONFIG=/etc/sysconfig/tail2kafka 4 | test -f $SYSCONFIG && source $SYSCONFIG 5 | 6 | log() 7 | { 8 | local event="$1" 9 | 
local error="$2" 10 | 11 | local dt=$(date +%F_%H-%M-%S) 12 | echo "$dt $event $error" 13 | 14 | if [ "$PINGBACKURL" != "" ]; then 15 | error=$(echo $error | sed -e 's| |%20|g') 16 | curl -Ss "$PINGBACKURL?event=$event&product=$PRODUCT&hostid=$HOSTID&error=$error" -o /dev/null 17 | fi 18 | } 19 | 20 | try_start() 21 | { 22 | if [ "$BIN" != "" ] && [ -x $BIN ]; then 23 | log TRY_START "tail2kafka is not running, try start" 24 | $BIN $ETCDIR 25 | return 0 26 | else 27 | log RUNNING_ERROR "tail2kafka is not running" 28 | return 1 29 | fi 30 | } 31 | 32 | download_config() 33 | { 34 | local oldver="$1" 35 | local ver="$2" 36 | local md5="$3" 37 | 38 | local tgz=$LIBDIR/$PRODUCT-$ver.tar.gz 39 | curl -Ssf "$CONFIGURL/$PRODUCT/${PRODUCT}-${ver}.tar.gz" -o $tgz 40 | if [ $? != 0 ]; then 41 | log CONFIG_SYNC_ERROR "curl $CONFIGURL/$PRODUCT/${PRODUCT}-${ver}.tar.gz error" 42 | exit 1 43 | fi 44 | 45 | tgzmd5=$(md5sum $tgz | cut -d' ' -f1) 46 | if [ "$md5" != "$tgzmd5" ]; then 47 | log CONFIG_SIGN_ERROR "config md5 error" 48 | exit 1 49 | fi 50 | 51 | cd $LIBDIR 52 | tar xzf $tgz 53 | if [ ! -d $LIBDIR/$PRODUCT-$ver ]; then 54 | log CONFIG_TGZ_ERROR "BAD $tgz, no $LIBDIR/$PRODUCT-$ver found" 55 | exit 1 56 | fi 57 | 58 | mv $ETCDIR $LIBDIR/$PRODUCT.$oldver.$(date +"%F_%H-%M-%S") 59 | mv $LIBDIR/$PRODUCT-$ver $ETCDIR 60 | } 61 | 62 | try_reload() 63 | { 64 | local oldver="$1" 65 | local ver="$2" 66 | 67 | local pid=$(cat $PIDFILE) 68 | child_pid=$(pgrep -P$pid) 69 | kill -HUP $pid 70 | 71 | ret=1 72 | for i in `seq 1 10`; do 73 | sleep 1 74 | new_child_pid=$(pgrep -P $pid) 75 | if [ "$new_child_pid" != "$child_pid" ]; then 76 | ret=0 77 | break 78 | fi 79 | done 80 | 81 | if [ "$ret" = 0 ]; then 82 | echo "$ver" > $LIBDIR/version 83 | log UPGRADE_CONFIG_OK "upgrade config from $oldver to $ver" 84 | else 85 | log UPGRADE_CONFIG_ERROR "upgrade config from $oldver to $ver, reload failed" 86 | exit 1 87 | fi 88 | } 89 | 90 | auto_config() 91 | { 92 | if [ "$CONFIGURL" = "" ]; then 93 | log CONFIG_ERROR "param CONFIGURL is required" 94 | exit 1 95 | fi 96 | 97 | LIBDIR=${LIBDIR:-/tmp/tail2kafka} 98 | 99 | mkdir -p $LIBDIR 100 | find $LIBDIR -name "$PRODUCT.*" -mtime +2 -exec rm -r "{}" \; 101 | 102 | local meta # WARN declare and assign must be separated 103 | meta=$(curl -Ssf "$CONFIGURL/$PRODUCT/meta") 104 | if [ $? != 0 ]; then 105 | log CONFIG_SYNC_ERROR "curl $CONFIGURL/$PRODUCT/meta error" 106 | exit 1 107 | fi 108 | 109 | local ver=$(echo $meta | cut -d'-' -f1) 110 | local md5=$(echo $meta | cut -d'-' -f2) 111 | 112 | local oldver="NIL" 113 | test -f $LIBDIR/version && oldver=$(cat $LIBDIR/version) 114 | if [ "$ver" = "$oldver" ]; then 115 | log CONFIG_VERSION_OK "version $ver has not changed" 116 | 117 | if [ ! -f $PIDFILE ] || [ ! -d /proc/$(cat $PIDFILE) ]; then 118 | try_start 119 | fi 120 | exit 0 121 | fi 122 | 123 | download_config $oldver $ver $md5 124 | 125 | if [ -f $PIDFILE ] && [ -d /proc/$(cat $PIDFILE) ]; then 126 | try_reload $oldver $ver 127 | ret=$? 128 | else 129 | try_start 130 | ret=$? 131 | fi 132 | exit $ret 133 | } 134 | 135 | get_rpm_version() 136 | { 137 | local host_id="$1" 138 | 139 | local ulist; 140 | ulist=$(curl -Ssf $RPMURL/version) 141 | if [ $? 
!= 0 ]; then 142 | log UPGRADE_SYNC_ERROR "curl $RPMURL/version error" 143 | exit 1 144 | fi 145 | 146 | local version="" 147 | for line in $ulist; do 148 | local id=$(echo $line | cut -d'=' -f1) 149 | local ver=$(echo $line | cut -d'=' -f2) 150 | if [ "$id" = "*" ]; then 151 | version=$ver 152 | break; 153 | elif echo $host_id | grep -Pq $id; then 154 | version=$ver 155 | break 156 | fi 157 | done 158 | 159 | if [ "$version" = "" ] || rpm -q tail2kafka | grep $version; then 160 | log UPGRADE_RPM_OK "version $version has not changed" 161 | exit 0 162 | fi 163 | 164 | _RET=$version 165 | return 0 166 | } 167 | 168 | upgrade_rpm() 169 | { 170 | local version="$1" 171 | 172 | rpm -Uvh $RPMURL/tail2kafka-$version.x86_64.rpm 173 | if [ $? != 0 ]; then 174 | log RPM_INSTALL_ERROR "install $version error" 175 | exit 1 176 | fi 177 | 178 | kill $(cat $PIDFILE) 179 | sleep 5 180 | 181 | $BIN $ETCDIR 182 | log RPM_UPGRADE_OK "tail2kafka upgrade to $version" 183 | } 184 | 185 | auto_rpm() 186 | { 187 | if [ "$RPMURL" = "" ]; then 188 | log CONFIG_ERROR "param RPMURL is required" 189 | exit 1 190 | fi 191 | 192 | if [ "$BIN" = "" ]; then 193 | log CONFIG_ERROR "param BIN is required" 194 | exit 1 195 | fi 196 | 197 | local host_id=${HOSTID:-$PRODUCT}; 198 | get_rpm_version $host_id 199 | local version=$_RET 200 | 201 | upgrade_rpm $version 202 | } 203 | 204 | if [ "$ETCDIR" = "" ]; then 205 | log CONFIG_ERROR "param ETCDIR is required" 206 | exit 1 207 | fi 208 | 209 | if [ "$PIDFILE" = "" ]; then 210 | log CONFIG_ERROR "param PIDFILE is required" 211 | exit 1 212 | fi 213 | 214 | if [ "$PRODUCT" = "" ]; then 215 | log CONFIG_ERROR "param PRODUCT is required" 216 | exit 1 217 | fi 218 | 219 | ACTION=$1 220 | 221 | if [ "$ACTION" = "config" ]; then 222 | auto_config 223 | elif [ "$ACTION" = "rpm" ]; then 224 | auto_rpm 225 | else 226 | echo "$0 config|rpm" 227 | exit 1 228 | fi 229 | 230 | # PRODUCT=_PRODUCT_ VER=_VER_ 231 | # mkdir $PRODUCT-$VER; cp *.lua $PRODUCT-$VER 232 | # tar czf $PRODUCT-$VER.tar.gz $PRODUCT-$VER; MD5=$(md5sum $PRODUCT-$VER.tar.gz | cut -d' ' -f1); echo "$VER-$MD5" > meta 233 | -------------------------------------------------------------------------------- /blackboxtest/kafka_service_unavailable_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BIN="${BASH_SOURCE[0]}" 4 | BINDIR=$(readlink -e $(dirname $BIN)) 5 | HOST=$(hostname) 6 | 7 | CFGDIR="$BINDIR/tail2kafka" 8 | PIDF=/var/run/tail2kafka.pid 9 | LIBDIR=/var/lib/tail2kafka 10 | BUILDDIR=$BINDIR/../build 11 | 12 | if [ ! 
-d $CFGDIR ]; then 13 | echo "$CFGDIR NOT FOUND" 14 | exit 1 15 | fi 16 | 17 | UNBLOCK_KAFKA="iptables -D OUTPUT -p tcp --dport 9092 -j REJECT --reject-with tcp-reset" 18 | BLOCK_KAFKA="iptables -A OUTPUT -p tcp --dport 9092 -j REJECT --reject-with tcp-reset" 19 | echo "UNBLOCK_KAFKA $UNBLOCK_KAFKA"; $UNBLOCK_KAFKA 20 | 21 | # delete.topic.enable=true 22 | test -f $BINDIR/../ENV.sh && source $BINDIR/../ENV.sh 23 | KAFKAHOME=${KAFKAHOME:-"/opt/kafka"} 24 | ZOOKEEPER=${ZOOKEEPER:-"localhost:2181/kafka"} 25 | KAFKASERVER=${KAFKASERVER:-"localhost:9092"} 26 | cp $CFGDIR/main.lua $CFGDIR/main.lua.backup 27 | sed -i -E "s|localhost:9092|$KAFKASERVER|g" $CFGDIR/main.lua 28 | 29 | echo "WARN: YOU MUST KILL tail2kafka and kafka2file first, both may create topic automatic" 30 | 31 | TOPIC="basic" 32 | T2KDIR=logs 33 | K2FDIR=kafka2filedir 34 | 35 | echo "kill tail2kafka" 36 | (test -f $PIDF && test -d /proc/$(cat $PIDF)) && kill $(cat $PIDF); sleep 2 37 | echo "kill kafka2file" 38 | K2FPID=$K2FDIR/$TOPIC.0.lock 39 | (test -f $K2FPID && test -d /proc/$(cat $K2FPID)) && kill $(cat $K2FPID); sleep 2 40 | 41 | test -d $T2KDIR || mkdir $T2KDIR 42 | find $T2KDIR -type f -name "*.log*" -delete 43 | 44 | test -d $K2FDIR || mkdir $K2FDIR 45 | find $K2FDIR -type f -delete 46 | 47 | cd $KAFKAHOME 48 | bin/kafka-topics.sh --delete --if-exists --zookeeper $ZOOKEEPER --topic $TOPIC 49 | if bin/kafka-topics.sh --list --zookeeper $ZOOKEEPER | grep -q '\'; then 50 | echo "delete kafka topic $TOPIC error" 51 | exit 1 52 | fi 53 | bin/kafka-topics.sh --create --zookeeper $ZOOKEEPER --replication-factor 1 --partitions 1 --topic $TOPIC 54 | cd - 55 | 56 | $BUILDDIR/kafka2file $KAFKASERVER basic 0 offset-end $K2FDIR & 57 | sleep 5 58 | if [ ! -f $K2FPID ] || [ ! -d /proc/$(cat $K2FPID) ]; then 59 | echo "start kafka2file failed" 60 | exit 1 61 | fi 62 | 63 | # prepare history file 64 | rm -rf $LIBDIR/basic.* && test -f $LIBDIR/fileoff && rm $LIBDIR/fileoff 65 | for suffix in 2 1; do 66 | for i in `seq 1 10000`; do 67 | echo "BASIC_HISTORY_${suffix} $i" >> $T2KDIR/basic.log.history.$suffix 68 | done 69 | echo "$T2KDIR/basic.log.history.$suffix" >> $LIBDIR/basic.history 70 | done 71 | 72 | rm -f /var/log/tail2kafka/tail2kafka.log_$(date +%Y-%m-%d) 73 | $BUILDDIR/tail2kafka $CFGDIR; sleep 2 74 | if [ ! -f $PIDF ] || [ ! 
-d /proc/$(cat $PIDF) ]; then 75 | echo "start tail2kafka failed" 76 | exit 1; 77 | fi 78 | mv $CFGDIR/main.lua.backup $CFGDIR/main.lua 79 | 80 | echo "wait history file be consumed ..."; sleep 30 81 | if [ -f $LIBDIR/basic.history ]; then 82 | echo "history file should be consumed" 83 | exit 1 84 | fi 85 | 86 | echo "WAIT history file kafka2file ..."; sleep 20 87 | for suffix in 2 1; do 88 | HISTORYFILE_MD5=$(md5sum $T2KDIR/basic.log.history.$suffix | cut -d' ' -f1) 89 | K2FFILE_MD5=$(md5sum $K2FDIR/basic/${HOST}_basic.log.history.$suffix | cut -d' ' -f1) 90 | if [ "$HISTORYFILE_MD5" != "$K2FFILE_MD5" ]; then 91 | echo "HISTORYFILE $T2KDIR/basic.log.history.$suffix != $K2FDIR/basic/${HOST}_basic.log.history.$suffix" 92 | exit 1 93 | fi 94 | done 95 | 96 | echo "BLOCK_KAFKA $BLOCK_KAFKA"; $BLOCK_KAFKA 97 | sleep 1 98 | 99 | NFILE=5 100 | NLINE=120000 # must bigger than queue.buffering.max.messages 101 | LOGFILE=$T2KDIR/basic.log 102 | for suffix in `seq $NFILE -1 1`; do 103 | for i in `seq 1 $NLINE`; do 104 | echo "BASIC_${suffix} $i" >> $LOGFILE 105 | done 106 | mv $LOGFILE $LOGFILE.$suffix && touch $LOGFILE 107 | 108 | echo "$(date +%H:%M:%S) wait inotify $LOGFILE moved $LOGFILE.$suffix ..."; sleep 90 # rotate interval must > 60 109 | 110 | linenum=$(wc -l $LIBDIR/basic.history 2>/dev/null | cut -d' ' -f1) 111 | if [ "$linenum" != $((NFILE+1-suffix)) ]; then 112 | echo "$LINENO $(date +%H:%M:%S) round $suffix expect history file number $linenum != $((NFILE+1-suffix))" 113 | exit 1 114 | fi 115 | 116 | ofile=$(readlink -e $LOGFILE.$suffix) 117 | hfile=$(tail -n 1 $LIBDIR/basic.history) 118 | if [ "$hfile" != "$ofile" ]; then 119 | echo "except history file $ofile != $hfile" 120 | exit 1 121 | fi 122 | done 123 | 124 | touch $LOGFILE 125 | echo "UNBLOCK_KAFKA $UNBLOCK_KAFKA"; $UNBLOCK_KAFKA 126 | 127 | for i in `seq 1 100`; do 128 | echo "BASIC_0 $i" >> $LOGFILE 129 | done 130 | # kafka2file has memory cache, rotate file to trigger kafka2file flush cache 131 | mv $LOGFILE $LOGFILE.0 132 | 133 | echo "WAIT kafka2file ... 
"; sleep 60 134 | 135 | # WARN basic.log.5 out of order, I haven't found a way to fix it yet 136 | # but the messages was not lost 137 | SIZE1=$(stat -c %s $T2KDIR/basic.log.$NFILE) 138 | SIZE2=$(stat -c %s $K2FDIR/basic/${HOST}_basic.log.$NFILE) 139 | if [ "$SIZE1" != "$SIZE2" ]; then 140 | echo "$LINENO expect $K2FDIR/basic/${HOST}_basic.log.$NFILE size != $T2KDIR/basic.log.$NFILE" 141 | exit 1 142 | fi 143 | 144 | NFILE=$((NFILE-1)) 145 | for suffix in `seq $NFILE -1 0`; do 146 | ofile=$T2KDIR/basic.log.$suffix 147 | dfile=$K2FDIR/basic/${HOST}_basic.log.$suffix 148 | 149 | md5Ofile=$(md5sum $ofile | cut -d' ' -f1) 150 | md5Dfile=$(md5sum $dfile | cut -d' ' -f1) 151 | 152 | if [ "$md5Ofile" != "$md5Dfile" ]; then 153 | echo "$(date +%Y-%m-%d_%H-%M-%S) expect $dfile content != $ofile" 154 | exit 1 155 | fi 156 | done 157 | 158 | CHILDPID=$(pgrep -P $(cat $PIDF)) 159 | OPENFILENUM=0; 160 | for f in $(ls /proc/$CHILDPID/fd/); do 161 | if readlink /proc/$CHILDPID/fd/$f | grep -q basic.log; then 162 | OPENFILENUM=$((OPENFILENUM+1)); 163 | fi 164 | done 165 | 166 | if [ $OPENFILENUM != 1 ]; then 167 | echo "too many open files, file fd may leak" 168 | exit 1 169 | fi 170 | 171 | echo "OK" 172 | -------------------------------------------------------------------------------- /src/transform.h: -------------------------------------------------------------------------------- 1 | #ifndef _TRANSFORM_H_ 2 | #define _TRANSFORM_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "luahelper.h" 13 | #include "cmdnotify.h" 14 | 15 | struct MessageInfo { 16 | enum InfoType { META, NMSG, MSG }; 17 | static bool extract(const char *payload, size_t len, MessageInfo *info, bool nonl); 18 | 19 | InfoType type; 20 | 21 | std::string host; 22 | long pos; 23 | 24 | std::string file; 25 | size_t size; 26 | std::string md5; 27 | 28 | const char *ptr; 29 | int len; 30 | }; 31 | 32 | class Transform { 33 | public: 34 | enum Format { NGINX, TSV, RAW, ORC, JSON, NIL }; 35 | static Format stringToFormat(const char *s, size_t len); 36 | 37 | enum TimeFormat { TIMELOCAL, ISO8601, TIMEFORMAT_NIL }; 38 | static TimeFormat stringToTimeFormat(const char *s); 39 | 40 | static Transform *create(const char *wdir, const char *topic, int partition, 41 | CmdNotify *notify, const char *format, char *errbuf); 42 | virtual ~Transform(); 43 | 44 | static const uint32_t GLOBAL = 0x0001; 45 | static const uint32_t LOCAL = 0x0002; 46 | static const uint32_t IGNORE = 0x0004; 47 | static const uint32_t RKMFREE = 0x0008; 48 | 49 | virtual uint32_t write(rd_kafka_message_t *rkm, uint64_t *offsetPtr) = 0; 50 | virtual uint32_t timeout(uint64_t *offsetPtr); 51 | 52 | protected: 53 | Transform(const char *wdir, const char *topic, int partition, CmdNotify *notify) 54 | : wdir_(wdir), topic_(topic), partition_(partition), notify_(notify) {} 55 | 56 | const char *wdir_; 57 | const char *topic_; 58 | int partition_; 59 | CmdNotify *notify_; 60 | }; 61 | 62 | class MirrorTransform : public Transform { 63 | public: 64 | struct FdCache { 65 | int fd; 66 | std::vector iovs; 67 | 68 | long pos; 69 | size_t rkmSize; 70 | rd_kafka_message_t **rkms; 71 | 72 | FdCache() : fd(-1), pos(-1), rkmSize(0), rkms(0) {} 73 | 74 | ~FdCache() { 75 | assert(rkmSize == 0); 76 | if (fd != -1) close(fd); 77 | if (rkms) delete[] rkms; 78 | } 79 | 80 | void clear() { 81 | for (size_t i = 0; i < rkmSize; ++i) rd_kafka_message_destroy(rkms[i]); 82 | pos = -1; 83 | rkmSize = 0; 84 | iovs.clear(); 85 | } 86 | }; 
87 | 88 | MirrorTransform(const char *wdir, const char *topic, int partition, CmdNotify *notify) 89 | : Transform(wdir, topic, partition, notify) {} 90 | uint32_t write(rd_kafka_message_t *rkm, uint64_t *offsetPtr); 91 | 92 | private: 93 | void addToCache(rd_kafka_message_t *rkm, const MessageInfo &info); 94 | bool flushCache(bool eof, const std::string &host); 95 | 96 | std::map fdCache_; 97 | }; 98 | 99 | struct JsonValueTransform { 100 | virtual const char *name() const { return "undefine"; } 101 | virtual Json::Value call(const std::string &val) const = 0; 102 | virtual ~JsonValueTransform(); 103 | }; 104 | 105 | class JsonValueTypeTransform : public JsonValueTransform { 106 | public: 107 | enum Type { INT, DOUBLE, JSON }; 108 | JsonValueTypeTransform(Type type) : type_(type) {} 109 | const char *name() const { return "JsonValueTypeTransform"; } 110 | Json::Value call(const std::string &val) const; 111 | 112 | private: 113 | Type type_; 114 | }; 115 | 116 | class JsonValuePrefixTransform : public JsonValueTransform { 117 | public: 118 | JsonValuePrefixTransform(const std::string &prefix) : prefix_(prefix) {} 119 | const char *name() const { return "JsonValuePrefixTransform"; } 120 | Json::Value call(const std::string &val) const; 121 | 122 | private: 123 | std::string prefix_; 124 | }; 125 | 126 | class LuaTransform : public Transform { 127 | template friend class UNITTEST_HELPER; 128 | public: 129 | LuaTransform(const char *wdir, const char *topic, int partition, CmdNotify *notify) 130 | : Transform(wdir, topic, partition, notify), helper_(0), currentTimestamp_(0), 131 | currentIntervalCnt_(-1), currentIntervalFd_(-1), currentOffset_(-1), 132 | lastIntervalCnt_(-1), lastIntervalFd_(-1), lastOffset_(-1) {} 133 | 134 | ~LuaTransform(); 135 | 136 | bool init(Format inputFormat, Format outputFormat, int interval, int delay, const char *luaFile, char *errbuf); 137 | uint32_t write(rd_kafka_message_t *rkm, uint64_t *offsetPtr); 138 | uint32_t timeout(uint64_t *offsetPtr); 139 | 140 | private: 141 | void updateTimestamp(time_t timestamp) { 142 | if (currentTimestamp_ == -1 || timestamp > currentTimestamp_) currentTimestamp_ = timestamp; 143 | } 144 | 145 | bool parseFields(const char *ptr, size_t len, std::vector *fields, time_t *timestamp); 146 | bool fieldsToJson(const std::vector &fields, const std::string &method, const std::string &path, 147 | std::map *query, std::string *json) const; 148 | 149 | 150 | void initCurrentFile(long intervalCnt, uint64_t offset); 151 | bool lastIntervalTimeout() const { 152 | return lastIntervalFd_ > 0 && currentTimestamp_ > currentIntervalCnt_ * interval_ + delay_; 153 | } 154 | uint32_t timeout_(uint64_t *offsetPtr); 155 | 156 | void rotateCurrentToLast(); 157 | void rotateLastToFinish(); 158 | 159 | uint32_t rotate(long intervalCnt, uint64_t offset, uint64_t *offsetPtr); 160 | 161 | private: 162 | LuaHelper *helper_; 163 | Format inputFormat_; 164 | 165 | std::vector fields_; 166 | TimeFormat timestampFormat_; 167 | size_t timeLocalIndex_; 168 | int requestIndex_; 169 | 170 | bool deleteRequestField_; 171 | std::string timeLocalFormat_; 172 | 173 | std::map requestNameMap_; 174 | std::map requestValueMap_; 175 | 176 | int interval_; 177 | int delay_; 178 | 179 | time_t currentTimestamp_; 180 | 181 | long currentIntervalCnt_; 182 | int currentIntervalFd_; 183 | std::string currentIntervalFile_; 184 | uint64_t currentOffset_; 185 | 186 | long lastIntervalCnt_; 187 | int lastIntervalFd_; 188 | std::string lastIntervalFile_; 189 | uint64_t lastOffset_; 190 | 
}; 191 | 192 | #endif 193 | -------------------------------------------------------------------------------- /mix/kafka.config.template: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # see kafka.server.KafkaConfig for additional details and defaults 16 | 17 | ############################# Server Basics ############################# 18 | 19 | # The id of the broker. This must be set to a unique integer for each broker. 20 | broker.id=_BROKER_ID_ 21 | 22 | ############################# Socket Server Settings ############################# 23 | 24 | # The port the socket server listens on 25 | port=9092 26 | 27 | # Hostname the broker will bind to. If not set, the server will bind to all interfaces 28 | host.name=_HOST_NAME_ 29 | 30 | # Hostname the broker will advertise to producers and consumers. If not set, it uses the 31 | # value for "host.name" if configured. Otherwise, it will use the value returned from 32 | # java.net.InetAddress.getCanonicalHostName(). 33 | #advertised.host.name= 34 | 35 | # The port to publish to ZooKeeper for clients to use. If this is not set, 36 | # it will publish the same port that the broker binds to. 37 | #advertised.port= 38 | 39 | # The number of threads handling network requests 40 | num.network.threads=24 41 | 42 | # The number of threads doing disk I/O 43 | num.io.threads=8 44 | 45 | # The send buffer (SO_SNDBUF) used by the socket server 46 | socket.send.buffer.bytes=102400 47 | 48 | # The receive buffer (SO_RCVBUF) used by the socket server 49 | socket.receive.buffer.bytes=102400 50 | 51 | # The maximum size of a request that the socket server will accept (protection against OOM) 52 | socket.request.max.bytes=104857600 53 | 54 | delete.topic.enable=true 55 | 56 | 57 | ############################# Log Basics ############################# 58 | 59 | # A comma seperated list of directories under which to store log files 60 | log.dirs=_LOG_DIRS_ 61 | 62 | # The default number of log partitions per topic. More partitions allow greater 63 | # parallelism for consumption, but this will also result in more files across 64 | # the brokers. 65 | num.partitions=_NUM_PARTITIONS_ 66 | 67 | # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. 68 | # This value is recommended to be increased for installations with data dirs located in RAID array. 
69 | num.recovery.threads.per.data.dir=1 70 | 71 | ##################### Replication ################################## 72 | 73 | default.replication.factor=_DEFAULT_REPLICATION_FACTOR_ 74 | replica.lag.max.messages=40000 75 | replica.lag.time.max.ms=30000 76 | request.required.acks=0 77 | 78 | ############################# Log Flush Policy ############################# 79 | 80 | # Messages are immediately written to the filesystem but by default we only fsync() to sync 81 | # the OS cache lazily. The following configurations control the flush of data to disk. 82 | # There are a few important trade-offs here: 83 | # 1. Durability: Unflushed data may be lost if you are not using replication. 84 | # 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. 85 | # 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. 86 | # The settings below allow one to configure the flush policy to flush data after a period of time or 87 | # every N messages (or both). This can be done globally and overridden on a per-topic basis. 88 | 89 | # The number of messages to accept before forcing a flush of data to disk 90 | #log.flush.interval.messages=10000 91 | 92 | # The maximum amount of time a message can sit in a log before we force a flush 93 | #log.flush.interval.ms=1000 94 | 95 | ############################# Log Retention Policy ############################# 96 | 97 | # The following configurations control the disposal of log segments. The policy can 98 | # be set to delete segments after a period of time, or after a given size has accumulated. 99 | # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens 100 | # from the end of the log. 101 | 102 | # The minimum age of a log file to be eligible for deletion 103 | log.retention.hours=_LOG_RETENTION_HOURS_ 104 | 105 | # A size-based retention policy for logs. Segments are pruned from the log as long as the remaining 106 | # segments don't drop below log.retention.bytes. 107 | #log.retention.bytes=1073741824 108 | 109 | # The maximum size of a log segment file. When this size is reached a new log segment will be created. 110 | log.segment.bytes=1073741824 111 | 112 | # The interval at which log segments are checked to see if they can be deleted according 113 | # to the retention policies 114 | log.retention.check.interval.ms=300000 115 | 116 | # By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. 117 | # If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. 118 | log.cleaner.enable=false 119 | 120 | ############################# Zookeeper ############################# 121 | 122 | # Zookeeper connection string (see zookeeper docs for details). 123 | # This is a comma separated host:port pairs, each corresponding to a zk 124 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". 125 | # You can also append an optional chroot string to the urls to specify the 126 | # root directory for all kafka znodes. 
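# An example of a filled-in value (hypothetical hosts; _ZOOKEEPER_CONNECT_ below is a template
# placeholder, presumably substituted by the deployment tooling such as mix/config-kafka):
#   zookeeper.connect=zk1.example.com:2181,zk2.example.com:2181,zk3.example.com:2181/kafka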
127 | zookeeper.connect=_ZOOKEEPER_CONNECT_ 128 | 129 | # Timeout in ms for connecting to zookeeper 130 | zookeeper.connection.timeout.ms=6000 131 | -------------------------------------------------------------------------------- /src/cnfctx.h: -------------------------------------------------------------------------------- 1 | #ifndef _CNFCTX_H_ 2 | #define _CNFCTX_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "gnuatomic.h" 10 | #include "fileoff.h" 11 | #include "luahelper.h" 12 | #include "esctx.h" 13 | #include "kafkactx.h" 14 | #include "common.h" 15 | 16 | #define VERSION "2.3.2" 17 | 18 | #define QUEUE_ERROR_TIMEOUT 60 19 | #define MAX_FILE_QUEUE_SIZE 50000 20 | 21 | class TailStats { 22 | public: 23 | TailStats() : 24 | fileRead_(0), logRead_(0), logWrite_(0), 25 | logRecv_(0), logSend_(0), logError_(0), 26 | queueSize_(0) {} 27 | 28 | void fileReadInc(int add = 1) { util::atomic_inc(&fileRead_, add); } 29 | void logReadInc(int add = 1) { util::atomic_inc(&logRead_, add); } 30 | void logWriteInc(int add = 1) { util::atomic_inc(&logWrite_, add); } 31 | 32 | void logRecvInc(int add = 1) { util::atomic_inc(&logRecv_, add); } 33 | void logSendInc(int add = 1) { util::atomic_inc(&logSend_, add); } 34 | void logErrorInc(int add = 1) { util::atomic_inc(&logError_, add); } 35 | 36 | void queueSizeInc(int add = 1) { util::atomic_inc(&queueSize_, add); } 37 | void queueSizeDec(int add = 1) { util::atomic_dec(&queueSize_, add); } 38 | 39 | int64_t fileRead() const { return fileRead_; } 40 | int64_t logRead() const { return logRead_; } 41 | int64_t logWrite() const { return logWrite_; } 42 | 43 | int64_t logSend() const { return logSend_; } 44 | int64_t logRecv() const { return logRecv_; } 45 | int64_t logError() const { return logError_; } 46 | 47 | int64_t queueSize() const { return util::atomic_get((int64_t *) &queueSize_); } 48 | 49 | void get(TailStats *stats) { 50 | stats->fileRead_ = util::atomic_get(&fileRead_); 51 | stats->logRead_ = util::atomic_get(&logRead_); 52 | stats->logWrite_ = util::atomic_get(&logWrite_); 53 | 54 | stats->logRecv_ = util::atomic_get(&logRecv_); 55 | stats->logSend_ = util::atomic_get(&logSend_); 56 | stats->logError_ = util::atomic_get(&logError_); 57 | 58 | stats->queueSize_ = util::atomic_get(&queueSize_); 59 | } 60 | 61 | private: 62 | int64_t fileRead_; 63 | int64_t logRead_; 64 | int64_t logWrite_; 65 | 66 | int64_t logRecv_; 67 | int64_t logSend_; 68 | int64_t logError_; 69 | 70 | int64_t queueSize_; 71 | }; 72 | 73 | class RunStatus; 74 | 75 | enum TimeUnit { 76 | TIMEUNIT_MILLI, TIMEUNIT_SECONDS, 77 | }; 78 | 79 | class CnfCtx { 80 | template friend class UNITTEST_HELPER; 81 | public: 82 | int accept; 83 | int server; 84 | 85 | public: 86 | static CnfCtx *loadCnf(const char *dir, char *errbuf); 87 | bool reset(); 88 | 89 | static CnfCtx *loadFile(const char *file, char *errbuf); 90 | ~CnfCtx(); 91 | void addLuaCtx(LuaCtx *ctx); 92 | 93 | bool enableKafka() const { return !brokers_.empty(); } 94 | bool initKafka(); 95 | KafkaCtx *getKafka() { return kafka_; } 96 | 97 | bool enableEs() const { return !esNodes_.empty(); } 98 | 99 | bool initEs(); 100 | EsCtx *getEs() { return es_; } 101 | 102 | bool initFileOff(); 103 | FileOff *getFileOff() { return fileOff_; } 104 | 105 | bool initFileReader(); 106 | 107 | void setRunStatus(RunStatus *runStatus) { runStatus_ = runStatus; } 108 | RunStatus *getRunStatus() { return runStatus_; } 109 | 110 | TailStats *stats() { return &stats_; } 111 | void logStats(); 112 | 113 | const char 
*getBrokers() const { return brokers_.c_str(); } 114 | const std::map &getKafkaGlobalConf() const { return kafkaGlobal_; } 115 | const std::map &getKafkaTopicConf() const { return kafkaTopic_; } 116 | 117 | std::vector getEsNodes() const { return esNodes_; } 118 | size_t getEsMaxConns() const { return esMaxConns_; } 119 | const std::string getEsUserPass() const { return esUserPass_; } 120 | 121 | const char *getPidFile() const { 122 | return pidfile_.c_str(); 123 | } 124 | 125 | LuaHelper *getLuaHelper() { return helper_; } 126 | 127 | size_t getLuaCtxSize() const { return count_; } 128 | std::vector &getLuaCtxs() { return luaCtxs_; } 129 | 130 | int getPollLimit() const { return pollLimit_; } 131 | const std::string &pingbackUrl() const { return pingbackUrl_; } 132 | 133 | uint32_t addr() const { return addr_; } 134 | int partition() const { return partition_; } 135 | const std::string &host() const { return host_; } 136 | 137 | int64_t fasttime(TimeUnit unit = TIMEUNIT_SECONDS) const { 138 | if (unit == TIMEUNIT_MILLI) return timeval_.tv_sec * 1000 + timeval_.tv_usec / 1000; 139 | else return timeval_.tv_sec; 140 | } 141 | 142 | int64_t fasttime(bool force, TimeUnit unit) { 143 | if (force) gettimeofday(&timeval_, 0); 144 | return fasttime(unit); 145 | } 146 | 147 | char *errbuf() { return errbuf_; } 148 | const std::string &libdir() const { return libdir_; } 149 | const std::string &logdir() const { return logdir_; } 150 | int daemonize() const { return daemonize_; } 151 | 152 | void setTailLimit(bool tailLimit) { tailLimit_ = tailLimit; } 153 | bool getTailLimit() const { return tailLimit_; } 154 | 155 | void flowControl(bool block) { util::atomic_set(&flowControl_, block ? 1 : 0); } 156 | 157 | bool flowControlOn() const { 158 | return util::atomic_get((int *) &flowControl_) || 159 | stats_.queueSize() > MAX_FILE_QUEUE_SIZE; 160 | } 161 | 162 | private: 163 | CnfCtx(); 164 | 165 | long lastLog_; 166 | TailStats stats_; 167 | 168 | std::string pidfile_; 169 | std::string host_; 170 | uint32_t addr_; 171 | int partition_; 172 | int pollLimit_; 173 | std::string pingbackUrl_; 174 | std::string logdir_; 175 | std::string libdir_; 176 | int daemonize_; 177 | 178 | size_t count_; 179 | std::vector luaCtxs_; 180 | 181 | std::string brokers_; 182 | std::map kafkaGlobal_; 183 | std::map kafkaTopic_; 184 | KafkaCtx *kafka_; 185 | 186 | std::vector esNodes_; 187 | std::string esUserPass_; 188 | int esMaxConns_; 189 | EsCtx *es_; 190 | 191 | struct timeval timeval_; 192 | char *errbuf_; 193 | RunStatus *runStatus_; 194 | 195 | LuaHelper *helper_; 196 | FileOff *fileOff_; 197 | 198 | bool tailLimit_; 199 | int flowControl_; 200 | }; 201 | 202 | #endif 203 | -------------------------------------------------------------------------------- /consumer/cqlexec.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | // -lcassandra -ljsoncpp 16 | 17 | struct CaCtx { 18 | CassCluster *cluster; 19 | CassSession *session; 20 | CassFuture *connect; 21 | }; 22 | 23 | bool initCaCtx(const char *db, CaCtx *ctx); 24 | bool execCql(CaCtx *ctx, const char *query, std::string *result); 25 | void destroyCaCtx(CaCtx *ctx); 26 | 27 | int main(int argc, char *argv[]) 28 | { 29 | if (argc != 3) { 30 | fprintf(stderr, "usage: %s ca cql", argv[0]); 31 | return EXIT_FAILURE; 32 | } 33 | 34 | const char *db = argv[1]; 35 | 
const char *cql = argv[2]; 36 | 37 | CaCtx ctx; 38 | if (!initCaCtx(db, &ctx)) return EXIT_FAILURE; 39 | 40 | std::string json; 41 | bool r = execCql(&ctx, cql, &json); 42 | if (r) { 43 | printf("%s", json.c_str()); 44 | } 45 | 46 | destroyCaCtx(&ctx); 47 | return EXIT_SUCCESS; 48 | } 49 | 50 | bool initCaCtx(const char *db, CaCtx *ctx) 51 | { 52 | ctx->cluster = cass_cluster_new(); 53 | ctx->session = cass_session_new(); 54 | 55 | cass_cluster_set_contact_points(ctx->cluster, db); 56 | cass_cluster_set_max_connections_per_host(ctx->cluster, 1024); 57 | cass_cluster_set_max_concurrent_creation(ctx->cluster, 100); 58 | 59 | ctx->connect = cass_session_connect(ctx->session, ctx->cluster); 60 | if (cass_future_error_code(ctx->connect) != CASS_OK) { 61 | const char *msg; 62 | size_t len; 63 | cass_future_error_message(ctx->connect, &msg, &len); 64 | fprintf(stderr, "connect %s error %.*s\n", db, (int) len, msg); 65 | return false; 66 | } 67 | 68 | return true; 69 | } 70 | 71 | void destroyCaCtx(CaCtx *ctx) 72 | { 73 | if (ctx->connect) { 74 | CassFuture *closeFuture = cass_session_close(ctx->session); 75 | cass_future_wait(closeFuture); 76 | cass_future_free(closeFuture); 77 | cass_future_free(ctx->connect); 78 | } 79 | 80 | if (ctx->session) cass_session_free(ctx->session); 81 | if (ctx->cluster) cass_cluster_free(ctx->cluster); 82 | } 83 | 84 | Json::Value col2json(const CassValue *col) 85 | { 86 | CassValueType type = cass_value_type(col); 87 | if (type == CASS_VALUE_TYPE_INT) { 88 | int i; 89 | cass_value_get_int32(col, &i); 90 | return Json::Value(i); 91 | } else if (type == CASS_VALUE_TYPE_BIGINT || type == CASS_VALUE_TYPE_TIMESTAMP) { 92 | cass_int64_t i; 93 | cass_value_get_int64(col, &i); 94 | return Json::Value(i); 95 | } else if (type == CASS_VALUE_TYPE_FLOAT) { 96 | float f; 97 | cass_value_get_float(col, &f); 98 | return Json::Value(f); 99 | } else if (type == CASS_VALUE_TYPE_DOUBLE) { 100 | double d; 101 | cass_value_get_double(col, &d); 102 | return Json::Value(d); 103 | } else if (type == CASS_VALUE_TYPE_BOOLEAN) { 104 | cass_bool_t b; 105 | cass_value_get_bool(col, &b); 106 | return Json::Value(b == cass_true); 107 | } else if (type == CASS_VALUE_TYPE_TEXT || type == CASS_VALUE_TYPE_VARCHAR) { 108 | const char *output; 109 | size_t len; 110 | cass_value_get_string(col, &output, &len); 111 | return Json::Value(output, output + len); 112 | } else if (type == CASS_VALUE_TYPE_BLOB) { 113 | const char *output; 114 | size_t len; 115 | cass_value_get_bytes(col, (const uint8_t **) &output, &len); 116 | char tmpf[] = "/tmp/cqlexec.XXXXXX"; 117 | int fd = mkstemp(tmpf); 118 | write(fd, output, len); 119 | return Json::Value(tmpf, tmpf + sizeof(tmpf)); 120 | } else if (type == CASS_VALUE_TYPE_MAP) { 121 | CassIterator *ite = cass_iterator_from_map(col); 122 | Json::Value root(Json::objectValue); 123 | 124 | while (cass_iterator_next(ite)) { 125 | const char *key, *value; 126 | size_t keylen, valuelen; 127 | cass_value_get_string(cass_iterator_get_map_key(ite), &key, &keylen); 128 | cass_value_get_string(cass_iterator_get_map_value(ite), &value, &valuelen); 129 | root[std::string(key, keylen)] = Json::Value(value, value + valuelen); 130 | } 131 | cass_iterator_free(ite); 132 | return root; 133 | } else if (type == CASS_VALUE_TYPE_LIST || type == CASS_VALUE_TYPE_SET) { 134 | CassIterator *ite = cass_iterator_from_collection(col); 135 | Json::Value root(Json::arrayValue); 136 | 137 | while (cass_iterator_next(ite)) { 138 | const char *value; 139 | size_t valuelen; 140 | 
cass_value_get_string(cass_iterator_get_value(ite), &value, &valuelen); 141 | root.append(Json::Value(value, value + valuelen)); 142 | } 143 | cass_iterator_free(ite); 144 | return root; 145 | } else { 146 | fprintf(stderr, "unsupport column type\n"); 147 | return Json::Value(); 148 | } 149 | } 150 | 151 | bool execCql(CaCtx *ctx, const char *query, std::string *json) 152 | { 153 | bool read = false; 154 | if (strncasecmp(query, "select", 6) == 0) { 155 | read = true; 156 | } else if (strncasecmp(query, "update", 6) != 0 && 157 | strncasecmp(query, "insert", 6) != 0 && 158 | strncasecmp(query, "delete", 6) != 0) { 159 | fprintf(stderr, "unsupport cql %s\n", query); 160 | return false; 161 | } 162 | 163 | CassStatement *statm = cass_statement_new(query, 0); 164 | CassFuture *resultFuture = cass_session_execute(ctx->session, statm); 165 | cass_future_wait(resultFuture); 166 | 167 | if (cass_future_error_code(resultFuture) != CASS_OK) { 168 | const char *msg; 169 | size_t len; 170 | cass_future_error_message(resultFuture, &msg, &len); 171 | fprintf(stderr, "exec %s error %.*s\n", query, (int) len, msg); 172 | return false; 173 | } 174 | 175 | if (read) { 176 | Json::Value root(Json::arrayValue); 177 | 178 | const CassResult *result = cass_future_get_result(resultFuture); 179 | CassIterator *ite = cass_iterator_from_result(result); 180 | 181 | while (cass_iterator_next(ite)) { 182 | Json::Value line(Json::arrayValue); 183 | const CassRow *row = cass_iterator_get_row(ite); 184 | 185 | for (size_t i = 0; i < 100; ++i) { 186 | const CassValue *col = cass_row_get_column(row, i); 187 | if (!col) break; 188 | 189 | line.append(col2json(col)); 190 | } 191 | 192 | root.append(line); 193 | } 194 | cass_iterator_free(ite); 195 | *json = Json::FastWriter().write(root); 196 | } 197 | 198 | return true; 199 | } 200 | -------------------------------------------------------------------------------- /src/cnfctx.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "logger.h" 4 | #include "sys.h" 5 | #include "luahelper.h" 6 | #include "luactx.h" 7 | #include "cnfctx.h" 8 | 9 | CnfCtx *CnfCtx::loadCnf(const char *dir, char *errbuf) 10 | { 11 | std::vector luaFiles; 12 | if (!sys::readdir(dir, ".lua", &luaFiles, errbuf)) return 0; 13 | 14 | // useful when lua->next_ != 0 15 | std::sort(luaFiles.begin(), luaFiles.end()); 16 | 17 | std::string mainlua = std::string(dir) + "/main.lua"; 18 | CnfCtx *cnf = CnfCtx::loadFile(mainlua.c_str(), errbuf); 19 | if (!cnf) return 0; 20 | 21 | for (std::vector::iterator ite = luaFiles.begin(); ite != luaFiles.end(); ++ite) { 22 | if (sys::endsWith(ite->c_str(), "/main.lua")) continue; 23 | 24 | LuaCtx *ctx = LuaCtx::loadFile(cnf, ite->c_str()); 25 | if (!ctx) return 0; 26 | 27 | cnf->addLuaCtx(ctx); 28 | } 29 | 30 | return cnf; 31 | } 32 | 33 | inline bool initPipe(int *accept, int *server, char *errbuf) 34 | { 35 | if (*accept != -1) close(*accept); 36 | if (*server != -1) close(*server); 37 | 38 | *accept = *server = -1; 39 | 40 | int fd[2]; 41 | if (pipe(fd) == -1) { 42 | snprintf(errbuf, MAX_ERR_LEN, "pipe error"); 43 | return false; 44 | } 45 | 46 | *accept = fd[0]; 47 | *server = fd[1]; 48 | return true; 49 | } 50 | 51 | bool CnfCtx::reset() 52 | { 53 | for (std::vector::iterator ite = luaCtxs_.begin(); ite != luaCtxs_.end(); ++ite) { 54 | LuaCtx *ctx = *ite; 55 | if (!ctx->testFile(ctx->file().c_str(), errbuf_)) return false; 56 | if (!ctx->loadHistoryFile()) return false; 57 | } 58 | 59 | if 
(!initPipe(&accept, &server, errbuf_)) return false; 60 | return true; 61 | } 62 | 63 | CnfCtx *CnfCtx::loadFile(const char *file, char *errbuf) 64 | { 65 | std::auto_ptr cnf(new CnfCtx); 66 | 67 | std::auto_ptr helper(new LuaHelper); 68 | if (!helper->dofile(file, errbuf)) return 0; 69 | 70 | std::string hostshell; 71 | if (!helper->getString("hostshell", &hostshell)) return 0; 72 | if (!shell(hostshell.c_str(), &cnf->host_, errbuf)) return 0; 73 | if (!hostAddr(cnf->host_, &cnf->addr_, errbuf)) return 0; 74 | if (cnf->host_.size() >= 1024) { 75 | snprintf(errbuf, MAX_ERR_LEN, "hostname %s is too long", cnf->host_.c_str()); 76 | return 0; 77 | } 78 | 79 | if (!helper->getInt("daemonize", &cnf->daemonize_, 1)) return 0; 80 | 81 | if (!helper->getString("pidfile", &cnf->pidfile_)) return 0; 82 | 83 | if (!helper->getString("brokers", &cnf->brokers_, "")) return 0; 84 | if (!helper->getArray("es_nodes", &cnf->esNodes_, false)) return 0; 85 | 86 | if (!helper->getInt("partition", &cnf->partition_, -1)) return 0; 87 | if (!helper->getInt("polllimit", &cnf->pollLimit_, 100)) return 0; 88 | 89 | if (!helper->getString("pingbackurl", &cnf->pingbackUrl_, "")) return 0; 90 | 91 | if (!cnf->brokers_.empty()) { 92 | if (!helper->getTable("kafka_global", &cnf->kafkaGlobal_)) return 0; 93 | if (!helper->getTable("kafka_topic", &cnf->kafkaTopic_)) return 0; 94 | } else if (!cnf->esNodes_.empty()) { 95 | if (!helper->getInt("es_max_conns", &cnf->esMaxConns_, 1000)) return 0; 96 | if (!helper->getString("es_userpass", &cnf->esUserPass_, "")) return 0; 97 | } else { 98 | snprintf(errbuf, MAX_ERR_LEN, "brokers or esnodes is required"); 99 | return 0; 100 | } 101 | 102 | if (!helper->getString("libdir", &cnf->libdir_, "/var/lib/tail2kafka")) return 0; 103 | if (!sys::isdir(cnf->libdir_.c_str(), errbuf)) return 0; 104 | 105 | if (!helper->getString("logdir", &cnf->logdir_, "/var/log/tail2kafka")) return 0; 106 | if (cnf->logdir_ != "-") if (!sys::isdir(cnf->logdir_.c_str(), errbuf)) return 0; 107 | 108 | cnf->helper_ = helper.release(); 109 | 110 | if (!initPipe(&cnf->accept, &cnf->server, errbuf)) return 0; 111 | 112 | cnf->errbuf_ = errbuf; 113 | return cnf.release(); 114 | } 115 | 116 | void CnfCtx::addLuaCtx(LuaCtx *ctx) 117 | { 118 | count_++; 119 | bool find = false; 120 | for (std::vector::iterator ite = luaCtxs_.begin(); 121 | ite != luaCtxs_.end(); ++ite) { 122 | if ((*ite)->file() == ctx->file()) { 123 | ctx->setNext(*ite); 124 | *ite = ctx; 125 | find = true; 126 | break; 127 | } 128 | } 129 | if (!find) luaCtxs_.push_back(ctx); 130 | } 131 | 132 | bool CnfCtx::initKafka() 133 | { 134 | assert(!brokers_.empty()); 135 | 136 | std::auto_ptr kafka(new KafkaCtx()); 137 | if (!kafka->init(this, errbuf_)) return false; 138 | kafka_ = kafka.release(); 139 | return true; 140 | } 141 | 142 | bool CnfCtx::initEs() 143 | { 144 | assert(!esNodes_.empty()); 145 | std::auto_ptr es(new EsCtx()); 146 | if (!es->init(this)) return false; 147 | es_ = es.release(); 148 | return true; 149 | } 150 | 151 | bool CnfCtx::initFileOff() 152 | { 153 | if (fileOff_) delete fileOff_; 154 | fileOff_ = 0; 155 | 156 | std::auto_ptr fileOff(new FileOff); 157 | if (!fileOff->init(this, errbuf_)) return false; 158 | fileOff_ = fileOff.release(); 159 | return true; 160 | } 161 | 162 | bool CnfCtx::initFileReader() 163 | { 164 | for (std::vector::iterator ite = luaCtxs_.begin(); ite != luaCtxs_.end(); ++ite) { 165 | LuaCtx *ctx = *ite; 166 | FileReader *reader = 0; 167 | while (ctx) { 168 | if (!ctx->initFileReader(reader, errbuf_)) 
return false; 169 | if (!reader) reader = ctx->getFileReader(); 170 | ctx = ctx->next(); 171 | } 172 | } 173 | return true; 174 | } 175 | 176 | void CnfCtx::logStats() 177 | { 178 | if (fasttime() <= lastLog_ + 5) return; 179 | 180 | bool block = flowControlOn(); 181 | 182 | TailStats s; 183 | stats_.get(&s); 184 | log_info(0, "kafka/es status %s, TailStatus,fileRead=%ld,logRead=%ld,logWrite=%ld,logSend=%ld,logRecv=%ld,logError=%ld,queueSize=%ld", 185 | block ? "block" : "ok", s.fileRead(), s.logRead(), s.logWrite(), 186 | s.logSend(), s.logRecv(), s.logError(), s.queueSize()); 187 | lastLog_ = fasttime(); 188 | } 189 | 190 | CnfCtx::CnfCtx() { 191 | lastLog_ = 0; 192 | partition_ = -1; 193 | 194 | helper_ = 0; 195 | kafka_ = 0; 196 | es_ = 0; 197 | fileOff_ = 0; 198 | 199 | accept = server = -1; 200 | count_ = 0; 201 | gettimeofday(&timeval_, 0); 202 | 203 | tailLimit_ = false; 204 | flowControl_ = 0; 205 | } 206 | 207 | CnfCtx::~CnfCtx() 208 | { 209 | for (std::vector::iterator ite = luaCtxs_.begin(); ite != luaCtxs_.end(); ++ite) { 210 | LuaCtx *ctx = *ite; 211 | while (ctx) { 212 | LuaCtx *next = ctx->next(); 213 | delete ctx; 214 | ctx = next; 215 | } 216 | } 217 | 218 | if (helper_) delete helper_; 219 | if (kafka_) delete kafka_; 220 | if (es_) delete es_; 221 | if (fileOff_) delete fileOff_; 222 | 223 | if (accept != -1) close(accept); 224 | if (server != -1) close(server); 225 | } 226 | -------------------------------------------------------------------------------- /mix/ckeeper: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use strict; 4 | use warnings; 5 | use LWP; 6 | use JSON::PP qw(decode_json); 7 | use IO::Socket::INET; 8 | use File::Basename; 9 | use FindBin qw($Bin); 10 | use Data::Dumper; 11 | 12 | my $PARTITION = "0,1,2,3"; 13 | my $MAINPARTI = "0"; 14 | my $KAFKA2FILE = "kafka2file"; 15 | my $OFFSETDIR = "/var/lib/kafka2file"; 16 | my $DATADIR = "/var/lib/ckeeper"; 17 | mkdir($DATADIR) unless (-d $DATADIR); 18 | 19 | my $hostf = shift or usage(); 20 | my $appApi = shift or usage(); 21 | my $remoteWdir = shift or usage(); 22 | my $KAFKA = shift or usage(); 23 | 24 | my $hosts = getHosts($hostf); 25 | 26 | while (1) { 27 | my $apps = getApps($appApi); 28 | my $procs = getProcs($hosts, $remoteWdir); 29 | my $healthyHosts = getHostByHealth($hosts); 30 | 31 | print Dumper($apps); 32 | print Dumper($procs); 33 | 34 | my ($timeUp, $token, $ym, $ymd, $ymdh, $ymdhIso) = getTimeToken(); 35 | while (my ($app, $eoi) = each(%$apps)) { 36 | my $host; 37 | my $tokenf = "$DATADIR/$app.$MAINPARTI.$token"; 38 | 39 | if (exists $procs->{$app}) { 40 | $host = $procs->{$app}->{host}; 41 | if (!$procs->{$app}->{status}) { 42 | print "$app was down, start $app on ", $procs->{$app}->{host}, "\n"; 43 | startProc($procs->{$app}->{host}, $app, $remoteWdir); 44 | } 45 | } else { 46 | $host = getHostByLoad($healthyHosts, $procs); 47 | print "$app may new, start $app on $host\n"; 48 | my $err = 0; 49 | if (-f $tokenf) { 50 | if (!scpKafkaOffset($tokenf, "$host:$OFFSETDIR/$app.$MAINPARTI")) { 51 | print "$app copy kafka offset file to $host error"; 52 | $err = 1; 53 | } 54 | } 55 | unless ($err) { 56 | startProc($host, $app, $remoteWdir); 57 | $procs->{$app} = {host => $host, status => 1}; 58 | } 59 | } 60 | 61 | if ($timeUp) { 62 | unless (-f $tokenf) { 63 | if (scpKafkaOffset("$host:$OFFSETDIR/$app.$MAINPARTI", $tokenf)) { 64 | my $dir = eoi2dir($eoi); 65 | my $lfile = "$remoteWdir/$app.$MAINPARTI.$ymdhIso"; 66 | copyDataToHdfs($host, 
$lfile, "$dir/$app/$ym/$ymd", "$app.$MAINPARTI.$ymd"); 67 | } 68 | } 69 | } 70 | } 71 | 72 | local $| = 1; 73 | sleep(30); 74 | } 75 | 76 | sub usage { 77 | print "$0 host-file app-api remote-wdir kafka-host-list\n"; 78 | exit(0); 79 | } 80 | 81 | sub getHosts { 82 | my $f = shift; 83 | open(my $fh, "<$f") or die "open $f $!"; 84 | my @hosts = grep {chomp; $_ =~ /[a-z0-9]+/; } <$fh>; 85 | close($fh); 86 | return \@hosts; 87 | } 88 | 89 | sub getApps { 90 | my $api = shift; 91 | return {"12" => "test", "34" => "test", "56" => "test"} if ($api eq "test"); 92 | my $rsp = LWP::UserAgent->new()->get($api); 93 | unless ($rsp->is_success) { 94 | die "get $api http error"; 95 | } 96 | my $obj = decode_json($rsp->content); 97 | unless ($obj) { 98 | die "get $api invalid json"; 99 | } 100 | unless ($obj->{ret_code} == 0) { 101 | die "get $api ret_code not 0"; 102 | } 103 | 104 | my $apps = {}; 105 | while (my ($k, $v) = each(%{$obj->{ret_info}})) { 106 | $apps->{$k} = $v->{eoi}; 107 | } 108 | return $apps; 109 | } 110 | 111 | sub getProcs { 112 | my ($hosts, $wdir) = @_; 113 | my $procs = {}; 114 | foreach my $host (@$hosts) { 115 | $procs = getHostProc($host, $wdir, $procs); 116 | } 117 | return $procs; 118 | } 119 | 120 | sub getHostProc { 121 | my ($host, $wdir, $procs) = @_; 122 | my $locks = ssh3($host, "ls $wdir/*.lock"); 123 | foreach my $lock (split /\n/, $locks) { 124 | $lock = basename($lock); 125 | next unless ($lock =~ /^([^.]+)/); 126 | my $app = $1; 127 | system("ssh $host 'ls /proc/\$(cat $wdir/$lock)/exe' 2>/dev/null"); 128 | if ($? == 0) { 129 | $procs->{$app} = {host => $host, status => 1}; 130 | } else { 131 | if (!$procs->{$app}) { 132 | $procs->{$app} = {host => $host, stats => 0}; 133 | } 134 | } 135 | } 136 | return $procs; 137 | } 138 | 139 | sub startProc { 140 | my ($host, $app, $wdir) = @_; 141 | system("ssh $host '$KAFKA2FILE $KAFKA $app $PARTITION $wdir'"); 142 | } 143 | 144 | sub getHostByHealth { 145 | my ($hosts) = @_; 146 | my @healthyHosts; 147 | foreach my $host (@$hosts) { 148 | for (my $i = 0; $i < 3; $i++) { 149 | system("ssh $host cd"); 150 | if ($? == 0) { 151 | push(@healthyHosts, $host); 152 | last; 153 | } 154 | sleep(1) if ($i+1 < 3); 155 | } 156 | } 157 | return \@healthyHosts; 158 | } 159 | 160 | sub getHostByLoad { 161 | my ($hosts, $procs) = @_; 162 | 163 | my %hosts; 164 | $hosts{$_} = 0 foreach (@$hosts); 165 | 166 | while (my ($k, $o) = each(%$procs)) { 167 | $hosts{$o->{host}}++; 168 | } 169 | 170 | my $min = 10000; 171 | my $host = ""; 172 | foreach (my ($k, $v) = each(%hosts)) { 173 | if ($v < $min) { 174 | $host = $k; 175 | $min = $v; 176 | } 177 | } 178 | return $host; 179 | } 180 | 181 | sub ssh3 { 182 | my ($host, $cmd) = @_; 183 | for (my $i = 0; $i < 3; $i++) { 184 | my $output = `ssh $host '$cmd' 2>/dev/null`; 185 | return $output if ($? == 0); 186 | 187 | `ssh $host cd`; 188 | return "" if ($? == 0); 189 | 190 | sleep(1) if ($i+1 < 3); 191 | } 192 | 193 | `ssh $host 'cd /'`; 194 | if ($? != 0) { 195 | print scalar localtime, " $host is down\n"; 196 | } 197 | 198 | return undef; 199 | } 200 | 201 | sub scpKafkaOffset { 202 | my ($src, $dst) = @_; 203 | for (my $i = 0; $i < 3; $i++) { 204 | system("scp $src $dst"); 205 | return 1 if ($? 
== 0); 206 | 207 | sleep(1) if ($i+1 < 3); 208 | } 209 | return 0; 210 | } 211 | 212 | sub copyDataToHdfs { 213 | my ($host, $fsFile, $hdfsDir, $hdfsFile) = @_; 214 | my $r = ssh3($host, "hadoop fs -mkdir -p $hdfsDir"); 215 | if (!defined($r)) { 216 | print "ssh $host 'hadoop fs -mkdir -p $hdfsDir' failed"; 217 | return 0; 218 | } 219 | 220 | my $pid = fork(); 221 | if ($pid == 0) { 222 | print "ssh $host 'bzip2 -c -z $fsFile | hadoop fs -put - $hdfsDir/$hdfsFile'"; 223 | exit(0); 224 | } 225 | 226 | return 1; 227 | } 228 | 229 | sub getTimeToken { 230 | my $now = time(); 231 | my @v = localtime($now); 232 | my $token = sprintf("%04d-%02d-%02dT%02d", $v[5]+1900, $v[4]+1, $v[3], $v[2]); 233 | 234 | if ($now % 3600 < 600) { 235 | @v = localtime($now + 60 - 3600); 236 | my $ym = sprintf("%04d%02d", $v[5]+1900, $v[4]+1); 237 | my $ymd = sprintf("%04d%02d%02d", $v[5]+1900, $v[4]+1, $v[3]); 238 | my $ymdh = sprintf("%04d%02d%02d%02d", $v[5]+1900, $v[4]+1, $v[3], $v[2]); 239 | my $ymdhIso = sprintf("%04d-%02d-%02dT%02d", $v[5]+1900, $v[4]+1, $v[3], $v[2]); 240 | 241 | return (1, $token, $ym, $ymd, $ymdh, $ymdhIso); 242 | } else { 243 | return (0, $token); 244 | } 245 | } 246 | 247 | sub eoi2dir { 248 | my ($eoi) = @_; 249 | my %hash = (); 250 | 251 | exists $hash{$eoi} or die "eoi $eoi not exists"; 252 | 253 | return undef; 254 | } 255 | -------------------------------------------------------------------------------- /doc/tail2kafka-config.org: -------------------------------------------------------------------------------- 1 | tail2kafka 的所有配置都是lua文件(lua非常简单,甚至都可能意识不到这是个lua)。这些lua文件放到同一个目录,启动时以这个目录为参数。这些lua文件中,有一个名称固定的 ~main.lua~ 用来指定全局配置,其余配置,每个lua文件指定一个数据文件。 2 | 3 | 可以参考源码 =blackboxtest/tail2kafka= 中的一些配置。 4 | 5 | * main.lua 6 | ** hostshell 7 | 必配项,string 8 | 9 | 当多台机器发送数据到kafka时,可能需要标识数据的来源。例如多台web服务器发送access_log到kafka。可以用hostshell指定一行shell命令,获取机器名,例如IP或者hostname。注意:如果使用了自动分区,该机器名必须能够解析出IP。 10 | 11 | 例如: ~hostshell = "hostname"~ ,tail2kafka会执行 =hostname= 命令,该命令的输出作为机器名。 12 | 13 | ** pidfile 14 | 必配项,string 15 | 16 | 指定tail2kafka的pid文件,pid文件用于停止或重新加载配置文件 17 | 18 | 例如: ~pidfile = "/var/run/tail2kafka.pid"~ 19 | 20 | ** brokers 21 | 必配项,string 22 | 23 | 指定kafka的机器和端口,多个机器和端口用逗号分号。 24 | 25 | 例如: ~brokers = "127.0.0.1:9092"~ 26 | 27 | ** partition 28 | 可选项,int 29 | 30 | 指定kafka的partition,也可以再各个数据文件的配置中指定。 31 | 32 | ** kafka_global 33 | 必选项,table 34 | 35 | librdkafka全局配置,参考源码 =blackboxtest/tail2kafka= 和 [[https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md][librdkafka]] 36 | 37 | ** kafka_topic 38 | 可选项,table 39 | 40 | librdkafka的topic配置 41 | 42 | ** polllimit 43 | 可选项, int, 默认值 ~polllimit=100~ 44 | 45 | 当文件写入相当频繁,可以转成轮训模式,参数用来指定轮训的间隔,单位是毫秒。 46 | 47 | ** rotatedelay 48 | 可选项,int,默认值 -1,关闭,单位是秒 49 | 50 | 以nginx为例,当想rotate文件时,先把文件mv走,然后给nginx发送 =USR1= 信号,重新打开文件。这里有个问题,文件mv走时,tail2kafka感知到了,然后去读新文件,但是在mv走之后,nginx重新打开文件之前,nginx仍然会往老文件写日志。 =rotatedelay= 指定tail2kafka 感知到文件mv时,延迟几秒钟打开新文件,防止数据丢失。 51 | 52 | 这个值不能设置太大,如果太大,则新文件中的数据会有大的延迟,建议关闭或者设置在10s以内。 53 | 54 | ** pingbackurl 55 | 可选项,字符串,默认值 "",关闭 56 | 57 | =pingbackurl= 是一个http地址,tail2kafka会把关键事件以GET请求的形式发送这个地址。例如配置了 ~pingbackurl=http://server/pingback/tail2kafka~ 则发生文件rotate时会请求 ~http://server/pingback/tail2kafka?event=ROTATE&file=/data/logs/access_2018-03-21-04_10.142.113.65_log&size=891010&md5=4b04460d5b0a8b79a5a7a7b78e55aecf~ 这里file是rotate后老文件的名称,size是文件大小,md5是文件的md5。可以通过日志了解更多事件信息。 58 | 59 | ** libdir 60 | 可选项,字符串,默认值 ~/var/lib/tail2kafka~ 61 | 62 | 用于存放fileoff,topic的历史文件等一些运行信息。 63 | 64 | ** logdir 65 | 可选项,字符串,默认值 
~/var/log/tail2kafka~ 66 | 67 | * 数据源文件lua 68 | 69 | 部分配置可以同时出现在main.lua和数据源文件lua,后者会覆盖前者,如果后者没有指定,会继承前者。 70 | 71 | ** topic 72 | 必填项,string 73 | 74 | 指定kafka的topic 75 | 76 | 例如: ~topic = "cron"~ 77 | 78 | ** fileAlias 79 | 可选项,string,默认和topic值相同 80 | 81 | 一个文件可以被发往多个topic,当kafka不可用时,需要记录尚未发送数据的文件列表,使用 fileAlias 作为文件列表的文件名。 82 | 83 | ** file 84 | 必填项,string,例如: ~file = "/var/log/message"~ 85 | 86 | 指定要发往kafka的源数据文件,tail2kafka可以检测到3种文件rotate的情况,并在rotate后重新打开文件,从0开始读取。三种rotate情况: 87 | - 文件被清空,例如: ~truncate --size 0 /tmp/log~ 不推荐,这种rotate方式可能丢数据 88 | - 文件被改名,例如: ~/tmp/log /tmp/log.1~ 89 | - 文件被删除,例如: ~unlink /tmp/log~ 不推荐,这种rotate方式可能丢数据 90 | - 文件名自身带时间,建议至少分钟级别,例如:=basic.%Y-%m-%d_%H-%M.log= ,这种格式的文件,需要设置 ~fileWithTimeFormat=true~ 91 | 92 | ** fileWithTimeFormat 93 | 可选项,boolean,默认值 ~fileWithTimeFormat=false~ 94 | 95 | 当文件名自身带时间时,设置为true。tail2kafka会跟踪时间变化。 96 | 97 | ** startpos 98 | 可选项,string,默认值 ~startpos=log_start~ 99 | 100 | 每往kafka发送一条消息,会在fileoff中记录消息在相应文件中的位置。当tail2kafka重启启动,或者reload时会使用这个文件, =startpos= 指定了获取文件开始位置的策略,有4个可选值。 101 | 102 | | 名称 | 含义 | 103 | |-----------+--------------------------------------------------------------------| 104 | | log_start | 优先使用fileoff中记录的文件位置,如果fileoff没找到,从头开始 | 105 | | log_end | 优先使用fileoff中记录的文件位置,如果fileoff没找到,从最后一行开始 | 106 | | start | 从头开始,忽略fileoff中的值 | 107 | | end | 从最后一行开始 | 108 | 109 | ** autocreat 110 | 可选项,boolean,默认 ~autocreat = false~ 111 | 112 | 默认情况,当file指定的文件不存在时,tail2kafka会启动失败,如果指定 autocreat 为 true,可以自动创建不存在的文件。 113 | 114 | ** fileOwner 115 | 可选项,字符串 116 | 117 | 当 =autocreat= 为true时,自动创建文件,默认文件的owner和tail2kafka的运行用户相同,通过fileOwner改变,以免写文件的进程无法写入。例如:某些时候 nginx 以 nobody 的身份写入log。 118 | 119 | ** md5sum 120 | 可选项,boolean,默认值 true 121 | 122 | 实时计算发送内容的md5,用于消费kafka时校验数据的完整性。这个md5不一定准,当tail2kafka发送重启或reload时,如果不是从文件开头读,md5值不准确。计算md5需要耗费cpu,一般情况影响有限。 123 | 124 | ** partition 125 | 可选项,int,无默认值 126 | 127 | 指定kafka的partition,如果没有指定,使用main.lua中的配置。精心指定partition,可以实现均衡,但也很容易出错。 ~partition=-100~ 是一个特殊配置,把数据在多个分区间随机分布,且只能在每个lua配置中单独指定。 128 | 129 | ** autoparti 130 | 可选项,boolean,默认 ~autoparti = false~ 131 | 132 | 如果autoparti为true,那么使用hostshell的配置对应的IP得到一个数对kafka的全部partition取模。这会导致partition不均衡,但是配置简单,适合数据源机器特别多的情况。 133 | 134 | ** rotatedelay 135 | 含义同main.lua 136 | 137 | ** rawcopy 138 | 可选项,boolean,默认 ~rawcopy = false~ 139 | 140 | 默认情况,逐行发送新增内容到kafka。一次发送一行。如果不需要逐行处理,可以设置 =rawcopy= 为 true,一次复制多行数据到kafka,可以提高性能。 141 | 142 | *注意* 默认情况,一次发送一行,不包含换行符。一次发送多行时,只有最后一行没有换行符。处理kafka中的数据时,直接按换行符split就行。 143 | 144 | ** filter 145 | 可选项,table,无默认值 146 | 147 | tail2kafka内置了split功能,把数据行按照空格分隔成字段,通过filter指定字段的下标,然后拼接成行发送。相当于选择一行的某些字段发送,而不是整行发送。对于特别大的行,而行中的某些字段显然没有用,可以使用filter减少发送的内容。 148 | 149 | *注意* ~""和[]~ 包围的字符,被当做一个字段处理。下标从1开始,负数下标倒着数。 150 | 151 | 例如: ~filter = {4, 5, 6, -3}~ ,行的内容为 ~127.0.0.1 - - [16/Dec/2016:10:17:01 +0800] "GET /logm HTTP/1.1" 200 288 "-" "curl/7.19.7 (x86_64-redhat-linux-gnu) libcurl/7.19.7 NSS/3.16.2.3 Basic ECC~ ,发送的内容为 ~2016-12-16T10:17:01 GET /logm HTTP/1.1 200 288~ 152 | 153 | 这里同时指定了 ~timeidx = 4~ ,把时间转成了 ~iso8601~ 格式。 154 | 155 | ** grep 156 | 可选项,function,无默认值 157 | 158 | grep 是 filter 的增强版,是个lua函数。filter只能挑选制定的字段,不能改变字段的内容。grep 的输入是行split后的字段,输出是处理后的字段。 159 | 160 | *注意* 如果返回 =nil= ,这行数据会被忽略。 161 | 162 | 如果指定了 ~withhost = true~ ,把主机名 (参考: ~hostshell~ )自动放到行首。 163 | 164 | 例如: 165 | #+BEGIN_SRC lua 166 | grep = function(fields) 167 | return {'[' .. fields[4] .. '] "' .. fields[5] .. 
'"', fields[6]} 168 | end 169 | #+END_SRC 170 | 171 | 那么发送的行为 ~zzyong [16/Dec/2016:10:17:01 +0800] "GET /logm HTTP/1.1" 200~ 这里指定了 =withhost= ,但是没有指定 =timeidx= 172 | 173 | ** aggregate 174 | 可选项,function,无默认值 175 | 176 | aggregate 是 grep 的增强版,aggregate的输出是一个 key + =hash table= ,用于做各种统计,例如统计状态码,错误数量等。同一时间字段的数据会被尽量合并,但因为日志不报证时间字段绝对递增,所以同一时间的数据可能分多次发送,尤其时单位是秒时,处理kafka中的数据时需要合并。 177 | 178 | *注意* 必须配置 =timeidx= 和 =withtime= ,另外时间字段的精度(秒,分钟等)决定了聚合的粒度。为了能做到机器级别,可以配置 =withhost= 字段 179 | 180 | 例如: 181 | #+BEGIN_SRC lua 182 | aggregate = function(fields) 183 | local tbl = {} 184 | tbl["total"] = 1 185 | tbl["status_" .. fields[6] = 1 186 | return "user", tbl 187 | end 188 | #+END_SRC 189 | 190 | 那么发送到kafka类似 ~2016-12-16T10:17:01 zzyong user total=100 status_200=94 status_304=6~ 如果配置了 ~pkey=www~ ,那么同时会发送 ~2016-12-16T10:17:01 zzyong www total=190 status_200=174 status_304=16~ 191 | 192 | 这个是什么意思呢?它统计了user这个类别(可以是域名,日志文件名,或者某个业务)下的总请求量,http各个状态码的数量。如果配置了pkey,那么同时统计了这台机器的总请求量,各个状态码的数量。 193 | 194 | 这里时间字段是秒级的,所以统计也是秒级的。但是因为并发访问,可能出现 195 | #+BEGIN_EXAMPLE 196 | 127.0.0.1 - - [16/Dec/2016:10:17:01 +0800] "GET /logm HTTP/1.1" 200 288 "-" "curl/7.19.7" 197 | 127.0.0.1 - - [16/Dec/2016:10:17:02 +0800] "GET /logm HTTP/1.1" 200 288 "-" "curl/7.19.7" 198 | 127.0.0.1 - - [16/Dec/2016:10:17:01 +0800] "GET /logm HTTP/1.1" 200 288 "-" "curl/7.19.7" 199 | #+END_EXAMPLE 200 | 201 | 这里时间字段不是绝对递增的,kafka 会收到两条 ~2016-12-16T10:17:01~ 的数据,处理数据时,需要把他们累加起来。 202 | 203 | *注意* 如果返回 =nil= ,这行数据会被忽略。 204 | 205 | ** pkey 206 | 可选项 string || int,无默认 207 | 208 | 配合 =aggregate= 使用,指定全局的统计类别。 209 | 210 | ** transform 211 | 可选项 function 无默认值 212 | 213 | 输入是一行数据,transform操作这行数据,然后输出操作后的数据。 *注意* 如果返回 =nil= 忽略这行,如果返回空字符串,则使用源数据(也可以返回元数据,返回空算一种优化吧)。 214 | 215 | #+BEGIN_SRC lua 216 | transform = function(line) 217 | local s = string.sub(line, 1, 7); 218 | if s == "[error]" then return ""; 219 | elseif s == "[warn]" then return "[error]" .. 
string.sub(line, 8) 220 | else return nil end 221 | end 222 | #+END_SRC 223 | 224 | 如果是=[error]= 开头的,原样发送,如果是 =[warn]= 开头的,用 =[error]= 替换然后发送,否则忽略。 225 | 226 | ** timeidx 227 | 可选项 int 无默认值 228 | 229 | 指定时间字段的下标,主要配合 =filter grep aggregate= 使用。如果指定timeidx,时间从格式 =28/Feb/2015:12:30:23 +0800= 转成 =2015-03-30T16:31:53= 。 230 | 231 | ** withtime 232 | 可选项 boolean 默认 ~withtime=false~ 233 | 234 | 如果 =true= ,会在发往kafka前添加时间字段。 235 | 236 | ** withhost 237 | 可选项 boolean 默认 ~withhost=false~ 238 | 239 | 如果 =true= ,会在发往kafka前添加机器名。 240 | 241 | ** autonl 242 | 可选项 boolean 默认 ~autonl=true~ 243 | 244 | 如果 =true= ,会在发往kafka的行尾添加换行 245 | -------------------------------------------------------------------------------- /src/inotifyctx.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "logger.h" 14 | #include "sys.h" 15 | #include "cnfctx.h" 16 | #include "luactx.h" 17 | #include "filereader.h" 18 | #include "inotifyctx.h" 19 | #include "kafkactx.h" 20 | 21 | #define MAX_ERR_LEN 512 22 | 23 | /* watch IN_DELETE_SELF does not work 24 | * luactx hold fd to the deleted file, the file will never be real deleted 25 | * so DELETE will be inotified 26 | */ 27 | static const uint32_t WATCH_EVENT = IN_MODIFY | IN_MOVE_SELF; 28 | static const size_t ONE_EVENT_SIZE = sizeof(struct inotify_event) + NAME_MAX; 29 | 30 | InotifyCtx::~InotifyCtx() 31 | { 32 | if (wfd_ > 0) close(wfd_); 33 | } 34 | 35 | bool InotifyCtx::addWatch(LuaCtx *ctx, bool strict) 36 | { 37 | const std::string &file = ctx->file(); 38 | 39 | int fd = open(file.c_str(), O_RDONLY); 40 | if (fd == -1) { 41 | if (strict) { 42 | snprintf(cnf_->errbuf(), MAX_ERR_LEN, "%s open error %d:%s", 43 | file.c_str(), errno, strerror(errno)); 44 | } else { 45 | log_fatal(errno, "rewatch open %s error", file.c_str()); 46 | ctx->holdFd(-1); 47 | } 48 | return false; 49 | } 50 | 51 | int wd = inotify_add_watch(wfd_, file.c_str(), WATCH_EVENT); 52 | if (wd == -1) { 53 | close(fd); 54 | 55 | if (strict) { 56 | snprintf(cnf_->errbuf(), MAX_ERR_LEN, "%s add watch error %d:%s", 57 | file.c_str(), errno, strerror(errno)); 58 | } else { 59 | log_fatal(errno, "rewatch add %s error", file.c_str()); 60 | ctx->holdFd(-1); 61 | } 62 | return false; 63 | } 64 | 65 | log_info(0, "%s %s @%d", strict ? 
"watch" : "rewatch", file.c_str(), wd); 66 | 67 | ctx->holdFd(fd); 68 | fdToCtx_.insert(std::make_pair(wd, ctx)); 69 | return true; 70 | } 71 | 72 | bool InotifyCtx::init() 73 | { 74 | // inotify_init1 Linux 2.6.27 75 | wfd_ = inotify_init(); 76 | if (wfd_ == -1) { 77 | snprintf(cnf_->errbuf(), MAX_ERR_LEN, "inotify_init error: %s", strerror(errno)); 78 | return false; 79 | } 80 | 81 | int nb = 1; 82 | ioctl(wfd_, FIONBIO, &nb); 83 | 84 | for (std::vector::iterator ite = cnf_->getLuaCtxs().begin(); 85 | ite != cnf_->getLuaCtxs().end(); ++ite) { 86 | if (!addWatch(*ite, true)) return false; 87 | } 88 | return true; 89 | } 90 | 91 | void InotifyCtx::flowControl(RunStatus *runStatus, bool remedy) 92 | { 93 | while (runStatus->get() == RunStatus::WAIT) { 94 | bool block = cnf_->stats()->queueSize() > MAX_FILE_QUEUE_SIZE; 95 | cnf_->logStats(); 96 | 97 | cnf_->flowControl(block); 98 | 99 | if (block || remedy) { 100 | KafkaCtx *kafka = cnf_->getKafka(); 101 | if (kafka) kafka->poll(10); 102 | else if (block) sys::millisleep(10); 103 | 104 | cnf_->fasttime(true, TIMEUNIT_SECONDS); 105 | remedy = false; 106 | } else { 107 | break; 108 | } 109 | } 110 | } 111 | 112 | /* file was moved */ 113 | void InotifyCtx::tagRotate(LuaCtx *ctx, int wd) 114 | { 115 | char buffer[64]; 116 | snprintf(buffer, 64, "/proc/self/fd/%d", ctx->holdFd()); 117 | 118 | char path[2048]; 119 | ssize_t n; 120 | if ((n = readlink(buffer, path, 2048)) == -1) { 121 | log_fatal(errno, "readlink error"); 122 | } else { 123 | path[n] = '\0'; 124 | log_info(0, "tag remove %d %s", wd, ctx->file().c_str()); 125 | ctx->getFileReader()->tagRotate(FILE_MOVED, path); 126 | } 127 | } 128 | 129 | /* unlink or truncate 130 | * Note that the event queue can overflow. In this case, events are lost. 131 | */ 132 | 133 | void InotifyCtx::tryReWatch(bool remedy) 134 | { 135 | std::vector wds; 136 | 137 | for (std::map::iterator ite = fdToCtx_.begin(); ite != fdToCtx_.end(); ++ite) { 138 | LuaCtx *ctx = ite->second; 139 | 140 | if (remedy) { 141 | struct stat got, want; 142 | if (fstat(ctx->holdFd(), &got) == 0 && stat(ctx->file().c_str(), &want) == 0) { 143 | if (got.st_ino != want.st_ino) { 144 | log_error(0, "inotify may failed, tagRotate manual"); 145 | tagRotate(ctx, ite->first); 146 | } 147 | } else { 148 | log_fatal(errno, "stat holdFd %d or file %s error", ctx->holdFd(), ctx->file().c_str()); 149 | } 150 | } 151 | 152 | if (ctx->getFileReader()->remove()) { 153 | wds.push_back(ite->first); 154 | } 155 | } 156 | 157 | for (std::vector::iterator ite = wds.begin(); ite != wds.end(); ++ite) { 158 | inotify_rm_watch(wfd_, *ite); 159 | 160 | std::map::iterator pos = fdToCtx_.find(*ite); 161 | LuaCtx *ctx = pos->second; 162 | 163 | fdToCtx_.erase(pos); 164 | close(ctx->holdFd()); 165 | 166 | addWatch(ctx, false); 167 | ctx->getFileReader()->tail2kafka(); 168 | } 169 | 170 | for (std::vector::iterator ite = cnf_->getLuaCtxs().begin(); 171 | ite != cnf_->getLuaCtxs().end(); ++ite) { 172 | LuaCtx *ctx = *ite; 173 | 174 | if (ctx->holdFd() == -1) { 175 | addWatch(ctx, false); 176 | } 177 | } 178 | } 179 | 180 | void InotifyCtx::loop() 181 | { 182 | RunStatus *runStatus = cnf_->getRunStatus(); 183 | 184 | const size_t eventBufferSize = cnf_->getLuaCtxSize() * ONE_EVENT_SIZE * 5; 185 | char *eventBuffer = (char *) malloc(eventBufferSize); 186 | 187 | struct pollfd fds[] = { 188 | {wfd_, POLLIN, 0 } 189 | }; 190 | 191 | bool rotate = false; 192 | long savedTime = cnf_->fasttime(true, TIMEUNIT_SECONDS); 193 | long rewatchTime = savedTime; 194 | long 
remedyTime = savedTime; 195 | 196 | while (runStatus->get() == RunStatus::WAIT) { 197 | int nfd = poll(fds, 1, cnf_->getTailLimit() ? 1 : 500); 198 | cnf_->fasttime(true, TIMEUNIT_SECONDS); 199 | cnf_->setTailLimit(false); 200 | 201 | if (nfd == -1) { 202 | if (errno != EINTR) return; 203 | } else if (nfd == 0) { 204 | globalCheck(); 205 | } else { 206 | ssize_t nn = read(wfd_, eventBuffer, eventBufferSize); 207 | assert(nn > 0); 208 | 209 | char *p = eventBuffer; 210 | while (p < eventBuffer + nn) { 211 | /* IN_IGNORED when watch was removed */ 212 | struct inotify_event *event = (struct inotify_event *) p; 213 | if (event->mask & IN_MODIFY) { 214 | LuaCtx *ctx = getLuaCtx(event->wd); 215 | if (ctx) { 216 | log_debug(0, "inotify %s was modified", ctx->file().c_str()); 217 | ctx->getFileReader()->tail2kafka(); 218 | } else { 219 | log_fatal(0, "@%d could not found ctx", event->wd); 220 | } 221 | } 222 | if (event->mask & IN_MOVE_SELF) { 223 | LuaCtx *ctx = getLuaCtx(event->wd); 224 | if (ctx) { 225 | log_info(0, "inotify %s was moved", ctx->file().c_str()); 226 | tagRotate(ctx, event->wd); 227 | rotate = true; 228 | } else { 229 | log_fatal(0, "@%d could not found ctx", event->wd); 230 | } 231 | } 232 | p += sizeof(struct inotify_event) + event->len; 233 | } 234 | } 235 | 236 | if (cnf_->fasttime() != savedTime) { 237 | globalCheck(); 238 | savedTime = cnf_->fasttime(); 239 | } 240 | 241 | bool remedy = cnf_->fasttime() > remedyTime + 60; 242 | 243 | if (cnf_->fasttime() > rewatchTime + 5 || remedy || rotate) { 244 | tryReWatch(remedy); 245 | if (remedy) remedyTime = cnf_->fasttime(); 246 | rewatchTime = cnf_->fasttime(); 247 | rotate = false; 248 | } 249 | 250 | if (cnf_->getPollLimit()) sys::millisleep(cnf_->getPollLimit()); 251 | flowControl(runStatus, remedy); 252 | } 253 | 254 | runStatus->set(RunStatus::STOP); 255 | } 256 | 257 | void InotifyCtx::globalCheck() 258 | { 259 | for (std::vector::iterator ite = cnf_->getLuaCtxs().begin(); 260 | ite != cnf_->getLuaCtxs().end(); ++ite) { 261 | 262 | LuaCtx *ctx = *ite; 263 | ctx->getFileReader()->checkCache(); 264 | ctx->getFileReader()->tail2kafka(); 265 | } 266 | } 267 | -------------------------------------------------------------------------------- /src/kafka2file.cc: -------------------------------------------------------------------------------- 1 | #define _LARGEFILE64_SOURCE 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include "sys.h" 17 | #include "bitshelper.h" 18 | #include "runstatus.h" 19 | #include "logger.h" 20 | #include "uint64offset.h" 21 | #include "cmdnotify.h" 22 | #include "transform.h" 23 | 24 | LOGGER_INIT(); 25 | 26 | class KafkaConsumer { 27 | public: 28 | static KafkaConsumer *create(const char *wdir, const char *brokers, const char *topic, int partition, bool defaultStart); 29 | 30 | ~KafkaConsumer() { 31 | if (rkqu_) rd_kafka_queue_destroy(rkqu_); 32 | if (rkt_) rd_kafka_topic_destroy(rkt_); 33 | if (rk_) rd_kafka_destroy(rk_); 34 | } 35 | 36 | bool loop(RunStatus *runStatus, Transform *transform); 37 | 38 | private: 39 | KafkaConsumer(uint64_t defaultOffset) : rk_(0), rkt_(0), rkqu_(0), offset_(defaultOffset) {} 40 | 41 | private: 42 | const char *wdir_; 43 | const char *topic_; 44 | int partition_; 45 | 46 | rd_kafka_t *rk_; 47 | rd_kafka_topic_t *rkt_; 48 | rd_kafka_queue_t *rkqu_; 49 | 50 | Offset offset_; 51 | }; 52 | 53 | static bool initSingleton(const char *datadir, const 
char *topic, int partition); 54 | 55 | int main(int argc, char *argv[]) 56 | { 57 | if (argc < 6) { 58 | fprintf(stderr, "%s kafka-broker topic partition (offset-begining|offset-end) datadir " 59 | "[notify] [informat:lua:outformat:interval:delay]\n", argv[0]); 60 | return EXIT_FAILURE; 61 | } 62 | 63 | const char *brokers = argv[1]; 64 | const char *topic = argv[2]; 65 | int partition = atoi(argv[3]); 66 | const char *offsetstr = argv[4]; 67 | const char *datadir = argv[5]; 68 | 69 | const char *notify = argc > 6 ? argv[6] : 0; 70 | const char *output = argc > 7 ? argv[7] : "raw::raw"; 71 | 72 | bool defaultStart; 73 | if (strcmp(offsetstr, "offset-begining") == 0) { 74 | defaultStart = true; 75 | } else if (strcmp(offsetstr, "offset-end") == 0) { 76 | defaultStart = false; 77 | } else { 78 | fprintf(stderr, "unknow default offset, use offset-begining or offset-end\n"); 79 | return EXIT_FAILURE; 80 | } 81 | 82 | char buffer[1024]; 83 | snprintf(buffer, 1024, "%s/%s", datadir, topic); 84 | if (mkdir(buffer, 0755) != 0 && errno != EEXIST) { 85 | fprintf(stderr, "create work dir %s error, %d:%s\n", buffer, errno, strerror(errno)); 86 | return EXIT_FAILURE; 87 | } 88 | 89 | if (!initSingleton(datadir, topic, partition)) { 90 | fprintf(stderr, "%s:%d instance already exists\n", topic, partition); 91 | return EXIT_FAILURE; 92 | } 93 | 94 | snprintf(buffer, 1024, "%s/%s.%d.log", datadir, topic, partition); 95 | Logger::create(buffer, Logger::DAY, true); 96 | 97 | CmdNotify cmdNotify(notify, datadir, topic, partition); 98 | 99 | std::auto_ptr transform(Transform::create(datadir, topic, partition, &cmdNotify, output, buffer)); 100 | if (transform.get() == 0) { 101 | fprintf(stderr, "create transform error %s\n", buffer); 102 | return EXIT_FAILURE; 103 | } 104 | 105 | RunStatus *runStatus = RunStatus::create(); 106 | sys::SignalHelper signalHelper(buffer); 107 | 108 | int signos[] = { SIGTERM, SIGINT, SIGCHLD }; 109 | RunStatus::Want wants[] = { RunStatus::STOP, RunStatus::STOP, RunStatus::IGNORE }; 110 | if (!signalHelper.signal(runStatus, sizeof(signos)/sizeof(signos[0]), signos, wants)) { 111 | log_fatal(errno, "install signal %s", buffer); 112 | return EXIT_FAILURE; 113 | } 114 | 115 | std::auto_ptr ctx(KafkaConsumer::create(datadir, brokers, topic, partition, defaultStart)); 116 | if (!ctx.get()) return EXIT_FAILURE; 117 | 118 | bool rc = ctx->loop(runStatus, transform.get()); 119 | 120 | // rd_kafka_destroy may block forever, kill before kill -9 is a safe way 121 | transform.release(); 122 | 123 | log_info(0, "exit"); 124 | return rc ? EXIT_SUCCESS : EXIT_FAILURE; 125 | } 126 | 127 | static void log_cb(const rd_kafka_t *, int level, const char *fac, const char *buf) 128 | { 129 | log_info(0, "kafka error level %d fac %s buf %s", level, fac, buf); 130 | } 131 | 132 | KafkaConsumer *KafkaConsumer::create(const char *wdir, const char *brokers, const char *topic, int partition, bool defaultStart) 133 | { 134 | uint64_t defaultOffset = defaultStart ? 
RD_KAFKA_OFFSET_BEGINNING : RD_KAFKA_OFFSET_END; 135 | std::auto_ptr ctx(new KafkaConsumer(defaultOffset)); 136 | 137 | char errstr[512]; 138 | 139 | rd_kafka_conf_t *conf = rd_kafka_conf_new(); 140 | rd_kafka_conf_set(conf, "broker.version.fallback", "0.8.2.1", 0, 0); 141 | rd_kafka_conf_set(conf, "enable.auto.commit", "false", 0, 0); 142 | 143 | rd_kafka_conf_set_log_cb(conf, log_cb); 144 | 145 | ctx->rk_ = rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr, sizeof(errstr)); 146 | if (rd_kafka_brokers_add(ctx->rk_, brokers) == 0) { 147 | log_fatal(0, "invalid brokers %s", brokers); 148 | return 0; 149 | } 150 | 151 | ctx->rkt_ = rd_kafka_topic_new(ctx->rk_, topic, 0); 152 | ctx->rkqu_ = rd_kafka_queue_new(ctx->rk_); 153 | 154 | char path[1024]; 155 | snprintf(path, 1024, "%s/%s.%d.offset", wdir, topic, partition); 156 | if (!ctx->offset_.init(path, errstr)) { 157 | log_fatal(0, "%s:%d init offset error, %s", topic, partition, errstr); 158 | return 0; 159 | } 160 | 161 | log_info(0, "%s:%d set offset at %ld", topic, partition, ctx->offset_.get()); 162 | if (rd_kafka_consume_start_queue(ctx->rkt_, partition, ctx->offset_.get(), ctx->rkqu_) == -1) { 163 | log_fatal(0, "%s:%d failed to start consuming: %s", topic, partition, rd_kafka_err2name(rd_kafka_last_error())); 164 | return 0; 165 | } 166 | 167 | ctx->wdir_ = wdir; 168 | ctx->topic_ = topic; 169 | ctx->partition_ = partition; 170 | 171 | return ctx.release(); 172 | } 173 | 174 | bool KafkaConsumer::loop(RunStatus *runStatus, Transform *transform) 175 | { 176 | uint64_t startOff = offset_.get(); 177 | uint64_t off = RD_KAFKA_OFFSET_END; 178 | 179 | while (runStatus->get() != RunStatus::STOP) { 180 | rd_kafka_message_t *rkm; 181 | rkm = rd_kafka_consume_queue(rkqu_, 1000); 182 | if (!rkm) { // timeout 183 | if (!bits_test(transform->timeout(&off), Transform::IGNORE)) offset_.update(off); 184 | log_info(0, "consume %s:%d timeout", topic_, partition_); 185 | continue; 186 | } 187 | 188 | if (rkm->err) { 189 | if (rkm->err != RD_KAFKA_RESP_ERR__PARTITION_EOF) { 190 | log_error(0, "consume %s:%d error %s", topic_, partition_, rd_kafka_message_errstr(rkm)); 191 | } 192 | rd_kafka_message_destroy(rkm); 193 | continue; 194 | } 195 | 196 | if (startOff == (uint64_t) rkm->offset) { 197 | log_info(0, "%s:%d same offset message %lu %.*s", topic_, partition_, startOff, (int) rkm->len, (char *) rkm->payload); 198 | rd_kafka_message_destroy(rkm); 199 | continue; 200 | } 201 | 202 | log_debug(0, "data @%ld %.*s\n", rkm->offset, (int) rkm->len, (char *) rkm->payload); 203 | 204 | uint32_t flags = transform->write(rkm, &off); 205 | if (bits_test(flags, Transform::RKMFREE)) rd_kafka_message_destroy(rkm); 206 | if (!bits_test(flags, Transform::IGNORE)) offset_.update(off); 207 | } 208 | 209 | if (!bits_test(transform->timeout(&off), Transform::IGNORE)) { assert(off != (uint64_t) RD_KAFKA_OFFSET_END); offset_.update(off); } 210 | log_info(0, "%s:%d end offset at %ld", topic_, partition_, offset_.get()); 211 | 212 | return true; 213 | } 214 | 215 | static char LOCK_FILE[1024] = {0}; 216 | static void deleteLockFile() 217 | { 218 | if (LOCK_FILE[0] != '\0') unlink(LOCK_FILE); 219 | } 220 | 221 | /* pidfile may stale, this's not a perfect method */ 222 | 223 | bool initSingleton(const char *datadir, const char *topic, int partition) 224 | { 225 | if (datadir[0] == '-') return true; 226 | 227 | snprintf(LOCK_FILE, 1024, "%s/%s.%d.lock", datadir, topic, partition); 228 | if (sys::initSingleton(LOCK_FILE, 0)) { 229 | atexit(deleteLockFile); 230 | return true; 231 | } 
else { 232 | return false; 233 | } 234 | } 235 | -------------------------------------------------------------------------------- /src/tail2kafka.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "logger.h" 16 | #include "sys.h" 17 | #include "runstatus.h" 18 | #include "metrics.h" 19 | #include "luactx.h" 20 | #include "cnfctx.h" 21 | #include "inotifyctx.h" 22 | #include "filereader.h" 23 | #include "common.h" 24 | 25 | LOGGER_INIT(); 26 | 27 | pid_t spawn(CnfCtx *ctx, CnfCtx *octx); 28 | int runForeGround(CnfCtx *ctx); 29 | 30 | int main(int argc, char *argv[]) 31 | { 32 | if (argc != 2) { 33 | fprintf(stderr, "version: %s\n", VERSION); 34 | fprintf(stderr, "%s confdir\n", argv[0]); 35 | return EXIT_FAILURE; 36 | } 37 | 38 | const char *dir = argv[1]; 39 | pid_t pid = -1; 40 | char errbuf[MAX_ERR_LEN] = {0}; 41 | 42 | CnfCtx *cnf = CnfCtx::loadCnf(dir, errbuf); 43 | if (!cnf) { 44 | fprintf(stderr, "load cnf error %s\n", errbuf); 45 | return EXIT_FAILURE; 46 | } 47 | 48 | if (cnf->logdir() != "-") { 49 | if (!Logger::create(cnf->logdir() + "/tail2kafka.log", Logger::DAY, true)) { 50 | fprintf(stderr, "%d:%s init logger error\n", errno, strerror(errno)); 51 | return EXIT_FAILURE; 52 | } 53 | } 54 | 55 | if (cnf->daemonize() == 1) { 56 | if (getenv("TAIL2KAFKA_NOSTDIO")) { 57 | daemon(1, 0); 58 | } else { 59 | daemon(1, 1); 60 | } 61 | } 62 | 63 | RunStatus *runStatus = RunStatus::create(); 64 | cnf->setRunStatus(runStatus); 65 | 66 | if (cnf->daemonize() == -1) { 67 | return runForeGround(cnf); 68 | } 69 | 70 | if (!sys::initSingleton(cnf->getPidFile(), errbuf)) { 71 | log_fatal(0, "init singleton %s", errbuf); 72 | return EXIT_FAILURE; 73 | } 74 | 75 | sys::SignalHelper signalHelper(errbuf); 76 | 77 | int signos[] = { SIGTERM, SIGHUP, SIGCHLD, SIGUSR1, }; 78 | RunStatus::Want wants[] = { RunStatus::STOP, RunStatus::RELOAD, RunStatus::START2, 79 | RunStatus::REOPEN }; 80 | if (!signalHelper.signal(runStatus, 4, signos, wants)) { 81 | log_fatal(errno, "install signal %s", errbuf); 82 | return EXIT_FAILURE; 83 | } 84 | if (!signalHelper.block(SIGCHLD, SIGTERM, SIGHUP, SIGUSR1, -1)) { 85 | log_fatal(errno, "block signal %s", errbuf); 86 | return EXIT_FAILURE; 87 | } 88 | 89 | int rc = EXIT_SUCCESS; 90 | while (runStatus->get() != RunStatus::STOP) { 91 | log_info(0, "runstatus %s", runStatus->status()); 92 | 93 | if (runStatus->get() == RunStatus::START2) { 94 | pid_t opid; 95 | int status = 0; 96 | if ((opid = waitpid(-1, &status, WNOHANG)) > 0) { 97 | if (opid != pid) runStatus->set(RunStatus::WAIT); 98 | if (WIFEXITED(status)) { 99 | log_fatal(0, "children %d exit status=%d", (int) opid, WEXITSTATUS(status)); 100 | } else if (WIFSIGNALED(status)) { 101 | log_fatal(0, "children %d killed by signal %d", (int) opid, WTERMSIG(status)); 102 | } 103 | sys::millisleep(500); 104 | } else { 105 | runStatus->set(RunStatus::WAIT); 106 | } 107 | } 108 | 109 | if (runStatus->get() == RunStatus::START1 || runStatus->get() == RunStatus::START2) { 110 | if (runStatus->get() == RunStatus::START2 && !cnf->reset()) { 111 | log_fatal(errno, "cnf reset error %s before spawn", cnf->errbuf()); 112 | rc = EXIT_FAILURE; 113 | break; 114 | } 115 | 116 | pid = spawn(cnf, 0); 117 | if (pid == -1) { 118 | log_fatal(errno, "spawn failed %s, exit", cnf->errbuf()); 119 | rc = EXIT_FAILURE; 120 | 
break; 121 | } 122 | } else if (runStatus->get() == RunStatus::RELOAD) { 123 | CnfCtx *ncnf = CnfCtx::loadCnf(dir, errbuf); 124 | if (ncnf) { 125 | ncnf->setRunStatus(runStatus); 126 | 127 | pid_t npid = spawn(ncnf, cnf); 128 | if (npid != -1) { 129 | log_info(0, "reload cnf"); 130 | kill(pid, SIGTERM); 131 | cnf = ncnf; 132 | pid = npid; 133 | } else { 134 | delete ncnf; 135 | } 136 | } else { 137 | log_fatal(0, "reload cnf error %s", errbuf); 138 | } 139 | } else if (runStatus->get() == RunStatus::REOPEN) { 140 | log_error(0, "force reopening of files"); 141 | } 142 | 143 | LOGGER_ONCE(true); // solve parent process log rotate problem 144 | 145 | runStatus->set(RunStatus::WAIT); 146 | signalHelper.suspend(-1); 147 | 148 | LOGGER_ONCE(false); 149 | } 150 | 151 | if (pid != -1) kill(pid, SIGTERM); 152 | log_info(0, "tail2kafka exit"); 153 | 154 | delete cnf; 155 | return rc; 156 | } 157 | 158 | void *routine(void *data) 159 | { 160 | CnfCtx *cnf = (CnfCtx *) data; 161 | 162 | KafkaCtx *kafka = cnf->getKafka(); 163 | EsCtx *es = cnf->getEs(); 164 | 165 | RunStatus *runStatus = cnf->getRunStatus(); 166 | 167 | uintptr_t ptr; 168 | while (runStatus->get() == RunStatus::WAIT) { 169 | ssize_t nn = read(cnf->accept, &ptr, sizeof(ptr)); 170 | if (nn == -1) { 171 | if (errno != EINTR) break; 172 | else continue; 173 | } else if (nn == 0) { 174 | break; 175 | } 176 | 177 | assert(nn == sizeof(ptr)); 178 | 179 | if (!ptr) break; // terminate task 180 | 181 | if (kafka && !kafka->produce((std::vector*) ptr)) { 182 | log_fatal(0, "rd_kafka_poll timeout, librdkafka may have bug or kafka service is unavailable, exit"); 183 | runStatus->set(RunStatus::STOP); 184 | kafka->poll(10); // poll kafka 185 | } else if (es && !es->produce((std::vector*) ptr)) { 186 | log_fatal(0, "es_poll timeout, es service may unavailable, exit"); 187 | runStatus->set(RunStatus::STOP); 188 | } 189 | delete (std::vector*)ptr; 190 | } 191 | 192 | runStatus->set(RunStatus::STOP); 193 | log_info(0, "routine exit"); 194 | return NULL; 195 | } 196 | 197 | inline void terminateRoutine(CnfCtx *ctx) 198 | { 199 | uintptr_t ptr = 0; 200 | write(ctx->server, &ptr, sizeof(ptr)); 201 | } 202 | 203 | void run(InotifyCtx *inotify, CnfCtx *cnf) 204 | { 205 | /* must call in subprocess */ 206 | const char *pingbackUrl = cnf->pingbackUrl().empty() ? 
0 : cnf->pingbackUrl().c_str(); 207 | if (!util::Metrics::create(pingbackUrl, cnf->errbuf())) { 208 | log_fatal(0, "Metrics::create error %s", cnf->errbuf()); 209 | } 210 | util::Metrics::pingback("SPAWN", "status=%s", cnf->getRunStatus()->status()); 211 | 212 | cnf->getRunStatus()->set(RunStatus::WAIT); 213 | 214 | sys::SignalHelper signalHelper(0); 215 | signalHelper.setmask(-1); 216 | 217 | // must call in subprocess 218 | if (!cnf->initFileReader()) { 219 | log_fatal(0, "init filereader error %s", cnf->errbuf()); 220 | exit(EXIT_FAILURE); 221 | } 222 | 223 | if (!cnf->getFileOff()->reinit()) { 224 | log_fatal(0, "reinit fileoff error %s", cnf->errbuf()); 225 | exit(EXIT_FAILURE); 226 | } 227 | 228 | if (cnf->enableKafka()) { 229 | /* initKafka startup librdkafka thread */ 230 | if (!cnf->initKafka()) { 231 | log_fatal(0, "init kafka error %s", cnf->errbuf()); 232 | exit(EXIT_FAILURE); 233 | } 234 | } else if (cnf->enableEs()) { 235 | if (!cnf->initEs()) { 236 | log_fatal(0, "init es error %s", cnf->errbuf()); 237 | exit(EXIT_FAILURE); 238 | } 239 | } 240 | 241 | pthread_t tid; 242 | pthread_create(&tid, NULL, routine, cnf); 243 | inotify->loop(); 244 | terminateRoutine(cnf); 245 | pthread_join(tid, NULL); 246 | } 247 | 248 | int runForeGround(CnfCtx *cnf) 249 | { 250 | InotifyCtx inotify(cnf); 251 | if (!inotify.init()) return -1; 252 | 253 | if (!cnf->initFileOff()) return -1; 254 | 255 | run(&inotify, cnf); 256 | 257 | delete cnf; 258 | return EXIT_SUCCESS; 259 | } 260 | 261 | pid_t spawn(CnfCtx *cnf, CnfCtx *ocnf) 262 | { 263 | InotifyCtx inotify(cnf); 264 | if (!inotify.init()) return -1; 265 | 266 | if (!cnf->initFileOff()) return -1; 267 | 268 | /* unload old cnf before fork */ 269 | if (ocnf) delete ocnf; 270 | 271 | int pid = fork(); 272 | if (pid == 0) { 273 | run(&inotify, cnf); 274 | 275 | delete cnf; 276 | exit(EXIT_SUCCESS); 277 | } 278 | 279 | return pid; 280 | } 281 | -------------------------------------------------------------------------------- /src/common.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "util.h" 9 | #include "common.h" 10 | 11 | bool shell(const char *cmd, std::string *output, char *errbuf) 12 | { 13 | FILE *fp = popen(cmd, "r"); 14 | if (!fp) { 15 | snprintf(errbuf, MAX_ERR_LEN, "%s exec error", cmd); 16 | return false; 17 | } 18 | 19 | char buf[256]; 20 | while (fgets(buf, 256, fp)) { 21 | output->append(buf); 22 | } 23 | 24 | int status = pclose(fp); 25 | if (status != 0) { 26 | snprintf(errbuf, MAX_ERR_LEN, "%s exit %d", cmd, status); 27 | return false; 28 | } 29 | 30 | output->assign(util::trim(*output)); 31 | return true; 32 | } 33 | 34 | bool hostAddr(const std::string &host, uint32_t *addr, char *errbuf) 35 | { 36 | struct addrinfo *ai; 37 | struct addrinfo hints; 38 | 39 | memset(&hints, 0x00, sizeof(hints)); 40 | hints.ai_family = AF_INET; 41 | 42 | int rc = getaddrinfo(host.c_str(), NULL, &hints, &ai); 43 | if (rc != 0) { 44 | snprintf(errbuf, MAX_ERR_LEN, "getaddrinfo() %s error %s\n", 45 | host.c_str(), rc == EAI_SYSTEM ? 
strerror(errno) : gai_strerror(rc)); 46 | return false; 47 | } 48 | 49 | struct sockaddr_in *in = (struct sockaddr_in *) ai->ai_addr; 50 | *addr = in->sin_addr.s_addr; 51 | freeaddrinfo(ai); 52 | return true; 53 | } 54 | 55 | void split(const char *line, size_t nline, std::vector *items) 56 | { 57 | bool esc = false; 58 | char want = '\0'; 59 | size_t pos = 0; 60 | 61 | if (nline == (size_t)-1) nline = strlen(line); 62 | 63 | for (size_t i = 0; i < nline; ++i) { 64 | if (esc) { 65 | esc = false; 66 | } else if (line[i] == '\\') { 67 | esc = true; 68 | } else if (want == '"') { 69 | if (line[i] == '"') { 70 | want = '\0'; 71 | items->push_back(std::string(line + pos, i-pos)); 72 | pos = i+1; 73 | } 74 | } else if (want == ']') { 75 | if (line[i] == ']') { 76 | want = '\0'; 77 | items->push_back(std::string(line + pos, i-pos)); 78 | pos = i+1; 79 | } 80 | } else { 81 | if (line[i] == '"') { 82 | want = line[i]; 83 | pos++; 84 | } else if (line[i] == '[') { 85 | want = ']'; 86 | pos++; 87 | } else if (line[i] == ' ') { 88 | if (i != pos) items->push_back(std::string(line + pos, i - pos)); 89 | pos = i+1; 90 | } 91 | } 92 | } 93 | if (pos < nline) items->push_back(std::string(line + pos, nline - pos)); 94 | } 95 | 96 | void splitn(const char *line, size_t nline, std::vector *items, int limit, char delimiter) 97 | { 98 | bool esc = false; 99 | size_t pos = 0; 100 | 101 | if (nline == (size_t)-1) nline = strlen(line); 102 | 103 | for (size_t i = 0; i < nline; ++i) { 104 | if (esc) { 105 | esc = false; 106 | } else if (line[i] == '\\') { 107 | esc = true; 108 | } else if (line[i] == delimiter) { 109 | if (i != pos) { 110 | if (limit > 0 && (size_t) limit == items->size() + 1) i = nline; 111 | items->push_back(std::string(line + pos, i - pos)); 112 | } 113 | pos = i+1; 114 | } 115 | } 116 | if (pos < nline) items->push_back(std::string(line + pos, nline - pos)); 117 | } 118 | 119 | enum DateTimeStatus { WaitYear, WaitMonth, WaitDay, WaitHour, WaitMin, WaitSec }; 120 | 121 | static const char *MonthAlpha[12] = { 122 | "Jan", "Feb", "Mar", "Apr", "May", "Jun", 123 | "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; 124 | 125 | // 28/Feb/2015:12:30:23 +0800 -> 2015-03-30T16:31:53 126 | bool timeLocalToIso8601(const std::string &t, std::string *iso, time_t *time) 127 | { 128 | DateTimeStatus status = WaitDay; 129 | int year, mon, day, hour, min, sec; 130 | year = mon = day = hour = min = sec = 0; 131 | 132 | const char *p = t.c_str(); 133 | while (*p && *p != ' ') { 134 | if (*p == '/') { 135 | if (status == WaitDay) status = WaitMonth; 136 | else if (status == WaitMonth) status = WaitYear; 137 | else return false; 138 | } else if (*p == ':') { 139 | if (status == WaitYear) status = WaitHour; 140 | else if (status == WaitHour) status = WaitMin; 141 | else if (status == WaitMin) status = WaitSec; 142 | else return false; 143 | } else if (*p >= '0' && *p <= '9') { 144 | int n = *p - '0'; 145 | if (status == WaitYear) year = year * 10 + n; 146 | else if (status == WaitDay) day = day * 10 + n; 147 | else if (status == WaitHour) hour = hour * 10 + n; 148 | else if (status == WaitMin) min = min * 10 + n; 149 | else if (status == WaitSec) sec = sec * 10 + n; 150 | else return false; 151 | } else if (status == WaitMonth) { 152 | size_t i; 153 | for (i = 0; i < 12; ++i) { 154 | if (strncmp(p, MonthAlpha[i], 3) == 0) { 155 | mon = i+1; 156 | break; 157 | } 158 | } 159 | } else { 160 | return false; 161 | } 162 | p++; 163 | } 164 | 165 | iso->reserve(sizeof("yyyy-mm-ddThh:mm:ss")); 166 | 
iso->resize(sizeof("yyyy-mm-ddThh:mm:ss")-1); 167 | sprintf((char *) iso->data(), "%04d-%02d-%02dT%02d:%02d:%02d", 168 | year, mon, day, hour, min, sec); 169 | 170 | if (time) *time = mktime(year, mon, day, hour, min, sec); 171 | return true; 172 | } 173 | 174 | // 2018-02-22 17:40:00.000 175 | bool parseIso8601(const std::string &t, time_t *timestamp) 176 | { 177 | DateTimeStatus status = WaitYear; 178 | int year, mon, day, hour, min, sec; 179 | year = mon = day = hour = min = sec = 0; 180 | 181 | const char *p = t.c_str(); 182 | while (*p && *p != '.') { 183 | if (*p == '-') { 184 | if (status == WaitYear) status = WaitMonth; 185 | else if (status == WaitMonth) status = WaitDay; 186 | else return false; 187 | } else if (*p == ' ' || *p == 'T') { 188 | if (status == WaitDay) status = WaitHour; 189 | else return false; 190 | } else if (*p == ':') { 191 | if (status == WaitHour) status = WaitMin; 192 | else if (status == WaitMin) status = WaitSec; 193 | else return false; 194 | } else if (*p >= '0' && *p <= '9') { 195 | int n = *p - '0'; 196 | if (status == WaitYear) year = year * 10 + n; 197 | else if (status == WaitMonth) mon = mon * 10 + n; 198 | else if (status == WaitDay) day = day * 10 + n; 199 | else if (status == WaitHour) hour = hour * 10 + n; 200 | else if (status == WaitMin) min = min * 10 + n; 201 | else if (status == WaitSec) sec = sec * 10 + n; 202 | else return false; 203 | } else { 204 | return false; 205 | } 206 | p++; 207 | } 208 | if (status != WaitSec) return false; 209 | 210 | *timestamp = mktime(year, mon, day, hour, min, sec); 211 | return true; 212 | } 213 | 214 | bool parseQuery(const char *r, size_t len, std::string *path, std::map *query) 215 | { 216 | size_t i = 0; 217 | const char *ptr = r; 218 | 219 | while (i < len && *ptr && *ptr != '?') { 220 | ++ptr; 221 | ++i; 222 | } 223 | path->assign(r, ptr - r); 224 | 225 | if (i >= len || !*ptr) return true; 226 | 227 | ++i; 228 | ++ptr; 229 | 230 | std::string key, value; 231 | bool wantKey = true; 232 | while (i < len && *ptr) { 233 | if (*ptr == '&') { 234 | if (!key.empty() && !value.empty()) (*query)[key] = value; 235 | key.clear(); 236 | value.clear(); 237 | wantKey = true; 238 | } else if (*ptr == '=') { 239 | wantKey = false; 240 | } else { 241 | if (wantKey) { 242 | key.append(1, *ptr); 243 | } else { 244 | if (*ptr == '%' && (i+2 < len && *(ptr+2))) { 245 | size_t val; 246 | if (util::hexToInt(ptr+1, &val)) { 247 | value.append(1, val); 248 | i += 2; 249 | ptr += 2; 250 | } else { 251 | value.append(1, *ptr); 252 | } 253 | } else { 254 | value.append(1, *ptr); 255 | } 256 | } 257 | } 258 | ++ptr; 259 | ++i; 260 | } 261 | 262 | if (!key.empty() && !value.empty()) (*query)[key] = value; 263 | return true; 264 | } 265 | 266 | // GET /path[?k=v] HTTP/1.1 267 | bool parseRequest(const char *r, std::string *method, std::string *path, std::map *query) 268 | { 269 | const char *fsp = strchr(r, ' '); 270 | const char *lsp = strrchr(r, ' '); 271 | 272 | if (!fsp || !lsp || lsp <= fsp+1) return false; 273 | method->assign(r, fsp - r); 274 | 275 | return parseQuery(fsp+1, lsp-(fsp+1), path, query); 276 | } 277 | -------------------------------------------------------------------------------- /src/tail2es_unittest.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "logger.h" 12 | #include "unittesthelper.h" 13 | #include "sys.h" 14 | #include "luactx.h" 
15 | #include "cnfctx.h" 16 | 17 | LOGGER_INIT(); 18 | 19 | static CnfCtx *cnf = 0; 20 | 21 | #define LUACNF_SIZE 2 22 | #define ETCDIR "blackboxtest/tail2es" 23 | #define LOG(f) "logs/" f 24 | 25 | static LuaCtx *getLuaCtx(const char *file) 26 | { 27 | for (std::vector::iterator ite = cnf->getLuaCtxs().begin(); 28 | ite != cnf->getLuaCtxs().end(); ++ite) { 29 | LuaCtx *ctx = (*ite); 30 | while (ctx) { 31 | if (ctx->file() == file) return ctx; 32 | ctx = ctx->next(); 33 | } 34 | } 35 | return 0; 36 | } 37 | 38 | static std::string fileGetContent(const char *file) 39 | { 40 | FILE *fp = fopen(file, "r"); 41 | check(fp, "read file %s error: %s", file, strerror(errno)); 42 | 43 | std::string content; 44 | char buf[512]; 45 | while (fgets(buf, 512, fp)) { 46 | content.append(buf); 47 | } 48 | fclose(fp); 49 | 50 | return content; 51 | } 52 | 53 | DEFINE(transformEsDocNginxLog) 54 | { 55 | std::string log = fileGetContent(ETCDIR"/nginx_log.log"); 56 | std::string *doc = new std::string; 57 | LuaFunction::transformEsDocNginxLog(log, doc); 58 | std::string expectDoc = "{\"y\":\"\\ufffd\\ufffd\"}"; 59 | check(*doc == expectDoc, "got %s, expect %s", PTRS(*doc), PTRS(expectDoc)); 60 | delete doc; 61 | } 62 | 63 | DEFINE(transformEsDocNginxJson) 64 | { 65 | std::string log = fileGetContent(ETCDIR"/nginx_json.log"); 66 | std::string *doc = new std::string; 67 | LuaFunction::transformEsDocNginxJson(log, doc); 68 | std::string expectDoc = "{\"receiver\":\"bb_up\"}"; 69 | check(*doc == expectDoc, "got %s, expect %s", PTRS(*doc), PTRS(expectDoc)); 70 | delete doc; 71 | } 72 | 73 | DEFINE(loadCnf) 74 | { 75 | static char errbuf[MAX_ERR_LEN]; 76 | 77 | cnf = CnfCtx::loadCnf(ETCDIR, errbuf); 78 | check(cnf, "loadCnf %s", errbuf); 79 | 80 | cnf->fasttime(true, TIMEUNIT_MILLI); 81 | check(cnf->getLuaCtxSize() == LUACNF_SIZE, "%d", (int) cnf->getLuaCtxSize()); 82 | } 83 | 84 | DEFINE(loadLuaCtx) 85 | { 86 | LuaCtx *ctx; 87 | LuaFunction *function; 88 | 89 | ctx = getLuaCtx(LOG("basic.log")); 90 | check(ctx, "%s", "basic not found"); 91 | 92 | std::string esIndex; 93 | bool esIndexWithTimeFormat; 94 | int esIndexPos, esDocPos, esDocDataFormat; 95 | ctx->es(&esIndex, &esIndexWithTimeFormat, &esIndexPos, &esDocPos, &esDocDataFormat); 96 | 97 | check(esIndex == "_%F", "%s", PTRS(esIndex)); 98 | check(esIndexWithTimeFormat, "%s", BTOS(esIndexWithTimeFormat)); 99 | check(esIndexPos == 1, "%d", esIndexPos); 100 | check(esDocPos == 3, "%d", esDocPos); 101 | check(esDocDataFormat == ESDOC_DATAFORMAT_NGINX_LOG, "%d", esDocDataFormat); 102 | 103 | function = ctx->function(); 104 | check(function->type_ == LuaFunction::ESPLAIN, "function type %s, expect esplain", LuaFunction::typeToString(function->type_)); 105 | 106 | ctx = getLuaCtx(LOG("indexdoc.log")); 107 | check(ctx, "%s", "indexdoc not found"); 108 | 109 | function = ctx->function(); 110 | check(function->type_ == LuaFunction::INDEXDOC, "function type %s, expect indexdoc", LuaFunction::typeToString(function->type_)); 111 | } 112 | 113 | #define CONTENT_LENGTH_HEADER \ 114 | "HTTP/1.1 400 Bad Request\r\n" \ 115 | "content-type: application/json; charset=UTF-8\r\n" \ 116 | "content-length: 175\r\n" 117 | 118 | #define CONTENT_BODY_PART1 \ 119 | "{\"_index\":\"indexdoc\",\"_type\":\"_doc\",\"_id\":\"lmBMN28B-3LtigQc8FLf\",\"_version\":1,\"result\":\"created\",\"_shards\":{\"total\":2,\"successful\":1,\"failed\":0}" 120 | 121 | #define CONTENT_BODY_PART2 ",\"_seq_no\":0,\"_primary_term\":1}" 122 | 123 | DEFINE(httpProtocol_1) 124 | { 125 | std::vector v; 126 | 
v.push_back("127.0.0.1:9200"); 127 | EsUrl url(v, 0, 0); 128 | url.respWant_ = STATUS_LINE; 129 | url.resp_ = url.header_; 130 | 131 | strcpy(url.header_, CONTENT_LENGTH_HEADER); 132 | url.offset_ = sizeof(CONTENT_LENGTH_HEADER)-1; 133 | 134 | url.initHttpResponse(url.header_ + url.offset_); 135 | check(url.respWant_ == HEADER, "parse part header error"); 136 | check(url.respCode_ == 400, "http status error %d", url.respCode_); 137 | 138 | strcpy(url.header_, CONTENT_LENGTH_HEADER "\r\n"); 139 | url.offset_ = sizeof(CONTENT_LENGTH_HEADER)-1 + 2; 140 | 141 | url.initHttpResponse(url.header_ + url.offset_); 142 | check(url.respWant_ == BODY, "parse want %d", url.respWant_); 143 | check(url.wantLen_ == 175, "content length error %d", int(url.wantLen_)); 144 | 145 | strcpy(url.header_, CONTENT_BODY_PART1 CONTENT_BODY_PART2); 146 | url.offset_ = strlen(url.header_); 147 | 148 | url.initHttpResponse(url.header_ + url.offset_); 149 | check(url.respWant_ == RESP_EOF, "parse want %d", url.respWant_); 150 | check(url.respBody_ == CONTENT_BODY_PART1 CONTENT_BODY_PART2, "content error"); 151 | } 152 | 153 | DEFINE(basic) 154 | { 155 | std::vector datas; 156 | 157 | LuaCtx *ctx = getLuaCtx(LOG("basic.log")); 158 | LuaFunction *function = ctx->function_; 159 | 160 | time_t now = time(0); 161 | struct tm ltm; 162 | localtime_r(&now, <m); 163 | 164 | char index[64]; 165 | strftime(index, 64, "basic_%F", <m); 166 | 167 | const char *s1 = "basic IP {\x22x\x22: 1}"; 168 | const char *json = "{\"x\": 1}"; 169 | function->process(0, s1, strlen(s1), &datas); 170 | check(datas.size() == 1, "datas size %d", (int) datas.size()); 171 | check(*datas[0]->esIndex == index, "expect %s, got %s", index, PTRS(*datas[0]->esIndex)); 172 | check(*datas[0]->data == "{\"x\": 1}", "expect %s, got %s", json, PTRS(*datas[0]->data)); 173 | } 174 | 175 | DEFINE(indexdoc) 176 | { 177 | std::vector datas; 178 | 179 | LuaCtx *ctx = getLuaCtx(LOG("indexdoc.log")); 180 | LuaFunction *function = ctx->function_; 181 | 182 | const char *s1 = "{\"x\": 1}"; 183 | check(function->process(0, s1, strlen(s1), &datas) > 0, "indexdoc error %s", cnf->errbuf()); 184 | check(datas.size() == 1, "data size %d", (int) datas.size()); 185 | check(*datas[0]->esIndex == "indexdoc", "expect indexdoc, got %s", PTRS(*datas[0]->esIndex)); 186 | check(*datas[0]->data == s1, "expect %s, got %s", s1, PTRS(*datas[0]->data)); 187 | } 188 | 189 | DEFINE(initEs) 190 | { 191 | check(cnf->initEs(), "%s", cnf->errbuf()); 192 | 193 | EsCtx *es = cnf->getEs(); 194 | for (std::vector::iterator ite = es->esSenders_.begin(); 195 | ite != es->esSenders_.end(); ++ite) { 196 | check((*ite)->epfd_ > 0, "init epoll error"); 197 | check((*ite)->running_, "es is not running"); 198 | } 199 | } 200 | 201 | DEFINE(esProduce) 202 | { 203 | LuaCtx *ctx = getLuaCtx(LOG("basic.log")); 204 | 205 | char json[64]; 206 | long x = 0; 207 | std::vector datas; 208 | for (int i = 0; i < 1000; ++i) { 209 | x = random(); 210 | snprintf(json, 64, "{\x22x\x22: %ld, \x22timestamp\x22: %ld}", x, cnf->fasttime(true, TIMEUNIT_MILLI)); 211 | 212 | std::string *index = new std::string("indexdoc"); 213 | std::string *data = new std::string(json); 214 | FileRecord *record = FileRecord::create(0, 0, index, data); 215 | record->ctx = ctx; 216 | datas.assign(1, record); 217 | 218 | cnf->getEs()->produce(&datas); 219 | sys::millisleep(1); 220 | } 221 | 222 | bool esOk = false; 223 | char cmd[256]; 224 | for (int i = 0; i < 5 * 1000; ++i) { 225 | snprintf(cmd, 256, "curl -Ss 
http://127.0.0.1:9200/indexdoc/_doc/_search?sort=timestamp:desc | grep -q %ld", x); 226 | int status; 227 | BASH(cmd, status); 228 | if (status == 0) { 229 | esOk = true; 230 | break; 231 | } else { 232 | printf("wait tail2es #%d\n", i); 233 | sys::millisleep(1); 234 | } 235 | } 236 | check(esOk, "expect %s in es, got nothing, use command: %s", json, cmd); 237 | } 238 | 239 | static const char *files[] = { 240 | LOG("basic.log"), 241 | LOG("indexdoc.log"), 242 | 0 243 | }; 244 | 245 | DEFINE(prepare) 246 | { 247 | mkdir(LOG(""), 0755); 248 | 249 | for (int i = 0; files[i]; ++i) { 250 | int fd = creat(files[i], 0644); 251 | if (fd != -1) close(fd); 252 | } 253 | } 254 | 255 | DEFINE(clean) 256 | { 257 | for (int i = 0; files[i]; ++i) { 258 | unlink(files[i]); 259 | } 260 | } 261 | 262 | TEST_RUN(tail2es) 263 | { 264 | DO(prepare); 265 | 266 | TEST(transformEsDocNginxLog); 267 | TEST(transformEsDocNginxJson); 268 | 269 | TEST(loadCnf); 270 | TEST(loadLuaCtx); 271 | TEST(basic); 272 | TEST(indexdoc); 273 | 274 | TEST(httpProtocol_1); 275 | 276 | TEST(initEs); 277 | TEST(esProduce); 278 | 279 | DO(clean); 280 | if (cnf) delete cnf; 281 | } 282 | 283 | int main() { 284 | UNITTEST_RUN(tail2es); 285 | printf("OK\n"); 286 | return 0; 287 | } 288 | -------------------------------------------------------------------------------- /src/kafkactx.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "logger.h" 6 | #include "cnfctx.h" 7 | #include "luactx.h" 8 | #include "filereader.h" 9 | #include "kafkactx.h" 10 | 11 | static int stats_cb(rd_kafka_t *, char * /*json*/, size_t /*json_len*/, void *) 12 | { 13 | // log_opaque(json, (int) json_len, true); 14 | return 0; 15 | } 16 | 17 | void KafkaCtx::error_cb(rd_kafka_t *, int err, const char *reason, void *opaque) 18 | { 19 | KafkaCtx *kafka = (KafkaCtx *) opaque; 20 | 21 | if (err == RD_KAFKA_RESP_ERR__ALL_BROKERS_DOWN || 22 | err == RD_KAFKA_RESP_ERR__TRANSPORT) { 23 | for (size_t i = 0; i < kafka->nrkt_; ++i) util::atomic_dec(kafka->errors_ + i, 1); 24 | } 25 | log_error(0, "kafka error level %d reason %s", err, reason); 26 | } 27 | 28 | static void log_cb(const rd_kafka_t *, int level, const char *fac, const char *buf) 29 | { 30 | log_info(0, "kafka error level %d fac %s buf %s", level, fac, buf); 31 | } 32 | 33 | static void dr_msg_cb(rd_kafka_t *, const rd_kafka_message_t *rkmsg, void *) 34 | { 35 | FileRecord *record = (FileRecord *) rkmsg->_private; 36 | record->ctx->getFileReader()->updateFileOffRecord(record); 37 | FileRecord::destroy(record); 38 | } 39 | 40 | static int32_t partitioner_cb ( 41 | const rd_kafka_topic_t *, const void *, size_t, int32_t pc, void *opaque, void *) 42 | { 43 | LuaCtx *ctx = (LuaCtx *) opaque; 44 | int partition = ctx->getPartition(pc); 45 | if (partition < 0) return RD_KAFKA_PARTITION_UA; 46 | else return partition; 47 | } 48 | 49 | bool KafkaCtx::initKafka(const char *brokers, const std::map &gcnf, char *errbuf) 50 | { 51 | char errstr[512]; 52 | 53 | rd_kafka_conf_t *conf = rd_kafka_conf_new(); 54 | for (std::map::const_iterator ite = gcnf.begin(); ite != gcnf.end(); ++ite) { 55 | rd_kafka_conf_res_t res; 56 | res = rd_kafka_conf_set(conf, ite->first.c_str(), ite->second.c_str(), errstr, sizeof(errstr)); 57 | if (res != RD_KAFKA_CONF_OK) { 58 | snprintf(errbuf, MAX_ERR_LEN, "kafka conf %s=%s %s", ite->first.c_str(), ite->second.c_str(), errstr); 59 | 60 | rd_kafka_conf_destroy(conf); 61 | return false; 62 | } 63 | } 64 | 65 | 
rd_kafka_conf_set_opaque(conf, this); 66 | rd_kafka_conf_set_dr_msg_cb(conf, dr_msg_cb); 67 | rd_kafka_conf_set_stats_cb(conf, stats_cb); 68 | rd_kafka_conf_set_error_cb(conf, error_cb); 69 | rd_kafka_conf_set_log_cb(conf, log_cb); 70 | 71 | /* rd_kafka_t will own conf */ 72 | if (!(rk_ = rd_kafka_new(RD_KAFKA_PRODUCER, conf, errstr, sizeof(errstr)))) { 73 | snprintf(errbuf, MAX_ERR_LEN, "new kafka produce error %s", errstr); 74 | return false; 75 | } 76 | 77 | if (rd_kafka_brokers_add(rk_, brokers) < 1) { 78 | snprintf(errbuf, MAX_ERR_LEN, "kafka invalid brokers %s", brokers); 79 | return false; 80 | } 81 | return true; 82 | } 83 | 84 | rd_kafka_topic_t *KafkaCtx::initKafkaTopic(LuaCtx *ctx, const std::map &tcnf, char *errbuf) 85 | { 86 | char errstr[512]; 87 | 88 | rd_kafka_topic_conf_t *tconf = rd_kafka_topic_conf_new(); 89 | for (std::map::const_iterator ite = tcnf.begin(); ite != tcnf.end(); ++ite) { 90 | rd_kafka_conf_res_t res; 91 | res = rd_kafka_topic_conf_set(tconf, ite->first.c_str(), ite->second.c_str(), errstr, sizeof(errstr)); 92 | if (res != RD_KAFKA_CONF_OK) { 93 | snprintf(errbuf, MAX_ERR_LEN, "kafka topic conf %s=%s %s", ite->first.c_str(), ite->second.c_str(), errstr); 94 | rd_kafka_topic_conf_destroy(tconf); 95 | return 0; 96 | } 97 | } 98 | 99 | rd_kafka_topic_conf_set_opaque(tconf, ctx); 100 | if (ctx->getPartitioner() == PARTITIONER_RANDOM) { 101 | rd_kafka_topic_conf_set_partitioner_cb(tconf, rd_kafka_msg_partitioner_random); 102 | } else { 103 | rd_kafka_topic_conf_set_partitioner_cb(tconf, partitioner_cb); 104 | } 105 | 106 | rd_kafka_topic_t *rkt; 107 | /* rd_kafka_topic_t will own tconf */ 108 | rkt = rd_kafka_topic_new(rk_, ctx->topic().c_str(), tconf); 109 | if (!rkt) { 110 | snprintf(errbuf, MAX_ERR_LEN, "kafka_topic_new error"); 111 | return 0; 112 | } 113 | return rkt; 114 | } 115 | 116 | bool KafkaCtx::init(CnfCtx *cnf, char *errbuf) 117 | { 118 | cnf_ = cnf; 119 | 120 | if (!initKafka(cnf->getBrokers(), cnf->getKafkaGlobalConf(), errbuf)) return false; 121 | 122 | rkts_ = new rd_kafka_topic_t*[cnf->getLuaCtxSize()]; 123 | errors_ = new int[cnf->getLuaCtxSize()]; 124 | memset(errors_, 0, cnf->getLuaCtxSize()); 125 | 126 | for (LuaCtxPtrList::iterator ite = cnf->getLuaCtxs().begin(); 127 | ite != cnf->getLuaCtxs().end(); ++ite) { 128 | LuaCtx *ctx = (*ite); 129 | while (ctx) { 130 | rd_kafka_topic_t *rkt = initKafkaTopic(ctx, cnf->getKafkaTopicConf(), errbuf); 131 | if (!rkt) return false; 132 | 133 | rkts_[nrkt_] = rkt; 134 | ctx->setRktId(nrkt_); 135 | nrkt_++; 136 | 137 | ctx = ctx->next(); 138 | } 139 | } 140 | 141 | return true; 142 | } 143 | 144 | KafkaCtx::~KafkaCtx() 145 | { 146 | for (size_t i = 0; i < nrkt_; ++i) rd_kafka_topic_destroy(rkts_[i]); 147 | if (rk_) rd_kafka_destroy(rk_); 148 | 149 | if (rkts_) delete[] rkts_; 150 | if (errors_) delete[] errors_; 151 | } 152 | 153 | bool KafkaCtx::ping(LuaCtx *ctx) 154 | { 155 | int id = ctx->rktId(); 156 | if (util::atomic_get(errors_ + id) > 0) return true; 157 | return false; 158 | 159 | /* 160 | struct rd_kafka_message_t *meta = 0; 161 | rd_kafka_resp_err_t rc = rd_kafka_metadata(rk_, 0, ctx->rkt, 30); 162 | if (rc == RD_KAFKA_RESP_ERR_NO_ERROR) { 163 | util::atomic_set(errors_ + id, 10); 164 | } 165 | 166 | if (meta) rd_kafka_metadata_destroy(meta); 167 | */ 168 | } 169 | 170 | bool KafkaCtx::produce(FileRecord *record) 171 | { 172 | CnfCtx *cnf = record->ctx->cnf(); 173 | rd_kafka_topic_t *rkt = rkts_[record->ctx->rktId()]; 174 | 175 | int rc; 176 | int i = 1; 177 | time_t startTime = 
cnf->fasttime(); 178 | while ((rc = rd_kafka_produce(rkt, RD_KAFKA_PARTITION_UA, 0, (void *) record->data->c_str(), 179 | record->data->size(), 0, 0, record)) != 0) { 180 | rd_kafka_resp_err_t err = rd_kafka_last_error(); 181 | if (err == RD_KAFKA_RESP_ERR__QUEUE_FULL) { 182 | if (cnf->fasttime() - startTime > QUEUE_ERROR_TIMEOUT + 2) { 183 | // librdkafka may trap this loop, call exit to restart 184 | return false; 185 | } 186 | 187 | cnf->flowControl(true); 188 | int nevent = rd_kafka_poll(rk_, 100 * i); 189 | log_error(0, "%s kafka produce error(#%d) %s, poll event %d", 190 | rd_kafka_topic_name(rkt), i++, rd_kafka_err2str(err), nevent); 191 | } else { 192 | cnf->stats()->queueSizeDec(); 193 | cnf->stats()->logErrorInc(); 194 | log_fatal(0, "%s kafka produce error %s", 195 | rd_kafka_topic_name(rkt), rd_kafka_err2str(err)); 196 | FileRecord::destroy(record); 197 | break; 198 | } 199 | } 200 | 201 | cnf->flowControl(false); 202 | return true; 203 | } 204 | 205 | bool KafkaCtx::produce(std::vector *datas) 206 | { 207 | cnf_->stats()->logRecvInc(datas->size()); 208 | assert(!datas->empty()); 209 | 210 | LuaCtx *ctx = datas->at(0)->ctx; 211 | rd_kafka_topic_t *rkt = rkts_[ctx->rktId()]; 212 | 213 | int partition = RD_KAFKA_PARTITION_UA; 214 | if (ctx->getPartitioner() == PARTITIONER_RANDOM) { 215 | if (ctx->rktPartition() < 0 ) { 216 | const struct rd_kafka_metadata *metadata = 0; 217 | rd_kafka_resp_err_t err = rd_kafka_metadata(rk_, 0, rkt, &metadata, 500); 218 | 219 | if (err == RD_KAFKA_RESP_ERR_NO_ERROR) { 220 | if (metadata->topic_cnt > 0) { 221 | ctx->rktSetPartition(metadata->topics[0].partition_cnt); 222 | } 223 | rd_kafka_metadata_destroy(metadata); 224 | } else { 225 | log_fatal(0, "%s rd_kafka_metadata error %s", 226 | rd_kafka_topic_name(rkt), rd_kafka_err2str(err)); 227 | } 228 | } 229 | 230 | if (ctx->rktPartition() >= 0) { 231 | partition = rand() % ctx->rktPartition(); 232 | } 233 | } 234 | 235 | std::vector rkmsgs; 236 | rkmsgs.resize(datas->size()); 237 | 238 | size_t i = 0; 239 | for (std::vector::iterator ite = datas->begin(), end = datas->end(); 240 | ite != end; ++ite, ++i) { 241 | FileRecord *record = (*ite); 242 | 243 | rkmsgs[i].payload = (void *) record->data->c_str(); 244 | rkmsgs[i].len = record->data->size(); 245 | rkmsgs[i].key = 0; 246 | rkmsgs[i].key_len = 0; 247 | rkmsgs[i]._private = record; 248 | } 249 | 250 | int n = rd_kafka_produce_batch(rkt, partition, 0, &rkmsgs[0], rkmsgs.size()); 251 | if (n != (int) rkmsgs.size()) { 252 | log_info(0, "rd_kafka_produce_batch %d != %d", n, (int) rkmsgs.size()); 253 | 254 | for (std::vector::iterator ite = rkmsgs.begin(), end = rkmsgs.end(); 255 | ite != end; ++ite) { 256 | if (ite->err) { 257 | if (!produce((FileRecord *) ite->_private)) return false; 258 | } 259 | } 260 | } 261 | 262 | rd_kafka_poll(rk_, 0); 263 | return true; 264 | } 265 | -------------------------------------------------------------------------------- /src/kafka2file_unittest.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "sys.h" 5 | #include "util.h" 6 | #include "unittesthelper.h" 7 | #include "logger.h" 8 | #include "common.h" 9 | #include "transform.h" 10 | 11 | LOGGER_INIT(); 12 | UNITTEST_INIT(); 13 | 14 | #define WDIR "kafka2filedir" 15 | #define TOPIC "nginx" 16 | #define TOPICDIR WDIR "/" TOPIC 17 | #define PARTITION "0" 18 | #define LUAFILE(f) "blackboxtest/kafka2file/" f 19 | #define LUALOGFILE(t, p, f) TOPICDIR "/" t "." 
p "_" f 20 | 21 | static char errbuf[1024]; 22 | 23 | DEFINE(parseRequest) 24 | { 25 | const char *request = "GET /pingback/tail2kafka?event=UPGRADE_ERROR&product=test.tail2kafka&error=upgrade%20config%20from%20to%200.0.2,%20reload%20failed HTTP/1.1"; 26 | std::string method, path; 27 | std::map query; 28 | bool rc = parseRequest(request, &method, &path, &query); 29 | 30 | check(rc, "parse %s error", request); 31 | check(method == "GET", "method %s", PTRS(method)); 32 | check(path == "/pingback/tail2kafka", "path %s", PTRS(path)); 33 | check(query["event"] == "UPGRADE_ERROR", "query['event'] %s", PTRS(query["event"])); 34 | check(query["product"] == "test.tail2kafka", "query['product'] %s", PTRS(query["product"])); 35 | check(query["error"] == "upgrade config from to 0.0.2, reload failed", "query['error'] %s", PTRS(query["error"])); 36 | } 37 | 38 | DEFINE(messageInfoExtrace) 39 | { 40 | MessageInfo info; 41 | 42 | std::string payload("#zzyong {'time':'2018-02-13T11:48:57', 'event':'END', 'file':'oldFileName','size':100, 'sendsize':0, 'lines':0, 'sendlines':0}"); 43 | util::replace(&payload, '\'', '"'); 44 | bool rc = MessageInfo::extract(payload.c_str(), payload.size(), &info, false); 45 | check(rc, "extrace %s error", PTRS(payload)); 46 | check(info.type == MessageInfo::META, "info type error"); 47 | check(info.host == "zzyong", "info host error %s", PTRS(info.host)); 48 | check(info.file == "oldFileName", "info file error %s", PTRS(info.file)); 49 | check(info.size == 100, "info size error %d", (int) info.size); 50 | 51 | payload = "#zzyong {'time':'2018-02-13T11:48:57', 'event':'START'}"; 52 | util::replace(&payload, '\'', '"'); 53 | rc = MessageInfo::extract(payload.c_str(), payload.size(), &info, false); 54 | check(!rc, "extrace %s error", PTRS(payload)); 55 | 56 | payload = "*zzyong@123456789 Hello World\n"; 57 | rc = MessageInfo::extract(payload.c_str(), payload.size(), &info, true); 58 | check(rc, "extrace %s error", PTRS(payload)); 59 | check(info.type == MessageInfo::NMSG, "info type error"); 60 | check(info.host == "zzyong", "info host error %s", PTRS(info.host)); 61 | check(info.pos == 123456789, "info pos error %lu", info.pos); 62 | check(info.len == 11, "info payload len error %d", info.len); 63 | check(strncmp(info.ptr, "Hello World", info.len) == 0, "info payload error %.*s", info.len, info.ptr); 64 | 65 | payload = "zzyong Hello World\n"; 66 | rc = MessageInfo::extract(payload.c_str(), payload.size(), &info, true); 67 | check(rc, "extrace %s error", PTRS(payload)); 68 | check(info.len == 18, "info payload len error %d", info.len); 69 | check(strncmp(info.ptr, payload.c_str(), info.len) == 0, "info payload error %.*s", info.len, info.ptr); 70 | } 71 | 72 | DEFINE(luaTransformInit) 73 | { 74 | LuaTransform *luaTransform = new LuaTransform(WDIR, TOPIC, atoi(PARTITION), 0); 75 | bool rc = luaTransform->init(Transform::NGINX, Transform::JSON, 60, 10, LUAFILE("nginx.lua"), errbuf); 76 | check(rc, "luaTransform.init error %s", errbuf); 77 | 78 | JsonValueTransform *fun = luaTransform->requestValueMap_["status"]; 79 | check(fun, "status fun not found"); 80 | check(strcmp(fun->name(), "JsonValueTypeTransform") == 0, "status fun name %s", fun->name()); 81 | Json::Value value = fun->call("1234"); 82 | check(value.isInt() && value.asInt() == 1234, "status fun call ok"); 83 | 84 | fun = luaTransform->requestValueMap_["uri"]; 85 | check(fun, "uri fun not found"); 86 | check(strcmp(fun->name(), "JsonValuePrefixTransform") == 0, "uri fun name %s", fun->name()); 87 | value = 
fun->call("/api/null"); 88 | check(value.isString() && value.asString() == "/host/api/null", "uri fun call error"); 89 | } 90 | 91 | inline rd_kafka_message_t *initKafkaMessage(rd_kafka_message_t *rkm, const char *payload, uint64_t offset) 92 | { 93 | rkm->payload = (void *) payload; 94 | rkm->len = strlen(payload); 95 | rkm->offset = offset; 96 | return rkm; 97 | } 98 | 99 | #define MSG_HOSTMETA "*zzyong@0" 100 | #define NGX_REQUEST "\"GET /pingback/tail2kafka?event=RELOAD HTTP/1.1\"" 101 | #define NGX_MSG_10_25_01 MSG_HOSTMETA " [12/Feb/2018:10:25:01 +0800] " NGX_REQUEST 102 | #define NGX_MSG_10_25_02 MSG_HOSTMETA " [12/Feb/2018:10:25:02 +0800] " NGX_REQUEST 103 | #define NGX_MSG_10_26_01 MSG_HOSTMETA " [12/Feb/2018:10:26:01 +0800] " NGX_REQUEST 104 | #define NGX_MSG_10_25_58 MSG_HOSTMETA " [12/Feb/2018:10:25:58 +0800] " NGX_REQUEST 105 | #define NGX_MSG_10_26_30 MSG_HOSTMETA " [12/Feb/2018:10:26:30 +0800] " NGX_REQUEST 106 | #define NGX_MSG_10_25_59 MSG_HOSTMETA " [12/Feb/2018:10:25:59 +0800] " NGX_REQUEST 107 | #define NGX_MSG_10_28_01 MSG_HOSTMETA " [12/Feb/2018:10:28:01 +0800] " NGX_REQUEST 108 | #define NGX_MSG_10_28_02 MSG_HOSTMETA " [12/Feb/2018:10:28:02 +0800] " NGX_REQUEST 109 | #define NGX_MSG_10_29_11 MSG_HOSTMETA " [12/Feb/2018:10:29:11 +0800] " NGX_REQUEST 110 | 111 | DEFINE(luaTransformLogRotate) 112 | { 113 | LuaTransform *luaTransform = new LuaTransform(WDIR, TOPIC, atoi(PARTITION), 0); 114 | bool rc = luaTransform->init(Transform::NGINX, Transform::JSON, 60, 10, LUAFILE("test_rotate.lua"), errbuf); 115 | checkx(rc, "luaTransform.init error %s", errbuf); 116 | 117 | const char *msgs[] = { 118 | NGX_MSG_10_25_01, NGX_MSG_10_25_02, 119 | NGX_MSG_10_26_01, NGX_MSG_10_25_58, NGX_MSG_10_26_30, NGX_MSG_10_25_59, 120 | NGX_MSG_10_28_01, NGX_MSG_10_28_02, 121 | NGX_MSG_10_29_11, 0}; 122 | 123 | bool *withTimeout = ENV_GET("WITH_TIMEOUT", bool *); 124 | 125 | uint64_t offset; 126 | rd_kafka_message_t rkm; 127 | for (int i = 0; msgs[i]; ++i) { 128 | printf("%s\n", msgs[i]); 129 | uint32_t flags = luaTransform->write(initKafkaMessage(&rkm, msgs[i], i), &offset); 130 | checkx(flags & Transform::RKMFREE, "luaFunction.write should return rkmfree"); 131 | 132 | if (*withTimeout) { 133 | flags = luaTransform->timeout(&offset); 134 | checkx(!(flags & Transform::RKMFREE), "luaFunction.timeout should not return rkmfree"); 135 | } 136 | } 137 | delete luaTransform; 138 | 139 | const char *f_10_25_00 = LUALOGFILE(TOPIC, PARTITION, "2018-02-12_10-25-00"); 140 | checkx(access(f_10_25_00, F_OK) == 0, "logfile 2018-02-12_10-25-00 notfound"); 141 | 142 | std::vector lines; 143 | sys::file2vector(f_10_25_00, &lines); 144 | checkx(lines.size() == 3, "file size error, %s, %d", f_10_25_00, (int) lines.size()); 145 | checkx(lines[0] == "{\"time_local\":\"2018-02-12T10:25:01\"}", "line 0 error, %s", PTRS(lines[0])); 146 | checkx(lines[1] == "{\"time_local\":\"2018-02-12T10:25:02\"}", "line 1 error, %s", PTRS(lines[1])); 147 | checkx(lines[2] == "{\"time_local\":\"2018-02-12T10:25:58\"}", "line 0 error, %s", PTRS(lines[2])); 148 | 149 | const char *f_10_26_00 = LUALOGFILE(TOPIC, PARTITION, "2018-02-12_10-26-00"); 150 | checkx(access(f_10_26_00, F_OK) == 0, "logfile 2018-02-12_10-26-00 notfound"); 151 | 152 | lines.clear(); 153 | sys::file2vector(f_10_26_00, &lines); 154 | checkx(lines.size() == 2, "file size error, %s, %d", f_10_26_00, (int) lines.size()); 155 | checkx(lines[0] == "{\"time_local\":\"2018-02-12T10:26:01\"}", "line 0 error, %s", PTRS(lines[0])); 156 | checkx(lines[1] == 
"{\"time_local\":\"2018-02-12T10:26:30\"}", "line 1 error, %s", PTRS(lines[1])); 157 | 158 | const char *f_10_27_00 = LUALOGFILE(TOPIC, PARTITION, "2018-02-12_10-27-00"); 159 | checkx(access(f_10_27_00, F_OK) != 0, "logfile 2018-02-12_10-27-00 found"); 160 | 161 | const char *f_10_28_00 = LUALOGFILE(TOPIC, PARTITION, "2018-02-12_10-28-00"); 162 | checkx(access(f_10_28_00, F_OK) == 0, "logfile 2018-02-12_10-28-00 notfound"); 163 | 164 | lines.clear(); 165 | sys::file2vector(f_10_28_00, &lines); 166 | checkx(lines.size() == 2, "file size error, %s, %d", f_10_28_00, (int) lines.size()); 167 | checkx(lines[0] == "{\"time_local\":\"2018-02-12T10:28:01\"}", "line 0 error, %s", PTRS(lines[0])); 168 | checkx(lines[1] == "{\"time_local\":\"2018-02-12T10:28:02\"}", "line 1 error, %s", PTRS(lines[1])); 169 | 170 | const char *f_10_29_00 = LUALOGFILE(TOPIC, PARTITION, "2018-02-12_10-29-00"); 171 | checkx(access(f_10_29_00, F_OK) != 0, "logfile 2018-02-12_10-29-00 found"); 172 | 173 | const char *f_10_29_00_current = LUALOGFILE(TOPIC, PARTITION, "2018-02-12_10-29-00.current"); 174 | checkx(access(f_10_29_00_current, F_OK) != 0, "logfile 2018-02-12_10-29-00.current found"); 175 | } 176 | 177 | DEFINE(prepare) 178 | { 179 | system("mkdir -p "TOPICDIR); 180 | } 181 | 182 | DEFINE(clean) 183 | { 184 | system("rm -rf "TOPICDIR"/*"); 185 | } 186 | 187 | int main() 188 | { 189 | DO(prepare); 190 | 191 | TEST(parseRequest); 192 | TEST(messageInfoExtrace); 193 | 194 | TEST(luaTransformInit); 195 | 196 | bool withTimeout; 197 | ENV_SET("WITH_TIMEOUT", &withTimeout); 198 | 199 | withTimeout = true; 200 | DO(clean); 201 | TESTX(luaTransformLogRotate, "luaTransformLogRotateWithTimeout"); 202 | 203 | withTimeout = false; 204 | DO(clean); 205 | TESTX(luaTransformLogRotate, "luaTransformLogRotateWithoutTimeout"); 206 | return 0; 207 | } 208 | --------------------------------------------------------------------------------