├── .gitignore ├── README-en.md ├── README.md ├── example ├── README.md ├── flume-env.sh ├── kafka2sls.properties └── log4j.properties ├── pom.xml └── src ├── main ├── java │ └── com │ │ └── aliyun │ │ └── loghub │ │ └── flume │ │ ├── Constants.java │ │ ├── Validate.java │ │ ├── sink │ │ ├── DelimitedTextEventSerializer.java │ │ ├── EventHandler.java │ │ ├── EventSerializer.java │ │ ├── JSONEventSerializer.java │ │ ├── LoghubSink.java │ │ ├── RegexEventSerializer.java │ │ └── SimpleEventSerializer.java │ │ ├── source │ │ ├── DelimitedTextEventDeserializer.java │ │ ├── EventDeserializer.java │ │ ├── JSONEventDeserializer.java │ │ ├── LogReceiver.java │ │ └── LoghubSource.java │ │ └── utils │ │ └── VersionInfoUtils.java └── resources │ └── flume-versioninfo.properties └── test ├── java └── com │ └── aliyun │ └── loghub │ └── flume │ ├── sink │ └── TestLoghubSink.java │ └── source │ ├── JSONEventDeserializerTest.java │ └── LoghubSourceTest.java └── resources ├── log4j.properties ├── sink-example.conf └── source-example.conf /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | *.iml 3 | target/ -------------------------------------------------------------------------------- /README-en.md: -------------------------------------------------------------------------------- 1 | # Aliyun Log Flume 2 | 3 | ```Apache Flume``` is an open source project for moving massive quantities of streaming data. Aliyun Log Flume implements a Flume Source and Sink for moving data between Loghub and external data sources such as HDFS and Kafka. 4 | 5 | ### Requirements 6 | - Java 1.8+ 7 | - Maven 3.x+. 8 | 9 | ### Set up Flume 10 | 11 | #### Download Flume 12 | 13 | Download ```Apache Flume``` from the official site - http://www.apache.org/dyn/closer.lua/flume/1.9.0/apache-flume-1.9.0-bin.tar.gz (the latest version is ```1.9.0```). Copy the downloaded tarball to a directory on your server and extract its contents using the following command: 14 | 15 | ```tar -xvf apache-flume-1.9.0-bin.tar.gz``` 16 | 17 | This command will create a new directory named apache-flume-1.9.0-bin and extract the files into it. All official sink/source libraries are placed under the directory apache-flume-1.9.0-bin/lib. There is also a directory apache-flume-1.9.0-bin/conf; we'll add our configuration file in that directory later. 18 | 19 | #### Build this project 20 | 21 | Go to the project root folder and build aliyun-log-flume using the following command: 22 | 23 | ```mvn clean compile assembly:single -DskipTests``` 24 | 25 | After this command completes successfully, a new jar named ```aliyun-log-flume-1.0-SNAPSHOT.jar``` will be generated under the target directory, with all dependencies of this project packaged into the jar file as well. Then copy aliyun-log-flume-1.0-SNAPSHOT.jar to apache-flume-1.9.0-bin/lib.
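To put the steps above together, the whole setup can be scripted roughly as follows. This is only a sketch: it assumes the plugin source was cloned into a directory named aliyun-log-flume next to the extracted Flume distribution, so adjust the paths to your own layout.

```
# Extract the Flume distribution (creates apache-flume-1.9.0-bin)
tar -xvf apache-flume-1.9.0-bin.tar.gz

# Build the plugin with all dependencies bundled into a single jar
cd aliyun-log-flume                       # assumed checkout directory
mvn clean compile assembly:single -DskipTests

# Make the plugin visible to Flume by dropping it into Flume's lib directory
cp target/aliyun-log-flume-1.0-SNAPSHOT.jar ../apache-flume-1.9.0-bin/lib/
```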
26 | 27 | ### Configuration 28 | 29 | Create a new configuration file ```flume-loghub.conf``` under apache-flume-1.9.0-bin/conf and add the sink or source configuration to this file. Here are examples for collecting data from netcat to Loghub and from Loghub to HDFS: 30 | 31 | #### Sink example 32 | 33 | The Loghub sink is used to stream data from Flume to Loghub. Here is an example that collects data from ```netcat``` and sends it to Loghub: 34 | ``` 35 | agent.sources = netcatsource 36 | agent.sinks = slssink 37 | agent.channels = memoryChannel 38 | 39 | # Configure the source: 40 | agent.sources.netcatsource.type = netcat 41 | agent.sources.netcatsource.bind = localhost 42 | agent.sources.netcatsource.port = 44444 43 | 44 | # Describe the sink: 45 | agent.sinks.slssink.type = com.aliyun.loghub.flume.sink.LoghubSink 46 | agent.sinks.slssink.endpoint = 47 | agent.sinks.slssink.project = 48 | agent.sinks.slssink.logstore = 49 | agent.sinks.slssink.accessKeyId = 50 | agent.sinks.slssink.accessKey = 51 | 52 | 53 | # Configure a channel that buffers events in memory: 54 | agent.channels.memoryChannel.type = memory 55 | agent.channels.memoryChannel.capacity = 20000 56 | agent.channels.memoryChannel.transactionCapacity = 100 57 | 58 | # Bind the source and sink to the channel: 59 | agent.sources.netcatsource.channels = memoryChannel 60 | agent.sinks.slssink.channel = memoryChannel 61 | ``` 62 | 63 | #### Source example 64 | Ingest data from Loghub and save it to HDFS: 65 | ``` 66 | agent.sources = slssrc 67 | agent.sinks = hdfssink 68 | agent.channels = memoryChannel 69 | 70 | # Configure the source: 71 | agent.sources.slssrc.type = com.aliyun.loghub.flume.source.LoghubSource 72 | agent.sources.slssrc.endpoint = 73 | agent.sources.slssrc.project = 74 | agent.sources.slssrc.logstore = 75 | agent.sources.slssrc.accessKeyId = 76 | agent.sources.slssrc.accessKey = 77 | agent.sources.slssrc.consumerGroup = consumer-group-test 78 | agent.sources.slssrc.columns = 79 | agent.sources.slssrc.separatorChar = , 80 | # query for SLS SPL in source, refer: https://help.aliyun.com/zh/sls/user-guide/spl-overview 81 | agent.sources.slssrc.query = * | WHERE method = 'POST' 82 | 83 | # Describe the sink: 84 | agent.sinks.hdfssink.type = hdfs 85 | agent.sinks.hdfssink.hdfs.path = hdfs://localhost:8020/user/root/test 86 | agent.sinks.hdfssink.hdfs.writeFormat = Text 87 | agent.sinks.hdfssink.hdfs.round = true 88 | agent.sinks.hdfssink.hdfs.roundValue = 20 89 | agent.sinks.hdfssink.hdfs.roundUnit = minute 90 | agent.sinks.hdfssink.hdfs.rollSize = 0 91 | agent.sinks.hdfssink.hdfs.rollCount = 0 92 | agent.sinks.hdfssink.hdfs.fileType = DataStream 93 | agent.sinks.hdfssink.hdfs.useLocalTimeStamp = true 94 | 95 | # Configure a channel that buffers events in memory: 96 | agent.channels.memoryChannel.type = memory 97 | agent.channels.memoryChannel.capacity = 20000 98 | agent.channels.memoryChannel.transactionCapacity = 100 99 | 100 | 101 | # Bind the source and sink to the channel: 102 | agent.sources.slssrc.channels = memoryChannel 103 | agent.sinks.hdfssink.channel = memoryChannel 104 | ``` 105 | NOTE: For the HDFS sink, we need to download the Hadoop libraries from https://hadoop.apache.org/releases.html (the latest version is 3.1.2). After extracting the archive, copy all libraries under hadoop-{hadoop-version}/share/hadoop/common and hadoop-{hadoop-version}/share/hadoop/common/lib to apache-flume-1.9.0-bin/lib; these libraries are required by the HDFS sink.
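If the HDFS sink is used, copying the Hadoop client jars described in the note above could look roughly like this. This is a sketch: the hadoop-3.1.2 and Flume install locations are assumptions, so substitute the versions and paths you actually use.

```
# Assumed locations of the extracted Hadoop and Flume distributions
HADOOP_HOME=/opt/hadoop-3.1.2
FLUME_HOME=/opt/apache-flume-1.9.0-bin

# Copy the Hadoop common jars and their dependencies into Flume's lib directory
cp $HADOOP_HOME/share/hadoop/common/*.jar $FLUME_HOME/lib/
cp $HADOOP_HOME/share/hadoop/common/lib/*.jar $FLUME_HOME/lib/
```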
106 | 107 | ### Start Flume 108 | After the configuration file is created, run the following command under apache-flume-1.9.0-bin: 109 | ``` 110 | ./bin/flume-ng agent --name agent --conf conf --conf-file conf/flume-loghub.conf 111 | ``` 112 | 113 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | Aliyun Log Flume 3 | ================ 4 | 5 | #### Flume 6 | 7 | Flume is an open source Apache tool for moving data between big data systems. Users run a Flume Agent on each machine, and every Flume Agent process consists of three components: Source, Sink and Channel. 8 | - Source: the data source; common sources include Kafka, files, etc. 9 | - Sink: the write target, such as HDFS, Hive, etc. 10 | - Channel: a buffer queue that holds data after it is fetched from the Source and before it is written to the Sink; common channels include in-memory queues, Kafka, etc. 11 | 12 | Each record in Flume is represented as an Event, and an Event object consists of two parts: 13 | - body: the data content, as a byte array. 14 | - headers: key-value pairs carrying additional attributes. 15 | 16 | 17 | #### aliyun-log-flume 18 | aliyun-log-flume is a plugin that connects Log Service (Loghub) with Flume, so that Log Service can exchange data 19 | with other systems such as HDFS and Kafka through Flume. Besides HDFS and Kafka, Flume officially provides plugins for Hive, HBase, ElasticSearch and more, 20 | and community plugins are available for most common data sources. 21 | aliyun-log-flume implements a Sink and a Source plugin for Loghub. 22 | - Sink: Flume reads data from other data sources and writes it to Loghub. 23 | - Source: Flume consumes data from Loghub and writes it to other systems such as HDFS. 24 | 25 | ##### Loghub Sink 26 | The sink ingests data from other data sources into Loghub through Flume. Two serialization formats are currently supported: 27 | - SIMPLE: writes the whole Flume Event to Loghub as a single field. 28 | - DELIMITED: treats the Flume Event as delimiter-separated data and parses it into fields according to the configured column names before writing to Loghub. 29 | 30 | Supported settings: 31 | 32 | |Name|Description|Default|Required| 33 | |---|---|---|---| 34 | |type| Must be com.aliyun.loghub.flume.sink.LoghubSink | | Y | 35 | |endpoint| Loghub endpoint| | Y | 36 | |project| Loghub project| | Y | 37 | |logstore| Loghub logstore| | Y | 38 | |accessKeyId| Loghub accessKeyId| | Y | 39 | |accessKey| Loghub accessKey| | Y | 40 | |batchSize| Batch size for writing to Loghub|1000 | N | 41 | |maxBufferSize| Buffer queue size|1000 | N | 42 | |serializer| Event serialization format; supports DELIMITED, SIMPLE, or a custom serializer (specify the fully qualified class name) |SIMPLE | N | 43 | |columns| Required when serializer is DELIMITED: the comma-separated field list, in the same order as the fields in the actual data.| | N | 44 | |separatorChar| When serializer is DELIMITED, the field separator; must be a single character|, | N | 45 | |quoteChar| When serializer is DELIMITED, the quote character |" | N | 46 | |escapeChar| When serializer is DELIMITED, the escape character | " | N | 47 | |useRecordTime| Whether to use the timestamp field in the data as the log time| false| N | 48 | 49 | ##### Loghub Source 50 | The source delivers Loghub data to other destinations through Flume. Two output formats are currently supported: 51 | - DELIMITED: data is written to Flume as delimiter-separated text. 52 | - JSON: data is written to Flume as JSON. 53 | 54 | Supported settings: 55 | 56 | |Name|Description|Default|Required| 57 | |---|---|---|---| 58 | |type| Must be com.aliyun.loghub.flume.source.LoghubSource | | Y | 59 | |endpoint| Loghub endpoint| | Y | 60 | |project| Loghub project| | Y | 61 | |logstore| Loghub logstore| | Y | 62 | |accessKeyId| Loghub accessKeyId| | Y | 63 | |accessKey| Loghub accessKey| | Y | 64 | |heartbeatIntervalMs| Heartbeat interval between the client and Loghub, in milliseconds|30000 | N | 65 | |fetchIntervalMs| Interval for fetching data from Loghub, in milliseconds|100 | N | 66 | |fetchInOrder| Whether to consume in order|false | N | 67 | |batchSize| Fetch batch size |100 | N | 68 | |consumerGroup| Name of the consumer group | randomly generated | N | 69 | |initialPosition| Start position for consuming; supports begin, end, timestamp. Note: if a checkpoint already exists on the server side, the server-side checkpoint takes precedence|begin | N | 70 | |timestamp| Required when initialPosition is timestamp: the start time as a Unix timestamp | | N | 71 | |deserializer| Event deserialization format; supports DELIMITED, JSON, or a custom deserializer (specify the fully qualified class name) |DELIMITED | Y | 72 | |columns| Required when deserializer is DELIMITED: the comma-separated field list, in the same order as the fields in the actual data.| | N | 73 | |separatorChar| When deserializer is DELIMITED, the field separator; must be a single character|, | N | 74 | |quoteChar| When deserializer is DELIMITED, the quote character |" | N | 75 | |escapeChar| When deserializer is DELIMITED, the escape character | " | N | 76 | |appendTimestamp| 
When deserializer is DELIMITED, whether to automatically append the timestamp as an extra field at the end of each row | false | N | 77 | |sourceAsField| When deserializer is JSON, whether to add the log source as a field named __source__ |false | N | 78 | |tagAsField| When deserializer is JSON, whether to add log tags as fields named __tag__:{tag name}| false | N | 79 | |timeAsField| When deserializer is JSON, whether to add the log time as a field named __time__ | false | N | 80 | |useRecordTime| Whether to use the log time as the timestamp in the Event header; if false, the system time is used| false| N | 81 | |query| An SLS SPL statement; for the syntax see https://help.aliyun.com/zh/sls/user-guide/spl-overview | | N | 82 | -------------------------------------------------------------------------------- /example/README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | mvn clean compile assembly:single -DskipTests 3 | mv target/aliyun-log-flume-1.3.jar $FLUME_HOME/lib 4 | mv flume-env.sh $FLUME_HOME/conf 5 | mv kafka2sls.properties $FLUME_HOME/conf 6 | mv log4j.properties $FLUME_HOME/conf 7 | cd $FLUME_HOME 8 | ./bin/flume-ng agent --conf ./conf --conf-file ./conf/kafka2sls.properties -n agent 9 | ``` 10 | -------------------------------------------------------------------------------- /example/flume-env.sh: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # If this file is placed at FLUME_CONF_DIR/flume-env.sh, it will be sourced 18 | # during Flume startup. 19 | 20 | # Environment variables can be set here. 21 | 22 | # export JAVA_HOME=/usr/lib/jvm/java-8-oracle 23 | 24 | # Give Flume more memory and pre-allocate, enable remote monitoring via JMX 25 | export JAVA_OPTS="-Xms100m -Xmx2000m -Dcom.sun.management.jmxremote" 26 | 27 | # Let Flume write raw event data and configuration information to its log files for debugging 28 | # purposes. Enabling these flags is not recommended in production, 29 | # as it may result in logging sensitive user information or encryption secrets. 30 | # export JAVA_OPTS="$JAVA_OPTS -Dorg.apache.flume.log.rawdata=true -Dorg.apache.flume.log.printconfig=true " 31 | 32 | # Note that the Flume conf directory is always included in the classpath.
33 | #FLUME_CLASSPATH="" 34 | 35 | -------------------------------------------------------------------------------- /example/kafka2sls.properties: -------------------------------------------------------------------------------- 1 | agent.sources = kafkasource 2 | agent.sinks = slssink 3 | agent.channels = memoryChannel 4 | 5 | # Configure the source: 6 | agent.sources.kafkasource.type = org.apache.flume.source.kafka.KafkaSource 7 | agent.sources.kafkasource.kafka.bootstrap.servers =localhost:9092 8 | agent.sources.kafkasource.kafka.topics = mytopic2 9 | agent.sources.kafkasource.kafka.batchSize = 1000 10 | agent.sources.kafkasource.kafka.auto.offset.reset = earliest 11 | agent.sources.kafkasource.kafka.consumer.group.id = flume_consumer_id 12 | 13 | # Describe the sink: 14 | agent.sinks.slssink.type = com.aliyun.loghub.flume.sink.LoghubSink 15 | agent.sinks.slssink.endpoint = cn-hangzhou-share.log.aliyuncs.com 16 | agent.sinks.slssink.project = 17 | agent.sinks.slssink.logstore = 18 | agent.sinks.slssink.accessKeyId = 19 | agent.sinks.slssink.accessKey = 20 | #agent.sinks.slssink.serializer = JSON 21 | agent.sinks.slssink.serializer = REGEX 22 | agent.sinks.slssink.fieldNames = namespace,logTime,logLevel,threadName,yk_metric,traceId,json 23 | agent.sinks.slssink.expandJsonKeys= json 24 | agent.sinks.slssink.timeField = logTime 25 | agent.sinks.slssink.timeFormat = yyyy-MM-dd HH:mm:ss.SSS 26 | agent.sinks.slssink.batchSize = 2000 27 | agent.sinks.slssink.bufferSize = 2000 28 | agent.sinks.slssink.regex = \\[(.*?)]\\[(.*?)]\\[(.*?)\\]\\[(.*?)\\]\\[(.*?)\\]\\[(.*?)\\]\\s\\-\\s(.*) 29 | 30 | # Configure a channel that buffers events in memory: 31 | agent.channels.memoryChannel.type = memory 32 | agent.channels.memoryChannel.capacity = 100000 33 | agent.channels.memoryChannel.keep-alive = 60 34 | agent.channels.memoryChannel.transactionCapacity = 1000 35 | 36 | # Bind the source and sink to the channel: 37 | agent.sources.kafkasource.channels = memoryChannel 38 | agent.sinks.slssink.channel = memoryChannel 39 | -------------------------------------------------------------------------------- /example/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | # 19 | 20 | # Define some default values that can be overridden by system properties. 21 | # 22 | # For testing, it may also be convenient to specify 23 | # -Dflume.root.logger=DEBUG,console when launching flume. 
24 | 25 | #flume.root.logger=DEBUG,console 26 | flume.root.logger=INFO,LOGFILE 27 | flume.log.dir=./logs 28 | flume.log.file=flume.log 29 | 30 | log4j.logger.org.apache.flume.lifecycle = INFO 31 | log4j.logger.org.jboss = WARN 32 | log4j.logger.org.mortbay = INFO 33 | 34 | log4j.logger.org.apache.avro.ipc.NettyTransceiver = WARN 35 | log4j.logger.org.apache.hadoop = INFO 36 | log4j.logger.org.apache.hadoop.hive = ERROR 37 | 38 | # Define the root logger to the system property "flume.root.logger". 39 | log4j.rootLogger=${flume.root.logger} 40 | 41 | 42 | # Stock log4j rolling file appender 43 | # Default log rotation configuration 44 | log4j.appender.LOGFILE=org.apache.log4j.RollingFileAppender 45 | log4j.appender.LOGFILE.MaxFileSize=100MB 46 | log4j.appender.LOGFILE.MaxBackupIndex=10 47 | log4j.appender.LOGFILE.File=${flume.log.dir}/${flume.log.file} 48 | log4j.appender.LOGFILE.layout=org.apache.log4j.PatternLayout 49 | log4j.appender.LOGFILE.layout.ConversionPattern=%d{dd MMM yyyy HH:mm:ss,SSS} %-5p [%t] (%C.%M:%L) %x - %m%n 50 | 51 | 52 | # Warning: If you enable the following appender it will fill up your disk if you don't have a cleanup job! 53 | # This uses the updated rolling file appender from log4j-extras that supports a reliable time-based rolling policy. 54 | # See http://logging.apache.org/log4j/companions/extras/apidocs/org/apache/log4j/rolling/TimeBasedRollingPolicy.html 55 | # Add "DAILY" to flume.root.logger above if you want to use this 56 | log4j.appender.DAILY=org.apache.log4j.rolling.RollingFileAppender 57 | log4j.appender.DAILY.rollingPolicy=org.apache.log4j.rolling.TimeBasedRollingPolicy 58 | log4j.appender.DAILY.rollingPolicy.ActiveFileName=${flume.log.dir}/${flume.log.file} 59 | log4j.appender.DAILY.rollingPolicy.FileNamePattern=${flume.log.dir}/${flume.log.file}.%d{yyyy-MM-dd} 60 | log4j.appender.DAILY.layout=org.apache.log4j.PatternLayout 61 | log4j.appender.DAILY.layout.ConversionPattern=%d{dd MMM yyyy HH:mm:ss,SSS} %-5p [%t] (%C.%M:%L) %x - %m%n 62 | 63 | # console 64 | # Add "console" to flume.root.logger above if you want to use this 65 | log4j.appender.console=org.apache.log4j.ConsoleAppender 66 | log4j.appender.console.target=System.err 67 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 68 | log4j.appender.console.layout.ConversionPattern=%d (%t) [%p - %l] %m%n 69 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | com.aliyun.loghub.flume 8 | aliyun-log-flume 9 | 1.8 10 | 11 | 12 | UTF-8 13 | 1.8.0 14 | 4.10 15 | 0.6.47 16 | 2.5.0 17 | 4.6 18 | UTF-8 19 | UTF-8 20 | 21 | 22 | 23 | 24 | org.apache.flume 25 | flume-ng-core 26 | ${flume.version} 27 | provided 28 | 29 | 30 | junit 31 | junit 32 | ${junit.version} 33 | test 34 | 35 | 36 | com.google.protobuf 37 | protobuf-java 38 | ${protobuf.version} 39 | 40 | 41 | com.aliyun.openservices 42 | loghub-client-lib 43 | ${loghub.consumergroup.version} 44 | 45 | 46 | log4j 47 | log4j 48 | 49 | 50 | 51 | 52 | com.opencsv 53 | opencsv 54 | ${opencsv.version} 55 | 56 | 57 | 58 | 59 | 60 | 61 | maven-assembly-plugin 62 | 63 | ${project.name}-${project.version} 64 | false 65 | 66 | jar-with-dependencies 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | org.apache.maven.plugins 75 | maven-compiler-plugin 76 | 77 | 78 | 1.8 79 | 1.8 80 | 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- 
/src/main/java/com/aliyun/loghub/flume/Constants.java: -------------------------------------------------------------------------------- 1 | package com.aliyun.loghub.flume; 2 | 3 | 4 | import com.aliyun.loghub.flume.utils.VersionInfoUtils; 5 | 6 | public class Constants { 7 | 8 | public static final String CONSUMER_GROUP_KEY = "consumerGroup"; 9 | public static final String ENDPOINT_KEY = "endpoint"; 10 | public static final String PROJECT_KEY = "project"; 11 | public static final String LOGSTORE_KEY = "logstore"; 12 | public static final String ACCESS_KEY_ID_KEY = "accessKeyId"; 13 | public static final String ACCESS_KEY_SECRET_KEY = "accessKey"; 14 | public static final String CONSUME_INITIAL_POSITION = "initialPosition"; 15 | public static final String CONSUME_POSITION_BEGIN = "begin"; 16 | public static final String CONSUME_POSITION_END = "end"; 17 | public static final String QUERY = "query"; 18 | public static final String CONSUME_POSITION_TIMESTAMP = "timestamp"; 19 | public static final String LOG_USER_AGENT = "userAgent"; 20 | public static final String LOG_CONNECTOR_USER_AGENT = VersionInfoUtils.getDefaultUserAgent(); 21 | 22 | /** 23 | * Consumer group heartbeat interval in millisecond. 24 | */ 25 | public static final String HEARTBEAT_INTERVAL_MS = "heartbeatIntervalMs"; 26 | /** 27 | * Fetch data interval in millisecond. 28 | */ 29 | public static final String FETCH_INTERVAL_MS = "fetchIntervalMs"; 30 | 31 | public static final String USE_RECORD_TIME = "useRecordTime"; 32 | public static final long DEFAULT_HEARTBEAT_INTERVAL_MS = 30000L; 33 | public static final long DEFAULT_FETCH_INTERVAL_MS = 100L; 34 | public static final String FETCH_IN_ORDER = "fetchInOrder"; 35 | public static final boolean DEFAULT_FETCH_IN_ORDER = false; 36 | public static final String BATCH_SIZE = "batchSize"; 37 | public static final int DEFAULT_BATCH_SIZE = 1000; 38 | public static final String BUFFER_BYTES = "bufferBytes"; 39 | public static final long DEFAULT_BUFFER_BYTES = 2 * 1024 * 1024; // 2MB 40 | public static final long MAX_LOG_GROUP_BYTES = 10 * 1024 * 1024; // 10MB 41 | public static final String MAX_BUFFER_TIME = "maxBufferTime"; 42 | public static final String MAX_RETRY = "maxRetry"; 43 | public static final int DEFAULT_MAX_RETRY = 16; 44 | 45 | public static final String SERIALIZER = "serializer"; 46 | public static final String DESERIALIZER = "deserializer"; 47 | public static final String COLUMNS = "columns"; 48 | public static final String SEPARATOR_CHAR = "separatorChar"; 49 | public static final String APPLY_QUOTES_TO_ALL = "applyQuotesToAll"; 50 | public static final String QUOTE_CHAR = "quoteChar"; 51 | public static final String ESCAPE_CHAR = "escapeChar"; 52 | public static final String LINE_END = "lineEnd"; 53 | public static final String APPEND_TIMESTAMP = "appendTimestamp"; 54 | public static final String TIME_AS_FIELD = "timeAsField"; 55 | public static final String TOPIC_AS_FIELD = "topicAsField"; 56 | public static final String TAG_AS_FIELD = "tagAsField"; 57 | public static final String SOURCE_AS_FIELD = "sourceAsField"; 58 | 59 | public static final String TIME_FIELD = "timeField"; 60 | public static final String TIME_FORMAT = "timeFormat"; 61 | 62 | public static final String AUTO_DETECT_JSON_FIELDS = "autoDetectJSONFields"; 63 | public static final String TIMESTAMP = "timestamp"; 64 | public static final String RECORD_TIME_KEY = "__time__"; 65 | public static final String RECORD_TAG_PREFIX = "__tag__:"; 66 | public static final String RECORD_SOURCE_KEY = "__source__"; 
67 | public static final String RECORD_TOPIC_KEY = "__topic__"; 68 | 69 | public static final String APPEND_LOCAL_TIME = "appendLocalTime"; 70 | public static final String LOCAL_TIME_FIELD_NAME = "localTimeFieldName"; 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/com/aliyun/loghub/flume/Validate.java: -------------------------------------------------------------------------------- 1 | package com.aliyun.loghub.flume; 2 | 3 | import static com.google.common.base.Preconditions.checkArgument; 4 | 5 | public final class Validate { 6 | 7 | public static void notEmpty(String value, String name) { 8 | checkArgument(value != null && !value.isEmpty(), "Missing parameter: " + name); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/com/aliyun/loghub/flume/sink/DelimitedTextEventSerializer.java: -------------------------------------------------------------------------------- 1 | package com.aliyun.loghub.flume.sink; 2 | 3 | import com.aliyun.openservices.log.common.LogItem; 4 | import com.opencsv.CSVParser; 5 | import com.opencsv.CSVParserBuilder; 6 | import com.opencsv.CSVReader; 7 | import com.opencsv.CSVReaderBuilder; 8 | import com.opencsv.CSVWriter; 9 | import org.apache.commons.lang.StringUtils; 10 | import org.apache.flume.Context; 11 | import org.apache.flume.Event; 12 | import org.apache.flume.FlumeException; 13 | import org.slf4j.Logger; 14 | import org.slf4j.LoggerFactory; 15 | 16 | import java.io.ByteArrayInputStream; 17 | import java.io.IOException; 18 | import java.io.InputStreamReader; 19 | import java.nio.charset.StandardCharsets; 20 | 21 | import static com.aliyun.loghub.flume.Constants.COLUMNS; 22 | import static com.aliyun.loghub.flume.Constants.ESCAPE_CHAR; 23 | import static com.aliyun.loghub.flume.Constants.QUOTE_CHAR; 24 | import static com.aliyun.loghub.flume.Constants.SEPARATOR_CHAR; 25 | import static com.aliyun.loghub.flume.Constants.TIMESTAMP; 26 | import static com.aliyun.loghub.flume.Constants.USE_RECORD_TIME; 27 | 28 | 29 | public class DelimitedTextEventSerializer implements EventSerializer { 30 | private static final Logger LOG = LoggerFactory.getLogger(DelimitedTextEventSerializer.class); 31 | 32 | static final String ALIAS = "DELIMITED"; 33 | 34 | private String[] fieldNames; 35 | private CSVParser csvParser; 36 | private boolean useRecordTime; 37 | 38 | @Override 39 | public LogItem serialize(Event event) { 40 | try (InputStreamReader in = new InputStreamReader( 41 | new ByteArrayInputStream(event.getBody()), 42 | StandardCharsets.UTF_8)) { 43 | CSVReader reader = new CSVReaderBuilder(in).withCSVParser(csvParser).build(); 44 | String[] record = reader.readNext(); 45 | LogItem item = new LogItem(); 46 | int numberOfCol = Math.min(record.length, fieldNames.length); 47 | for (int i = 0; i < numberOfCol; i++) { 48 | if (useRecordTime && TIMESTAMP.equals(fieldNames[i])) { 49 | try { 50 | item.SetTime(Integer.parseInt(record[i])); 51 | } catch (NumberFormatException nfe) { 52 | LOG.warn("Failed to parse record time", nfe); 53 | } 54 | } 55 | item.PushBack(fieldNames[i], record[i]); 56 | } 57 | return item; 58 | } catch (IOException ex) { 59 | throw new FlumeException("Failed to parsing delimited text", ex); 60 | } 61 | } 62 | 63 | private static char getChar(Context context, String key, char defaultValue) { 64 | String value = context.getString(key); 65 | if (value == null) { 66 | return defaultValue; 67 | } 68 | value = value.trim(); 69 | if 
(value.length() != 1) { 70 | throw new IllegalArgumentException(key + " is invalid for DELIMITED serializer: " + value); 71 | } 72 | return value.charAt(0); 73 | } 74 | 75 | @Override 76 | public void configure(Context context) { 77 | String columns = context.getString(COLUMNS); 78 | if (StringUtils.isBlank(columns)) { 79 | throw new IllegalArgumentException("Missing parameter: " + COLUMNS); 80 | } 81 | char separatorChar = getChar(context, SEPARATOR_CHAR, CSVWriter.DEFAULT_SEPARATOR); 82 | char quoteChar = getChar(context, QUOTE_CHAR, CSVWriter.DEFAULT_QUOTE_CHARACTER); 83 | char escapeChar = getChar(context, ESCAPE_CHAR, CSVWriter.DEFAULT_ESCAPE_CHARACTER); 84 | LOG.info("separatorChar=[" + separatorChar + "] quoteChar=[" + quoteChar + "] escapeChar=[" + escapeChar + "]"); 85 | fieldNames = columns.split(",", -1); 86 | csvParser = new CSVParserBuilder().withEscapeChar(escapeChar) 87 | .withSeparator(separatorChar) 88 | .withQuoteChar(quoteChar) 89 | .build(); 90 | useRecordTime = context.getBoolean(USE_RECORD_TIME, false); 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /src/main/java/com/aliyun/loghub/flume/sink/EventHandler.java: -------------------------------------------------------------------------------- 1 | package com.aliyun.loghub.flume.sink; 2 | 3 | import com.aliyun.openservices.log.Client; 4 | import com.aliyun.openservices.log.common.LogItem; 5 | import com.aliyun.openservices.log.exception.LogException; 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | 9 | import java.util.List; 10 | import java.util.concurrent.Callable; 11 | 12 | public class EventHandler implements Callable { 13 | private static final Logger LOG = LoggerFactory.getLogger(EventHandler.class); 14 | 15 | private final Client client; 16 | private final String project; 17 | private final String logstore; 18 | private final String source; 19 | private final List eventList; 20 | private final int maxRetry; 21 | 22 | private static final long MAX_BACKOFF = 3000; 23 | 24 | EventHandler(Client client, 25 | String project, 26 | String logstore, 27 | String source, 28 | List eventList, 29 | int maxRetry) { 30 | this.client = client; 31 | this.project = project; 32 | this.logstore = logstore; 33 | this.source = source; 34 | this.eventList = eventList; 35 | this.maxRetry = maxRetry; 36 | } 37 | 38 | @Override 39 | public Boolean call() throws Exception { 40 | if (eventList.isEmpty()) { 41 | return true; 42 | } 43 | long backoff = 100; 44 | for (int i = 0; ; i++) { 45 | if (i > 0) { 46 | try { 47 | Thread.sleep(backoff); 48 | } catch (InterruptedException ex) { 49 | // It's okay 50 | Thread.currentThread().interrupt(); 51 | } 52 | backoff = Math.min(backoff * 2, MAX_BACKOFF); 53 | } 54 | try { 55 | client.PutLogs(project, logstore, "", eventList, source); 56 | LOG.info("{} events has been sent to Log Service", eventList.size()); 57 | return true; 58 | } catch (LogException ex) { 59 | int code = ex.GetHttpCode(); 60 | boolean alwaysRetry = code >= 500 || code == 403 || code <= 0; 61 | if (alwaysRetry || i < maxRetry - 1) { 62 | LOG.warn("Retry on error={}, status={}", ex.GetErrorMessage(), code); 63 | } else { 64 | LOG.error("Send events to Log Service failed", ex); 65 | throw ex; 66 | } 67 | } 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/main/java/com/aliyun/loghub/flume/sink/EventSerializer.java: -------------------------------------------------------------------------------- 1 | 
package com.aliyun.loghub.flume.sink; 2 | 3 | import com.aliyun.openservices.log.common.LogItem; 4 | import org.apache.flume.Event; 5 | import org.apache.flume.conf.Configurable; 6 | 7 | public interface EventSerializer extends Configurable { 8 | 9 | /** 10 | * Serialize event to log item. 11 | * 12 | * @param event 13 | * @return 14 | */ 15 | LogItem serialize(Event event); 16 | } 17 | -------------------------------------------------------------------------------- /src/main/java/com/aliyun/loghub/flume/sink/JSONEventSerializer.java: -------------------------------------------------------------------------------- 1 | package com.aliyun.loghub.flume.sink; 2 | 3 | import com.alibaba.fastjson.JSONObject; 4 | import com.aliyun.openservices.log.common.LogItem; 5 | import org.apache.flume.Context; 6 | import org.apache.flume.Event; 7 | 8 | import java.io.UnsupportedEncodingException; 9 | import java.time.LocalDateTime; 10 | import java.time.ZoneOffset; 11 | import java.time.format.DateTimeFormatter; 12 | 13 | import static com.aliyun.loghub.flume.Constants.TIME_FIELD; 14 | import static com.aliyun.loghub.flume.Constants.TIME_FORMAT; 15 | 16 | public class JSONEventSerializer implements EventSerializer { 17 | 18 | static final String ALIAS = "JSON"; 19 | 20 | private String encoding; 21 | private String timeField; 22 | private DateTimeFormatter formatter; 23 | private boolean isEpoch = false; 24 | 25 | public LocalDateTime parse(String dateNow) { 26 | return LocalDateTime.parse(dateNow, formatter); 27 | } 28 | 29 | @Override 30 | public void configure(Context context) { 31 | encoding = context.getString("encoding", "UTF-8"); 32 | timeField = context.getString(TIME_FIELD); 33 | String timeFormat = context.getString(TIME_FORMAT); 34 | if (timeFormat != null && !timeFormat.isEmpty()) { 35 | if (timeFormat.equalsIgnoreCase("epoch")) { 36 | isEpoch = true; 37 | } else { 38 | formatter = DateTimeFormatter.ofPattern(timeFormat); 39 | } 40 | } 41 | } 42 | 43 | @Override 44 | public LogItem serialize(Event event) { 45 | try { 46 | String body = new String(event.getBody(), encoding); 47 | LogItem item = new LogItem(); 48 | JSONObject object = JSONObject.parseObject(body); 49 | object.forEach((key, value) -> { 50 | item.PushBack(key, value == null ? 
"null" : value.toString()); 51 | if (timeField != null && !timeField.isEmpty() && timeField.equals(key) 52 | && value != null) { 53 | try { 54 | String timestr = value.toString(); 55 | if (isEpoch) { 56 | item.SetTime(Integer.parseInt(timestr)); 57 | } else { 58 | item.SetTime((int) (parse(timestr).toEpochSecond(ZoneOffset.UTC))); 59 | } 60 | } catch (Exception ex) { 61 | // ignore 62 | } 63 | } 64 | }); 65 | return item; 66 | } catch (UnsupportedEncodingException e) { 67 | throw new RuntimeException("Failed to decode event with encoding: " + encoding, e); 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/main/java/com/aliyun/loghub/flume/sink/LoghubSink.java: -------------------------------------------------------------------------------- 1 | package com.aliyun.loghub.flume.sink; 2 | 3 | import com.aliyun.loghub.flume.Validate; 4 | import com.aliyun.loghub.flume.source.DelimitedTextEventDeserializer; 5 | import com.aliyun.openservices.log.Client; 6 | import com.aliyun.openservices.log.common.LogContent; 7 | import com.aliyun.openservices.log.common.LogItem; 8 | import com.aliyun.openservices.log.util.NetworkUtils; 9 | import org.apache.commons.lang.StringUtils; 10 | import org.apache.flume.*; 11 | import org.apache.flume.conf.Configurable; 12 | import org.apache.flume.instrumentation.SinkCounter; 13 | import org.apache.flume.sink.AbstractSink; 14 | import org.slf4j.Logger; 15 | import org.slf4j.LoggerFactory; 16 | 17 | import java.util.ArrayList; 18 | import java.util.List; 19 | import java.util.concurrent.*; 20 | 21 | import static com.aliyun.loghub.flume.Constants.*; 22 | 23 | public class LoghubSink extends AbstractSink implements Configurable { 24 | private static final Logger LOG = LoggerFactory.getLogger(LoghubSink.class); 25 | 26 | private int batchSize; 27 | private long bufferBytes; 28 | private int maxRetry; 29 | private int concurrency; 30 | private long maxBufferTime; 31 | private String project; 32 | private String logstore; 33 | private String source; 34 | private EventSerializer serializer; 35 | private ThreadPoolExecutor executor; 36 | private Client client; 37 | private SinkCounter counter; 38 | 39 | @Override 40 | public synchronized void start() { 41 | executor = new ThreadPoolExecutor(0, concurrency, 42 | 60L, TimeUnit.SECONDS, 43 | new ArrayBlockingQueue<>(100), 44 | Executors.defaultThreadFactory(), 45 | new ThreadPoolExecutor.CallerRunsPolicy()); 46 | executor.allowCoreThreadTimeOut(true); 47 | counter.start(); 48 | source = NetworkUtils.getLocalMachineIP(); 49 | super.start(); 50 | LOG.info("Loghub Sink {} started.", getName()); 51 | } 52 | 53 | private static long getLogItemSize(LogItem item) { 54 | long size = 4 + item.mContents.size() * 2L; 55 | for (LogContent logContent : item.mContents) { 56 | if (logContent.mKey != null) { 57 | size += logContent.mKey.length(); 58 | } 59 | if (logContent.mValue != null) { 60 | size += logContent.mValue.length(); 61 | } 62 | } 63 | return size; 64 | } 65 | 66 | @Override 67 | public Status process() throws EventDeliveryException { 68 | Channel channel = getChannel(); 69 | Transaction transaction = null; 70 | long earliestEventTime = -1; 71 | Status result = Status.READY; 72 | List buffer = new ArrayList<>(); 73 | List> producerFutures = new ArrayList<>(); 74 | long totalBytes = 0; 75 | long processedEvents = 0; 76 | try { 77 | transaction = channel.getTransaction(); 78 | transaction.begin(); 79 | for (; processedEvents < batchSize; processedEvents++) { 80 | Event 
event = channel.take(); 81 | if (event == null) { 82 | // no events available in channel 83 | if (processedEvents == 0) { 84 | result = Status.BACKOFF; 85 | counter.incrementBatchEmptyCount(); 86 | } else { 87 | counter.incrementBatchUnderflowCount(); 88 | } 89 | break; 90 | } 91 | counter.incrementEventDrainAttemptCount(); 92 | LogItem logItem; 93 | try { 94 | logItem = serializer.serialize(event); 95 | } catch (Exception ex) { 96 | LOG.error("Failed to serialize event to log", ex); 97 | continue; 98 | } 99 | long logItemSize = getLogItemSize(logItem); 100 | if (logItemSize >= MAX_LOG_GROUP_BYTES) { 101 | LOG.error("Event size {} is too large", logItemSize); 102 | continue; 103 | } 104 | if (earliestEventTime < 0) { 105 | earliestEventTime = System.currentTimeMillis(); 106 | } 107 | if (totalBytes + logItemSize > bufferBytes || System.currentTimeMillis() - earliestEventTime >= maxBufferTime) { 108 | LOG.debug("Flushing events to Log service, event count {}", buffer.size()); 109 | List events = buffer; 110 | producerFutures.add(sendEvents(events)); 111 | buffer = new ArrayList<>(); 112 | earliestEventTime = -1; 113 | totalBytes = 0; 114 | } 115 | buffer.add(logItem); 116 | totalBytes += logItemSize; 117 | } 118 | if (!buffer.isEmpty()) { 119 | producerFutures.add(sendEvents(buffer)); 120 | } 121 | if (processedEvents > 0) { 122 | for (Future future : producerFutures) { 123 | // throw exception here if failed to send events 124 | future.get(); 125 | } 126 | producerFutures.clear(); 127 | counter.addToEventDrainSuccessCount(processedEvents); 128 | } 129 | transaction.commit(); 130 | } catch (Exception ex) { 131 | LOG.error("Failed to publish events", ex); 132 | if (transaction != null) { 133 | try { 134 | transaction.rollback(); 135 | } catch (Exception e) { 136 | LOG.error("Transaction rollback failed", e); 137 | } 138 | } 139 | throw new EventDeliveryException("Failed to publish events", ex); 140 | } finally { 141 | if (transaction != null) { 142 | transaction.close(); 143 | } 144 | } 145 | return result; 146 | } 147 | 148 | private Future sendEvents(List events) { 149 | return executor.submit(new EventHandler(client, project, logstore, source, events, maxRetry)); 150 | } 151 | 152 | @Override 153 | public void configure(Context context) { 154 | String endpoint = context.getString(ENDPOINT_KEY); 155 | Validate.notEmpty(endpoint, ENDPOINT_KEY); 156 | project = context.getString(PROJECT_KEY); 157 | Validate.notEmpty(project, PROJECT_KEY); 158 | logstore = context.getString(LOGSTORE_KEY); 159 | Validate.notEmpty(logstore, LOGSTORE_KEY); 160 | String accessKeyId = context.getString(ACCESS_KEY_ID_KEY); 161 | Validate.notEmpty(accessKeyId, ACCESS_KEY_ID_KEY); 162 | String accessKey = context.getString(ACCESS_KEY_SECRET_KEY); 163 | Validate.notEmpty(accessKey, ACCESS_KEY_SECRET_KEY); 164 | client = new Client(endpoint, accessKeyId, accessKey); 165 | String userAgent = context.getString(LOG_USER_AGENT); 166 | if (StringUtils.isEmpty(userAgent)) { 167 | userAgent = LOG_CONNECTOR_USER_AGENT; 168 | } 169 | client.setUserAgent(userAgent); 170 | logstore = context.getString(LOGSTORE_KEY); 171 | if (counter == null) { 172 | counter = new SinkCounter(getName()); 173 | } 174 | batchSize = context.getInteger(BATCH_SIZE, DEFAULT_BATCH_SIZE); 175 | bufferBytes = context.getLong(BUFFER_BYTES, DEFAULT_BUFFER_BYTES); 176 | maxBufferTime = context.getInteger(MAX_BUFFER_TIME, 3000); 177 | maxRetry = context.getInteger(MAX_RETRY, DEFAULT_MAX_RETRY); 178 | int cores = Runtime.getRuntime().availableProcessors(); 179 
| concurrency = context.getInteger("concurrency", cores); 180 | serializer = createSerializer(context); 181 | } 182 | 183 | private EventSerializer createSerializer(Context context) { 184 | String serializerName = context.getString(SERIALIZER); 185 | EventSerializer serializer; 186 | if (serializerName == null || serializerName.isEmpty()) { 187 | serializer = new DelimitedTextEventSerializer(); 188 | } else if (serializerName.equals(DelimitedTextEventSerializer.ALIAS) 189 | || serializerName.equalsIgnoreCase(DelimitedTextEventDeserializer.class.getName())) { 190 | serializer = new DelimitedTextEventSerializer(); 191 | } else if (serializerName.equals(SimpleEventSerializer.ALIAS) 192 | || serializerName.equalsIgnoreCase(SimpleEventSerializer.class.getName())) { 193 | serializer = new SimpleEventSerializer(); 194 | } else if (serializerName.endsWith(RegexEventSerializer.ALIAS) 195 | || serializerName.equalsIgnoreCase(RegexEventSerializer.class.getName())) { 196 | serializer = new RegexEventSerializer(); 197 | } else if (serializerName.endsWith(JSONEventSerializer.ALIAS) 198 | || serializerName.equalsIgnoreCase(JSONEventSerializer.class.getName())) { 199 | serializer = new JSONEventSerializer(); 200 | } else { 201 | try { 202 | serializer = (EventSerializer) Class.forName(serializerName).newInstance(); 203 | } catch (Exception e) { 204 | throw new IllegalArgumentException("Unable to instantiate serializer: " + serializerName 205 | + " on sink: " + getName(), e); 206 | } 207 | } 208 | serializer.configure(context); 209 | return serializer; 210 | } 211 | 212 | @Override 213 | public synchronized void stop() { 214 | super.stop(); 215 | LOG.info("Stopping Loghub Sink {}", getName()); 216 | if (executor != null) { 217 | try { 218 | executor.shutdown(); 219 | executor.awaitTermination(30, TimeUnit.SECONDS); 220 | } catch (final Exception ex) { 221 | LOG.error("Error while closing Loghub sink {}.", getName(), ex); 222 | } 223 | } 224 | } 225 | } 226 | -------------------------------------------------------------------------------- /src/main/java/com/aliyun/loghub/flume/sink/RegexEventSerializer.java: -------------------------------------------------------------------------------- 1 | package com.aliyun.loghub.flume.sink; 2 | 3 | import com.alibaba.fastjson.JSONObject; 4 | import com.aliyun.openservices.log.common.LogItem; 5 | import org.apache.flume.Context; 6 | import org.apache.flume.Event; 7 | import org.slf4j.Logger; 8 | import org.slf4j.LoggerFactory; 9 | 10 | import java.io.UnsupportedEncodingException; 11 | import java.time.LocalDateTime; 12 | import java.time.ZoneOffset; 13 | import java.time.format.DateTimeFormatter; 14 | import java.util.ArrayList; 15 | import java.util.Collections; 16 | import java.util.List; 17 | import java.util.regex.Matcher; 18 | import java.util.regex.Pattern; 19 | 20 | import static com.aliyun.loghub.flume.Constants.TIME_FIELD; 21 | import static com.aliyun.loghub.flume.Constants.TIME_FORMAT; 22 | 23 | public class RegexEventSerializer implements EventSerializer { 24 | private static final Logger LOG = LoggerFactory.getLogger(RegexEventSerializer.class); 25 | 26 | static final String ALIAS = "REGEX"; 27 | private List fieldNames; 28 | private String encoding; 29 | private Pattern pattern; 30 | private List expandJsonFields; 31 | private String timeField; 32 | private DateTimeFormatter formatter; 33 | private boolean isEpoch = false; 34 | 35 | private List trimAll(String[] keys) { 36 | List fields = new ArrayList<>(keys.length); 37 | for (String k : keys) { 38 | 
fields.add(k.trim()); 39 | } 40 | return fields; 41 | } 42 | 43 | @Override 44 | public void configure(Context context) { 45 | LOG.info("Start sink with config: {}", context.toString()); 46 | encoding = context.getString("encoding", "UTF-8"); 47 | String regex = context.getString("regex"); 48 | if (regex == null || regex.isEmpty()) { 49 | throw new IllegalArgumentException("regex is missing"); 50 | } 51 | String fieldNames = context.getString("fieldNames"); 52 | if (fieldNames == null || fieldNames.isEmpty()) { 53 | throw new IllegalArgumentException("fieldNames is missing"); 54 | } 55 | this.fieldNames = trimAll(fieldNames.split(",", -1)); 56 | pattern = Pattern.compile(regex); 57 | String expandJsonKeys = context.getString("expandJsonKeys"); 58 | if (expandJsonKeys != null && !expandJsonKeys.isEmpty()) { 59 | this.expandJsonFields = trimAll(expandJsonKeys.split(",", -1)); 60 | } else { 61 | this.expandJsonFields = Collections.emptyList(); 62 | } 63 | timeField = context.getString(TIME_FIELD); 64 | String timeFormat = context.getString(TIME_FORMAT); 65 | if (timeFormat != null && !timeFormat.isEmpty()) { 66 | if (timeFormat.equalsIgnoreCase("epoch")) { 67 | isEpoch = true; 68 | } else { 69 | formatter = DateTimeFormatter.ofPattern(timeFormat); 70 | } 71 | } 72 | } 73 | 74 | public LocalDateTime parse(String dateNow) { 75 | return LocalDateTime.parse(dateNow, formatter); 76 | } 77 | 78 | private boolean isTimeField(String key) { 79 | return timeField != null && !timeField.isEmpty() && timeField.equals(key); 80 | } 81 | 82 | private int parseLogTime(String timestr) { 83 | try { 84 | if (isEpoch) { 85 | return Integer.parseInt(timestr); 86 | } else { 87 | return (int) (parse(timestr).toEpochSecond(ZoneOffset.of("+8"))); 88 | } 89 | } catch (Exception ex) { 90 | // ignore 91 | } 92 | return -1; 93 | } 94 | 95 | private void pushField(LogItem item, String k, String v) { 96 | item.PushBack(k, v); 97 | if (isTimeField(k)) { 98 | int ts = parseLogTime(v); 99 | if (ts > 0) { 100 | item.SetTime(ts); 101 | } 102 | } 103 | } 104 | 105 | @Override 106 | public LogItem serialize(Event event) { 107 | LogItem record = new LogItem(); 108 | try { 109 | String text = new String(event.getBody(), encoding); 110 | Matcher matcher = pattern.matcher(text); 111 | if (!matcher.matches()) { 112 | LOG.debug("Regex not match - {}", text); 113 | record.PushBack("content", text); 114 | return record; 115 | } 116 | for (int i = 1; i <= matcher.groupCount(); i++) { 117 | if (i > fieldNames.size()) { 118 | break; 119 | } 120 | String value = matcher.group(i); 121 | String key = fieldNames.get(i - 1); 122 | if (value == null) { 123 | record.PushBack(key, "null"); 124 | continue; 125 | } 126 | if (expandJsonFields.contains(key)) { 127 | // expand first 128 | String jsonStr = value.trim(); 129 | try { 130 | JSONObject object = JSONObject.parseObject(jsonStr); 131 | object.forEach((k, v) -> { 132 | if (v == null) { 133 | record.PushBack(k, "null"); 134 | } else { 135 | pushField(record, k, v.toString()); 136 | } 137 | }); 138 | continue; 139 | } catch (Exception ex) { 140 | LOG.error("Cannot parse JSON: " + value); 141 | } 142 | } 143 | pushField(record, key, value); 144 | } 145 | } catch (UnsupportedEncodingException e) { 146 | throw new RuntimeException("Failed to decode event with encoding: " + encoding, e); 147 | } 148 | return record; 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /src/main/java/com/aliyun/loghub/flume/sink/SimpleEventSerializer.java: 
-------------------------------------------------------------------------------- 1 | package com.aliyun.loghub.flume.sink; 2 | 3 | import com.aliyun.openservices.log.common.LogItem; 4 | import org.apache.flume.Context; 5 | import org.apache.flume.Event; 6 | 7 | import java.io.UnsupportedEncodingException; 8 | 9 | public class SimpleEventSerializer implements EventSerializer { 10 | 11 | static final String ALIAS = "SIMPLE"; 12 | private String fieldName; 13 | private String encoding; 14 | 15 | private static final String DEFAULT_FIELD_NAME = "body"; 16 | 17 | @Override 18 | public void configure(Context context) { 19 | encoding = context.getString("encoding", "UTF-8"); 20 | fieldName = context.getString("fieldName"); 21 | if (fieldName == null || fieldName.isEmpty()) { 22 | fieldName = DEFAULT_FIELD_NAME; 23 | } 24 | } 25 | 26 | @Override 27 | public LogItem serialize(Event event) { 28 | LogItem item = new LogItem(); 29 | try { 30 | item.PushBack(fieldName, new String(event.getBody(), encoding)); 31 | } catch (UnsupportedEncodingException e) { 32 | throw new RuntimeException("Failed to decode event with encoding: " + encoding, e); 33 | } 34 | return item; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/com/aliyun/loghub/flume/source/DelimitedTextEventDeserializer.java: -------------------------------------------------------------------------------- 1 | package com.aliyun.loghub.flume.source; 2 | 3 | import com.aliyun.openservices.log.common.FastLog; 4 | import com.aliyun.openservices.log.common.FastLogContent; 5 | import com.aliyun.openservices.log.common.FastLogGroup; 6 | import com.opencsv.CSVWriter; 7 | import org.apache.commons.lang.StringUtils; 8 | import org.apache.flume.Context; 9 | import org.apache.flume.Event; 10 | import org.apache.flume.FlumeException; 11 | import org.apache.flume.event.EventBuilder; 12 | import org.slf4j.Logger; 13 | import org.slf4j.LoggerFactory; 14 | 15 | import java.io.IOException; 16 | import java.io.StringWriter; 17 | import java.util.ArrayList; 18 | import java.util.Collections; 19 | import java.util.HashMap; 20 | import java.util.List; 21 | import java.util.Map; 22 | 23 | import static com.aliyun.loghub.flume.Constants.APPEND_LOCAL_TIME; 24 | import static com.aliyun.loghub.flume.Constants.APPEND_TIMESTAMP; 25 | import static com.aliyun.loghub.flume.Constants.APPLY_QUOTES_TO_ALL; 26 | import static com.aliyun.loghub.flume.Constants.COLUMNS; 27 | import static com.aliyun.loghub.flume.Constants.ESCAPE_CHAR; 28 | import static com.aliyun.loghub.flume.Constants.LINE_END; 29 | import static com.aliyun.loghub.flume.Constants.LOCAL_TIME_FIELD_NAME; 30 | import static com.aliyun.loghub.flume.Constants.QUOTE_CHAR; 31 | import static com.aliyun.loghub.flume.Constants.SEPARATOR_CHAR; 32 | import static com.aliyun.loghub.flume.Constants.TIMESTAMP; 33 | import static com.aliyun.loghub.flume.Constants.USE_RECORD_TIME; 34 | 35 | 36 | public class DelimitedTextEventDeserializer implements EventDeserializer { 37 | private static final Logger LOG = LoggerFactory.getLogger(DelimitedTextEventDeserializer.class); 38 | 39 | static final String ALIAS = "DELIMITED"; 40 | 41 | private Map fieldIndexMapping; 42 | private boolean useRecordTime; 43 | private boolean appendTimestamp; 44 | private boolean applyQuotesToAll; 45 | private char separatorChar; 46 | private char quoteChar; 47 | private char escapeChar; 48 | private String lineEnd; 49 | 50 | private static final String DEFAULT_LINE_END = ""; 51 | 52 | 
private boolean appendLocalTime; 53 | private int localTimeIndex = 0; 54 | 55 | @Override 56 | public List deserialize(FastLogGroup logGroup) { 57 | int count = logGroup.getLogsCount(); 58 | int width = fieldIndexMapping.size(); 59 | if (appendTimestamp) { 60 | width++; 61 | } 62 | String[] record = new String[width]; 63 | List events = new ArrayList<>(count); 64 | final StringWriter writer = new StringWriter(); 65 | CSVWriter csvWriter = new CSVWriter(writer, separatorChar, quoteChar, escapeChar, lineEnd); 66 | 67 | for (int idx = 0; idx < count; ++idx) { 68 | FastLog log = logGroup.getLogs(idx); 69 | for (int i = 0; i < log.getContentsCount(); i++) { 70 | FastLogContent content = log.getContents(i); 71 | final String key = content.getKey(); 72 | Integer index = fieldIndexMapping.get(key); 73 | if (index != null) { 74 | // otherwise ignore this field 75 | String value = content.getValue(); 76 | if (value != null && value.contains("\n")) { 77 | value = value.replace('\n', ' '); 78 | } 79 | record[index] = value; 80 | } 81 | } 82 | int recordTime = log.getTime(); 83 | String localTime = String.valueOf(System.currentTimeMillis()); 84 | String timestamp; 85 | if (useRecordTime) { 86 | timestamp = String.valueOf(((long) recordTime) * 1000); 87 | } else { 88 | timestamp = localTime; 89 | } 90 | if (appendTimestamp) { 91 | record[width - 1] = timestamp; 92 | } 93 | if (appendLocalTime) { 94 | record[localTimeIndex] = localTime; 95 | } 96 | csvWriter.writeNext(record, applyQuotesToAll); 97 | try { 98 | csvWriter.flush(); 99 | } catch (IOException ex) { 100 | throw new FlumeException("Failed to flush writer", ex); 101 | } 102 | Event event = EventBuilder.withBody(writer.toString(), charset, 103 | Collections.singletonMap(TIMESTAMP, timestamp)); 104 | events.add(event); 105 | for (int i = 0; i < width; i++) { 106 | record[i] = null; 107 | } 108 | writer.getBuffer().setLength(0); 109 | } 110 | return events; 111 | } 112 | 113 | private static char getChar(Context context, String key, char defaultValue) { 114 | String value = context.getString(key); 115 | if (value == null) { 116 | return defaultValue; 117 | } 118 | value = value.trim(); 119 | if (value.length() != 1) { 120 | throw new IllegalArgumentException(key + " is invalid for CSV serializer: " + value); 121 | } 122 | return value.charAt(0); 123 | } 124 | 125 | @Override 126 | public void configure(Context context) { 127 | useRecordTime = context.getBoolean(USE_RECORD_TIME, false); 128 | String columns = context.getString(COLUMNS); 129 | if (StringUtils.isBlank(columns)) { 130 | throw new IllegalArgumentException("Missing parameters: " + COLUMNS); 131 | } 132 | applyQuotesToAll = context.getBoolean(APPLY_QUOTES_TO_ALL, false); 133 | separatorChar = getChar(context, SEPARATOR_CHAR, CSVWriter.DEFAULT_SEPARATOR); 134 | quoteChar = getChar(context, QUOTE_CHAR, CSVWriter.DEFAULT_QUOTE_CHARACTER); 135 | escapeChar = getChar(context, ESCAPE_CHAR, CSVWriter.DEFAULT_ESCAPE_CHARACTER); 136 | if (LOG.isDebugEnabled()) { 137 | LOG.debug("separatorChar=[" + separatorChar + "] quoteChar=[" + quoteChar + "] escapeChar=[" + escapeChar + "]"); 138 | } 139 | lineEnd = context.getString(LINE_END, DEFAULT_LINE_END); 140 | appendTimestamp = context.getBoolean(APPEND_TIMESTAMP, false); 141 | String[] fields = columns.split(",", -1); 142 | int width = fields.length; 143 | fieldIndexMapping = new HashMap<>(width); 144 | for (int i = 0; i < width; i++) { 145 | fieldIndexMapping.put(fields[i], i); 146 | } 147 | appendLocalTime = context.getBoolean(APPEND_LOCAL_TIME, 
false); 148 | if (appendLocalTime) { 149 | String localTimeFieldName = context.getString(LOCAL_TIME_FIELD_NAME); 150 | if (StringUtils.isBlank(localTimeFieldName)) { 151 | throw new IllegalArgumentException("Missing parameter: " + LOCAL_TIME_FIELD_NAME); 152 | } 153 | if (!fieldIndexMapping.containsKey(localTimeFieldName)) { 154 | throw new IllegalArgumentException("Field '" + localTimeFieldName + "' not exist in columns"); 155 | } 156 | localTimeIndex = fieldIndexMapping.get(localTimeFieldName); 157 | } 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /src/main/java/com/aliyun/loghub/flume/source/EventDeserializer.java: -------------------------------------------------------------------------------- 1 | package com.aliyun.loghub.flume.source; 2 | 3 | import com.aliyun.openservices.log.common.FastLogGroup; 4 | import org.apache.flume.Event; 5 | import org.apache.flume.conf.Configurable; 6 | 7 | import java.nio.charset.Charset; 8 | import java.nio.charset.StandardCharsets; 9 | import java.util.List; 10 | 11 | public interface EventDeserializer extends Configurable { 12 | 13 | Charset charset = StandardCharsets.UTF_8; 14 | 15 | /** 16 | * Serializes a LogGroup to one or more Flume events. 17 | * 18 | * @param logGroup 19 | * @return 20 | */ 21 | List deserialize(FastLogGroup logGroup); 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/com/aliyun/loghub/flume/source/JSONEventDeserializer.java: -------------------------------------------------------------------------------- 1 | package com.aliyun.loghub.flume.source; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import com.alibaba.fastjson.JSONObject; 5 | import com.aliyun.openservices.log.common.FastLog; 6 | import com.aliyun.openservices.log.common.FastLogContent; 7 | import com.aliyun.openservices.log.common.FastLogGroup; 8 | import com.aliyun.openservices.log.common.FastLogTag; 9 | import org.apache.flume.Context; 10 | import org.apache.flume.Event; 11 | import org.apache.flume.event.EventBuilder; 12 | import org.slf4j.Logger; 13 | import org.slf4j.LoggerFactory; 14 | 15 | import java.util.ArrayList; 16 | import java.util.Collections; 17 | import java.util.List; 18 | 19 | import static com.aliyun.loghub.flume.Constants.AUTO_DETECT_JSON_FIELDS; 20 | import static com.aliyun.loghub.flume.Constants.RECORD_SOURCE_KEY; 21 | import static com.aliyun.loghub.flume.Constants.RECORD_TAG_PREFIX; 22 | import static com.aliyun.loghub.flume.Constants.RECORD_TIME_KEY; 23 | import static com.aliyun.loghub.flume.Constants.SOURCE_AS_FIELD; 24 | import static com.aliyun.loghub.flume.Constants.TAG_AS_FIELD; 25 | import static com.aliyun.loghub.flume.Constants.TIMESTAMP; 26 | import static com.aliyun.loghub.flume.Constants.TIME_AS_FIELD; 27 | import static com.aliyun.loghub.flume.Constants.USE_RECORD_TIME; 28 | import static com.aliyun.loghub.flume.Constants.TOPIC_AS_FIELD; 29 | import static com.aliyun.loghub.flume.Constants.RECORD_TOPIC_KEY; 30 | 31 | 32 | public class JSONEventDeserializer implements EventDeserializer { 33 | static final String ALIAS = "JSON"; 34 | private static final Logger LOG = LoggerFactory.getLogger(JSONEventDeserializer.class); 35 | 36 | private boolean useRecordTime; 37 | private boolean sourceAsField; 38 | private boolean tagAsField; 39 | private boolean timeAsField; 40 | private boolean topicAsField; 41 | private boolean autoDetectJSONFields; 42 | 43 | 44 | static boolean mayBeJSON(String string) { 45 | // if (string == 
null) { 46 | // return false; 47 | // } else if ("null".equals(string)) { 48 | // return true; 49 | // } 50 | // int n = string.length(); 51 | // int left = 0; 52 | // while (left < n && Character.isWhitespace(string.charAt(left))) 53 | // left++; 54 | // if (left >= n) 55 | // return false; 56 | // char lch = string.charAt(left); 57 | // if (lch != '{' && lch != '[') 58 | // return false; 59 | // int right = n - 1; 60 | // while (right >= 0 && Character.isWhitespace(string.charAt(right))) 61 | // right--; 62 | // if (right < 0) 63 | // return false; 64 | // char rch = string.charAt(right); 65 | // return (lch == '[' && rch == ']') || (lch == '{' && rch == '}'); 66 | return string != null 67 | && ("null".equals(string) 68 | || (string.startsWith("[") && string.endsWith("]")) || (string.startsWith("{") && string.endsWith("}"))); 69 | } 70 | 71 | static Object parseJSONObjectOrArray(String string) { 72 | return JSON.parse(string); 73 | } 74 | 75 | private String convertLogToJSONString(FastLogGroup logGroup, FastLog log) { 76 | int fieldCount = log.getContentsCount(); 77 | JSONObject record = new JSONObject(fieldCount); 78 | for (int i = 0; i < fieldCount; i++) { 79 | FastLogContent content = log.getContents(i); 80 | final String key = content.getKey(); 81 | final String value = content.getValue(); 82 | if (autoDetectJSONFields && mayBeJSON(value)) { 83 | try { 84 | record.put(key, parseJSONObjectOrArray(value)); 85 | } catch (Exception jex) { 86 | record.put(key, value); 87 | } 88 | } else { 89 | record.put(key, value); 90 | } 91 | } 92 | if (timeAsField) { 93 | record.put(RECORD_TIME_KEY, String.valueOf(log.getTime())); 94 | } 95 | if (tagAsField) { 96 | int tagCount = logGroup.getLogTagsCount(); 97 | for (int i = 0; i < tagCount; i++) { 98 | FastLogTag tag = logGroup.getLogTags(i); 99 | record.put(RECORD_TAG_PREFIX + tag.getKey(), tag.getValue()); 100 | } 101 | } 102 | if (sourceAsField) { 103 | record.put(RECORD_SOURCE_KEY, logGroup.getSource()); 104 | } 105 | if (topicAsField) { 106 | record.put(RECORD_TOPIC_KEY, logGroup.getTopic()); 107 | } 108 | return record.toJSONString(); 109 | } 110 | 111 | @Override 112 | public List<Event> deserialize(FastLogGroup logGroup) { 113 | int count = logGroup.getLogsCount(); 114 | List<Event> events = new ArrayList<>(count); 115 | LOG.debug("Converting log group to events, log count {}", count); 116 | for (int idx = 0; idx < count; ++idx) { 117 | FastLog log = logGroup.getLogs(idx); 118 | String logAsJSON = convertLogToJSONString(logGroup, log); 119 | int recordTime = log.getTime(); 120 | long timestamp; 121 | if (useRecordTime) { 122 | timestamp = ((long) recordTime) * 1000; 123 | } else { 124 | timestamp = System.currentTimeMillis(); 125 | } 126 | Event event = EventBuilder.withBody(logAsJSON, charset, 127 | Collections.singletonMap(TIMESTAMP, String.valueOf(timestamp))); 128 | events.add(event); 129 | } 130 | LOG.debug("Converting log group to events done, event count {}", events.size()); 131 | return events; 132 | } 133 | 134 | @Override 135 | public void configure(Context context) { 136 | useRecordTime = context.getBoolean(USE_RECORD_TIME, false); 137 | sourceAsField = context.getBoolean(SOURCE_AS_FIELD, false); 138 | tagAsField = context.getBoolean(TAG_AS_FIELD, false); 139 | timeAsField = context.getBoolean(TIME_AS_FIELD, false); 140 | topicAsField = context.getBoolean(TOPIC_AS_FIELD, false); 141 | autoDetectJSONFields = context.getBoolean(AUTO_DETECT_JSON_FIELDS, true); 142 | } 143 | } 144 | 
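The JSON deserializer above is selected and tuned entirely through the Flume source context. As a rough illustration only (the exact property keys are defined in Constants.java, which is not part of this excerpt, so the key names below are assumptions based on the deserializer's field names; the value `JSON` matches its ALIAS), a Loghub source that keeps the record time and carries tags, topic, and source into the JSON body might be configured like this:

```
agent.sources.slssrc.deserializer = JSON
# The keys below are assumed to mirror the deserializer's field names:
agent.sources.slssrc.useRecordTime = true
agent.sources.slssrc.timeAsField = true
agent.sources.slssrc.tagAsField = true
agent.sources.slssrc.sourceAsField = true
agent.sources.slssrc.topicAsField = true
agent.sources.slssrc.autoDetectJSONFields = true
```

With `autoDetectJSONFields` left at its default of `true`, any field value that looks like a JSON object or array is parsed and nested into the event body; if parsing fails, the raw string value is kept as-is.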
-------------------------------------------------------------------------------- /src/main/java/com/aliyun/loghub/flume/source/LogReceiver.java: -------------------------------------------------------------------------------- 1 | package com.aliyun.loghub.flume.source; 2 | 3 | import com.aliyun.openservices.log.common.FastLogGroup; 4 | import com.aliyun.openservices.log.common.LogGroupData; 5 | import com.aliyun.openservices.loghub.client.ILogHubCheckPointTracker; 6 | import com.aliyun.openservices.loghub.client.exceptions.LogHubCheckPointException; 7 | import com.aliyun.openservices.loghub.client.interfaces.ILogHubProcessor; 8 | import org.apache.flume.ChannelFullException; 9 | import org.apache.flume.Event; 10 | import org.apache.flume.channel.ChannelProcessor; 11 | import org.apache.flume.instrumentation.SourceCounter; 12 | import org.slf4j.Logger; 13 | import org.slf4j.LoggerFactory; 14 | 15 | import java.util.ArrayList; 16 | import java.util.List; 17 | import java.util.Random; 18 | 19 | /** 20 | * Receives logs from Loghub and sends them to the Flume channel. 21 | */ 22 | class LogReceiver implements ILogHubProcessor { 23 | 24 | private static final Logger LOG = LoggerFactory.getLogger(LogReceiver.class); 25 | 26 | private final ChannelProcessor processor; 27 | private final EventDeserializer deserializer; 28 | private final SourceCounter sourceCounter; 29 | private final String sourceName; 30 | 31 | private int shardId = 0; 32 | private long checkpointSavedAt = 0; 33 | private Random random; 34 | private volatile boolean running; 35 | private volatile boolean success; 36 | private int maxRetry; 37 | 38 | LogReceiver(ChannelProcessor processor, 39 | EventDeserializer deserializer, 40 | SourceCounter sourceCounter, 41 | String sourceName, 42 | int maxRetry) { 43 | this.processor = processor; 44 | this.deserializer = deserializer; 45 | this.sourceCounter = sourceCounter; 46 | this.sourceName = sourceName; 47 | this.random = new Random(); 48 | this.maxRetry = maxRetry; 49 | this.running = true; 50 | this.success = true; 51 | } 52 | 53 | @Override 54 | public void initialize(int shardId) { 55 | LOG.debug("LogReceiver for shard {} has been initialized", shardId); 56 | this.shardId = shardId; 57 | } 58 | 59 | private boolean emitEvents(List<Event> events) { 60 | int count = events.size(); 61 | int retry = 0; 62 | long backoff = 1000; 63 | long maxBackoff = 30000; 64 | while (retry < maxRetry && running) { 65 | if (retry > 0) { 66 | try { 67 | Thread.sleep(random.nextInt(500) + backoff); 68 | } catch (InterruptedException e) { 69 | Thread.currentThread().interrupt(); 70 | // It's OK as we don't need to exit based on this signal 71 | } 72 | backoff = Math.min((long) (backoff * 1.2), maxBackoff); 73 | } 74 | try { 75 | long beginTime = System.currentTimeMillis(); 76 | LOG.debug("Sending {} events to Flume", count); 77 | processor.processEventBatch(events); 78 | sourceCounter.addToEventReceivedCount(count); 79 | long elapsedTime = System.currentTimeMillis() - beginTime; 80 | LOG.debug("Processed {} events, elapsedTime {}", count, elapsedTime); 81 | return true; 82 | } catch (ChannelFullException ex) { 83 | // For Queue Full, retry until success. 
84 | LOG.warn("Queue full, wait and retry"); 85 | } catch (final Exception ex) { 86 | if (retry < maxRetry - 1) { 87 | LOG.warn("{} - failed to send data, retrying: {}", sourceName, ex.getMessage()); 88 | retry++; 89 | } else { 90 | LOG.error("{} - failed to send data, data may be lost", sourceName, ex); 91 | success = false; 92 | break; 93 | } 94 | } 95 | } 96 | return false; 97 | } 98 | 99 | @Override 100 | public String process(List<LogGroupData> logGroups, ILogHubCheckPointTracker tracker) { 101 | LOG.debug("Processing {} log groups", logGroups.size()); 102 | int totalCount = 0; 103 | List<Event> batchEvents = new ArrayList<>(); 104 | for (LogGroupData data : logGroups) { 105 | FastLogGroup logGroup = data.GetFastLogGroup(); 106 | List<Event> events = deserializer.deserialize(logGroup); 107 | int numberOfEvents = events.size(); 108 | totalCount += numberOfEvents; 109 | LOG.debug("{} events serialized for shard {}", numberOfEvents, shardId); 110 | if (numberOfEvents == 0) { 111 | continue; 112 | } 113 | batchEvents.addAll(events); 114 | if (batchEvents.size() < 1024) { 115 | continue; 116 | } 117 | success = emitEvents(batchEvents); 118 | if (success) { 119 | batchEvents = new ArrayList<>(); 120 | } 121 | } 122 | if (!batchEvents.isEmpty()) { 123 | success = emitEvents(batchEvents); 124 | } 125 | LOG.debug("{} events have been serialized from {} log groups", totalCount, logGroups.size()); 126 | long nowMs = System.currentTimeMillis(); 127 | if (success && nowMs - checkpointSavedAt > 30 * 1000) { 128 | try { 129 | tracker.saveCheckPoint(true); 130 | checkpointSavedAt = nowMs; 131 | LOG.info("SLS source processed {} logs", sourceCounter.getEventReceivedCount()); 132 | } catch (LogHubCheckPointException ex) { 133 | LOG.error("Failed to save checkpoint to remote server", ex); 134 | } 135 | } 136 | return null; 137 | } 138 | 139 | @Override 140 | public void shutdown(ILogHubCheckPointTracker checkPointTracker) { 141 | LOG.info("Shutting down receiver."); 142 | running = false; 143 | if (success) { 144 | try { 145 | checkPointTracker.saveCheckPoint(true); 146 | } catch (Exception ex) { 147 | LOG.error("Failed to save checkpoint to remote server", ex); 148 | } 149 | } 150 | } 151 | } -------------------------------------------------------------------------------- /src/main/java/com/aliyun/loghub/flume/source/LoghubSource.java: -------------------------------------------------------------------------------- 1 | package com.aliyun.loghub.flume.source; 2 | 3 | import com.aliyun.loghub.flume.Validate; 4 | import com.aliyun.openservices.loghub.client.ClientWorker; 5 | import com.aliyun.openservices.loghub.client.config.LogHubConfig; 6 | import com.aliyun.openservices.loghub.client.config.LogHubConfig.ConsumePosition; 7 | import com.google.common.annotations.VisibleForTesting; 8 | import org.apache.commons.lang.StringUtils; 9 | import org.apache.flume.Context; 10 | import org.apache.flume.EventDrivenSource; 11 | import org.apache.flume.FlumeException; 12 | import org.apache.flume.conf.Configurable; 13 | import org.apache.flume.instrumentation.SourceCounter; 14 | import org.apache.flume.source.AbstractSource; 15 | import org.slf4j.Logger; 16 | import org.slf4j.LoggerFactory; 17 | 18 | import java.net.InetAddress; 19 | import java.net.UnknownHostException; 20 | import java.util.UUID; 21 | 22 | import static com.aliyun.loghub.flume.Constants.ACCESS_KEY_ID_KEY; 23 | import static com.aliyun.loghub.flume.Constants.ACCESS_KEY_SECRET_KEY; 24 | import static com.aliyun.loghub.flume.Constants.BATCH_SIZE; 25 | import static 
com.aliyun.loghub.flume.Constants.CONSUMER_GROUP_KEY; 26 | import static com.aliyun.loghub.flume.Constants.CONSUME_INITIAL_POSITION; 27 | import static com.aliyun.loghub.flume.Constants.CONSUME_POSITION_BEGIN; 28 | import static com.aliyun.loghub.flume.Constants.CONSUME_POSITION_END; 29 | import static com.aliyun.loghub.flume.Constants.CONSUME_POSITION_TIMESTAMP; 30 | import static com.aliyun.loghub.flume.Constants.DEFAULT_BATCH_SIZE; 31 | import static com.aliyun.loghub.flume.Constants.DEFAULT_FETCH_INTERVAL_MS; 32 | import static com.aliyun.loghub.flume.Constants.DEFAULT_FETCH_IN_ORDER; 33 | import static com.aliyun.loghub.flume.Constants.DEFAULT_HEARTBEAT_INTERVAL_MS; 34 | import static com.aliyun.loghub.flume.Constants.DEFAULT_MAX_RETRY; 35 | import static com.aliyun.loghub.flume.Constants.DESERIALIZER; 36 | import static com.aliyun.loghub.flume.Constants.ENDPOINT_KEY; 37 | import static com.aliyun.loghub.flume.Constants.FETCH_INTERVAL_MS; 38 | import static com.aliyun.loghub.flume.Constants.FETCH_IN_ORDER; 39 | import static com.aliyun.loghub.flume.Constants.HEARTBEAT_INTERVAL_MS; 40 | import static com.aliyun.loghub.flume.Constants.LOGSTORE_KEY; 41 | import static com.aliyun.loghub.flume.Constants.LOG_CONNECTOR_USER_AGENT; 42 | import static com.aliyun.loghub.flume.Constants.LOG_USER_AGENT; 43 | import static com.aliyun.loghub.flume.Constants.MAX_RETRY; 44 | import static com.aliyun.loghub.flume.Constants.PROJECT_KEY; 45 | import static com.aliyun.loghub.flume.Constants.QUERY; 46 | import static com.google.common.base.Preconditions.checkArgument; 47 | 48 | 49 | public class LoghubSource extends AbstractSource implements 50 | EventDrivenSource, Configurable { 51 | private static final Logger LOG = LoggerFactory.getLogger(LoghubSource.class); 52 | 53 | private LogHubConfig config; 54 | private ClientWorker worker; 55 | private SourceCounter counter; 56 | private EventDeserializer deserializer; 57 | private int maxRetry; 58 | 59 | @Override 60 | public void configure(Context context) { 61 | config = parseConsumerConfig(context); 62 | deserializer = createDeserializer(context); 63 | maxRetry = context.getInteger(MAX_RETRY, DEFAULT_MAX_RETRY); 64 | } 65 | 66 | private static LogHubConfig parseConsumerConfig(Context context) { 67 | String endpoint = context.getString(ENDPOINT_KEY); 68 | Validate.notEmpty(endpoint, ENDPOINT_KEY); 69 | String project = context.getString(PROJECT_KEY); 70 | Validate.notEmpty(project, PROJECT_KEY); 71 | String logstore = context.getString(LOGSTORE_KEY); 72 | Validate.notEmpty(logstore, LOGSTORE_KEY); 73 | String accessKeyId = context.getString(ACCESS_KEY_ID_KEY); 74 | Validate.notEmpty(accessKeyId, ACCESS_KEY_ID_KEY); 75 | String accessKey = context.getString(ACCESS_KEY_SECRET_KEY); 76 | Validate.notEmpty(accessKey, ACCESS_KEY_SECRET_KEY); 77 | String consumerGroup = context.getString(CONSUMER_GROUP_KEY); 78 | if (StringUtils.isBlank(consumerGroup)) { 79 | LOG.info("Loghub Consumer Group is not specified, will generate a random Consumer Group name."); 80 | consumerGroup = createConsumerGroupName(); 81 | } 82 | String consumerId = UUID.randomUUID().toString(); 83 | LOG.info("Using consumer group {}, consumer {}", consumerGroup, consumerId); 84 | 85 | long heartbeatIntervalMs = context.getLong(HEARTBEAT_INTERVAL_MS, DEFAULT_HEARTBEAT_INTERVAL_MS); 86 | long fetchIntervalMs = context.getLong(FETCH_INTERVAL_MS, DEFAULT_FETCH_INTERVAL_MS); 87 | boolean fetchInOrder = context.getBoolean(FETCH_IN_ORDER, DEFAULT_FETCH_IN_ORDER); 88 | int batchSize = 
context.getInteger(BATCH_SIZE, DEFAULT_BATCH_SIZE); 89 | 90 | String position = context.getString(CONSUME_INITIAL_POSITION, CONSUME_POSITION_BEGIN); 91 | LogHubConfig config; 92 | switch (position) { 93 | case CONSUME_POSITION_TIMESTAMP: 94 | Integer startTime = context.getInteger(CONSUME_POSITION_TIMESTAMP); 95 | checkArgument(startTime != null, "Missing parameter: " + CONSUME_POSITION_TIMESTAMP); 96 | checkArgument(startTime > 0, "timestamp must be > 0"); 97 | config = new LogHubConfig(consumerGroup, consumerId, endpoint, project, logstore, accessKeyId, accessKey, 98 | startTime, batchSize); 99 | break; 100 | case CONSUME_POSITION_END: 101 | config = new LogHubConfig(consumerGroup, consumerId, endpoint, project, logstore, accessKeyId, accessKey, 102 | ConsumePosition.END_CURSOR, batchSize); 103 | break; 104 | default: 105 | // Start from the earliest position by default 106 | config = new LogHubConfig(consumerGroup, consumerId, endpoint, project, logstore, accessKeyId, accessKey, 107 | ConsumePosition.BEGIN_CURSOR, batchSize); 108 | break; 109 | } 110 | String query = context.getString(QUERY); 111 | if (!StringUtils.isBlank(query)) { 112 | config.setQuery(query); 113 | } 114 | config.setHeartBeatIntervalMillis(heartbeatIntervalMs); 115 | config.setConsumeInOrder(fetchInOrder); 116 | config.setFetchIntervalMillis(fetchIntervalMs); 117 | String userAgent = context.getString(LOG_USER_AGENT); 118 | if (StringUtils.isBlank(userAgent)) { 119 | userAgent = LOG_CONNECTOR_USER_AGENT; 120 | } 121 | config.setUserAgent(userAgent); 122 | return config; 123 | } 124 | 125 | @VisibleForTesting 126 | static String createConsumerGroupName() { 127 | try { 128 | return InetAddress.getLocalHost().getHostName().replace('.', '-').toLowerCase(); 129 | } catch (UnknownHostException e) { 130 | return UUID.randomUUID().toString(); 131 | } 132 | } 133 | 134 | private EventDeserializer createDeserializer(Context context) { 135 | String deserializerName = context.getString(DESERIALIZER); 136 | EventDeserializer deserializer; 137 | if (deserializerName == null || deserializerName.isEmpty()) { 138 | deserializer = new DelimitedTextEventDeserializer(); 139 | } else if (deserializerName.equals(DelimitedTextEventDeserializer.ALIAS) 140 | || deserializerName.equalsIgnoreCase(DelimitedTextEventDeserializer.class.getName())) { 141 | deserializer = new DelimitedTextEventDeserializer(); 142 | } else if (deserializerName.equals(JSONEventDeserializer.ALIAS) 143 | || deserializerName.equalsIgnoreCase(JSONEventDeserializer.class.getName())) { 144 | deserializer = new JSONEventDeserializer(); 145 | } else { 146 | try { 147 | deserializer = (EventDeserializer) Class.forName(deserializerName).newInstance(); 148 | } catch (Exception e) { 149 | throw new IllegalArgumentException("Unable to instantiate deserializer: " + deserializerName 150 | + " on source: " + getName(), e); 151 | } 152 | } 153 | deserializer.configure(context); 154 | return deserializer; 155 | } 156 | 157 | @Override 158 | public void start() throws FlumeException { 159 | LOG.info("Starting Loghub source {}...", getName()); 160 | try { 161 | worker = new ClientWorker(() -> 162 | new LogReceiver(getChannelProcessor(), deserializer, counter, getName(), maxRetry), config); 163 | } catch (Exception e) { 164 | throw new FlumeException("Fail to start log service client worker.", e); 165 | } 166 | Runtime.getRuntime().addShutdownHook(new Thread(() -> { 167 | LOG.info("Shutting down source..."); 168 | worker.shutdown(); 169 | })); 170 | Thread consumerThread = new 
Thread(worker); 171 | consumerThread.start(); 172 | LOG.info("Loghub consumer for source {} started.", getName()); 173 | if (counter == null) { 174 | counter = new SourceCounter(getName()); 175 | } 176 | counter.start(); 177 | super.start(); 178 | LOG.info("Loghub source {} started.", getName()); 179 | } 180 | 181 | @Override 182 | public void stop() throws FlumeException { 183 | if (worker != null) { 184 | worker.shutdown(); 185 | LOG.info("Loghub consumer stopped."); 186 | } 187 | if (counter != null) { 188 | counter.stop(); 189 | } 190 | super.stop(); 191 | LOG.info("Loghub source {} stopped. Metrics: {}", getName(), counter); 192 | } 193 | 194 | } 195 | -------------------------------------------------------------------------------- /src/main/java/com/aliyun/loghub/flume/utils/VersionInfoUtils.java: -------------------------------------------------------------------------------- 1 | package com.aliyun.loghub.flume.utils; 2 | 3 | import java.io.InputStream; 4 | import java.util.Properties; 5 | 6 | public class VersionInfoUtils { 7 | private static final String VERSION_INFO_FILE = "flume-versioninfo.properties"; 8 | private static final String USER_AGENT_PREFIX = "aliyun-log-flume"; 9 | 10 | private static String version = null; 11 | 12 | private static String defaultUserAgent = null; 13 | 14 | private static String getVersion() { 15 | if (version == null) { 16 | initializeVersion(); 17 | } 18 | return version; 19 | } 20 | 21 | public static String getDefaultUserAgent() { 22 | if (defaultUserAgent == null) { 23 | defaultUserAgent = USER_AGENT_PREFIX + "-" + getVersion() + "/" + System.getProperty("java.version"); 24 | } 25 | return defaultUserAgent; 26 | } 27 | 28 | private static void initializeVersion() { 29 | InputStream inputStream = VersionInfoUtils.class.getClassLoader().getResourceAsStream(VERSION_INFO_FILE); 30 | Properties versionInfoProperties = new Properties(); 31 | try { 32 | if (inputStream == null) { 33 | throw new IllegalArgumentException(VERSION_INFO_FILE + " not found on classpath"); 34 | } 35 | versionInfoProperties.load(inputStream); 36 | version = versionInfoProperties.getProperty("version"); 37 | } catch (Exception e) { 38 | version = "unknown-version"; 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/resources/flume-versioninfo.properties: -------------------------------------------------------------------------------- 1 | version=1.8 2 | -------------------------------------------------------------------------------- /src/test/java/com/aliyun/loghub/flume/sink/TestLoghubSink.java: -------------------------------------------------------------------------------- 1 | package com.aliyun.loghub.flume.sink; 2 | 3 | import org.apache.flume.Channel; 4 | import org.apache.flume.Context; 5 | import org.apache.flume.Event; 6 | import org.apache.flume.Sink; 7 | import org.apache.flume.Transaction; 8 | import org.apache.flume.channel.MemoryChannel; 9 | import org.apache.flume.conf.Configurables; 10 | import org.apache.flume.event.EventBuilder; 11 | import org.junit.After; 12 | import org.junit.Before; 13 | import org.junit.Test; 14 | import java.util.HashMap; 15 | import java.util.Map; 16 | import java.util.UUID; 17 | 18 | import static org.apache.flume.source.SpoolDirectorySourceConfigurationConstants.BATCH_SIZE; 19 | 20 | 21 | public class TestLoghubSink { 22 | 23 | private Map<String, String> parameters = new HashMap<>(); 24 | private LoghubSink fixture; 25 | 26 | private Channel bindAndStartChannel(LoghubSink fixture) { 27 | // Configure the channel 28 | Channel channel 
= new MemoryChannel(); 29 | Configurables.configure(channel, new Context()); 30 | // Wire them together 31 | fixture.setChannel(channel); 32 | fixture.start(); 33 | return channel; 34 | } 35 | 36 | @Before 37 | public void init() throws Exception { 38 | fixture = new LoghubSink(); 39 | fixture.setName("LoghubSink-" + UUID.randomUUID().toString()); 40 | } 41 | 42 | @After 43 | public void tearDown() throws Exception { 44 | // No-op 45 | } 46 | 47 | @Test 48 | public void shouldIndexOneEvent() throws Exception { 49 | Configurables.configure(fixture, new Context(parameters)); 50 | Channel channel = bindAndStartChannel(fixture); 51 | 52 | Transaction tx = channel.getTransaction(); 53 | tx.begin(); 54 | Event event = EventBuilder.withBody("event #1 or 1".getBytes()); 55 | channel.put(event); 56 | tx.commit(); 57 | tx.close(); 58 | 59 | fixture.process(); 60 | fixture.stop(); 61 | 62 | // check result 63 | } 64 | 65 | @Test 66 | public void shouldIndexInvalidComplexJsonBody() throws Exception { 67 | parameters.put(BATCH_SIZE, "3"); 68 | Configurables.configure(fixture, new Context(parameters)); 69 | Channel channel = bindAndStartChannel(fixture); 70 | 71 | Transaction tx = channel.getTransaction(); 72 | tx.begin(); 73 | Event event1 = EventBuilder.withBody("TEST1 {test}".getBytes()); 74 | channel.put(event1); 75 | Event event2 = EventBuilder.withBody("{test: TEST2 }".getBytes()); 76 | channel.put(event2); 77 | Event event3 = EventBuilder.withBody("{\"test\":{ TEST3 {test} }}".getBytes()); 78 | channel.put(event3); 79 | tx.commit(); 80 | tx.close(); 81 | 82 | fixture.process(); 83 | fixture.stop(); 84 | } 85 | 86 | @Test 87 | public void shouldIndexComplexJsonEvent() throws Exception { 88 | Configurables.configure(fixture, new Context(parameters)); 89 | Channel channel = bindAndStartChannel(fixture); 90 | 91 | Transaction tx = channel.getTransaction(); 92 | tx.begin(); 93 | Event event = EventBuilder.withBody( 94 | "{\"event\":\"json content\",\"num\":1}".getBytes()); 95 | channel.put(event); 96 | tx.commit(); 97 | tx.close(); 98 | 99 | fixture.process(); 100 | fixture.stop(); 101 | } 102 | 103 | @Test 104 | public void shouldIndexFiveEvents() throws Exception { 105 | // Make it so we only need to call process once 106 | parameters.put(BATCH_SIZE, "5"); 107 | Configurables.configure(fixture, new Context(parameters)); 108 | Channel channel = bindAndStartChannel(fixture); 109 | 110 | int numberOfEvents = 5; 111 | Event[] events = new Event[numberOfEvents]; 112 | 113 | Transaction tx = channel.getTransaction(); 114 | tx.begin(); 115 | for (int i = 0; i < numberOfEvents; i++) { 116 | String body = "event #" + i + " of " + numberOfEvents; 117 | Event event = EventBuilder.withBody(body.getBytes()); 118 | events[i] = event; 119 | channel.put(event); 120 | } 121 | tx.commit(); 122 | tx.close(); 123 | 124 | fixture.process(); 125 | fixture.stop(); 126 | } 127 | 128 | @Test 129 | public void shouldIndexFiveEventsOverThreeBatches() throws Exception { 130 | parameters.put(BATCH_SIZE, "2"); 131 | Configurables.configure(fixture, new Context(parameters)); 132 | Channel channel = bindAndStartChannel(fixture); 133 | 134 | int numberOfEvents = 5; 135 | Event[] events = new Event[numberOfEvents]; 136 | 137 | Transaction tx = channel.getTransaction(); 138 | tx.begin(); 139 | for (int i = 0; i < numberOfEvents; i++) { 140 | String body = "event #" + i + " of " + numberOfEvents; 141 | Event event = EventBuilder.withBody(body.getBytes()); 142 | events[i] = event; 143 | channel.put(event); 144 | } 145 | tx.commit(); 
146 | tx.close(); 147 | 148 | int count = 0; 149 | Sink.Status status = Sink.Status.READY; 150 | while (status != Sink.Status.BACKOFF) { 151 | count++; 152 | status = fixture.process(); 153 | } 154 | fixture.stop(); 155 | } 156 | } -------------------------------------------------------------------------------- /src/test/java/com/aliyun/loghub/flume/source/JSONEventDeserializerTest.java: -------------------------------------------------------------------------------- 1 | package com.aliyun.loghub.flume.source; 2 | 3 | import com.alibaba.fastjson.JSONArray; 4 | import com.alibaba.fastjson.JSONObject; 5 | import com.google.gson.Gson; 6 | import org.apache.flume.Context; 7 | import org.junit.Test; 8 | 9 | import java.time.LocalDateTime; 10 | import java.time.ZoneOffset; 11 | import java.time.format.DateTimeFormatter; 12 | 13 | import static org.junit.Assert.assertTrue; 14 | 15 | public class JSONEventDeserializerTest { 16 | 17 | 18 | @Test 19 | public void testAutoDetectJSON() { 20 | String validJSONArray = "[{\"foor\":\"bar\"}]"; 21 | assertTrue(JSONEventDeserializer.mayBeJSON(validJSONArray)); 22 | Object object = JSONEventDeserializer.parseJSONObjectOrArray(validJSONArray); 23 | assertTrue(object instanceof JSONArray); 24 | 25 | String validJSONObject = "{\"foo\":\"bar\"}"; 26 | assertTrue(JSONEventDeserializer.mayBeJSON(validJSONObject)); 27 | Object result = JSONEventDeserializer.parseJSONObjectOrArray(validJSONObject); 28 | assertTrue(result instanceof JSONObject); 29 | } 30 | 31 | @Test 32 | public void testAddTopic(){ 33 | Context context = new Context(); 34 | //context.put("topicAsField","true"); 35 | JSONEventDeserializer jsonEventDeserializer = new JSONEventDeserializer(); 36 | jsonEventDeserializer.configure(context); 37 | System.out.println(new Gson().toJson(jsonEventDeserializer)); 38 | } 39 | 40 | @Test 41 | public void testTimeParser() { 42 | DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-M-dd HH:mm:ss"); 43 | LocalDateTime time = LocalDateTime.parse("2020-9-23 10:45:00", formatter); 44 | System.out.println((int) (time.toEpochSecond(ZoneOffset.UTC))); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/test/java/com/aliyun/loghub/flume/source/LoghubSourceTest.java: -------------------------------------------------------------------------------- 1 | package com.aliyun.loghub.flume.source; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Test; 5 | 6 | import java.util.regex.Pattern; 7 | 8 | public class LoghubSourceTest { 9 | 10 | 11 | @Test 12 | public void testCreateConsumerGroupName() { 13 | String consumerGroup = LoghubSource.createConsumerGroupName(); 14 | System.out.println(consumerGroup); 15 | Pattern pattern = Pattern.compile("[0-9a-z-_]{2,64}"); 16 | Assert.assertTrue(pattern.matcher(consumerGroup).matches()); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. 
You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | # 19 | 20 | # Define some default values that can be overridden by system properties. 21 | # 22 | # For testing, it may also be convenient to specify 23 | # -Dflume.root.logger=DEBUG,console when launching flume. 24 | 25 | flume.root.logger=DEBUG,console 26 | #flume.root.logger=INFO,LOGFILE 27 | flume.log.dir=./logs 28 | flume.log.file=flume.log 29 | 30 | log4j.logger.org.apache.flume.lifecycle = INFO 31 | log4j.logger.org.mortbay = INFO 32 | log4j.logger.org.apache.avro.ipc.NettyTransceiver = WARN 33 | log4j.logger.org.apache.hadoop = INFO 34 | log4j.logger.org.apache.hadoop.hive = ERROR 35 | log4j.logger.org.apache.http = ERROR 36 | 37 | 38 | # Define the root logger to the system property "flume.root.logger". 39 | log4j.rootLogger=${flume.root.logger} 40 | 41 | 42 | # Stock log4j rolling file appender 43 | # Default log rotation configuration 44 | log4j.appender.LOGFILE=org.apache.log4j.RollingFileAppender 45 | log4j.appender.LOGFILE.MaxFileSize=100MB 46 | log4j.appender.LOGFILE.MaxBackupIndex=10 47 | log4j.appender.LOGFILE.File=${flume.log.dir}/${flume.log.file} 48 | log4j.appender.LOGFILE.layout=org.apache.log4j.PatternLayout 49 | log4j.appender.LOGFILE.layout.ConversionPattern=%d{dd MMM yyyy HH:mm:ss,SSS} %-5p [%t] (%C.%M:%L) %x - %m%n 50 | 51 | 52 | # Warning: If you enable the following appender it will fill up your disk if you don't have a cleanup job! 53 | # This uses the updated rolling file appender from log4j-extras that supports a reliable time-based rolling policy. 
54 | # See http://logging.apache.org/log4j/companions/extras/apidocs/org/apache/log4j/rolling/TimeBasedRollingPolicy.html 55 | # Add "DAILY" to flume.root.logger above if you want to use this 56 | log4j.appender.DAILY=org.apache.log4j.rolling.RollingFileAppender 57 | log4j.appender.DAILY.rollingPolicy=org.apache.log4j.rolling.TimeBasedRollingPolicy 58 | log4j.appender.DAILY.rollingPolicy.ActiveFileName=${flume.log.dir}/${flume.log.file} 59 | log4j.appender.DAILY.rollingPolicy.FileNamePattern=${flume.log.dir}/${flume.log.file}.%d{yyyy-MM-dd} 60 | log4j.appender.DAILY.layout=org.apache.log4j.PatternLayout 61 | log4j.appender.DAILY.layout.ConversionPattern=%d{dd MMM yyyy HH:mm:ss,SSS} %-5p [%t] (%C.%M:%L) %x - %m%n 62 | 63 | 64 | # console 65 | # Add "console" to flume.root.logger above if you want to use this 66 | log4j.appender.console=org.apache.log4j.ConsoleAppender 67 | log4j.appender.console.target=System.err 68 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 69 | log4j.appender.console.layout.ConversionPattern=%d (%t) [%p - %l] %m%n 70 | -------------------------------------------------------------------------------- /src/test/resources/sink-example.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Source: netcat 3 | # Sink: Loghub 4 | 5 | agent.sources = netcatsource 6 | agent.sinks = slssink 7 | agent.channels = memoryChannel 8 | 9 | # Configure the source: 10 | agent.sources.netcatsource.type = netcat 11 | agent.sources.netcatsource.bind = localhost 12 | agent.sources.netcatsource.port = 44444 13 | 14 | # Describe the sink: 15 | agent.sinks.slssink.type = com.aliyun.loghub.flume.sink.LoghubSink 16 | agent.sinks.slssink.endpoint = 17 | agent.sinks.slssink.project = 18 | agent.sinks.slssink.logstore = 19 | agent.sinks.slssink.accessKeyId = 20 | agent.sinks.slssink.accessKey = 21 | 22 | 23 | # Configure a channel that buffers events in memory: 24 | agent.channels.memoryChannel.type = memory 25 | agent.channels.memoryChannel.capacity = 20000 26 | agent.channels.memoryChannel.transactionCapacity = 100 27 | 28 | # Bind the source and sink to the channel: 29 | agent.sources.netcatsource.channels = memoryChannel 30 | agent.sinks.slssink.channel = memoryChannel 31 | -------------------------------------------------------------------------------- /src/test/resources/source-example.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Source: Loghub 3 | # Sink: HDFS 4 | # 5 | agent.sources = slssrc 6 | agent.sinks = hdfssink 7 | agent.channels = memoryChannel 8 | 9 | # Configure the source: 10 | agent.sources.slssrc.type = com.aliyun.loghub.flume.source.LoghubSource 11 | agent.sources.slssrc.endpoint = 12 | agent.sources.slssrc.project = 13 | agent.sources.slssrc.logstore = 14 | agent.sources.slssrc.accessKeyId = 15 | agent.sources.slssrc.accessKey = 16 | agent.sources.slssrc.columns = 17 | agent.sources.slssrc.separatorChar = , 18 | 19 | # Describe the sink: 20 | agent.sinks.hdfssink.type = hdfs 21 | agent.sinks.hdfssink.hdfs.path = hdfs://localhost:8020/user/root/test 22 | agent.sinks.hdfssink.hdfs.writeFormat = Text 23 | agent.sinks.hdfssink.hdfs.round = true 24 | agent.sinks.hdfssink.hdfs.roundValue = 20 25 | agent.sinks.hdfssink.hdfs.roundUnit = minute 26 | agent.sinks.hdfssink.hdfs.rollSize = 0 27 | agent.sinks.hdfssink.hdfs.rollCount = 0 28 | agent.sinks.hdfssink.hdfs.fileType = DataStream 29 | agent.sinks.hdfssink.hdfs.useLocalTimeStamp = true 30 | 31 | # Configure a channel that buffers 
events in memory: 32 | agent.channels.memoryChannel.type = memory 33 | agent.channels.memoryChannel.capacity = 20000 34 | agent.channels.memoryChannel.transactionCapacity = 100 35 | 36 | 37 | # Bind the source and sink to the channel: 38 | agent.sources.slssrc.channels = memoryChannel 39 | agent.sinks.hdfssink.channel = memoryChannel 40 | --------------------------------------------------------------------------------
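Both example configurations above use `agent` as the agent name. As a usage sketch (file locations are placeholders, and the logger override is the same one suggested in the log4j.properties comments above), either file can be copied into the Flume conf directory and started with the standard flume-ng launcher:

```
cd apache-flume-1.9.0-bin
bin/flume-ng agent --conf conf --conf-file conf/source-example.conf --name agent -Dflume.root.logger=INFO,console
```

The value passed to `--name` must match the prefix used inside the configuration file (`agent.sources`, `agent.sinks`, `agent.channels`); otherwise Flume starts without any components.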