├── README.md ├── pom.xml └── src ├── main └── java │ ├── log4j.xml │ ├── org │ └── apache │ │ └── flume │ │ └── chiwei │ │ └── filemonitor │ │ ├── Constants.java │ │ ├── FileMonitorSource.java │ │ └── PositionLog.java │ └── source.conf └── test └── java └── org └── apache └── flume └── chiwei └── filemonitor └── test ├── TestConstants.java └── TestFileMonitorSource.java /README.md: -------------------------------------------------------------------------------- 1 | # flume-filemonitor-source 2 | 3 | This source records its read position in the monitored file, so if the Flume application is killed it knows which position to resume reading from the next time it starts. 4 | 5 | 6 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 4.0.0 6 | 7 | flume-source 8 | flume-filemonitor-source 9 | Flume FileMonitor Sink 10 | 1.0.0 11 | 12 | 13 | 14 | org.apache.maven.plugins 15 | maven-jar-plugin 16 | 17 | 18 | 19 | 20 | 21 | 22 | org.apache.flume 23 | flume-ng-sdk 24 | 1.5.2 25 | 26 | 27 | 28 | org.apache.flume 29 | flume-ng-core 30 | 1.5.2 31 | 32 | 33 | 34 | org.apache.flume 35 | flume-ng-configuration 36 | 1.5.2 37 | 38 | 39 | 40 | org.slf4j 41 | slf4j-api 42 | 1.6.1 43 | 44 | 45 | 46 | junit 47 | junit 48 | 4.10 49 | test 50 | 51 | 52 | 53 | com.google.guava 54 | guava 55 | 18.0 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /src/main/java/log4j.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /src/main/java/org/apache/flume/chiwei/filemonitor/Constants.java: -------------------------------------------------------------------------------- 1 | package org.apache.flume.chiwei.filemonitor; 2 | 3 | public class 
Constants { 4 | 5 | public static String MONITOR_FILE = "file"; 6 | 7 | public static String POSITION_DIR = "positionDir"; 8 | 9 | public static String POSITION_FILE_NAME = "position.log"; 10 | 11 | public static long POSITION_INIT_VALUE = 0L; 12 | 13 | public static String KEY_DATA_SIZE = "readDataSize"; 14 | 15 | public static String KEY_DATA_LINE = "readDataLine"; 16 | 17 | public static int POSITION_SAVE_COUNTER = 10; 18 | 19 | } 20 | -------------------------------------------------------------------------------- /src/main/java/org/apache/flume/chiwei/filemonitor/FileMonitorSource.java: -------------------------------------------------------------------------------- 1 | package org.apache.flume.chiwei.filemonitor; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.io.RandomAccessFile; 6 | import java.nio.ByteBuffer; 7 | import java.nio.CharBuffer; 8 | import java.nio.channels.FileChannel; 9 | import java.nio.charset.Charset; 10 | import java.nio.charset.CharsetDecoder; 11 | import java.util.ArrayList; 12 | import java.util.HashMap; 13 | import java.util.List; 14 | import java.util.Map; 15 | import java.util.concurrent.Executors; 16 | import java.util.concurrent.ScheduledExecutorService; 17 | import java.util.concurrent.TimeUnit; 18 | 19 | import org.apache.flume.Context; 20 | import org.apache.flume.Event; 21 | import org.apache.flume.EventDrivenSource; 22 | import org.apache.flume.channel.ChannelProcessor; 23 | import org.apache.flume.conf.Configurable; 24 | import org.apache.flume.event.EventBuilder; 25 | import org.apache.flume.instrumentation.SourceCounter; 26 | import org.apache.flume.source.AbstractSource; 27 | import org.slf4j.Logger; 28 | import org.slf4j.LoggerFactory; 29 | 30 | import com.google.common.base.Preconditions; 31 | 32 | /** 33 | * 34 | *

35 | * author: chiwei 36 | *

37 | * 38 | *

39 | * time: 2015年4月9日 上午9:27:11 40 | *

41 | * 42 | *

43 | * version: version 1.0 44 | *

45 | * 46 | *

47 | * My CSDN BLOG: http://blog.csdn.net/simonchi 48 | *

49 | *

50 | * My GITHUB: https://github.com/cwtree 51 | *

52 | *

53 | * My SINA WEIBO: http://weibo.com/cwtree 54 | *

55 | *

56 | * My EMAIL: 719259043@qq.com 57 | *

58 | *

59 | * My WebChat: cwtree 60 | *

61 | * 62 | */ 63 | public class FileMonitorSource extends AbstractSource implements Configurable, EventDrivenSource { 64 | 65 | private static final Logger log = LoggerFactory.getLogger(FileMonitorSource.class); 66 | private ChannelProcessor channelProcessor; 67 | private RandomAccessFile monitorFile = null; 68 | private File coreFile = null; 69 | private long lastMod = 0L; 70 | private String monitorFilePath = null; 71 | private String positionFilePath = null; 72 | private FileChannel monitorFileChannel = null; 73 | private ByteBuffer buffer = ByteBuffer.allocate(1 << 20);// 1MB 74 | private long positionValue = 0L; 75 | private ScheduledExecutorService executor; 76 | private FileMonitorThread runner; 77 | private PositionLog positionLog = null; 78 | private Charset charset = null; 79 | private CharsetDecoder decoder = null; 80 | private CharBuffer charBuffer = null; 81 | private long counter = 0L; 82 | private Map headers = new HashMap();// event 83 | // header 84 | private Object exeLock = new Object(); 85 | private long lastFileSize = 0L; 86 | private long nowFileSize = 0L; 87 | 88 | private SourceCounter sourceCounter; 89 | 90 | @Override 91 | public synchronized void start() { 92 | channelProcessor = getChannelProcessor(); 93 | executor = Executors.newSingleThreadScheduledExecutor(); 94 | runner = new FileMonitorThread(); 95 | executor.scheduleWithFixedDelay(runner, 500, 2000, TimeUnit.MILLISECONDS); 96 | sourceCounter.start(); 97 | super.start(); 98 | log.debug("FileMonitorSource source started"); 99 | } 100 | 101 | @Override 102 | public synchronized void stop() { 103 | positionLog.setPosition(positionValue); 104 | log.debug("Set the positionValue {} when stopped", positionValue); 105 | if (this.monitorFileChannel != null) { 106 | try { 107 | this.monitorFileChannel.close(); 108 | } catch (IOException e) { 109 | log.error(this.monitorFilePath + " filechannel close Exception", e); 110 | } 111 | } 112 | if (this.monitorFile != null) { 113 | try { 114 | 
this.monitorFile.close(); 115 | } catch (IOException e) { 116 | log.error(this.monitorFilePath + " file close Exception", e); 117 | } 118 | } 119 | executor.shutdown(); 120 | try { 121 | executor.awaitTermination(10L, TimeUnit.SECONDS); 122 | } catch (InterruptedException ex) { 123 | log.info("Interrupted while awaiting termination", ex); 124 | } 125 | executor.shutdownNow(); 126 | sourceCounter.stop(); 127 | super.stop(); 128 | log.debug("FileMonitorSource source stopped"); 129 | } 130 | 131 | @Override 132 | public void configure(Context context) { 133 | charset = Charset.forName("UTF-8"); 134 | decoder = charset.newDecoder(); 135 | this.monitorFilePath = context.getString(Constants.MONITOR_FILE); 136 | this.positionFilePath = context.getString(Constants.POSITION_DIR); 137 | Preconditions.checkArgument(monitorFilePath != null, "The file can not be null !"); 138 | Preconditions.checkArgument(positionFilePath != null, "the positionDir can not be null !"); 139 | if (positionFilePath.endsWith(":")) { 140 | positionFilePath += File.separator; 141 | } else if (positionFilePath.endsWith("\\") || positionFilePath.endsWith("/")) { 142 | positionFilePath = positionFilePath.substring(0, positionFilePath.length() - 1); 143 | } 144 | // create properties file when start the source if the properties is not 145 | // exists 146 | File file = new File(positionFilePath + File.separator + Constants.POSITION_FILE_NAME); 147 | if (!file.exists()) { 148 | try { 149 | file.createNewFile(); 150 | log.debug("Create the {} file", Constants.POSITION_FILE_NAME); 151 | } catch (IOException e) { 152 | log.error("Create the position.properties error", e); 153 | return; 154 | } 155 | } 156 | try { 157 | coreFile = new File(monitorFilePath); 158 | lastMod = coreFile.lastModified(); 159 | } catch (Exception e) { 160 | log.error("Initialize the File/FileChannel Error", e); 161 | return; 162 | } 163 | 164 | positionLog = new PositionLog(positionFilePath); 165 | try { 166 | positionValue = 
positionLog.initPosition(); 167 | } catch (Exception e) { 168 | log.error("Initialize the positionValue in File positionLog", e); 169 | return; 170 | } 171 | lastFileSize = positionValue; 172 | if (sourceCounter == null) { 173 | sourceCounter = new SourceCounter(getName()); 174 | } 175 | } 176 | 177 | /** 178 | * 179 | *

180 | * author: chiwei 181 | *

182 | * 183 | *

184 | * time: 2015年4月9日 上午9:12:55 185 | *

186 | * 187 | *

188 | * version: version 1.0 189 | *

190 | * 191 | */ 192 | class FileMonitorThread implements Runnable { 193 | 194 | /** 195 | * a thread to check whether the file is modified 196 | */ 197 | @Override 198 | public void run() { 199 | synchronized (exeLock) { 200 | log.debug("FileMonitorThread running ..."); 201 | // coreFile = new File(monitorFilePath); 202 | long nowModified = coreFile.lastModified(); 203 | // the file has been changed 204 | if (lastMod != nowModified) { 205 | log.debug(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>File modified ..."); 206 | // you must record the last modified and now file size as 207 | // soon 208 | // as possible 209 | lastMod = nowModified; 210 | nowFileSize = coreFile.length(); 211 | int readDataBytesLen = 0; 212 | try { 213 | log.debug("The Last coreFileSize {},now coreFileSize {}", lastFileSize, 214 | nowFileSize); 215 | // it indicated the file is rolled by log4j 216 | if (nowFileSize <= lastFileSize) { 217 | log.debug("The file size is changed to be lower,it indicated that the file is rolled by log4j."); 218 | positionValue = 0L; 219 | } 220 | lastFileSize = nowFileSize; 221 | monitorFile = new RandomAccessFile(coreFile, "r"); 222 | // you must be instantiate the file channel Object when 223 | // the 224 | // file 225 | // changed 226 | monitorFileChannel = monitorFile.getChannel(); 227 | monitorFileChannel.position(positionValue); 228 | // read file content into buffer 229 | int bytesRead = monitorFileChannel.read(buffer); 230 | // this while for it can not read all the data when the 231 | // file 232 | // modified 233 | while (bytesRead != -1) { 234 | log.debug("How many bytes read in this loop ? 
--> {}", bytesRead); 235 | String contents = buffer2String(buffer); 236 | // every read,the last byte is \n,this can make sure 237 | // the 238 | // integrity of read data 239 | // include the \n 240 | int lastLineBreak = contents.lastIndexOf("\n") + 1; 241 | String readData = contents.substring(0, lastLineBreak); 242 | byte[] readDataBytes = readData.getBytes(); 243 | readDataBytesLen = readDataBytes.length; 244 | positionValue += readDataBytesLen; 245 | // change the position value for next read 246 | monitorFileChannel.position(positionValue); 247 | log.debug("Read bytes {},Real read bytes {}", bytesRead, 248 | readDataBytesLen); 249 | headers.put(Constants.KEY_DATA_SIZE, String.valueOf(readDataBytesLen)); 250 | headers.put(Constants.KEY_DATA_LINE, 251 | String.valueOf(readData.split("\n"))); 252 | sourceCounter.incrementEventReceivedCount(); 253 | channelProcessor.processEvent(EventBuilder.withBody(readDataBytes, 254 | headers)); 255 | sourceCounter.addToEventAcceptedCount(1); 256 | // channelProcessor.processEventBatch(getEventByReadData(readData)); 257 | log.debug("Change the next read position {}", positionValue); 258 | buffer.clear(); 259 | bytesRead = monitorFileChannel.read(buffer); 260 | } 261 | } catch (Exception e) { 262 | log.error("Read data into Channel Error", e); 263 | log.debug("Save the last positionValue {} into Disk File", positionValue 264 | - readDataBytesLen); 265 | positionLog.setPosition(positionValue - readDataBytesLen); 266 | } 267 | counter++; 268 | if (counter % Constants.POSITION_SAVE_COUNTER == 0) { 269 | log.debug( 270 | Constants.POSITION_SAVE_COUNTER 271 | + " times file modified checked,save the position Value {} into Disk file", 272 | positionValue); 273 | positionLog.setPosition(positionValue); 274 | } 275 | if (monitorFile != null) { 276 | monitorFile.close(); 277 | } 278 | } 279 | } 280 | } 281 | 282 | } 283 | 284 | public List getEventByReadData(String readData) { 285 | String str[] = readData.split("\n"); 286 | int len = 
str.length; 287 | List events = new ArrayList(); 288 | for (int i = 0; i < len; i++) { 289 | Event event = EventBuilder.withBody((str[i]).getBytes()); 290 | events.add(event); 291 | } 292 | return events; 293 | } 294 | 295 | public String buffer2String(ByteBuffer buffer) { 296 | buffer.flip(); 297 | try { 298 | charBuffer = decoder.decode(buffer); 299 | return charBuffer.toString(); 300 | } catch (Exception ex) { 301 | ex.printStackTrace(); 302 | return ""; 303 | } 304 | } 305 | 306 | } 307 | -------------------------------------------------------------------------------- /src/main/java/org/apache/flume/chiwei/filemonitor/PositionLog.java: -------------------------------------------------------------------------------- 1 | package org.apache.flume.chiwei.filemonitor; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.io.RandomAccessFile; 6 | import java.nio.ByteBuffer; 7 | import java.nio.channels.FileChannel; 8 | 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | 12 | public class PositionLog { 13 | 14 | private static final Logger log = LoggerFactory 15 | .getLogger(PositionLog.class); 16 | 17 | private FileChannel positionFileChannel; 18 | private String postionFilePath; 19 | private RandomAccessFile raf = null; 20 | private String filePath = null; 21 | public FileChannel getPositionFileChannel() { 22 | return positionFileChannel; 23 | } 24 | public void setPositionFileChannel(FileChannel positionFileChannel) { 25 | this.positionFileChannel = positionFileChannel; 26 | } 27 | public String getPostionFilePath() { 28 | return postionFilePath; 29 | } 30 | public void setPostionFilePath(String postionFilePath) { 31 | this.postionFilePath = postionFilePath; 32 | } 33 | 34 | public PositionLog() { 35 | } 36 | public PositionLog(String postionFilePath) { 37 | this.postionFilePath = postionFilePath; 38 | } 39 | public long initPosition() throws Exception { 40 | filePath = postionFilePath + File.separator 41 | + 
Constants.POSITION_FILE_NAME; 42 | File file = new File(filePath); 43 | if (!file.exists()) { 44 | try { 45 | file.createNewFile(); 46 | log.debug("Create the position file"); 47 | } catch (IOException e) { 48 | log.error("Create the position error", e); 49 | throw e; 50 | } 51 | } 52 | try { 53 | raf = new RandomAccessFile(filePath, "rw"); 54 | this.positionFileChannel =raf.getChannel(); 55 | long fileSize = positionFileChannel.size(); 56 | if(fileSize==0) { 57 | log.debug("The file content is null,init the value 0"); 58 | ByteBuffer buffer = ByteBuffer.allocate(1); 59 | buffer.put("0".getBytes()); 60 | buffer.flip(); 61 | positionFileChannel.write(buffer); 62 | raf.close(); 63 | return 0L; 64 | }else { 65 | return getPosition(); 66 | } 67 | } catch (Exception e) { 68 | log.error("Init the position file error",e); 69 | throw e; 70 | } 71 | } 72 | 73 | public long getPosition() { 74 | try { 75 | raf = new RandomAccessFile(filePath, "rw"); 76 | this.positionFileChannel =raf.getChannel(); 77 | long fileSize = positionFileChannel.size(); 78 | ByteBuffer buffer = ByteBuffer.allocate((int) fileSize); 79 | int bytesRead = positionFileChannel.read(buffer); 80 | StringBuffer sb = new StringBuffer(); 81 | while(bytesRead!=-1) { 82 | buffer.flip(); 83 | while(buffer.hasRemaining()) { 84 | sb.append((char)buffer.get()); 85 | } 86 | buffer.clear(); 87 | bytesRead = positionFileChannel.read(buffer); 88 | } 89 | raf.close(); 90 | return Long.parseLong(sb.toString()); 91 | } catch (Exception e) { 92 | log.error("Get Position Value Error",e); 93 | return -1; 94 | } 95 | } 96 | 97 | public long setPosition(Long position) { 98 | try { 99 | raf = new RandomAccessFile(filePath, "rw"); 100 | this.positionFileChannel =raf.getChannel(); 101 | String positionStr = String.valueOf(position); 102 | int bufferSize = positionStr.length(); 103 | ByteBuffer buffer = ByteBuffer.allocate(bufferSize); 104 | buffer.clear(); 105 | buffer.put(positionStr.getBytes()); 106 | buffer.flip(); 107 | 
while(buffer.hasRemaining()) { 108 | this.positionFileChannel.write(buffer); 109 | } 110 | raf.close(); 111 | log.debug("Set Position Value Successfully {}",position); 112 | return position; 113 | } catch (Exception e) { 114 | log.error("Set Position Value Error",e); 115 | return -1; 116 | } 117 | } 118 | 119 | } 120 | -------------------------------------------------------------------------------- /src/main/java/source.conf: -------------------------------------------------------------------------------- 1 | a1.sources.r1.type=org.apache.flume.chiwei.filemonitor.FileMonitorSource 2 | a1.sources.r1.channels=c1 3 | a1.sources.r1.file=/home/flume/example/file/bizlogic.log 4 | a1.sources.r1.positionDir=/home/flume -------------------------------------------------------------------------------- /src/test/java/org/apache/flume/chiwei/filemonitor/test/TestConstants.java: -------------------------------------------------------------------------------- 1 | package org.apache.flume.chiwei.filemonitor.test; 2 | 3 | import java.io.File; 4 | 5 | public class TestConstants { 6 | 7 | public static String FILE = "e:"+File.separator+"demo.txt"; 8 | 9 | public static String POSITION_DIR = "e:"; 10 | 11 | } 12 | -------------------------------------------------------------------------------- /src/test/java/org/apache/flume/chiwei/filemonitor/test/TestFileMonitorSource.java: -------------------------------------------------------------------------------- 1 | package org.apache.flume.chiwei.filemonitor.test; 2 | 3 | import org.apache.flume.Channel; 4 | import org.apache.flume.ChannelSelector; 5 | import org.apache.flume.Context; 6 | import org.apache.flume.channel.ChannelProcessor; 7 | import org.apache.flume.channel.MemoryChannel; 8 | import org.apache.flume.channel.ReplicatingChannelSelector; 9 | import org.apache.flume.chiwei.filemonitor.FileMonitorSource; 10 | import org.junit.Before; 11 | import org.junit.Test; 12 | 13 | import com.google.common.collect.Lists; 14 | 15 | public 
class TestFileMonitorSource { 16 | 17 | private FileMonitorSource source; 18 | private Context context; 19 | private Channel channel; 20 | private ChannelSelector rcs = new ReplicatingChannelSelector(); 21 | 22 | @Before 23 | public void before() { 24 | this.context = new Context(); 25 | this.context.put("file", TestConstants.FILE); 26 | this.context.put("positionDir", TestConstants.POSITION_DIR); 27 | 28 | source = new FileMonitorSource(); 29 | channel = new MemoryChannel(); 30 | rcs.setChannels(Lists.newArrayList(channel)); 31 | source.setChannelProcessor(new ChannelProcessor(rcs)); 32 | } 33 | 34 | @Test 35 | public void test() { 36 | //source.configure(context); 37 | //source.start(); 38 | } 39 | 40 | } 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | --------------------------------------------------------------------------------