├── README.md
├── pom.xml
└── src
├── main
└── java
│ ├── log4j.xml
│ ├── org
│ └── apache
│ │ └── flume
│ │ └── chiwei
│ │ └── filemonitor
│ │ ├── Constants.java
│ │ ├── FileMonitorSource.java
│ │ └── PositionLog.java
│ └── source.conf
└── test
└── java
└── org
└── apache
└── flume
└── chiwei
└── filemonitor
└── test
├── TestConstants.java
└── TestFileMonitorSource.java
/README.md:
--------------------------------------------------------------------------------
1 | # flume-filemonitor-source
2 |
3 | This source records how far it has read into the monitored file, so if the Flume agent is killed it knows which line to resume reading from the next time it starts.
4 |
5 |
6 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
5 | 4.0.0
6 |
7 | flume-source
8 | flume-filemonitor-source
9 | Flume FileMonitor Source
10 | 1.0.0
11 |
12 |
13 |
14 | org.apache.maven.plugins
15 | maven-jar-plugin
16 |
17 |
18 |
19 |
20 |
21 |
22 | org.apache.flume
23 | flume-ng-sdk
24 | 1.5.2
25 |
26 |
27 |
28 | org.apache.flume
29 | flume-ng-core
30 | 1.5.2
31 |
32 |
33 |
34 | org.apache.flume
35 | flume-ng-configuration
36 | 1.5.2
37 |
38 |
39 |
40 | org.slf4j
41 | slf4j-api
42 | 1.6.1
43 |
44 |
45 |
46 | junit
47 | junit
48 | 4.10
49 | test
50 |
51 |
52 |
53 | com.google.guava
54 | guava
55 | 18.0
56 |
57 |
58 |
59 |
60 |
--------------------------------------------------------------------------------
/src/main/java/log4j.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/flume/chiwei/filemonitor/Constants.java:
--------------------------------------------------------------------------------
1 | package org.apache.flume.chiwei.filemonitor;
2 |
/**
 * Configuration keys, event header names and tuning values shared by the
 * file monitor source and its position log.
 *
 * All members are true constants: they are declared {@code final} so no
 * caller can reassign them at runtime.
 */
public class Constants {

    /** Context key holding the path of the file to monitor. */
    public static final String MONITOR_FILE = "file";

    /** Context key holding the directory in which the position file is kept. */
    public static final String POSITION_DIR = "positionDir";

    /** Name of the file, inside the position directory, that stores the read offset. */
    public static final String POSITION_FILE_NAME = "position.log";

    /** Offset used when no position has been recorded yet. */
    public static final long POSITION_INIT_VALUE = 0L;

    /** Event header carrying the number of bytes in the event body. */
    public static final String KEY_DATA_SIZE = "readDataSize";

    /** Event header carrying the line information of the event body. */
    public static final String KEY_DATA_LINE = "readDataLine";

    /** The position is flushed to disk once every this many detected file modifications. */
    public static final int POSITION_SAVE_COUNTER = 10;

}
20 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/flume/chiwei/filemonitor/FileMonitorSource.java:
--------------------------------------------------------------------------------
1 | package org.apache.flume.chiwei.filemonitor;
2 |
3 | import java.io.File;
4 | import java.io.IOException;
5 | import java.io.RandomAccessFile;
6 | import java.nio.ByteBuffer;
7 | import java.nio.CharBuffer;
8 | import java.nio.channels.FileChannel;
9 | import java.nio.charset.Charset;
10 | import java.nio.charset.CharsetDecoder;
11 | import java.util.ArrayList;
12 | import java.util.HashMap;
13 | import java.util.List;
14 | import java.util.Map;
15 | import java.util.concurrent.Executors;
16 | import java.util.concurrent.ScheduledExecutorService;
17 | import java.util.concurrent.TimeUnit;
18 |
19 | import org.apache.flume.Context;
20 | import org.apache.flume.Event;
21 | import org.apache.flume.EventDrivenSource;
22 | import org.apache.flume.channel.ChannelProcessor;
23 | import org.apache.flume.conf.Configurable;
24 | import org.apache.flume.event.EventBuilder;
25 | import org.apache.flume.instrumentation.SourceCounter;
26 | import org.apache.flume.source.AbstractSource;
27 | import org.slf4j.Logger;
28 | import org.slf4j.LoggerFactory;
29 |
30 | import com.google.common.base.Preconditions;
31 |
32 | /**
33 | *
34 | *
35 | * author: chiwei
36 | *
37 | *
38 | *
39 | * time: 2015年4月9日 上午9:27:11
40 | *
41 | *
42 | *
43 | * version: version 1.0
44 | *
45 | *
46 | *
47 | * My CSDN BLOG: http://blog.csdn.net/simonchi
48 | *
49 | *
50 | * My GITHUB: https://github.com/cwtree
51 | *
52 | *
53 | * My SINA WEIBO: http://weibo.com/cwtree
54 | *
55 | *
56 | * My EMAIL: 719259043@qq.com
57 | *
58 | *
59 | * My WebChat: cwtree
60 | *
61 | *
62 | */
63 | public class FileMonitorSource extends AbstractSource implements Configurable, EventDrivenSource {
64 |
65 | private static final Logger log = LoggerFactory.getLogger(FileMonitorSource.class);
66 | private ChannelProcessor channelProcessor;
67 | private RandomAccessFile monitorFile = null;
68 | private File coreFile = null;
69 | private long lastMod = 0L;
70 | private String monitorFilePath = null;
71 | private String positionFilePath = null;
72 | private FileChannel monitorFileChannel = null;
73 | private ByteBuffer buffer = ByteBuffer.allocate(1 << 20);// 1MB
74 | private long positionValue = 0L;
75 | private ScheduledExecutorService executor;
76 | private FileMonitorThread runner;
77 | private PositionLog positionLog = null;
78 | private Charset charset = null;
79 | private CharsetDecoder decoder = null;
80 | private CharBuffer charBuffer = null;
81 | private long counter = 0L;
82 | private Map headers = new HashMap();// event
83 | // header
84 | private Object exeLock = new Object();
85 | private long lastFileSize = 0L;
86 | private long nowFileSize = 0L;
87 |
88 | private SourceCounter sourceCounter;
89 |
90 | @Override
91 | public synchronized void start() {
92 | channelProcessor = getChannelProcessor();
93 | executor = Executors.newSingleThreadScheduledExecutor();
94 | runner = new FileMonitorThread();
95 | executor.scheduleWithFixedDelay(runner, 500, 2000, TimeUnit.MILLISECONDS);
96 | sourceCounter.start();
97 | super.start();
98 | log.debug("FileMonitorSource source started");
99 | }
100 |
101 | @Override
102 | public synchronized void stop() {
103 | positionLog.setPosition(positionValue);
104 | log.debug("Set the positionValue {} when stopped", positionValue);
105 | if (this.monitorFileChannel != null) {
106 | try {
107 | this.monitorFileChannel.close();
108 | } catch (IOException e) {
109 | log.error(this.monitorFilePath + " filechannel close Exception", e);
110 | }
111 | }
112 | if (this.monitorFile != null) {
113 | try {
114 | this.monitorFile.close();
115 | } catch (IOException e) {
116 | log.error(this.monitorFilePath + " file close Exception", e);
117 | }
118 | }
119 | executor.shutdown();
120 | try {
121 | executor.awaitTermination(10L, TimeUnit.SECONDS);
122 | } catch (InterruptedException ex) {
123 | log.info("Interrupted while awaiting termination", ex);
124 | }
125 | executor.shutdownNow();
126 | sourceCounter.stop();
127 | super.stop();
128 | log.debug("FileMonitorSource source stopped");
129 | }
130 |
131 | @Override
132 | public void configure(Context context) {
133 | charset = Charset.forName("UTF-8");
134 | decoder = charset.newDecoder();
135 | this.monitorFilePath = context.getString(Constants.MONITOR_FILE);
136 | this.positionFilePath = context.getString(Constants.POSITION_DIR);
137 | Preconditions.checkArgument(monitorFilePath != null, "The file can not be null !");
138 | Preconditions.checkArgument(positionFilePath != null, "the positionDir can not be null !");
139 | if (positionFilePath.endsWith(":")) {
140 | positionFilePath += File.separator;
141 | } else if (positionFilePath.endsWith("\\") || positionFilePath.endsWith("/")) {
142 | positionFilePath = positionFilePath.substring(0, positionFilePath.length() - 1);
143 | }
144 | // create properties file when start the source if the properties is not
145 | // exists
146 | File file = new File(positionFilePath + File.separator + Constants.POSITION_FILE_NAME);
147 | if (!file.exists()) {
148 | try {
149 | file.createNewFile();
150 | log.debug("Create the {} file", Constants.POSITION_FILE_NAME);
151 | } catch (IOException e) {
152 | log.error("Create the position.properties error", e);
153 | return;
154 | }
155 | }
156 | try {
157 | coreFile = new File(monitorFilePath);
158 | lastMod = coreFile.lastModified();
159 | } catch (Exception e) {
160 | log.error("Initialize the File/FileChannel Error", e);
161 | return;
162 | }
163 |
164 | positionLog = new PositionLog(positionFilePath);
165 | try {
166 | positionValue = positionLog.initPosition();
167 | } catch (Exception e) {
168 | log.error("Initialize the positionValue in File positionLog", e);
169 | return;
170 | }
171 | lastFileSize = positionValue;
172 | if (sourceCounter == null) {
173 | sourceCounter = new SourceCounter(getName());
174 | }
175 | }
176 |
177 | /**
178 | *
179 | *
180 | * author: chiwei
181 | *
182 | *
183 | *
184 | * time: 2015年4月9日 上午9:12:55
185 | *
186 | *
187 | *
188 | * version: version 1.0
189 | *
190 | *
191 | */
192 | class FileMonitorThread implements Runnable {
193 |
194 | /**
195 | * a thread to check whether the file is modified
196 | */
197 | @Override
198 | public void run() {
199 | synchronized (exeLock) {
200 | log.debug("FileMonitorThread running ...");
201 | // coreFile = new File(monitorFilePath);
202 | long nowModified = coreFile.lastModified();
203 | // the file has been changed
204 | if (lastMod != nowModified) {
205 | log.debug(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>File modified ...");
206 | // you must record the last modified and now file size as
207 | // soon
208 | // as possible
209 | lastMod = nowModified;
210 | nowFileSize = coreFile.length();
211 | int readDataBytesLen = 0;
212 | try {
213 | log.debug("The Last coreFileSize {},now coreFileSize {}", lastFileSize,
214 | nowFileSize);
215 | // it indicated the file is rolled by log4j
216 | if (nowFileSize <= lastFileSize) {
217 | log.debug("The file size is changed to be lower,it indicated that the file is rolled by log4j.");
218 | positionValue = 0L;
219 | }
220 | lastFileSize = nowFileSize;
221 | monitorFile = new RandomAccessFile(coreFile, "r");
222 | // you must be instantiate the file channel Object when
223 | // the
224 | // file
225 | // changed
226 | monitorFileChannel = monitorFile.getChannel();
227 | monitorFileChannel.position(positionValue);
228 | // read file content into buffer
229 | int bytesRead = monitorFileChannel.read(buffer);
230 | // this while for it can not read all the data when the
231 | // file
232 | // modified
233 | while (bytesRead != -1) {
234 | log.debug("How many bytes read in this loop ? --> {}", bytesRead);
235 | String contents = buffer2String(buffer);
236 | // every read,the last byte is \n,this can make sure
237 | // the
238 | // integrity of read data
239 | // include the \n
240 | int lastLineBreak = contents.lastIndexOf("\n") + 1;
241 | String readData = contents.substring(0, lastLineBreak);
242 | byte[] readDataBytes = readData.getBytes();
243 | readDataBytesLen = readDataBytes.length;
244 | positionValue += readDataBytesLen;
245 | // change the position value for next read
246 | monitorFileChannel.position(positionValue);
247 | log.debug("Read bytes {},Real read bytes {}", bytesRead,
248 | readDataBytesLen);
249 | headers.put(Constants.KEY_DATA_SIZE, String.valueOf(readDataBytesLen));
250 | headers.put(Constants.KEY_DATA_LINE,
251 | String.valueOf(readData.split("\n")));
252 | sourceCounter.incrementEventReceivedCount();
253 | channelProcessor.processEvent(EventBuilder.withBody(readDataBytes,
254 | headers));
255 | sourceCounter.addToEventAcceptedCount(1);
256 | // channelProcessor.processEventBatch(getEventByReadData(readData));
257 | log.debug("Change the next read position {}", positionValue);
258 | buffer.clear();
259 | bytesRead = monitorFileChannel.read(buffer);
260 | }
261 | } catch (Exception e) {
262 | log.error("Read data into Channel Error", e);
263 | log.debug("Save the last positionValue {} into Disk File", positionValue
264 | - readDataBytesLen);
265 | positionLog.setPosition(positionValue - readDataBytesLen);
266 | }
267 | counter++;
268 | if (counter % Constants.POSITION_SAVE_COUNTER == 0) {
269 | log.debug(
270 | Constants.POSITION_SAVE_COUNTER
271 | + " times file modified checked,save the position Value {} into Disk file",
272 | positionValue);
273 | positionLog.setPosition(positionValue);
274 | }
275 | if (monitorFile != null) {
276 | monitorFile.close();
277 | }
278 | }
279 | }
280 | }
281 |
282 | }
283 |
284 | public List getEventByReadData(String readData) {
285 | String str[] = readData.split("\n");
286 | int len = str.length;
287 | List events = new ArrayList();
288 | for (int i = 0; i < len; i++) {
289 | Event event = EventBuilder.withBody((str[i]).getBytes());
290 | events.add(event);
291 | }
292 | return events;
293 | }
294 |
295 | public String buffer2String(ByteBuffer buffer) {
296 | buffer.flip();
297 | try {
298 | charBuffer = decoder.decode(buffer);
299 | return charBuffer.toString();
300 | } catch (Exception ex) {
301 | ex.printStackTrace();
302 | return "";
303 | }
304 | }
305 |
306 | }
307 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/flume/chiwei/filemonitor/PositionLog.java:
--------------------------------------------------------------------------------
1 | package org.apache.flume.chiwei.filemonitor;
2 |
3 | import java.io.File;
4 | import java.io.IOException;
5 | import java.io.RandomAccessFile;
6 | import java.nio.ByteBuffer;
7 | import java.nio.channels.FileChannel;
8 |
9 | import org.slf4j.Logger;
10 | import org.slf4j.LoggerFactory;
11 |
12 | public class PositionLog {
13 |
14 | private static final Logger log = LoggerFactory
15 | .getLogger(PositionLog.class);
16 |
17 | private FileChannel positionFileChannel;
18 | private String postionFilePath;
19 | private RandomAccessFile raf = null;
20 | private String filePath = null;
21 | public FileChannel getPositionFileChannel() {
22 | return positionFileChannel;
23 | }
24 | public void setPositionFileChannel(FileChannel positionFileChannel) {
25 | this.positionFileChannel = positionFileChannel;
26 | }
27 | public String getPostionFilePath() {
28 | return postionFilePath;
29 | }
30 | public void setPostionFilePath(String postionFilePath) {
31 | this.postionFilePath = postionFilePath;
32 | }
33 |
34 | public PositionLog() {
35 | }
36 | public PositionLog(String postionFilePath) {
37 | this.postionFilePath = postionFilePath;
38 | }
39 | public long initPosition() throws Exception {
40 | filePath = postionFilePath + File.separator
41 | + Constants.POSITION_FILE_NAME;
42 | File file = new File(filePath);
43 | if (!file.exists()) {
44 | try {
45 | file.createNewFile();
46 | log.debug("Create the position file");
47 | } catch (IOException e) {
48 | log.error("Create the position error", e);
49 | throw e;
50 | }
51 | }
52 | try {
53 | raf = new RandomAccessFile(filePath, "rw");
54 | this.positionFileChannel =raf.getChannel();
55 | long fileSize = positionFileChannel.size();
56 | if(fileSize==0) {
57 | log.debug("The file content is null,init the value 0");
58 | ByteBuffer buffer = ByteBuffer.allocate(1);
59 | buffer.put("0".getBytes());
60 | buffer.flip();
61 | positionFileChannel.write(buffer);
62 | raf.close();
63 | return 0L;
64 | }else {
65 | return getPosition();
66 | }
67 | } catch (Exception e) {
68 | log.error("Init the position file error",e);
69 | throw e;
70 | }
71 | }
72 |
73 | public long getPosition() {
74 | try {
75 | raf = new RandomAccessFile(filePath, "rw");
76 | this.positionFileChannel =raf.getChannel();
77 | long fileSize = positionFileChannel.size();
78 | ByteBuffer buffer = ByteBuffer.allocate((int) fileSize);
79 | int bytesRead = positionFileChannel.read(buffer);
80 | StringBuffer sb = new StringBuffer();
81 | while(bytesRead!=-1) {
82 | buffer.flip();
83 | while(buffer.hasRemaining()) {
84 | sb.append((char)buffer.get());
85 | }
86 | buffer.clear();
87 | bytesRead = positionFileChannel.read(buffer);
88 | }
89 | raf.close();
90 | return Long.parseLong(sb.toString());
91 | } catch (Exception e) {
92 | log.error("Get Position Value Error",e);
93 | return -1;
94 | }
95 | }
96 |
97 | public long setPosition(Long position) {
98 | try {
99 | raf = new RandomAccessFile(filePath, "rw");
100 | this.positionFileChannel =raf.getChannel();
101 | String positionStr = String.valueOf(position);
102 | int bufferSize = positionStr.length();
103 | ByteBuffer buffer = ByteBuffer.allocate(bufferSize);
104 | buffer.clear();
105 | buffer.put(positionStr.getBytes());
106 | buffer.flip();
107 | while(buffer.hasRemaining()) {
108 | this.positionFileChannel.write(buffer);
109 | }
110 | raf.close();
111 | log.debug("Set Position Value Successfully {}",position);
112 | return position;
113 | } catch (Exception e) {
114 | log.error("Set Position Value Error",e);
115 | return -1;
116 | }
117 | }
118 |
119 | }
120 |
--------------------------------------------------------------------------------
/src/main/java/source.conf:
--------------------------------------------------------------------------------
1 | a1.sources.r1.type=org.apache.flume.chiwei.filemonitor.FileMonitorSource
2 | a1.sources.r1.channels=c1
3 | a1.sources.r1.file=/home/flume/example/file/bizlogic.log
4 | a1.sources.r1.positionDir=/home/flume
--------------------------------------------------------------------------------
/src/test/java/org/apache/flume/chiwei/filemonitor/test/TestConstants.java:
--------------------------------------------------------------------------------
1 | package org.apache.flume.chiwei.filemonitor.test;
2 |
3 | import java.io.File;
4 |
/**
 * Fixture locations for the file monitor source tests.
 *
 * The paths are derived from the JVM temporary directory so the tests do not
 * depend on a particular drive or operating system.
 */
public class TestConstants {

    /** Directory in which the source keeps its position file during tests. */
    public static final String POSITION_DIR = System.getProperty("java.io.tmpdir");

    /** File monitored by the source during tests. */
    public static final String FILE = new File(POSITION_DIR, "demo.txt").getPath();

}
12 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/flume/chiwei/filemonitor/test/TestFileMonitorSource.java:
--------------------------------------------------------------------------------
1 | package org.apache.flume.chiwei.filemonitor.test;
2 |
3 | import org.apache.flume.Channel;
4 | import org.apache.flume.ChannelSelector;
5 | import org.apache.flume.Context;
6 | import org.apache.flume.channel.ChannelProcessor;
7 | import org.apache.flume.channel.MemoryChannel;
8 | import org.apache.flume.channel.ReplicatingChannelSelector;
9 | import org.apache.flume.chiwei.filemonitor.FileMonitorSource;
10 | import org.junit.Before;
11 | import org.junit.Test;
12 |
13 | import com.google.common.collect.Lists;
14 |
15 | public class TestFileMonitorSource {
16 |
17 | private FileMonitorSource source;
18 | private Context context;
19 | private Channel channel;
20 | private ChannelSelector rcs = new ReplicatingChannelSelector();
21 |
22 | @Before
23 | public void before() {
24 | this.context = new Context();
25 | this.context.put("file", TestConstants.FILE);
26 | this.context.put("positionDir", TestConstants.POSITION_DIR);
27 |
28 | source = new FileMonitorSource();
29 | channel = new MemoryChannel();
30 | rcs.setChannels(Lists.newArrayList(channel));
31 | source.setChannelProcessor(new ChannelProcessor(rcs));
32 | }
33 |
34 | @Test
35 | public void test() {
36 | //source.configure(context);
37 | //source.start();
38 | }
39 |
40 | }
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
--------------------------------------------------------------------------------