├── README.md ├── bin ├── hdata └── hdata.bat ├── conf ├── hdata.xml ├── log4j2.xml └── plugins.xml ├── job-examples ├── ftp-ftp.xml ├── hbase-console.xml ├── hdfs-hive.xml ├── hdfs-jdbc.xml ├── hive-jdbc.xml ├── jdbc-hbase.xml ├── jdbc-hdfs.xml ├── jdbc-hive.xml ├── jdbc-jdbc.xml ├── jdbc-mongodb.xml ├── job.xml └── mongodb-console.xml ├── pom.xml └── src └── main └── java └── opensource └── hdata ├── CliDriver.java ├── common ├── Constants.java └── HDataConfigConstants.java ├── config ├── Configuration.java ├── EngineConfig.java ├── JobConfig.java └── PluginConfig.java ├── core ├── DefaultRecord.java ├── Fields.java ├── HData.java ├── JobContext.java ├── Metric.java ├── OutputFieldsDeclarer.java ├── PluginLoader.java ├── ReaderWorker.java ├── RecordEvent.java ├── RecordWorkHandler.java ├── Storage.java ├── WaitStrategyFactory.java └── plugin │ ├── AbstractPlugin.java │ ├── Pluginable.java │ ├── Reader.java │ ├── ReaderPlugin.java │ ├── Record.java │ ├── RecordCollector.java │ ├── Splitter.java │ ├── Writer.java │ └── WriterPlugin.java ├── exception └── HDataException.java ├── plugin ├── reader │ ├── ftp │ │ ├── FTPReader.java │ │ ├── FTPReaderProperties.java │ │ └── FTPSplitter.java │ ├── hbase │ │ ├── HBaseReader.java │ │ ├── HBaseReaderProperties.java │ │ └── HBaseSplitter.java │ ├── hdfs │ │ ├── HDFSReader.java │ │ ├── HDFSReaderProperties.java │ │ └── HDFSSplitter.java │ ├── hive │ │ ├── HiveReader.java │ │ ├── HiveReaderProperties.java │ │ └── HiveSplitter.java │ ├── jdbc │ │ ├── JBDCReaderProperties.java │ │ ├── JDBCReader.java │ │ └── JDBCSplitter.java │ └── mongodb │ │ ├── MongoDBReader.java │ │ ├── MongoDBReaderProperties.java │ │ └── MongoDBSplitter.java └── writer │ ├── console │ └── ConsoleWriter.java │ ├── ftp │ ├── FTPWriter.java │ └── FTPWriterProperties.java │ ├── hbase │ ├── HBaseWriter.java │ └── HBaseWriterProperties.java │ ├── hdfs │ ├── HDFSWriter.java │ └── HDFSWriterProperties.java │ ├── hive │ ├── HiveRecordWritable.java │ ├── HiveWriter.java │ └── HiveWriterProperties.java │ ├── jdbc │ ├── JBDCWriterProperties.java │ └── JDBCWriter.java │ └── mongodb │ ├── MongoDBWriter.java │ └── MongoDBWriterProperties.java ├── tool └── SQLExecuteTool.java └── util ├── EscaperUtils.java ├── FTPUtils.java ├── HiveMetaStoreUtils.java ├── HiveTypeUtils.java ├── JDBCUtils.java ├── LoggerUtils.java ├── TypeConvertUtils.java ├── Utils.java └── XMLUtils.java /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataSky/HData/50ff4568fec2538a6f2098311c9ab5ff6737471c/README.md -------------------------------------------------------------------------------- /bin/hdata: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | CDPATH="" 5 | SCRIPT="$0" 6 | 7 | while [ -h "$SCRIPT" ] ; do 8 | ls=`ls -ld "$SCRIPT"` 9 | link=`expr "$ls" : '.*-> \(.*\)$'` 10 | if expr "$link" : '/.*' > /dev/null; then 11 | SCRIPT="$link" 12 | else 13 | SCRIPT=`dirname "$SCRIPT"`/"$link" 14 | fi 15 | done 16 | 17 | HDATA_HOME=`dirname "$SCRIPT"`/.. 18 | HDATA_HOME=`cd "$HDATA_HOME"; pwd` 19 | HDATA_LIB_DIR=$HDATA_HOME/lib 20 | HDATA_CONF_DIR=$HDATA_HOME/conf 21 | 22 | if [ -x "$JAVA_HOME/bin/java" ]; then 23 | JAVA="$JAVA_HOME/bin/java" 24 | else 25 | JAVA=`which java` 26 | fi 27 | 28 | if [ ! -x "$JAVA" ]; then 29 | echo "Could not find any executable java binary. Please install java in your PATH or set JAVA_HOME" 30 | exit 1 31 | fi 32 | 33 | HDATA_CLASSPATH='.' 
34 | for f in $HDATA_LIB_DIR/*.jar; do
35 | HDATA_CLASSPATH=${HDATA_CLASSPATH}:$f;
36 | done
37 |
38 | JAVA_OPTS="$JAVA_OPTS -Dhdata.conf.dir=$HDATA_CONF_DIR"
39 | JAVA_OPTS="$JAVA_OPTS -Dlog4j.configurationFile=file:///$HDATA_CONF_DIR/log4j2.xml"
40 |
41 | MAIN_CLASS="opensource.hdata.CliDriver"
42 | if [ "$1" = "execute-sql" ]; then
43 | MAIN_CLASS="opensource.hdata.tool.SQLExecuteTool"
44 | fi
45 |
46 | exec "$JAVA" $JAVA_OPTS -cp "$HDATA_CLASSPATH" $MAIN_CLASS "$@"
47 |
-------------------------------------------------------------------------------- /bin/hdata.bat: --------------------------------------------------------------------------------
1 | @echo off
2 |
3 | SETLOCAL
4 |
5 | if NOT DEFINED JAVA_HOME goto err
6 |
7 | set SCRIPT_DIR=%~dp0
8 | for %%I in ("%SCRIPT_DIR%..") do set HDATA_HOME=%%~dpfI
9 |
10 | set MAIN_CLASSPATH=.;%HDATA_HOME%\lib\*
11 | set HDATA_CONF_DIR=%HDATA_HOME%\conf
12 |
13 | set JAVA_OPTS=%JAVA_OPTS% -Xss256k
14 | set JAVA_OPTS=%JAVA_OPTS% -XX:+UseParNewGC
15 | set JAVA_OPTS=%JAVA_OPTS% -XX:+UseConcMarkSweepGC
16 |
17 | set JAVA_OPTS=%JAVA_OPTS% -XX:CMSInitiatingOccupancyFraction=75
18 | set JAVA_OPTS=%JAVA_OPTS% -XX:+UseCMSInitiatingOccupancyOnly
19 | set JAVA_OPTS=%JAVA_OPTS% -XX:+HeapDumpOnOutOfMemoryError
20 | set JAVA_OPTS=%JAVA_OPTS% -Dhdata.conf.dir="%HDATA_CONF_DIR%"
21 | set JAVA_OPTS=%JAVA_OPTS% -Dlog4j.configurationFile="file:///%HDATA_CONF_DIR%/log4j2.xml"
22 |
23 | set FIRST_ARG=%1
24 | set MAIN_CLASS="opensource.hdata.CliDriver"
25 | if "%FIRST_ARG%"=="execute-sql" (set MAIN_CLASS="opensource.hdata.tool.SQLExecuteTool")
26 |
27 | "%JAVA_HOME%\bin\java" %JAVA_OPTS% -cp "%MAIN_CLASSPATH%" %MAIN_CLASS% %*
28 |
29 | goto finally
30 |
31 | :err
32 | echo JAVA_HOME environment variable must be set!
33 | pause
34 | 35 |
36 | :finally
37 |
38 | ENDLOCAL
-------------------------------------------------------------------------------- /conf/hdata.xml: --------------------------------------------------------------------------------
1 | 2 | 3 | 4 |
5 | hdata.storage.default.buffer.size
6 | 16384
7 | Default storage buffer size; the value must be a power of two (2^n)
8 | 9 |
10 | hdata.storage.disruptor.wait.strategy
11 | BlockingWaitStrategy
12 | Thread wait strategy; options: BlockingWaitStrategy, BusySpinWaitStrategy, SleepingWaitStrategy, YieldingWaitStrategy
13 | 14 |
15 | hdata.hive.writer.tmp.dir
16 | /tmp
17 | Temporary directory used by the Hive Writer when writing files to HDFS
18 | 19 | 20 |
-------------------------------------------------------------------------------- /conf/log4j2.xml: --------------------------------------------------------------------------------
1 | 2 | 3 | 4 | 5 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 |
-------------------------------------------------------------------------------- /conf/plugins.xml: --------------------------------------------------------------------------------
1 | 2 | 3 | 4 | 5 |
6 | jdbc
7 | opensource.hdata.plugin.reader.jdbc.JDBCReader
8 | opensource.hdata.plugin.reader.jdbc.JDBCSplitter
9 | 10 |
11 | hive
12 | opensource.hdata.plugin.reader.hive.HiveReader
13 | opensource.hdata.plugin.reader.hive.HiveSplitter
14 | 15 |
16 | hdfs
17 | opensource.hdata.plugin.reader.hdfs.HDFSReader
18 | opensource.hdata.plugin.reader.hdfs.HDFSSplitter
19 | 20 |
21 | ftp
22 | opensource.hdata.plugin.reader.ftp.FTPReader
23 | opensource.hdata.plugin.reader.ftp.FTPSplitter
24 | 25 |
26 | mongodb
27 | opensource.hdata.plugin.reader.mongodb.MongoDBReader
28 | opensource.hdata.plugin.reader.mongodb.MongoDBSplitter
29 | 30 |
31 | hbase
32 | opensource.hdata.plugin.reader.hbase.HBaseReader
33 |
opensource.hdata.plugin.reader.hbase.HBaseSplitter 34 | 35 | 36 | 37 | 38 | 39 | console 40 | opensource.hdata.plugin.writer.console.ConsoleWriter 41 | 42 | 43 | jdbc 44 | opensource.hdata.plugin.writer.jdbc.JDBCWriter 45 | 46 | 47 | hive 48 | opensource.hdata.plugin.writer.hive.HiveWriter 49 | 50 | 51 | hdfs 52 | opensource.hdata.plugin.writer.hdfs.HDFSWriter 53 | 54 | 55 | ftp 56 | opensource.hdata.plugin.writer.ftp.FTPWriter 57 | 58 | 59 | mongodb 60 | opensource.hdata.plugin.writer.mongodb.MongoDBWriter 61 | 62 | 63 | hbase 64 | opensource.hdata.plugin.writer.hbase.HBaseWriter 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /job-examples/ftp-ftp.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 192.168.130.161 6 | 1 7 | 1@1 8 | /etldata/input/sa_log/151_125 9 | 10 | serv11-saIntf-pageTime-access-20140407_00.0.log 11 | | 12 | 13 | 1 14 | 15 | 16 | 17 | localhost 18 | 1 19 | 1 20 | /ftp/tmp/1.txt 21 | 1 22 | 23 | 24 | -------------------------------------------------------------------------------- /job-examples/hbase-console.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 192.168.142.16,192.168.142.17,192.168.142.18 6 | 2181 7 | ip_address
8 | :rowkey,cf:start_ip,cf:end_ip,cf:start_ip_num,cf:end_ip_num,cf:country,cf:area,cf:province,cf:city,cf:isp 9 | id,start_ip,end_ip,start_ip_num,end_ip_num,country,area,province,city,isp 10 | 958200 11 | 12 | 2 13 |
14 | 15 | 16 | 1 17 | 18 |
19 | -------------------------------------------------------------------------------- /job-examples/hdfs-hive.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | hdfs://192.168.142.21:8020/tmp/hdata_test 6 | .*\.csv 7 | , 8 | gb18030 9 | bigdata 10 | 1 11 | 12 | 13 | 14 | thrift://192.168.142.21:9083 15 | default 16 | tmp_hdata_rcfile_test
17 | bigdata 18 | 1 19 |
20 |
21 | -------------------------------------------------------------------------------- /job-examples/hdfs-jdbc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | hdfs://192.168.142.21:8020/tmp/hdata_test 6 | hdfs.test 7 | bigdata 8 | 1 9 | 10 | 11 | 12 | org.postgresql.Driver 13 | jdbc:postgresql://localhost:5432/ip 14 | postgres 15 | toor 16 | tmp
17 | 3 18 |
19 |
20 | -------------------------------------------------------------------------------- /job-examples/hive-jdbc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | thrift://192.168.142.21:9083 6 | bi_td 7 | tdm_common_td
8 | 9 | 1 10 |
11 | 12 | 13 | org.postgresql.Driver 14 | jdbc:postgresql://localhost:5432/tmp 15 | postgres 16 | toor 17 | tdm_common_td
18 | 3 19 |
20 |
21 | -------------------------------------------------------------------------------- /job-examples/jdbc-hbase.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | org.postgresql.Driver 6 | jdbc:postgresql://10.22.8.140:5432/ip 7 | postgres 8 | toor 9 | ip_address
10 | 11 | 12 | 13 | 14 | 15 | 1 16 |
17 | 18 | 19 | 192.168.142.16,192.168.142.17,192.168.142.18,192.168.142.19,192.168.142.20,192.168.142.21,192.168.142.23,192.168.142.24,192.168.142.25,192.168.142.26,192.168.142.27 20 | 2181 21 | ip_address
22 | :rowkey,cf:start_ip,cf:end_ip,cf:start_ip_num,cf:end_ip_num,cf:country,cf:area,cf:province,cf:city,cf:isp 23 | 10000 24 | 1 25 |
26 |
27 | -------------------------------------------------------------------------------- /job-examples/jdbc-hdfs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | org.postgresql.Driver 6 | jdbc:postgresql://10.22.8.140:5432/ip 7 | postgres 8 | toor 9 | ip_address
10 | 11 | 12 | 13 | 14 | 15 | 3 16 |
17 | 18 | 19 | hdfs://192.168.142.21:8020/tmp/hdata_test/hdfs.test 20 | bigdata 21 | 1 22 | 23 |
24 | -------------------------------------------------------------------------------- /job-examples/jdbc-hive.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | org.postgresql.Driver 6 | jdbc:postgresql://localhost:5432/ip 7 | postgres 8 | toor 9 | ip_address
10 | 11 | 12 | 13 | 14 | 15 | 3 16 |
17 | 18 | 19 | thrift://192.168.142.21:9083 20 | default 21 | tmp_hdata_rcfile_test_p
22 | p=20140407 23 | bigdata 24 | 3 25 |
26 |
27 | -------------------------------------------------------------------------------- /job-examples/jdbc-jdbc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | org.postgresql.Driver 6 | jdbc:postgresql://localhost:5432/ip 7 | postgres 8 | toor 9 | ip_address
10 | 11 | 12 | 13 | 14 | 15 | 3 16 |
17 | 18 | 19 | org.postgresql.Driver 20 | jdbc:postgresql://localhost:5432/ip 21 | postgres 22 | toor 23 | tmp
24 | 10000 25 | 3 26 |
27 |
28 | -------------------------------------------------------------------------------- /job-examples/jdbc-mongodb.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | org.postgresql.Driver 6 | jdbc:postgresql://localhost:5432/ip 7 | postgres 8 | toor 9 | ip_address
10 | 11 | 12 | 13 | 14 | 15 | 3 16 |
17 | 18 | 19 | mongodb://localhost/test.ip 20 | 21 | 3 22 | 23 |
24 | -------------------------------------------------------------------------------- /job-examples/job.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | com.mysql.jdbc.Driver 6 | jdbc:mysql://localhost:3306/test 7 | root 8 | toor 9 | ip_address
10 | 11 | 12 | 13 | 14 | 15 | 7 16 |
17 | 18 | 19 | com.mysql.jdbc.Driver 20 | jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=UTF-8 21 | root 22 | toor 23 | tmp
24 | 10000 25 | 3 26 |
27 |
28 | -------------------------------------------------------------------------------- /job-examples/mongodb-console.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | mongodb://localhost/test.ip 6 | {"city":"南京市"} 7 | 1 8 | 9 | 10 | 11 | 1 12 | 13 | 14 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | opensource 5 | hdata 6 | hdata 7 | 0.1 8 | 9 | 10 | UTF-8 11 | 1.2.1 12 | 0.12.0 13 | 0.94.16 14 | 15 | 16 | 2014 17 | 18 | 19 | Jayer 20 | dczxxuhai@gmail.com 21 | 22 | 23 | 24 | 25 | 26 | org.apache.logging.log4j 27 | log4j-api 28 | 2.0-rc1 29 | 30 | 31 | org.apache.logging.log4j 32 | log4j-core 33 | 2.0-rc1 34 | 35 | 36 | com.google.guava 37 | guava 38 | 16.0.1 39 | 40 | 41 | com.lmax 42 | disruptor 43 | 3.2.1 44 | 45 | 46 | commons-cli 47 | commons-cli 48 | 1.2 49 | 50 | 51 | org.apache.commons 52 | commons-lang3 53 | 3.3.2 54 | 55 | 56 | commons-cli 57 | commons-cli 58 | 1.2 59 | 60 | 61 | org.jdom 62 | jdom2 63 | 2.0.5 64 | 65 | 66 | javassist 67 | javassist 68 | 3.18.1-GA 69 | 70 | 71 | org.antlr 72 | antlr-runtime 73 | 3.4 74 | 75 | 76 | commons-configuration 77 | commons-configuration 78 | 1.9 79 | 80 | 81 | commons-lang 82 | commons-lang 83 | 2.6 84 | 85 | 86 | commons-logging 87 | commons-logging 88 | 1.1.1 89 | 90 | 91 | commons-net 92 | commons-net 93 | 3.3 94 | 95 | 96 | log4j 97 | log4j 98 | 1.2.17 99 | 100 | 101 | org.slf4j 102 | slf4j-api 103 | 1.7.6 104 | 105 | 106 | org.slf4j 107 | slf4j-log4j12 108 | 1.7.6 109 | 110 | 111 | org.apache.hive 112 | hive-exec 113 | ${hiveVersion} 114 | 115 | 116 | org.apache.hive 117 | hive-metastore 118 | ${hiveVersion} 119 | 120 | 121 | org.apache.hadoop 122 | hadoop-core 123 | ${hadoopVersion} 124 | 125 | 126 | org.apache.hbase 127 | hbase 128 | ${hbaseVersion} 129 | 130 | 131 | org.apache.zookeeper 132 | zookeeper 133 | 3.4.6 134 | 135 | 136 | org.mongodb 137 | mongo-java-driver 138 | 2.12.0 139 | 140 | 141 | javax.jdo 142 | jdo-api 143 | 3.0.1 144 | 145 | 146 | org.apache.thrift 147 | libfb303 148 | 0.9.0 149 | 150 | 151 | org.datanucleus 152 | datanucleus-api-jdo 153 | 3.2.1 154 | 155 | 156 | org.datanucleus 157 | datanucleus-core 158 | 3.2.2 159 | 160 | 161 | org.datanucleus 162 | datanucleus-rdbms 163 | 3.2.1 164 | 165 | 166 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/CliDriver.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | import java.util.Map.Entry; 6 | import java.util.Properties; 7 | 8 | import opensource.hdata.config.JobConfig; 9 | import opensource.hdata.config.PluginConfig; 10 | import opensource.hdata.core.HData; 11 | 12 | import org.apache.commons.cli.CommandLine; 13 | import org.apache.commons.cli.CommandLineParser; 14 | import org.apache.commons.cli.HelpFormatter; 15 | import org.apache.commons.cli.OptionBuilder; 16 | import org.apache.commons.cli.Options; 17 | import org.apache.commons.cli.ParseException; 18 | import org.apache.commons.cli.PosixParser; 19 | 20 | public class CliDriver { 21 | 22 | private static final String XML_FILE = "f"; 23 | private static final String HDATA_VARS = "var"; 24 | 25 | /** 26 | * 创建命令行选项 27 | * 28 | * @return 29 | */ 30 | public Options createOptions() { 31 | Options options = new Options(); 32 | 
options.addOption(XML_FILE, null, true, "job xml path"); 33 | OptionBuilder.withValueSeparator(); 34 | OptionBuilder.hasArgs(2); 35 | OptionBuilder.withArgName("property=value"); 36 | OptionBuilder.withLongOpt(HDATA_VARS); 37 | options.addOption(OptionBuilder.create()); 38 | return options; 39 | } 40 | 41 | /** 42 | * 打印命令行帮助信息 43 | * 44 | * @param options 45 | */ 46 | public void printHelp(Options options) { 47 | HelpFormatter formatter = new HelpFormatter(); 48 | formatter.printHelp(" ", options); 49 | } 50 | 51 | /** 52 | * 替换命令行变量 53 | * 54 | * @param config 55 | * @param vars 56 | */ 57 | public void replaceConfigVars(PluginConfig config, Map vars) { 58 | for (Entry confEntry : config.entrySet()) { 59 | if (confEntry.getKey().getClass() == String.class && confEntry.getValue().getClass() == String.class) { 60 | for (Entry varEntry : vars.entrySet()) { 61 | String replaceVar = "${" + varEntry.getKey() + "}"; 62 | if (confEntry.getValue().toString().contains(replaceVar)) { 63 | config.put(confEntry.getKey(), confEntry.getValue().toString().replace(replaceVar, varEntry.getValue())); 64 | } 65 | } 66 | } 67 | } 68 | } 69 | 70 | /** 71 | * 主程序入口 72 | * 73 | * @param args 74 | */ 75 | public static void main(String[] args) { 76 | CliDriver cliDriver = new CliDriver(); 77 | Options options = cliDriver.createOptions(); 78 | if (args.length < 1) { 79 | cliDriver.printHelp(options); 80 | System.exit(-1); 81 | } 82 | 83 | CommandLineParser parser = new PosixParser(); 84 | CommandLine cmd = null; 85 | try { 86 | cmd = parser.parse(options, args); 87 | String jobXmlPath = cmd.getOptionValue(XML_FILE); 88 | JobConfig jobConfig = new JobConfig(jobXmlPath); 89 | Map vars = new HashMap(); 90 | Properties properties = cmd.getOptionProperties(HDATA_VARS); 91 | for (String key : properties.stringPropertyNames()) { 92 | vars.put(key, properties.getProperty(key)); 93 | } 94 | 95 | final PluginConfig readerConfig = jobConfig.getReaderConfig(); 96 | final PluginConfig writerConfig = jobConfig.getWriterConfig(); 97 | 98 | cliDriver.replaceConfigVars(readerConfig, vars); 99 | cliDriver.replaceConfigVars(writerConfig, vars); 100 | 101 | HData hData = new HData(); 102 | hData.start(jobConfig); 103 | } catch (ParseException e) { 104 | cliDriver.printHelp(options); 105 | System.exit(-1); 106 | } 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/common/Constants.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.common; 2 | 3 | public class Constants { 4 | 5 | public static final String HDATA_XML = "hdata.xml"; 6 | public static final String PLUGINS_XML = "plugins.xml"; 7 | public static final String LOG4J2_XML = "log4j2.xml"; 8 | public static final String DATE_FORMAT_STRING = "yyyy-MM-dd HH:mm:ss"; 9 | public static final String COLUMNS_SPLIT_REGEX = "\\s*,\\s*"; 10 | 11 | private Constants() { 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/common/HDataConfigConstants.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.common; 2 | 3 | public class HDataConfigConstants { 4 | 5 | public static final String STORAGE_BUFFER_SIZE = "hdata.storage.default.buffer.size"; 6 | public static final String HDATA_STORAGE_DISRUPTOR_WAIT_STRATEGY = "hdata.storage.disruptor.wait.strategy"; 7 | public static final String HDATA_SLEEP_MILLIS 
= "hdata.sleep.millis"; 8 | 9 | private HDataConfigConstants() { 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/config/Configuration.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.config; 2 | 3 | import java.util.Properties; 4 | 5 | public abstract class Configuration extends Properties { 6 | 7 | private static final long serialVersionUID = 8606831740240321865L; 8 | 9 | public String getString(String key, String defalutValue) { 10 | String value = getProperty(key); 11 | return value != null ? value : defalutValue; 12 | } 13 | 14 | public String getString(String key) { 15 | return getProperty(key); 16 | } 17 | 18 | public void setString(String key, String value) { 19 | setProperty(key, value); 20 | } 21 | 22 | public int getInt(String key, int defalutValue) { 23 | String value = getProperty(key); 24 | return value != null ? Integer.parseInt(value) : defalutValue; 25 | } 26 | 27 | public void setInt(String key, int value) { 28 | setString(key, Integer.toString(value)); 29 | } 30 | 31 | public long getLong(String key, long defalutValue) { 32 | String value = getProperty(key); 33 | return value != null ? Long.parseLong(value) : defalutValue; 34 | } 35 | 36 | public void setLong(String key, long value) { 37 | setString(key, Long.toString(value)); 38 | } 39 | 40 | public double getDouble(String key, double defalutValue) { 41 | String value = getProperty(key); 42 | return value != null ? Double.parseDouble(value) : defalutValue; 43 | } 44 | 45 | public void setDouble(String key, double value) { 46 | setString(key, Double.toString(value)); 47 | } 48 | 49 | public boolean getBoolean(String key, boolean defalutValue) { 50 | String value = getProperty(key); 51 | return value != null ? Boolean.parseBoolean(value) : defalutValue; 52 | } 53 | 54 | public void setBoolean(String key, boolean value) { 55 | setString(key, Boolean.toString(value)); 56 | } 57 | 58 | public float getFloat(String key, float defalutValue) { 59 | String value = getProperty(key); 60 | return value != null ? 
Float.parseFloat(value) : defalutValue; 61 | } 62 | 63 | public void setFloat(String key, float value) { 64 | setString(key, Float.toString(value)); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/config/EngineConfig.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.config; 2 | 3 | import java.util.List; 4 | 5 | import opensource.hdata.common.Constants; 6 | import opensource.hdata.exception.HDataException; 7 | import opensource.hdata.util.Utils; 8 | import opensource.hdata.util.XMLUtils; 9 | 10 | import org.jdom2.Element; 11 | 12 | public class EngineConfig extends Configuration { 13 | 14 | private static final long serialVersionUID = -4751544524691015405L; 15 | 16 | private EngineConfig() { 17 | super(); 18 | } 19 | 20 | public static EngineConfig create() { 21 | EngineConfig conf = new EngineConfig(); 22 | Element root = null; 23 | try { 24 | root = XMLUtils.load(Utils.getConfigDir() + Constants.HDATA_XML); 25 | } catch (Exception e) { 26 | throw new HDataException("Init EngineConf error!", e); 27 | } 28 | List list = root.getChildren("property"); 29 | 30 | for (Element element : list) { 31 | conf.setString(element.getChildText("name"), element.getChildText("value")); 32 | } 33 | return conf; 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/config/JobConfig.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.config; 2 | 3 | import opensource.hdata.core.PluginLoader; 4 | import opensource.hdata.core.plugin.Reader; 5 | import opensource.hdata.core.plugin.Splitter; 6 | import opensource.hdata.core.plugin.Writer; 7 | import opensource.hdata.exception.HDataException; 8 | import opensource.hdata.util.XMLUtils; 9 | 10 | import org.jdom2.Element; 11 | 12 | public class JobConfig extends Configuration { 13 | 14 | private Element root; 15 | private PluginConfig readerConfig; 16 | private PluginConfig writerConfig; 17 | private static final long serialVersionUID = -106497323171420503L; 18 | 19 | public JobConfig() { 20 | super(); 21 | } 22 | 23 | public JobConfig(String jobXmlPath) { 24 | this(); 25 | try { 26 | root = XMLUtils.load(jobXmlPath); 27 | } catch (Exception e) { 28 | throw new HDataException("Can not load job xml file: " + jobXmlPath, e); 29 | } 30 | } 31 | 32 | public PluginConfig getReaderConfig() { 33 | if (readerConfig == null) { 34 | readerConfig = new PluginConfig(); 35 | for (Element e : root.getChild("reader").getChildren()) { 36 | if (!e.getValue().trim().isEmpty()) { 37 | readerConfig.setProperty(e.getName(), e.getValue()); 38 | } 39 | } 40 | } 41 | 42 | return readerConfig; 43 | } 44 | 45 | public PluginConfig getWriterConfig() { 46 | if (writerConfig == null) { 47 | writerConfig = new PluginConfig(); 48 | for (Element e : root.getChild("writer").getChildren()) { 49 | if (!e.getValue().trim().isEmpty()) { 50 | writerConfig.setProperty(e.getName(), e.getValue()); 51 | } 52 | } 53 | } 54 | return writerConfig; 55 | } 56 | 57 | public String getReaderName() { 58 | return root.getChild("reader").getAttributeValue("name"); 59 | } 60 | 61 | public String getReaderClassName() { 62 | return PluginLoader.getReaderPlugin(getReaderName()).getClassName(); 63 | } 64 | 65 | public Reader newReader() { 66 | String readerClassName = getReaderClassName(); 67 | if (readerClassName == null) { 68 | 
throw new HDataException("Can not find class for reader: " + getReaderName()); 69 | } 70 | 71 | try { 72 | return (Reader) Class.forName(readerClassName).newInstance(); 73 | } catch (Exception e) { 74 | throw new HDataException("Can not create new reader instance for: " + getReaderName(), e); 75 | } 76 | } 77 | 78 | public Splitter newSplitter() { 79 | String spliterClassName = PluginLoader.getReaderPlugin(getReaderName()).getSplitterClassName(); 80 | 81 | if (spliterClassName == null) { 82 | return null; 83 | } 84 | 85 | try { 86 | return (Splitter) Class.forName(spliterClassName.trim()).newInstance(); 87 | } catch (Exception e) { 88 | throw new HDataException("Can not find splitter for reader: " + getReaderName(), e); 89 | } 90 | } 91 | 92 | public String getWriterName() { 93 | return root.getChild("writer").getAttributeValue("name"); 94 | } 95 | 96 | public String getWriterClassName() { 97 | return PluginLoader.getWriterPlugin(getWriterName()).getClassName(); 98 | } 99 | 100 | public Writer newWriter() { 101 | String writerClassName = getWriterClassName(); 102 | if (writerClassName == null) { 103 | throw new HDataException("Can not find class for writer: " + getWriterName()); 104 | } 105 | 106 | try { 107 | return (Writer) Class.forName(getWriterClassName()).newInstance(); 108 | } catch (Exception e) { 109 | throw new HDataException("Can not create new writer instance for: " + getWriterName(), e); 110 | } 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/config/PluginConfig.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.config; 2 | 3 | public class PluginConfig extends Configuration { 4 | 5 | private static final String PARALLELISM_KEY = "parallelism"; 6 | private static final int DEFAULT_PARALLELISM = 1; 7 | private static final long serialVersionUID = 3311331304791946068L; 8 | 9 | public PluginConfig() { 10 | super(); 11 | } 12 | 13 | public int getParallelism() { 14 | int parallelism = getInt(PARALLELISM_KEY, DEFAULT_PARALLELISM); 15 | if (parallelism < 1) { 16 | throw new IllegalArgumentException("Reader and Writer parallelism must be >= 1."); 17 | } 18 | return parallelism; 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/core/DefaultRecord.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.core; 2 | 3 | import opensource.hdata.core.plugin.Record; 4 | 5 | public class DefaultRecord implements Record { 6 | 7 | private Object[] fields; 8 | private int cursor; 9 | 10 | public DefaultRecord(int fieldCount) { 11 | fields = new Object[fieldCount]; 12 | } 13 | 14 | public void addField(int index, Object field) { 15 | fields[index] = field; 16 | this.cursor++; 17 | } 18 | 19 | public void addField(Object field) { 20 | addField(cursor, field); 21 | } 22 | 23 | public Object getField(int index) { 24 | return fields[index]; 25 | } 26 | 27 | public int getFieldsCount() { 28 | return fields.length; 29 | } 30 | 31 | @Override 32 | public String toString() { 33 | StringBuilder sb = new StringBuilder(); 34 | sb.append("{"); 35 | for (int i = 0, len = fields.length; i < len; i++) { 36 | if (i > 0) { 37 | sb.append(", "); 38 | } 39 | sb.append(fields[i]); 40 | } 41 | sb.append("}"); 42 | return sb.toString(); 43 | } 44 | 45 | } 46 | 
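DefaultRecord above is the Record implementation that readers hand to the framework: a fixed-size Object array plus a cursor, so addField(Object) appends at the next free position. A minimal sketch of how a reader body typically fills one and pushes it downstream; the three-column layout and the values are invented for illustration, and recordCollector is the argument passed to Reader.execute (defined further below):

    // Hypothetical three-column row, e.g. parsed from one input line.
    Record record = new DefaultRecord(3);
    record.addField("192.168.0.1");   // stored at index 0 (the current cursor)
    record.addField(958200L);         // index 1
    record.addField("CN");            // index 2
    recordCollector.send(record);     // publishes to the Disruptor ring buffer and bumps the read counter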
-------------------------------------------------------------------------------- /src/main/java/opensource/hdata/core/Fields.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.core; 2 | 3 | import java.util.ArrayList; 4 | 5 | public class Fields extends ArrayList { 6 | 7 | private static final long serialVersionUID = -174064216143075549L; 8 | 9 | public Fields() { 10 | super(); 11 | } 12 | 13 | public Fields(String... fields) { 14 | super(); 15 | for (String field : fields) { 16 | this.add(field); 17 | } 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/core/HData.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.core; 2 | 3 | import java.text.DecimalFormat; 4 | import java.util.ArrayList; 5 | import java.util.List; 6 | import java.util.concurrent.ExecutorService; 7 | import java.util.concurrent.Executors; 8 | 9 | import opensource.hdata.common.HDataConfigConstants; 10 | import opensource.hdata.config.EngineConfig; 11 | import opensource.hdata.config.JobConfig; 12 | import opensource.hdata.config.PluginConfig; 13 | import opensource.hdata.core.plugin.Reader; 14 | import opensource.hdata.core.plugin.RecordCollector; 15 | import opensource.hdata.core.plugin.Splitter; 16 | import opensource.hdata.core.plugin.Writer; 17 | import opensource.hdata.exception.HDataException; 18 | import opensource.hdata.util.Utils; 19 | 20 | import org.apache.logging.log4j.LogManager; 21 | import org.apache.logging.log4j.Logger; 22 | 23 | import com.lmax.disruptor.WaitStrategy; 24 | import com.lmax.disruptor.dsl.Disruptor; 25 | import com.lmax.disruptor.dsl.ProducerType; 26 | 27 | public class HData { 28 | 29 | private DecimalFormat df = new DecimalFormat("#0.00"); 30 | private static final Logger LOG = LogManager.getLogger(HData.class); 31 | 32 | public void start(final JobConfig jobConfig) { 33 | final PluginConfig readerConfig = jobConfig.getReaderConfig(); 34 | final PluginConfig writerConfig = jobConfig.getWriterConfig(); 35 | 36 | LOG.info("Reader: {}, Writer: {}", jobConfig.getReaderName(), jobConfig.getWriterName()); 37 | int writerParallelism = writerConfig.getParallelism(); 38 | 39 | final JobContext context = new JobContext(); 40 | context.setJobConfig(jobConfig); 41 | final Metric metric = new Metric(); 42 | context.setMetric(metric); 43 | final OutputFieldsDeclarer outputFieldsDeclarer = new OutputFieldsDeclarer(context); 44 | context.setDeclarer(outputFieldsDeclarer); 45 | 46 | final EngineConfig engineConfig = EngineConfig.create(); 47 | context.setEngineConfig(engineConfig); 48 | 49 | long sleepMillis = engineConfig.getLong(HDataConfigConstants.HDATA_SLEEP_MILLIS, 3000); 50 | 51 | List readerConfigList = null; 52 | Splitter spliter = jobConfig.newSplitter(); 53 | if (spliter != null) { 54 | LOG.info("Executing spliter for reader."); 55 | readerConfigList = spliter.split(jobConfig); 56 | if (readerConfigList == null || readerConfigList.size() == 0) { 57 | LOG.info("Job Finished."); 58 | System.exit(0); 59 | } 60 | } else if (readerConfig.getParallelism() > 1) { 61 | throw new HDataException("Reader parallelism is " + readerConfig.getParallelism() + ", but can not find splitter."); 62 | } else { 63 | readerConfigList = new ArrayList(); 64 | readerConfigList.add(readerConfig); 65 | } 66 | 67 | Reader[] readers = new Reader[readerConfigList.size()]; 68 | for (int i = 0, len = 
readers.length; i < len; i++) { 69 | readers[i] = jobConfig.newReader(); 70 | } 71 | 72 | LOG.info("Reader parallelism: {}, Writer parallelism: {}", readers.length, writerParallelism); 73 | 74 | final Writer[] writers = new Writer[writerParallelism]; 75 | final RecordWorkHandler[] handlers = new RecordWorkHandler[writerParallelism]; 76 | for (int i = 0; i < writerParallelism; i++) { 77 | writers[i] = jobConfig.newWriter(); 78 | handlers[i] = new RecordWorkHandler(readers, writers[i], context, writerConfig); 79 | } 80 | 81 | int bufferSize = engineConfig.getInt(HDataConfigConstants.STORAGE_BUFFER_SIZE, 1024); 82 | String WaitStrategyName = engineConfig.getString(HDataConfigConstants.HDATA_STORAGE_DISRUPTOR_WAIT_STRATEGY, "BlockingWaitStrategy"); 83 | 84 | Storage storage = createStorage(bufferSize, WaitStrategyName, readers.length, handlers); 85 | context.setStorage(storage); 86 | RecordCollector rc = new RecordCollector(storage, metric); 87 | 88 | LOG.info("Transfering data from reader to writer..."); 89 | ExecutorService es = Executors.newFixedThreadPool(readers.length); 90 | for (int i = 0, len = readerConfigList.size(); i < len; i++) { 91 | es.submit(new ReaderWorker(readers[i], context, readerConfigList.get(i), rc)); 92 | } 93 | es.shutdown(); 94 | 95 | metric.setReaderStartTime(System.currentTimeMillis()); 96 | metric.setWriterStartTime(System.currentTimeMillis()); 97 | while (!es.isTerminated()) { 98 | Utils.sleep(sleepMillis); 99 | LOG.info("Read: {}\tWrite: {}", metric.getReadCount().get(), metric.getWriteCount().get()); 100 | } 101 | metric.setReaderEndTime(System.currentTimeMillis()); 102 | 103 | while (!storage.isEmpty()) { 104 | if (context.isWriterError()) { 105 | LOG.error("Write error."); 106 | break; 107 | } 108 | Utils.sleep(sleepMillis); 109 | LOG.info("Read Finished(total: {}), Write: {}", metric.getReadCount().get(), metric.getWriteCount().get()); 110 | } 111 | storage.close(); 112 | LOG.info("Read Finished(total: {}), Write Finished(total: {})", metric.getReadCount().get(), metric.getWriteCount().get()); 113 | 114 | metric.setWriterEndTime(System.currentTimeMillis()); 115 | for (Writer writer : writers) { 116 | writer.close(); 117 | } 118 | 119 | double readSeconds = (metric.getReaderEndTime() - metric.getReaderStartTime()) / 1000d; 120 | double writeSeconds = (metric.getWriterEndTime() - metric.getWriterStartTime()) / 1000d; 121 | String readSpeed = df.format(metric.getReadCount().get() / readSeconds); 122 | String writeSpeed = df.format(metric.getWriteCount().get() / writeSeconds); 123 | LOG.info("Read spent time: {}s, Write spent time: {}s", df.format(readSeconds), df.format(writeSeconds)); 124 | LOG.info("Read records: {}/s, Write records: {}/s", readSpeed, writeSpeed); 125 | } 126 | 127 | private Storage createStorage(int bufferSize, String WaitStrategyName, int producerCount, RecordWorkHandler[] handlers) { 128 | WaitStrategy waitStrategy = WaitStrategyFactory.build(WaitStrategyName); 129 | ExecutorService executorService = Executors.newCachedThreadPool(); 130 | ProducerType producerType; 131 | if (producerCount == 1) { 132 | producerType = ProducerType.SINGLE; 133 | } else { 134 | producerType = ProducerType.MULTI; 135 | } 136 | Disruptor disruptor = new Disruptor(RecordEvent.FACTORY, bufferSize, executorService, producerType, waitStrategy); 137 | Storage storage = new Storage(disruptor, handlers); 138 | executorService.shutdown(); 139 | return storage; 140 | } 141 | 142 | } 143 | -------------------------------------------------------------------------------- 
/src/main/java/opensource/hdata/core/JobContext.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.core; 2 | 3 | import opensource.hdata.config.Configuration; 4 | import opensource.hdata.config.EngineConfig; 5 | import opensource.hdata.config.JobConfig; 6 | 7 | public class JobContext { 8 | 9 | private Fields fields; 10 | private EngineConfig engineConfig; 11 | private JobConfig jobConfig; 12 | private OutputFieldsDeclarer declarer; 13 | private Storage storage; 14 | private Metric metric; 15 | private boolean isWriterError; 16 | 17 | public Fields getFields() { 18 | return fields; 19 | } 20 | 21 | protected void setFields(Fields fields) { 22 | this.fields = fields; 23 | } 24 | 25 | public Configuration getEngineConfig() { 26 | return engineConfig; 27 | } 28 | 29 | public void setEngineConfig(EngineConfig engineConfig) { 30 | this.engineConfig = engineConfig; 31 | } 32 | 33 | protected OutputFieldsDeclarer getDeclarer() { 34 | return declarer; 35 | } 36 | 37 | protected void setDeclarer(OutputFieldsDeclarer declarer) { 38 | this.declarer = declarer; 39 | } 40 | 41 | public Storage getStorage() { 42 | return storage; 43 | } 44 | 45 | public void setStorage(Storage storage) { 46 | this.storage = storage; 47 | } 48 | 49 | public Metric getMetric() { 50 | return metric; 51 | } 52 | 53 | public void setMetric(Metric metric) { 54 | this.metric = metric; 55 | } 56 | 57 | public JobConfig getJobConfig() { 58 | return jobConfig; 59 | } 60 | 61 | public void setJobConfig(JobConfig jobConfig) { 62 | this.jobConfig = jobConfig; 63 | } 64 | 65 | public boolean isWriterError() { 66 | return isWriterError; 67 | } 68 | 69 | public void setWriterError(boolean isWriterError) { 70 | this.isWriterError = isWriterError; 71 | } 72 | 73 | } 74 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/core/Metric.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.core; 2 | 3 | import java.util.concurrent.atomic.AtomicLong; 4 | 5 | public class Metric { 6 | 7 | private AtomicLong readCount = new AtomicLong(0); 8 | private AtomicLong writeCount = new AtomicLong(0); 9 | private long readerStartTime; 10 | private long readerEndTime; 11 | private long writerStartTime; 12 | private long writerEndTime; 13 | 14 | public AtomicLong getReadCount() { 15 | return readCount; 16 | } 17 | 18 | public void setReadCount(AtomicLong readCount) { 19 | this.readCount = readCount; 20 | } 21 | 22 | public AtomicLong getWriteCount() { 23 | return writeCount; 24 | } 25 | 26 | public void setWriteCount(AtomicLong writeCount) { 27 | this.writeCount = writeCount; 28 | } 29 | 30 | public long getReaderStartTime() { 31 | return readerStartTime; 32 | } 33 | 34 | public void setReaderStartTime(long readerStartTime) { 35 | this.readerStartTime = readerStartTime; 36 | } 37 | 38 | public long getReaderEndTime() { 39 | return readerEndTime; 40 | } 41 | 42 | public void setReaderEndTime(long readerEndTime) { 43 | this.readerEndTime = readerEndTime; 44 | } 45 | 46 | public long getWriterStartTime() { 47 | return writerStartTime; 48 | } 49 | 50 | public void setWriterStartTime(long writerStartTime) { 51 | this.writerStartTime = writerStartTime; 52 | } 53 | 54 | public long getWriterEndTime() { 55 | return writerEndTime; 56 | } 57 | 58 | public void setWriterEndTime(long writerEndTime) { 59 | this.writerEndTime = writerEndTime; 60 | } 61 | 62 | } 63 | 
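HData.start above is the whole engine; CliDriver does little more than parse the -f and --var command-line options, substitute ${var} placeholders into the reader and writer configs, and call it. A minimal programmatic launch, equivalent to bin/hdata -f <job.xml> (the job file path is illustrative):

    import opensource.hdata.config.JobConfig;
    import opensource.hdata.core.HData;

    public class LaunchExample {
        public static void main(String[] args) {
            JobConfig jobConfig = new JobConfig("job-examples/jdbc-jdbc.xml");
            new HData().start(jobConfig);   // runs the splitter, starts the Disruptor pipeline, reports metrics
        }
    }

The job files under job-examples appear in this dump with their XML markup stripped. Judging from JobConfig's parsing (root.getChild("reader") and root.getChild("writer"), a name attribute naming the plugin, every non-empty child element copied into a PluginConfig, and a parallelism key read by PluginConfig), a job file presumably has a shape like the sketch below; the root element name and the plugin-specific property names are guesses here, only the name attribute and parallelism are confirmed by the code:

    <job>
        <reader name="jdbc">
            <!-- plugin-specific properties go here as child elements -->
            <parallelism>3</parallelism>
        </reader>
        <writer name="console">
            <parallelism>1</parallelism>
        </writer>
    </job>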
-------------------------------------------------------------------------------- /src/main/java/opensource/hdata/core/OutputFieldsDeclarer.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.core; 2 | 3 | public class OutputFieldsDeclarer { 4 | 5 | private JobContext context; 6 | 7 | public OutputFieldsDeclarer(JobContext context) { 8 | this.context = context; 9 | } 10 | 11 | public void declare(Fields fields) { 12 | context.setFields(fields); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/core/PluginLoader.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.core; 2 | 3 | import java.util.HashMap; 4 | import java.util.List; 5 | import java.util.Map; 6 | 7 | import opensource.hdata.common.Constants; 8 | import opensource.hdata.core.plugin.ReaderPlugin; 9 | import opensource.hdata.core.plugin.WriterPlugin; 10 | import opensource.hdata.exception.HDataException; 11 | import opensource.hdata.util.Utils; 12 | import opensource.hdata.util.XMLUtils; 13 | 14 | import org.jdom2.Element; 15 | 16 | public class PluginLoader { 17 | 18 | private static Map readerMap; 19 | private static Map writerMap; 20 | 21 | public static ReaderPlugin getReaderPlugin(String name) { 22 | return readerMap.get(name); 23 | } 24 | 25 | public static WriterPlugin getWriterPlugin(String name) { 26 | return writerMap.get(name); 27 | } 28 | 29 | static { 30 | readerMap = new HashMap(); 31 | writerMap = new HashMap(); 32 | 33 | Element root; 34 | try { 35 | root = XMLUtils.load(Utils.getConfigDir() + Constants.PLUGINS_XML); 36 | } catch (Exception e) { 37 | throw new HDataException(e); 38 | } 39 | List readers = root.getChild("readers").getChildren("reader"); 40 | for (Element e : readers) { 41 | ReaderPlugin readerPlugin = new ReaderPlugin(); 42 | readerPlugin.setPluginName(e.getChildText("name")); 43 | readerPlugin.setClassName(e.getChildText("class")); 44 | readerPlugin.setSplitterClassName(e.getChildText("splitter")); 45 | readerMap.put(readerPlugin.getPluginName(), readerPlugin); 46 | } 47 | 48 | List writers = root.getChild("writers").getChildren("writer"); 49 | for (Element e : writers) { 50 | WriterPlugin writerPlugin = new WriterPlugin(); 51 | writerPlugin.setPluginName(e.getChildText("name")); 52 | writerPlugin.setClassName(e.getChildText("class")); 53 | writerMap.put(writerPlugin.getPluginName(), writerPlugin); 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/core/ReaderWorker.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.core; 2 | 3 | import opensource.hdata.config.PluginConfig; 4 | import opensource.hdata.core.plugin.Reader; 5 | import opensource.hdata.core.plugin.RecordCollector; 6 | 7 | public class ReaderWorker implements Runnable { 8 | 9 | private Reader reader; 10 | private JobContext context; 11 | private PluginConfig readerConfig; 12 | private RecordCollector rc; 13 | 14 | public ReaderWorker(Reader reader, JobContext context, PluginConfig readerConfig, RecordCollector rc) { 15 | this.reader = reader; 16 | this.context = context; 17 | this.readerConfig = readerConfig; 18 | this.rc = rc; 19 | } 20 | 21 | public void run() { 22 | reader.prepare(context, readerConfig); 23 | reader.execute(rc); 24 | reader.close(); 25 | } 26 | 27 | } 28 | 
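conf/plugins.xml appears near the top of this dump with its markup stripped, leaving only plugin names and class names. PluginLoader above resolves plugins by reading readers/reader and writers/writer elements with name, class and (for readers only) splitter children, so each entry presumably looked like the sketch below; the tag names are inferred from PluginLoader's getChild/getChildText calls, and the root element name is a guess:

    <plugins>
        <readers>
            <reader>
                <name>jdbc</name>
                <class>opensource.hdata.plugin.reader.jdbc.JDBCReader</class>
                <splitter>opensource.hdata.plugin.reader.jdbc.JDBCSplitter</splitter>
            </reader>
            <!-- one <reader> entry per reader plugin -->
        </readers>
        <writers>
            <writer>
                <name>console</name>
                <class>opensource.hdata.plugin.writer.console.ConsoleWriter</class>
            </writer>
            <!-- one <writer> entry per writer plugin -->
        </writers>
    </plugins>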
-------------------------------------------------------------------------------- /src/main/java/opensource/hdata/core/RecordEvent.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.core; 2 | 3 | import opensource.hdata.core.plugin.Record; 4 | 5 | import com.lmax.disruptor.EventFactory; 6 | 7 | public class RecordEvent { 8 | 9 | private Record record; 10 | 11 | public Record getRecord() { 12 | return record; 13 | } 14 | 15 | public void setRecord(Record record) { 16 | this.record = record; 17 | } 18 | 19 | public static final EventFactory FACTORY = new EventFactory() { 20 | 21 | public RecordEvent newInstance() { 22 | return new RecordEvent(); 23 | } 24 | }; 25 | 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/core/RecordWorkHandler.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.core; 2 | 3 | import opensource.hdata.config.PluginConfig; 4 | import opensource.hdata.core.plugin.Reader; 5 | import opensource.hdata.core.plugin.Writer; 6 | 7 | import com.lmax.disruptor.WorkHandler; 8 | 9 | public class RecordWorkHandler implements WorkHandler { 10 | 11 | private Reader[] readers; 12 | private Writer writer; 13 | private JobContext context; 14 | private PluginConfig writerConfig; 15 | private boolean writerPrepared; 16 | private boolean isWriterError; 17 | private Metric metric; 18 | 19 | public RecordWorkHandler(Reader[] readers, Writer writer, JobContext context, PluginConfig writerConfig) { 20 | this.readers = readers; 21 | this.writer = writer; 22 | this.context = context; 23 | this.writerConfig = writerConfig; 24 | this.metric = context.getMetric(); 25 | } 26 | 27 | public void onEvent(RecordEvent event) { 28 | if (!isWriterError) { 29 | try { 30 | if (!writerPrepared) { 31 | for (Reader reader : readers) { 32 | if (context.getFields() == null) { 33 | reader.declareOutputFields(context.getDeclarer()); 34 | } else { 35 | break; 36 | } 37 | } 38 | writer.prepare(context, writerConfig); 39 | writerPrepared = true; 40 | 41 | if (metric.getWriterStartTime() == 0) { 42 | metric.setWriterStartTime(System.currentTimeMillis()); 43 | } 44 | } 45 | 46 | writer.execute(event.getRecord()); 47 | metric.getWriteCount().incrementAndGet(); 48 | } catch (Exception e) { 49 | this.isWriterError = true; 50 | context.setWriterError(true); 51 | e.printStackTrace(); 52 | } 53 | } 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/core/Storage.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.core; 2 | 3 | import opensource.hdata.core.plugin.Record; 4 | 5 | import com.lmax.disruptor.EventTranslatorOneArg; 6 | import com.lmax.disruptor.RingBuffer; 7 | import com.lmax.disruptor.dsl.Disruptor; 8 | 9 | public class Storage { 10 | 11 | private Disruptor disruptor; 12 | private RingBuffer ringBuffer; 13 | 14 | private static final EventTranslatorOneArg TRANSLATOR = new EventTranslatorOneArg() { 15 | 16 | public void translateTo(RecordEvent event, long sequence, Record record) { 17 | event.setRecord(record); 18 | } 19 | }; 20 | 21 | public Storage(Disruptor disruptor, RecordWorkHandler[] handlers) { 22 | this.disruptor = disruptor; 23 | disruptor.handleEventsWithWorkerPool(handlers); 24 | ringBuffer = disruptor.start(); 25 | } 26 | 27 | public void put(Record record) 
{ 28 | disruptor.publishEvent(TRANSLATOR, record); 29 | } 30 | 31 | public void put(Record[] records) { 32 | for (Record record : records) { 33 | put(record); 34 | } 35 | } 36 | 37 | public boolean isEmpty() { 38 | return ringBuffer.remainingCapacity() == ringBuffer.getBufferSize(); 39 | } 40 | 41 | public int size() { 42 | return ringBuffer.getBufferSize(); 43 | } 44 | 45 | public void close() { 46 | disruptor.shutdown(); 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/core/WaitStrategyFactory.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.core; 2 | 3 | import opensource.hdata.exception.HDataException; 4 | 5 | import com.lmax.disruptor.BlockingWaitStrategy; 6 | import com.lmax.disruptor.BusySpinWaitStrategy; 7 | import com.lmax.disruptor.SleepingWaitStrategy; 8 | import com.lmax.disruptor.WaitStrategy; 9 | import com.lmax.disruptor.YieldingWaitStrategy; 10 | 11 | public class WaitStrategyFactory { 12 | 13 | /** 14 | * 构造线程等待策略 15 | * 16 | * @param name 17 | * @return 18 | */ 19 | public static WaitStrategy build(String name) { 20 | WaitStrategy waitStrategy = null; 21 | if ("BlockingWaitStrategy".equals(name)) { 22 | waitStrategy = new BlockingWaitStrategy(); 23 | } else if ("BusySpinWaitStrategy".equals(name)) { 24 | waitStrategy = new BusySpinWaitStrategy(); 25 | } else if ("SleepingWaitStrategy".equals(name)) { 26 | waitStrategy = new SleepingWaitStrategy(); 27 | } else if ("YieldingWaitStrategy".equals(name)) { 28 | waitStrategy = new YieldingWaitStrategy(); 29 | } else { 30 | throw new HDataException("Invalid wait strategy: " + name); 31 | } 32 | return waitStrategy; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/core/plugin/AbstractPlugin.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.core.plugin; 2 | 3 | public abstract class AbstractPlugin implements Pluginable { 4 | 5 | private String pluginName; 6 | 7 | public String getPluginName() { 8 | return this.pluginName; 9 | } 10 | 11 | public void setPluginName(String name) { 12 | this.pluginName = name; 13 | } 14 | 15 | } 16 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/core/plugin/Pluginable.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.core.plugin; 2 | 3 | public interface Pluginable { 4 | 5 | public String getPluginName(); 6 | 7 | public void setPluginName(String name); 8 | } 9 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/core/plugin/Reader.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.core.plugin; 2 | 3 | import opensource.hdata.config.PluginConfig; 4 | import opensource.hdata.core.JobContext; 5 | import opensource.hdata.core.OutputFieldsDeclarer; 6 | 7 | public abstract class Reader extends AbstractPlugin { 8 | 9 | public void prepare(JobContext context, PluginConfig readerConfig) { 10 | } 11 | 12 | public void execute(RecordCollector recordCollector) { 13 | } 14 | 15 | public void close() { 16 | } 17 | 18 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 19 | } 20 | } 21 | 
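The Reader base class above, together with RecordCollector, DefaultRecord and OutputFieldsDeclarer, is the entire contract for a source plugin: ReaderWorker drives prepare/execute/close, and RecordWorkHandler calls declareOutputFields before the first write. A minimal hypothetical reader, not part of this repository, might look like the sketch below; to be usable it would also need a <reader> entry in conf/plugins.xml:

    package opensource.hdata.plugin.reader.demo;   // hypothetical package

    import opensource.hdata.config.PluginConfig;
    import opensource.hdata.core.DefaultRecord;
    import opensource.hdata.core.Fields;
    import opensource.hdata.core.JobContext;
    import opensource.hdata.core.OutputFieldsDeclarer;
    import opensource.hdata.core.plugin.Reader;
    import opensource.hdata.core.plugin.Record;
    import opensource.hdata.core.plugin.RecordCollector;

    public class DemoReader extends Reader {

        private int rows;

        @Override
        public void prepare(JobContext context, PluginConfig readerConfig) {
            // Properties arrive verbatim from the <reader> element of the job XML.
            rows = readerConfig.getInt("rows", 10);
        }

        @Override
        public void execute(RecordCollector recordCollector) {
            for (int i = 0; i < rows; i++) {
                Record record = new DefaultRecord(2);
                record.addField(i);              // column 0
                record.addField("row-" + i);     // column 1
                recordCollector.send(record);    // hands the record to the Disruptor-backed Storage
            }
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            // Column names exposed to writers via JobContext.getFields().
            declarer.declare(new Fields("id", "name"));
        }
    }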
-------------------------------------------------------------------------------- /src/main/java/opensource/hdata/core/plugin/ReaderPlugin.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.core.plugin; 2 | 3 | public class ReaderPlugin extends AbstractPlugin { 4 | 5 | private String className; 6 | private String splitterClassName; 7 | 8 | public String getClassName() { 9 | return className; 10 | } 11 | 12 | public void setClassName(String className) { 13 | this.className = className; 14 | } 15 | 16 | public String getSplitterClassName() { 17 | return splitterClassName; 18 | } 19 | 20 | public void setSplitterClassName(String splitterClassName) { 21 | this.splitterClassName = splitterClassName; 22 | } 23 | 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/core/plugin/Record.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.core.plugin; 2 | 3 | public interface Record { 4 | 5 | public void addField(Object field); 6 | 7 | public void addField(int index, Object field); 8 | 9 | public Object getField(int index); 10 | 11 | public int getFieldsCount(); 12 | } 13 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/core/plugin/RecordCollector.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.core.plugin; 2 | 3 | import opensource.hdata.core.Metric; 4 | import opensource.hdata.core.Storage; 5 | 6 | public class RecordCollector { 7 | 8 | private Storage storage; 9 | private Metric metric; 10 | 11 | public RecordCollector(Storage storage, Metric metric) { 12 | this.storage = storage; 13 | this.metric = metric; 14 | } 15 | 16 | public void send(Record record) { 17 | storage.put(record); 18 | metric.getReadCount().incrementAndGet(); 19 | } 20 | 21 | public void send(Record[] records) { 22 | storage.put(records); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/core/plugin/Splitter.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.core.plugin; 2 | 3 | import java.util.List; 4 | 5 | import opensource.hdata.config.JobConfig; 6 | import opensource.hdata.config.PluginConfig; 7 | 8 | public abstract class Splitter extends AbstractPlugin { 9 | 10 | public abstract List split(JobConfig jobConfig); 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/core/plugin/Writer.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.core.plugin; 2 | 3 | import opensource.hdata.config.PluginConfig; 4 | import opensource.hdata.core.JobContext; 5 | 6 | public abstract class Writer extends AbstractPlugin { 7 | 8 | public void prepare(JobContext context, PluginConfig writerConfig) { 9 | } 10 | 11 | public void execute(Record record) { 12 | } 13 | 14 | public void close() { 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/core/plugin/WriterPlugin.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.core.plugin; 2 | 3 | public class WriterPlugin extends AbstractPlugin { 4 | 5 | 
private String className; 6 | 7 | public String getClassName() { 8 | return className; 9 | } 10 | 11 | public void setClassName(String className) { 12 | this.className = className; 13 | } 14 | 15 | } 16 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/exception/HDataException.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.exception; 2 | 3 | public class HDataException extends RuntimeException { 4 | 5 | private static final long serialVersionUID = 2510267358921118998L; 6 | 7 | private String message; 8 | 9 | public HDataException() { 10 | super(); 11 | } 12 | 13 | public HDataException(final String message) { 14 | super(message); 15 | } 16 | 17 | public HDataException(final Exception e) { 18 | super(e); 19 | } 20 | 21 | public HDataException(Throwable cause) { 22 | super(cause); 23 | } 24 | 25 | public HDataException(final String message, final Throwable cause) { 26 | super(message, cause); 27 | } 28 | 29 | @Override 30 | public String getMessage() { 31 | return this.message == null ? super.getMessage() : this.message; 32 | } 33 | 34 | public void setMessage(String message) { 35 | this.message = message; 36 | } 37 | 38 | @Override 39 | public String toString() { 40 | return this.message; 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/reader/ftp/FTPReader.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.reader.ftp; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.InputStream; 5 | import java.io.InputStreamReader; 6 | import java.util.ArrayList; 7 | import java.util.List; 8 | import java.util.zip.GZIPInputStream; 9 | 10 | import opensource.hdata.config.PluginConfig; 11 | import opensource.hdata.core.DefaultRecord; 12 | import opensource.hdata.core.Fields; 13 | import opensource.hdata.core.JobContext; 14 | import opensource.hdata.core.OutputFieldsDeclarer; 15 | import opensource.hdata.core.plugin.Reader; 16 | import opensource.hdata.core.plugin.Record; 17 | import opensource.hdata.core.plugin.RecordCollector; 18 | import opensource.hdata.exception.HDataException; 19 | import opensource.hdata.util.EscaperUtils; 20 | import opensource.hdata.util.FTPUtils; 21 | 22 | import org.apache.commons.lang3.StringUtils; 23 | import org.apache.commons.net.ftp.FTPClient; 24 | 25 | public class FTPReader extends Reader { 26 | 27 | private Fields fields; 28 | private String host; 29 | private int port; 30 | private String username; 31 | private String password; 32 | private String fieldsSeparator; 33 | private String encoding; 34 | private int fieldsCount; 35 | private List files = new ArrayList(); 36 | 37 | @SuppressWarnings("unchecked") 38 | @Override 39 | public void prepare(JobContext context, PluginConfig readerConfig) { 40 | host = readerConfig.getString(FTPReaderProperties.HOST); 41 | port = readerConfig.getInt(FTPReaderProperties.PORT, 21); 42 | username = readerConfig.getString(FTPReaderProperties.USERNAME, "anonymous"); 43 | password = readerConfig.getString(FTPReaderProperties.PASSWORD, ""); 44 | fieldsSeparator = EscaperUtils.parse(readerConfig.getString(FTPReaderProperties.FIELDS_SEPARATOR, "\t")); 45 | encoding = readerConfig.getString(FTPReaderProperties.ENCODING, "UTF-8"); 46 | files = (List) readerConfig.get(FTPReaderProperties.FILES); 47 | fieldsCount = 
readerConfig.getInt(FTPReaderProperties.FIELDS_COUNT_FILTER, 0); 48 | 49 | if (readerConfig.containsKey(FTPReaderProperties.SCHEMA)) { 50 | fields = new Fields(); 51 | String[] tokens = readerConfig.getString(FTPReaderProperties.SCHEMA).split("\\s*,\\s*"); 52 | for (String field : tokens) { 53 | fields.add(field); 54 | } 55 | } 56 | } 57 | 58 | @Override 59 | public void execute(RecordCollector recordCollector) { 60 | FTPClient ftpClient = null; 61 | try { 62 | ftpClient = FTPUtils.getFtpClient(host, port, username, password); 63 | for (String file : files) { 64 | InputStream is = ftpClient.retrieveFileStream(file); 65 | BufferedReader br = null; 66 | if (file.endsWith(".gz")) { 67 | GZIPInputStream gzin = new GZIPInputStream(is); 68 | br = new BufferedReader(new InputStreamReader(gzin, encoding)); 69 | } else { 70 | br = new BufferedReader(new InputStreamReader(is, encoding)); 71 | } 72 | 73 | String line = null; 74 | while ((line = br.readLine()) != null) { 75 | String[] tokens = StringUtils.splitByWholeSeparator(line, fieldsSeparator); 76 | if (tokens.length >= fieldsCount) { 77 | Record record = new DefaultRecord(tokens.length); 78 | for (String field : tokens) { 79 | record.addField(field); 80 | } 81 | recordCollector.send(record); 82 | } 83 | } 84 | ftpClient.completePendingCommand(); 85 | br.close(); 86 | is.close(); 87 | } 88 | } catch (Exception e) { 89 | throw new HDataException(e); 90 | } finally { 91 | FTPUtils.closeFtpClient(ftpClient); 92 | } 93 | } 94 | 95 | @Override 96 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 97 | declarer.declare(fields); 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/reader/ftp/FTPReaderProperties.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.reader.ftp; 2 | 3 | public class FTPReaderProperties { 4 | public static final String HOST = "host"; 5 | public static final String PORT = "port"; 6 | public static final String USERNAME = "username"; 7 | public static final String PASSWORD = "password"; 8 | public static final String DIR = "dir"; 9 | public static final String FILENAME = "filename"; 10 | public static final String RECURSIVE = "recursive"; 11 | public static final String ENCODING = "encoding"; 12 | public static final String FIELDS_SEPARATOR = "fieldsSeparator"; 13 | public static final String SCHEMA = "schema"; 14 | public static final String FIELDS_COUNT_FILTER = "fieldsCountFilter"; 15 | public static final String FILES = "reader.files"; 16 | } 17 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/reader/ftp/FTPSplitter.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.reader.ftp; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import opensource.hdata.config.JobConfig; 7 | import opensource.hdata.config.PluginConfig; 8 | import opensource.hdata.core.plugin.Splitter; 9 | import opensource.hdata.exception.HDataException; 10 | import opensource.hdata.util.FTPUtils; 11 | 12 | import org.apache.commons.net.ftp.FTPClient; 13 | 14 | public class FTPSplitter extends Splitter { 15 | 16 | @Override 17 | public List split(JobConfig jobConfig) { 18 | List list = new ArrayList(); 19 | PluginConfig readerConfig = jobConfig.getReaderConfig(); 20 | String host = 
readerConfig.getString(FTPReaderProperties.HOST); 21 | int port = readerConfig.getInt(FTPReaderProperties.PORT, 21); 22 | String username = readerConfig.getString(FTPReaderProperties.USERNAME, "anonymous"); 23 | String password = readerConfig.getString(FTPReaderProperties.PASSWORD, ""); 24 | String dir = readerConfig.getString(FTPReaderProperties.DIR); 25 | String filenameRegexp = readerConfig.getString(FTPReaderProperties.FILENAME); 26 | boolean recursive = readerConfig.getBoolean(FTPReaderProperties.RECURSIVE, false); 27 | int parallelism = readerConfig.getParallelism(); 28 | 29 | FTPClient ftpClient = null; 30 | try { 31 | ftpClient = FTPUtils.getFtpClient(host, port, username, password); 32 | List files = new ArrayList(); 33 | FTPUtils.listFile(files, ftpClient, dir, filenameRegexp, recursive); 34 | if (files.size() > 0) { 35 | if (parallelism == 1) { 36 | readerConfig.put(FTPReaderProperties.FILES, files); 37 | list.add(readerConfig); 38 | } else { 39 | double step = (double) files.size() / parallelism; 40 | for (int i = 0; i < parallelism; i++) { 41 | List splitedFiles = new ArrayList(); 42 | for (int start = (int) Math.ceil(step * i), end = (int) Math.ceil(step * (i + 1)); start < end; start++) { 43 | splitedFiles.add(files.get(start)); 44 | } 45 | PluginConfig pluginConfig = (PluginConfig) readerConfig.clone(); 46 | pluginConfig.put(FTPReaderProperties.FILES, splitedFiles); 47 | list.add(pluginConfig); 48 | } 49 | } 50 | } 51 | } catch (Exception e) { 52 | throw new HDataException(e); 53 | } finally { 54 | FTPUtils.closeFtpClient(ftpClient); 55 | } 56 | 57 | return list; 58 | } 59 | 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/reader/hbase/HBaseReader.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.reader.hbase; 2 | 3 | import java.io.IOException; 4 | 5 | import opensource.hdata.config.PluginConfig; 6 | import opensource.hdata.core.DefaultRecord; 7 | import opensource.hdata.core.Fields; 8 | import opensource.hdata.core.JobContext; 9 | import opensource.hdata.core.OutputFieldsDeclarer; 10 | import opensource.hdata.core.plugin.Reader; 11 | import opensource.hdata.core.plugin.Record; 12 | import opensource.hdata.core.plugin.RecordCollector; 13 | import opensource.hdata.exception.HDataException; 14 | 15 | import org.apache.hadoop.conf.Configuration; 16 | import org.apache.hadoop.hbase.HBaseConfiguration; 17 | import org.apache.hadoop.hbase.client.HTable; 18 | import org.apache.hadoop.hbase.client.Result; 19 | import org.apache.hadoop.hbase.client.ResultScanner; 20 | import org.apache.hadoop.hbase.client.Scan; 21 | import org.apache.hadoop.hbase.util.Bytes; 22 | 23 | public class HBaseReader extends Reader { 24 | 25 | private Fields fields = new Fields(); 26 | private HTable table; 27 | private byte[] startRowkey; 28 | private byte[] endRowkey; 29 | private String[] columns; 30 | private int rowkeyIndex = -1; 31 | private static final String ROWKEY = ":rowkey"; 32 | 33 | @Override 34 | public void prepare(JobContext context, PluginConfig readerConfig) { 35 | startRowkey = (byte[]) readerConfig.get(HBaseReaderProperties.START_ROWKWY); 36 | endRowkey = (byte[]) readerConfig.get(HBaseReaderProperties.END_ROWKWY); 37 | 38 | String[] schema = readerConfig.getString(HBaseReaderProperties.SCHEMA).split(","); 39 | for (String field : schema) { 40 | fields.add(field); 41 | } 42 | 43 | Configuration conf = HBaseConfiguration.create(); 44 
| conf.set("hbase.zookeeper.quorum", readerConfig.getString(HBaseReaderProperties.ZOOKEEPER_QUORUM)); 45 | conf.set("hbase.zookeeper.property.clientPort", readerConfig.getString(HBaseReaderProperties.ZOOKEEPER_PROPERTY_CLIENTPORT, "2181")); 46 | columns = readerConfig.getString(HBaseReaderProperties.COLUMNS).split("\\s*,\\s*"); 47 | for (int i = 0, len = columns.length; i < len; i++) { 48 | if (ROWKEY.equalsIgnoreCase(columns[i])) { 49 | rowkeyIndex = i; 50 | break; 51 | } 52 | } 53 | 54 | try { 55 | table = new HTable(conf, readerConfig.getString(HBaseReaderProperties.TABLE)); 56 | } catch (IOException e) { 57 | e.printStackTrace(); 58 | throw new HDataException(e); 59 | } 60 | } 61 | 62 | @Override 63 | public void execute(RecordCollector recordCollector) { 64 | Scan scan = new Scan(); 65 | if (startRowkey.length > 0) { 66 | scan.setStartRow(startRowkey); 67 | } 68 | if (endRowkey.length > 0) { 69 | scan.setStopRow(endRowkey); 70 | } 71 | 72 | for (int i = 0, len = columns.length; i < len; i++) { 73 | if (i != rowkeyIndex) { 74 | String[] column = columns[i].split(":"); 75 | scan.addColumn(Bytes.toBytes(column[0]), Bytes.toBytes(column[1])); 76 | } 77 | } 78 | 79 | try { 80 | ResultScanner results = table.getScanner(scan); 81 | for (Result result : results) { 82 | Record record = new DefaultRecord(fields.size()); 83 | for (int i = 0, len = fields.size(); i < len; i++) { 84 | if (i == rowkeyIndex) { 85 | record.addField(Bytes.toString(result.getRow())); 86 | } else { 87 | String[] column = columns[i].split(":"); 88 | record.addField(Bytes.toString(result.getValue(Bytes.toBytes(column[0]), Bytes.toBytes(column[1])))); 89 | } 90 | } 91 | recordCollector.send(record); 92 | } 93 | 94 | if (table != null) { 95 | table.close(); 96 | } 97 | } catch (IOException e) { 98 | throw new HDataException(e); 99 | } 100 | } 101 | 102 | @Override 103 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 104 | declarer.declare(fields); 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/reader/hbase/HBaseReaderProperties.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.reader.hbase; 2 | 3 | public class HBaseReaderProperties { 4 | 5 | public static final String ZOOKEEPER_QUORUM = "zookeeperQuorum"; 6 | public static final String ZOOKEEPER_PROPERTY_CLIENTPORT = "zookeeperClientPort"; 7 | public static final String TABLE = "table"; 8 | public static final String START_ROWKWY = "startRowkey"; 9 | public static final String END_ROWKWY = "endRowkey"; 10 | public static final String COLUMNS = "columns"; 11 | public static final String SCHEMA = "schema"; 12 | } 13 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/reader/hbase/HBaseSplitter.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.reader.hbase; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | import java.util.List; 6 | 7 | import opensource.hdata.config.JobConfig; 8 | import opensource.hdata.config.PluginConfig; 9 | import opensource.hdata.core.plugin.Splitter; 10 | import opensource.hdata.exception.HDataException; 11 | import opensource.hdata.plugin.writer.hbase.HBaseWriterProperties; 12 | 13 | import org.apache.hadoop.conf.Configuration; 14 | import org.apache.hadoop.hbase.HBaseConfiguration; 15 | import 
org.apache.hadoop.hbase.client.HTable; 16 | import org.apache.hadoop.hbase.util.Bytes; 17 | import org.apache.hadoop.hbase.util.Pair; 18 | import org.apache.logging.log4j.LogManager; 19 | import org.apache.logging.log4j.Logger; 20 | 21 | public class HBaseSplitter extends Splitter { 22 | 23 | private static final Logger LOG = LogManager.getLogger(HBaseSplitter.class); 24 | 25 | @Override 26 | public List split(JobConfig jobConfig) { 27 | List list = new ArrayList(); 28 | PluginConfig readerConfig = jobConfig.getReaderConfig(); 29 | int parallelism = readerConfig.getParallelism(); 30 | 31 | String startRowkey = readerConfig.getString(HBaseReaderProperties.START_ROWKWY, ""); 32 | String endRowkey = readerConfig.getString(HBaseReaderProperties.END_ROWKWY, ""); 33 | byte[] startRowkeyBytes = startRowkey.getBytes(); 34 | byte[] endRowkeyBytes = endRowkey.getBytes(); 35 | 36 | if (parallelism == 1) { 37 | readerConfig.put(HBaseReaderProperties.START_ROWKWY, startRowkeyBytes); 38 | readerConfig.put(HBaseReaderProperties.END_ROWKWY, endRowkeyBytes); 39 | list.add(readerConfig); 40 | return list; 41 | } else { 42 | Configuration conf = HBaseConfiguration.create(); 43 | conf.set("hbase.zookeeper.quorum", readerConfig.getString(HBaseReaderProperties.ZOOKEEPER_QUORUM)); 44 | conf.set("hbase.zookeeper.property.clientPort", readerConfig.getString(HBaseReaderProperties.ZOOKEEPER_PROPERTY_CLIENTPORT, "2181")); 45 | try { 46 | HTable table = new HTable(conf, readerConfig.getString(HBaseWriterProperties.TABLE)); 47 | Pair startEndKeysPair = table.getStartEndKeys(); 48 | table.close(); 49 | List> selectedPairList = new ArrayList>(); 50 | byte[][] startKeys = startEndKeysPair.getFirst(); 51 | byte[][] endKeys = startEndKeysPair.getSecond(); 52 | 53 | if (startKeys.length == 1) { 54 | Pair pair = new Pair(); 55 | pair.setFirst(startRowkeyBytes); 56 | pair.setSecond(endRowkeyBytes); 57 | selectedPairList.add(pair); 58 | } else { 59 | if (startRowkeyBytes.length == 0 && endRowkeyBytes.length == 0) { 60 | for (int i = 0, len = startKeys.length; i < len; i++) { 61 | Pair pair = new Pair(); 62 | pair.setFirst(startKeys[i]); 63 | pair.setSecond(endKeys[i]); 64 | selectedPairList.add(pair); 65 | } 66 | } else if (endRowkeyBytes.length == 0) { 67 | for (int i = 0, len = startKeys.length; i < len; i++) { 68 | if (Bytes.compareTo(endKeys[i], startRowkeyBytes) >= 0) { 69 | Pair pair = new Pair(); 70 | pair.setFirst(Bytes.compareTo(startKeys[i], startRowkeyBytes) >= 0 ? startKeys[i] : startRowkeyBytes); 71 | pair.setSecond(endKeys[i]); 72 | selectedPairList.add(pair); 73 | } 74 | } 75 | } else { 76 | for (int i = 0, len = startKeys.length; i < len; i++) { 77 | if (len == 1) { 78 | Pair pair = new Pair(); 79 | pair.setFirst(startRowkeyBytes); 80 | pair.setSecond(endRowkeyBytes); 81 | selectedPairList.add(pair); 82 | break; 83 | } else if (Bytes.compareTo(endKeys[i], startRowkeyBytes) >= 0 && Bytes.compareTo(endRowkeyBytes, startKeys[i]) >= 0) { 84 | Pair pair = new Pair(); 85 | pair.setFirst(Bytes.compareTo(startKeys[i], startRowkeyBytes) >= 0 ? startKeys[i] : startRowkeyBytes); 86 | pair.setSecond(Bytes.compareTo(endKeys[i], endRowkeyBytes) <= 0 ? 
endKeys[i] : endRowkeyBytes); 87 | selectedPairList.add(pair); 88 | } 89 | } 90 | } 91 | } 92 | 93 | if (parallelism > selectedPairList.size()) { 94 | LOG.info( 95 | "parallelism: {} is greater than the region count: {} in the currently open table: {}, so parallelism is set equal to region count.", 96 | parallelism, selectedPairList.size(), Bytes.toString(table.getTableName())); 97 | parallelism = selectedPairList.size(); 98 | } 99 | 100 | double step = (double) selectedPairList.size() / parallelism; 101 | for (int i = 0; i < parallelism; i++) { 102 | List> splitedPairs = new ArrayList>(); 103 | for (int start = (int) Math.ceil(step * i), end = (int) Math.ceil(step * (i + 1)); start < end; start++) { 104 | splitedPairs.add(selectedPairList.get(start)); 105 | } 106 | PluginConfig pluginConfig = (PluginConfig) readerConfig.clone(); 107 | pluginConfig.put(HBaseReaderProperties.START_ROWKWY, splitedPairs.get(0).getFirst()); 108 | pluginConfig.put(HBaseReaderProperties.END_ROWKWY, splitedPairs.get(splitedPairs.size() - 1).getSecond()); 109 | list.add(pluginConfig); 110 | } 111 | } catch (IOException e) { 112 | throw new HDataException(e); 113 | } 114 | 115 | return list; 116 | } 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/reader/hdfs/HDFSReader.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.reader.hdfs; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.IOException; 5 | import java.io.InputStreamReader; 6 | import java.util.ArrayList; 7 | import java.util.List; 8 | 9 | import opensource.hdata.config.PluginConfig; 10 | import opensource.hdata.core.DefaultRecord; 11 | import opensource.hdata.core.Fields; 12 | import opensource.hdata.core.JobContext; 13 | import opensource.hdata.core.OutputFieldsDeclarer; 14 | import opensource.hdata.core.plugin.Reader; 15 | import opensource.hdata.core.plugin.Record; 16 | import opensource.hdata.core.plugin.RecordCollector; 17 | import opensource.hdata.exception.HDataException; 18 | import opensource.hdata.util.EscaperUtils; 19 | 20 | import org.apache.commons.lang3.StringUtils; 21 | import org.apache.hadoop.conf.Configuration; 22 | import org.apache.hadoop.fs.FSDataInputStream; 23 | import org.apache.hadoop.fs.FileSystem; 24 | import org.apache.hadoop.fs.Path; 25 | import org.apache.hadoop.io.compress.CompressionCodec; 26 | import org.apache.hadoop.io.compress.CompressionCodecFactory; 27 | 28 | public class HDFSReader extends Reader { 29 | 30 | private Fields fields; 31 | private String fieldsSeparator; 32 | private String encoding; 33 | private List files = new ArrayList(); 34 | 35 | @SuppressWarnings("unchecked") 36 | @Override 37 | public void prepare(JobContext context, PluginConfig readerConfig) { 38 | fieldsSeparator = EscaperUtils.parse(readerConfig.getString(HDFSReaderProperties.FIELDS_SEPARATOR, "\t")); 39 | files = (List) readerConfig.get(HDFSReaderProperties.FILES); 40 | encoding = readerConfig.getString(HDFSReaderProperties.ENCODING, "UTF-8"); 41 | if (readerConfig.containsKey(HDFSReaderProperties.SCHEMA)) { 42 | fields = new Fields(); 43 | String[] tokens = readerConfig.getString(HDFSReaderProperties.SCHEMA).split("\\s*,\\s*"); 44 | for (String field : tokens) { 45 | fields.add(field); 46 | } 47 | } 48 | } 49 | 50 | @Override 51 | public void execute(RecordCollector recordCollector) { 52 | Configuration conf = new Configuration(); 53 | CompressionCodecFactory codecFactory 
= new CompressionCodecFactory(conf); 54 | try { 55 | for (Path file : files) { 56 | FileSystem fs = file.getFileSystem(conf); 57 | CompressionCodec codec = codecFactory.getCodec(file); 58 | FSDataInputStream input = fs.open(file); 59 | BufferedReader br; 60 | String line = null; 61 | if (codec == null) { 62 | br = new BufferedReader(new InputStreamReader(input, encoding)); 63 | } else { 64 | br = new BufferedReader(new InputStreamReader(codec.createInputStream(input), encoding)); 65 | } 66 | while ((line = br.readLine()) != null) { 67 | String[] tokens = StringUtils.splitByWholeSeparator(line, fieldsSeparator); 68 | Record record = new DefaultRecord(tokens.length); 69 | for (String field : tokens) { 70 | record.addField(field); 71 | } 72 | recordCollector.send(record); 73 | } 74 | br.close(); 75 | } 76 | } catch (IOException e) { 77 | e.printStackTrace(); 78 | throw new HDataException(e); 79 | } 80 | } 81 | 82 | @Override 83 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 84 | declarer.declare(fields); 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/reader/hdfs/HDFSReaderProperties.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.reader.hdfs; 2 | 3 | public class HDFSReaderProperties { 4 | public static final String DIR = "dir"; 5 | public static final String FILENAME_REGEXP = "filename"; 6 | public static final String SCHEMA = "schema"; 7 | public static final String FIELDS_SEPARATOR = "fieldsSeparator"; 8 | public static final String ENCODING = "encoding"; 9 | public static final String HADOOP_USER = "hadoopUser"; 10 | public static final String FILES = "reader.files"; 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/reader/hdfs/HDFSSplitter.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.reader.hdfs; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | import java.util.List; 6 | import java.util.regex.Matcher; 7 | import java.util.regex.Pattern; 8 | 9 | import opensource.hdata.config.JobConfig; 10 | import opensource.hdata.config.PluginConfig; 11 | import opensource.hdata.core.plugin.Splitter; 12 | import opensource.hdata.exception.HDataException; 13 | 14 | import org.apache.hadoop.conf.Configuration; 15 | import org.apache.hadoop.fs.FileStatus; 16 | import org.apache.hadoop.fs.FileSystem; 17 | import org.apache.hadoop.fs.Path; 18 | 19 | public class HDFSSplitter extends Splitter { 20 | 21 | @Override 22 | public List split(JobConfig jobConfig) { 23 | List list = new ArrayList(); 24 | List matchedFiles = new ArrayList(); 25 | PluginConfig readerConfig = jobConfig.getReaderConfig(); 26 | Path dir = new Path(readerConfig.getString(HDFSReaderProperties.DIR)); 27 | int parallelism = readerConfig.getParallelism(); 28 | 29 | System.setProperty("HADOOP_USER_NAME", readerConfig.getString(HDFSReaderProperties.HADOOP_USER)); 30 | Configuration conf = new Configuration(); 31 | try { 32 | FileSystem fs = dir.getFileSystem(conf); 33 | Pattern filenamePattern = Pattern.compile(readerConfig.getString(HDFSReaderProperties.FILENAME_REGEXP)); 34 | if (fs.exists(dir)) { 35 | for (FileStatus fileStatus : fs.listStatus(dir)) { 36 | Matcher m = filenamePattern.matcher(fileStatus.getPath().getName()); 37 | if (m.matches()) { 38 | 
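// Keep only the files whose names match the configured filename regexp.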
matchedFiles.add(fileStatus.getPath()); 39 | } 40 | } 41 | 42 | if (matchedFiles.size() > 0) { 43 | if (parallelism == 1) { 44 | readerConfig.put(HDFSReaderProperties.FILES, matchedFiles); 45 | list.add(readerConfig); 46 | } else { 47 | double step = (double) matchedFiles.size() / parallelism; 48 | for (int i = 0; i < parallelism; i++) { 49 | List splitedFiles = new ArrayList(); 50 | for (int start = (int) Math.ceil(step * i), end = (int) Math.ceil(step * (i + 1)); start < end; start++) { 51 | splitedFiles.add(matchedFiles.get(start)); 52 | } 53 | PluginConfig pluginConfig = (PluginConfig) readerConfig.clone(); 54 | pluginConfig.put(HDFSReaderProperties.FILES, splitedFiles); 55 | list.add(pluginConfig); 56 | } 57 | } 58 | } 59 | 60 | } else { 61 | throw new HDataException(String.format("Path %s not found.", dir)); 62 | } 63 | } catch (IOException e) { 64 | throw new HDataException(e); 65 | } 66 | 67 | return list; 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/reader/hive/HiveReader.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.reader.hive; 2 | 3 | import java.util.List; 4 | 5 | import opensource.hdata.config.PluginConfig; 6 | import opensource.hdata.core.DefaultRecord; 7 | import opensource.hdata.core.Fields; 8 | import opensource.hdata.core.JobContext; 9 | import opensource.hdata.core.OutputFieldsDeclarer; 10 | import opensource.hdata.core.plugin.Reader; 11 | import opensource.hdata.core.plugin.Record; 12 | import opensource.hdata.core.plugin.RecordCollector; 13 | import opensource.hdata.exception.HDataException; 14 | import opensource.hdata.util.HiveTypeUtils; 15 | 16 | import org.apache.hadoop.fs.FileSystem; 17 | import org.apache.hadoop.fs.Path; 18 | import org.apache.hadoop.hive.metastore.api.FieldSchema; 19 | import org.apache.hadoop.hive.serde2.Deserializer; 20 | import org.apache.hadoop.hive.serde2.SerDeException; 21 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; 22 | import org.apache.hadoop.hive.serde2.objectinspector.StructField; 23 | import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; 24 | import org.apache.hadoop.io.Writable; 25 | import org.apache.hadoop.mapred.FileInputFormat; 26 | import org.apache.hadoop.mapred.FileSplit; 27 | import org.apache.hadoop.mapred.InputFormat; 28 | import org.apache.hadoop.mapred.JobConf; 29 | import org.apache.hadoop.mapred.RecordReader; 30 | import org.apache.hadoop.mapred.Reporter; 31 | 32 | @SuppressWarnings("deprecation") 33 | public class HiveReader extends Reader { 34 | 35 | private final Fields fields = new Fields(); 36 | private List files; 37 | private List partitionValues; 38 | private Class> inputFormat; 39 | private StructObjectInspector oi; 40 | private List structFields; 41 | 42 | private Deserializer deserializer; 43 | 44 | @SuppressWarnings("unchecked") 45 | @Override 46 | public void prepare(JobContext context, PluginConfig readerConfig) { 47 | inputFormat = (Class>) readerConfig.get(HiveReaderProperties.INPUT_FORMAT_CLASS); 48 | deserializer = (Deserializer) readerConfig.get(HiveReaderProperties.DESERIALIZER); 49 | files = (List) readerConfig.get(HiveReaderProperties.TABLE_FILES); 50 | partitionValues = (List) readerConfig.get(HiveReaderProperties.PARTITION_VALUES); 51 | List columns = (List) readerConfig.get(HiveReaderProperties.TABLE_COLUMNS); 52 | 53 | for (FieldSchema fs : columns) { 54 | 
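// Declare one output field per table column reported by the Hive metastore.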
fields.add(fs.getName()); 55 | } 56 | 57 | try { 58 | oi = (StructObjectInspector) deserializer.getObjectInspector(); 59 | } catch (SerDeException e) { 60 | throw new HDataException(e); 61 | } 62 | structFields = oi.getAllStructFieldRefs(); 63 | } 64 | 65 | @Override 66 | public void execute(RecordCollector recordCollector) { 67 | 68 | int columnsCount = fields.size(); 69 | int partitionValueCount = partitionValues == null ? 0 : partitionValues.size(); 70 | 71 | JobConf jobConf = new JobConf(); 72 | for (String file : files) { 73 | Path path = new Path(file); 74 | try { 75 | FileSystem fs = path.getFileSystem(jobConf); 76 | FileInputFormat fileInputFormat = (FileInputFormat) inputFormat.newInstance(); 77 | long filelen = fs.getFileStatus(path).getLen(); 78 | FileSplit split = new FileSplit(path, 0, filelen, (String[]) null); 79 | RecordReader reader = fileInputFormat.getRecordReader(split, jobConf, Reporter.NULL); 80 | Writable key = reader.createKey(); 81 | Writable value = reader.createValue(); 82 | while (reader.next(key, value)) { 83 | Object row = deserializer.deserialize(value); 84 | Record record = new DefaultRecord(columnsCount); 85 | for (int i = 0, len = structFields.size(); i < len; i++) { 86 | Object fieldData = oi.getStructFieldData(row, structFields.get(i)); 87 | Object standardData = ObjectInspectorUtils.copyToStandardJavaObject(fieldData, structFields.get(i).getFieldObjectInspector()); 88 | record.addField(HiveTypeUtils.toJavaObject(standardData)); 89 | } 90 | 91 | for (int i = 0, len = partitionValueCount; i < len; i++) { 92 | record.addField(partitionValues.get(i)); 93 | } 94 | recordCollector.send(record); 95 | } 96 | reader.close(); 97 | } catch (Exception e) { 98 | throw new HDataException(e); 99 | } 100 | } 101 | } 102 | 103 | @Override 104 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 105 | declarer.declare(fields); 106 | } 107 | 108 | } 109 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/reader/hive/HiveReaderProperties.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.reader.hive; 2 | 3 | public class HiveReaderProperties { 4 | public static final String METASTORE_URIS = "metastoreUris"; 5 | public static final String DATABASE = "database"; 6 | public static final String TABLE = "table"; 7 | public static final String SELECT_COLUMNS = "columns"; 8 | public static final String TABLE_COLUMNS = "reader.columns"; 9 | public static final String PARTITIONS = "partitions"; 10 | public static final String TABLE_FILES = "reader.table.files"; 11 | public static final String PARTITION_VALUES = "reader.partition.values"; 12 | public static final String INPUT_FORMAT_CLASS = "reader.input.format.class"; 13 | public static final String DESERIALIZER = "reader.deserializer"; 14 | } 15 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/reader/hive/HiveSplitter.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.reader.hive; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | import java.util.List; 6 | 7 | import opensource.hdata.config.JobConfig; 8 | import opensource.hdata.config.PluginConfig; 9 | import opensource.hdata.core.plugin.Splitter; 10 | import opensource.hdata.exception.HDataException; 11 | import opensource.hdata.util.LoggerUtils; 12 | import 
opensource.hdata.util.Utils; 13 | 14 | import org.apache.hadoop.conf.Configuration; 15 | import org.apache.hadoop.fs.FileStatus; 16 | import org.apache.hadoop.fs.FileSystem; 17 | import org.apache.hadoop.fs.Path; 18 | import org.apache.hadoop.hive.conf.HiveConf; 19 | import org.apache.hadoop.hive.conf.HiveConf.ConfVars; 20 | import org.apache.hadoop.hive.ql.metadata.Hive; 21 | import org.apache.hadoop.hive.ql.metadata.HiveException; 22 | import org.apache.hadoop.hive.ql.metadata.Table; 23 | import org.apache.logging.log4j.LogManager; 24 | import org.apache.logging.log4j.Logger; 25 | 26 | public class HiveSplitter extends Splitter { 27 | 28 | private static final Logger LOG = LogManager.getLogger(HiveSplitter.class); 29 | 30 | @Override 31 | public List split(JobConfig jobConfig) { 32 | List list = new ArrayList(); 33 | PluginConfig readerConfig = jobConfig.getReaderConfig(); 34 | String metastoreUris = readerConfig.getString(HiveReaderProperties.METASTORE_URIS); 35 | String dbName = readerConfig.getString(HiveReaderProperties.DATABASE, "default"); 36 | String tableName = readerConfig.getString(HiveReaderProperties.TABLE); 37 | int parallelism = readerConfig.getParallelism(); 38 | List partitionValues = null; 39 | 40 | HiveConf conf = new HiveConf(); 41 | conf.set(ConfVars.METASTOREURIS.varname, metastoreUris); 42 | 43 | Hive hive; 44 | Table table; 45 | try { 46 | hive = Hive.get(conf, true); 47 | table = hive.getTable(dbName, tableName, false); 48 | } catch (HiveException e) { 49 | throw new HDataException(e); 50 | } 51 | 52 | if (table == null) { 53 | throw new HDataException(String.format("Table %s.%s is not exist.", dbName, tableName)); 54 | } 55 | 56 | readerConfig.put(HiveReaderProperties.TABLE_COLUMNS, table.getAllCols()); 57 | readerConfig.put(HiveReaderProperties.INPUT_FORMAT_CLASS, table.getInputFormatClass()); 58 | readerConfig.put(HiveReaderProperties.DESERIALIZER, table.getDeserializer()); 59 | 60 | String tableLocation = Utils.fixLocaltion(table.getDataLocation().toString(), metastoreUris); 61 | if (readerConfig.containsKey(HiveReaderProperties.PARTITIONS)) { 62 | String partitions = readerConfig.getString(HiveReaderProperties.PARTITIONS); 63 | tableLocation += "/" + partitions.replaceAll("\\s*,\\s*", "/"); 64 | partitionValues = Utils.parsePartitionValue(partitions); 65 | readerConfig.put(HiveReaderProperties.PARTITION_VALUES, partitionValues); 66 | } 67 | 68 | List files = getTableFiles(tableLocation); 69 | if (files == null || files.size() < 1) { 70 | LOG.info("Can not find files on path {}", tableLocation); 71 | return null; 72 | } 73 | 74 | if (parallelism > files.size()) { 75 | parallelism = files.size(); 76 | LOG.info("Reader parallelism is greater than file count, so parallelism is set to equal with file count."); 77 | } 78 | 79 | if (parallelism == 1) { 80 | readerConfig.put(HiveReaderProperties.TABLE_FILES, files); 81 | list.add(readerConfig); 82 | } else { 83 | double step = (double) files.size() / parallelism; 84 | for (int i = 0; i < parallelism; i++) { 85 | List splitedFiles = new ArrayList(); 86 | for (int start = (int) Math.ceil(step * i), end = (int) Math.ceil(step * (i + 1)); start < end; start++) { 87 | splitedFiles.add(files.get(start)); 88 | } 89 | PluginConfig pluginConfig = (PluginConfig) readerConfig.clone(); 90 | pluginConfig.put(HiveReaderProperties.TABLE_FILES, splitedFiles); 91 | list.add(pluginConfig); 92 | } 93 | } 94 | 95 | Hive.closeCurrent(); 96 | return list; 97 | } 98 | 99 | private List getTableFiles(String tableLocation) { 100 | try { 101 
| Configuration conf = new Configuration(); 102 | Path path = new Path(tableLocation); 103 | FileSystem hdfs = path.getFileSystem(conf); 104 | FileStatus[] fileStatus = hdfs.listStatus(path); 105 | List files = new ArrayList(); 106 | for (FileStatus fs : fileStatus) { 107 | if (!fs.isDir() && !fs.getPath().getName().startsWith("_")) { 108 | files.add(fs.getPath().toString()); 109 | } 110 | } 111 | return files; 112 | } catch (IOException e) { 113 | LoggerUtils.error(LOG, e); 114 | return null; 115 | } 116 | } 117 | 118 | } 119 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/reader/jdbc/JBDCReaderProperties.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.reader.jdbc; 2 | 3 | public class JBDCReaderProperties { 4 | 5 | public static final String DRIVER = "driver"; 6 | public static final String URL = "url"; 7 | public static final String USERNAME = "username"; 8 | public static final String PASSWORD = "password"; 9 | public static final String TABLE = "table"; 10 | public static final String COLUMNS = "columns"; 11 | public static final String EXCLUDE_COLUMNS = "excludeColumns"; 12 | public static final String WHERE = "where"; 13 | public static final String SQL = "sql"; 14 | public static final String SPLIT_BY = "splitBy"; 15 | } 16 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/reader/jdbc/JDBCReader.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.reader.jdbc; 2 | 3 | import java.sql.Connection; 4 | import java.sql.ResultSet; 5 | import java.sql.ResultSetMetaData; 6 | import java.sql.SQLException; 7 | import java.sql.Statement; 8 | 9 | import opensource.hdata.config.PluginConfig; 10 | import opensource.hdata.core.DefaultRecord; 11 | import opensource.hdata.core.Fields; 12 | import opensource.hdata.core.JobContext; 13 | import opensource.hdata.core.OutputFieldsDeclarer; 14 | import opensource.hdata.core.plugin.Reader; 15 | import opensource.hdata.core.plugin.Record; 16 | import opensource.hdata.core.plugin.RecordCollector; 17 | import opensource.hdata.exception.HDataException; 18 | import opensource.hdata.util.JDBCUtils; 19 | 20 | import org.apache.logging.log4j.LogManager; 21 | import org.apache.logging.log4j.Logger; 22 | 23 | public class JDBCReader extends Reader { 24 | 25 | private Connection connection; 26 | private String sql; 27 | private Fields fields; 28 | private static final Logger LOG = LogManager.getLogger(JDBCReader.class); 29 | 30 | @Override 31 | public void prepare(JobContext context, PluginConfig readerConfig) { 32 | String driver = readerConfig.getString(JBDCReaderProperties.DRIVER); 33 | String url = readerConfig.getString(JBDCReaderProperties.URL); 34 | String username = readerConfig.getString(JBDCReaderProperties.USERNAME); 35 | String password = readerConfig.getString(JBDCReaderProperties.PASSWORD); 36 | sql = readerConfig.getString(JBDCReaderProperties.SQL); 37 | LOG.debug(sql); 38 | 39 | try { 40 | connection = JDBCUtils.getConnection(driver, url, username, password); 41 | } catch (Exception e) { 42 | throw new HDataException(e); 43 | } 44 | } 45 | 46 | @Override 47 | public void execute(RecordCollector recordCollector) { 48 | try { 49 | Statement statement = connection.createStatement(); 50 | 51 | ResultSet rs = statement.executeQuery(sql); 52 | ResultSetMetaData metaData 
= rs.getMetaData(); 53 | int ColumnCount = metaData.getColumnCount(); 54 | 55 | if (fields == null) { 56 | fields = new Fields(); 57 | for (int i = 1; i <= ColumnCount; i++) { 58 | fields.add(metaData.getColumnName(i)); 59 | } 60 | } 61 | 62 | while (rs.next()) { 63 | Record r = new DefaultRecord(ColumnCount); 64 | for (int i = 1; i <= ColumnCount; i++) { 65 | r.addField(i - 1, rs.getObject(i)); 66 | } 67 | recordCollector.send(r); 68 | } 69 | rs.close(); 70 | statement.close(); 71 | } catch (SQLException e) { 72 | e.printStackTrace(); 73 | JDBCUtils.closeConnection(connection); 74 | throw new HDataException(e); 75 | } 76 | } 77 | 78 | @Override 79 | public void close() { 80 | JDBCUtils.closeConnection(connection); 81 | } 82 | 83 | @Override 84 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 85 | declarer.declare(fields); 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/reader/jdbc/JDBCSplitter.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.reader.jdbc; 2 | 3 | import java.sql.Connection; 4 | import java.sql.SQLException; 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | 8 | import opensource.hdata.common.Constants; 9 | import opensource.hdata.config.JobConfig; 10 | import opensource.hdata.config.PluginConfig; 11 | import opensource.hdata.core.plugin.Splitter; 12 | import opensource.hdata.exception.HDataException; 13 | import opensource.hdata.util.JDBCUtils; 14 | import opensource.hdata.util.Utils; 15 | 16 | import org.apache.logging.log4j.LogManager; 17 | import org.apache.logging.log4j.Logger; 18 | 19 | import com.google.common.base.Joiner; 20 | 21 | public class JDBCSplitter extends Splitter { 22 | 23 | private static final String CONDITIONS_REGEX = "\\$CONDITIONS"; 24 | private static final Logger LOG = LogManager.getLogger(JDBCSplitter.class); 25 | 26 | private void checkIfContainsConditionKey(String sql, String errorMessage) { 27 | if (!sql.contains("$CONDITIONS")) { 28 | throw new HDataException(errorMessage); 29 | } 30 | } 31 | 32 | private List buildPluginConfigs(Connection conn, String sql, String splitColumn, PluginConfig readerConfig) { 33 | List list = new ArrayList(); 34 | try { 35 | int parallelism = readerConfig.getParallelism(); 36 | double[] minAndMax = JDBCUtils.querySplitColumnRange(conn, sql.replaceAll(CONDITIONS_REGEX, "(1 = 1)"), splitColumn); 37 | double min = minAndMax[0]; 38 | double max = minAndMax[1] + 1; 39 | double step = (max - min) / parallelism; 40 | for (int i = 0, len = parallelism; i < len; i++) { 41 | PluginConfig otherReaderConfig = (PluginConfig) readerConfig.clone(); 42 | StringBuilder sb = new StringBuilder(); 43 | sb.append(splitColumn); 44 | sb.append(" >= "); 45 | sb.append((long) Math.ceil(min + step * i)); 46 | sb.append(" AND "); 47 | sb.append(splitColumn); 48 | 49 | if (i == (len - 1)) { 50 | sb.append(" <= "); 51 | } else { 52 | sb.append(" < "); 53 | } 54 | sb.append((long) Math.ceil(min + step * (i + 1))); 55 | 56 | otherReaderConfig.setProperty(JBDCReaderProperties.SQL, sql.toString().replaceAll(CONDITIONS_REGEX, sb.toString())); 57 | list.add(otherReaderConfig); 58 | } 59 | return list; 60 | } catch (SQLException e) { 61 | throw new HDataException(e); 62 | } finally { 63 | JDBCUtils.closeConnection(conn); 64 | } 65 | } 66 | 67 | @Override 68 | public List split(JobConfig jobConfig) { 69 | PluginConfig readerConfig = 
jobConfig.getReaderConfig(); 70 | String driver = readerConfig.getString(JBDCReaderProperties.DRIVER); 71 | String url = readerConfig.getString(JBDCReaderProperties.URL); 72 | String username = readerConfig.getString(JBDCReaderProperties.USERNAME); 73 | String password = readerConfig.getString(JBDCReaderProperties.PASSWORD); 74 | int parallelism = readerConfig.getParallelism(); 75 | 76 | StringBuilder sql = new StringBuilder(); 77 | if (readerConfig.containsKey(JBDCReaderProperties.SQL)) { 78 | if (parallelism > 1) { 79 | checkIfContainsConditionKey(readerConfig.getString(JBDCReaderProperties.SQL), 80 | "Reader must contains key word \"$CONDITIONS\" in sql property when parallelism > 1."); 81 | } 82 | sql.append(readerConfig.get(JBDCReaderProperties.SQL)); 83 | } else { 84 | String table = readerConfig.getString(JBDCReaderProperties.TABLE); 85 | sql.append("SELECT "); 86 | if (!readerConfig.containsKey(JBDCReaderProperties.COLUMNS) && !readerConfig.containsKey(JBDCReaderProperties.EXCLUDE_COLUMNS)) { 87 | sql.append("*"); 88 | } else if (readerConfig.containsKey(JBDCReaderProperties.COLUMNS)) { 89 | String columns = readerConfig.getString(JBDCReaderProperties.COLUMNS); 90 | sql.append(columns); 91 | } else if (readerConfig.containsKey(JBDCReaderProperties.EXCLUDE_COLUMNS)) { 92 | String[] excludeColumns = readerConfig.getString(JBDCReaderProperties.EXCLUDE_COLUMNS).trim().split(Constants.COLUMNS_SPLIT_REGEX); 93 | Connection conn = null; 94 | try { 95 | conn = JDBCUtils.getConnection(driver, url, username, password); 96 | String selectColumns = Joiner.on(", ").join(Utils.getColumns(JDBCUtils.getColumnNames(conn, table), excludeColumns)); 97 | sql.append(selectColumns); 98 | } catch (Exception e) { 99 | e.printStackTrace(); 100 | JDBCUtils.closeConnection(conn); 101 | throw new HDataException(e); 102 | } 103 | 104 | } 105 | sql.append(" FROM "); 106 | sql.append(table); 107 | 108 | if (readerConfig.containsKey(JBDCReaderProperties.WHERE)) { 109 | String where = readerConfig.getString(JBDCReaderProperties.WHERE); 110 | sql.append(" WHERE "); 111 | sql.append(where); 112 | sql.append(" AND $CONDITIONS"); 113 | } else { 114 | sql.append(" WHERE $CONDITIONS"); 115 | } 116 | } 117 | 118 | if (readerConfig.containsKey(JBDCReaderProperties.SPLIT_BY)) { 119 | String splitColumn = readerConfig.getString(JBDCReaderProperties.SPLIT_BY); 120 | LOG.debug("Get split-by column: {}", splitColumn); 121 | 122 | Connection conn = null; 123 | try { 124 | conn = JDBCUtils.getConnection(driver, url, username, password); 125 | return buildPluginConfigs(conn, sql.toString(), splitColumn, readerConfig); 126 | } catch (Exception e) { 127 | throw new HDataException(e); 128 | } finally { 129 | JDBCUtils.closeConnection(conn); 130 | } 131 | } else { 132 | if (readerConfig.containsKey(JBDCReaderProperties.TABLE)) { 133 | Connection conn = null; 134 | try { 135 | String table = readerConfig.getString(JBDCReaderProperties.TABLE); 136 | LOG.info("Attemp to query digital primary key for table: {}", table); 137 | conn = JDBCUtils.getConnection(driver, url, username, password); 138 | String splitColumn = JDBCUtils.getDigitalPrimaryKey(conn, conn.getCatalog(), null, table); 139 | if (splitColumn != null) { 140 | LOG.info("Table {} find digital primary key: {}", table, splitColumn); 141 | return buildPluginConfigs(conn, sql.toString(), splitColumn, readerConfig); 142 | } else { 143 | LOG.info("Table {} can not find digital primary key.", table); 144 | } 145 | } catch (Exception e) { 146 | throw new HDataException(e); 147 | } 
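// Always release the JDBC connection, whether or not a numeric primary key was detected.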
finally { 148 | JDBCUtils.closeConnection(conn); 149 | } 150 | } 151 | 152 | if (parallelism > 1) { 153 | LOG.warn( 154 | "Reader parallelism is set to {}, but the \"split-by\" config is not given, so reader parallelism is set to default value: 1.", 155 | parallelism); 156 | } 157 | 158 | List list = new ArrayList(); 159 | readerConfig.setProperty(JBDCReaderProperties.SQL, sql.toString().replaceAll(CONDITIONS_REGEX, "(1 = 1)")); 160 | list.add(readerConfig); 161 | return list; 162 | } 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/reader/mongodb/MongoDBReader.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.reader.mongodb; 2 | 3 | import java.net.UnknownHostException; 4 | import java.util.Set; 5 | 6 | import opensource.hdata.config.PluginConfig; 7 | import opensource.hdata.core.DefaultRecord; 8 | import opensource.hdata.core.Fields; 9 | import opensource.hdata.core.JobContext; 10 | import opensource.hdata.core.OutputFieldsDeclarer; 11 | import opensource.hdata.core.plugin.Reader; 12 | import opensource.hdata.core.plugin.Record; 13 | import opensource.hdata.core.plugin.RecordCollector; 14 | import opensource.hdata.exception.HDataException; 15 | 16 | import com.mongodb.BasicDBObject; 17 | import com.mongodb.DB; 18 | import com.mongodb.DBCollection; 19 | import com.mongodb.DBCursor; 20 | import com.mongodb.DBObject; 21 | import com.mongodb.MongoClient; 22 | import com.mongodb.MongoClientURI; 23 | 24 | public class MongoDBReader extends Reader { 25 | 26 | private Fields fields; 27 | private String uri; 28 | private BasicDBObject condition; 29 | private static final String OBJECT_ID_KEY = "_id"; 30 | 31 | @Override 32 | public void prepare(JobContext context, PluginConfig readerConfig) { 33 | uri = readerConfig.getString(MongoDBReaderProperties.URI); 34 | condition = (BasicDBObject) readerConfig.get(MongoDBReaderProperties.QUERY); 35 | } 36 | 37 | @Override 38 | public void execute(RecordCollector recordCollector) { 39 | MongoClientURI clientURI = new MongoClientURI(uri); 40 | MongoClient mongoClient = null; 41 | try { 42 | mongoClient = new MongoClient(clientURI); 43 | DB db = mongoClient.getDB(clientURI.getDatabase()); 44 | DBCollection coll = db.getCollection(clientURI.getCollection()); 45 | DBCursor cur = coll.find(condition); 46 | while (cur.hasNext()) { 47 | DBObject doc = cur.next(); 48 | Set keys = doc.keySet(); 49 | Record record = new DefaultRecord(keys.size() - 1); 50 | if (fields == null) { 51 | fields = new Fields(); 52 | for (String key : keys) { 53 | fields.add(key); 54 | } 55 | } 56 | 57 | for (String key : keys) { 58 | if (!OBJECT_ID_KEY.equals(key)) { 59 | record.addField(doc.get(key)); 60 | } 61 | } 62 | 63 | recordCollector.send(record); 64 | } 65 | } catch (UnknownHostException e) { 66 | throw new HDataException(e); 67 | } finally { 68 | if (mongoClient != null) { 69 | mongoClient.close(); 70 | } 71 | } 72 | } 73 | 74 | @Override 75 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 76 | declarer.declare(fields); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/reader/mongodb/MongoDBReaderProperties.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.reader.mongodb; 2 | 3 | public class MongoDBReaderProperties { 4 | 5 | public static 
final String URI = "uri"; 6 | public static final String QUERY = "query"; 7 | } 8 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/reader/mongodb/MongoDBSplitter.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.reader.mongodb; 2 | 3 | import java.math.BigInteger; 4 | import java.net.UnknownHostException; 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | 8 | import opensource.hdata.config.JobConfig; 9 | import opensource.hdata.config.PluginConfig; 10 | import opensource.hdata.core.plugin.Splitter; 11 | import opensource.hdata.exception.HDataException; 12 | 13 | import org.bson.types.ObjectId; 14 | 15 | import com.mongodb.BasicDBObject; 16 | import com.mongodb.DB; 17 | import com.mongodb.DBCollection; 18 | import com.mongodb.DBCursor; 19 | import com.mongodb.DBObject; 20 | import com.mongodb.MongoClient; 21 | import com.mongodb.MongoClientURI; 22 | import com.mongodb.util.JSON; 23 | 24 | public class MongoDBSplitter extends Splitter { 25 | 26 | private static final String OBJECT_ID_KEY = "_id"; 27 | private static final int HEXADECIMAL = 16; 28 | 29 | @Override 30 | public List split(JobConfig jobConfig) { 31 | List list = new ArrayList(); 32 | PluginConfig readerConfig = jobConfig.getReaderConfig(); 33 | String uri = readerConfig.getString(MongoDBReaderProperties.URI); 34 | int parallelism = readerConfig.getParallelism(); 35 | 36 | MongoClientURI clientURI = new MongoClientURI(uri); 37 | MongoClient mongoClient = null; 38 | try { 39 | mongoClient = new MongoClient(clientURI); 40 | DB db = mongoClient.getDB(clientURI.getDatabase()); 41 | DBCollection coll = db.getCollection(clientURI.getCollection()); 42 | 43 | String maxID = ""; 44 | String minID = ""; 45 | DBObject sort = new BasicDBObject(); 46 | sort.put(OBJECT_ID_KEY, -1); 47 | DBCursor cursor = coll.find().sort(sort).limit(1); 48 | while (cursor.hasNext()) { 49 | maxID = cursor.next().get(OBJECT_ID_KEY).toString(); 50 | } 51 | 52 | sort.put(OBJECT_ID_KEY, 1); 53 | cursor = coll.find().sort(sort).limit(1); 54 | while (cursor.hasNext()) { 55 | minID = cursor.next().get(OBJECT_ID_KEY).toString(); 56 | } 57 | 58 | if (!maxID.isEmpty() && !minID.isEmpty()) { 59 | BigInteger maxBigInteger = new BigInteger(maxID, HEXADECIMAL); 60 | BigInteger minBigInteger = new BigInteger(minID, HEXADECIMAL); 61 | BigInteger step = (maxBigInteger.subtract(minBigInteger).divide(BigInteger.valueOf(parallelism))); 62 | for (int i = 0, len = parallelism; i < len; i++) { 63 | BasicDBObject condition = null; 64 | if (readerConfig.containsKey(MongoDBReaderProperties.QUERY)) { 65 | condition = (BasicDBObject) JSON.parse(readerConfig.getString(MongoDBReaderProperties.QUERY)); 66 | } else { 67 | condition = new BasicDBObject(); 68 | } 69 | 70 | BasicDBObject idRange = new BasicDBObject("$gte", new ObjectId(minBigInteger.add(step.multiply(BigInteger.valueOf(i))).toString( 71 | HEXADECIMAL))); 72 | if (i == len - 1) { 73 | idRange.append("$lte", new ObjectId(maxBigInteger.toString(HEXADECIMAL))); 74 | } else { 75 | idRange.append("$lt", new ObjectId(minBigInteger.add(step.multiply(BigInteger.valueOf(i + 1))).toString(HEXADECIMAL))); 76 | } 77 | 78 | condition.put(OBJECT_ID_KEY, idRange); 79 | 80 | PluginConfig pluginConfig = (PluginConfig) readerConfig.clone(); 81 | pluginConfig.put(MongoDBReaderProperties.QUERY, condition); 82 | list.add(pluginConfig); 83 | } 84 | } 85 | } catch (UnknownHostException e) { 86 | 
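// Surface connection failures as the framework's unchecked HDataException.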
throw new HDataException(e); 87 | } finally { 88 | if (mongoClient != null) { 89 | mongoClient.close(); 90 | } 91 | } 92 | 93 | return list; 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/writer/console/ConsoleWriter.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.writer.console; 2 | 3 | import opensource.hdata.core.plugin.Record; 4 | import opensource.hdata.core.plugin.Writer; 5 | 6 | public class ConsoleWriter extends Writer { 7 | 8 | @Override 9 | public void execute(Record record) { 10 | System.out.println(record); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/writer/ftp/FTPWriter.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.writer.ftp; 2 | 3 | import java.io.BufferedWriter; 4 | import java.io.IOException; 5 | import java.io.OutputStream; 6 | import java.io.OutputStreamWriter; 7 | import java.util.concurrent.atomic.AtomicInteger; 8 | import java.util.regex.Matcher; 9 | import java.util.regex.Pattern; 10 | import java.util.zip.GZIPOutputStream; 11 | 12 | import opensource.hdata.config.PluginConfig; 13 | import opensource.hdata.core.JobContext; 14 | import opensource.hdata.core.plugin.Record; 15 | import opensource.hdata.core.plugin.Writer; 16 | import opensource.hdata.exception.HDataException; 17 | import opensource.hdata.util.EscaperUtils; 18 | import opensource.hdata.util.FTPUtils; 19 | 20 | import org.apache.commons.net.ftp.FTPClient; 21 | 22 | import com.google.common.base.Joiner; 23 | 24 | public class FTPWriter extends Writer { 25 | 26 | private String host; 27 | private int port; 28 | private String username; 29 | private String password; 30 | private String fieldsSeparator; 31 | private String lineSeparator; 32 | private String encoding; 33 | private String path; 34 | private boolean gzipCompress; 35 | private FTPClient ftpClient; 36 | private BufferedWriter bw; 37 | private String[] strArray; 38 | private static AtomicInteger sequence = new AtomicInteger(0); 39 | private static final Pattern REG_FILE_PATH_WITHOUT_EXTENSION = Pattern.compile(".*?(?=\\.\\w+$)"); 40 | private static final Pattern REG_FILE_EXTENSION = Pattern.compile("(\\.\\w+)$"); 41 | 42 | @Override 43 | public void prepare(JobContext context, PluginConfig writerConfig) { 44 | host = writerConfig.getString(FTPWriterProperties.HOST); 45 | port = writerConfig.getInt(FTPWriterProperties.PORT, 21); 46 | username = writerConfig.getString(FTPWriterProperties.USERNAME, "anonymous"); 47 | password = writerConfig.getString(FTPWriterProperties.PASSWORD, ""); 48 | fieldsSeparator = EscaperUtils.parse(writerConfig.getString(FTPWriterProperties.FIELDS_SEPARATOR, "\t")); 49 | lineSeparator = EscaperUtils.parse(writerConfig.getString(FTPWriterProperties.LINE_SEPARATOR, "\n")); 50 | encoding = writerConfig.getString(FTPWriterProperties.ENCODING, "UTF-8"); 51 | path = writerConfig.getString(FTPWriterProperties.PATH); 52 | gzipCompress = writerConfig.getBoolean(FTPWriterProperties.GZIP_COMPRESS, false); 53 | 54 | int parallelism = writerConfig.getParallelism(); 55 | if (parallelism > 1) { 56 | String filePathWithoutExtension = ""; 57 | String fileExtension = ""; 58 | Matcher m1 = REG_FILE_PATH_WITHOUT_EXTENSION.matcher(path.trim()); 59 | if (m1.find()) { 60 | filePathWithoutExtension = m1.group(); 
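// Capture the path without its extension so a per-writer sequence number can be appended before the extension below.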
61 | } 62 | 63 | Matcher m2 = REG_FILE_EXTENSION.matcher(path.trim()); 64 | if (m2.find()) { 65 | fileExtension = m2.group(); 66 | } 67 | path = String.format("%s_%04d%s", filePathWithoutExtension, sequence.getAndIncrement(), fileExtension); 68 | } 69 | 70 | try { 71 | ftpClient = FTPUtils.getFtpClient(host, port, username, password); 72 | OutputStream outputStream = ftpClient.storeFileStream(path); 73 | if (gzipCompress) { 74 | bw = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(outputStream), encoding)); 75 | } else { 76 | bw = new BufferedWriter(new OutputStreamWriter(outputStream, encoding)); 77 | } 78 | } catch (Exception e) { 79 | throw new HDataException(e); 80 | } 81 | } 82 | 83 | @Override 84 | public void execute(Record record) { 85 | if (strArray == null) { 86 | strArray = new String[record.getFieldsCount()]; 87 | } 88 | 89 | for (int i = 0, len = record.getFieldsCount(); i < len; i++) { 90 | Object o = record.getField(i); 91 | if (o == null) { 92 | strArray[i] = "NULL"; 93 | } else { 94 | strArray[i] = o.toString(); 95 | } 96 | } 97 | try { 98 | bw.write(Joiner.on(fieldsSeparator).join(strArray)); 99 | bw.write(lineSeparator); 100 | } catch (IOException e) { 101 | throw new HDataException(e); 102 | } 103 | } 104 | 105 | @Override 106 | public void close() { 107 | if (bw != null) { 108 | try { 109 | bw.close(); 110 | } catch (IOException e) { 111 | throw new HDataException(e); 112 | } 113 | } 114 | FTPUtils.closeFtpClient(ftpClient); 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/writer/ftp/FTPWriterProperties.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.writer.ftp; 2 | 3 | public class FTPWriterProperties { 4 | public static final String HOST = "host"; 5 | public static final String PORT = "port"; 6 | public static final String USERNAME = "username"; 7 | public static final String PASSWORD = "password"; 8 | public static final String PATH = "path"; 9 | public static final String ENCODING = "encoding"; 10 | public static final String FIELDS_SEPARATOR = "fieldsSeparator"; 11 | public static final String LINE_SEPARATOR = "lineSeparator"; 12 | public static final String GZIP_COMPRESS = "gzipCompress"; 13 | } 14 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/writer/hbase/HBaseWriter.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.writer.hbase; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | import java.util.List; 6 | 7 | import opensource.hdata.config.PluginConfig; 8 | import opensource.hdata.core.JobContext; 9 | import opensource.hdata.core.plugin.Record; 10 | import opensource.hdata.core.plugin.Writer; 11 | import opensource.hdata.exception.HDataException; 12 | 13 | import org.apache.hadoop.conf.Configuration; 14 | import org.apache.hadoop.hbase.HBaseConfiguration; 15 | import org.apache.hadoop.hbase.client.HTable; 16 | import org.apache.hadoop.hbase.client.Put; 17 | import org.apache.hadoop.hbase.util.Bytes; 18 | 19 | public class HBaseWriter extends Writer { 20 | 21 | private HTable table; 22 | private int batchSize; 23 | private int rowkeyIndex = -1; 24 | private List putList = new ArrayList(); 25 | private String[] columns; 26 | private static final String ROWKEY = ":rowkey"; 27 | 28 | @Override 29 | public void 
prepare(JobContext context, PluginConfig writerConfig) { 30 | Configuration conf = HBaseConfiguration.create(); 31 | conf.set("hbase.zookeeper.quorum", writerConfig.getString(HBaseWriterProperties.ZOOKEEPER_QUORUM)); 32 | conf.set("hbase.zookeeper.property.clientPort", writerConfig.getString(HBaseWriterProperties.ZOOKEEPER_PROPERTY_CLIENTPORT, "2181")); 33 | batchSize = writerConfig.getInt(HBaseWriterProperties.BATCH_INSERT_SIZE, 10000); 34 | columns = writerConfig.getString(HBaseWriterProperties.COLUMNS).split(","); 35 | for (int i = 0, len = columns.length; i < len; i++) { 36 | if (ROWKEY.equalsIgnoreCase(columns[i])) { 37 | rowkeyIndex = i; 38 | break; 39 | } 40 | } 41 | 42 | if (rowkeyIndex == -1) { 43 | throw new IllegalArgumentException("Can not find :rowkey in columnsMapping of HBase Writer!"); 44 | } 45 | 46 | try { 47 | table = new HTable(conf, writerConfig.getString(HBaseWriterProperties.TABLE)); 48 | } catch (IOException e) { 49 | throw new HDataException(e); 50 | } 51 | } 52 | 53 | @Override 54 | public void execute(Record record) { 55 | Object rowkeyValue = record.getField(rowkeyIndex); 56 | Put put = new Put(Bytes.toBytes(rowkeyValue == null ? "NULL" : rowkeyValue.toString())); 57 | for (int i = 0, len = record.getFieldsCount(); i < len; i++) { 58 | if (i != rowkeyIndex) { 59 | String[] tokens = columns[i].split(":"); 60 | put.add(Bytes.toBytes(tokens[0]), Bytes.toBytes(tokens[1]), 61 | record.getField(i) == null ? null : Bytes.toBytes(record.getField(i).toString())); 62 | } 63 | } 64 | 65 | putList.add(put); 66 | if (putList.size() == batchSize) { 67 | try { 68 | table.put(putList); 69 | } catch (IOException e) { 70 | throw new HDataException(e); 71 | } 72 | putList.clear(); 73 | } 74 | } 75 | 76 | @Override 77 | public void close() { 78 | if (table != null) { 79 | try { 80 | if (putList.size() > 0) { 81 | table.put(putList); 82 | } 83 | 84 | table.close(); 85 | } catch (IOException e) { 86 | throw new HDataException(e); 87 | } 88 | putList.clear(); 89 | } 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/writer/hbase/HBaseWriterProperties.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.writer.hbase; 2 | 3 | public class HBaseWriterProperties { 4 | public static final String ZOOKEEPER_QUORUM = "zookeeperQuorum"; 5 | public static final String ZOOKEEPER_PROPERTY_CLIENTPORT = "zookeeperClientPort"; 6 | public static final String TABLE = "table"; 7 | public static final String COLUMNS = "columns"; 8 | public static final String BATCH_INSERT_SIZE = "batchInsertSize"; 9 | } 10 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/writer/hdfs/HDFSWriter.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.writer.hdfs; 2 | 3 | import java.io.BufferedWriter; 4 | import java.io.IOException; 5 | import java.io.OutputStreamWriter; 6 | import java.util.concurrent.atomic.AtomicInteger; 7 | import java.util.regex.Matcher; 8 | import java.util.regex.Pattern; 9 | 10 | import opensource.hdata.config.PluginConfig; 11 | import opensource.hdata.core.JobContext; 12 | import opensource.hdata.core.plugin.Record; 13 | import opensource.hdata.core.plugin.Writer; 14 | import opensource.hdata.exception.HDataException; 15 | import opensource.hdata.util.EscaperUtils; 16 | 17 | import 
org.apache.hadoop.conf.Configuration; 18 | import org.apache.hadoop.fs.FSDataOutputStream; 19 | import org.apache.hadoop.fs.FileSystem; 20 | import org.apache.hadoop.fs.Path; 21 | import org.apache.hadoop.io.compress.CompressionCodec; 22 | import org.apache.hadoop.io.compress.CompressionCodecFactory; 23 | 24 | import com.google.common.base.Joiner; 25 | 26 | public class HDFSWriter extends Writer { 27 | 28 | private String path; 29 | private String fieldsSeparator; 30 | private String lineSeparator; 31 | private String encoding; 32 | private String compressCodec; 33 | private String hadoopUser; 34 | private BufferedWriter bw; 35 | private String[] strArray; 36 | private static AtomicInteger sequence = new AtomicInteger(0); 37 | private static final Pattern REG_FILE_PATH_WITHOUT_EXTENSION = Pattern.compile(".*?(?=\\.\\w+$)"); 38 | private static final Pattern REG_FILE_EXTENSION = Pattern.compile("(\\.\\w+)$"); 39 | 40 | @Override 41 | public void prepare(JobContext context, PluginConfig writerConfig) { 42 | path = writerConfig.getString(HDFSWriterProperties.PATH); 43 | fieldsSeparator = EscaperUtils.parse(writerConfig.getString(HDFSWriterProperties.FIELDS_SEPARATOR, "\t")); 44 | lineSeparator = EscaperUtils.parse(writerConfig.getString(HDFSWriterProperties.LINE_SEPARATOR, "\n")); 45 | encoding = writerConfig.getString(HDFSWriterProperties.ENCODING, "UTF-8"); 46 | compressCodec = writerConfig.getProperty(HDFSWriterProperties.COMPRESS_CODEC); 47 | hadoopUser = writerConfig.getString(HDFSWriterProperties.HADOOP_USER); 48 | System.setProperty("HADOOP_USER_NAME", hadoopUser); 49 | 50 | int parallelism = writerConfig.getParallelism(); 51 | if (parallelism > 1) { 52 | String filePathWithoutExtension = ""; 53 | String fileExtension = ""; 54 | Matcher m1 = REG_FILE_PATH_WITHOUT_EXTENSION.matcher(path.trim()); 55 | if (m1.find()) { 56 | filePathWithoutExtension = m1.group(); 57 | } 58 | 59 | Matcher m2 = REG_FILE_EXTENSION.matcher(path.trim()); 60 | if (m2.find()) { 61 | fileExtension = m2.group(); 62 | } 63 | path = String.format("%s_%04d%s", filePathWithoutExtension, sequence.getAndIncrement(), fileExtension); 64 | } 65 | 66 | Path hdfsPath = new Path(path); 67 | Configuration conf = new Configuration(); 68 | try { 69 | FileSystem fs = hdfsPath.getFileSystem(conf); 70 | FSDataOutputStream output = fs.create(hdfsPath); 71 | if (compressCodec == null) { 72 | bw = new BufferedWriter(new OutputStreamWriter(output, encoding)); 73 | } else { 74 | CompressionCodecFactory factory = new CompressionCodecFactory(conf); 75 | CompressionCodec codec = factory.getCodecByClassName(compressCodec); 76 | bw = new BufferedWriter(new OutputStreamWriter(codec.createOutputStream(output), encoding)); 77 | } 78 | } catch (IOException e) { 79 | throw new HDataException(e); 80 | } 81 | 82 | } 83 | 84 | @Override 85 | public void execute(Record record) { 86 | if (strArray == null) { 87 | strArray = new String[record.getFieldsCount()]; 88 | } 89 | 90 | for (int i = 0, len = record.getFieldsCount(); i < len; i++) { 91 | Object o = record.getField(i); 92 | if (o == null) { 93 | strArray[i] = "NULL"; 94 | } else { 95 | strArray[i] = o.toString(); 96 | } 97 | } 98 | try { 99 | bw.write(Joiner.on(fieldsSeparator).join(strArray)); 100 | bw.write(lineSeparator); 101 | } catch (IOException e) { 102 | throw new HDataException(e); 103 | } 104 | } 105 | 106 | @Override 107 | public void close() { 108 | if (bw != null) { 109 | try { 110 | bw.flush(); 111 | bw.close(); 112 | } catch (IOException e) { 113 | throw new HDataException(e); 114 | 
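/*
 * Worked example of the parallelism handling in prepare() above: with parallelism > 1 and
 * path=/user/hdata/output/result.txt, each writer instance rewrites its own path to
 * .../result_0000.txt, .../result_0001.txt, ... via the shared AtomicInteger, so concurrent
 * writers never open the same HDFS file. A path without a file extension matches neither
 * regular expression, so the suffixing only works as intended when the configured path ends
 * in ".<extension>".
 */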
} 115 | } 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/writer/hdfs/HDFSWriterProperties.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.writer.hdfs; 2 | 3 | public class HDFSWriterProperties { 4 | public static final String PATH = "path"; 5 | public static final String FIELDS_SEPARATOR = "fieldsSeparator"; 6 | public static final String LINE_SEPARATOR = "lineSeparator"; 7 | public static final String ENCODING = "encoding"; 8 | public static final String COMPRESS_CODEC = "compressCodec"; 9 | public static final String HADOOP_USER = "hadoopUser"; 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/writer/hive/HiveRecordWritable.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.writer.hive; 2 | 3 | import java.io.DataInput; 4 | import java.io.DataOutput; 5 | import java.io.IOException; 6 | 7 | import org.apache.hadoop.io.Writable; 8 | 9 | public class HiveRecordWritable implements Writable { 10 | 11 | public void write(DataOutput dataOutput) throws IOException { 12 | throw new UnsupportedOperationException("no write"); 13 | } 14 | 15 | public void readFields(DataInput dataInput) throws IOException { 16 | throw new UnsupportedOperationException("no read"); 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/writer/hive/HiveWriter.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.writer.hive; 2 | 3 | import java.io.IOException; 4 | import java.lang.reflect.Field; 5 | import java.util.ArrayList; 6 | import java.util.HashMap; 7 | import java.util.List; 8 | import java.util.Map; 9 | import java.util.UUID; 10 | import java.util.regex.Matcher; 11 | import java.util.regex.Pattern; 12 | 13 | import javassist.ClassPool; 14 | import javassist.CtClass; 15 | import javassist.CtField; 16 | import opensource.hdata.config.PluginConfig; 17 | import opensource.hdata.core.JobContext; 18 | import opensource.hdata.core.plugin.Record; 19 | import opensource.hdata.core.plugin.Writer; 20 | import opensource.hdata.exception.HDataException; 21 | import opensource.hdata.plugin.reader.hive.HiveReaderProperties; 22 | import opensource.hdata.util.HiveTypeUtils; 23 | import opensource.hdata.util.LoggerUtils; 24 | import opensource.hdata.util.TypeConvertUtils; 25 | import opensource.hdata.util.Utils; 26 | 27 | import org.apache.hadoop.fs.FileSystem; 28 | import org.apache.hadoop.fs.Path; 29 | import org.apache.hadoop.hive.conf.HiveConf; 30 | import org.apache.hadoop.hive.conf.HiveConf.ConfVars; 31 | import org.apache.hadoop.hive.metastore.api.FieldSchema; 32 | import org.apache.hadoop.hive.ql.exec.FileSinkOperator; 33 | import org.apache.hadoop.hive.ql.io.HiveOutputFormat; 34 | import org.apache.hadoop.hive.ql.metadata.Hive; 35 | import org.apache.hadoop.hive.ql.metadata.HiveException; 36 | import org.apache.hadoop.hive.ql.metadata.Partition; 37 | import org.apache.hadoop.hive.ql.metadata.Table; 38 | import org.apache.hadoop.hive.serde2.Serializer; 39 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; 40 | import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; 41 | import 
org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; 42 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; 43 | import org.apache.hadoop.mapred.JobConf; 44 | import org.apache.hadoop.mapred.Reporter; 45 | import org.apache.logging.log4j.LogManager; 46 | import org.apache.logging.log4j.Logger; 47 | 48 | @SuppressWarnings("deprecation") 49 | public class HiveWriter extends Writer { 50 | 51 | private Serializer serializer; 52 | private HiveOutputFormat outputFormat; 53 | private StructObjectInspector inspector; 54 | private FileSinkOperator.RecordWriter writer; 55 | private Path path = null; 56 | private Map partitionSpecify = new HashMap(); 57 | private int partitionKeySize; 58 | private PluginConfig writerConfig; 59 | private Object hiveRecord; 60 | private String hdfsTmpDir; 61 | 62 | private static Class hiveRecordWritale; 63 | private static List classFields = new ArrayList(); 64 | private static List files = new ArrayList(); 65 | private static final Pattern HDFS_MASTER = Pattern.compile("hdfs://[\\w\\.]+:\\d+"); 66 | private static final Logger LOG = LogManager.getLogger(HiveWriter.class); 67 | 68 | private synchronized static void createHiveRecordClass(List columns) { 69 | if (hiveRecordWritale == null) { 70 | ClassPool pool = ClassPool.getDefault(); 71 | try { 72 | CtClass ctClass = pool.get("opensource.hdata.plugin.writer.hive.HiveRecordWritable"); 73 | for (FieldSchema fieldSchema : columns) { 74 | PrimitiveCategory primitiveCategory = HiveTypeUtils.getPrimitiveCategory(fieldSchema.getType().replaceAll("\\(.*\\)", "") 75 | .toUpperCase()); 76 | Class fieldTypeClazz = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(primitiveCategory) 77 | .getJavaPrimitiveClass(); 78 | CtField ctField = new CtField(pool.get(fieldTypeClazz.getName()), fieldSchema.getName(), ctClass); 79 | ctClass.addField(ctField); 80 | } 81 | hiveRecordWritale = ctClass.toClass(); 82 | for (Field field : hiveRecordWritale.getDeclaredFields()) { 83 | field.setAccessible(true); 84 | classFields.add(field); 85 | } 86 | } catch (Exception e) { 87 | throw new HDataException(e); 88 | } 89 | } 90 | } 91 | 92 | @Override 93 | public void prepare(JobContext context, PluginConfig writerConfig) { 94 | hdfsTmpDir = context.getEngineConfig().getString("hdata.hive.writer.tmp.dir", "/tmp"); 95 | this.writerConfig = writerConfig; 96 | String metastoreUris = writerConfig.getString(HiveWriterProperties.METASTORE_URIS); 97 | String dbName = writerConfig.getString(HiveWriterProperties.DATABASE, "default"); 98 | String tableName = writerConfig.getString(HiveWriterProperties.TABLE); 99 | boolean isCompress = writerConfig.getBoolean(HiveWriterProperties.COMPRESS, true); 100 | 101 | System.setProperty("HADOOP_USER_NAME", writerConfig.getString(HiveWriterProperties.HADOOP_USER)); 102 | 103 | HiveConf conf = new HiveConf(); 104 | conf.set(ConfVars.METASTOREURIS.varname, metastoreUris); 105 | 106 | Hive hive; 107 | Table table; 108 | try { 109 | hive = Hive.get(conf, true); 110 | table = hive.getTable(dbName, tableName, false); 111 | 112 | partitionKeySize = table.getPartitionKeys().size(); 113 | serializer = (Serializer) table.getDeserializer(); 114 | outputFormat = (HiveOutputFormat) table.getOutputFormatClass().newInstance(); 115 | if (writerConfig.containsKey(HiveReaderProperties.PARTITIONS)) { 116 | String partitions = writerConfig.getString(HiveReaderProperties.PARTITIONS); 117 | String[] partKVs = partitions.split("\\s*,\\s*"); 118 | for (String kv : 
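/*
 * Example of the partition handling below: partitions="ds=2014-05-01,city=bj" is split into
 * ["ds=2014-05-01", "city=bj"] and collected into partitionSpecify as {ds=2014-05-01, city=bj};
 * any token that does not contain exactly one '=' is silently skipped by the
 * tokens.length == 2 check.
 */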
partKVs) { 119 | String[] tokens = kv.split("="); 120 | if (tokens.length == 2) { 121 | partitionSpecify.put(tokens[0], tokens[1]); 122 | } 123 | } 124 | } else if (partitionKeySize > 0) { 125 | throw new HDataException(String.format("Table %s.%s is partition table, but partition config is not given.", dbName, tableName)); 126 | } 127 | 128 | createHiveRecordClass(table.getCols()); 129 | hiveRecord = hiveRecordWritale.newInstance(); 130 | 131 | String tableLocation = Utils.fixLocaltion(table.getDataLocation().toString(), metastoreUris); 132 | Matcher m = HDFS_MASTER.matcher(tableLocation); 133 | if (m.find()) { 134 | path = new Path(String.format("%s/%s/%s-%s.tmp", m.group(), hdfsTmpDir, tableName, UUID.randomUUID().toString().replaceAll("-", ""))); 135 | files.add(path); 136 | } 137 | 138 | inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(HiveRecordWritable.class, 139 | ObjectInspectorFactory.ObjectInspectorOptions.JAVA); 140 | JobConf jobConf = new JobConf(); 141 | writer = outputFormat.getHiveRecordWriter(jobConf, path, HiveRecordWritable.class, isCompress, table.getMetadata(), Reporter.NULL); 142 | } catch (Exception e) { 143 | throw new HDataException(e); 144 | } finally { 145 | Hive.closeCurrent(); 146 | } 147 | } 148 | 149 | @Override 150 | public void execute(Record record) { 151 | try { 152 | for (int i = 0, len = record.getFieldsCount() - partitionKeySize; i < len; i++) { 153 | classFields.get(i).set(hiveRecord, TypeConvertUtils.convert(record.getField(i), classFields.get(i).getType())); 154 | } 155 | writer.write(serializer.serialize(hiveRecord, inspector)); 156 | } catch (Exception e) { 157 | throw new HDataException(e); 158 | } 159 | } 160 | 161 | private synchronized static Partition createPartition(Hive hive, Table table, Map partSpec) { 162 | Partition p = null; 163 | try { 164 | p = hive.getPartition(table, partSpec, false); 165 | if (p == null) { 166 | p = hive.getPartition(table, partSpec, true); 167 | } 168 | } catch (HiveException e) { 169 | throw new HDataException(e); 170 | } 171 | return p; 172 | } 173 | 174 | @Override 175 | public void close() { 176 | if (writer != null) { 177 | try { 178 | writer.close(true); 179 | 180 | String metastoreUris = writerConfig.getString(HiveWriterProperties.METASTORE_URIS); 181 | String dbName = writerConfig.getString(HiveWriterProperties.DATABASE, "default"); 182 | String tableName = writerConfig.getString(HiveWriterProperties.TABLE); 183 | HiveConf conf = new HiveConf(); 184 | conf.set(ConfVars.METASTOREURIS.varname, metastoreUris); 185 | Path renamedPath = new Path(path.toString().replaceFirst("\\.tmp$", "")); 186 | FileSystem fs = renamedPath.getFileSystem(conf); 187 | fs.rename(path, renamedPath); 188 | 189 | Hive hive; 190 | try { 191 | hive = Hive.get(conf, true); 192 | if (partitionKeySize == 0) { 193 | LOG.info("Loading data {} into table {}.{}", renamedPath.toString(), dbName, tableName); 194 | hive.loadTable(renamedPath, dbName + "." + tableName, false, false); 195 | } else { 196 | Table table = hive.getTable(dbName, tableName, false); 197 | Partition p = createPartition(hive, table, partitionSpecify); 198 | LOG.info("Loading data {} into table {}.{} partition({})", renamedPath.toString(), dbName, tableName, p.getName()); 199 | hive.loadPartition(renamedPath, dbName + "." 
+ tableName, partitionSpecify, false, false, true, false); 200 | } 201 | } catch (Exception e) { 202 | throw new HDataException(e); 203 | } finally { 204 | Hive.closeCurrent(); 205 | } 206 | } catch (IOException e) { 207 | LoggerUtils.error(LOG, e); 208 | } 209 | } 210 | } 211 | } 212 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/writer/hive/HiveWriterProperties.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.writer.hive; 2 | 3 | public class HiveWriterProperties { 4 | 5 | public static final String METASTORE_URIS = "metastoreUris"; 6 | public static final String DATABASE = "database"; 7 | public static final String TABLE = "table"; 8 | public static final String PARTITIONS = "partitions"; 9 | public static final String COMPRESS = "compress"; 10 | public static final String HADOOP_USER = "hadoopUser"; 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/writer/jdbc/JBDCWriterProperties.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.writer.jdbc; 2 | 3 | public class JBDCWriterProperties { 4 | 5 | public static final String DRIVER = "driver"; 6 | public static final String URL = "url"; 7 | public static final String USERNAME = "username"; 8 | public static final String PASSWORD = "password"; 9 | public static final String TABLE = "table"; 10 | public static final String BATCH_INSERT_SIZE = "batchInsertSize"; 11 | public static final String PARALLELISM = "parallelism"; 12 | 13 | } 14 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/writer/jdbc/JDBCWriter.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.writer.jdbc; 2 | 3 | import java.sql.Connection; 4 | import java.sql.PreparedStatement; 5 | import java.sql.SQLException; 6 | import java.sql.Timestamp; 7 | import java.sql.Types; 8 | import java.text.SimpleDateFormat; 9 | import java.util.Arrays; 10 | import java.util.Map; 11 | 12 | import opensource.hdata.common.Constants; 13 | import opensource.hdata.config.PluginConfig; 14 | import opensource.hdata.core.Fields; 15 | import opensource.hdata.core.JobContext; 16 | import opensource.hdata.core.plugin.Record; 17 | import opensource.hdata.core.plugin.Writer; 18 | import opensource.hdata.exception.HDataException; 19 | import opensource.hdata.util.JDBCUtils; 20 | 21 | import org.apache.logging.log4j.LogManager; 22 | import org.apache.logging.log4j.Logger; 23 | 24 | import com.google.common.base.Joiner; 25 | 26 | public class JDBCWriter extends Writer { 27 | 28 | private Connection connection = null; 29 | private PreparedStatement statement = null; 30 | private int count; 31 | private int batchInsertSize; 32 | private Fields columns; 33 | private String table; 34 | private Map columnTypes; 35 | private final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat(Constants.DATE_FORMAT_STRING); 36 | private final int DEFAULT_BATCH_INSERT_SIZE = 10000; 37 | private static final Logger LOG = LogManager.getLogger(JDBCWriter.class); 38 | 39 | @Override 40 | public void prepare(JobContext context, PluginConfig writerConfig) { 41 | columns = context.getFields(); 42 | String driver = writerConfig.getString(JBDCWriterProperties.DRIVER); 43 | String url = 
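/*
 * Illustrative sketch only: with fields (id, name) and table=t_user, prepare() below builds
 * "INSERT INTO t_user(id, name) VALUES(?, ?)"; when the job provides no field list, the
 * statement is instead created lazily in execute() as "INSERT INTO t_user VALUES(?, ?)".
 */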
writerConfig.getString(JBDCWriterProperties.URL); 44 | String username = writerConfig.getString(JBDCWriterProperties.USERNAME); 45 | String password = writerConfig.getString(JBDCWriterProperties.PASSWORD); 46 | String table = writerConfig.getString(JBDCWriterProperties.TABLE); 47 | this.table = table; 48 | batchInsertSize = writerConfig.getInt(JBDCWriterProperties.BATCH_INSERT_SIZE, DEFAULT_BATCH_INSERT_SIZE); 49 | if (batchInsertSize < 1) { 50 | batchInsertSize = DEFAULT_BATCH_INSERT_SIZE; 51 | } 52 | 53 | try { 54 | connection = JDBCUtils.getConnection(driver, url, username, password); 55 | connection.setAutoCommit(false); 56 | columnTypes = JDBCUtils.getColumnTypes(connection, table); 57 | 58 | String sql = null; 59 | if (columns != null) { 60 | String[] placeholder = new String[columns.size()]; 61 | Arrays.fill(placeholder, "?"); 62 | sql = String.format("INSERT INTO %s(%s) VALUES(%s)", table, Joiner.on(", ").join(columns), Joiner.on(", ").join(placeholder)); 63 | LOG.debug(sql); 64 | statement = connection.prepareStatement(sql); 65 | } 66 | } catch (Exception e) { 67 | JDBCUtils.closeConnection(connection); 68 | throw new HDataException("Writer prepare failed.", e); 69 | } 70 | } 71 | 72 | @Override 73 | public void execute(Record record) { 74 | try { 75 | if (statement == null) { 76 | String[] placeholder = new String[record.getFieldsCount()]; 77 | Arrays.fill(placeholder, "?"); 78 | String sql = String.format("INSERT INTO %s VALUES(%s)", table, Joiner.on(", ").join(placeholder)); 79 | LOG.debug(sql); 80 | statement = connection.prepareStatement(sql); 81 | } 82 | 83 | for (int i = 0, len = record.getFieldsCount(); i < len; i++) { 84 | if (record.getField(i) instanceof Timestamp 85 | && !Integer.valueOf(Types.TIMESTAMP).equals(columnTypes.get(columns.get(i).toLowerCase()))) { 86 | statement.setObject(i + 1, DATE_FORMAT.format(record.getField(i))); 87 | } else { 88 | statement.setObject(i + 1, record.getField(i)); 89 | } 90 | } 91 | 92 | count++; 93 | statement.addBatch(); 94 | 95 | if (count % batchInsertSize == 0) { 96 | count = 0; 97 | statement.executeBatch(); 98 | connection.commit(); 99 | } 100 | } catch (SQLException e) { 101 | close(); 102 | throw new HDataException("Writer execute failed.", e); 103 | } 104 | } 105 | 106 | @Override 107 | public void close() { 108 | try { 109 | if (count > 0) { 110 | statement.executeBatch(); 111 | connection.commit(); 112 | } 113 | 114 | if (statement != null) { 115 | statement.close(); 116 | } 117 | 118 | } catch (SQLException e) { 119 | throw new HDataException(e); 120 | } finally { 121 | JDBCUtils.closeConnection(connection); 122 | } 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/writer/mongodb/MongoDBWriter.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.writer.mongodb; 2 | 3 | import java.net.UnknownHostException; 4 | 5 | import opensource.hdata.config.PluginConfig; 6 | import opensource.hdata.core.Fields; 7 | import opensource.hdata.core.JobContext; 8 | import opensource.hdata.core.plugin.Record; 9 | import opensource.hdata.core.plugin.Writer; 10 | import opensource.hdata.exception.HDataException; 11 | 12 | import org.apache.commons.lang3.ArrayUtils; 13 | 14 | import com.mongodb.BasicDBObject; 15 | import com.mongodb.DB; 16 | import com.mongodb.DBCollection; 17 | import com.mongodb.MongoClient; 18 | import com.mongodb.MongoClientURI; 19 | 20 | public class MongoDBWriter 
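/*
 * Illustrative sketch only: the "uri" property must name both the database and the collection,
 * e.g. mongodb://127.0.0.1:27017/mydb.mycollection, because prepare() resolves them through
 * MongoClientURI.getDatabase() and MongoClientURI.getCollection().
 */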
extends Writer { 21 | 22 | private Fields fields; 23 | private MongoClient mongoClient = null; 24 | private DBCollection coll; 25 | private BasicDBObject[] insertDocs; 26 | private int batchsize; 27 | private int count; 28 | 29 | @Override 30 | public void prepare(JobContext context, PluginConfig writerConfig) { 31 | fields = context.getFields(); 32 | batchsize = writerConfig.getInt(MongoDBWriterProperties.BATCH_INSERT_SIZE, 1000); 33 | insertDocs = new BasicDBObject[batchsize]; 34 | MongoClientURI clientURI = new MongoClientURI(writerConfig.getString(MongoDBWriterProperties.URI)); 35 | try { 36 | mongoClient = new MongoClient(clientURI); 37 | DB db = mongoClient.getDB(clientURI.getDatabase()); 38 | coll = db.getCollection(clientURI.getCollection()); 39 | } catch (UnknownHostException e) { 40 | throw new HDataException(e); 41 | } 42 | } 43 | 44 | @Override 45 | public void execute(Record record) { 46 | BasicDBObject doc = new BasicDBObject(); 47 | for (int i = 0, len = fields.size(); i < len; i++) { 48 | doc.put(fields.get(i), record.getField(i)); 49 | } 50 | 51 | insertDocs[count++] = doc; 52 | if (count == batchsize) { 53 | coll.insert(insertDocs); 54 | count = 0; 55 | } 56 | } 57 | 58 | @Override 59 | public void close() { 60 | if (mongoClient != null) { 61 | if (count > 0) { 62 | coll.insert(ArrayUtils.subarray(insertDocs, 0, count)); 63 | } 64 | mongoClient.close(); 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/plugin/writer/mongodb/MongoDBWriterProperties.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.plugin.writer.mongodb; 2 | 3 | public class MongoDBWriterProperties { 4 | public static final String URI = "uri"; 5 | public static final String BATCH_INSERT_SIZE = "batchInsertSize"; 6 | } 7 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/tool/SQLExecuteTool.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.tool; 2 | 3 | import java.sql.Connection; 4 | import java.sql.Statement; 5 | 6 | import opensource.hdata.exception.HDataException; 7 | import opensource.hdata.util.JDBCUtils; 8 | 9 | import org.apache.commons.cli.CommandLine; 10 | import org.apache.commons.cli.CommandLineParser; 11 | import org.apache.commons.cli.HelpFormatter; 12 | import org.apache.commons.cli.Options; 13 | import org.apache.commons.cli.ParseException; 14 | import org.apache.commons.cli.PosixParser; 15 | import org.apache.logging.log4j.LogManager; 16 | import org.apache.logging.log4j.Logger; 17 | 18 | public class SQLExecuteTool { 19 | 20 | private static final String JDBC_DRIVER = "jdbc-driver"; 21 | private static final String JDBC_URL = "jdbc-url"; 22 | private static final String JDBC_USERNAME = "jdbc-username"; 23 | private static final String JDBC_PASSWORD = "jdbc-password"; 24 | private static final String SQL = "sql"; 25 | private static final Logger LOG = LogManager.getLogger(SQLExecuteTool.class); 26 | 27 | public Options createOptions() { 28 | Options options = new Options(); 29 | options.addOption(null, JDBC_DRIVER, true, "jdbc driver class name"); 30 | options.addOption(null, JDBC_URL, true, "jdbc url, e.g., jdbc:mysql://localhost:3306/database"); 31 | options.addOption(null, JDBC_USERNAME, true, "jdbc username"); 32 | options.addOption(null, JDBC_PASSWORD, true, "jdbc password"); 33 | options.addOption(null, 
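/*
 * Illustrative invocation of this tool (all values are placeholders):
 *
 *   --jdbc-driver com.mysql.jdbc.Driver
 *   --jdbc-url jdbc:mysql://localhost:3306/test
 *   --jdbc-username root
 *   --jdbc-password secret
 *   --sql "TRUNCATE TABLE t_result"
 */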
SQL, true, "sql"); 34 | return options; 35 | } 36 | 37 | public void printHelp(Options options) { 38 | HelpFormatter formatter = new HelpFormatter(); 39 | formatter.printHelp(" ", options); 40 | } 41 | 42 | public static void main(String[] args) { 43 | SQLExecuteTool tool = new SQLExecuteTool(); 44 | Options options = tool.createOptions(); 45 | if (args.length < 1) { 46 | tool.printHelp(options); 47 | System.exit(-1); 48 | } 49 | 50 | CommandLineParser parser = new PosixParser(); 51 | CommandLine cmd = null; 52 | Connection conn = null; 53 | try { 54 | cmd = parser.parse(options, args); 55 | String driver = cmd.getOptionValue(JDBC_DRIVER); 56 | String url = cmd.getOptionValue(JDBC_URL); 57 | String username = cmd.getOptionValue(JDBC_USERNAME); 58 | String password = cmd.getOptionValue(JDBC_PASSWORD); 59 | String sql = cmd.getOptionValue(SQL); 60 | conn = JDBCUtils.getConnection(driver, url, username, password); 61 | Statement statement = conn.createStatement(); 62 | 63 | LOG.info("Executing sql: {}", sql); 64 | statement.execute(sql); 65 | LOG.info("Execute successfully."); 66 | } catch (ParseException e) { 67 | tool.printHelp(options); 68 | System.exit(-1); 69 | } catch (Exception e) { 70 | throw new HDataException(e); 71 | } finally { 72 | JDBCUtils.closeConnection(conn); 73 | } 74 | } 75 | 76 | } 77 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/util/EscaperUtils.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.util; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | 6 | public class EscaperUtils { 7 | private static Map map = null; 8 | private static final char CHAR_SLASH = '\\'; 9 | 10 | /** 11 | * 特殊字符转义 12 | * 13 | * @param input 14 | * @return 15 | */ 16 | public static String parse(String input) { 17 | int cursor = 0; 18 | int index = input.indexOf(CHAR_SLASH, cursor); 19 | 20 | if (index < 0) { 21 | return input; 22 | } 23 | 24 | StringBuilder sb = new StringBuilder(); 25 | int len = input.length(); 26 | while ((index = input.indexOf('\\', cursor)) != -1) { 27 | if (index < len - 1) { 28 | if (map.containsKey(input.charAt(index + 1))) { 29 | sb.append(input.substring(cursor, index)); 30 | sb.append(map.get(input.charAt(index + 1))); 31 | } else { 32 | sb.append(input.substring(cursor, index + 2)); 33 | } 34 | cursor = index + 2; 35 | } else { 36 | break; 37 | } 38 | } 39 | sb.append(input.substring(cursor)); 40 | 41 | return sb.toString(); 42 | } 43 | 44 | static { 45 | map = new HashMap(); 46 | map.put('b', '\b'); 47 | map.put('t', '\t'); 48 | map.put('n', '\n'); 49 | map.put('f', '\f'); 50 | map.put('r', '\r'); 51 | map.put('"', '\"'); 52 | map.put('\'', '\''); 53 | map.put('\\', '\\'); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/util/FTPUtils.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.util; 2 | 3 | import java.io.IOException; 4 | import java.net.SocketException; 5 | import java.util.List; 6 | import java.util.regex.Pattern; 7 | 8 | import org.apache.commons.net.ftp.FTP; 9 | import org.apache.commons.net.ftp.FTPClient; 10 | import org.apache.commons.net.ftp.FTPFile; 11 | import org.apache.commons.net.ftp.FTPReply; 12 | 13 | public class FTPUtils { 14 | 15 | public static FTPClient getFtpClient(String host, int port, String username, String password) throws SocketException, 
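/*
 * Illustrative usage of these helpers (host, credentials and paths are placeholders):
 *
 *   FTPClient client = FTPUtils.getFtpClient("ftp.example.com", 21, "user", "pass");
 *   List<String> files = new ArrayList<String>();
 *   FTPUtils.listFile(files, client, "/data", ".*\\.csv", true);
 *   FTPUtils.closeFtpClient(client);
 */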
IOException { 16 | String LOCAL_CHARSET = "GB18030"; 17 | FTPClient ftpClient = new FTPClient(); 18 | ftpClient.connect(host, port); 19 | // 检测服务器是否支持UTF-8编码,如果支持就用UTF-8编码,否则就使用本地编码GB18030 20 | if (FTPReply.isPositiveCompletion(ftpClient.sendCommand("OPTS UTF8", "ON"))) { 21 | LOCAL_CHARSET = "UTF-8"; 22 | } 23 | ftpClient.setControlEncoding(LOCAL_CHARSET); 24 | ftpClient.login(username, password); 25 | ftpClient.setBufferSize(1024 * 1024 * 16); 26 | ftpClient.enterLocalPassiveMode(); 27 | ftpClient.setFileType(FTP.BINARY_FILE_TYPE); 28 | ftpClient.setControlKeepAliveTimeout(60); 29 | return ftpClient; 30 | } 31 | 32 | /** 33 | * 获取FTP目录下的文件 34 | * 35 | * @param files 36 | * @param ftpClient 37 | * @param path 38 | * FTP目录 39 | * @param filenameRegexp 40 | * 文件名正则表达式 41 | * @param recursive 42 | * 是否递归搜索 43 | * @throws IOException 44 | */ 45 | public static void listFile(List files, FTPClient ftpClient, String path, String filenameRegexp, boolean recursive) throws IOException { 46 | for (FTPFile ftpFile : ftpClient.listFiles(path)) { 47 | if (ftpFile.isFile()) { 48 | if (Pattern.matches(filenameRegexp, ftpFile.getName())) { 49 | files.add(path + "/" + ftpFile.getName()); 50 | } 51 | } else if (recursive && ftpFile.isDirectory()) { 52 | listFile(files, ftpClient, path + "/" + ftpFile.getName(), filenameRegexp, recursive); 53 | } 54 | } 55 | } 56 | 57 | /** 58 | * 关闭FTP客户端连接 59 | * 60 | * @param ftpClient 61 | */ 62 | public static void closeFtpClient(FTPClient ftpClient) { 63 | if (ftpClient != null) { 64 | try { 65 | ftpClient.disconnect(); 66 | } catch (IOException e) { 67 | e.printStackTrace(); 68 | } 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/util/HiveMetaStoreUtils.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.util; 2 | 3 | import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; 4 | import org.apache.hadoop.hive.metastore.api.Partition; 5 | import org.apache.hadoop.hive.metastore.api.Table; 6 | 7 | public class HiveMetaStoreUtils { 8 | 9 | /** 10 | * 获取Hive表 11 | * 12 | * @param client 13 | * @param database 14 | * @param table 15 | * @return 16 | */ 17 | public static Table getTable(HiveMetaStoreClient client, String database, String table) { 18 | try { 19 | return client.getTable(database, table); 20 | } catch (Exception e) { 21 | return null; 22 | } 23 | } 24 | 25 | /** 26 | * 判断是否为托管表 27 | * 28 | * @param table 29 | * @return 30 | */ 31 | public static boolean isManagedTable(Table table) { 32 | return "MANAGED_TABLE".equals(table.getTableType()); 33 | } 34 | 35 | /** 36 | * 判断是否为分区表 37 | * 38 | * @param table 39 | * @return 40 | */ 41 | public static boolean isPartitionTable(Table table) { 42 | return table.getPartitionKeys().size() > 0 ? 
true : false; 43 | } 44 | 45 | /** 46 | * 获取Hive表的分区 47 | * 48 | * @param client 49 | * @param table 50 | * @param partitionValues 51 | * @return 52 | */ 53 | public static Partition getPartition(HiveMetaStoreClient client, Table table, String partitionValues) { 54 | try { 55 | return client.getPartition(table.getDbName(), table.getTableName(), partitionValues.replaceAll("\"", "").replaceAll("\\s+,\\s+", "")); 56 | } catch (Exception e) { 57 | return null; 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/util/HiveTypeUtils.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.util; 2 | 3 | import org.apache.hadoop.hive.common.type.HiveBaseChar; 4 | import org.apache.hadoop.hive.common.type.HiveDecimal; 5 | import org.apache.hadoop.hive.common.type.HiveVarchar; 6 | import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; 7 | import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; 8 | 9 | public class HiveTypeUtils { 10 | 11 | /** 12 | * 将Hive Writable类型转为标准Java类型 13 | * 14 | * @param o 15 | * @return 16 | */ 17 | public static Object toJavaObject(Object o) { 18 | if (o instanceof HiveBaseChar) { 19 | return ((HiveVarchar) o).getValue(); 20 | } else if (o instanceof HiveDecimal) { 21 | return ((HiveDecimal) o).bigDecimalValue(); 22 | } 23 | 24 | return o; 25 | } 26 | 27 | /** 28 | * 获取Hive类型的PrimitiveCategory 29 | * 30 | * @param type 31 | * @return 32 | */ 33 | public static PrimitiveCategory getPrimitiveCategory(String type) { 34 | if ("TINYINT".equals(type)) { 35 | return PrimitiveObjectInspector.PrimitiveCategory.BYTE; 36 | } else if ("SMALLINT".equals(type)) { 37 | return PrimitiveObjectInspector.PrimitiveCategory.SHORT; 38 | } else if ("BIGINT".equals(type)) { 39 | return PrimitiveObjectInspector.PrimitiveCategory.LONG; 40 | } else { 41 | return PrimitiveObjectInspector.PrimitiveCategory.valueOf(type); 42 | } 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/util/JDBCUtils.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.util; 2 | 3 | import java.sql.Connection; 4 | import java.sql.DriverManager; 5 | import java.sql.PreparedStatement; 6 | import java.sql.ResultSet; 7 | import java.sql.ResultSetMetaData; 8 | import java.sql.SQLException; 9 | import java.sql.Statement; 10 | import java.sql.Types; 11 | import java.util.ArrayList; 12 | import java.util.HashMap; 13 | import java.util.List; 14 | import java.util.Map; 15 | import java.util.regex.Matcher; 16 | import java.util.regex.Pattern; 17 | 18 | import org.apache.logging.log4j.LogManager; 19 | import org.apache.logging.log4j.Logger; 20 | 21 | public class JDBCUtils { 22 | 23 | private static final Logger LOG = LogManager.getLogger(JDBCUtils.class); 24 | 25 | /** 26 | * 获取JDBC连接 27 | * 28 | * @param driver 29 | * @param url 30 | * @param username 31 | * @param password 32 | * @return 33 | * @throws ClassNotFoundException 34 | * @throws SQLException 35 | */ 36 | public static Connection getConnection(String driver, String url, String username, String password) throws ClassNotFoundException, SQLException { 37 | Class.forName(driver); 38 | Connection conn = DriverManager.getConnection(url, username, password); 39 | return conn; 40 | } 41 | 42 | /** 43 | * 关闭JDBC连接 44 | * 45 | * 
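 * Typical usage (sketch): obtain the connection with getConnection(driver, url, username, password),
 * do the work, and release it in a finally block via JDBCUtils.closeConnection(conn) so that a
 * failed job does not leak connections.
 *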
@param conn 46 | */ 47 | public static void closeConnection(Connection conn) { 48 | if (conn != null) { 49 | try { 50 | conn.close(); 51 | } catch (SQLException e) { 52 | LoggerUtils.error(LOG, e); 53 | } 54 | } 55 | } 56 | 57 | /** 58 | * 获取表的字段类型 59 | * 60 | * @param connection 61 | * @param table 62 | * @return 63 | * @throws SQLException 64 | */ 65 | public static Map getColumnTypes(Connection connection, String table) throws SQLException { 66 | Map map = new HashMap(); 67 | StringBuilder sql = new StringBuilder(); 68 | sql.append("SELECT * FROM "); 69 | sql.append(table); 70 | sql.append(" WHERE 1=2"); 71 | 72 | PreparedStatement ps = connection.prepareStatement(sql.toString()); 73 | ResultSetMetaData rsd = ps.executeQuery().getMetaData(); 74 | for (int i = 0; i < rsd.getColumnCount(); i++) { 75 | map.put(rsd.getColumnName(i + 1).toLowerCase(), rsd.getColumnType(i + 1)); 76 | } 77 | ps.close(); 78 | return map; 79 | } 80 | 81 | /** 82 | * 获取表的字段名称 83 | * 84 | * @param conn 85 | * @param table 86 | * @return 87 | * @throws SQLException 88 | */ 89 | public static List getColumnNames(Connection conn, String table) throws SQLException { 90 | List columnNames = new ArrayList(); 91 | StringBuilder sql = new StringBuilder(); 92 | sql.append("SELECT * FROM "); 93 | sql.append(table); 94 | sql.append(" WHERE 1=2"); 95 | 96 | PreparedStatement ps = conn.prepareStatement(sql.toString()); 97 | ResultSet rs = ps.executeQuery(); 98 | ResultSetMetaData rsd = rs.getMetaData(); 99 | 100 | for (int i = 0, len = rsd.getColumnCount(); i < len; i++) { 101 | columnNames.add(rsd.getColumnName(i + 1)); 102 | } 103 | rs.close(); 104 | ps.close(); 105 | 106 | return columnNames; 107 | } 108 | 109 | /** 110 | * 查询表中分割字段值的区域(最大值、最小值) 111 | * 112 | * @param conn 113 | * @param sql 114 | * @param splitColumn 115 | * @return 116 | * @throws SQLException 117 | */ 118 | public static double[] querySplitColumnRange(Connection conn, String sql, String splitColumn) throws SQLException { 119 | double[] minAndMax = new double[2]; 120 | Pattern p = Pattern.compile("\\s+FROM\\s+.*", Pattern.CASE_INSENSITIVE); 121 | Matcher m = p.matcher(sql); 122 | 123 | if (m.find() && splitColumn != null && !splitColumn.trim().isEmpty()) { 124 | StringBuilder sb = new StringBuilder(); 125 | sb.append("SELECT MIN("); 126 | sb.append(splitColumn); 127 | sb.append("), MAX("); 128 | sb.append(splitColumn); 129 | sb.append(")"); 130 | sb.append(m.group(0)); 131 | 132 | Statement statement = conn.createStatement(); 133 | ResultSet rs = statement.executeQuery(sb.toString()); 134 | while (rs.next()) { 135 | minAndMax[0] = rs.getDouble(1); 136 | minAndMax[1] = rs.getDouble(2); 137 | } 138 | 139 | rs.close(); 140 | statement.close(); 141 | } 142 | 143 | return minAndMax; 144 | } 145 | 146 | /** 147 | * 查询表数值类型的主键 148 | * 149 | * @param conn 150 | * @param catalog 151 | * @param schema 152 | * @param table 153 | * @return 154 | * @throws SQLException 155 | */ 156 | public static String getDigitalPrimaryKey(Connection conn, String catalog, String schema, String table) throws SQLException { 157 | List primaryKeys = new ArrayList(); 158 | ResultSet rs = conn.getMetaData().getPrimaryKeys(catalog, schema, table); 159 | while (rs.next()) { 160 | primaryKeys.add(rs.getString("COLUMN_NAME")); 161 | } 162 | rs.close(); 163 | 164 | if (primaryKeys.size() > 0) { 165 | Map map = getColumnTypes(conn, table); 166 | for (String pk : primaryKeys) { 167 | if (isDigitalType(map.get(pk))) { 168 | return pk; 169 | } 170 | } 171 | } 172 | 173 | return null; 174 | } 175 
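/*
 * Illustrative sketch only: the two helpers above are presumably combined when a JDBC read is
 * split into parallel ranges, along these lines:
 *
 *   String pk = JDBCUtils.getDigitalPrimaryKey(conn, null, null, "t_user");
 *   double[] range = JDBCUtils.querySplitColumnRange(conn, "SELECT * FROM t_user", pk);
 *   // carve [range[0], range[1]] into evenly sized "pk >= x AND pk < y" predicates
 */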
| 176 | /** 177 | * Checks whether the given java.sql.Types value is a numeric type. 178 | * 179 | * @param sqlType 180 | * @return 181 | */ 182 | public static boolean isDigitalType(int sqlType) { 183 | switch (sqlType) { 184 | case Types.NUMERIC: 185 | case Types.DECIMAL: 186 | case Types.SMALLINT: 187 | case Types.INTEGER: 188 | case Types.BIGINT: 189 | case Types.REAL: 190 | case Types.FLOAT: 191 | case Types.DOUBLE: 192 | return true; 193 | 194 | default: 195 | return false; 196 | } 197 | } 198 | 199 | } 200 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/util/LoggerUtils.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.util; 2 | 3 | import org.apache.logging.log4j.Logger; 4 | 5 | public class LoggerUtils { 6 | 7 | public static void error(Logger logger, Exception e) { 8 | // Pass the exception itself so log4j2 records the full stack trace (e.getStackTrace() would only log the array reference). 9 | logger.error(e.getMessage(), e); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/util/TypeConvertUtils.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.util; 2 | 3 | import java.math.BigDecimal; 4 | import java.math.BigInteger; 5 | 6 | public class TypeConvertUtils { 7 | 8 | /** 9 | * Converts a value to the given target type. 10 | * 11 | * @param src 12 | * @param clazz 13 | * @return 14 | */ 15 | public static Object convert(Object src, Class clazz) { 16 | if (src == null) { 17 | return null; 18 | } else if (src instanceof String) { 19 | if (clazz == Integer.class) { 20 | return Integer.valueOf(src.toString()); 21 | } else if (clazz == Long.class) { 22 | return Long.valueOf(src.toString()); 23 | } else if (clazz == Double.class) { 24 | return Double.valueOf(src.toString()); 25 | } else if (clazz == Float.class) { 26 | return Float.valueOf(src.toString()); 27 | } else if (clazz == Boolean.class) { 28 | return Boolean.valueOf(src.toString()); 29 | } else if (clazz == Short.class) { 30 | return Short.valueOf(src.toString()); 31 | } else if (clazz == Byte.class) { 32 | return Byte.valueOf(src.toString()); 33 | } else if (clazz == BigInteger.class) { 34 | return BigInteger.valueOf(Long.valueOf(src.toString())); 35 | } else if (clazz == BigDecimal.class) { 36 | return new BigDecimal(src.toString()); 37 | } 38 | } else if (clazz == String.class) { 39 | return src.toString(); 40 | } 41 | return src; 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/util/Utils.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.util; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Arrays; 5 | import java.util.List; 6 | import java.util.regex.Matcher; 7 | import java.util.regex.Pattern; 8 | 9 | import org.apache.commons.lang3.ArrayUtils; 10 | import org.apache.commons.lang3.StringUtils; 11 | import org.apache.logging.log4j.LogManager; 12 | import org.apache.logging.log4j.Logger; 13 | 14 | public class Utils { 15 | 16 | private static final Logger LOG = LogManager.getLogger(Utils.class); 17 | 18 | /** 19 | * Sleeps the current thread for the given number of milliseconds. 20 | * 21 | * @param millis 22 | */ 23 | public static void sleep(long millis) { 24 | try { 25 | Thread.sleep(millis); 26 | } catch (InterruptedException e) { 27 | LoggerUtils.error(LOG, e); 28 | } 29 | } 30 | 31 | public static List getColumns(String[] columns, String[] excludeColumns) { 32 | if (excludeColumns == null || excludeColumns.length < 1) { 
33 | return columns == null ? null : Arrays.asList(columns); 34 | } 35 | 36 | List list = new ArrayList(); 37 | for (String column : columns) { 38 | if (!ArrayUtils.contains(excludeColumns, column)) { 39 | list.add(column); 40 | } 41 | } 42 | return list; 43 | } 44 | 45 | public static List getColumns(List columns, String[] excludeColumns) { 46 | return getColumns(columns.toArray(new String[columns.size()]), excludeColumns); 47 | } 48 | 49 | /** 50 | * Fixes an HDFS location by replacing the hostname with the metastore host IP. 51 | * 52 | * @param srcLocaltion 53 | * @param metastoreUris 54 | * @return 55 | */ 56 | public static String fixLocaltion(String srcLocaltion, String metastoreUris) { 57 | Matcher ipMatcher = Pattern.compile("(\\d+\\.){3}\\d+").matcher(metastoreUris.split(",")[0].trim()); 58 | if (ipMatcher.find()) { 59 | String masterIP = ipMatcher.group(); 60 | return srcLocaltion.replaceFirst("^hdfs://\\w+:", "hdfs://" + masterIP + ":"); 61 | } 62 | return srcLocaltion; 63 | } 64 | 65 | /** 66 | * Parses the partition values from a comma-separated partition spec (key=value pairs). 67 | * 68 | * @param partitions 69 | * @return 70 | */ 71 | public static List parsePartitionValue(String partitions) { 72 | List partitionValues = new ArrayList(); 73 | String[] partitionKeyValue = partitions.split("\\s*,\\s*"); 74 | for (String kv : partitionKeyValue) { 75 | String[] tokens = StringUtils.splitPreserveAllTokens(kv, "="); 76 | partitionValues.add(tokens[1]); 77 | } 78 | return partitionValues; 79 | } 80 | 81 | /** 82 | * Returns the configuration directory (hdata.conf.dir) with a trailing file separator. 83 | * 84 | * @return 85 | */ 86 | public static String getConfigDir() { 87 | return System.getProperty("hdata.conf.dir") + System.getProperty("file.separator"); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/main/java/opensource/hdata/util/XMLUtils.java: -------------------------------------------------------------------------------- 1 | package opensource.hdata.util; 2 | 3 | import java.io.FileInputStream; 4 | import java.io.IOException; 5 | import java.io.InputStream; 6 | 7 | import javax.xml.parsers.DocumentBuilder; 8 | import javax.xml.parsers.DocumentBuilderFactory; 9 | import javax.xml.parsers.ParserConfigurationException; 10 | 11 | import org.jdom2.Document; 12 | import org.jdom2.Element; 13 | import org.jdom2.input.DOMBuilder; 14 | import org.xml.sax.SAXException; 15 | 16 | public class XMLUtils { 17 | 18 | /** 19 | * Loads an XML file and returns its root element. 20 | * 21 | * @param input 22 | * @return 23 | * @throws ParserConfigurationException 24 | * @throws SAXException 25 | * @throws IOException 26 | */ 27 | public static Element load(InputStream input) throws ParserConfigurationException, SAXException, IOException { 28 | DOMBuilder domBuilder = new DOMBuilder(); 29 | DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); 30 | Document doc = domBuilder.build(builder.parse(input)); 31 | Element root = doc.getRootElement(); 32 | return root; 33 | } 34 | 35 | public static Element load(String xmlpath) throws ParserConfigurationException, SAXException, IOException { 36 | FileInputStream fis = new FileInputStream(xmlpath); 37 | return load(fis); 38 | } 39 | } 40 | --------------------------------------------------------------------------------
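
The writer plugins above all follow the same prepare/execute/close contract. The sketch below is illustrative only (the class, package and "label" property are hypothetical, not part of this repository) and shows the smallest writer that satisfies that contract; a real plugin would presumably also need an entry in conf/plugins.xml before the plugin loader could pick it up.

package opensource.hdata.examples;

import opensource.hdata.config.PluginConfig;
import opensource.hdata.core.JobContext;
import opensource.hdata.core.plugin.Record;
import opensource.hdata.core.plugin.Writer;

/**
 * Minimal illustrative writer: counts the records it receives and prints the total on close.
 */
public class CountingWriter extends Writer {

    private long count;
    private String label;

    @Override
    public void prepare(JobContext context, PluginConfig writerConfig) {
        // "label" is an assumed, writer-specific property with a default value
        label = writerConfig.getString("label", "counting-writer");
    }

    @Override
    public void execute(Record record) {
        count++;
    }

    @Override
    public void close() {
        System.out.println(label + ": " + count + " records written");
    }
}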