├── README.md
├── bin
├── hdata
└── hdata.bat
├── conf
├── hdata.xml
├── log4j2.xml
└── plugins.xml
├── job-examples
├── ftp-ftp.xml
├── hbase-console.xml
├── hdfs-hive.xml
├── hdfs-jdbc.xml
├── hive-jdbc.xml
├── jdbc-hbase.xml
├── jdbc-hdfs.xml
├── jdbc-hive.xml
├── jdbc-jdbc.xml
├── jdbc-mongodb.xml
├── job.xml
└── mongodb-console.xml
├── pom.xml
└── src
└── main
└── java
└── opensource
└── hdata
├── CliDriver.java
├── common
├── Constants.java
└── HDataConfigConstants.java
├── config
├── Configuration.java
├── EngineConfig.java
├── JobConfig.java
└── PluginConfig.java
├── core
├── DefaultRecord.java
├── Fields.java
├── HData.java
├── JobContext.java
├── Metric.java
├── OutputFieldsDeclarer.java
├── PluginLoader.java
├── ReaderWorker.java
├── RecordEvent.java
├── RecordWorkHandler.java
├── Storage.java
├── WaitStrategyFactory.java
└── plugin
│ ├── AbstractPlugin.java
│ ├── Pluginable.java
│ ├── Reader.java
│ ├── ReaderPlugin.java
│ ├── Record.java
│ ├── RecordCollector.java
│ ├── Splitter.java
│ ├── Writer.java
│ └── WriterPlugin.java
├── exception
└── HDataException.java
├── plugin
├── reader
│ ├── ftp
│ │ ├── FTPReader.java
│ │ ├── FTPReaderProperties.java
│ │ └── FTPSplitter.java
│ ├── hbase
│ │ ├── HBaseReader.java
│ │ ├── HBaseReaderProperties.java
│ │ └── HBaseSplitter.java
│ ├── hdfs
│ │ ├── HDFSReader.java
│ │ ├── HDFSReaderProperties.java
│ │ └── HDFSSplitter.java
│ ├── hive
│ │ ├── HiveReader.java
│ │ ├── HiveReaderProperties.java
│ │ └── HiveSplitter.java
│ ├── jdbc
│ │ ├── JBDCReaderProperties.java
│ │ ├── JDBCReader.java
│ │ └── JDBCSplitter.java
│ └── mongodb
│ │ ├── MongoDBReader.java
│ │ ├── MongoDBReaderProperties.java
│ │ └── MongoDBSplitter.java
└── writer
│ ├── console
│ └── ConsoleWriter.java
│ ├── ftp
│ ├── FTPWriter.java
│ └── FTPWriterProperties.java
│ ├── hbase
│ ├── HBaseWriter.java
│ └── HBaseWriterProperties.java
│ ├── hdfs
│ ├── HDFSWriter.java
│ └── HDFSWriterProperties.java
│ ├── hive
│ ├── HiveRecordWritable.java
│ ├── HiveWriter.java
│ └── HiveWriterProperties.java
│ ├── jdbc
│ ├── JBDCWriterProperties.java
│ └── JDBCWriter.java
│ └── mongodb
│ ├── MongoDBWriter.java
│ └── MongoDBWriterProperties.java
├── tool
└── SQLExecuteTool.java
└── util
├── EscaperUtils.java
├── FTPUtils.java
├── HiveMetaStoreUtils.java
├── HiveTypeUtils.java
├── JDBCUtils.java
├── LoggerUtils.java
├── TypeConvertUtils.java
├── Utils.java
└── XMLUtils.java
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataSky/HData/50ff4568fec2538a6f2098311c9ab5ff6737471c/README.md
--------------------------------------------------------------------------------
/bin/hdata:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 |
4 | CDPATH=""
5 | SCRIPT="$0"
6 |
7 | while [ -h "$SCRIPT" ] ; do
8 | ls=`ls -ld "$SCRIPT"`
9 | link=`expr "$ls" : '.*-> \(.*\)$'`
10 | if expr "$link" : '/.*' > /dev/null; then
11 | SCRIPT="$link"
12 | else
13 | SCRIPT=`dirname "$SCRIPT"`/"$link"
14 | fi
15 | done
16 |
17 | HDATA_HOME=`dirname "$SCRIPT"`/..
18 | HDATA_HOME=`cd "$HDATA_HOME"; pwd`
19 | HDATA_LIB_DIR=$HDATA_HOME/lib
20 | HDATA_CONF_DIR=$HDATA_HOME/conf
21 |
22 | if [ -x "$JAVA_HOME/bin/java" ]; then
23 | JAVA="$JAVA_HOME/bin/java"
24 | else
25 | JAVA=`which java`
26 | fi
27 |
28 | if [ ! -x "$JAVA" ]; then
29 | echo "Could not find any executable java binary. Please install java in your PATH or set JAVA_HOME"
30 | exit 1
31 | fi
32 |
33 | HDATA_CLASSPATH='.'
34 | for f in $HDATA_LIB_DIR/*.jar; do
35 | HDATA_CLASSPATH=${HDATA_CLASSPATH}:$f;
36 | done
37 |
38 | JAVA_OPTS="$JAVA_OPTS -Dhdata.conf.dir=$HDATA_CONF_DIR"
39 | JAVA_OPTS="$JAVA_OPTS -Dlog4j.configurationFile=file://$HDATA_CONF_DIR/log4j2.xml"
40 |
41 | MAIN_CLASS="opensource.hdata.CliDriver"
42 | if [ "$1" = "execute-sql" ]; then
43 | MAIN_CLASS="opensource.hdata.tool.SQLExecuteTool"
44 | fi
45 |
46 | exec "$JAVA" $JAVA_OPTS -cp "$HDATA_CLASSPATH" $MAIN_CLASS "$@"
47 |
--------------------------------------------------------------------------------
/bin/hdata.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 |
3 | SETLOCAL
4 |
5 | if NOT DEFINED JAVA_HOME goto err
6 |
7 | set SCRIPT_DIR=%~dp0
8 | for %%I in ("%SCRIPT_DIR%..") do set HDATA_HOME=%%~dpfI
9 |
10 | set MAIN_CLASSPATH=.;%HDATA_HOME%\lib\*
11 | set HDATA_CONF_DIR=%HDATA_HOME%\conf
12 |
13 | set JAVA_OPTS=%JAVA_OPTS% -Xss256k
14 | set JAVA_OPTS=%JAVA_OPTS% -XX:+UseParNewGC
15 | set JAVA_OPTS=%JAVA_OPTS% -XX:+UseConcMarkSweepGC
16 |
17 | set JAVA_OPTS=%JAVA_OPTS% -XX:CMSInitiatingOccupancyFraction=75
18 | set JAVA_OPTS=%JAVA_OPTS% -XX:+UseCMSInitiatingOccupancyOnly
19 | set JAVA_OPTS=%JAVA_OPTS% -XX:+HeapDumpOnOutOfMemoryError
20 | set JAVA_OPTS=%JAVA_OPTS% -Dhdata.conf.dir="%HDATA_CONF_DIR%"
21 | set JAVA_OPTS=%JAVA_OPTS% -Dlog4j.configurationFile="file:///%HDATA_CONF_DIR%/log4j2.xml"
22 |
23 | set FIRST_ARG=%1
24 | set MAIN_CLASS="opensource.hdata.CliDriver"
25 | if "%FIRST_ARG%"=="execute-sql" (set MAIN_CLASS="opensource.hdata.tool.SQLExecuteTool")
26 |
27 | "%JAVA_HOME%\bin\java" %JAVA_OPTS% -cp "%MAIN_CLASSPATH%" %MAIN_CLASS% %*
28 |
29 | goto finally
30 |
31 | :err
32 | echo JAVA_HOME environment variable must be set!
33 | pause
34 |
35 |
36 | :finally
37 |
38 | ENDLOCAL
--------------------------------------------------------------------------------
/conf/hdata.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | hdata.storage.default.buffer.size
6 | 16384
7 | 默认storage缓冲区大小,值必须为2^n
8 |
9 |
10 | hdata.storage.disruptor.wait.strategy
11 | BlockingWaitStrategy
12 | 线程等待策略,可选项:BlockingWaitStrategy、BusySpinWaitStrategy、SleepingWaitStrategy、YieldingWaitStrategy
13 |
14 |
15 | hdata.hive.writer.tmp.dir
16 | /tmp
17 | Hive Writer写入HDFS文件的临时目录
18 |
19 |
20 |
--------------------------------------------------------------------------------
/conf/log4j2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/conf/plugins.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | jdbc
7 | opensource.hdata.plugin.reader.jdbc.JDBCReader
8 | opensource.hdata.plugin.reader.jdbc.JDBCSplitter
9 |
10 |
11 | hive
12 | opensource.hdata.plugin.reader.hive.HiveReader
13 | opensource.hdata.plugin.reader.hive.HiveSplitter
14 |
15 |
16 | hdfs
17 | opensource.hdata.plugin.reader.hdfs.HDFSReader
18 | opensource.hdata.plugin.reader.hdfs.HDFSSplitter
19 |
20 |
21 | ftp
22 | opensource.hdata.plugin.reader.ftp.FTPReader
23 | opensource.hdata.plugin.reader.ftp.FTPSplitter
24 |
25 |
26 | mongodb
27 | opensource.hdata.plugin.reader.mongodb.MongoDBReader
28 | opensource.hdata.plugin.reader.mongodb.MongoDBSplitter
29 |
30 |
31 | hbase
32 | opensource.hdata.plugin.reader.hbase.HBaseReader
33 | opensource.hdata.plugin.reader.hbase.HBaseSplitter
34 |
35 |
36 |
37 |
38 |
39 | console
40 | opensource.hdata.plugin.writer.console.ConsoleWriter
41 |
42 |
43 | jdbc
44 | opensource.hdata.plugin.writer.jdbc.JDBCWriter
45 |
46 |
47 | hive
48 | opensource.hdata.plugin.writer.hive.HiveWriter
49 |
50 |
51 | hdfs
52 | opensource.hdata.plugin.writer.hdfs.HDFSWriter
53 |
54 |
55 | ftp
56 | opensource.hdata.plugin.writer.ftp.FTPWriter
57 |
58 |
59 | mongodb
60 | opensource.hdata.plugin.writer.mongodb.MongoDBWriter
61 |
62 |
63 | hbase
64 | opensource.hdata.plugin.writer.hbase.HBaseWriter
65 |
66 |
67 |
68 |
--------------------------------------------------------------------------------
/job-examples/ftp-ftp.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | 192.168.130.161
6 | 1
7 | 1@1
8 | /etldata/input/sa_log/151_125
9 |
10 | serv11-saIntf-pageTime-access-20140407_00.0.log
11 | |
12 |
13 | 1
14 |
15 |
16 |
17 | localhost
18 | 1
19 | 1
20 | /ftp/tmp/1.txt
21 | 1
22 |
23 |
24 |
--------------------------------------------------------------------------------
/job-examples/hbase-console.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | 192.168.142.16,192.168.142.17,192.168.142.18
6 | 2181
7 |
8 | :rowkey,cf:start_ip,cf:end_ip,cf:start_ip_num,cf:end_ip_num,cf:country,cf:area,cf:province,cf:city,cf:isp
9 | id,start_ip,end_ip,start_ip_num,end_ip_num,country,area,province,city,isp
10 | 958200
11 |
12 | 2
13 |
14 |
15 |
16 | 1
17 |
18 |
19 |
--------------------------------------------------------------------------------
/job-examples/hdfs-hive.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | hdfs://192.168.142.21:8020/tmp/hdata_test
6 | .*\.csv
7 | ,
8 | gb18030
9 | bigdata
10 | 1
11 |
12 |
13 |
14 | thrift://192.168.142.21:9083
15 | default
16 |
17 | bigdata
18 | 1
19 |
20 |
21 |
--------------------------------------------------------------------------------
/job-examples/hdfs-jdbc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | hdfs://192.168.142.21:8020/tmp/hdata_test
6 | hdfs.test
7 | bigdata
8 | 1
9 |
10 |
11 |
12 | org.postgresql.Driver
13 | jdbc:postgresql://localhost:5432/ip
14 | postgres
15 | toor
16 |
17 | 3
18 |
19 |
20 |
--------------------------------------------------------------------------------
/job-examples/hive-jdbc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | thrift://192.168.142.21:9083
6 | bi_td
7 |
8 |
9 | 1
10 |
11 |
12 |
13 | org.postgresql.Driver
14 | jdbc:postgresql://localhost:5432/tmp
15 | postgres
16 | toor
17 |
18 | 3
19 |
20 |
21 |
--------------------------------------------------------------------------------
/job-examples/jdbc-hbase.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | org.postgresql.Driver
6 | jdbc:postgresql://10.22.8.140:5432/ip
7 | postgres
8 | toor
9 |
10 |
11 |
12 |
13 |
14 |
15 | 1
16 |
17 |
18 |
19 | 192.168.142.16,192.168.142.17,192.168.142.18,192.168.142.19,192.168.142.20,192.168.142.21,192.168.142.23,192.168.142.24,192.168.142.25,192.168.142.26,192.168.142.27
20 | 2181
21 |
22 | :rowkey,cf:start_ip,cf:end_ip,cf:start_ip_num,cf:end_ip_num,cf:country,cf:area,cf:province,cf:city,cf:isp
23 | 10000
24 | 1
25 |
26 |
27 |
--------------------------------------------------------------------------------
/job-examples/jdbc-hdfs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | org.postgresql.Driver
6 | jdbc:postgresql://10.22.8.140:5432/ip
7 | postgres
8 | toor
9 |
10 |
11 |
12 |
13 |
14 |
15 | 3
16 |
17 |
18 |
19 | hdfs://192.168.142.21:8020/tmp/hdata_test/hdfs.test
20 | bigdata
21 | 1
22 |
23 |
24 |
--------------------------------------------------------------------------------
/job-examples/jdbc-hive.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | org.postgresql.Driver
6 | jdbc:postgresql://localhost:5432/ip
7 | postgres
8 | toor
9 |
10 |
11 |
12 |
13 |
14 |
15 | 3
16 |
17 |
18 |
19 | thrift://192.168.142.21:9083
20 | default
21 |
22 | p=20140407
23 | bigdata
24 | 3
25 |
26 |
27 |
--------------------------------------------------------------------------------
/job-examples/jdbc-jdbc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | org.postgresql.Driver
6 | jdbc:postgresql://localhost:5432/ip
7 | postgres
8 | toor
9 |
10 |
11 |
12 |
13 |
14 |
15 | 3
16 |
17 |
18 |
19 | org.postgresql.Driver
20 | jdbc:postgresql://localhost:5432/ip
21 | postgres
22 | toor
23 |
24 | 10000
25 | 3
26 |
27 |
28 |
--------------------------------------------------------------------------------
/job-examples/jdbc-mongodb.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | org.postgresql.Driver
6 | jdbc:postgresql://localhost:5432/ip
7 | postgres
8 | toor
9 |
10 |
11 |
12 |
13 |
14 |
15 | 3
16 |
17 |
18 |
19 | mongodb://localhost/test.ip
20 |
21 | 3
22 |
23 |
24 |
--------------------------------------------------------------------------------
/job-examples/job.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | com.mysql.jdbc.Driver
6 | jdbc:mysql://localhost:3306/test
7 | root
8 | toor
9 |
10 |
11 |
12 |
13 |
14 |
15 | 7
16 |
17 |
18 |
19 | com.mysql.jdbc.Driver
20 | jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=UTF-8
21 | root
22 | toor
23 |
24 | 10000
25 | 3
26 |
27 |
28 |
--------------------------------------------------------------------------------
/job-examples/mongodb-console.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | mongodb://localhost/test.ip
6 | {"city":"南京市"}
7 | 1
8 |
9 |
10 |
11 | 1
12 |
13 |
14 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 | opensource
5 | hdata
6 | hdata
7 | 0.1
8 |
9 |
10 | UTF-8
11 | 1.2.1
12 | 0.12.0
13 | 0.94.16
14 |
15 |
16 | 2014
17 |
18 |
19 | Jayer
20 | dczxxuhai@gmail.com
21 |
22 |
23 |
24 |
25 |
26 | org.apache.logging.log4j
27 | log4j-api
28 | 2.0-rc1
29 |
30 |
31 | org.apache.logging.log4j
32 | log4j-core
33 | 2.0-rc1
34 |
35 |
36 | com.google.guava
37 | guava
38 | 16.0.1
39 |
40 |
41 | com.lmax
42 | disruptor
43 | 3.2.1
44 |
45 |
46 | commons-cli
47 | commons-cli
48 | 1.2
49 |
50 |
51 | org.apache.commons
52 | commons-lang3
53 | 3.3.2
54 |
55 |
56 | commons-cli
57 | commons-cli
58 | 1.2
59 |
60 |
61 | org.jdom
62 | jdom2
63 | 2.0.5
64 |
65 |
66 | javassist
67 | javassist
68 | 3.18.1-GA
69 |
70 |
71 | org.antlr
72 | antlr-runtime
73 | 3.4
74 |
75 |
76 | commons-configuration
77 | commons-configuration
78 | 1.9
79 |
80 |
81 | commons-lang
82 | commons-lang
83 | 2.6
84 |
85 |
86 | commons-logging
87 | commons-logging
88 | 1.1.1
89 |
90 |
91 | commons-net
92 | commons-net
93 | 3.3
94 |
95 |
96 | log4j
97 | log4j
98 | 1.2.17
99 |
100 |
101 | org.slf4j
102 | slf4j-api
103 | 1.7.6
104 |
105 |
106 | org.slf4j
107 | slf4j-log4j12
108 | 1.7.6
109 |
110 |
111 | org.apache.hive
112 | hive-exec
113 | ${hiveVersion}
114 |
115 |
116 | org.apache.hive
117 | hive-metastore
118 | ${hiveVersion}
119 |
120 |
121 | org.apache.hadoop
122 | hadoop-core
123 | ${hadoopVersion}
124 |
125 |
126 | org.apache.hbase
127 | hbase
128 | ${hbaseVersion}
129 |
130 |
131 | org.apache.zookeeper
132 | zookeeper
133 | 3.4.6
134 |
135 |
136 | org.mongodb
137 | mongo-java-driver
138 | 2.12.0
139 |
140 |
141 | javax.jdo
142 | jdo-api
143 | 3.0.1
144 |
145 |
146 | org.apache.thrift
147 | libfb303
148 | 0.9.0
149 |
150 |
151 | org.datanucleus
152 | datanucleus-api-jdo
153 | 3.2.1
154 |
155 |
156 | org.datanucleus
157 | datanucleus-core
158 | 3.2.2
159 |
160 |
161 | org.datanucleus
162 | datanucleus-rdbms
163 | 3.2.1
164 |
165 |
166 |
--------------------------------------------------------------------------------
/src/main/java/opensource/hdata/CliDriver.java:
--------------------------------------------------------------------------------
1 | package opensource.hdata;
2 |
3 | import java.util.HashMap;
4 | import java.util.Map;
5 | import java.util.Map.Entry;
6 | import java.util.Properties;
7 |
8 | import opensource.hdata.config.JobConfig;
9 | import opensource.hdata.config.PluginConfig;
10 | import opensource.hdata.core.HData;
11 |
12 | import org.apache.commons.cli.CommandLine;
13 | import org.apache.commons.cli.CommandLineParser;
14 | import org.apache.commons.cli.HelpFormatter;
15 | import org.apache.commons.cli.OptionBuilder;
16 | import org.apache.commons.cli.Options;
17 | import org.apache.commons.cli.ParseException;
18 | import org.apache.commons.cli.PosixParser;
19 |
20 | public class CliDriver {
21 |
22 | private static final String XML_FILE = "f";
23 | private static final String HDATA_VARS = "var";
24 |
25 | /**
26 | * 创建命令行选项
27 | *
28 | * @return
29 | */
30 | public Options createOptions() {
31 | Options options = new Options();
32 | options.addOption(XML_FILE, null, true, "job xml path");
33 | OptionBuilder.withValueSeparator();
34 | OptionBuilder.hasArgs(2);
35 | OptionBuilder.withArgName("property=value");
36 | OptionBuilder.withLongOpt(HDATA_VARS);
37 | options.addOption(OptionBuilder.create());
38 | return options;
39 | }
40 |
41 | /**
42 | * 打印命令行帮助信息
43 | *
44 | * @param options
45 | */
46 | public void printHelp(Options options) {
47 | HelpFormatter formatter = new HelpFormatter();
48 | formatter.printHelp(" ", options);
49 | }
50 |
51 | /**
52 | * 替换命令行变量
53 | *
54 | * @param config
55 | * @param vars
56 | */
57 | public void replaceConfigVars(PluginConfig config, Map vars) {
58 | for (Entry