├── .gitignore ├── README.md ├── assembly ├── pom.xml └── src │ └── build │ └── package.xml ├── bin ├── hdata └── hdata.bat ├── conf ├── hdata.xml ├── log4j2.xml └── plugins.xml ├── doc └── img │ ├── 1.png │ └── 2.png ├── hdata-api ├── pom.xml └── src │ └── main │ └── java │ └── com │ └── github │ └── stuxuhai │ └── hdata │ ├── api │ ├── AbstractPlugin.java │ ├── Configuration.java │ ├── DefaultRecord.java │ ├── EngineConfig.java │ ├── Fields.java │ ├── JobConfig.java │ ├── JobContext.java │ ├── JobStatus.java │ ├── Metric.java │ ├── OutputFieldsDeclarer.java │ ├── PluginConfig.java │ ├── Pluginable.java │ ├── Reader.java │ ├── Record.java │ ├── RecordCollector.java │ ├── Splitter.java │ ├── Storage.java │ └── Writer.java │ └── exception │ └── HDataException.java ├── hdata-console ├── pom.xml └── src │ └── main │ └── java │ └── com │ └── github │ └── stuxuhai │ └── hdata │ └── plugin │ ├── reader │ └── console │ │ └── ConsoleReader.java │ └── writer │ └── console │ └── ConsoleWriter.java ├── hdata-core ├── pom.xml └── src │ └── main │ └── java │ └── com │ └── github │ └── stuxuhai │ └── hdata │ ├── CliDriver.java │ ├── common │ ├── Constants.java │ └── HDataConfigConstants.java │ ├── config │ ├── DefaultEngineConfig.java │ └── DefaultJobConfig.java │ ├── core │ ├── DefaultRecord.java │ ├── DefaultRecordCollector.java │ ├── DefaultStorage.java │ ├── HData.java │ ├── PluginClassLoader.java │ ├── PluginLoader.java │ ├── ReaderWorker.java │ ├── RecordEvent.java │ ├── RecordEventExceptionHandler.java │ ├── RecordWorkHandler.java │ └── WaitStrategyFactory.java │ └── util │ ├── NumberUtils.java │ ├── PluginUtils.java │ ├── TypeConvertUtils.java │ └── Utils.java ├── hdata-csv ├── pom.xml └── src │ └── main │ └── java │ └── com │ └── github │ └── stuxuhai │ └── hdata │ └── plugin │ ├── FormatConf.java │ ├── reader │ └── csv │ │ ├── CSVReader.java │ │ ├── CSVReaderProperties.java │ │ └── CSVSplitter.java │ └── writer │ └── csv │ ├── CSVWriter.java │ └── CSVWriterProperties.java ├── hdata-excel ├── pom.xml └── src │ └── main │ └── java │ └── com │ └── github │ └── stuxuhai │ └── hdata │ └── plugin │ └── excel │ ├── ExcelProperties.java │ ├── reader │ └── ExcelReader.java │ └── writer │ └── ExcelWriter.java ├── hdata-ftp ├── pom.xml └── src │ └── main │ └── java │ └── com │ └── github │ └── stuxuhai │ └── hdata │ ├── ftp │ └── FTPUtils.java │ └── plugin │ ├── reader │ └── ftp │ │ ├── FTPReader.java │ │ ├── FTPReaderProperties.java │ │ └── FTPSplitter.java │ └── writer │ └── ftp │ ├── FTPWriter.java │ └── FTPWriterProperties.java ├── hdata-hbase ├── pom.xml └── src │ └── main │ └── java │ └── com │ └── github │ └── stuxuhai │ └── hdata │ └── plugin │ ├── reader │ └── hbase │ │ ├── HBaseReader.java │ │ ├── HBaseReaderProperties.java │ │ └── HBaseSplitter.java │ └── writer │ └── hbase │ ├── HBaseWriter.java │ └── HBaseWriterProperties.java ├── hdata-hdfs ├── pom.xml └── src │ └── main │ └── java │ └── com │ └── github │ └── stuxuhai │ └── hdata │ └── plugin │ ├── reader │ └── hdfs │ │ ├── HDFSReader.java │ │ ├── HDFSReaderProperties.java │ │ └── HDFSSplitter.java │ └── writer │ └── hdfs │ ├── HDFSWriter.java │ └── HDFSWriterProperties.java ├── hdata-hive ├── pom.xml └── src │ └── main │ └── java │ ├── com │ └── github │ │ └── stuxuhai │ │ └── hdata │ │ └── plugin │ │ ├── hive │ │ ├── HiveMetaStoreUtils.java │ │ └── HiveTypeUtils.java │ │ ├── reader │ │ └── hive │ │ │ ├── HiveReader.java │ │ │ ├── HiveReaderProperties.java │ │ │ └── HiveSplitter.java │ │ └── writer │ │ └── hive │ │ ├── HiveWriter.java │ │ 
└── HiveWriterProperties.java │ └── org │ └── apache │ └── hive │ └── hcatalog │ └── mapreduce │ └── PartInfo.java ├── hdata-http ├── pom.xml └── src │ └── main │ └── java │ └── com │ └── github │ └── stuxuhai │ └── hdata │ └── plugin │ └── reader │ └── http │ ├── HttpReader.java │ ├── HttpReaderProperties.java │ └── HttpSplitter.java ├── hdata-jdbc ├── pom.xml └── src │ └── main │ └── java │ └── com │ └── github │ └── stuxuhai │ └── hdata │ └── plugin │ ├── jdbc │ └── JdbcUtils.java │ ├── reader │ └── jdbc │ │ ├── JDBCIterator.java │ │ ├── JDBCReader.java │ │ ├── JDBCReaderProperties.java │ │ └── JDBCSplitter.java │ └── writer │ └── jdbc │ ├── JDBCWriter.java │ └── JDBCWriterProperties.java ├── hdata-kafka ├── pom.xml └── src │ └── main │ └── java │ └── com │ └── github │ └── stuxuhai │ └── hdata │ └── plugin │ ├── reader │ └── kafka │ │ ├── KafkaConsumer.java │ │ ├── KafkaReader.java │ │ └── KafkaReaderProperties.java │ └── writer │ └── kafka │ ├── KafkaWriter.java │ └── KafkaWriterProperties.java ├── hdata-mongodb ├── pom.xml └── src │ └── main │ └── java │ └── com │ └── github │ └── stuxuhai │ └── hdata │ └── plugin │ ├── reader │ └── mongodb │ │ ├── MongoDBReader.java │ │ ├── MongoDBReaderProperties.java │ │ └── MongoDBSplitter.java │ └── writer │ └── mongodb │ ├── MongoDBWriter.java │ └── MongoDBWriterProperties.java ├── hdata-wit ├── pom.xml └── src │ └── main │ ├── java │ └── com │ │ └── github │ │ └── stuxuhai │ │ └── hdata │ │ └── plugin │ │ └── wit │ │ ├── Methods.java │ │ ├── WitDynamicRecord.java │ │ ├── resolvers │ │ └── RecordResolver.java │ │ └── writer │ │ ├── WitWriter.java │ │ └── WitWriterProperties.java │ └── resources │ ├── META-INF │ └── services │ │ └── org.febit.wit.plugin.EnginePlugin │ └── hdata-wit-writer.wim └── pom.xml /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | /build/ -------------------------------------------------------------------------------- /assembly/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.github.stuxuhai 6 | hdata 7 | 0.2.8 8 | ../pom.xml 9 | 10 | assembly 11 | hdata-assembly 12 | 13 | ${project.basedir}/.. 
14 | 15 | 16 | 17 | 18 | org.apache.maven.plugins 19 | maven-assembly-plugin 20 | 21 | 22 | make-package 23 | package 24 | 25 | single 26 | 27 | 28 | ${topdir}/build 29 | hdata-${project.version} 30 | false 31 | 32 | src/build/package.xml 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /assembly/src/build/package.xml: -------------------------------------------------------------------------------- 1 | 5 | package 6 | 7 | tar.gz 8 | 9 | 10 | 11 | 12 | ${topdir}/bin 13 | bin 14 | 15 | 16 | 17 | ${topdir}/conf 18 | conf 19 | 20 | 21 | 22 | ${topdir}/target/all-modules/hdata-api 23 | lib 24 | 25 | 26 | ${topdir}/target/all-modules/hdata-core 27 | lib 28 | 29 | 30 | 31 | ${topdir}/target/all-modules/hdata-console 32 | plugins/console 33 | 34 | 35 | ${topdir}/target/all-modules/hdata-csv 36 | plugins/csv 37 | 38 | 39 | ${topdir}/target/all-modules/hdata-excel 40 | plugins/excel 41 | 42 | 43 | ${topdir}/target/all-modules/hdata-ftp 44 | plugins/ftp 45 | 46 | 47 | ${topdir}/target/all-modules/hdata-hbase 48 | plugins/hbase 49 | 50 | 51 | ${topdir}/target/all-modules/hdata-hdfs 52 | plugins/hdfs 53 | 54 | 55 | ${topdir}/target/all-modules/hdata-hive 56 | plugins/hive 57 | 58 | 59 | ${topdir}/target/all-modules/hdata-http 60 | plugins/http 61 | 62 | 63 | ${topdir}/target/all-modules/hdata-jdbc 64 | plugins/jdbc 65 | 66 | 67 | ${topdir}/target/all-modules/hdata-kafka 68 | plugins/kafka 69 | 70 | 71 | ${topdir}/target/all-modules/hdata-mongodb 72 | plugins/mongodb 73 | 74 | 75 | ${topdir}/target/all-modules/hdata-wit 76 | plugins/wit 77 | 78 | 79 | -------------------------------------------------------------------------------- /bin/hdata: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | CDPATH="" 5 | SCRIPT="$0" 6 | 7 | while [ -h "$SCRIPT" ] ; do 8 | ls=$(ls -ld "$SCRIPT") 9 | link=$(expr "$ls" : '.*-> \(.*\)$') 10 | if expr "$link" : '/.*' > /dev/null; then 11 | SCRIPT="$link" 12 | else 13 | SCRIPT=$(dirname "$SCRIPT")/"$link" 14 | fi 15 | done 16 | 17 | HDATA_HOME=$(cd "$(dirname "$SCRIPT")/.."; pwd) 18 | HDATA_LIB_DIR=$HDATA_HOME/lib 19 | HDATA_CONF_DIR=$HDATA_HOME/conf 20 | 21 | if [ -x "$JAVA_HOME/bin/java" ]; then 22 | JAVA="$JAVA_HOME/bin/java" 23 | else 24 | JAVA=$(which java) 25 | fi 26 | 27 | if [ ! -x "$JAVA" ]; then 28 | echo "Could not find any executable java binary. Please install java in your PATH or set JAVA_HOME" 29 | exit 1 30 | fi 31 | 32 | HDATA_CLASSPATH_APPEND="$HDATA_CLASSPATH" 33 | HDATA_CLASSPATH='.' 34 | 35 | for f in $HDATA_LIB_DIR/*.jar; do 36 | HDATA_CLASSPATH="${HDATA_CLASSPATH}:$f"; 37 | done 38 | 39 | if [ ! 
-z "$HDATA_CLASSPATH_APPEND" ]; then 40 | HDATA_CLASSPATH="${HDATA_CLASSPATH}:$HDATA_CLASSPATH_APPEND"; 41 | fi 42 | 43 | JAVA_OPTS="$JAVA_OPTS -Xss256k" 44 | JAVA_OPTS="$JAVA_OPTS -Xms1G -Xmx1G -Xmn512M" 45 | JAVA_OPTS="$JAVA_OPTS -XX:+UseParNewGC" 46 | JAVA_OPTS="$JAVA_OPTS -XX:+UseConcMarkSweepGC" 47 | JAVA_OPTS="$JAVA_OPTS -XX:+CMSClassUnloadingEnabled" 48 | JAVA_OPTS="$JAVA_OPTS -XX:+CMSParallelRemarkEnabled" 49 | JAVA_OPTS="$JAVA_OPTS -XX:+DisableExplicitGC" 50 | JAVA_OPTS="$JAVA_OPTS -XX:CMSInitiatingOccupancyFraction=75" 51 | JAVA_OPTS="$JAVA_OPTS -XX:+UseCMSInitiatingOccupancyOnly" 52 | JAVA_OPTS="$JAVA_OPTS -XX:+HeapDumpOnOutOfMemoryError" 53 | JAVA_OPTS="$JAVA_OPTS -XX:SoftRefLRUPolicyMSPerMB=0" 54 | 55 | JAVA_OPTS="$JAVA_OPTS -Dhdata.conf.dir=$HDATA_CONF_DIR" 56 | JAVA_OPTS="$JAVA_OPTS -Dlog4j.configurationFile=file:///$HDATA_CONF_DIR/log4j2.xml" 57 | 58 | MAIN_CLASS="com.github.stuxuhai.hdata.CliDriver" 59 | 60 | exec "$JAVA" $JAVA_OPTS -cp "$HDATA_CLASSPATH" $MAIN_CLASS "$@" 61 | -------------------------------------------------------------------------------- /bin/hdata.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | cd /d %~dp0.. 3 | setlocal ENABLEDELAYEDEXPANSION 4 | set HDATA_HOME=%cd% 5 | set HDATA_LIB_DIR=%HDATA_HOME%\lib 6 | set HDATA_CONF_DIR=%HDATA_HOME%\conf 7 | set HDATA_PLUGINS_DIR=%HDATA_HOME%\plugins 8 | 9 | if not defined java_home ( 10 | echo "Not defined JAVA_HOME,Please install java in your PATH and set JAVA_HOME" 11 | call :timeoutAndExit 12 | ) 13 | set JAVA="%JAVA_HOME%\bin\java.exe" 14 | 15 | if not exist %JAVA% ( 16 | echo "Could not find any executable java binary. Please install java in your PATH or set JAVA_HOME" 17 | call :timeoutAndExit 18 | ) 19 | 20 | set HDATA_CLASSPATH=.;%HDATA_LIB_DIR%\* 21 | ::add plugins to class_path 22 | ::for /f %%i in ('dir /b /ad %HDATA_PLUGINS_DIR%') do ( 23 | ::set HDATA_CLASSPATH=!HDATA_CLASSPATH!;!HDATA_PLUGINS_DIR!\%%i\* 24 | ::) 25 | echo %HDATA_CLASSPATH% 26 | 27 | set JAVA_OPTS=%JAVA_OPTS% -Xss256k 28 | set JAVA_OPTS=%JAVA_OPTS% -Xms1G -Xmx1G -Xmn512M 29 | set JAVA_OPTS=%JAVA_OPTS% -XX:+UseParNewGC 30 | set JAVA_OPTS=%JAVA_OPTS% -XX:+UseConcMarkSweepGC 31 | set JAVA_OPTS=%JAVA_OPTS% -XX:+CMSClassUnloadingEnabled 32 | set JAVA_OPTS=%JAVA_OPTS% -XX:+CMSParallelRemarkEnabled 33 | set JAVA_OPTS=%JAVA_OPTS% -XX:+DisableExplicitGC 34 | set JAVA_OPTS=%JAVA_OPTS% -XX:CMSInitiatingOccupancyFraction=75 35 | set JAVA_OPTS=%JAVA_OPTS% -XX:+UseCMSInitiatingOccupancyOnly 36 | set JAVA_OPTS=%JAVA_OPTS% -XX:+HeapDumpOnOutOfMemoryError 37 | set JAVA_OPTS=%JAVA_OPTS% -XX:SoftRefLRUPolicyMSPerMB=0 38 | 39 | set JAVA_OPTS=%JAVA_OPTS% -Dhdata.conf.dir="%HDATA_CONF_DIR%" 40 | set JAVA_OPTS=%JAVA_OPTS% -Dlog4j.configurationFile=file:///%HDATA_CONF_DIR%\log4j2.xml 41 | 42 | set MAIN_CLASS="com.github.stuxuhai.hdata.CliDriver" 43 | 44 | echo %JAVA% %JAVA_OPTS% -classpath "%HDATA_CLASSPATH%" %MAIN_CLASS% %1 %2 %3 %4 %5 %6 %7 %8 %9 45 | %JAVA% %JAVA_OPTS% -classpath "%HDATA_CLASSPATH%" %MAIN_CLASS% %1 %2 %3 %4 %5 %6 %7 %8 %9 46 | 47 | goto :EOF 48 | 49 | :timeoutAndExit 50 | timeout /t 10&&exit 51 | -------------------------------------------------------------------------------- /conf/hdata.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | hdata.storage.default.buffer.size 6 | 16384 7 | 默认storage缓冲区大小,值必须为2^n 8 | 9 | 10 | hdata.storage.disruptor.wait.strategy 11 | com.lmax.disruptor.BlockingWaitStrategy 12 | 线程等待策略 13 | 14 | 15 
| hdata.hive.writer.tmp.dir 16 | /tmp 17 | Hive Writer写入HDFS文件的临时目录 18 | 19 | 20 | jdbc.reader.sql.metric.time.ms 21 | 3000 22 | 设定SQL执行时间阈值,超过该值将记录日志中 23 | 24 | 25 | -------------------------------------------------------------------------------- /conf/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /conf/plugins.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | jdbc 7 | com.github.stuxuhai.hdata.plugin.reader.jdbc.JDBCReader 8 | 9 | 10 | hive 11 | com.github.stuxuhai.hdata.plugin.reader.hive.HiveReader 12 | 13 | 14 | hdfs 15 | com.github.stuxuhai.hdata.plugin.reader.hdfs.HDFSReader 16 | 17 | 18 | ftp 19 | com.github.stuxuhai.hdata.plugin.reader.ftp.FTPReader 20 | 21 | 22 | mongodb 23 | com.github.stuxuhai.hdata.plugin.reader.mongodb.MongoDBReader 24 | 25 | 26 | hbase 27 | com.github.stuxuhai.hdata.plugin.reader.hbase.HBaseReader 28 | 29 | 30 | http 31 | com.github.stuxuhai.hdata.plugin.reader.http.HttpReader 32 | 33 | 34 | csv 35 | com.github.stuxuhai.hdata.plugin.reader.csv.CSVReader 36 | 37 | 38 | kafka 39 | com.github.stuxuhai.hdata.plugin.reader.kafka.KafkaReader 40 | 41 | 42 | console 43 | com.github.stuxuhai.hdata.plugin.reader.console.ConsoleReader 44 | 45 | 46 | excel 47 | com.github.stuxuhai.hdata.plugin.excel.reader.ExcelReader 48 | 49 | 50 | 51 | 52 | 53 | console 54 | com.github.stuxuhai.hdata.plugin.writer.console.ConsoleWriter 55 | 56 | 57 | jdbc 58 | com.github.stuxuhai.hdata.plugin.writer.jdbc.JDBCWriter 59 | 60 | 61 | hive 62 | com.github.stuxuhai.hdata.plugin.writer.hive.HiveWriter 63 | 64 | 65 | hdfs 66 | com.github.stuxuhai.hdata.plugin.writer.hdfs.HDFSWriter 67 | 68 | 69 | ftp 70 | com.github.stuxuhai.hdata.plugin.writer.ftp.FTPWriter 71 | 72 | 73 | mongodb 74 | com.github.stuxuhai.hdata.plugin.writer.mongodb.MongoDBWriter 75 | 76 | 77 | hbase 78 | com.github.stuxuhai.hdata.plugin.writer.hbase.HBaseWriter 79 | 80 | 81 | csv 82 | com.github.stuxuhai.hdata.plugin.writer.csv.CSVWriter 83 | 84 | 85 | kafka 86 | com.github.stuxuhai.hdata.plugin.writer.kafka.KafkaWriter 87 | 88 | 89 | excel 90 | com.github.stuxuhai.hdata.plugin.excel.writer.ExcelWriter 91 | 92 | 93 | wit 94 | com.github.stuxuhai.hdata.plugin.wit.writer.WitWriter 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /doc/img/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fork-archive-hub/hdata/5fdaf9d9e2e21c1ae704db4579c13753be23bd0a/doc/img/1.png -------------------------------------------------------------------------------- /doc/img/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fork-archive-hub/hdata/5fdaf9d9e2e21c1ae704db4579c13753be23bd0a/doc/img/2.png -------------------------------------------------------------------------------- /hdata-api/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.github.stuxuhai 6 | hdata 7 | 0.2.8 8 | ../pom.xml 9 | 10 | hdata-api 11 | hdata-api 12 | 13 | 14 | 15 | 2.5 16 | 17 | 18 | 19 | 20 | org.apache.logging.log4j 21 | log4j-api 22 | ${log4j.version} 23 | 24 | 25 | org.apache.logging.log4j 26 | log4j-core 27 | ${log4j.version} 28 | 29 | 30 | org.apache.logging.log4j 31 | 
log4j-slf4j-impl 32 | ${log4j.version} 33 | 34 | 35 | org.apache.logging.log4j 36 | log4j-1.2-api 37 | ${log4j.version} 38 | 39 | 40 | org.apache.logging.log4j 41 | log4j-jul 42 | ${log4j.version} 43 | 44 | 45 | org.apache.logging.log4j 46 | log4j-jcl 47 | ${log4j.version} 48 | 49 | 50 | -------------------------------------------------------------------------------- /hdata-api/src/main/java/com/github/stuxuhai/hdata/api/AbstractPlugin.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.api; 2 | 3 | public abstract class AbstractPlugin implements Pluginable { 4 | 5 | private String pluginName; 6 | 7 | @Override 8 | public String getPluginName() { 9 | return this.pluginName; 10 | } 11 | 12 | @Override 13 | public void setPluginName(String name) { 14 | this.pluginName = name; 15 | } 16 | 17 | } 18 | -------------------------------------------------------------------------------- /hdata-api/src/main/java/com/github/stuxuhai/hdata/api/Configuration.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.api; 2 | 3 | import java.util.Properties; 4 | 5 | public abstract class Configuration extends Properties { 6 | 7 | private static final long serialVersionUID = 8606831740240321865L; 8 | 9 | public String getString(String key, String defaultValue) { 10 | String value = getProperty(key); 11 | return value != null ? value : defaultValue; 12 | } 13 | 14 | public String getString(String key) { 15 | return getProperty(key); 16 | } 17 | 18 | public void setString(String key, String value) { 19 | setProperty(key, value); 20 | } 21 | 22 | public int getInt(String key, int defaultValue) { 23 | String value = getProperty(key); 24 | return value != null ? Integer.parseInt(value) : defaultValue; 25 | } 26 | 27 | public void setInt(String key, int value) { 28 | setString(key, Integer.toString(value)); 29 | } 30 | 31 | public long getLong(String key, long defaultValue) { 32 | String value = getProperty(key); 33 | return value != null ? Long.parseLong(value) : defaultValue; 34 | } 35 | 36 | public void setLong(String key, long value) { 37 | setString(key, Long.toString(value)); 38 | } 39 | 40 | public double getDouble(String key, double defaultValue) { 41 | String value = getProperty(key); 42 | return value != null ? Double.parseDouble(value) : defaultValue; 43 | } 44 | 45 | public void setDouble(String key, double value) { 46 | setString(key, Double.toString(value)); 47 | } 48 | 49 | public boolean getBoolean(String key, boolean defaultValue) { 50 | String value = getProperty(key); 51 | return value != null ? Boolean.parseBoolean(value) : defaultValue; 52 | } 53 | 54 | public void setBoolean(String key, boolean value) { 55 | setString(key, Boolean.toString(value)); 56 | } 57 | 58 | public float getFloat(String key, float defaultValue) { 59 | String value = getProperty(key); 60 | return value != null ? 
Float.parseFloat(value) : defaultValue; 61 | } 62 | 63 | public void setFloat(String key, float value) { 64 | setString(key, Float.toString(value)); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /hdata-api/src/main/java/com/github/stuxuhai/hdata/api/DefaultRecord.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.api; 2 | 3 | public class DefaultRecord implements Record { 4 | 5 | private final Object[] fields; 6 | private int cursor; 7 | 8 | public DefaultRecord(int fieldCount) { 9 | fields = new Object[fieldCount]; 10 | } 11 | 12 | @Override 13 | public void add(int index, Object field) { 14 | fields[index] = field; 15 | this.cursor++; 16 | } 17 | 18 | @Override 19 | public void add(Object field) { 20 | add(cursor, field); 21 | } 22 | 23 | @Override 24 | public Object get(int index) { 25 | return fields[index]; 26 | } 27 | 28 | @Override 29 | public int size() { 30 | return fields.length; 31 | } 32 | 33 | @Override 34 | public String toString() { 35 | StringBuilder sb = new StringBuilder(); 36 | sb.append("{"); 37 | for (int i = 0, len = fields.length; i < len; i++) { 38 | if (i > 0) { 39 | sb.append(", "); 40 | } 41 | sb.append(fields[i]); 42 | } 43 | sb.append("}"); 44 | return sb.toString(); 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /hdata-api/src/main/java/com/github/stuxuhai/hdata/api/EngineConfig.java: -------------------------------------------------------------------------------- 1 | 2 | package com.github.stuxuhai.hdata.api; 3 | 4 | public abstract class EngineConfig extends Configuration { 5 | 6 | private static final long serialVersionUID = 1L; 7 | 8 | } 9 | -------------------------------------------------------------------------------- /hdata-api/src/main/java/com/github/stuxuhai/hdata/api/Fields.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.api; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collections; 5 | 6 | public class Fields extends ArrayList { 7 | 8 | private static final long serialVersionUID = -174064216143075549L; 9 | 10 | public Fields() { 11 | super(); 12 | } 13 | 14 | public Fields(String... 
fields) { 15 | super(); 16 | Collections.addAll(this, fields); 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /hdata-api/src/main/java/com/github/stuxuhai/hdata/api/JobConfig.java: -------------------------------------------------------------------------------- 1 | 2 | package com.github.stuxuhai.hdata.api; 3 | 4 | public abstract class JobConfig extends Configuration { 5 | 6 | private static final long serialVersionUID = 1L; 7 | 8 | public abstract PluginConfig getReaderConfig(); 9 | 10 | public abstract PluginConfig getWriterConfig(); 11 | 12 | public abstract String getReaderName(); 13 | 14 | public abstract String getWriterName(); 15 | 16 | public abstract Reader newReader(); 17 | 18 | public abstract Splitter newSplitter(); 19 | 20 | public abstract Writer newWriter(); 21 | } 22 | -------------------------------------------------------------------------------- /hdata-api/src/main/java/com/github/stuxuhai/hdata/api/JobContext.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.api; 2 | 3 | public class JobContext { 4 | 5 | private EngineConfig engineConfig; 6 | private JobConfig jobConfig; 7 | private OutputFieldsDeclarer declarer; 8 | private Storage storage; 9 | private Metric metric; 10 | private Reader[] readers; 11 | private Writer[] writers; 12 | private boolean isReaderFinished; 13 | private boolean isReaderError; 14 | private boolean isWriterFinished; 15 | private boolean isWriterError; 16 | private JobStatus jobStatus = JobStatus.SUCCESS; 17 | 18 | public Fields getFields() { 19 | return declarer.getFields(); 20 | } 21 | 22 | public void setFields(Fields fields) { 23 | declarer.declare(fields); 24 | } 25 | 26 | public EngineConfig getEngineConfig() { 27 | return engineConfig; 28 | } 29 | 30 | public void setEngineConfig(EngineConfig engineConfig) { 31 | this.engineConfig = engineConfig; 32 | } 33 | 34 | public OutputFieldsDeclarer getDeclarer() { 35 | return declarer; 36 | } 37 | 38 | public void setDeclarer(OutputFieldsDeclarer declarer) { 39 | this.declarer = declarer; 40 | } 41 | 42 | public Storage getStorage() { 43 | return storage; 44 | } 45 | 46 | public void setStorage(Storage storage) { 47 | this.storage = storage; 48 | } 49 | 50 | public Metric getMetric() { 51 | return metric; 52 | } 53 | 54 | public void setMetric(Metric metric) { 55 | this.metric = metric; 56 | } 57 | 58 | public JobConfig getJobConfig() { 59 | return jobConfig; 60 | } 61 | 62 | public void setJobConfig(JobConfig jobConfig) { 63 | this.jobConfig = jobConfig; 64 | } 65 | 66 | public boolean isWriterError() { 67 | return isWriterError; 68 | } 69 | 70 | public void setWriterError(boolean isWriterError) { 71 | this.isWriterError = isWriterError; 72 | } 73 | 74 | public boolean isReaderFinished() { 75 | return isReaderFinished; 76 | } 77 | 78 | public void setReaderFinished(boolean isReaderFinished) { 79 | this.isReaderFinished = isReaderFinished; 80 | } 81 | 82 | public boolean isReaderError() { 83 | return isReaderError; 84 | } 85 | 86 | public void setReaderError(boolean isReaderError) { 87 | this.isReaderError = isReaderError; 88 | } 89 | 90 | public boolean isWriterFinished() { 91 | return isWriterFinished; 92 | } 93 | 94 | public void setWriterFinished(boolean isWriterFinished) { 95 | this.isWriterFinished = isWriterFinished; 96 | } 97 | 98 | public Reader[] getReaders() { 99 | return readers; 100 | } 101 | 102 | public void setReaders(Reader[] readers) { 103 | 
this.readers = readers; 104 | } 105 | 106 | public Writer[] getWriters() { 107 | return writers; 108 | } 109 | 110 | public void setWriters(Writer[] writers) { 111 | this.writers = writers; 112 | } 113 | 114 | public JobStatus getJobStatus() { 115 | return jobStatus; 116 | } 117 | 118 | public void setJobStatus(JobStatus jobStatus) { 119 | this.jobStatus = jobStatus; 120 | } 121 | 122 | public void declareOutputFields() { 123 | for (Reader reader : readers) { 124 | if (getFields() == null) { 125 | reader.declareOutputFields(getDeclarer()); 126 | } else { 127 | break; 128 | } 129 | } 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /hdata-api/src/main/java/com/github/stuxuhai/hdata/api/JobStatus.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.api; 2 | 3 | public enum JobStatus { 4 | 5 | SUCCESS(0), FAILED(1); 6 | 7 | private int status; 8 | 9 | JobStatus(int status) { 10 | this.status = status; 11 | } 12 | 13 | public int getStatus() { 14 | return this.status; 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /hdata-api/src/main/java/com/github/stuxuhai/hdata/api/Metric.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.api; 2 | 3 | import java.util.concurrent.atomic.AtomicLong; 4 | 5 | public class Metric { 6 | 7 | private AtomicLong readCount = new AtomicLong(0); 8 | private AtomicLong writeCount = new AtomicLong(0); 9 | private AtomicLong readBytes = new AtomicLong(0); 10 | 11 | private long readerStartTime; 12 | private long readerEndTime; 13 | private long writerStartTime; 14 | private long writerEndTime; 15 | 16 | public AtomicLong getReadCount() { 17 | return readCount; 18 | } 19 | 20 | public void setReadCount(AtomicLong readCount) { 21 | this.readCount = readCount; 22 | } 23 | 24 | public AtomicLong getWriteCount() { 25 | return writeCount; 26 | } 27 | 28 | public void setWriteCount(AtomicLong writeCount) { 29 | this.writeCount = writeCount; 30 | } 31 | 32 | public long getReaderStartTime() { 33 | return readerStartTime; 34 | } 35 | 36 | public void setReaderStartTime(long readerStartTime) { 37 | this.readerStartTime = readerStartTime; 38 | } 39 | 40 | public long getReaderEndTime() { 41 | return readerEndTime; 42 | } 43 | 44 | public void setReaderEndTime(long readerEndTime) { 45 | this.readerEndTime = readerEndTime; 46 | } 47 | 48 | public long getWriterStartTime() { 49 | return writerStartTime; 50 | } 51 | 52 | public void setWriterStartTime(long writerStartTime) { 53 | this.writerStartTime = writerStartTime; 54 | } 55 | 56 | public long getWriterEndTime() { 57 | return writerEndTime; 58 | } 59 | 60 | public void setWriterEndTime(long writerEndTime) { 61 | this.writerEndTime = writerEndTime; 62 | } 63 | 64 | public AtomicLong getReadBytes() { 65 | return readBytes; 66 | } 67 | 68 | public void setReadBytes(AtomicLong readBytes) { 69 | this.readBytes = readBytes; 70 | } 71 | 72 | public long getSpeed() { 73 | long distance = (System.currentTimeMillis() - this.readerStartTime) / 1000; 74 | if (distance == 0) { 75 | return 0; 76 | } 77 | return this.readBytes.get() / distance; 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /hdata-api/src/main/java/com/github/stuxuhai/hdata/api/OutputFieldsDeclarer.java: 
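[Editor's note, not part of the repository] The api classes above form the small data model every plugin works with: Fields names the columns, DefaultRecord carries one row with a cursor-based add, and Metric holds the atomic read/write counters that drive the flow limit. A minimal, illustrative sketch of how they compose (class name ApiSketch and the values are hypothetical):

import com.github.stuxuhai.hdata.api.DefaultRecord;
import com.github.stuxuhai.hdata.api.Fields;
import com.github.stuxuhai.hdata.api.Metric;
import com.github.stuxuhai.hdata.api.Record;

public class ApiSketch {
    public static void main(String[] args) {
        Fields fields = new Fields("id", "name");          // column names for the job

        Record record = new DefaultRecord(fields.size());  // fixed-size row
        record.add(1L);                                    // cursor-based add -> index 0
        record.add("example");                             // -> index 1

        Metric metric = new Metric();
        metric.setReaderStartTime(System.currentTimeMillis());
        metric.getReadCount().incrementAndGet();           // one row read
        metric.getReadBytes().addAndGet(16L);              // read bytes feed Metric.getSpeed()
        System.out.println(record + " rows=" + metric.getReadCount());
    }
}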
-------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.api; 2 | 3 | public class OutputFieldsDeclarer { 4 | private Fields fields; 5 | 6 | public void declare(Fields fields) { 7 | this.fields = fields; 8 | } 9 | 10 | public Fields getFields() { 11 | return fields; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /hdata-api/src/main/java/com/github/stuxuhai/hdata/api/PluginConfig.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.api; 2 | 3 | public class PluginConfig extends Configuration { 4 | 5 | private static final String PARALLELISM_KEY = "parallelism"; 6 | private static final int DEFAULT_PARALLELISM = 1; 7 | 8 | private static final String FLOWLIMIT_KEY = "flow.limit"; 9 | private static final long DEFAULT_FLOWLIMIT = 0; 10 | 11 | private static final long serialVersionUID = 3311331304791946068L; 12 | 13 | public PluginConfig() { 14 | super(); 15 | } 16 | 17 | public int getParallelism() { 18 | int parallelism = getInt(PARALLELISM_KEY, DEFAULT_PARALLELISM); 19 | if (parallelism < 1) { 20 | throw new IllegalArgumentException("Reader and Writer parallelism must be >= 1."); 21 | } 22 | return parallelism; 23 | } 24 | 25 | public long getFlowLimit() { 26 | long flowLimit = getLong(FLOWLIMIT_KEY, DEFAULT_FLOWLIMIT); 27 | if (flowLimit < 0) { 28 | throw new IllegalArgumentException("Reader and Writer FLowLimit must be >= 0."); 29 | } 30 | return flowLimit; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /hdata-api/src/main/java/com/github/stuxuhai/hdata/api/Pluginable.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.api; 2 | 3 | public interface Pluginable { 4 | 5 | public String getPluginName(); 6 | 7 | public void setPluginName(String name); 8 | } 9 | -------------------------------------------------------------------------------- /hdata-api/src/main/java/com/github/stuxuhai/hdata/api/Reader.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.api; 2 | 3 | public abstract class Reader extends AbstractPlugin { 4 | 5 | public void prepare(JobContext context, PluginConfig readerConfig) { 6 | } 7 | 8 | public void execute(RecordCollector recordCollector) { 9 | } 10 | 11 | public void close() { 12 | } 13 | 14 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 15 | } 16 | 17 | public abstract Splitter newSplitter(); 18 | } 19 | -------------------------------------------------------------------------------- /hdata-api/src/main/java/com/github/stuxuhai/hdata/api/Record.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.api; 2 | 3 | public interface Record { 4 | 5 | public void add(Object object); 6 | 7 | public void add(int index, Object object); 8 | 9 | public Object get(int index); 10 | 11 | public int size(); 12 | } 13 | -------------------------------------------------------------------------------- /hdata-api/src/main/java/com/github/stuxuhai/hdata/api/RecordCollector.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.api; 2 | 3 | public interface RecordCollector { 4 | 5 | public void send(Record record); 6 | 7 | public void send(Record[] 
records); 8 | } 9 | -------------------------------------------------------------------------------- /hdata-api/src/main/java/com/github/stuxuhai/hdata/api/Splitter.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.api; 2 | 3 | import java.util.List; 4 | 5 | public abstract class Splitter extends AbstractPlugin { 6 | 7 | public abstract List split(JobConfig jobConfig); 8 | } 9 | -------------------------------------------------------------------------------- /hdata-api/src/main/java/com/github/stuxuhai/hdata/api/Storage.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.api; 2 | 3 | public interface Storage { 4 | 5 | public void put(Record record); 6 | 7 | public void put(Record[] records); 8 | 9 | public boolean isEmpty(); 10 | 11 | public int size(); 12 | 13 | public void close(); 14 | } 15 | -------------------------------------------------------------------------------- /hdata-api/src/main/java/com/github/stuxuhai/hdata/api/Writer.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.api; 2 | 3 | public abstract class Writer extends AbstractPlugin { 4 | 5 | public void prepare(JobContext context, PluginConfig writerConfig) { 6 | } 7 | 8 | public void execute(Record record) { 9 | } 10 | 11 | public void close() { 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /hdata-api/src/main/java/com/github/stuxuhai/hdata/exception/HDataException.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.exception; 2 | 3 | public class HDataException extends RuntimeException { 4 | 5 | private static final long serialVersionUID = 2510267358921118998L; 6 | 7 | private String message; 8 | 9 | public HDataException() { 10 | super(); 11 | } 12 | 13 | public HDataException(final String message) { 14 | super(message); 15 | } 16 | 17 | public HDataException(final Exception e) { 18 | super(e); 19 | } 20 | 21 | public HDataException(Throwable cause) { 22 | super(cause); 23 | } 24 | 25 | public HDataException(final String message, final Throwable cause) { 26 | super(message, cause); 27 | } 28 | 29 | @Override 30 | public String getMessage() { 31 | return this.message == null ? 
super.getMessage() : this.message; 32 | } 33 | 34 | public void setMessage(String message) { 35 | this.message = message; 36 | } 37 | 38 | @Override 39 | public String toString() { 40 | return this.message; 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /hdata-console/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.github.stuxuhai 6 | hdata 7 | 0.2.8 8 | 9 | hdata-console 10 | 11 | 12 | 13 | com.github.stuxuhai 14 | hdata-api 15 | provided 16 | 17 | 18 | -------------------------------------------------------------------------------- /hdata-console/src/main/java/com/github/stuxuhai/hdata/plugin/reader/console/ConsoleReader.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.console; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.IOException; 5 | import java.io.InputStreamReader; 6 | 7 | import com.github.stuxuhai.hdata.api.DefaultRecord; 8 | import com.github.stuxuhai.hdata.api.JobContext; 9 | import com.github.stuxuhai.hdata.api.PluginConfig; 10 | import com.github.stuxuhai.hdata.api.Reader; 11 | import com.github.stuxuhai.hdata.api.Record; 12 | import com.github.stuxuhai.hdata.api.RecordCollector; 13 | import com.github.stuxuhai.hdata.api.Splitter; 14 | import com.github.stuxuhai.hdata.exception.HDataException; 15 | 16 | public class ConsoleReader extends Reader { 17 | 18 | private BufferedReader br = null; 19 | 20 | @Override 21 | public void prepare(JobContext context, PluginConfig readerConfig) { 22 | br = new BufferedReader(new InputStreamReader(System.in)); 23 | } 24 | 25 | @Override 26 | public void execute(RecordCollector recordCollector) { 27 | try { 28 | String line = null; 29 | while ((line = br.readLine()) != null) { 30 | Record record = new DefaultRecord(1); 31 | record.add(line); 32 | recordCollector.send(record); 33 | } 34 | br.close(); 35 | } catch (IOException e) { 36 | new HDataException(e); 37 | } 38 | } 39 | 40 | @Override 41 | public Splitter newSplitter() { 42 | return null; 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /hdata-console/src/main/java/com/github/stuxuhai/hdata/plugin/writer/console/ConsoleWriter.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.writer.console; 2 | 3 | import java.sql.Timestamp; 4 | import java.text.DateFormat; 5 | import java.text.SimpleDateFormat; 6 | 7 | import com.github.stuxuhai.hdata.api.Record; 8 | import com.github.stuxuhai.hdata.api.Writer; 9 | 10 | public class ConsoleWriter extends Writer { 11 | 12 | private DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); 13 | 14 | @Override 15 | public void execute(Record record) { 16 | StringBuilder sb = new StringBuilder(); 17 | sb.append("{"); 18 | for (int i = 0, len = record.size(); i < len; i++) { 19 | if (i > 0) { 20 | sb.append(", "); 21 | } 22 | Object obj = record.get(i); 23 | if (obj instanceof Timestamp) { 24 | sb.append(dateFormat.format(obj)); 25 | } else { 26 | sb.append(obj); 27 | } 28 | } 29 | sb.append("}"); 30 | System.out.println(sb.toString()); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /hdata-core/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.github.stuxuhai 6 | hdata 
7 | 0.2.8 8 | ../pom.xml 9 | 10 | hdata-core 11 | hdata-core 12 | 13 | 14 | 15 | com.github.stuxuhai 16 | hdata-api 17 | 18 | 19 | com.google.guava 20 | guava 21 | 22 | 23 | com.lmax 24 | disruptor 25 | 3.3.4 26 | 27 | 28 | commons-cli 29 | commons-cli 30 | 1.3.1 31 | 32 | 33 | com.carrotsearch 34 | java-sizeof 35 | 0.0.5 36 | 37 | 38 | org.apache.commons 39 | commons-lang3 40 | 3.4 41 | 42 | 43 | commons-configuration 44 | commons-configuration 45 | 1.10 46 | 47 | 48 | commons-collections 49 | commons-collections 50 | 3.2.2 51 | 52 | 53 | commons-beanutils 54 | commons-beanutils 55 | 1.9.2 56 | 57 | 58 | net.hydromatic 59 | eigenbase-properties 60 | 1.1.5 61 | 62 | 63 | org.apache.commons 64 | commons-digester3 65 | 3.2 66 | 67 | 68 | -------------------------------------------------------------------------------- /hdata-core/src/main/java/com/github/stuxuhai/hdata/CliDriver.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata; 2 | 3 | import com.github.stuxuhai.hdata.api.JobConfig; 4 | import java.util.Map.Entry; 5 | import java.util.Properties; 6 | 7 | import org.apache.commons.cli.CommandLine; 8 | import org.apache.commons.cli.CommandLineParser; 9 | import org.apache.commons.cli.DefaultParser; 10 | import org.apache.commons.cli.HelpFormatter; 11 | import org.apache.commons.cli.Option; 12 | import org.apache.commons.cli.Options; 13 | import org.apache.commons.cli.ParseException; 14 | import org.apache.logging.log4j.Level; 15 | import org.apache.logging.log4j.LogManager; 16 | import org.apache.logging.log4j.Logger; 17 | import org.apache.logging.log4j.core.LoggerContext; 18 | import org.apache.logging.log4j.core.config.Configuration; 19 | 20 | import com.github.stuxuhai.hdata.api.PluginConfig; 21 | import com.github.stuxuhai.hdata.config.DefaultJobConfig; 22 | import com.github.stuxuhai.hdata.core.HData; 23 | import com.github.stuxuhai.hdata.exception.HDataException; 24 | import com.google.common.base.Throwables; 25 | 26 | public class CliDriver { 27 | 28 | private static final String XML_FILE_OPTION = "f"; 29 | private static final String HDATA_VARS_OPTION = "D"; 30 | private static final String QUIET_OPTION = "q"; 31 | private static final String READER_OPTION = "reader"; 32 | private static final String WRITER_OPTION = "writer"; 33 | private static final String READER_VARS_OPTION = "R"; 34 | private static final String WRITER_VARS_OPTION = "W"; 35 | 36 | private static final Logger LOGGER = LogManager.getLogger(); 37 | 38 | /** 39 | * 创建命令行选项 40 | * 41 | * @return 42 | */ 43 | public Options createOptions() { 44 | Options options = new Options(); 45 | options.addOption(XML_FILE_OPTION, null, true, "job xml path"); 46 | options.addOption(QUIET_OPTION, null, false, "quiet"); 47 | options.addOption(Option.builder(HDATA_VARS_OPTION).hasArgs().build()); 48 | 49 | options.addOption(null, READER_OPTION, true, "reader name"); 50 | options.addOption(Option.builder(READER_VARS_OPTION).hasArgs().build()); 51 | 52 | options.addOption(null, WRITER_OPTION, true, "writer name"); 53 | options.addOption(Option.builder(WRITER_VARS_OPTION).hasArgs().build()); 54 | return options; 55 | } 56 | 57 | /** 58 | * 打印命令行帮助信息 59 | * 60 | * @param options 61 | */ 62 | public void printHelp(Options options) { 63 | HelpFormatter formatter = new HelpFormatter(); 64 | formatter.printHelp(" ", options); 65 | } 66 | 67 | /** 68 | * 替换命令行变量 69 | * 70 | * @param config 71 | * @param vars 72 | */ 73 | public void replaceConfigVars(PluginConfig config, 
Properties vars) { 74 | for (Entry confEntry : config.entrySet()) { 75 | if (confEntry.getKey().getClass() == String.class && confEntry.getValue().getClass() == String.class) { 76 | for (Entry varEntry : vars.entrySet()) { 77 | String replaceVar = "${" + varEntry.getKey() + "}"; 78 | if (confEntry.getValue().toString().contains(replaceVar)) { 79 | config.put(confEntry.getKey(), confEntry.getValue().toString().replace(replaceVar, varEntry.getValue().toString())); 80 | } 81 | } 82 | } 83 | } 84 | } 85 | 86 | private void putOptionValues(Properties props, String[] values) { 87 | if (props != null && values != null) { 88 | for (int i = 0, len = values.length; i < len; i++) { 89 | props.put(values[i], values[++i]); 90 | } 91 | } 92 | } 93 | 94 | /** 95 | * 主程序入口 96 | * 97 | * @param args 98 | */ 99 | public static void main(String[] args) { 100 | 101 | CliDriver cliDriver = new CliDriver(); 102 | Options options = cliDriver.createOptions(); 103 | if (args.length < 1) { 104 | cliDriver.printHelp(options); 105 | System.exit(-1); 106 | } 107 | 108 | CommandLineParser parser = new DefaultParser(); 109 | CommandLine cmd = null; 110 | try { 111 | cmd = parser.parse(options, args); 112 | if (cmd.hasOption(QUIET_OPTION)) { 113 | LoggerContext ctx = (LoggerContext) LogManager.getContext(false); 114 | Configuration conf = ctx.getConfiguration(); 115 | conf.getLoggerConfig(LogManager.ROOT_LOGGER_NAME).setLevel(Level.WARN); 116 | ctx.updateLoggers(conf); 117 | } 118 | 119 | final JobConfig jobConfig; 120 | if (cmd.hasOption(XML_FILE_OPTION)) { 121 | String jobXmlPath = cmd.getOptionValue(XML_FILE_OPTION); 122 | jobConfig = DefaultJobConfig.createFromXML(jobXmlPath); 123 | Properties vars = new Properties(); 124 | cliDriver.putOptionValues(vars, cmd.getOptionValues(HDATA_VARS_OPTION)); 125 | 126 | final PluginConfig readerConfig = jobConfig.getReaderConfig(); 127 | final PluginConfig writerConfig = jobConfig.getWriterConfig(); 128 | 129 | cliDriver.replaceConfigVars(readerConfig, vars); 130 | cliDriver.replaceConfigVars(writerConfig, vars); 131 | } else { 132 | if (!cmd.hasOption(READER_OPTION) || !cmd.hasOption(WRITER_OPTION)) { 133 | throw new HDataException("Option --reader and --writer should be both given if -f option not exists."); 134 | } 135 | 136 | String readerName = cmd.getOptionValue(READER_OPTION); 137 | String writerName = cmd.getOptionValue(WRITER_OPTION); 138 | 139 | PluginConfig readerConfig = new PluginConfig(); 140 | cliDriver.putOptionValues(readerConfig, cmd.getOptionValues(READER_VARS_OPTION)); 141 | 142 | PluginConfig writerConfig = new PluginConfig(); 143 | cliDriver.putOptionValues(writerConfig, cmd.getOptionValues(WRITER_VARS_OPTION)); 144 | 145 | jobConfig = new DefaultJobConfig(readerName, readerConfig, writerName, writerConfig); 146 | } 147 | 148 | HData hData = new HData(); 149 | hData.start(jobConfig); 150 | } catch (ParseException e) { 151 | cliDriver.printHelp(options); 152 | System.exit(-1); 153 | } catch (Exception e) { 154 | LOGGER.error(Throwables.getStackTraceAsString(e)); 155 | System.exit(-1); 156 | } 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /hdata-core/src/main/java/com/github/stuxuhai/hdata/common/Constants.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.common; 2 | 3 | public interface Constants { 4 | 5 | public static final String HDATA_XML = "hdata.xml"; 6 | public static final String PLUGINS_XML = "plugins.xml"; 7 | public 
static final String LOG4J2_XML = "log4j2.xml"; 8 | public static final String DATE_FORMAT_STRING = "yyyy-MM-dd HH:mm:ss"; 9 | public static final String COLUMNS_SPLIT_REGEX = "\\s*,\\s*"; 10 | public static final long DEFAULT_HDATA_SLEEP_MILLIS = 3000; 11 | 12 | } 13 | -------------------------------------------------------------------------------- /hdata-core/src/main/java/com/github/stuxuhai/hdata/common/HDataConfigConstants.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.common; 2 | 3 | public interface HDataConfigConstants { 4 | 5 | public static final String STORAGE_BUFFER_SIZE = "hdata.storage.default.buffer.size"; 6 | public static final String HDATA_STORAGE_DISRUPTOR_WAIT_STRATEGY = "hdata.storage.disruptor.wait.strategy"; 7 | public static final String HDATA_SLEEP_MILLIS = "hdata.sleep.millis"; 8 | public static final String HDATA_HIVE_WRITER_TMP_DIR = "hdata.hive.writer.tmp.dir"; 9 | public static final String JDBC_READER_SQL_METRIC_TIME_MS = "jdbc.reader.sql.metric.time.ms"; 10 | 11 | } 12 | -------------------------------------------------------------------------------- /hdata-core/src/main/java/com/github/stuxuhai/hdata/config/DefaultEngineConfig.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.config; 2 | 3 | import java.util.List; 4 | 5 | import org.apache.commons.configuration.ConfigurationException; 6 | import org.apache.commons.configuration.HierarchicalConfiguration; 7 | import org.apache.commons.configuration.XMLConfiguration; 8 | 9 | import com.github.stuxuhai.hdata.api.EngineConfig; 10 | import com.github.stuxuhai.hdata.common.Constants; 11 | import com.github.stuxuhai.hdata.util.Utils; 12 | import com.google.common.base.Throwables; 13 | 14 | public class DefaultEngineConfig extends EngineConfig { 15 | 16 | private static final long serialVersionUID = 1L; 17 | 18 | private DefaultEngineConfig() { 19 | super(); 20 | } 21 | 22 | public static DefaultEngineConfig create() { 23 | DefaultEngineConfig conf = new DefaultEngineConfig(); 24 | String path = Utils.getConfigDir() + Constants.HDATA_XML; 25 | 26 | try { 27 | XMLConfiguration config = new XMLConfiguration(path); 28 | config.setValidating(true); 29 | 30 | List properties = config.configurationsAt(".property"); 31 | for (HierarchicalConfiguration hc : properties) { 32 | String name = hc.getString("name"); 33 | String value = hc.getString("value"); 34 | conf.setProperty(name, value); 35 | } 36 | } catch (ConfigurationException e) { 37 | Throwables.propagate(e); 38 | } 39 | 40 | return conf; 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /hdata-core/src/main/java/com/github/stuxuhai/hdata/config/DefaultJobConfig.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.config; 2 | 3 | import com.github.stuxuhai.hdata.api.*; 4 | import com.github.stuxuhai.hdata.core.PluginLoader; 5 | import com.github.stuxuhai.hdata.exception.HDataException; 6 | import com.github.stuxuhai.hdata.util.PluginUtils; 7 | import com.google.common.base.Preconditions; 8 | import com.google.common.base.Throwables; 9 | import org.apache.commons.configuration.ConfigurationException; 10 | import org.apache.commons.configuration.SubnodeConfiguration; 11 | import org.apache.commons.configuration.XMLConfiguration; 12 | import org.apache.commons.lang.StringUtils; 13 | 14 | import 
java.util.Iterator; 15 | 16 | public class DefaultJobConfig extends JobConfig { 17 | 18 | private final PluginConfig readerConfig; 19 | private final PluginConfig writerConfig; 20 | private final String readerName; 21 | private final String writerName; 22 | private static final long serialVersionUID = 1L; 23 | 24 | public DefaultJobConfig(String readerName, PluginConfig readerConfig, String writerName, PluginConfig writerConfig) { 25 | super(); 26 | this.readerName = readerName; 27 | this.readerConfig = readerConfig; 28 | this.writerName = writerName; 29 | this.writerConfig = writerConfig; 30 | } 31 | 32 | @Override 33 | public PluginConfig getReaderConfig() { 34 | return readerConfig; 35 | } 36 | 37 | @Override 38 | public PluginConfig getWriterConfig() { 39 | return writerConfig; 40 | } 41 | 42 | @Override 43 | public String getReaderName() { 44 | return readerName; 45 | } 46 | 47 | @Override 48 | public String getWriterName() { 49 | return writerName; 50 | } 51 | 52 | @Override 53 | public Reader newReader() { 54 | String readerClassName = PluginLoader.getReaderClassName(readerName); 55 | Preconditions.checkNotNull(readerClassName, "Can not find class for reader: " + readerName); 56 | 57 | try { 58 | return (Reader) PluginUtils.loadClass(readerName, readerClassName).newInstance(); 59 | } catch (Exception e) { 60 | throw new HDataException("Can not create new reader instance for: " + readerName, e); 61 | } 62 | } 63 | 64 | @Override 65 | public Splitter newSplitter() { 66 | Reader reader = newReader(); 67 | return reader.newSplitter(); 68 | } 69 | 70 | @Override 71 | public Writer newWriter() { 72 | String writerClassName = PluginLoader.getWriterClassName(writerName); 73 | Preconditions.checkNotNull(writerClassName, "Can not find class for writer: " + writerName); 74 | 75 | try { 76 | return (Writer) PluginUtils.loadClass(writerName, writerClassName).newInstance(); 77 | } catch (Exception e) { 78 | throw new HDataException("Can not create new writer instance for: " + writerName, e); 79 | } 80 | } 81 | 82 | public static DefaultJobConfig createFromXML(String path) { 83 | try { 84 | XMLConfiguration xmlConfig = new XMLConfiguration(path); 85 | xmlConfig.setValidating(true); 86 | 87 | PluginConfig readerPluginConfig = new PluginConfig(); 88 | String readerName = xmlConfig.getString("reader[@name]"); 89 | SubnodeConfiguration readerSc = xmlConfig.configurationAt("reader"); 90 | Iterator readerIt = readerSc.getKeys(); 91 | while (readerIt.hasNext()) { 92 | String key = readerIt.next(); 93 | if (!key.startsWith("[@")) { 94 | readerPluginConfig.setProperty(key.replace("..", "."), StringUtils.join(readerSc.getList(key), ",")); 95 | } 96 | } 97 | 98 | PluginConfig writerPluginConfig = new PluginConfig(); 99 | String writerName = xmlConfig.getString("writer[@name]"); 100 | SubnodeConfiguration writerSc = xmlConfig.configurationAt("writer"); 101 | Iterator writerIt = writerSc.getKeys(); 102 | while (writerIt.hasNext()) { 103 | String key = writerIt.next(); 104 | if (!key.startsWith("[@")) { 105 | writerPluginConfig.setProperty(key.replace("..", "."), writerSc.getString(key)); 106 | } 107 | } 108 | 109 | return new DefaultJobConfig(readerName, readerPluginConfig, writerName, writerPluginConfig); 110 | } catch (ConfigurationException e) { 111 | Throwables.propagate(e); 112 | } 113 | 114 | return null; 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /hdata-core/src/main/java/com/github/stuxuhai/hdata/core/DefaultRecord.java: 
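[Editor's note, not part of the repository] DefaultJobConfig can be built either from a job XML via createFromXML above, or programmatically, which is what CliDriver does when only the --reader/--writer options are given. A minimal sketch of the programmatic path, assuming the "console" reader and writer registered in conf/plugins.xml (class name JobSketch is illustrative):

import com.github.stuxuhai.hdata.api.JobConfig;
import com.github.stuxuhai.hdata.api.PluginConfig;
import com.github.stuxuhai.hdata.config.DefaultJobConfig;
import com.github.stuxuhai.hdata.core.HData;

public class JobSketch {
    public static void main(String[] args) {
        // Reader/writer names must match entries in conf/plugins.xml ("console" here).
        PluginConfig readerConfig = new PluginConfig();
        readerConfig.setString("parallelism", "1");   // key read by PluginConfig.getParallelism()

        PluginConfig writerConfig = new PluginConfig();

        JobConfig jobConfig = new DefaultJobConfig("console", readerConfig, "console", writerConfig);
        new HData().start(jobConfig);                 // same call CliDriver makes after parsing options
    }
}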
-------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.core; 2 | 3 | import com.github.stuxuhai.hdata.api.Record; 4 | 5 | public class DefaultRecord implements Record { 6 | 7 | private final Object[] fields; 8 | private int cursor; 9 | 10 | public DefaultRecord(int fieldCount) { 11 | fields = new Object[fieldCount]; 12 | } 13 | 14 | @Override 15 | public void add(int index, Object field) { 16 | fields[index] = field; 17 | this.cursor++; 18 | } 19 | 20 | @Override 21 | public void add(Object field) { 22 | add(cursor, field); 23 | } 24 | 25 | @Override 26 | public Object get(int index) { 27 | return fields[index]; 28 | } 29 | 30 | @Override 31 | public int size() { 32 | return fields.length; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /hdata-core/src/main/java/com/github/stuxuhai/hdata/core/DefaultRecordCollector.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.core; 2 | 3 | import java.util.concurrent.TimeUnit; 4 | 5 | import org.apache.logging.log4j.LogManager; 6 | import org.apache.logging.log4j.Logger; 7 | 8 | import com.carrotsearch.sizeof.RamUsageEstimator; 9 | import com.github.stuxuhai.hdata.api.Metric; 10 | import com.github.stuxuhai.hdata.api.Record; 11 | import com.github.stuxuhai.hdata.api.RecordCollector; 12 | import com.github.stuxuhai.hdata.api.Storage; 13 | import com.github.stuxuhai.hdata.util.Utils; 14 | import com.google.common.base.Stopwatch; 15 | 16 | public class DefaultRecordCollector implements RecordCollector { 17 | 18 | private static final Logger LOGGER = LogManager.getLogger(DefaultRecordCollector.class); 19 | 20 | private static final long SLEEP_MILL_SECONDS = 1000; 21 | 22 | private final Storage storage; 23 | private final Metric metric; 24 | private final long flowLimit; 25 | private final Stopwatch stopwatch = Stopwatch.createStarted(); 26 | 27 | public DefaultRecordCollector(Storage storage, Metric metric, long flowLimit) { 28 | this.storage = storage; 29 | this.metric = metric; 30 | this.flowLimit = flowLimit; 31 | LOGGER.info("The flow limit is {} bytes/s.", this.flowLimit); 32 | } 33 | 34 | @Override 35 | public void send(Record record) { 36 | // 限速 37 | if (flowLimit > 0) { 38 | while (true) { 39 | long currentSpeed = metric.getSpeed(); 40 | if (currentSpeed > flowLimit) { 41 | if (stopwatch.elapsed(TimeUnit.SECONDS) >= 5) { 42 | LOGGER.info("Current Speed is {} MB/s, sleeping...", String.format("%.2f", (double) currentSpeed / 1024 / 1024)); 43 | stopwatch.reset(); 44 | } 45 | Utils.sleep(SLEEP_MILL_SECONDS); 46 | } else { 47 | break; 48 | } 49 | } 50 | } 51 | 52 | storage.put(record); 53 | metric.getReadCount().incrementAndGet(); 54 | 55 | if (flowLimit > 0) { 56 | metric.getReadBytes().addAndGet(RamUsageEstimator.sizeOf(record)); 57 | } 58 | 59 | } 60 | 61 | @Override 62 | public void send(Record[] records) { 63 | storage.put(records); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /hdata-core/src/main/java/com/github/stuxuhai/hdata/core/DefaultStorage.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.core; 2 | 3 | import com.github.stuxuhai.hdata.api.JobContext; 4 | import com.github.stuxuhai.hdata.api.Record; 5 | import com.github.stuxuhai.hdata.api.Storage; 6 | import com.lmax.disruptor.EventTranslatorOneArg; 7 | import 
com.lmax.disruptor.RingBuffer; 8 | import com.lmax.disruptor.dsl.Disruptor; 9 | 10 | public class DefaultStorage implements Storage { 11 | 12 | private final Disruptor disruptor; 13 | private final RingBuffer ringBuffer; 14 | 15 | private static final EventTranslatorOneArg TRANSLATOR = new EventTranslatorOneArg() { 16 | 17 | @Override 18 | public void translateTo(RecordEvent event, long sequence, Record record) { 19 | event.setRecord(record); 20 | } 21 | }; 22 | 23 | public DefaultStorage(Disruptor disruptor, RecordWorkHandler[] handlers, JobContext context) { 24 | this.disruptor = disruptor; 25 | disruptor.setDefaultExceptionHandler(new RecordEventExceptionHandler(disruptor, context)); 26 | disruptor.handleEventsWithWorkerPool(handlers); 27 | ringBuffer = disruptor.start(); 28 | } 29 | 30 | @Override 31 | public void put(Record record) { 32 | disruptor.publishEvent(TRANSLATOR, record); 33 | } 34 | 35 | @Override 36 | public void put(Record[] records) { 37 | for (Record record : records) { 38 | put(record); 39 | } 40 | } 41 | 42 | @Override 43 | public boolean isEmpty() { 44 | return ringBuffer.remainingCapacity() == ringBuffer.getBufferSize(); 45 | } 46 | 47 | @Override 48 | public int size() { 49 | return ringBuffer.getBufferSize(); 50 | } 51 | 52 | @Override 53 | public void close() { 54 | disruptor.shutdown(); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /hdata-core/src/main/java/com/github/stuxuhai/hdata/core/PluginClassLoader.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.core; 2 | 3 | import java.net.URL; 4 | import java.net.URLClassLoader; 5 | 6 | public class PluginClassLoader extends URLClassLoader { 7 | 8 | public PluginClassLoader(URL[] urls) { 9 | super(urls, PluginClassLoader.class.getClassLoader()); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /hdata-core/src/main/java/com/github/stuxuhai/hdata/core/PluginLoader.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.core; 2 | 3 | import java.util.HashMap; 4 | import java.util.List; 5 | import java.util.Map; 6 | 7 | import org.apache.commons.configuration.ConfigurationException; 8 | import org.apache.commons.configuration.HierarchicalConfiguration; 9 | import org.apache.commons.configuration.XMLConfiguration; 10 | 11 | import com.github.stuxuhai.hdata.common.Constants; 12 | import com.github.stuxuhai.hdata.util.Utils; 13 | import com.google.common.base.Throwables; 14 | 15 | public class PluginLoader { 16 | 17 | private static Map readerMap; 18 | private static Map writerMap; 19 | 20 | public static String getReaderClassName(String name) { 21 | return readerMap.get(name); 22 | } 23 | 24 | public static String getWriterClassName(String name) { 25 | return writerMap.get(name); 26 | } 27 | 28 | static { 29 | readerMap = new HashMap(); 30 | writerMap = new HashMap(); 31 | 32 | String path = Utils.getConfigDir() + Constants.PLUGINS_XML; 33 | try { 34 | XMLConfiguration config = new XMLConfiguration(path); 35 | config.setValidating(true); 36 | 37 | List readerList = config.configurationsAt("readers.reader"); 38 | for (HierarchicalConfiguration hc : readerList) { 39 | String name = hc.getString("name"); 40 | String clazz = hc.getString("class"); 41 | readerMap.put(name, clazz); 42 | } 43 | 44 | List writerList = config.configurationsAt("writers.writer"); 45 | for 
(HierarchicalConfiguration hc : writerList) { 46 | String name = hc.getString("name"); 47 | String clazz = hc.getString("class"); 48 | writerMap.put(name, clazz); 49 | } 50 | } catch (ConfigurationException e) { 51 | Throwables.propagate(e); 52 | } 53 | } 54 | } -------------------------------------------------------------------------------- /hdata-core/src/main/java/com/github/stuxuhai/hdata/core/ReaderWorker.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.core; 2 | 3 | import java.util.concurrent.Callable; 4 | 5 | import com.github.stuxuhai.hdata.api.JobContext; 6 | import com.github.stuxuhai.hdata.api.PluginConfig; 7 | import com.github.stuxuhai.hdata.api.Reader; 8 | import com.github.stuxuhai.hdata.api.RecordCollector; 9 | 10 | public class ReaderWorker implements Callable { 11 | 12 | private final Reader reader; 13 | private final JobContext context; 14 | private final RecordCollector rc; 15 | private final PluginConfig readerConfig; 16 | 17 | public ReaderWorker(Reader reader, JobContext context, PluginConfig readerConfig, RecordCollector rc) { 18 | this.reader = reader; 19 | this.context = context; 20 | this.rc = rc; 21 | this.readerConfig = readerConfig; 22 | } 23 | 24 | @Override 25 | public Integer call() throws Exception { 26 | Thread.currentThread().setContextClassLoader(reader.getClass().getClassLoader()); 27 | reader.prepare(context, readerConfig); 28 | reader.execute(rc); 29 | reader.close(); 30 | return 0; 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /hdata-core/src/main/java/com/github/stuxuhai/hdata/core/RecordEvent.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.core; 2 | 3 | import com.github.stuxuhai.hdata.api.Record; 4 | import com.lmax.disruptor.EventFactory; 5 | 6 | public class RecordEvent { 7 | 8 | private Record record; 9 | 10 | public Record getRecord() { 11 | return record; 12 | } 13 | 14 | public void setRecord(Record record) { 15 | this.record = record; 16 | } 17 | 18 | public static final EventFactory FACTORY = new EventFactory() { 19 | 20 | public RecordEvent newInstance() { 21 | return new RecordEvent(); 22 | } 23 | }; 24 | 25 | } 26 | -------------------------------------------------------------------------------- /hdata-core/src/main/java/com/github/stuxuhai/hdata/core/RecordEventExceptionHandler.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.core; 2 | 3 | import org.apache.logging.log4j.LogManager; 4 | import org.apache.logging.log4j.Logger; 5 | 6 | import com.github.stuxuhai.hdata.api.JobContext; 7 | import com.google.common.base.Throwables; 8 | import com.lmax.disruptor.ExceptionHandler; 9 | import com.lmax.disruptor.dsl.Disruptor; 10 | 11 | public class RecordEventExceptionHandler implements ExceptionHandler { 12 | 13 | private final Disruptor disruptor; 14 | private final JobContext context; 15 | private static Logger LOGGER = LogManager.getLogger(RecordEventExceptionHandler.class); 16 | 17 | public RecordEventExceptionHandler(Disruptor disruptor, JobContext context) { 18 | this.disruptor = disruptor; 19 | this.context = context; 20 | } 21 | 22 | public void handleEventException(Throwable t, long sequence, Object event) { 23 | LOGGER.error(Throwables.getStackTraceAsString(t)); 24 | context.setWriterError(true); 25 | disruptor.shutdown(); 26 | } 27 | 28 | public 
void handleOnShutdownException(Throwable t) { 29 | LOGGER.error(Throwables.getStackTraceAsString(t)); 30 | disruptor.shutdown(); 31 | } 32 | 33 | public void handleOnStartException(Throwable t) { 34 | LOGGER.error(Throwables.getStackTraceAsString(t)); 35 | disruptor.shutdown(); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /hdata-core/src/main/java/com/github/stuxuhai/hdata/core/RecordWorkHandler.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.core; 2 | 3 | import com.github.stuxuhai.hdata.api.JobContext; 4 | import com.github.stuxuhai.hdata.api.Metric; 5 | import com.github.stuxuhai.hdata.api.PluginConfig; 6 | import com.github.stuxuhai.hdata.api.Writer; 7 | import com.lmax.disruptor.WorkHandler; 8 | 9 | public class RecordWorkHandler implements WorkHandler { 10 | 11 | private final Writer writer; 12 | private final JobContext context; 13 | private final PluginConfig writerConfig; 14 | private boolean writerPrepared; 15 | private final Metric metric; 16 | 17 | public RecordWorkHandler(Writer writer, JobContext context, PluginConfig writerConfig) { 18 | this.writer = writer; 19 | this.context = context; 20 | this.writerConfig = writerConfig; 21 | this.metric = context.getMetric(); 22 | } 23 | 24 | @Override 25 | public void onEvent(RecordEvent event) { 26 | if (!writerPrepared) { 27 | context.declareOutputFields(); 28 | Thread.currentThread().setContextClassLoader(writer.getClass().getClassLoader()); 29 | writer.prepare(context, writerConfig); 30 | writerPrepared = true; 31 | if (metric.getWriterStartTime() == 0) { 32 | metric.setWriterStartTime(System.currentTimeMillis()); 33 | } 34 | } 35 | 36 | writer.execute(event.getRecord()); 37 | metric.getWriteCount().incrementAndGet(); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /hdata-core/src/main/java/com/github/stuxuhai/hdata/core/WaitStrategyFactory.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.core; 2 | 3 | import java.util.List; 4 | 5 | import com.github.stuxuhai.hdata.exception.HDataException; 6 | import com.google.common.collect.Lists; 7 | import com.lmax.disruptor.BlockingWaitStrategy; 8 | import com.lmax.disruptor.BusySpinWaitStrategy; 9 | import com.lmax.disruptor.SleepingWaitStrategy; 10 | import com.lmax.disruptor.WaitStrategy; 11 | import com.lmax.disruptor.YieldingWaitStrategy; 12 | 13 | public class WaitStrategyFactory { 14 | 15 | private static final List WAIT_STRATEGY_SUPPORTED = Lists.newArrayList(BlockingWaitStrategy.class.getName(), 16 | BusySpinWaitStrategy.class.getName(), SleepingWaitStrategy.class.getName(), YieldingWaitStrategy.class.getName()); 17 | 18 | /** 19 | * 构造线程等待策略 20 | */ 21 | public static WaitStrategy build(String name) { 22 | if (WAIT_STRATEGY_SUPPORTED.contains(name)) { 23 | try { 24 | return (WaitStrategy) Class.forName(name).newInstance(); 25 | } catch (InstantiationException e) { 26 | throw new HDataException(e); 27 | } catch (IllegalAccessException e) { 28 | throw new HDataException(e); 29 | } catch (ClassNotFoundException e) { 30 | throw new HDataException(e); 31 | } 32 | } else { 33 | throw new HDataException("Invalid wait strategy: " + name); 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /hdata-core/src/main/java/com/github/stuxuhai/hdata/util/NumberUtils.java: 
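A minimal sketch (not part of this repository) of how the classes above fit together: WaitStrategyFactory.build() turns a class name from the engine configuration into a Disruptor WaitStrategy, which is then used together with RecordEvent.FACTORY to build the ring buffer that DefaultStorage wraps and that the RecordWorkHandler pool consumes. The class name, buffer size and thread factory below are illustrative assumptions, and the constructor shown assumes Disruptor 3.3+; treat this as a sketch of the wiring, not the engine's actual code.

import java.util.concurrent.Executors;

import com.lmax.disruptor.WaitStrategy;
import com.lmax.disruptor.dsl.Disruptor;
import com.lmax.disruptor.dsl.ProducerType;

// Hypothetical helper, assumed to live alongside the core classes above.
public final class DisruptorWiringSketch {

    public static Disruptor<RecordEvent> build() {
        // The strategy name would normally come from the engine configuration; hard-coded here as an assumption.
        WaitStrategy waitStrategy = WaitStrategyFactory.build("com.lmax.disruptor.BlockingWaitStrategy");
        return new Disruptor<RecordEvent>(RecordEvent.FACTORY, // event factory defined above
                2048,                             // ring buffer size, must be a power of two (illustrative value)
                Executors.defaultThreadFactory(),
                ProducerType.MULTI,               // several ReaderWorker threads may publish concurrently
                waitStrategy);
    }
}

DefaultStorage (above) then registers the RecordWorkHandler pool on this Disruptor and starts it, obtaining the RingBuffer it uses for put() and isEmpty().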
--------------------------------------------------------------------------------
1 | package com.github.stuxuhai.hdata.util;
2 |
3 | /**
4 |  * Utility class for number handling.
5 |  *
6 |  */
7 | public class NumberUtils {
8 |     /**
9 |      * Returns every integer between the two given values (inclusive), in ascending order.
10 |      *
11 |      */
12 |     public static int[] getRange(int before, int after) {
13 |         int bigger = Math.max(before, after);
14 |         int smaller = Math.min(before, after);
15 |
16 |         int[] range = new int[bigger + 1 - smaller];
17 |         for (int i = smaller; i <= bigger; i++) {
18 |             range[i - smaller] = i;
19 |         }
20 |
21 |         return range;
22 |     }
23 |
24 | }
25 |
--------------------------------------------------------------------------------
/hdata-core/src/main/java/com/github/stuxuhai/hdata/util/PluginUtils.java:
--------------------------------------------------------------------------------
1 | package com.github.stuxuhai.hdata.util;
2 |
3 | import java.io.File;
4 | import java.io.IOException;
5 | import java.net.MalformedURLException;
6 | import java.net.URL;
7 | import java.util.List;
8 | import java.util.Map;
9 | import java.util.Map.Entry;
10 |
11 | import org.apache.logging.log4j.LogManager;
12 | import org.apache.logging.log4j.Logger;
13 |
14 | import com.github.stuxuhai.hdata.core.PluginClassLoader;
15 | import com.github.stuxuhai.hdata.exception.HDataException;
16 | import com.google.common.collect.Lists;
17 | import com.google.common.collect.Maps;
18 |
19 | public class PluginUtils {
20 |
21 |     private static Map<String, PluginClassLoader> cache = Maps.newConcurrentMap();
22 |     private static final Logger LOGGER = LogManager.getLogger();
23 |
24 |     private static List<URL> listFileByPluginName(String pluginName) throws MalformedURLException {
25 |         List<URL> result = Lists.newArrayList();
26 |         File file = new File(PluginUtils.class.getProtectionDomain().getCodeSource().getLocation().getPath().replaceAll("/lib/.*\\.jar", "")
27 |                 + "/plugins/" + pluginName);
28 |         if (!file.exists()) {
29 |             throw new HDataException("Plugin not found: " + pluginName);
30 |         }
31 |
32 |         File[] jars = file.listFiles();
33 |         for (File jar : jars) {
34 |             result.add(jar.toURI().toURL());
35 |         }
36 |         return result;
37 |     }
38 |
39 |     public static Class<?> loadClass(String pluginName, String className) throws ClassNotFoundException, MalformedURLException {
40 |         List<URL> list = listFileByPluginName(pluginName);
41 |         PluginClassLoader classLoader = cache.get(pluginName);
42 |         if (classLoader == null) {
43 |             classLoader = new PluginClassLoader(list.toArray(new URL[list.size()]));
44 |             cache.put(pluginName, classLoader);
45 |         }
46 |         return classLoader.loadClass(className);
47 |     }
48 |
49 |     public static void closeURLClassLoader() {
50 |         for (Entry<String, PluginClassLoader> entry : cache.entrySet()) {
51 |             try {
52 |                 entry.getValue().close();
53 |             } catch (IOException e) {
54 |                 LOGGER.error("", e);
55 |             }
56 |         }
57 |     }
58 | }
59 |
--------------------------------------------------------------------------------
/hdata-core/src/main/java/com/github/stuxuhai/hdata/util/TypeConvertUtils.java:
--------------------------------------------------------------------------------
1 | package com.github.stuxuhai.hdata.util;
2 |
3 | import java.math.BigDecimal;
4 | import java.math.BigInteger;
5 |
6 | public class TypeConvertUtils {
7 |
8 |     /**
9 |      * Converts the given value to the target type.
10 |      *
11 |      * @param src
12 |      * @param clazz
13 |      * @return
14 |      */
15 |     public static Object convert(Object src, Class<?> clazz) {
16 |         if (src == null) {
17 |             return null;
18 |         } else if (src instanceof String) {
19 |             if (clazz == Integer.class) {
20 |                 return Integer.valueOf(src.toString());
21 |             } else if (clazz == Long.class) {
22 |
return Long.valueOf(src.toString()); 23 | } else if (clazz == Double.class) { 24 | return Double.valueOf(src.toString()); 25 | } else if (clazz == Float.class) { 26 | return Float.valueOf(src.toString()); 27 | } else if (clazz == Boolean.class) { 28 | return Boolean.valueOf(src.toString()); 29 | } else if (clazz == Short.class) { 30 | return Short.valueOf(src.toString()); 31 | } else if (clazz == Byte.class) { 32 | return Byte.valueOf(src.toString()); 33 | } else if (clazz == BigInteger.class) { 34 | return BigInteger.valueOf(Long.valueOf(src.toString())); 35 | } else if (clazz == BigDecimal.class) { 36 | return new BigDecimal(src.toString()); 37 | } 38 | } else if (clazz == String.class) { 39 | return src.toString(); 40 | } 41 | return src; 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /hdata-core/src/main/java/com/github/stuxuhai/hdata/util/Utils.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.util; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Arrays; 5 | import java.util.List; 6 | import java.util.regex.Matcher; 7 | import java.util.regex.Pattern; 8 | 9 | import org.apache.commons.lang3.ArrayUtils; 10 | import org.apache.commons.lang3.StringUtils; 11 | 12 | import com.google.common.base.Throwables; 13 | 14 | public class Utils { 15 | 16 | /** 17 | * 线程休眠 18 | * 19 | * @param millis 20 | */ 21 | public static void sleep(long millis) { 22 | try { 23 | Thread.sleep(millis); 24 | } catch (InterruptedException e) { 25 | Throwables.propagate(e); 26 | } 27 | } 28 | 29 | public static List getColumns(String[] columns, String[] excludeColumns) { 30 | if (excludeColumns == null || excludeColumns.length < 1) { 31 | return columns == null ? 
null : Arrays.asList(columns); 32 | } 33 | 34 | List list = new ArrayList(); 35 | for (String column : columns) { 36 | if (!ArrayUtils.contains(excludeColumns, column)) { 37 | list.add(column); 38 | } 39 | } 40 | return list; 41 | } 42 | 43 | public static List getColumns(List columns, String[] excludeColumns) { 44 | return getColumns(columns.toArray(new String[columns.size()]), excludeColumns); 45 | } 46 | 47 | /** 48 | * 修复HDFS路径(将主机名改成IP) 49 | * 50 | * @param srcLocaltion 51 | * @param metastoreUris 52 | * @return 53 | */ 54 | public static String fixLocaltion(String srcLocaltion, String metastoreUris) { 55 | Matcher ipMatcher = Pattern.compile("(\\d+\\.){3}\\d+").matcher(metastoreUris.split(",")[0].trim()); 56 | if (ipMatcher.find()) { 57 | String masterIP = ipMatcher.group(); 58 | return srcLocaltion.replaceFirst("^hdfs://\\w+:", "hdfs://" + masterIP + ":"); 59 | } 60 | return srcLocaltion; 61 | } 62 | 63 | /** 64 | * 解析分区值 65 | * 66 | * @param partitions 67 | * @return 68 | */ 69 | public static List parsePartitionValue(String partitions) { 70 | List partitionValues = new ArrayList(); 71 | String[] partitionKeyValue = partitions.split("\\s*,\\s*"); 72 | for (String kv : partitionKeyValue) { 73 | String[] tokens = StringUtils.splitPreserveAllTokens(kv, "="); 74 | partitionValues.add(tokens[1]); 75 | } 76 | return partitionValues; 77 | } 78 | 79 | /** 80 | * 获取配置目录 81 | * 82 | * @return 83 | */ 84 | public static String getConfigDir() { 85 | return System.getProperty("hdata.conf.dir") + System.getProperty("file.separator"); 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /hdata-csv/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.github.stuxuhai 6 | hdata 7 | 0.2.8 8 | 9 | hdata-csv 10 | 11 | 12 | 13 | com.github.stuxuhai 14 | hdata-api 15 | provided 16 | 17 | 18 | org.apache.commons 19 | commons-lang3 20 | 3.4 21 | provided 22 | 23 | 24 | org.apache.commons 25 | commons-csv 26 | 1.2 27 | 28 | 29 | com.google.guava 30 | guava 31 | provided 32 | 33 | 34 | -------------------------------------------------------------------------------- /hdata-csv/src/main/java/com/github/stuxuhai/hdata/plugin/FormatConf.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin; 2 | 3 | 4 | import org.apache.commons.csv.CSVFormat; 5 | /** 6 | * Created by dog on 4/15/17. 
7 | */ 8 | final public class FormatConf { 9 | 10 | public static void confCsvFormat(String format,CSVFormat csvFormat){ 11 | if (format == null) { 12 | csvFormat = CSVFormat.DEFAULT; 13 | return; 14 | } 15 | switch (format){ 16 | case "excel": 17 | csvFormat = CSVFormat.EXCEL; 18 | break; 19 | case "mysql": 20 | csvFormat = CSVFormat.MYSQL; 21 | break; 22 | case "tdf": 23 | csvFormat = CSVFormat.TDF; 24 | break; 25 | case "rfc4180": 26 | csvFormat = CSVFormat.RFC4180; 27 | break; 28 | default: 29 | csvFormat = CSVFormat.DEFAULT; 30 | break; 31 | } 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /hdata-csv/src/main/java/com/github/stuxuhai/hdata/plugin/reader/csv/CSVReader.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.csv; 2 | 3 | import java.io.FileInputStream; 4 | import java.io.IOException; 5 | import java.io.InputStreamReader; 6 | import java.io.UnsupportedEncodingException; 7 | 8 | import com.github.stuxuhai.hdata.plugin.FormatConf; 9 | import org.apache.commons.csv.CSVFormat; 10 | import org.apache.commons.csv.CSVRecord; 11 | 12 | import com.github.stuxuhai.hdata.api.DefaultRecord; 13 | import com.github.stuxuhai.hdata.api.JobContext; 14 | import com.github.stuxuhai.hdata.api.PluginConfig; 15 | import com.github.stuxuhai.hdata.api.Reader; 16 | import com.github.stuxuhai.hdata.api.Record; 17 | import com.github.stuxuhai.hdata.api.RecordCollector; 18 | import com.github.stuxuhai.hdata.api.Splitter; 19 | import com.github.stuxuhai.hdata.exception.HDataException; 20 | 21 | public class CSVReader extends Reader { 22 | 23 | private String path = null; 24 | private int startRow = 1; 25 | private String encoding = null; 26 | private String format; 27 | private CSVFormat csvFormat = CSVFormat.DEFAULT; 28 | 29 | @Override 30 | public void prepare(JobContext context, PluginConfig readerConfig) { 31 | path = readerConfig.getString(CSVReaderProperties.PATH); 32 | startRow = readerConfig.getInt(CSVReaderProperties.START_ROW, 1); 33 | encoding = readerConfig.getString(CSVReaderProperties.ENCODING, "UTF-8"); 34 | format = readerConfig.getString(CSVReaderProperties.FORMAT); 35 | FormatConf.confCsvFormat(format,csvFormat); 36 | } 37 | 38 | @Override 39 | public void execute(RecordCollector recordCollector) { 40 | long currentRow = 0; 41 | try { 42 | java.io.Reader in = new InputStreamReader(new FileInputStream(path), encoding); 43 | Iterable records = csvFormat.parse(in); 44 | for (CSVRecord csvRecord : records) { 45 | currentRow++; 46 | if (currentRow >= startRow) { 47 | Record hdataRecord = new DefaultRecord(csvRecord.size()); 48 | for (int i = 0, len = csvRecord.size(); i < len; i++) { 49 | hdataRecord.add(csvRecord.get(i)); 50 | } 51 | recordCollector.send(hdataRecord); 52 | } 53 | } 54 | } catch (UnsupportedEncodingException e) { 55 | throw new HDataException(e); 56 | } catch (IOException e) { 57 | throw new HDataException(e); 58 | } 59 | } 60 | 61 | @Override 62 | public Splitter newSplitter() { 63 | return new CSVSplitter(); 64 | } 65 | 66 | } 67 | -------------------------------------------------------------------------------- /hdata-csv/src/main/java/com/github/stuxuhai/hdata/plugin/reader/csv/CSVReaderProperties.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.csv; 2 | 3 | public class CSVReaderProperties { 4 | public static final String PATH = "path"; 5 | 
public static final String START_ROW = "start.row"; 6 | public static final String ENCODING = "encoding"; 7 | public static final String FORMAT = "format"; 8 | } 9 | -------------------------------------------------------------------------------- /hdata-csv/src/main/java/com/github/stuxuhai/hdata/plugin/reader/csv/CSVSplitter.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.csv; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import com.github.stuxuhai.hdata.api.JobConfig; 7 | import com.github.stuxuhai.hdata.api.PluginConfig; 8 | import com.github.stuxuhai.hdata.api.Splitter; 9 | import com.google.common.base.Preconditions; 10 | 11 | public class CSVSplitter extends Splitter { 12 | 13 | @Override 14 | public List split(JobConfig jobConfig) { 15 | List list = new ArrayList(); 16 | PluginConfig readerConfig = jobConfig.getReaderConfig(); 17 | 18 | String paths = readerConfig.getString(CSVReaderProperties.PATH); 19 | Preconditions.checkNotNull(paths, "CSV reader required property: path"); 20 | 21 | if (paths != null) { 22 | String[] pathArray = paths.split(","); 23 | for (String path : pathArray) { 24 | if (!path.trim().isEmpty()) { 25 | PluginConfig pluginConfig = (PluginConfig) readerConfig.clone(); 26 | pluginConfig.put(CSVReaderProperties.PATH, path); 27 | list.add(pluginConfig); 28 | } 29 | } 30 | } 31 | 32 | return list; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /hdata-csv/src/main/java/com/github/stuxuhai/hdata/plugin/writer/csv/CSVWriter.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.writer.csv; 2 | 3 | import java.io.FileOutputStream; 4 | import java.io.IOException; 5 | import java.io.OutputStreamWriter; 6 | import java.sql.Timestamp; 7 | import java.text.DateFormat; 8 | import java.text.SimpleDateFormat; 9 | import java.util.ArrayList; 10 | import java.util.List; 11 | import java.util.concurrent.atomic.AtomicInteger; 12 | import java.util.regex.Matcher; 13 | import java.util.regex.Pattern; 14 | 15 | import com.github.stuxuhai.hdata.plugin.FormatConf; 16 | import org.apache.commons.csv.CSVFormat; 17 | import org.apache.commons.csv.CSVPrinter; 18 | import org.apache.commons.lang3.StringEscapeUtils; 19 | 20 | import com.github.stuxuhai.hdata.api.Fields; 21 | import com.github.stuxuhai.hdata.api.JobContext; 22 | import com.github.stuxuhai.hdata.api.PluginConfig; 23 | import com.github.stuxuhai.hdata.api.Record; 24 | import com.github.stuxuhai.hdata.api.Writer; 25 | import com.github.stuxuhai.hdata.exception.HDataException; 26 | import com.google.common.base.Preconditions; 27 | 28 | public class CSVWriter extends Writer { 29 | 30 | private String path = null; 31 | private String encoding = null; 32 | private String separator = null; 33 | private java.io.Writer writer; 34 | private CSVPrinter csvPrinter; 35 | private Fields fields; 36 | private boolean showColumns; 37 | private boolean showTypesAndComments; 38 | private String[] types; 39 | private String[] comments; 40 | private List csvList = new ArrayList(); 41 | private static AtomicInteger sequence = new AtomicInteger(0); 42 | private DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); 43 | private static final Pattern REG_FILE_PATH_WITHOUT_EXTENSION = Pattern.compile(".*?(?=\\.\\w+$)"); 44 | private static final Pattern REG_FILE_EXTENSION = 
Pattern.compile("(\\.\\w+)$"); 45 | private String format; 46 | private CSVFormat csvFormat = CSVFormat.DEFAULT; 47 | 48 | @Override 49 | public void prepare(JobContext context, PluginConfig writerConfig) { 50 | path = writerConfig.getString(CSVWriterProperties.PATH); 51 | Preconditions.checkNotNull(path, "CSV writer required property: path"); 52 | 53 | encoding = writerConfig.getString(CSVWriterProperties.ENCODING, "UTF-8"); 54 | separator = StringEscapeUtils.unescapeJava(writerConfig.getString(CSVWriterProperties.SEPARATOR, ",")); 55 | 56 | format = writerConfig.getString(CSVWriterProperties.FORMAT); 57 | FormatConf.confCsvFormat(format,csvFormat); 58 | 59 | fields = context.getFields(); 60 | showColumns = writerConfig.getBoolean(CSVWriterProperties.SHOW_COLUMNS, false); 61 | showTypesAndComments = writerConfig.getBoolean(CSVWriterProperties.SHOW_TYPES_AND_COMMENTS, false); 62 | if (showTypesAndComments) { 63 | types = context.getJobConfig().getString("types").split("\001"); 64 | comments = context.getJobConfig().getString("comments").split("\001"); 65 | } 66 | 67 | int parallelism = writerConfig.getParallelism(); 68 | if (parallelism > 1) { 69 | String filePathWithoutExtension = ""; 70 | String fileExtension = ""; 71 | Matcher m1 = REG_FILE_PATH_WITHOUT_EXTENSION.matcher(path.trim()); 72 | if (m1.find()) { 73 | filePathWithoutExtension = m1.group(); 74 | } 75 | 76 | Matcher m2 = REG_FILE_EXTENSION.matcher(path.trim()); 77 | if (m2.find()) { 78 | fileExtension = m2.group(); 79 | } 80 | path = String.format("%s_%04d%s", filePathWithoutExtension, sequence.getAndIncrement(), fileExtension); 81 | } 82 | 83 | try { 84 | writer = new OutputStreamWriter(new FileOutputStream(path), encoding); 85 | } catch (Exception e) { 86 | throw new HDataException(e); 87 | } 88 | } 89 | 90 | @Override 91 | public void execute(Record record) { 92 | if (csvPrinter == null) { 93 | try { 94 | csvPrinter = new CSVPrinter(writer, csvFormat.withDelimiter(separator.charAt(0))); 95 | if (showTypesAndComments) { 96 | for (String type : types) { 97 | csvList.add(type); 98 | } 99 | csvPrinter.printRecord(csvList); 100 | csvList.clear(); 101 | 102 | for (String comment : comments) { 103 | csvList.add(comment); 104 | } 105 | csvPrinter.printRecord(csvList); 106 | csvList.clear(); 107 | } 108 | 109 | if (showColumns) { 110 | for (Object object : fields) { 111 | csvList.add(object); 112 | } 113 | csvPrinter.printRecord(csvList); 114 | csvList.clear(); 115 | } 116 | } catch (IOException e) { 117 | throw new HDataException(e); 118 | } 119 | } 120 | 121 | for (int i = 0, len = record.size(); i < len; i++) { 122 | Object obj = record.get(i); 123 | if (obj instanceof Timestamp) { 124 | csvList.add(dateFormat.format(obj)); 125 | } else { 126 | csvList.add(obj); 127 | } 128 | } 129 | 130 | try { 131 | csvPrinter.printRecord(csvList); 132 | } catch (IOException e) { 133 | throw new HDataException(e); 134 | } 135 | csvList.clear(); 136 | } 137 | 138 | @Override 139 | public void close() { 140 | if (csvPrinter != null) { 141 | try { 142 | csvPrinter.close(); 143 | } catch (IOException e) { 144 | throw new HDataException(e); 145 | } 146 | } 147 | 148 | if (writer != null) { 149 | try { 150 | writer.close(); 151 | } catch (IOException e) { 152 | throw new HDataException(e); 153 | } 154 | } 155 | } 156 | 157 | } 158 | -------------------------------------------------------------------------------- /hdata-csv/src/main/java/com/github/stuxuhai/hdata/plugin/writer/csv/CSVWriterProperties.java: 
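A note on the format handling shared by CSVReader and CSVWriter above: confCsvFormat(String, CSVFormat) only reassigns its own parameter, and because Java passes object references by value that reassignment never reaches the caller, so the reader's and writer's csvFormat fields effectively stay CSVFormat.DEFAULT whatever the configured format is. Below is a small sketch of a variant that returns the resolved format instead; the class name is hypothetical, everything else mirrors FormatConf above.

import org.apache.commons.csv.CSVFormat;

// Hypothetical replacement for FormatConf, shown for illustration only.
public final class CsvFormatResolver {

    // Maps the "format" job property to a commons-csv CSVFormat, falling back to DEFAULT.
    public static CSVFormat resolve(String format) {
        if (format == null) {
            return CSVFormat.DEFAULT;
        }
        switch (format) {
            case "excel":
                return CSVFormat.EXCEL;
            case "mysql":
                return CSVFormat.MYSQL;
            case "tdf":
                return CSVFormat.TDF;
            case "rfc4180":
                return CSVFormat.RFC4180;
            default:
                return CSVFormat.DEFAULT;
        }
    }
}

Callers would then assign the result, e.g. csvFormat = CsvFormatResolver.resolve(format); inside prepare().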
-------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.writer.csv; 2 | 3 | public class CSVWriterProperties { 4 | public static final String PATH = "path"; 5 | public static final String ENCODING = "encoding"; 6 | public static final String SEPARATOR = "separator"; 7 | public static final String SHOW_COLUMNS = "show.columns"; 8 | public static final String SHOW_TYPES_AND_COMMENTS = "show.types.and.comments"; 9 | public static final String FORMAT = "format"; 10 | } 11 | -------------------------------------------------------------------------------- /hdata-excel/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.github.stuxuhai 6 | hdata 7 | 0.2.8 8 | 9 | hdata-excel 10 | 11 | 12 | 13 | com.github.stuxuhai 14 | hdata-api 15 | provided 16 | 17 | 18 | com.google.guava 19 | guava 20 | provided 21 | 22 | 23 | org.apache.poi 24 | poi 25 | 3.14 26 | 27 | 28 | org.apache.poi 29 | poi-ooxml 30 | 3.14 31 | 32 | 33 | -------------------------------------------------------------------------------- /hdata-excel/src/main/java/com/github/stuxuhai/hdata/plugin/excel/ExcelProperties.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.excel; 2 | 3 | public interface ExcelProperties { 4 | public static final String PATH = "path"; 5 | public static final String INCLUDE_COLUMN_NAMES = "include.column.names"; 6 | } 7 | -------------------------------------------------------------------------------- /hdata-excel/src/main/java/com/github/stuxuhai/hdata/plugin/excel/reader/ExcelReader.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.excel.reader; 2 | 3 | import java.io.File; 4 | import java.io.FileInputStream; 5 | import java.io.IOException; 6 | 7 | import org.apache.poi.hssf.usermodel.HSSFWorkbook; 8 | import org.apache.poi.openxml4j.exceptions.InvalidFormatException; 9 | import org.apache.poi.ss.usermodel.Row; 10 | import org.apache.poi.ss.usermodel.Sheet; 11 | import org.apache.poi.ss.usermodel.Workbook; 12 | import org.apache.poi.xssf.usermodel.XSSFWorkbook; 13 | 14 | import com.github.stuxuhai.hdata.api.DefaultRecord; 15 | import com.github.stuxuhai.hdata.api.Fields; 16 | import com.github.stuxuhai.hdata.api.JobContext; 17 | import com.github.stuxuhai.hdata.api.OutputFieldsDeclarer; 18 | import com.github.stuxuhai.hdata.api.PluginConfig; 19 | import com.github.stuxuhai.hdata.api.Reader; 20 | import com.github.stuxuhai.hdata.api.Record; 21 | import com.github.stuxuhai.hdata.api.RecordCollector; 22 | import com.github.stuxuhai.hdata.api.Splitter; 23 | import com.github.stuxuhai.hdata.exception.HDataException; 24 | import com.github.stuxuhai.hdata.plugin.excel.ExcelProperties; 25 | import com.google.common.base.Preconditions; 26 | 27 | public class ExcelReader extends Reader { 28 | 29 | private Workbook workbook = null; 30 | private boolean includeColumnNames = false; 31 | private Fields fields = new Fields(); 32 | 33 | @Override 34 | public void prepare(JobContext context, PluginConfig readerConfig) { 35 | String path = readerConfig.getString(ExcelProperties.PATH); 36 | Preconditions.checkNotNull(path, "Excel reader required property: path"); 37 | 38 | try { 39 | if (path.endsWith(".xlsx")) { 40 | workbook = new XSSFWorkbook(new File(path)); 41 | } else { 42 | workbook = new HSSFWorkbook(new FileInputStream(new 
File(path))); 43 | } 44 | } catch (InvalidFormatException | IOException e) { 45 | throw new HDataException(e); 46 | } 47 | 48 | includeColumnNames = readerConfig.getBoolean(ExcelProperties.INCLUDE_COLUMN_NAMES, false); 49 | } 50 | 51 | @Override 52 | public void execute(RecordCollector recordCollector) { 53 | if (workbook.getNumberOfSheets() > 0) { 54 | Sheet sheet = workbook.getSheetAt(0); 55 | 56 | if (includeColumnNames && sheet.getPhysicalNumberOfRows() > 0) { 57 | Row row = sheet.getRow(0); 58 | for (int cellIndex = row.getFirstCellNum(), cellLength = row 59 | .getPhysicalNumberOfCells(); cellIndex < cellLength; cellIndex++) { 60 | fields.add(row.getCell(cellIndex).toString()); 61 | } 62 | } 63 | 64 | int startRow = includeColumnNames ? 1 : 0; 65 | for (int rowIndex = startRow, rowLength = sheet 66 | .getPhysicalNumberOfRows(); rowIndex < rowLength; rowIndex++) { 67 | Row row = sheet.getRow(rowIndex); 68 | Record record = new DefaultRecord(row.getPhysicalNumberOfCells()); 69 | for (int cellIndex = row.getFirstCellNum(), cellLength = row 70 | .getPhysicalNumberOfCells(); cellIndex < cellLength; cellIndex++) { 71 | record.add(row.getCell(cellIndex).toString()); 72 | } 73 | 74 | recordCollector.send(record); 75 | } 76 | } 77 | } 78 | 79 | @Override 80 | public void close() { 81 | if (workbook != null) { 82 | try { 83 | workbook.close(); 84 | } catch (IOException e) { 85 | throw new HDataException(e); 86 | } 87 | } 88 | } 89 | 90 | @Override 91 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 92 | declarer.declare(fields); 93 | } 94 | 95 | @Override 96 | public Splitter newSplitter() { 97 | return null; 98 | } 99 | 100 | } 101 | -------------------------------------------------------------------------------- /hdata-excel/src/main/java/com/github/stuxuhai/hdata/plugin/excel/writer/ExcelWriter.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.excel.writer; 2 | 3 | import java.io.FileOutputStream; 4 | import java.io.IOException; 5 | 6 | import org.apache.poi.ss.usermodel.Cell; 7 | import org.apache.poi.ss.usermodel.Row; 8 | import org.apache.poi.ss.usermodel.Sheet; 9 | import org.apache.poi.ss.usermodel.Workbook; 10 | import org.apache.poi.xssf.usermodel.XSSFCell; 11 | import org.apache.poi.xssf.usermodel.XSSFRichTextString; 12 | import org.apache.poi.xssf.usermodel.XSSFWorkbook; 13 | 14 | import com.github.stuxuhai.hdata.api.Fields; 15 | import com.github.stuxuhai.hdata.api.JobContext; 16 | import com.github.stuxuhai.hdata.api.PluginConfig; 17 | import com.github.stuxuhai.hdata.api.Record; 18 | import com.github.stuxuhai.hdata.api.Writer; 19 | import com.github.stuxuhai.hdata.exception.HDataException; 20 | import com.github.stuxuhai.hdata.plugin.excel.ExcelProperties; 21 | import com.google.common.base.Preconditions; 22 | 23 | public class ExcelWriter extends Writer { 24 | 25 | private String path = null; 26 | private boolean includeColumnNames = true; 27 | private Workbook workbook = null; 28 | private Sheet sheet = null; 29 | private int rowIndex = 0; 30 | 31 | @Override 32 | public void prepare(JobContext context, PluginConfig writerConfig) { 33 | path = writerConfig.getString(ExcelProperties.PATH); 34 | Preconditions.checkNotNull(path, "Excel writer required property: path"); 35 | 36 | includeColumnNames = writerConfig.getBoolean(ExcelProperties.INCLUDE_COLUMN_NAMES, false); 37 | 38 | workbook = new XSSFWorkbook(); 39 | sheet = workbook.createSheet("工作表1"); 40 | 41 | if 
(includeColumnNames) { 42 | Fields fields = context.getFields(); 43 | if (fields != null && fields.size() > 0) { 44 | Row row = sheet.createRow(rowIndex++); 45 | for (int i = 0, len = fields.size(); i < len; i++) { 46 | Cell cell = row.createCell(i); 47 | cell.setCellType(XSSFCell.CELL_TYPE_STRING); 48 | Object value = fields.get(i); 49 | XSSFRichTextString content = new XSSFRichTextString(value != null ? value.toString() : null); 50 | cell.setCellValue(content); 51 | } 52 | } 53 | } 54 | } 55 | 56 | @Override 57 | public void execute(Record record) { 58 | Row row = sheet.createRow(rowIndex++); 59 | for (int i = 0, len = record.size(); i < len; i++) { 60 | Cell cell = row.createCell(i); 61 | cell.setCellType(XSSFCell.CELL_TYPE_STRING); 62 | Object value = record.get(i); 63 | XSSFRichTextString content = new XSSFRichTextString(value != null ? value.toString() : null); 64 | cell.setCellValue(content); 65 | } 66 | } 67 | 68 | @Override 69 | public void close() { 70 | if (workbook != null) { 71 | try { 72 | FileOutputStream fos = new FileOutputStream(path); 73 | workbook.write(fos); 74 | fos.flush(); 75 | fos.close(); 76 | workbook.close(); 77 | } catch (IOException e) { 78 | throw new HDataException(e); 79 | } 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /hdata-ftp/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.github.stuxuhai 6 | hdata 7 | 0.2.8 8 | 9 | hdata-ftp 10 | 11 | 12 | 13 | com.github.stuxuhai 14 | hdata-api 15 | provided 16 | 17 | 18 | org.apache.commons 19 | commons-lang3 20 | 3.4 21 | 22 | 23 | commons-net 24 | commons-net 25 | 3.4 26 | 27 | 28 | com.google.guava 29 | guava 30 | provided 31 | 32 | 33 | -------------------------------------------------------------------------------- /hdata-ftp/src/main/java/com/github/stuxuhai/hdata/ftp/FTPUtils.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.ftp; 2 | 3 | import java.io.IOException; 4 | import java.net.SocketException; 5 | import java.util.List; 6 | import java.util.regex.Pattern; 7 | 8 | import org.apache.commons.net.ftp.FTP; 9 | import org.apache.commons.net.ftp.FTPClient; 10 | import org.apache.commons.net.ftp.FTPFile; 11 | import org.apache.commons.net.ftp.FTPReply; 12 | import org.apache.logging.log4j.LogManager; 13 | import org.apache.logging.log4j.Logger; 14 | 15 | import com.google.common.base.Throwables; 16 | 17 | public class FTPUtils { 18 | 19 | private static final Logger LOGGER = LogManager.getLogger(FTPUtils.class); 20 | 21 | public static FTPClient getFtpClient(String host, int port, String username, String password) 22 | throws SocketException, IOException { 23 | String LOCAL_CHARSET = "GB18030"; 24 | FTPClient ftpClient = new FTPClient(); 25 | ftpClient.connect(host, port); 26 | // 检测服务器是否支持UTF-8编码,如果支持就用UTF-8编码,否则就使用本地编码GB18030 27 | if (FTPReply.isPositiveCompletion(ftpClient.sendCommand("OPTS UTF8", "ON"))) { 28 | LOCAL_CHARSET = "UTF-8"; 29 | } 30 | ftpClient.setControlEncoding(LOCAL_CHARSET); 31 | ftpClient.login(username, password); 32 | ftpClient.setBufferSize(1024 * 1024 * 16); 33 | ftpClient.enterLocalPassiveMode(); 34 | ftpClient.setFileType(FTP.BINARY_FILE_TYPE); 35 | ftpClient.setControlKeepAliveTimeout(60); 36 | return ftpClient; 37 | } 38 | 39 | /** 40 | * 获取FTP目录下的文件 41 | * 42 | * @param files 43 | * @param ftpClient 44 | * @param path 45 | * FTP目录 46 | * @param filenameRegexp 47 | * 文件名正则表达式 48 
| * @param recursive 49 | * 是否递归搜索 50 | * @throws IOException 51 | */ 52 | public static void listFile(List files, FTPClient ftpClient, String path, String filenameRegexp, 53 | boolean recursive) throws IOException { 54 | for (FTPFile ftpFile : ftpClient.listFiles(path)) { 55 | if (ftpFile.isFile()) { 56 | if (Pattern.matches(filenameRegexp, ftpFile.getName())) { 57 | files.add(path + "/" + ftpFile.getName()); 58 | } 59 | } else if (recursive && ftpFile.isDirectory()) { 60 | listFile(files, ftpClient, path + "/" + ftpFile.getName(), filenameRegexp, recursive); 61 | } 62 | } 63 | } 64 | 65 | /** 66 | * 关闭FTP客户端连接 67 | * 68 | * @param ftpClient 69 | */ 70 | public static void closeFtpClient(FTPClient ftpClient) { 71 | if (ftpClient != null) { 72 | try { 73 | ftpClient.disconnect(); 74 | } catch (IOException e) { 75 | LOGGER.error(Throwables.getStackTraceAsString(e)); 76 | } 77 | } 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /hdata-ftp/src/main/java/com/github/stuxuhai/hdata/plugin/reader/ftp/FTPReader.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.ftp; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.InputStream; 5 | import java.io.InputStreamReader; 6 | import java.util.ArrayList; 7 | import java.util.List; 8 | import java.util.zip.GZIPInputStream; 9 | 10 | import org.apache.commons.lang3.StringEscapeUtils; 11 | import org.apache.commons.lang3.StringUtils; 12 | import org.apache.commons.net.ftp.FTPClient; 13 | 14 | import com.github.stuxuhai.hdata.api.DefaultRecord; 15 | import com.github.stuxuhai.hdata.api.Fields; 16 | import com.github.stuxuhai.hdata.api.JobContext; 17 | import com.github.stuxuhai.hdata.api.OutputFieldsDeclarer; 18 | import com.github.stuxuhai.hdata.api.PluginConfig; 19 | import com.github.stuxuhai.hdata.api.Reader; 20 | import com.github.stuxuhai.hdata.api.Record; 21 | import com.github.stuxuhai.hdata.api.RecordCollector; 22 | import com.github.stuxuhai.hdata.api.Splitter; 23 | import com.github.stuxuhai.hdata.exception.HDataException; 24 | import com.github.stuxuhai.hdata.ftp.FTPUtils; 25 | 26 | public class FTPReader extends Reader { 27 | 28 | private Fields fields; 29 | private String host; 30 | private int port; 31 | private String username; 32 | private String password; 33 | private String fieldsSeparator; 34 | private String encoding; 35 | private int fieldsCount; 36 | private int startRow; 37 | private List files = new ArrayList(); 38 | 39 | @SuppressWarnings("unchecked") 40 | @Override 41 | public void prepare(JobContext context, PluginConfig readerConfig) { 42 | host = readerConfig.getString(FTPReaderProperties.HOST); 43 | port = readerConfig.getInt(FTPReaderProperties.PORT, 21); 44 | username = readerConfig.getString(FTPReaderProperties.USERNAME, "anonymous"); 45 | password = readerConfig.getString(FTPReaderProperties.PASSWORD, ""); 46 | fieldsSeparator = StringEscapeUtils 47 | .unescapeJava(readerConfig.getString(FTPReaderProperties.FIELDS_SEPARATOR, "\t")); 48 | encoding = readerConfig.getString(FTPReaderProperties.ENCODING, "UTF-8"); 49 | files = (List) readerConfig.get(FTPReaderProperties.FILES); 50 | fieldsCount = readerConfig.getInt(FTPReaderProperties.FIELDS_COUNT, 0); 51 | startRow = readerConfig.getInt(FTPReaderProperties.START_ROW, 1); 52 | 53 | if (readerConfig.containsKey(FTPReaderProperties.SCHEMA)) { 54 | fields = new Fields(); 55 | String[] tokens = 
readerConfig.getString(FTPReaderProperties.SCHEMA).split("\\s*,\\s*"); 56 | for (String field : tokens) { 57 | fields.add(field); 58 | } 59 | } 60 | } 61 | 62 | @Override 63 | public void execute(RecordCollector recordCollector) { 64 | FTPClient ftpClient = null; 65 | try { 66 | ftpClient = FTPUtils.getFtpClient(host, port, username, password); 67 | for (String file : files) { 68 | InputStream is = ftpClient.retrieveFileStream(file); 69 | BufferedReader br = null; 70 | if (file.endsWith(".gz")) { 71 | GZIPInputStream gzin = new GZIPInputStream(is); 72 | br = new BufferedReader(new InputStreamReader(gzin, encoding)); 73 | } else { 74 | br = new BufferedReader(new InputStreamReader(is, encoding)); 75 | } 76 | 77 | String line = null; 78 | long currentRow = 0; 79 | while ((line = br.readLine()) != null) { 80 | currentRow++; 81 | if (currentRow >= startRow) { 82 | String[] tokens = StringUtils.splitPreserveAllTokens(line, fieldsSeparator); 83 | if (tokens.length >= fieldsCount) { 84 | Record record = new DefaultRecord(tokens.length); 85 | for (String field : tokens) { 86 | record.add(field); 87 | } 88 | recordCollector.send(record); 89 | } 90 | } 91 | } 92 | ftpClient.completePendingCommand(); 93 | br.close(); 94 | is.close(); 95 | } 96 | } catch (Exception e) { 97 | throw new HDataException(e); 98 | } finally { 99 | FTPUtils.closeFtpClient(ftpClient); 100 | } 101 | } 102 | 103 | @Override 104 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 105 | declarer.declare(fields); 106 | } 107 | 108 | @Override 109 | public Splitter newSplitter() { 110 | return new FTPSplitter(); 111 | } 112 | 113 | } 114 | -------------------------------------------------------------------------------- /hdata-ftp/src/main/java/com/github/stuxuhai/hdata/plugin/reader/ftp/FTPReaderProperties.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.ftp; 2 | 3 | public class FTPReaderProperties { 4 | public static final String HOST = "host"; 5 | public static final String PORT = "port"; 6 | public static final String USERNAME = "username"; 7 | public static final String PASSWORD = "password"; 8 | public static final String DIR = "dir"; 9 | public static final String FILENAME = "filename"; 10 | public static final String RECURSIVE = "recursive"; 11 | public static final String ENCODING = "ftp.encoding"; 12 | public static final String FIELDS_SEPARATOR = "fields.separator"; 13 | public static final String SCHEMA = "schema"; 14 | public static final String FIELDS_COUNT = "fields.count"; 15 | public static final String FILES = "reader.files"; 16 | public static final String START_ROW = "start.row"; 17 | } 18 | -------------------------------------------------------------------------------- /hdata-ftp/src/main/java/com/github/stuxuhai/hdata/plugin/reader/ftp/FTPSplitter.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.ftp; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import org.apache.commons.net.ftp.FTPClient; 7 | import org.apache.logging.log4j.LogManager; 8 | import org.apache.logging.log4j.Logger; 9 | 10 | import com.github.stuxuhai.hdata.api.JobConfig; 11 | import com.github.stuxuhai.hdata.api.PluginConfig; 12 | import com.github.stuxuhai.hdata.api.Splitter; 13 | import com.github.stuxuhai.hdata.ftp.FTPUtils; 14 | import com.google.common.base.Preconditions; 15 | import com.google.common.base.Throwables; 16 | 
17 | public class FTPSplitter extends Splitter { 18 | 19 | private static final Logger LOGGER = LogManager.getLogger(FTPSplitter.class); 20 | 21 | @Override 22 | public List split(JobConfig jobConfig) { 23 | List list = new ArrayList(); 24 | PluginConfig readerConfig = jobConfig.getReaderConfig(); 25 | String host = readerConfig.getString(FTPReaderProperties.HOST); 26 | Preconditions.checkNotNull(host, "FTP reader required property: host"); 27 | 28 | int port = readerConfig.getInt(FTPReaderProperties.PORT, 21); 29 | String username = readerConfig.getString(FTPReaderProperties.USERNAME, "anonymous"); 30 | String password = readerConfig.getString(FTPReaderProperties.PASSWORD, ""); 31 | String dir = readerConfig.getString(FTPReaderProperties.DIR); 32 | Preconditions.checkNotNull(dir, "FTP reader required property: dir"); 33 | 34 | String filenameRegexp = readerConfig.getString(FTPReaderProperties.FILENAME); 35 | Preconditions.checkNotNull(filenameRegexp, "FTP reader required property: filename"); 36 | 37 | boolean recursive = readerConfig.getBoolean(FTPReaderProperties.RECURSIVE, false); 38 | int parallelism = readerConfig.getParallelism(); 39 | 40 | FTPClient ftpClient = null; 41 | try { 42 | ftpClient = FTPUtils.getFtpClient(host, port, username, password); 43 | List files = new ArrayList(); 44 | FTPUtils.listFile(files, ftpClient, dir, filenameRegexp, recursive); 45 | if (files.size() > 0) { 46 | if (parallelism == 1) { 47 | readerConfig.put(FTPReaderProperties.FILES, files); 48 | list.add(readerConfig); 49 | } else { 50 | double step = (double) files.size() / parallelism; 51 | for (int i = 0; i < parallelism; i++) { 52 | List splitedFiles = new ArrayList(); 53 | for (int start = (int) Math.ceil(step * i), end = (int) Math 54 | .ceil(step * (i + 1)); start < end; start++) { 55 | splitedFiles.add(files.get(start)); 56 | } 57 | PluginConfig pluginConfig = (PluginConfig) readerConfig.clone(); 58 | pluginConfig.put(FTPReaderProperties.FILES, splitedFiles); 59 | list.add(pluginConfig); 60 | } 61 | } 62 | } 63 | } catch (Exception e) { 64 | LOGGER.error(Throwables.getStackTraceAsString(e)); 65 | } finally { 66 | FTPUtils.closeFtpClient(ftpClient); 67 | } 68 | 69 | return list; 70 | } 71 | 72 | } 73 | -------------------------------------------------------------------------------- /hdata-ftp/src/main/java/com/github/stuxuhai/hdata/plugin/writer/ftp/FTPWriter.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.writer.ftp; 2 | 3 | import java.io.BufferedWriter; 4 | import java.io.IOException; 5 | import java.io.OutputStream; 6 | import java.io.OutputStreamWriter; 7 | import java.util.concurrent.atomic.AtomicInteger; 8 | import java.util.regex.Matcher; 9 | import java.util.regex.Pattern; 10 | import java.util.zip.GZIPOutputStream; 11 | 12 | import org.apache.commons.lang3.StringEscapeUtils; 13 | import org.apache.commons.net.ftp.FTPClient; 14 | 15 | import com.github.stuxuhai.hdata.api.JobContext; 16 | import com.github.stuxuhai.hdata.api.PluginConfig; 17 | import com.github.stuxuhai.hdata.api.Record; 18 | import com.github.stuxuhai.hdata.api.Writer; 19 | import com.github.stuxuhai.hdata.exception.HDataException; 20 | import com.github.stuxuhai.hdata.ftp.FTPUtils; 21 | import com.google.common.base.Joiner; 22 | import com.google.common.base.Preconditions; 23 | 24 | public class FTPWriter extends Writer { 25 | 26 | private String host; 27 | private int port; 28 | private String username; 29 | private String password; 30 | 
private String fieldsSeparator; 31 | private String lineSeparator; 32 | private String encoding; 33 | private String path; 34 | private boolean gzipCompress; 35 | private FTPClient ftpClient; 36 | private BufferedWriter bw; 37 | private String[] strArray; 38 | private static AtomicInteger sequence = new AtomicInteger(0); 39 | private static final Pattern REG_FILE_PATH_WITHOUT_EXTENSION = Pattern.compile(".*?(?=\\.\\w+$)"); 40 | private static final Pattern REG_FILE_EXTENSION = Pattern.compile("(\\.\\w+)$"); 41 | 42 | @Override 43 | public void prepare(JobContext context, PluginConfig writerConfig) { 44 | host = writerConfig.getString(FTPWriterProperties.HOST); 45 | Preconditions.checkNotNull(host, "FTP writer required property: host"); 46 | 47 | port = writerConfig.getInt(FTPWriterProperties.PORT, 21); 48 | username = writerConfig.getString(FTPWriterProperties.USERNAME, "anonymous"); 49 | password = writerConfig.getString(FTPWriterProperties.PASSWORD, ""); 50 | fieldsSeparator = StringEscapeUtils 51 | .unescapeJava(writerConfig.getString(FTPWriterProperties.FIELDS_SEPARATOR, "\t")); 52 | lineSeparator = StringEscapeUtils 53 | .unescapeJava(writerConfig.getString(FTPWriterProperties.LINE_SEPARATOR, "\n")); 54 | encoding = writerConfig.getString(FTPWriterProperties.ENCODING, "UTF-8"); 55 | path = writerConfig.getString(FTPWriterProperties.PATH); 56 | Preconditions.checkNotNull(path, "FTP writer required property: path"); 57 | 58 | gzipCompress = writerConfig.getBoolean(FTPWriterProperties.GZIP_COMPRESS, false); 59 | 60 | int parallelism = writerConfig.getParallelism(); 61 | if (parallelism > 1) { 62 | String filePathWithoutExtension = ""; 63 | String fileExtension = ""; 64 | Matcher m1 = REG_FILE_PATH_WITHOUT_EXTENSION.matcher(path.trim()); 65 | if (m1.find()) { 66 | filePathWithoutExtension = m1.group(); 67 | } 68 | 69 | Matcher m2 = REG_FILE_EXTENSION.matcher(path.trim()); 70 | if (m2.find()) { 71 | fileExtension = m2.group(); 72 | } 73 | path = String.format("%s_%04d%s", filePathWithoutExtension, sequence.getAndIncrement(), fileExtension); 74 | } 75 | 76 | try { 77 | ftpClient = FTPUtils.getFtpClient(host, port, username, password); 78 | OutputStream outputStream = ftpClient.storeFileStream(path); 79 | if (gzipCompress) { 80 | bw = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(outputStream), encoding)); 81 | } else { 82 | bw = new BufferedWriter(new OutputStreamWriter(outputStream, encoding)); 83 | } 84 | } catch (Exception e) { 85 | throw new HDataException(e); 86 | } 87 | } 88 | 89 | @Override 90 | public void execute(Record record) { 91 | if (strArray == null) { 92 | strArray = new String[record.size()]; 93 | } 94 | 95 | for (int i = 0, len = record.size(); i < len; i++) { 96 | Object o = record.get(i); 97 | if (o == null) { 98 | strArray[i] = "NULL"; 99 | } else { 100 | strArray[i] = o.toString(); 101 | } 102 | } 103 | try { 104 | bw.write(Joiner.on(fieldsSeparator).join(strArray)); 105 | bw.write(lineSeparator); 106 | } catch (IOException e) { 107 | throw new HDataException(e); 108 | } 109 | } 110 | 111 | @Override 112 | public void close() { 113 | if (bw != null) { 114 | try { 115 | bw.close(); 116 | } catch (IOException e) { 117 | throw new HDataException(e); 118 | } 119 | } 120 | FTPUtils.closeFtpClient(ftpClient); 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /hdata-ftp/src/main/java/com/github/stuxuhai/hdata/plugin/writer/ftp/FTPWriterProperties.java: 
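One caveat about the writer above, based on the documented commons-net contract for storeFileStream(): after the returned stream has been closed, completePendingCommand() should be called to finalize the transfer and confirm that it succeeded. FTPReader above does this; FTPWriter.close() does not. Below is a sketch of a close() that follows that contract, reusing FTPWriter's existing fields; treat it as illustrative rather than the project's actual code.

@Override
public void close() {
    try {
        if (bw != null) {
            bw.close(); // flushes and closes the stream obtained from storeFileStream()
        }
        // Finalize the pending STOR command and verify that the upload completed.
        if (ftpClient != null && !ftpClient.completePendingCommand()) {
            throw new HDataException("FTP transfer did not complete successfully: " + path);
        }
    } catch (IOException e) {
        throw new HDataException(e);
    } finally {
        FTPUtils.closeFtpClient(ftpClient);
    }
}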
-------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.writer.ftp; 2 | 3 | public class FTPWriterProperties { 4 | public static final String HOST = "host"; 5 | public static final String PORT = "port"; 6 | public static final String USERNAME = "username"; 7 | public static final String PASSWORD = "password"; 8 | public static final String PATH = "path"; 9 | public static final String ENCODING = "ftp.encoding"; 10 | public static final String FIELDS_SEPARATOR = "fields.separator"; 11 | public static final String LINE_SEPARATOR = "line.separator"; 12 | public static final String GZIP_COMPRESS = "gzip.compress"; 13 | } 14 | -------------------------------------------------------------------------------- /hdata-hbase/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.github.stuxuhai 6 | hdata 7 | 0.2.8 8 | 9 | hdata-hbase 10 | 11 | 12 | 13 | 14 | 15 | 16 | com.github.stuxuhai 17 | hdata-api 18 | provided 19 | 20 | 21 | org.apache.hadoop 22 | hadoop-common 23 | ${hadoop.version} 24 | ${hadoop.scope} 25 | 26 | 27 | org.apache.hadoop 28 | hadoop-hdfs 29 | ${hadoop.version} 30 | ${hadoop.scope} 31 | 32 | 33 | org.apache.hadoop 34 | hadoop-mapreduce-client-core 35 | ${hadoop.version} 36 | ${hadoop.scope} 37 | 38 | 39 | org.apache.hbase 40 | hbase-shaded-client 41 | ${hbase.version} 42 | ${hbase.scope} 43 | 44 | 45 | org.apache.zookeeper 46 | zookeeper 47 | ${zookeeper.version} 48 | ${zookeeper.scope} 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /hdata-hbase/src/main/java/com/github/stuxuhai/hdata/plugin/reader/hbase/HBaseReader.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.hbase; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.hadoop.conf.Configuration; 6 | import org.apache.hadoop.hbase.HBaseConfiguration; 7 | import org.apache.hadoop.hbase.TableName; 8 | import org.apache.hadoop.hbase.client.Connection; 9 | import org.apache.hadoop.hbase.client.ConnectionFactory; 10 | import org.apache.hadoop.hbase.client.Result; 11 | import org.apache.hadoop.hbase.client.ResultScanner; 12 | import org.apache.hadoop.hbase.client.Scan; 13 | import org.apache.hadoop.hbase.client.Table; 14 | import org.apache.hadoop.hbase.util.Bytes; 15 | 16 | import com.github.stuxuhai.hdata.api.DefaultRecord; 17 | import com.github.stuxuhai.hdata.api.Fields; 18 | import com.github.stuxuhai.hdata.api.JobContext; 19 | import com.github.stuxuhai.hdata.api.OutputFieldsDeclarer; 20 | import com.github.stuxuhai.hdata.api.PluginConfig; 21 | import com.github.stuxuhai.hdata.api.Reader; 22 | import com.github.stuxuhai.hdata.api.Record; 23 | import com.github.stuxuhai.hdata.api.RecordCollector; 24 | import com.github.stuxuhai.hdata.api.Splitter; 25 | import com.github.stuxuhai.hdata.exception.HDataException; 26 | import com.google.common.base.Preconditions; 27 | 28 | public class HBaseReader extends Reader { 29 | 30 | private final Fields fields = new Fields(); 31 | private Table table; 32 | private byte[] startRowkey; 33 | private byte[] endRowkey; 34 | private String[] columns; 35 | private int rowkeyIndex = -1; 36 | private static final String ROWKEY = ":rowkey"; 37 | 38 | @Override 39 | public void prepare(JobContext context, PluginConfig readerConfig) { 40 | startRowkey = (byte[]) readerConfig.get(HBaseReaderProperties.START_ROWKWY); 41 | 
endRowkey = (byte[]) readerConfig.get(HBaseReaderProperties.END_ROWKWY); 42 | 43 | Preconditions.checkNotNull(readerConfig.getString(HBaseReaderProperties.SCHEMA), 44 | "HBase reader required property: schema"); 45 | String[] schema = readerConfig.getString(HBaseReaderProperties.SCHEMA).split(","); 46 | for (String field : schema) { 47 | fields.add(field); 48 | } 49 | 50 | Configuration conf = HBaseConfiguration.create(); 51 | if (readerConfig.containsKey(HBaseReaderProperties.ZOOKEEPER_ZNODE_PARENT)) { 52 | conf.set(HBaseReaderProperties.ZOOKEEPER_ZNODE_PARENT, 53 | readerConfig.getString(HBaseReaderProperties.ZOOKEEPER_ZNODE_PARENT)); 54 | } 55 | conf.set("hbase.zookeeper.quorum", readerConfig.getString(HBaseReaderProperties.ZOOKEEPER_QUORUM)); 56 | conf.set("hbase.zookeeper.property.clientPort", 57 | readerConfig.getString(HBaseReaderProperties.ZOOKEEPER_PROPERTY_CLIENTPORT, "2181")); 58 | 59 | Preconditions.checkNotNull(readerConfig.getString(HBaseReaderProperties.COLUMNS), 60 | "HBase reader required property: columns"); 61 | columns = readerConfig.getString(HBaseReaderProperties.COLUMNS).split("\\s*,\\s*"); 62 | for (int i = 0, len = columns.length; i < len; i++) { 63 | if (ROWKEY.equalsIgnoreCase(columns[i])) { 64 | rowkeyIndex = i; 65 | break; 66 | } 67 | } 68 | 69 | try { 70 | Connection conn = ConnectionFactory.createConnection(conf); 71 | table = conn.getTable(TableName.valueOf(readerConfig.getString(HBaseReaderProperties.TABLE))); 72 | } catch (IOException e) { 73 | throw new HDataException(e); 74 | } 75 | } 76 | 77 | @Override 78 | public void execute(RecordCollector recordCollector) { 79 | Scan scan = new Scan(); 80 | if (startRowkey.length > 0) { 81 | scan.setStartRow(startRowkey); 82 | } 83 | if (endRowkey.length > 0) { 84 | scan.setStopRow(endRowkey); 85 | } 86 | 87 | for (int i = 0, len = columns.length; i < len; i++) { 88 | if (i != rowkeyIndex) { 89 | String[] column = columns[i].split(":"); 90 | scan.addColumn(Bytes.toBytes(column[0]), Bytes.toBytes(column[1])); 91 | } 92 | } 93 | 94 | try { 95 | ResultScanner results = table.getScanner(scan); 96 | for (Result result : results) { 97 | Record record = new DefaultRecord(fields.size()); 98 | for (int i = 0, len = fields.size(); i < len; i++) { 99 | if (i == rowkeyIndex) { 100 | record.add(Bytes.toString(result.getRow())); 101 | } else { 102 | String[] column = columns[i].split(":"); 103 | record.add(Bytes.toString(result.getValue(Bytes.toBytes(column[0]), Bytes.toBytes(column[1])))); 104 | } 105 | } 106 | recordCollector.send(record); 107 | } 108 | 109 | if (table != null) { 110 | table.close(); 111 | } 112 | } catch (IOException e) { 113 | throw new HDataException(e); 114 | } 115 | } 116 | 117 | @Override 118 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 119 | declarer.declare(fields); 120 | } 121 | 122 | @Override 123 | public Splitter newSplitter() { 124 | return new HBaseSplitter(); 125 | } 126 | 127 | } 128 | -------------------------------------------------------------------------------- /hdata-hbase/src/main/java/com/github/stuxuhai/hdata/plugin/reader/hbase/HBaseReaderProperties.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.hbase; 2 | 3 | public class HBaseReaderProperties { 4 | 5 | public static final String ZOOKEEPER_QUORUM = "zookeeper.quorum"; 6 | public static final String ZOOKEEPER_PROPERTY_CLIENTPORT = "zookeeper.client.port"; 7 | public static final String TABLE = "table"; 8 | public static 
final String START_ROWKWY = "start.rowkey"; 9 | public static final String END_ROWKWY = "end.rowkey"; 10 | public static final String COLUMNS = "columns"; 11 | public static final String SCHEMA = "schema"; 12 | public static final String ZOOKEEPER_ZNODE_PARENT = "zookeeper.znode.parent"; 13 | } 14 | -------------------------------------------------------------------------------- /hdata-hbase/src/main/java/com/github/stuxuhai/hdata/plugin/reader/hbase/HBaseSplitter.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.hbase; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | import java.util.List; 6 | 7 | import org.apache.hadoop.conf.Configuration; 8 | import org.apache.hadoop.hbase.HBaseConfiguration; 9 | import org.apache.hadoop.hbase.TableName; 10 | import org.apache.hadoop.hbase.client.Connection; 11 | import org.apache.hadoop.hbase.client.ConnectionFactory; 12 | import org.apache.hadoop.hbase.client.RegionLocator; 13 | import org.apache.hadoop.hbase.client.Table; 14 | import org.apache.hadoop.hbase.util.Bytes; 15 | import org.apache.hadoop.hbase.util.Pair; 16 | import org.apache.logging.log4j.LogManager; 17 | import org.apache.logging.log4j.Logger; 18 | 19 | import com.github.stuxuhai.hdata.api.JobConfig; 20 | import com.github.stuxuhai.hdata.api.PluginConfig; 21 | import com.github.stuxuhai.hdata.api.Splitter; 22 | import com.google.common.base.Preconditions; 23 | import com.google.common.base.Throwables; 24 | 25 | public class HBaseSplitter extends Splitter { 26 | 27 | private static final Logger LOGGER = LogManager.getLogger(HBaseSplitter.class); 28 | 29 | @Override 30 | public List split(JobConfig jobConfig) { 31 | List list = new ArrayList(); 32 | PluginConfig readerConfig = jobConfig.getReaderConfig(); 33 | int parallelism = readerConfig.getParallelism(); 34 | 35 | String startRowkey = readerConfig.getString(HBaseReaderProperties.START_ROWKWY, ""); 36 | String endRowkey = readerConfig.getString(HBaseReaderProperties.END_ROWKWY, ""); 37 | byte[] startRowkeyBytes = startRowkey.getBytes(); 38 | byte[] endRowkeyBytes = endRowkey.getBytes(); 39 | 40 | if (parallelism == 1) { 41 | readerConfig.put(HBaseReaderProperties.START_ROWKWY, startRowkeyBytes); 42 | readerConfig.put(HBaseReaderProperties.END_ROWKWY, endRowkeyBytes); 43 | list.add(readerConfig); 44 | return list; 45 | } else { 46 | Configuration conf = HBaseConfiguration.create(); 47 | if (readerConfig.containsKey(HBaseReaderProperties.ZOOKEEPER_ZNODE_PARENT)) { 48 | conf.set(HBaseReaderProperties.ZOOKEEPER_ZNODE_PARENT, 49 | readerConfig.getString(HBaseReaderProperties.ZOOKEEPER_ZNODE_PARENT)); 50 | } 51 | String zookeeperQuorum = readerConfig.getString(HBaseReaderProperties.ZOOKEEPER_QUORUM); 52 | Preconditions.checkNotNull(zookeeperQuorum, "HBase reader required property: zookeeper.quorum"); 53 | 54 | conf.set("hbase.zookeeper.quorum", zookeeperQuorum); 55 | conf.set("hbase.zookeeper.property.clientPort", 56 | readerConfig.getString(HBaseReaderProperties.ZOOKEEPER_PROPERTY_CLIENTPORT, "2181")); 57 | try { 58 | Connection conn = ConnectionFactory.createConnection(conf); 59 | TableName tableName = TableName.valueOf(readerConfig.getString(HBaseReaderProperties.TABLE)); 60 | Table table = conn.getTable(tableName); 61 | RegionLocator regionLocator = conn.getRegionLocator(tableName); 62 | 63 | Preconditions.checkNotNull(table, "HBase reader required property: table"); 64 | Pair startEndKeysPair = 
regionLocator.getStartEndKeys(); 65 | table.close(); 66 | List> selectedPairList = new ArrayList>(); 67 | byte[][] startKeys = startEndKeysPair.getFirst(); 68 | byte[][] endKeys = startEndKeysPair.getSecond(); 69 | 70 | if (startKeys.length == 1) { 71 | Pair pair = new Pair(); 72 | pair.setFirst(startRowkeyBytes); 73 | pair.setSecond(endRowkeyBytes); 74 | selectedPairList.add(pair); 75 | } else { 76 | if (startRowkeyBytes.length == 0 && endRowkeyBytes.length == 0) { 77 | for (int i = 0, len = startKeys.length; i < len; i++) { 78 | Pair pair = new Pair(); 79 | pair.setFirst(startKeys[i]); 80 | pair.setSecond(endKeys[i]); 81 | selectedPairList.add(pair); 82 | } 83 | } else if (endRowkeyBytes.length == 0) { 84 | for (int i = 0, len = startKeys.length; i < len; i++) { 85 | if (Bytes.compareTo(endKeys[i], startRowkeyBytes) >= 0) { 86 | Pair pair = new Pair(); 87 | pair.setFirst(Bytes.compareTo(startKeys[i], startRowkeyBytes) >= 0 ? startKeys[i] 88 | : startRowkeyBytes); 89 | pair.setSecond(endKeys[i]); 90 | selectedPairList.add(pair); 91 | } 92 | } 93 | } else { 94 | for (int i = 0, len = startKeys.length; i < len; i++) { 95 | if (len == 1) { 96 | Pair pair = new Pair(); 97 | pair.setFirst(startRowkeyBytes); 98 | pair.setSecond(endRowkeyBytes); 99 | selectedPairList.add(pair); 100 | break; 101 | } else if (Bytes.compareTo(endKeys[i], startRowkeyBytes) >= 0 102 | && Bytes.compareTo(endRowkeyBytes, startKeys[i]) >= 0) { 103 | Pair pair = new Pair(); 104 | pair.setFirst(Bytes.compareTo(startKeys[i], startRowkeyBytes) >= 0 ? startKeys[i] 105 | : startRowkeyBytes); 106 | pair.setSecond( 107 | Bytes.compareTo(endKeys[i], endRowkeyBytes) <= 0 ? endKeys[i] : endRowkeyBytes); 108 | selectedPairList.add(pair); 109 | } 110 | } 111 | } 112 | } 113 | 114 | if (parallelism > selectedPairList.size()) { 115 | LOGGER.info( 116 | "parallelism: {} is greater than the region count: {} in the currently open table: {}, so parallelism is set equal to region count.", 117 | parallelism, selectedPairList.size(), table.getName().getNameAsString()); 118 | parallelism = selectedPairList.size(); 119 | } 120 | 121 | double step = (double) selectedPairList.size() / parallelism; 122 | for (int i = 0; i < parallelism; i++) { 123 | List> splitedPairs = new ArrayList>(); 124 | for (int start = (int) Math.ceil(step * i), end = (int) Math 125 | .ceil(step * (i + 1)); start < end; start++) { 126 | splitedPairs.add(selectedPairList.get(start)); 127 | } 128 | PluginConfig pluginConfig = (PluginConfig) readerConfig.clone(); 129 | pluginConfig.put(HBaseReaderProperties.START_ROWKWY, splitedPairs.get(0).getFirst()); 130 | pluginConfig.put(HBaseReaderProperties.END_ROWKWY, 131 | splitedPairs.get(splitedPairs.size() - 1).getSecond()); 132 | list.add(pluginConfig); 133 | } 134 | } catch (IOException e) { 135 | LOGGER.error(Throwables.getStackTraceAsString(e)); 136 | } 137 | 138 | return list; 139 | } 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /hdata-hbase/src/main/java/com/github/stuxuhai/hdata/plugin/writer/hbase/HBaseWriter.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.writer.hbase; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | import java.util.List; 6 | 7 | import org.apache.hadoop.conf.Configuration; 8 | import org.apache.hadoop.hbase.HBaseConfiguration; 9 | import org.apache.hadoop.hbase.TableName; 10 | import org.apache.hadoop.hbase.client.Connection; 11 | import 
org.apache.hadoop.hbase.client.ConnectionFactory; 12 | import org.apache.hadoop.hbase.client.Put; 13 | import org.apache.hadoop.hbase.client.Table; 14 | import org.apache.hadoop.hbase.util.Bytes; 15 | 16 | import com.github.stuxuhai.hdata.api.JobContext; 17 | import com.github.stuxuhai.hdata.api.PluginConfig; 18 | import com.github.stuxuhai.hdata.api.Record; 19 | import com.github.stuxuhai.hdata.api.Writer; 20 | import com.github.stuxuhai.hdata.exception.HDataException; 21 | import com.google.common.base.Preconditions; 22 | 23 | public class HBaseWriter extends Writer { 24 | 25 | private Table table; 26 | private int batchSize; 27 | private int rowkeyIndex = -1; 28 | private final List putList = new ArrayList(); 29 | private String[] columns; 30 | private static final String ROWKEY = ":rowkey"; 31 | 32 | @Override 33 | public void prepare(JobContext context, PluginConfig writerConfig) { 34 | Configuration conf = HBaseConfiguration.create(); 35 | if (writerConfig.containsKey(HBaseWriterProperties.ZOOKEEPER_ZNODE_PARENT)) { 36 | conf.set(HBaseWriterProperties.ZOOKEEPER_ZNODE_PARENT, 37 | writerConfig.getString(HBaseWriterProperties.ZOOKEEPER_ZNODE_PARENT)); 38 | } 39 | 40 | Preconditions.checkNotNull(writerConfig.getString(HBaseWriterProperties.ZOOKEEPER_QUORUM), 41 | "HBase writer required property: zookeeper.quorum"); 42 | 43 | conf.set("hbase.zookeeper.quorum", writerConfig.getString(HBaseWriterProperties.ZOOKEEPER_QUORUM)); 44 | conf.set("hbase.zookeeper.property.clientPort", 45 | writerConfig.getString(HBaseWriterProperties.ZOOKEEPER_PROPERTY_CLIENTPORT, "2181")); 46 | batchSize = writerConfig.getInt(HBaseWriterProperties.BATCH_INSERT_SIZE, 10000); 47 | 48 | Preconditions.checkNotNull(writerConfig.getString(HBaseWriterProperties.COLUMNS), 49 | "HBase writer required property: zookeeper.columns"); 50 | columns = writerConfig.getString(HBaseWriterProperties.COLUMNS).split(","); 51 | for (int i = 0, len = columns.length; i < len; i++) { 52 | if (ROWKEY.equalsIgnoreCase(columns[i])) { 53 | rowkeyIndex = i; 54 | break; 55 | } 56 | } 57 | 58 | if (rowkeyIndex == -1) { 59 | throw new HDataException("Can not find :rowkey in columnsMapping of HBase Writer!"); 60 | } 61 | 62 | try { 63 | Preconditions.checkNotNull(writerConfig.getString(HBaseWriterProperties.TABLE), 64 | "HBase writer required property: table"); 65 | Connection conn = ConnectionFactory.createConnection(conf); 66 | table = conn.getTable(TableName.valueOf(writerConfig.getString(HBaseWriterProperties.TABLE))); 67 | } catch (IOException e) { 68 | throw new HDataException(e); 69 | } 70 | 71 | } 72 | 73 | @Override 74 | public void execute(Record record) { 75 | Object rowkeyValue = record.get(rowkeyIndex); 76 | Put put = new Put(Bytes.toBytes(rowkeyValue == null ? "NULL" : rowkeyValue.toString())); 77 | for (int i = 0, len = record.size(); i < len; i++) { 78 | if (i != rowkeyIndex) { 79 | String[] tokens = columns[i].split(":"); 80 | put.addColumn(Bytes.toBytes(tokens[0]), Bytes.toBytes(tokens[1]), 81 | record.get(i) == null ? 
null : Bytes.toBytes(record.get(i).toString())); 82 | } 83 | } 84 | 85 | putList.add(put); 86 | if (putList.size() == batchSize) { 87 | try { 88 | table.put(putList); 89 | } catch (IOException e) { 90 | throw new HDataException(e); 91 | } 92 | putList.clear(); 93 | } 94 | } 95 | 96 | @Override 97 | public void close() { 98 | if (table != null) { 99 | try { 100 | if (putList.size() > 0) { 101 | table.put(putList); 102 | } 103 | table.close(); 104 | } catch (IOException e) { 105 | throw new HDataException(e); 106 | } 107 | putList.clear(); 108 | } 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /hdata-hbase/src/main/java/com/github/stuxuhai/hdata/plugin/writer/hbase/HBaseWriterProperties.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.writer.hbase; 2 | 3 | public class HBaseWriterProperties { 4 | public static final String ZOOKEEPER_QUORUM = "zookeeper.quorum"; 5 | public static final String ZOOKEEPER_PROPERTY_CLIENTPORT = "zookeeper.client.port"; 6 | public static final String TABLE = "table"; 7 | public static final String COLUMNS = "columns"; 8 | public static final String BATCH_INSERT_SIZE = "batch.insert.size"; 9 | public static final String ZOOKEEPER_ZNODE_PARENT = "zookeeper.znode.parent"; 10 | } 11 | -------------------------------------------------------------------------------- /hdata-hdfs/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.github.stuxuhai 6 | hdata 7 | 0.2.8 8 | 9 | hdata-hdfs 10 | 11 | 12 | 13 | 14 | 15 | 16 | com.github.stuxuhai 17 | hdata-api 18 | provided 19 | 20 | 21 | org.apache.hadoop 22 | hadoop-common 23 | ${hadoop.version} 24 | ${hadoop.scope} 25 | 26 | 27 | org.apache.hadoop 28 | hadoop-hdfs 29 | ${hadoop.version} 30 | ${hadoop.scope} 31 | 32 | 33 | org.anarres.lzo 34 | lzo-hadoop 35 | 1.0.5 36 | 37 | 38 | org.apache.hadoop 39 | hadoop-core 40 | 41 | 42 | 43 | 44 | org.apache.commons 45 | commons-lang3 46 | 3.4 47 | 48 | 49 | -------------------------------------------------------------------------------- /hdata-hdfs/src/main/java/com/github/stuxuhai/hdata/plugin/reader/hdfs/HDFSReader.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.hdfs; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.IOException; 5 | import java.io.InputStreamReader; 6 | import java.util.ArrayList; 7 | import java.util.List; 8 | 9 | import org.apache.commons.lang3.StringEscapeUtils; 10 | import org.apache.commons.lang3.StringUtils; 11 | import org.apache.hadoop.conf.Configuration; 12 | import org.apache.hadoop.fs.FSDataInputStream; 13 | import org.apache.hadoop.fs.FileSystem; 14 | import org.apache.hadoop.fs.Path; 15 | import org.apache.hadoop.io.compress.CompressionCodec; 16 | import org.apache.hadoop.io.compress.CompressionCodecFactory; 17 | 18 | import com.github.stuxuhai.hdata.api.DefaultRecord; 19 | import com.github.stuxuhai.hdata.api.Fields; 20 | import com.github.stuxuhai.hdata.api.JobContext; 21 | import com.github.stuxuhai.hdata.api.OutputFieldsDeclarer; 22 | import com.github.stuxuhai.hdata.api.PluginConfig; 23 | import com.github.stuxuhai.hdata.api.Reader; 24 | import com.github.stuxuhai.hdata.api.Record; 25 | import com.github.stuxuhai.hdata.api.RecordCollector; 26 | import com.github.stuxuhai.hdata.api.Splitter; 27 | import 
com.github.stuxuhai.hdata.exception.HDataException; 28 | 29 | public class HDFSReader extends Reader { 30 | 31 | private Fields fields; 32 | private String fieldsSeparator; 33 | private String encoding; 34 | private PluginConfig readerConfig; 35 | private List files = new ArrayList(); 36 | 37 | @SuppressWarnings("unchecked") 38 | @Override 39 | public void prepare(JobContext context, PluginConfig readerConfig) { 40 | this.readerConfig = readerConfig; 41 | fieldsSeparator = StringEscapeUtils 42 | .unescapeJava(readerConfig.getString(HDFSReaderProperties.FIELDS_SEPARATOR, "\t")); 43 | files = (List) readerConfig.get(HDFSReaderProperties.FILES); 44 | encoding = readerConfig.getString(HDFSReaderProperties.ENCODING, "UTF-8"); 45 | 46 | String hadoopUser = readerConfig.getString(HDFSReaderProperties.HADOOP_USER); 47 | if (hadoopUser != null) { 48 | System.setProperty("HADOOP_USER_NAME", hadoopUser); 49 | } 50 | 51 | if (readerConfig.containsKey(HDFSReaderProperties.SCHEMA)) { 52 | fields = new Fields(); 53 | String[] tokens = readerConfig.getString(HDFSReaderProperties.SCHEMA).split("\\s*,\\s*"); 54 | for (String field : tokens) { 55 | fields.add(field); 56 | } 57 | } 58 | } 59 | 60 | @Override 61 | public void execute(RecordCollector recordCollector) { 62 | Configuration conf = new Configuration(); 63 | if (readerConfig.containsKey(HDFSReaderProperties.HDFS_CONF_PATH)) { 64 | for (String path: readerConfig.getString(HDFSReaderProperties.HDFS_CONF_PATH).split(",")) { 65 | conf.addResource(new Path("file://" + path)); 66 | } 67 | } 68 | 69 | CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf); 70 | try { 71 | for (Path file : files) { 72 | FileSystem fs = file.getFileSystem(conf); 73 | CompressionCodec codec = codecFactory.getCodec(file); 74 | FSDataInputStream input = fs.open(file); 75 | BufferedReader br; 76 | String line = null; 77 | if (codec == null) { 78 | br = new BufferedReader(new InputStreamReader(input, encoding)); 79 | } else { 80 | br = new BufferedReader(new InputStreamReader(codec.createInputStream(input), encoding)); 81 | } 82 | while ((line = br.readLine()) != null) { 83 | String[] tokens = StringUtils.splitPreserveAllTokens(line, fieldsSeparator); 84 | Record record = new DefaultRecord(tokens.length); 85 | for (String field : tokens) { 86 | record.add(field); 87 | } 88 | recordCollector.send(record); 89 | } 90 | br.close(); 91 | } 92 | } catch (IOException e) { 93 | throw new HDataException(e); 94 | } 95 | } 96 | 97 | @Override 98 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 99 | declarer.declare(fields); 100 | } 101 | 102 | @Override 103 | public Splitter newSplitter() { 104 | return new HDFSSplitter(); 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /hdata-hdfs/src/main/java/com/github/stuxuhai/hdata/plugin/reader/hdfs/HDFSReaderProperties.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.hdfs; 2 | 3 | public class HDFSReaderProperties { 4 | public static final String DIR = "dir"; 5 | public static final String FILENAME_REGEXP = "filename"; 6 | public static final String SCHEMA = "schema"; 7 | public static final String FIELDS_SEPARATOR = "fields.separator"; 8 | public static final String ENCODING = "encoding"; 9 | public static final String HADOOP_USER = "hadoop.user"; 10 | public static final String FILES = "reader.files"; 11 | public static final String HDFS_CONF_PATH = "hdfs.conf.path"; 12 
| } 13 | -------------------------------------------------------------------------------- /hdata-hdfs/src/main/java/com/github/stuxuhai/hdata/plugin/reader/hdfs/HDFSSplitter.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.hdfs; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | import java.util.List; 6 | import java.util.regex.Matcher; 7 | import java.util.regex.Pattern; 8 | 9 | import org.apache.hadoop.conf.Configuration; 10 | import org.apache.hadoop.fs.FileStatus; 11 | import org.apache.hadoop.fs.FileSystem; 12 | import org.apache.hadoop.fs.Path; 13 | import org.apache.logging.log4j.LogManager; 14 | import org.apache.logging.log4j.Logger; 15 | 16 | import com.github.stuxuhai.hdata.api.JobConfig; 17 | import com.github.stuxuhai.hdata.api.PluginConfig; 18 | import com.github.stuxuhai.hdata.api.Splitter; 19 | import com.google.common.base.Preconditions; 20 | import com.google.common.base.Throwables; 21 | 22 | public class HDFSSplitter extends Splitter { 23 | 24 | private static final Logger LOGGER = LogManager.getLogger(HDFSSplitter.class); 25 | 26 | @Override 27 | public List split(JobConfig jobConfig) { 28 | List list = new ArrayList(); 29 | List matchedFiles = new ArrayList(); 30 | PluginConfig readerConfig = jobConfig.getReaderConfig(); 31 | String hadoopUser = readerConfig.getString(HDFSReaderProperties.HADOOP_USER); 32 | if (hadoopUser != null) { 33 | System.setProperty("HADOOP_USER_NAME", hadoopUser); 34 | } 35 | 36 | Path dir = new Path(readerConfig.getString(HDFSReaderProperties.DIR)); 37 | Preconditions.checkNotNull(dir, "HDFS reader required property: dir"); 38 | 39 | int parallelism = readerConfig.getParallelism(); 40 | 41 | String hadoopUserName = readerConfig.getString(HDFSReaderProperties.HADOOP_USER); 42 | if (hadoopUserName != null) { 43 | System.setProperty("HADOOP_USER_NAME", hadoopUserName); 44 | } 45 | 46 | Configuration conf = new Configuration(); 47 | if (readerConfig.containsKey(HDFSReaderProperties.HDFS_CONF_PATH)) { 48 | for (String path: readerConfig.getString(HDFSReaderProperties.HDFS_CONF_PATH).split(",")) { 49 | conf.addResource(new Path("file://" + path)); 50 | } 51 | } 52 | try { 53 | FileSystem fs = dir.getFileSystem(conf); 54 | Preconditions.checkNotNull(readerConfig.getString(HDFSReaderProperties.FILENAME_REGEXP), 55 | "HDFS reader required property: filename"); 56 | Pattern filenamePattern = Pattern.compile(readerConfig.getString(HDFSReaderProperties.FILENAME_REGEXP)); 57 | if (fs.exists(dir)) { 58 | for (FileStatus fileStatus : fs.listStatus(dir)) { 59 | Matcher m = filenamePattern.matcher(fileStatus.getPath().getName()); 60 | if (m.matches()) { 61 | matchedFiles.add(fileStatus.getPath()); 62 | } 63 | } 64 | 65 | if (matchedFiles.size() > 0) { 66 | if (parallelism == 1) { 67 | readerConfig.put(HDFSReaderProperties.FILES, matchedFiles); 68 | list.add(readerConfig); 69 | } else { 70 | double step = (double) matchedFiles.size() / parallelism; 71 | for (int i = 0; i < parallelism; i++) { 72 | List splitedFiles = new ArrayList(); 73 | for (int start = (int) Math.ceil(step * i), end = (int) Math 74 | .ceil(step * (i + 1)); start < end; start++) { 75 | splitedFiles.add(matchedFiles.get(start)); 76 | } 77 | PluginConfig pluginConfig = (PluginConfig) readerConfig.clone(); 78 | pluginConfig.put(HDFSReaderProperties.FILES, splitedFiles); 79 | list.add(pluginConfig); 80 | } 81 | } 82 | } 83 | 84 | } else { 85 | LOGGER.error(String.format("Path %s not 
found.", dir)); 86 | } 87 | } catch (IOException e) { 88 | LOGGER.error(Throwables.getStackTraceAsString(e)); 89 | } 90 | 91 | return list; 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /hdata-hdfs/src/main/java/com/github/stuxuhai/hdata/plugin/writer/hdfs/HDFSWriterProperties.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.writer.hdfs; 2 | 3 | public class HDFSWriterProperties { 4 | public static final String PATH = "path"; 5 | public static final String FIELDS_SEPARATOR = "fields.separator"; 6 | public static final String LINE_SEPARATOR = "line.separator"; 7 | public static final String ENCODING = "encoding"; 8 | public static final String COMPRESS_CODEC = "compress.codec"; 9 | public static final String HADOOP_USER = "hadoop.user"; 10 | public static final String MAX_FILE_SIZE_MB = "max.file.size.mb"; 11 | public static final String HDFS_CONF_PATH = "hdfs.conf.path"; 12 | public static final String PARTITION_DATE_INDEX = "partition.date.index"; 13 | public static final String PARTITIONED_DATE_FORMAT = "partition.date.format"; 14 | } 15 | -------------------------------------------------------------------------------- /hdata-hive/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.github.stuxuhai 6 | hdata 7 | 0.2.8 8 | 9 | hdata-hive 10 | 11 | 12 | 13 | 14 | 15 | 16 | com.github.stuxuhai 17 | hdata-api 18 | provided 19 | 20 | 21 | org.anarres.lzo 22 | lzo-hadoop 23 | 1.0.5 24 | 25 | 26 | org.apache.hadoop 27 | hadoop-core 28 | 29 | 30 | 31 | 32 | org.apache.hive.hcatalog 33 | hive-hcatalog-core 34 | ${hive.version} 35 | ${hive.scope} 36 | 37 | 38 | org.slf4j 39 | slf4j-log4j12 40 | 41 | 42 | 43 | 44 | org.apache.hadoop 45 | hadoop-mapreduce-client-core 46 | ${hadoop.version} 47 | ${hadoop.scope} 48 | 49 | 50 | org.slf4j 51 | slf4j-log4j12 52 | 53 | 54 | 55 | 56 | org.apache.hadoop 57 | hadoop-common 58 | ${hadoop.version} 59 | ${hadoop.scope} 60 | 61 | 62 | org.slf4j 63 | slf4j-log4j12 64 | 65 | 66 | 67 | 68 | org.apache.hadoop 69 | hadoop-hdfs 70 | ${hadoop.version} 71 | ${hadoop.scope} 72 | 73 | 74 | -------------------------------------------------------------------------------- /hdata-hive/src/main/java/com/github/stuxuhai/hdata/plugin/hive/HiveMetaStoreUtils.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.hive; 2 | 3 | import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; 4 | import org.apache.hadoop.hive.metastore.api.Partition; 5 | import org.apache.hadoop.hive.metastore.api.Table; 6 | 7 | public class HiveMetaStoreUtils { 8 | 9 | /** 10 | * 获取Hive表 11 | * 12 | * @param client 13 | * @param database 14 | * @param table 15 | * @return 16 | */ 17 | public static Table getTable(HiveMetaStoreClient client, String database, String table) { 18 | try { 19 | return client.getTable(database, table); 20 | } catch (Exception e) { 21 | return null; 22 | } 23 | } 24 | 25 | /** 26 | * 判断是否为托管表 27 | * 28 | * @param table 29 | * @return 30 | */ 31 | public static boolean isManagedTable(Table table) { 32 | return "MANAGED_TABLE".equals(table.getTableType()); 33 | } 34 | 35 | /** 36 | * 判断是否为分区表 37 | * 38 | * @param table 39 | * @return 40 | */ 41 | public static boolean isPartitionTable(Table table) { 42 | return table.getPartitionKeys().size() > 0 ? 
true : false; 43 | } 44 | 45 | /** 46 | * 获取Hive表的分区 47 | * 48 | * @param client 49 | * @param table 50 | * @param partitionValues 51 | * @return 52 | */ 53 | public static Partition getPartition(HiveMetaStoreClient client, Table table, String partitionValues) { 54 | try { 55 | return client.getPartition(table.getDbName(), table.getTableName(), 56 | partitionValues.replaceAll("\"", "").replaceAll("\\s+,\\s+", "")); 57 | } catch (Exception e) { 58 | return null; 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /hdata-hive/src/main/java/com/github/stuxuhai/hdata/plugin/hive/HiveTypeUtils.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.hive; 2 | 3 | import java.math.BigInteger; 4 | 5 | import org.apache.hadoop.hive.common.type.HiveBaseChar; 6 | import org.apache.hadoop.hive.common.type.HiveDecimal; 7 | import org.apache.hadoop.hive.common.type.HiveVarchar; 8 | import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; 9 | import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; 10 | 11 | import com.github.stuxuhai.hdata.api.Record; 12 | 13 | public class HiveTypeUtils { 14 | 15 | /** 16 | * 将Hive Writable类型转为标准Java类型 17 | * 18 | * @param o 19 | * @return 20 | */ 21 | public static Object toJavaObject(Object o) { 22 | if (o instanceof HiveBaseChar) { 23 | return ((HiveVarchar) o).getValue(); 24 | } else if (o instanceof HiveDecimal) { 25 | return ((HiveDecimal) o).bigDecimalValue(); 26 | } else if (o instanceof BigInteger) { 27 | return ((BigInteger) o).longValue(); 28 | } 29 | 30 | return o; 31 | } 32 | 33 | /** 34 | * 将Hive Writable类型转为标准Java类型() 35 | * 36 | * @param type 37 | * @param o 38 | * @return Object 39 | */ 40 | public static Object toJavaObjectSpecial(String type, Object o) { 41 | if (type == null || o == null) { 42 | return toJavaObject(o); 43 | } 44 | 45 | // Hive中的bigint 对应long 46 | if (type.equals("bigint")) { 47 | if (o.toString().isEmpty()) { 48 | return 0L; 49 | } 50 | return Long.parseLong(o.toString()); 51 | } else if (type.equals("int")) { 52 | if (o.toString().isEmpty()) { 53 | return 0; 54 | } 55 | return Integer.parseInt(o.toString()); 56 | } else { 57 | return toJavaObject(o); 58 | } 59 | } 60 | 61 | /** 62 | * 获取Hive类型的PrimitiveCategory 63 | * 64 | * @param type 65 | * @return 66 | */ 67 | public static PrimitiveCategory getPrimitiveCategory(String type) { 68 | if ("TINYINT".equals(type)) { 69 | return PrimitiveObjectInspector.PrimitiveCategory.BYTE; 70 | } else if ("SMALLINT".equals(type)) { 71 | return PrimitiveObjectInspector.PrimitiveCategory.SHORT; 72 | } else if ("BIGINT".equals(type)) { 73 | return PrimitiveObjectInspector.PrimitiveCategory.LONG; 74 | } else { 75 | return PrimitiveObjectInspector.PrimitiveCategory.valueOf(type); 76 | } 77 | } 78 | 79 | /** 80 | * converter special str in hive eg:NaN or Infinity to readable value in db 81 | * 82 | * @param record 83 | * @param obj 84 | * @param columnsTypes 85 | * @return 86 | */ 87 | public static Record convertHiveSpecialValue(Record record, Object obj, String columnsTypes, boolean convertNull) { 88 | if (!convertNull && obj == null) { 89 | record.add(obj); 90 | return record; 91 | } 92 | 93 | boolean isSpecialStr = "NaN".equals(String.valueOf(obj)) || "Infinity".equals(String.valueOf(obj)); 94 | if (convertNull) { 95 | isSpecialStr = isSpecialStr || obj == null; 96 | } 97 | 98 | if 
("int".equals(columnsTypes)) { 99 | if (isSpecialStr) { 100 | record.add(Integer.valueOf(0)); 101 | } else { 102 | record.add(Integer.valueOf(String.valueOf(obj))); 103 | } 104 | } else if ("bigint".equals(columnsTypes)) { 105 | if (isSpecialStr) { 106 | record.add(Long.valueOf(0)); 107 | } else { 108 | record.add(Long.valueOf(String.valueOf(obj))); 109 | } 110 | } else if ("float".equals(columnsTypes)) { 111 | if (isSpecialStr) { 112 | record.add(Float.valueOf(0)); 113 | } else { 114 | record.add(Float.valueOf(String.valueOf(obj))); 115 | } 116 | } else if ("double".equals(columnsTypes)) { 117 | if (isSpecialStr) { 118 | record.add(Double.valueOf(0)); 119 | } else { 120 | record.add(Double.valueOf(String.valueOf(obj))); 121 | } 122 | } else { 123 | if (isSpecialStr) { 124 | record.add(""); 125 | } else { 126 | record.add(obj); 127 | } 128 | } 129 | 130 | return record; 131 | 132 | } 133 | 134 | } 135 | -------------------------------------------------------------------------------- /hdata-hive/src/main/java/com/github/stuxuhai/hdata/plugin/reader/hive/HiveReader.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.hive; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Iterator; 5 | import java.util.List; 6 | import java.util.Map; 7 | 8 | import org.apache.hive.hcatalog.common.HCatException; 9 | import org.apache.hive.hcatalog.data.HCatRecord; 10 | import org.apache.hive.hcatalog.data.transfer.DataTransferFactory; 11 | import org.apache.hive.hcatalog.data.transfer.HCatReader; 12 | import org.apache.hive.hcatalog.data.transfer.ReaderContext; 13 | 14 | import com.github.stuxuhai.hdata.api.DefaultRecord; 15 | import com.github.stuxuhai.hdata.api.Fields; 16 | import com.github.stuxuhai.hdata.api.JobContext; 17 | import com.github.stuxuhai.hdata.api.OutputFieldsDeclarer; 18 | import com.github.stuxuhai.hdata.api.PluginConfig; 19 | import com.github.stuxuhai.hdata.api.Reader; 20 | import com.github.stuxuhai.hdata.api.Record; 21 | import com.github.stuxuhai.hdata.api.RecordCollector; 22 | import com.github.stuxuhai.hdata.api.Splitter; 23 | import com.github.stuxuhai.hdata.exception.HDataException; 24 | import com.github.stuxuhai.hdata.plugin.hive.HiveTypeUtils; 25 | 26 | public class HiveReader extends Reader { 27 | 28 | private final Fields fields = new Fields(); 29 | private List splitList = null; 30 | private ReaderContext readerContext = null; 31 | private Map fieldSchemaMap = null; 32 | private boolean convertNull; 33 | private List selectColumnsIndexList = new ArrayList<>(); 34 | 35 | @SuppressWarnings("unchecked") 36 | @Override 37 | public void prepare(JobContext context, PluginConfig readerConfig) { 38 | if (readerConfig.containsKey(HiveReaderProperties.HADOOP_USER)) { 39 | System.setProperty("HADOOP_USER_NAME", readerConfig.getString(HiveReaderProperties.HADOOP_USER)); 40 | } 41 | 42 | convertNull = readerConfig.getBoolean(HiveReaderProperties.CONVERT_NULL, true); 43 | splitList = (List) readerConfig.get(HiveReaderProperties.INPUT_SPLITS); 44 | readerContext = (ReaderContext) readerConfig.get(HiveReaderProperties.READER_CONTEXT); 45 | fieldSchemaMap = (Map) readerConfig.get(HiveReaderProperties.COLUMNS_TYPES); 46 | 47 | List fieldsList = (List) readerConfig.get(HiveReaderProperties.FIELDS); 48 | for (String field : fieldsList) { 49 | fields.add(field); 50 | } 51 | 52 | String selectColumnsStr = readerConfig.getString(HiveReaderProperties.SELECT_COLUMNS); 53 | if (selectColumnsStr == null) { 54 
| for (int i = 0; i < fields.size(); i++) { 55 | selectColumnsIndexList.add(i); 56 | } 57 | } else { 58 | String[] tokens = selectColumnsStr.split(","); 59 | for (String token : tokens) { 60 | int colunmnIndex = searchColumnsIndex(token); 61 | if (colunmnIndex < 0) { 62 | throw new HDataException("Column " + token + "not found"); 63 | } 64 | selectColumnsIndexList.add(colunmnIndex); 65 | } 66 | } 67 | } 68 | 69 | private int searchColumnsIndex(String colunm) { 70 | for (int i = 0; i < fields.size(); i++) { 71 | if (colunm.equals(fields.get(i))) { 72 | return i; 73 | } 74 | } 75 | return -1; 76 | } 77 | 78 | @Override 79 | public void execute(RecordCollector recordCollector) { 80 | try { 81 | for (int slaveNumber : splitList) { 82 | HCatReader slaveReader = DataTransferFactory.getHCatReader(readerContext, slaveNumber); 83 | Iterator itr = slaveReader.read(); 84 | while (itr.hasNext()) { 85 | HCatRecord hCatRecord = itr.next(); 86 | Record record = new DefaultRecord(selectColumnsIndexList.size()); 87 | for (int i : selectColumnsIndexList) { 88 | String columnsTypes = fieldSchemaMap.get(fields.get(i)); 89 | Object obj = hCatRecord.get(i); 90 | record = HiveTypeUtils.convertHiveSpecialValue(record, obj, columnsTypes, convertNull); 91 | } 92 | 93 | recordCollector.send(record); 94 | } 95 | } 96 | } catch (HCatException e) { 97 | throw new HDataException(e); 98 | } 99 | } 100 | 101 | @Override 102 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 103 | declarer.declare(fields); 104 | } 105 | 106 | @Override 107 | public Splitter newSplitter() { 108 | return new HiveSplitter(); 109 | } 110 | 111 | } 112 | -------------------------------------------------------------------------------- /hdata-hive/src/main/java/com/github/stuxuhai/hdata/plugin/reader/hive/HiveReaderProperties.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.hive; 2 | 3 | public class HiveReaderProperties { 4 | public static final String METASTORE_URIS = "metastore.uris"; 5 | public static final String DATABASE = "database"; 6 | public static final String TABLE = "table"; 7 | public static final String PARTITIONS = "partitions"; 8 | public static final String HADOOP_USER = "hadoop.user"; 9 | public static final String FIELDS = "reader.fields"; 10 | public static final String INPUT_SPLITS = "reader.input.splits"; 11 | public static final String READER_CONTEXT = "reader.reader.context"; 12 | public static final String HDFS_CONF_PATH = "hdfs.conf.path"; 13 | public static final String COLUMNS_TYPES = "reader.columns.types"; 14 | public static final String CONVERT_NULL = "convert.null"; 15 | public static final String SELECT_COLUMNS = "select.columns"; 16 | } 17 | -------------------------------------------------------------------------------- /hdata-hive/src/main/java/com/github/stuxuhai/hdata/plugin/reader/hive/HiveSplitter.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.hive; 2 | 3 | import java.lang.reflect.Method; 4 | import java.util.ArrayList; 5 | import java.util.HashMap; 6 | import java.util.Iterator; 7 | import java.util.List; 8 | import java.util.Map; 9 | import java.util.Map.Entry; 10 | 11 | import org.apache.hadoop.conf.Configuration; 12 | import org.apache.hadoop.fs.Path; 13 | import org.apache.hadoop.hive.conf.HiveConf; 14 | import org.apache.hadoop.hive.conf.HiveConf.ConfVars; 15 | import 
org.apache.hadoop.hive.metastore.HiveMetaStoreClient; 16 | import org.apache.hadoop.hive.metastore.api.FieldSchema; 17 | import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; 18 | import org.apache.hive.hcatalog.data.schema.HCatSchema; 19 | import org.apache.hive.hcatalog.data.transfer.DataTransferFactory; 20 | import org.apache.hive.hcatalog.data.transfer.HCatReader; 21 | import org.apache.hive.hcatalog.data.transfer.ReadEntity; 22 | import org.apache.hive.hcatalog.data.transfer.ReaderContext; 23 | import org.apache.hive.hcatalog.mapreduce.HCatBaseInputFormat; 24 | import org.apache.logging.log4j.LogManager; 25 | import org.apache.logging.log4j.Logger; 26 | 27 | import com.github.stuxuhai.hdata.api.JobConfig; 28 | import com.github.stuxuhai.hdata.api.PluginConfig; 29 | import com.github.stuxuhai.hdata.api.Splitter; 30 | import com.github.stuxuhai.hdata.exception.HDataException; 31 | import com.google.common.base.Joiner; 32 | import com.google.common.base.Preconditions; 33 | import com.google.common.collect.Lists; 34 | import com.google.common.collect.Maps; 35 | 36 | public class HiveSplitter extends Splitter { 37 | 38 | private static final Logger LOG = LogManager.getLogger(HiveSplitter.class); 39 | 40 | @Override 41 | public List split(JobConfig jobConfig) { 42 | List list = new ArrayList<>(); 43 | PluginConfig readerConfig = jobConfig.getReaderConfig(); 44 | String metastoreUris = readerConfig.getString(HiveReaderProperties.METASTORE_URIS); 45 | if (metastoreUris == null || metastoreUris.isEmpty()) { 46 | HiveConf hiveConf = new HiveConf(); 47 | metastoreUris = hiveConf.getVar(ConfVars.METASTOREURIS); 48 | } 49 | Preconditions.checkNotNull(metastoreUris, "Hive reader required property: metastore.uris"); 50 | 51 | String hadoopUser = readerConfig.getString(HiveReaderProperties.HADOOP_USER); 52 | if (hadoopUser != null) { 53 | System.setProperty("HADOOP_USER_NAME", hadoopUser); 54 | } 55 | 56 | String dbName = readerConfig.getString(HiveReaderProperties.DATABASE, "default"); 57 | String tableName = readerConfig.getString(HiveReaderProperties.TABLE); 58 | Preconditions.checkNotNull(tableName, "Hive reader required property: table"); 59 | 60 | int parallelism = readerConfig.getParallelism(); 61 | 62 | ReadEntity.Builder builder = new ReadEntity.Builder(); 63 | ReadEntity entity = null; 64 | if (readerConfig.containsKey(HiveReaderProperties.PARTITIONS)) { 65 | entity = builder.withDatabase(dbName).withTable(tableName) 66 | .withFilter(readerConfig.getString(HiveReaderProperties.PARTITIONS)).build(); 67 | } else { 68 | entity = builder.withDatabase(dbName).withTable(tableName).build(); 69 | } 70 | 71 | Map config = new HashMap(); 72 | config.put(ConfVars.METASTOREURIS.varname, metastoreUris); 73 | 74 | Configuration conf = new Configuration(); 75 | if (readerConfig.containsKey(HiveReaderProperties.HDFS_CONF_PATH)) { 76 | conf.addResource(new Path("file://" + readerConfig.getString(HiveReaderProperties.HDFS_CONF_PATH))); 77 | } 78 | 79 | Iterator> it = conf.iterator(); 80 | while (it.hasNext()) { 81 | Entry entry = it.next(); 82 | config.put(entry.getKey(), entry.getValue()); 83 | } 84 | 85 | HCatReader masterReader = DataTransferFactory.getHCatReader(entity, config); 86 | 87 | try { 88 | ReaderContext readerContext = masterReader.prepareRead(); 89 | int numSplits = readerContext.numSplits(); 90 | readerConfig.put(HiveReaderProperties.READER_CONTEXT, readerContext); 91 | 92 | Method getConfMethod = readerContext.getClass().getDeclaredMethod("getConf"); 93 | 
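// ReaderContext does not expose its Hadoop Configuration through a public getter,
// so the splitter calls the non-public getConf() reflectively in order to pass the
// configuration to HCatBaseInputFormat.getTableSchema(...) below.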
getConfMethod.setAccessible(true); 94 | HCatSchema schema = HCatBaseInputFormat.getTableSchema((Configuration) getConfMethod.invoke(readerContext)); 95 | readerConfig.put(HiveReaderProperties.FIELDS, schema.getFieldNames()); 96 | 97 | Map fieldSchemaMap = Maps.newHashMap(); 98 | List fields = schema.getFields(); 99 | List types = Lists.newArrayList(); 100 | List comments = Lists.newArrayList(); 101 | for (HCatFieldSchema field : fields) { 102 | fieldSchemaMap.put(field.getName(), field.getTypeString()); 103 | types.add(field.getTypeString()); 104 | // comments.add(field.getComment()); // always return 105 | // null, a hdatcatlog bug? 106 | } 107 | 108 | for (FieldSchema fieldSchema : getSchema(metastoreUris, dbName, tableName)) { 109 | comments.add(fieldSchema.getComment()); 110 | } 111 | 112 | jobConfig.setProperty("types", Joiner.on("\001").useForNull("").join(types)); 113 | jobConfig.setProperty("comments", Joiner.on("\001").useForNull("").join(comments)); 114 | 115 | readerConfig.put(HiveReaderProperties.COLUMNS_TYPES, fieldSchemaMap); 116 | 117 | if (parallelism > numSplits) { 118 | parallelism = numSplits; 119 | LOG.info( 120 | "Reader parallelism is greater than input splits count, so parallelism is set to equal with input splits count."); 121 | } 122 | 123 | if (parallelism == 1) { 124 | List splitList = new ArrayList(); 125 | for (int i = 0; i < numSplits; i++) { 126 | splitList.add(i); 127 | } 128 | readerConfig.put(HiveReaderProperties.INPUT_SPLITS, splitList); 129 | list.add(readerConfig); 130 | } else { 131 | double step = (double) numSplits / parallelism; 132 | for (int i = 0; i < parallelism; i++) { 133 | List splitList = new ArrayList(); 134 | for (int start = (int) Math.ceil(step * i), end = (int) Math 135 | .ceil(step * (i + 1)); start < end; start++) { 136 | splitList.add(start); 137 | } 138 | PluginConfig pluginConfig = (PluginConfig) readerConfig.clone(); 139 | pluginConfig.put(HiveReaderProperties.INPUT_SPLITS, splitList); 140 | list.add(pluginConfig); 141 | } 142 | } 143 | 144 | return list; 145 | } catch (Exception e) { 146 | throw new HDataException(e); 147 | } 148 | } 149 | 150 | private List getSchema(String metastoreuris, String database, String table) { 151 | HiveConf conf = new HiveConf(); 152 | conf.set(ConfVars.METASTOREURIS.varname, metastoreuris); 153 | 154 | HiveMetaStoreClient client = null; 155 | try { 156 | client = new HiveMetaStoreClient(conf); 157 | return client.getSchema(database, table); 158 | } catch (Exception e) { 159 | LOG.error("", e); 160 | return Lists.newArrayList(); 161 | } finally { 162 | if (client != null) { 163 | client.close(); 164 | } 165 | } 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /hdata-hive/src/main/java/com/github/stuxuhai/hdata/plugin/writer/hive/HiveWriterProperties.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.writer.hive; 2 | 3 | public class HiveWriterProperties { 4 | 5 | public static final String METASTORE_URIS = "metastore.uris"; 6 | public static final String DATABASE = "database"; 7 | public static final String TABLE = "table"; 8 | public static final String PARTITIONS = "partitions"; 9 | public static final String HADOOP_USER = "hadoop.user"; 10 | public static final String HDFS_CONF_PATH = "hdfs.conf.path"; 11 | } 12 | -------------------------------------------------------------------------------- 
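The Hive, HDFS and HBase splitters in this repository share the same partitioning idiom: compute a fractional step of numSplits / parallelism and assign each reader the half-open index range between consecutive ceilings, so the splits are spread as evenly as possible. A minimal standalone sketch of that idiom follows; the class and variable names are illustrative only and are not part of HData.

public class SplitAssignmentSketch {
    public static void main(String[] args) {
        int numSplits = 10;   // e.g. HCatalog input splits, matched HDFS files, or HBase region ranges
        int parallelism = 4;  // reader parallelism from the job configuration

        double step = (double) numSplits / parallelism;
        for (int i = 0; i < parallelism; i++) {
            int start = (int) Math.ceil(step * i);
            int end = (int) Math.ceil(step * (i + 1));
            // Reader i receives the half-open range [start, end) of split indices.
            System.out.printf("reader %d -> splits [%d, %d)%n", i, start, end);
        }
    }
}

With 10 splits and a parallelism of 4 this yields the ranges [0,3), [3,5), [5,8), [8,10): every split is assigned exactly once, and the larger shares go to the earlier readers.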
/hdata-hive/src/main/java/org/apache/hive/hcatalog/mapreduce/PartInfo.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | package org.apache.hive.hcatalog.mapreduce; 20 | 21 | import java.io.Serializable; 22 | import java.util.Map; 23 | import java.util.Properties; 24 | 25 | import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler; 26 | import org.apache.hive.hcatalog.data.schema.HCatSchema; 27 | 28 | /** 29 | * The Class used to serialize the partition information read from the metadata 30 | * server that maps to a partition. 31 | */ 32 | public class PartInfo implements Serializable { 33 | 34 | /** The serialization version */ 35 | private static final long serialVersionUID = 1L; 36 | 37 | /** The information about which input storage handler to use */ 38 | private final String storageHandlerClassName; 39 | private final String inputFormatClassName; 40 | private final String outputFormatClassName; 41 | private final String serdeClassName; 42 | 43 | /** HCat-specific properties set at the partition */ 44 | private final Properties hcatProperties; 45 | 46 | /** The data location. */ 47 | private final String location; 48 | 49 | /** The map of partition key names and their values. */ 50 | private Map partitionValues; 51 | 52 | /** Job properties associated with this parition */ 53 | Map jobProperties; 54 | 55 | /** the table info associated with this partition */ 56 | HCatTableInfo tableInfo; 57 | 58 | /** 59 | * Instantiates a new hcat partition info. 60 | * 61 | * @param partitionSchema 62 | * the partition schema 63 | * @param storageHandler 64 | * the storage handler 65 | * @param location 66 | * the location 67 | * @param hcatProperties 68 | * hcat-specific properties at the partition 69 | * @param jobProperties 70 | * the job properties 71 | * @param tableInfo 72 | * the table information 73 | */ 74 | public PartInfo(HCatSchema partitionSchema, HiveStorageHandler storageHandler, String location, Properties hcatProperties, 75 | Map jobProperties, HCatTableInfo tableInfo) { 76 | this.location = location; 77 | this.hcatProperties = hcatProperties; 78 | this.jobProperties = jobProperties; 79 | this.tableInfo = tableInfo; 80 | 81 | this.storageHandlerClassName = storageHandler.getClass().getName(); 82 | this.inputFormatClassName = storageHandler.getInputFormatClass().getName(); 83 | this.serdeClassName = storageHandler.getSerDeClass().getName(); 84 | this.outputFormatClassName = storageHandler.getOutputFormatClass().getName(); 85 | } 86 | 87 | /** 88 | * Gets the value of partitionSchema. 
89 | * 90 | * @return the partitionSchema 91 | */ 92 | public HCatSchema getPartitionSchema() { 93 | return tableInfo.getDataColumns(); 94 | } 95 | 96 | /** 97 | * @return the storage handler class name 98 | */ 99 | public String getStorageHandlerClassName() { 100 | return storageHandlerClassName; 101 | } 102 | 103 | /** 104 | * @return the inputFormatClassName 105 | */ 106 | public String getInputFormatClassName() { 107 | return inputFormatClassName; 108 | } 109 | 110 | /** 111 | * @return the outputFormatClassName 112 | */ 113 | public String getOutputFormatClassName() { 114 | return outputFormatClassName; 115 | } 116 | 117 | /** 118 | * @return the serdeClassName 119 | */ 120 | public String getSerdeClassName() { 121 | return serdeClassName; 122 | } 123 | 124 | /** 125 | * Gets the input storage handler properties. 126 | * 127 | * @return HCat-specific properties set at the partition 128 | */ 129 | public Properties getInputStorageHandlerProperties() { 130 | return hcatProperties; 131 | } 132 | 133 | /** 134 | * Gets the value of location. 135 | * 136 | * @return the location 137 | */ 138 | public String getLocation() { 139 | return location; 140 | } 141 | 142 | /** 143 | * Sets the partition values. 144 | * 145 | * @param partitionValues 146 | * the new partition values 147 | */ 148 | public void setPartitionValues(Map partitionValues) { 149 | this.partitionValues = partitionValues; 150 | } 151 | 152 | /** 153 | * Gets the partition values. 154 | * 155 | * @return the partition values 156 | */ 157 | public Map getPartitionValues() { 158 | return partitionValues; 159 | } 160 | 161 | /** 162 | * Gets the job properties. 163 | * 164 | * @return a map of the job properties 165 | */ 166 | public Map getJobProperties() { 167 | return jobProperties; 168 | } 169 | 170 | /** 171 | * Gets the HCatalog table information. 
172 | * 173 | * @return the table information 174 | */ 175 | public HCatTableInfo getTableInfo() { 176 | return tableInfo; 177 | } 178 | 179 | } 180 | -------------------------------------------------------------------------------- /hdata-http/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.github.stuxuhai 6 | hdata 7 | 0.2.8 8 | 9 | hdata-http 10 | 11 | 12 | 13 | com.github.stuxuhai 14 | hdata-api 15 | provided 16 | 17 | 18 | com.google.guava 19 | guava 20 | provided 21 | 22 | 23 | -------------------------------------------------------------------------------- /hdata-http/src/main/java/com/github/stuxuhai/hdata/plugin/reader/http/HttpReader.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.http; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.IOException; 5 | import java.io.InputStreamReader; 6 | import java.net.MalformedURLException; 7 | import java.net.URL; 8 | import java.net.URLConnection; 9 | 10 | import org.apache.logging.log4j.LogManager; 11 | import org.apache.logging.log4j.Logger; 12 | 13 | import com.github.stuxuhai.hdata.api.DefaultRecord; 14 | import com.github.stuxuhai.hdata.api.JobContext; 15 | import com.github.stuxuhai.hdata.api.PluginConfig; 16 | import com.github.stuxuhai.hdata.api.Reader; 17 | import com.github.stuxuhai.hdata.api.Record; 18 | import com.github.stuxuhai.hdata.api.RecordCollector; 19 | import com.github.stuxuhai.hdata.api.Splitter; 20 | import com.github.stuxuhai.hdata.exception.HDataException; 21 | 22 | public class HttpReader extends Reader { 23 | 24 | private String urlstr = null; 25 | private String encoding = null; 26 | private static final Logger LOG = LogManager.getLogger(HttpReader.class); 27 | 28 | @Override 29 | public void prepare(JobContext context, PluginConfig readerConfig) { 30 | urlstr = readerConfig.getString(HttpReaderProperties.URL); 31 | encoding = readerConfig.getString(HttpReaderProperties.ENCODING, "UTF-8"); 32 | } 33 | 34 | @Override 35 | public void execute(RecordCollector recordCollector) { 36 | URL url; 37 | try { 38 | url = new URL(urlstr); 39 | URLConnection connection = url.openConnection(); 40 | BufferedReader br = new BufferedReader(new InputStreamReader(connection.getInputStream(), encoding)); 41 | String line = null; 42 | while ((line = br.readLine()) != null) { 43 | if (line.startsWith("offset:")) { 44 | LOG.info(line); 45 | } else { 46 | Record record = new DefaultRecord(1); 47 | record.add(line); 48 | recordCollector.send(record); 49 | } 50 | } 51 | br.close(); 52 | } catch (MalformedURLException e) { 53 | throw new HDataException(e); 54 | } catch (IOException e) { 55 | throw new HDataException(e); 56 | } 57 | } 58 | 59 | @Override 60 | public Splitter newSplitter() { 61 | return new HttpSplitter(); 62 | } 63 | 64 | } 65 | -------------------------------------------------------------------------------- /hdata-http/src/main/java/com/github/stuxuhai/hdata/plugin/reader/http/HttpReaderProperties.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.http; 2 | 3 | public class HttpReaderProperties { 4 | public static final String URL = "url"; 5 | public static final String ENCODING = "encoding"; 6 | } 7 | -------------------------------------------------------------------------------- /hdata-http/src/main/java/com/github/stuxuhai/hdata/plugin/reader/http/HttpSplitter.java: 
-------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.http; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import com.github.stuxuhai.hdata.api.JobConfig; 7 | import com.github.stuxuhai.hdata.api.PluginConfig; 8 | import com.github.stuxuhai.hdata.api.Splitter; 9 | import com.google.common.base.Preconditions; 10 | 11 | public class HttpSplitter extends Splitter { 12 | 13 | @Override 14 | public List split(JobConfig jobConfig) { 15 | List list = new ArrayList(); 16 | PluginConfig readerConfig = jobConfig.getReaderConfig(); 17 | 18 | String urls = readerConfig.getString(HttpReaderProperties.URL); 19 | Preconditions.checkNotNull(urls, "HTTP reader required property: url"); 20 | 21 | String[] urlArray = urls.split(","); 22 | for (String url : urlArray) { 23 | if (!url.trim().isEmpty()) { 24 | PluginConfig pluginConfig = (PluginConfig) readerConfig.clone(); 25 | pluginConfig.put(HttpReaderProperties.URL, url); 26 | list.add(pluginConfig); 27 | } 28 | } 29 | 30 | List ids = new ArrayList(); 31 | readerConfig.put("ids", ids); 32 | 33 | return list; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /hdata-jdbc/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.github.stuxuhai 6 | hdata 7 | 0.2.8 8 | 9 | hdata-jdbc 10 | 11 | 12 | 13 | com.github.stuxuhai 14 | hdata-api 15 | provided 16 | 17 | 18 | com.github.stuxuhai 19 | hdata-core 20 | provided 21 | 22 | 23 | com.google.guava 24 | guava 25 | provided 26 | 27 | 28 | commons-dbutils 29 | commons-dbutils 30 | 1.6 31 | 32 | 33 | mysql 34 | mysql-connector-java 35 | 5.1.38 36 | 37 | 38 | -------------------------------------------------------------------------------- /hdata-jdbc/src/main/java/com/github/stuxuhai/hdata/plugin/reader/jdbc/JDBCIterator.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.jdbc; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import org.apache.logging.log4j.LogManager; 7 | import org.apache.logging.log4j.Logger; 8 | 9 | public class JDBCIterator { 10 | 11 | private static final Logger LOG = LogManager.getLogger(JDBCIterator.class); 12 | 13 | private List unitList = new ArrayList(); 14 | private Integer length = 0; 15 | private Integer current = 0; 16 | 17 | public void add(JDBCUnit unit) { 18 | unitList.add(unit); 19 | length++; 20 | } 21 | 22 | /** 23 | * 获取 下一条 SQL 24 | * 25 | */ 26 | public synchronized String getNextSQL(int seq) { 27 | if (current >= length) { 28 | return null; 29 | } 30 | 31 | String sql = unitList.get(current).getNextSQL(seq); 32 | if (sql == null) { 33 | current++; 34 | } else { 35 | return sql; 36 | } 37 | 38 | return getNextSQL(seq); 39 | } 40 | 41 | protected static class JDBCUnit { 42 | private long startCursor; 43 | private long endCursor; 44 | private long start; 45 | private long end; 46 | private long step; 47 | private int parallelism; 48 | private int middle; 49 | private String column; 50 | private String sql; 51 | 52 | public JDBCUnit(String sql, String column, long start, long end, long step, int parallelism) { 53 | 54 | this.sql = sql; 55 | this.column = column; 56 | this.start = start; 57 | this.end = end; 58 | this.step = step; 59 | 60 | this.startCursor = start; 61 | this.endCursor = end; 62 | this.parallelism = parallelism; 63 | 64 | this.middle = (int) 
Math.ceil(parallelism / 2); 65 | } 66 | 67 | public String getNextSQL(int seq) { 68 | if (startCursor >= endCursor) { 69 | return null; 70 | } 71 | 72 | long tempStart, tempEnd; 73 | 74 | // from the start to the middle, from the end to the middle 75 | if (seq <= middle) { 76 | tempStart = startCursor; 77 | 78 | if (step <= 0 || startCursor + step > endCursor) { 79 | tempEnd = endCursor; 80 | } else { 81 | tempEnd = startCursor + step; 82 | } 83 | 84 | startCursor = tempEnd; 85 | } else { 86 | tempEnd = endCursor; 87 | 88 | if (step <= 0 || startCursor + step > endCursor) { 89 | tempStart = startCursor; 90 | } else { 91 | tempStart = endCursor - step; 92 | } 93 | 94 | endCursor = tempStart; 95 | } 96 | 97 | String currentSql = sql.replace(JDBCSplitter.CONDITIONS, 98 | column + " >= " + tempStart + " AND " + column + " < " + tempEnd); 99 | 100 | LOG.debug("sql:{}", currentSql); 101 | 102 | return currentSql; 103 | } 104 | 105 | @Override 106 | public String toString() { 107 | return "JDBCUnit{" + "startCursor=" + startCursor + ", endCursor=" + endCursor + ", start=" + start 108 | + ", end=" + end + ", step=" + step + ", parallelism=" + parallelism + ", middle=" + middle 109 | + ", column='" + column + '\'' + ", sql='" + sql + '\'' + '}'; 110 | } 111 | } 112 | 113 | } 114 | -------------------------------------------------------------------------------- /hdata-jdbc/src/main/java/com/github/stuxuhai/hdata/plugin/reader/jdbc/JDBCReaderProperties.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.jdbc; 2 | 3 | public class JDBCReaderProperties { 4 | 5 | public static final String DRIVER = "driver"; 6 | public static final String URL = "url"; 7 | public static final String USERNAME = "username"; 8 | public static final String PASSWORD = "password"; 9 | public static final String TABLE = "table"; 10 | public static final String COLUMNS = "columns"; 11 | public static final String EXCLUDE_COLUMNS = "exclude.columns"; 12 | public static final String WHERE = "where"; 13 | public static final String SQL = "sql"; 14 | public static final String SQL_ITERATOR = "sql.iterator"; 15 | public static final String SQL_SEQ = "sql.sequence"; 16 | public static final String SPLIT_BY = "split.by"; 17 | public static final String MAX_SIZE_PER_FETCH = "max.size.per.fetch"; 18 | public static final String NULL_STRING = "null.string"; 19 | public static final String NULL_NON_STRING = "null.non.string"; 20 | public static final String FIELD_WRAP_REPLACE_STRING = "field.wrap.replace.string"; 21 | public static final String NUMBER_FORMAT = "number.format"; 22 | public static final String KEYWORD_ESCAPER = "keyword.escaper"; 23 | } 24 | -------------------------------------------------------------------------------- /hdata-jdbc/src/main/java/com/github/stuxuhai/hdata/plugin/writer/jdbc/JDBCWriterProperties.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.writer.jdbc; 2 | 3 | public class JDBCWriterProperties { 4 | 5 | public static final String DRIVER = "driver"; 6 | public static final String URL = "url"; 7 | public static final String USERNAME = "username"; 8 | public static final String PASSWORD = "password"; 9 | public static final String TABLE = "table"; 10 | public static final String BATCH_INSERT_SIZE = "batch.insert.size"; 11 | public static final String PARALLELISM = "parallelism"; 12 | public static final String SCHEMA = "schema"; 13 | public 
static final String KEYWORD_ESCAPER = "keyword.escaper"; 14 | public static final String UPSERT_COLUMNS = "upsert.columns"; 15 | 16 | } 17 | -------------------------------------------------------------------------------- /hdata-kafka/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.github.stuxuhai 6 | hdata 7 | 0.2.8 8 | 9 | hdata-kafka 10 | 11 | 12 | 13 | com.github.stuxuhai 14 | hdata-api 15 | provided 16 | 17 | 18 | org.apache.kafka 19 | kafka_2.11 20 | 0.8.2.1 21 | 22 | 23 | org.apache.commons 24 | commons-lang3 25 | 3.4 26 | 27 | 28 | com.google.guava 29 | guava 30 | provided 31 | 32 | 33 | com.google.code.gson 34 | gson 35 | 2.6.2 36 | 37 | 38 | -------------------------------------------------------------------------------- /hdata-kafka/src/main/java/com/github/stuxuhai/hdata/plugin/reader/kafka/KafkaReader.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.kafka; 2 | 3 | import java.lang.Thread.State; 4 | 5 | import com.github.stuxuhai.hdata.api.Fields; 6 | import com.github.stuxuhai.hdata.api.JobContext; 7 | import com.github.stuxuhai.hdata.api.OutputFieldsDeclarer; 8 | import com.github.stuxuhai.hdata.api.PluginConfig; 9 | import com.github.stuxuhai.hdata.api.Reader; 10 | import com.github.stuxuhai.hdata.api.RecordCollector; 11 | import com.github.stuxuhai.hdata.api.Splitter; 12 | import com.github.stuxuhai.hdata.exception.HDataException; 13 | 14 | public class KafkaReader extends Reader { 15 | 16 | private Fields fields; 17 | private int maxWaitingSeconds; 18 | private PluginConfig readerConfig; 19 | 20 | @Override 21 | public void prepare(JobContext context, PluginConfig readerConfig) { 22 | this.readerConfig = readerConfig; 23 | maxWaitingSeconds = readerConfig.getInt(KafkaReaderProperties.MAX_WAIT_SECOND, 300); 24 | 25 | if (readerConfig.containsKey("schema")) { 26 | fields = new Fields(); 27 | String[] tokens = readerConfig.getString("schema").split("\\s*,\\s*"); 28 | for (String field : tokens) { 29 | fields.add(field); 30 | } 31 | } 32 | } 33 | 34 | @Override 35 | public void execute(RecordCollector recordCollector) { 36 | KafkaConsumer consumer = new KafkaConsumer(readerConfig, recordCollector); 37 | 38 | Thread t = new Thread(consumer); 39 | t.start(); 40 | 41 | int sleepedSecond = 0; 42 | try { 43 | while (!t.getState().equals(State.TERMINATED)) { 44 | Thread.sleep(1000); 45 | 46 | sleepedSecond++; 47 | if (sleepedSecond >= maxWaitingSeconds) { 48 | consumer.stop(); 49 | break; 50 | } 51 | } 52 | } catch (InterruptedException e) { 53 | throw new HDataException(e); 54 | } 55 | 56 | if (sleepedSecond < maxWaitingSeconds) { 57 | consumer.stop(); 58 | } 59 | } 60 | 61 | @Override 62 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 63 | declarer.declare(fields); 64 | } 65 | 66 | @Override 67 | public Splitter newSplitter() { 68 | return null; 69 | } 70 | 71 | } 72 | -------------------------------------------------------------------------------- /hdata-kafka/src/main/java/com/github/stuxuhai/hdata/plugin/reader/kafka/KafkaReaderProperties.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.kafka; 2 | 3 | public class KafkaReaderProperties { 4 | public static final String TOPIC = "topic"; 5 | public static final String GROUP_ID = "group.id"; 6 | public static final String ZOOKEEPER_CONNECT = "zookeeper.connect"; 7 | 
public static final String CONSUME_STREAM_COUNT = "consumer.stream.count"; 8 | public static final String ENCODING = "encoding"; 9 | public static final String MAX_FETCH_SIZE = "max.fetch.size"; 10 | public static final String MAX_WAIT_SECOND = "max.wait.second"; 11 | public static final String PARTITION_ID = "partition.id"; 12 | public static final String START_OFFSET = "start.offset"; 13 | public static final String FIELDS_SEPARATOR = "fields.separator"; 14 | public static final String SCHEMA = "schema"; 15 | } 16 | -------------------------------------------------------------------------------- /hdata-kafka/src/main/java/com/github/stuxuhai/hdata/plugin/writer/kafka/KafkaWriter.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.writer.kafka; 2 | 3 | import org.apache.commons.lang3.StringEscapeUtils; 4 | 5 | import com.github.stuxuhai.hdata.api.JobContext; 6 | import com.github.stuxuhai.hdata.api.PluginConfig; 7 | import com.github.stuxuhai.hdata.api.Record; 8 | import com.github.stuxuhai.hdata.api.Writer; 9 | import com.google.common.base.Joiner; 10 | import com.google.common.base.Preconditions; 11 | 12 | import kafka.javaapi.producer.Producer; 13 | import kafka.producer.KeyedMessage; 14 | import kafka.producer.ProducerConfig; 15 | 16 | public class KafkaWriter extends Writer { 17 | 18 | private String topic = null; 19 | private String separator = null; 20 | private Producer producer; 21 | private Object[] array = null; 22 | 23 | @Override 24 | public void prepare(JobContext context, PluginConfig writerConfig) { 25 | topic = writerConfig.getString(KafkaWriterProperties.TOPIC); 26 | Preconditions.checkNotNull(topic, "Kafka writer required property: topic"); 27 | 28 | separator = StringEscapeUtils 29 | .unescapeJava(writerConfig.getString(KafkaWriterProperties.FIELDS_SEPARATOR, "\t")); 30 | producer = new Producer(new ProducerConfig(writerConfig)); 31 | } 32 | 33 | @Override 34 | public void execute(Record record) { 35 | if (array == null) { 36 | array = new Object[record.size()]; 37 | } 38 | 39 | for (int i = 0, len = record.size(); i < len; i++) { 40 | array[i] = record.get(i); 41 | } 42 | 43 | String message = Joiner.on(separator).join(array); 44 | KeyedMessage data = new KeyedMessage(topic, message, message); 45 | producer.send(data); 46 | } 47 | 48 | @Override 49 | public void close() { 50 | if (producer != null) { 51 | producer.close(); 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /hdata-kafka/src/main/java/com/github/stuxuhai/hdata/plugin/writer/kafka/KafkaWriterProperties.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.writer.kafka; 2 | 3 | public class KafkaWriterProperties { 4 | public static final String TOPIC = "topic"; 5 | public static final String FIELDS_SEPARATOR = "fields.separator"; 6 | } 7 | -------------------------------------------------------------------------------- /hdata-mongodb/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.github.stuxuhai 6 | hdata 7 | 0.2.8 8 | 9 | hdata-mongodb 10 | 11 | 12 | 13 | com.github.stuxuhai 14 | hdata-api 15 | provided 16 | 17 | 18 | org.mongodb 19 | mongo-java-driver 20 | 2.12.4 21 | 22 | 23 | com.google.guava 24 | guava 25 | provided 26 | 27 | 28 | org.apache.commons 29 | commons-lang3 30 | 3.4 31 | 32 | 33 | 
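Note on the Kafka writer above: KafkaWriter hands its PluginConfig directly to Kafka 0.8's ProducerConfig, so broker and serializer settings travel in the same config as the plugin's own keys. A minimal sketch of assembling such a config follows; it assumes PluginConfig is Properties-backed (it is passed straight to ProducerConfig above and populated via put() elsewhere in this repository), and the broker addresses and topic name are placeholders.

import com.github.stuxuhai.hdata.api.PluginConfig;
import com.github.stuxuhai.hdata.plugin.writer.kafka.KafkaWriterProperties;

public class KafkaWriterConfigSketch {
    public static void main(String[] args) {
        PluginConfig writerConfig = new PluginConfig();
        // Keys defined by KafkaWriterProperties above.
        writerConfig.put(KafkaWriterProperties.TOPIC, "hdata_out");
        writerConfig.put(KafkaWriterProperties.FIELDS_SEPARATOR, "\\t"); // unescaped to a tab character by KafkaWriter
        // Standard Kafka 0.8 producer settings consumed by ProducerConfig.
        writerConfig.put("metadata.broker.list", "broker1:9092,broker2:9092");
        writerConfig.put("serializer.class", "kafka.serializer.StringEncoder"); // messages are joined Strings
    }
}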
-------------------------------------------------------------------------------- /hdata-mongodb/src/main/java/com/github/stuxuhai/hdata/plugin/reader/mongodb/MongoDBReader.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.mongodb; 2 | 3 | import java.net.UnknownHostException; 4 | import java.util.Set; 5 | 6 | import com.github.stuxuhai.hdata.api.DefaultRecord; 7 | import com.github.stuxuhai.hdata.api.Fields; 8 | import com.github.stuxuhai.hdata.api.JobContext; 9 | import com.github.stuxuhai.hdata.api.OutputFieldsDeclarer; 10 | import com.github.stuxuhai.hdata.api.PluginConfig; 11 | import com.github.stuxuhai.hdata.api.Reader; 12 | import com.github.stuxuhai.hdata.api.Record; 13 | import com.github.stuxuhai.hdata.api.RecordCollector; 14 | import com.github.stuxuhai.hdata.api.Splitter; 15 | import com.github.stuxuhai.hdata.exception.HDataException; 16 | import com.mongodb.BasicDBObject; 17 | import com.mongodb.DB; 18 | import com.mongodb.DBCollection; 19 | import com.mongodb.DBCursor; 20 | import com.mongodb.DBObject; 21 | import com.mongodb.MongoClient; 22 | import com.mongodb.MongoClientURI; 23 | 24 | public class MongoDBReader extends Reader { 25 | 26 | private Fields fields; 27 | private String uri; 28 | private BasicDBObject condition; 29 | private static final String OBJECT_ID_KEY = "_id"; 30 | 31 | @Override 32 | public void prepare(JobContext context, PluginConfig readerConfig) { 33 | uri = readerConfig.getString(MongoDBReaderProperties.URI); 34 | condition = (BasicDBObject) readerConfig.get(MongoDBReaderProperties.QUERY); 35 | } 36 | 37 | @Override 38 | public void execute(RecordCollector recordCollector) { 39 | MongoClientURI clientURI = new MongoClientURI(uri); 40 | MongoClient mongoClient = null; 41 | try { 42 | mongoClient = new MongoClient(clientURI); 43 | DB db = mongoClient.getDB(clientURI.getDatabase()); 44 | DBCollection coll = db.getCollection(clientURI.getCollection()); 45 | DBCursor cur = coll.find(condition); 46 | while (cur.hasNext()) { 47 | DBObject doc = cur.next(); 48 | Set keys = doc.keySet(); 49 | Record record = new DefaultRecord(keys.size() - 1); 50 | if (fields == null) { 51 | fields = new Fields(); 52 | for (String key : keys) { 53 | fields.add(key); 54 | } 55 | } 56 | 57 | for (String key : keys) { 58 | if (!OBJECT_ID_KEY.equals(key)) { 59 | record.add(doc.get(key)); 60 | } 61 | } 62 | 63 | recordCollector.send(record); 64 | } 65 | } catch (UnknownHostException e) { 66 | throw new HDataException(e); 67 | } finally { 68 | if (mongoClient != null) { 69 | mongoClient.close(); 70 | } 71 | } 72 | } 73 | 74 | @Override 75 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 76 | declarer.declare(fields); 77 | } 78 | 79 | @Override 80 | public Splitter newSplitter() { 81 | return new MongoDBSplitter(); 82 | } 83 | 84 | } 85 | -------------------------------------------------------------------------------- /hdata-mongodb/src/main/java/com/github/stuxuhai/hdata/plugin/reader/mongodb/MongoDBReaderProperties.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.mongodb; 2 | 3 | public class MongoDBReaderProperties { 4 | 5 | public static final String URI = "uri"; 6 | public static final String QUERY = "query"; 7 | } 8 | -------------------------------------------------------------------------------- 
/hdata-mongodb/src/main/java/com/github/stuxuhai/hdata/plugin/reader/mongodb/MongoDBSplitter.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.reader.mongodb; 2 | 3 | import java.math.BigInteger; 4 | import java.net.UnknownHostException; 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | 8 | import org.apache.logging.log4j.LogManager; 9 | import org.apache.logging.log4j.Logger; 10 | import org.bson.types.ObjectId; 11 | 12 | import com.github.stuxuhai.hdata.api.JobConfig; 13 | import com.github.stuxuhai.hdata.api.PluginConfig; 14 | import com.github.stuxuhai.hdata.api.Splitter; 15 | import com.google.common.base.Preconditions; 16 | import com.google.common.base.Throwables; 17 | import com.mongodb.BasicDBObject; 18 | import com.mongodb.DB; 19 | import com.mongodb.DBCollection; 20 | import com.mongodb.DBCursor; 21 | import com.mongodb.DBObject; 22 | import com.mongodb.MongoClient; 23 | import com.mongodb.MongoClientURI; 24 | import com.mongodb.util.JSON; 25 | 26 | public class MongoDBSplitter extends Splitter { 27 | 28 | private static final String OBJECT_ID_KEY = "_id"; 29 | private static final int HEXADECIMAL = 16; 30 | private static final Logger LOGGER = LogManager.getLogger(MongoDBSplitter.class); 31 | 32 | @Override 33 | public List<PluginConfig> split(JobConfig jobConfig) { 34 | List<PluginConfig> list = new ArrayList<PluginConfig>(); 35 | PluginConfig readerConfig = jobConfig.getReaderConfig(); 36 | String uri = readerConfig.getString(MongoDBReaderProperties.URI); 37 | Preconditions.checkNotNull(uri, "MongoDB reader required property: uri"); 38 | int parallelism = readerConfig.getParallelism(); 39 | 40 | MongoClientURI clientURI = new MongoClientURI(uri); 41 | MongoClient mongoClient = null; 42 | try { 43 | mongoClient = new MongoClient(clientURI); 44 | DB db = mongoClient.getDB(clientURI.getDatabase()); 45 | DBCollection coll = db.getCollection(clientURI.getCollection()); 46 | 47 | String maxID = ""; 48 | String minID = ""; 49 | DBObject sort = new BasicDBObject(); 50 | sort.put(OBJECT_ID_KEY, -1); 51 | DBCursor cursor = coll.find().sort(sort).limit(1); 52 | while (cursor.hasNext()) { 53 | maxID = cursor.next().get(OBJECT_ID_KEY).toString(); 54 | } 55 | 56 | sort.put(OBJECT_ID_KEY, 1); 57 | cursor = coll.find().sort(sort).limit(1); 58 | while (cursor.hasNext()) { 59 | minID = cursor.next().get(OBJECT_ID_KEY).toString(); 60 | } 61 | 62 | if (!maxID.isEmpty() && !minID.isEmpty()) { 63 | BigInteger maxBigInteger = new BigInteger(maxID, HEXADECIMAL); 64 | BigInteger minBigInteger = new BigInteger(minID, HEXADECIMAL); 65 | BigInteger step = (maxBigInteger.subtract(minBigInteger).divide(BigInteger.valueOf(parallelism))); 66 | for (int i = 0, len = parallelism; i < len; i++) { 67 | BasicDBObject condition = null; 68 | if (readerConfig.containsKey(MongoDBReaderProperties.QUERY)) { 69 | condition = (BasicDBObject) JSON.parse(readerConfig.getString(MongoDBReaderProperties.QUERY)); 70 | } else { 71 | condition = new BasicDBObject(); 72 | } 73 | 74 | BasicDBObject idRange = new BasicDBObject("$gte", new ObjectId( 75 | minBigInteger.add(step.multiply(BigInteger.valueOf(i))).toString(HEXADECIMAL))); 76 | if (i == len - 1) { 77 | idRange.append("$lte", new ObjectId(maxBigInteger.toString(HEXADECIMAL))); 78 | } else { 79 | idRange.append("$lt", new ObjectId( 80 | minBigInteger.add(step.multiply(BigInteger.valueOf(i + 1))).toString(HEXADECIMAL))); 81 | } 82 | 83 | condition.put(OBJECT_ID_KEY, idRange); 84 | 85 | PluginConfig pluginConfig =
(PluginConfig) readerConfig.clone(); 86 | pluginConfig.put(MongoDBReaderProperties.QUERY, condition); 87 | list.add(pluginConfig); 88 | } 89 | } 90 | } catch (UnknownHostException e) { 91 | LOGGER.error(Throwables.getStackTraceAsString(e)); 92 | } finally { 93 | if (mongoClient != null) { 94 | mongoClient.close(); 95 | } 96 | } 97 | 98 | return list; 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /hdata-mongodb/src/main/java/com/github/stuxuhai/hdata/plugin/writer/mongodb/MongoDBWriter.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.writer.mongodb; 2 | 3 | import java.net.UnknownHostException; 4 | 5 | import org.apache.commons.lang3.ArrayUtils; 6 | 7 | import com.github.stuxuhai.hdata.api.Fields; 8 | import com.github.stuxuhai.hdata.api.JobContext; 9 | import com.github.stuxuhai.hdata.api.PluginConfig; 10 | import com.github.stuxuhai.hdata.api.Record; 11 | import com.github.stuxuhai.hdata.api.Writer; 12 | import com.github.stuxuhai.hdata.exception.HDataException; 13 | import com.google.common.base.Preconditions; 14 | import com.mongodb.BasicDBObject; 15 | import com.mongodb.DB; 16 | import com.mongodb.DBCollection; 17 | import com.mongodb.MongoClient; 18 | import com.mongodb.MongoClientURI; 19 | 20 | public class MongoDBWriter extends Writer { 21 | 22 | private Fields fields; 23 | private MongoClient mongoClient = null; 24 | private DBCollection coll; 25 | private BasicDBObject[] insertDocs; 26 | private int batchsize; 27 | private int count; 28 | 29 | @Override 30 | public void prepare(JobContext context, PluginConfig writerConfig) { 31 | fields = context.getFields(); 32 | batchsize = writerConfig.getInt(MongoDBWriterProperties.BATCH_INSERT_SIZE, 1000); 33 | insertDocs = new BasicDBObject[batchsize]; 34 | 35 | Preconditions.checkNotNull(writerConfig.getString(MongoDBWriterProperties.URI), 36 | "MongoDB writer required property: uri"); 37 | MongoClientURI clientURI = new MongoClientURI(writerConfig.getString(MongoDBWriterProperties.URI)); 38 | try { 39 | mongoClient = new MongoClient(clientURI); 40 | DB db = mongoClient.getDB(clientURI.getDatabase()); 41 | coll = db.getCollection(clientURI.getCollection()); 42 | } catch (UnknownHostException e) { 43 | throw new HDataException(e); 44 | } 45 | } 46 | 47 | @Override 48 | public void execute(Record record) { 49 | BasicDBObject doc = new BasicDBObject(); 50 | for (int i = 0, len = fields.size(); i < len; i++) { 51 | doc.put(fields.get(i), record.get(i)); 52 | } 53 | 54 | insertDocs[count++] = doc; 55 | if (count == batchsize) { 56 | coll.insert(insertDocs); 57 | count = 0; 58 | } 59 | } 60 | 61 | @Override 62 | public void close() { 63 | if (mongoClient != null) { 64 | if (count > 0) { 65 | coll.insert(ArrayUtils.subarray(insertDocs, 0, count)); 66 | } 67 | mongoClient.close(); 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /hdata-mongodb/src/main/java/com/github/stuxuhai/hdata/plugin/writer/mongodb/MongoDBWriterProperties.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.writer.mongodb; 2 | 3 | public class MongoDBWriterProperties { 4 | public static final String URI = "uri"; 5 | public static final String BATCH_INSERT_SIZE = "batch.insert.size"; 6 | } 7 | -------------------------------------------------------------------------------- /hdata-wit/pom.xml: 
-------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.github.stuxuhai 6 | hdata 7 | 0.2.8 8 | 9 | hdata-wit 10 | 11 | 12 | 13 | com.github.stuxuhai 14 | hdata-api 15 | provided 16 | 17 | 18 | com.github.stuxuhai 19 | hdata-core 20 | provided 21 | 22 | 23 | org.slf4j 24 | slf4j-api 25 | ${slf4j.version} 26 | provided 27 | 28 | 29 | org.febit.wit 30 | wit-core 31 | ${wit.version} 32 | 33 | 34 | -------------------------------------------------------------------------------- /hdata-wit/src/main/java/com/github/stuxuhai/hdata/plugin/wit/Methods.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.wit; 2 | 3 | import com.github.stuxuhai.hdata.api.Record; 4 | import org.febit.wit.Context; 5 | import org.febit.wit.Engine; 6 | import org.febit.wit.InternalContext; 7 | import org.febit.wit.core.NativeFactory; 8 | import org.febit.wit.global.GlobalManager; 9 | import org.febit.wit.lang.MethodDeclare; 10 | import org.febit.wit.plugin.EnginePlugin; 11 | import org.febit.wit.util.JavaNativeUtil; 12 | 13 | /** 14 | * 15 | * @author zqq90 16 | */ 17 | public class Methods implements EnginePlugin { 18 | 19 | /** 20 | * A empty function, do nothing. 21 | */ 22 | public static final MethodDeclare noop = new MethodDeclare() { 23 | @Override 24 | public Object invoke(InternalContext context, Object[] args) { 25 | return Context.VOID; 26 | } 27 | }; 28 | 29 | public static final Record newRecord() { 30 | return new WitDynamicRecord(); 31 | } 32 | 33 | public static final Record copyRecord(Record record) { 34 | return new WitDynamicRecord(record); 35 | } 36 | 37 | @Override 38 | public void apply(Engine engine) { 39 | NativeFactory nativeFactory = engine.getNativeFactory(); 40 | GlobalManager manager = engine.getGlobalManager(); 41 | JavaNativeUtil.addStaticMethods(manager, nativeFactory, Methods.class); 42 | JavaNativeUtil.addConstFields(manager, nativeFactory, Methods.class); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /hdata-wit/src/main/java/com/github/stuxuhai/hdata/plugin/wit/WitDynamicRecord.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.wit; 2 | 3 | import com.github.stuxuhai.hdata.api.Record; 4 | import java.util.ArrayList; 5 | import java.util.List; 6 | 7 | /** 8 | * 9 | * @author zqq90 10 | */ 11 | public class WitDynamicRecord implements Record { 12 | 13 | private final List datas; 14 | 15 | public WitDynamicRecord() { 16 | this.datas = new ArrayList<>(); 17 | } 18 | 19 | public WitDynamicRecord(Record record) { 20 | List list = new ArrayList<>(); 21 | for (int i = 0, len = record.size(); i < len; i++) { 22 | list.add(record.get(i)); 23 | } 24 | this.datas = list; 25 | } 26 | 27 | @Override 28 | public void add(Object object) { 29 | this.datas.add(object); 30 | } 31 | 32 | @Override 33 | public void add(int index, Object object) { 34 | final int size = this.datas.size(); 35 | if (index >= size) { 36 | for (int i = index - size; i != 0; i--) { 37 | this.datas.add(null); 38 | } 39 | this.datas.add(object); 40 | } else { 41 | this.datas.set(index, object); 42 | } 43 | } 44 | 45 | @Override 46 | public Object get(int index) { 47 | if (index >= this.datas.size()) { 48 | return null; 49 | } 50 | return this.datas.get(index); 51 | } 52 | 53 | @Override 54 | public int size() { 55 | return this.datas.size(); 56 | } 57 | 58 | } 59 | 
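Note on WitDynamicRecord above: add(index, value) pads the backing list with nulls when the index is past the current size and overwrites in place otherwise, and get(index) returns null instead of throwing when the index is out of range, so wit scripts can fill record slots out of order. A short illustration using only the methods defined above:

import com.github.stuxuhai.hdata.api.Record;
import com.github.stuxuhai.hdata.plugin.wit.WitDynamicRecord;

public class WitDynamicRecordDemo {
    public static void main(String[] args) {
        Record r = new WitDynamicRecord();
        r.add(2, "c");                 // pads indexes 0 and 1 with null, then appends "c" -> size 3
        r.add(0, "a");                 // index 0 already exists, so it is overwritten in place
        r.add("d");                    // plain add appends at the end -> size 4
        System.out.println(r.size());  // 4
        System.out.println(r.get(1));  // null (never assigned)
        System.out.println(r.get(9));  // null (out-of-range reads return null rather than throwing)
    }
}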
-------------------------------------------------------------------------------- /hdata-wit/src/main/java/com/github/stuxuhai/hdata/plugin/wit/resolvers/RecordResolver.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.wit.resolvers; 2 | 3 | import com.github.stuxuhai.hdata.api.Record; 4 | import org.febit.wit.exceptions.ScriptRuntimeException; 5 | import org.febit.wit.resolvers.GetResolver; 6 | import org.febit.wit.resolvers.SetResolver; 7 | import org.febit.wit.util.StringUtil; 8 | 9 | /** 10 | * 11 | * @author zqq90 12 | */ 13 | public class RecordResolver implements GetResolver, SetResolver { 14 | 15 | @Override 16 | public Class getMatchClass() { 17 | return Record.class; 18 | } 19 | 20 | @Override 21 | public Object get(Object object, Object property) { 22 | if (property instanceof Number) { 23 | try { 24 | return ((Record) object).get(((Number) property).intValue()); 25 | } catch (IndexOutOfBoundsException e) { 26 | throw new ScriptRuntimeException(StringUtil.format("index out of bounds:{}", property)); 27 | } 28 | } 29 | switch (property.toString()) { 30 | case "size": 31 | case "length": 32 | return ((Record) object).size(); 33 | } 34 | throw new ScriptRuntimeException(StringUtil.format("Invalid property or can't read: com.github.stuxuhai.hdata.api.Record#{}", property)); 35 | } 36 | 37 | @Override 38 | public void set(Object object, Object property, Object value) { 39 | if (property instanceof Number) { 40 | ((Record) object).add(((Number) property).intValue(), value); 41 | return; 42 | } 43 | throw new ScriptRuntimeException(StringUtil.format("Invalid property or can't write: com.github.stuxuhai.hdata.api.Record#{}", property)); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /hdata-wit/src/main/java/com/github/stuxuhai/hdata/plugin/wit/writer/WitWriter.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.wit.writer; 2 | 3 | import java.io.IOException; 4 | import com.github.stuxuhai.hdata.api.JobContext; 5 | import com.github.stuxuhai.hdata.api.PluginConfig; 6 | import com.github.stuxuhai.hdata.api.Record; 7 | import com.github.stuxuhai.hdata.api.Writer; 8 | import com.github.stuxuhai.hdata.core.PluginLoader; 9 | import com.github.stuxuhai.hdata.exception.HDataException; 10 | import com.github.stuxuhai.hdata.util.PluginUtils; 11 | import com.google.common.base.Preconditions; 12 | import org.febit.wit.Context; 13 | import org.febit.wit.Engine; 14 | import org.febit.wit.Template; 15 | import org.febit.wit.util.KeyValuesUtil; 16 | 17 | public class WitWriter extends Writer { 18 | 19 | public static final String KEY_INPUT = "input"; 20 | public static final String KEY_RESULT = "__result"; 21 | 22 | private static class LazyHolder { 23 | 24 | static final Engine ENGINE; 25 | 26 | static { 27 | ENGINE = Engine.create("hdata-wit-writer.wim"); 28 | } 29 | } 30 | 31 | private Template template = null; 32 | private Writer innerWriter = null; 33 | private final String[] templateParamNames = new String[]{KEY_INPUT}; 34 | 35 | protected Template createTemplate(String tmpl) throws IOException { 36 | return LazyHolder.ENGINE.getTemplate("code: var " + KEY_INPUT + "; var " + KEY_RESULT + " = (()->{\n" + tmpl + "\n})();"); 37 | } 38 | 39 | protected Object executeTemplate(Record input) { 40 | Context context = template.merge( 41 | KeyValuesUtil.wrap(templateParamNames, new 
Object[]{input})); 42 | Object result = context.get(KEY_RESULT); 43 | return result; 44 | } 45 | 46 | protected Writer createInnerWriter(JobContext context, PluginConfig writerConfig) { 47 | String innerWriterName = writerConfig.getString(WitWriterProperties.INNER_WRITER); 48 | Preconditions.checkNotNull(innerWriterName, "Wit writer required property: " + WitWriterProperties.INNER_WRITER); 49 | 50 | String writerClassName = PluginLoader.getWriterClassName(innerWriterName); 51 | Preconditions.checkNotNull(writerClassName, "Can not find class for writer: " + innerWriterName); 52 | 53 | try { 54 | return (Writer) PluginUtils.loadClass(innerWriterName, writerClassName).newInstance(); 55 | } catch (Exception e) { 56 | throw new HDataException("Can not create new writer instance for: " + innerWriterName, e); 57 | } 58 | } 59 | 60 | @Override 61 | public void prepare(JobContext context, PluginConfig writerConfig) { 62 | String wit = writerConfig.getString(WitWriterProperties.WIT); 63 | Preconditions.checkNotNull(wit, "Wit writer required property: " + WitWriterProperties.WIT); 64 | 65 | try { 66 | this.template = createTemplate(wit); 67 | } catch (IOException ex) { 68 | throw new HDataException("Failed to load wit", ex); 69 | } 70 | this.template.reload(); 71 | 72 | PluginConfig innerWriterConfig = new PluginConfig(); 73 | innerWriterConfig.putAll(writerConfig); 74 | innerWriterConfig.remove(WitWriterProperties.INNER_WRITER); 75 | innerWriterConfig.remove(WitWriterProperties.WIT); 76 | this.innerWriter = createInnerWriter(context, writerConfig); 77 | this.innerWriter.prepare(context, innerWriterConfig); 78 | } 79 | 80 | @Override 81 | public void execute(Record input) { 82 | Object result = executeTemplate(input); 83 | write(result); 84 | } 85 | 86 | protected void write(Object result) { 87 | if (result == null 88 | || result == Context.VOID) { 89 | // TODO filtered 90 | } else if (result instanceof Record) { 91 | this.innerWriter.execute((Record) result); 92 | } else if (result instanceof Object[]) { 93 | for (Object item : (Object[]) result) { 94 | write(item); 95 | } 96 | } else if (result instanceof String) { 97 | // TODO filtered with reason 98 | } else { 99 | throw new HDataException("Wit result type not support: " + result.getClass()); 100 | } 101 | } 102 | 103 | @Override 104 | public void close() { 105 | this.innerWriter.close(); 106 | } 107 | 108 | } 109 | -------------------------------------------------------------------------------- /hdata-wit/src/main/java/com/github/stuxuhai/hdata/plugin/wit/writer/WitWriterProperties.java: -------------------------------------------------------------------------------- 1 | package com.github.stuxuhai.hdata.plugin.wit.writer; 2 | 3 | public class WitWriterProperties { 4 | public static final String WIT = "wit"; 5 | public static final String INNER_WRITER = "witInnerWriter"; 6 | } 7 | -------------------------------------------------------------------------------- /hdata-wit/src/main/resources/META-INF/services/org.febit.wit.plugin.EnginePlugin: -------------------------------------------------------------------------------- 1 | com.github.stuxuhai.hdata.plugin.wit.Methods 2 | -------------------------------------------------------------------------------- /hdata-wit/src/main/resources/hdata-wit-writer.wim: -------------------------------------------------------------------------------- 1 | 2 | [engine] 3 | 4 | [logger :slf4jLogger] 5 | [nativeFactory :defaultNativeFactory] 6 | [resolverManager :defaultResolverManager] 7 | 8 | [spiPluginCollector] 9 
| enable=true 10 | 11 | [codeStringLoader :stringLoader] 12 | codeFirst=true 13 | 14 | [routeLoader] 15 | loaders +=''' 16 | code: codeStringLoader 17 | ''' 18 | 19 | [resolverManager] 20 | resolvers+=''' 21 | com.github.stuxuhai.hdata.plugin.wit.resolvers.RecordResolver 22 | ''' 23 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | com.github.stuxuhai 5 | hdata 6 | 0.2.8 7 | HData 8 | pom 9 | 10 | 11 | UTF-8 12 | 1.7 13 | 1.7 14 | 1.7.12 15 | 3.4.6 16 | 2.7.1 17 | 1.1.2 18 | 1.2.1 19 | 2.4.0-beta 20 | compile 21 | compile 22 | compile 23 | compile 24 | 25 | 26 | 27 | 28 | 29 | com.github.stuxuhai 30 | hdata-api 31 | ${project.version} 32 | 33 | 34 | com.github.stuxuhai 35 | hdata-core 36 | ${project.version} 37 | 38 | 39 | com.google.guava 40 | guava 41 | 19.0 42 | 43 | 44 | 45 | 46 | 47 | 48 | cdh5 49 | 50 | cdh5.7.4 51 | 3.4.5-${cdh.version} 52 | 2.6.0-${cdh.version} 53 | 1.2.0-${cdh.version} 54 | 1.1.0-${cdh.version} 55 | provided 56 | provided 57 | provided 58 | provided 59 | 60 | 61 | 62 | cloudera-repos 63 | https://repository.cloudera.com/artifactory/cloudera-repos 64 | Cloudera Repositories 65 | 66 | false 67 | 68 | 69 | 70 | 71 | 72 | copy-dependency 73 | 74 | 75 | 76 | org.apache.maven.plugins 77 | maven-dependency-plugin 78 | 79 | runtime 80 | 81 | 82 | 83 | package 84 | 85 | copy-dependencies 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | make-package 95 | 96 | ${basedir}/../target/all-modules/${project.artifactId} 97 | 98 | 99 | assembly 100 | 101 | 102 | 103 | 104 | org.apache.maven.plugins 105 | maven-jar-plugin 106 | 107 | ${modulesOutputDirectory} 108 | 109 | 110 | 111 | org.apache.maven.plugins 112 | maven-dependency-plugin 113 | 114 | 115 | package 116 | 117 | copy-dependencies 118 | 119 | 120 | ${modulesOutputDirectory} 121 | runtime 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | org.apache.maven.plugins 136 | maven-compiler-plugin 137 | 3.5.1 138 | 139 | 140 | org.apache.maven.plugins 141 | maven-jar-plugin 142 | 2.6 143 | 144 | 145 | org.apache.maven.plugins 146 | maven-dependency-plugin 147 | 2.10 148 | 149 | 150 | org.apache.maven.plugins 151 | maven-assembly-plugin 152 | 2.5.3 153 | 154 | 155 | 156 | 157 | 158 | 159 | hdata-api 160 | hdata-core 161 | hdata-console 162 | hdata-csv 163 | hdata-jdbc 164 | hdata-ftp 165 | hdata-http 166 | hdata-kafka 167 | hdata-hdfs 168 | hdata-hive 169 | hdata-hbase 170 | hdata-mongodb 171 | hdata-excel 172 | hdata-wit 173 | 174 | 175 | --------------------------------------------------------------------------------
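Note on the MongoDB splitter above: MongoDBSplitter turns the collection's minimum and maximum _id into hexadecimal BigIntegers, divides the span by the reader parallelism, and builds half-open $gte/$lt ranges, closing the last range with $lte so the maximum document is included. A worked sketch of just that range arithmetic follows; the two 24-character hex _id bounds are hypothetical sample values, and the real splitter wraps each bound in an ObjectId inside the per-split query condition.

import java.math.BigInteger;

public class ObjectIdRangeSketch {
    public static void main(String[] args) {
        // Hypothetical minimum and maximum _id values read from the collection.
        BigInteger min = new BigInteger("5f0000000000000000000000", 16);
        BigInteger max = new BigInteger("5f0000000000000000000fff", 16);
        int parallelism = 4;

        BigInteger step = max.subtract(min).divide(BigInteger.valueOf(parallelism));
        for (int i = 0; i < parallelism; i++) {
            BigInteger lower = min.add(step.multiply(BigInteger.valueOf(i)));
            if (i == parallelism - 1) {
                // The last split is closed on the right so the maximum _id is not dropped.
                System.out.printf("split %d: _id >= %s AND _id <= %s%n", i, lower.toString(16), max.toString(16));
            } else {
                BigInteger upper = min.add(step.multiply(BigInteger.valueOf(i + 1)));
                System.out.printf("split %d: _id >= %s AND _id < %s%n", i, lower.toString(16), upper.toString(16));
            }
        }
    }
}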