├── conf ├── jobInfoDB.properties ├── hive-site.xml ├── core-site.xml ├── log4j.properties └── engine.xml ├── .gitignore ├── src ├── test │ ├── java │ │ └── com │ │ │ └── dp │ │ │ └── nebula │ │ │ └── wormhole │ │ │ ├── engine │ │ │ ├── storage │ │ │ │ ├── RAMStorageTest.java │ │ │ │ ├── SingleQueueTest.java │ │ │ │ └── DoubleQueueTest.java │ │ │ ├── common │ │ │ │ ├── FakeReader.java │ │ │ │ ├── FakeWriter.java │ │ │ │ └── TestUtils.java │ │ │ ├── utils │ │ │ │ ├── JarLoaderTest.java │ │ │ │ └── ReflectionUtilTest.java │ │ │ └── core │ │ │ │ ├── EngineTest.java │ │ │ │ ├── ReaderThreadTest.java │ │ │ │ └── WriterThreadTest.java │ │ │ ├── transform │ │ │ ├── utils │ │ │ │ └── FieldTransformUtilsTest.java │ │ │ └── impls │ │ │ │ └── AddAndFiltTransformerTest.java │ │ │ ├── plugins │ │ │ ├── reader │ │ │ │ ├── sftpreader │ │ │ │ │ ├── MyPublickeyAuthenticator.java │ │ │ │ │ └── MyPasswordAuthenticator.java │ │ │ │ ├── mongoreader │ │ │ │ │ └── MongoReaderTest.java │ │ │ │ └── greenplumreader │ │ │ │ │ └── GreenplumReaderTest.java │ │ │ └── common │ │ │ │ ├── SFTPUtilsTest.java │ │ │ │ └── DFSUtilsTest.java │ │ │ └── common │ │ │ ├── utils │ │ │ ├── StringUtilTest.java │ │ │ └── ParseXMLUtilTest.java │ │ │ ├── DefaultParamTest.java │ │ │ └── BufferedLineExchangerTest.java │ └── resources │ │ └── wormhole_hivereader_to_hdfswriter_test.xml └── main │ └── java │ └── com │ └── dp │ └── nebula │ └── wormhole │ ├── common │ ├── interfaces │ │ ├── ILineReceiver.java │ │ ├── IReaderPeriphery.java │ │ ├── ISourceCounter.java │ │ ├── IReader.java │ │ ├── ITargetCounter.java │ │ ├── IWriterPeriphery.java │ │ ├── IWriter.java │ │ ├── ILineSender.java │ │ ├── ISplitter.java │ │ ├── ITransformer.java │ │ ├── ITransmissionPeriphery.java │ │ ├── IPluginMonitor.java │ │ ├── IPlugin.java │ │ ├── IParam.java │ │ └── ILine.java │ ├── plugin │ │ └── PluginStatus.java │ ├── AbstractTransformer.java │ ├── AbstractSplitter.java │ ├── LineExchangerFactory.java │ ├── utils │ │ ├── Environment.java │ │ ├── StringUtil.java │ │ └── ClassNode.java │ ├── DefaultPluginMonitor.java │ ├── DefaultLineExchanger.java │ ├── config │ │ ├── JobPluginConf.java │ │ └── JobConf.java │ ├── WormholeException.java │ ├── AbstractPlugin.java │ ├── JobStatus.java │ └── BufferedLineExchanger.java │ ├── engine │ ├── core │ │ ├── DefaultSplitter.java │ │ ├── DefaultReaderPeriphery.java │ │ ├── DefaultWriterPeriphery.java │ │ ├── WriterThread.java │ │ └── ReaderThread.java │ ├── config │ │ ├── PluginConfParamKey.java │ │ └── EngineConfParamKey.java │ ├── storage │ │ ├── AbstractStorage.java │ │ ├── StorageQueue.java │ │ ├── StorageManager.java │ │ ├── StorageConf.java │ │ └── IStorage.java │ ├── utils │ │ ├── ReflectionUtil.java │ │ └── JarLoader.java │ └── monitor │ │ ├── FailedInfo.java │ │ ├── CompletedMonitorInfo.java │ │ ├── RealtimeMonitorInfo.java │ │ ├── StorageMonitorInfo.java │ │ └── WormHoleJobInfo.java │ ├── plugins │ ├── reader │ │ ├── hivereader │ │ │ ├── HiveReaderMode.java │ │ │ ├── ParamKey.java │ │ │ ├── HiveReaderSplitter.java │ │ │ └── HiveReaderPeriphery.java │ │ ├── salesforcereader │ │ │ ├── DescribeRefObject.java │ │ │ └── ParamKey.java │ │ ├── hbasereader │ │ │ ├── ParamKey.java │ │ │ └── HBaseReader.java │ │ ├── hdfsreader │ │ │ ├── ParamKey.java │ │ │ └── HdfsDirSplitter.java │ │ ├── sftpreader │ │ │ └── ParamKey.java │ │ ├── mongoreader │ │ │ ├── MongoReaderPeriphery.java │ │ │ └── ParamKey.java │ │ ├── mysqlreader │ │ │ ├── MysqlReaderSplitter.java │ │ │ └── MysqlReader.java │ │ └── sqlserverreader │ │ │ └── 
SqlserverReader.java │ ├── common │ │ ├── ParamKey.java │ │ ├── SFTPUtils.java │ │ ├── DBResultSetReceiver.java │ │ ├── PCInfo.java │ │ ├── ErrorCodeUtils.java │ │ ├── MongoUtils.java │ │ ├── MetaData.java │ │ ├── DBUtils.java │ │ └── DBResultSetSender.java │ └── writer │ │ ├── mongowriter │ │ ├── MongoWriterSplitter.java │ │ ├── ParamKey.java │ │ ├── MongoWriterPeriphery.java │ │ └── MongoWriter.java │ │ ├── hbasewriter │ │ ├── HBaseSplitter.java │ │ ├── ParamKey.java │ │ ├── HBaseWriterPeriphery.java │ │ └── HBaseWriter.java │ │ ├── sftpwriter │ │ ├── ParamKey.java │ │ ├── SftpDirSplitter.java │ │ └── SftpWriterPeriphery.java │ │ ├── hdfswriter │ │ ├── HdfsDirSplitter.java │ │ └── ParamKey.java │ │ └── greenplumwriter │ │ └── ParamKey.java │ └── transform │ ├── utils │ └── FieldTransformUtils.java │ ├── common │ └── TransformerFactory.java │ └── impls │ ├── AddAndFiltTransformer.java │ └── HippoMongoToGPTransformer.java ├── README.md ├── job.xml └── bin └── wormhole.sh /conf/jobInfoDB.properties: -------------------------------------------------------------------------------- 1 | ip=10.1.1.220 2 | port=3306 3 | database=hadoop 4 | user=hive 5 | password=acoc ?ea0gbEibOkETmO -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | .classpath 3 | .project 4 | target/ 5 | .settings/ 6 | .svn/ 7 | build/ 8 | 9 | # Package Files # 10 | *.jar 11 | *.war 12 | *.ear 13 | 14 | -------------------------------------------------------------------------------- /src/test/java/com/dp/nebula/wormhole/engine/storage/RAMStorageTest.java: -------------------------------------------------------------------------------- 1 | //package com.dp.nebula.wormhole.engine.storage; 2 | // 3 | //public class RAMStorageTest { 4 | // 5 | //} 6 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/interfaces/ILineReceiver.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common.interfaces; 2 | 3 | public interface ILineReceiver { 4 | 5 | ILine receive(); 6 | 7 | } 8 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/interfaces/IReaderPeriphery.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common.interfaces; 2 | 3 | public interface IReaderPeriphery extends ITransmissionPeriphery{ 4 | 5 | } 6 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/interfaces/ISourceCounter.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common.interfaces; 2 | 3 | public interface ISourceCounter { 4 | 5 | void setSourceLines(long lines); 6 | } 7 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/interfaces/IReader.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common.interfaces; 2 | 3 | public interface IReader extends IPlugin{ 4 | 5 | void read(ILineSender lineSender); 6 | 7 | } 8 | -------------------------------------------------------------------------------- 
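To make the reader contract above concrete, here is a minimal sketch of an IReader built on AbstractPlugin (which appears later in this dump). The class name, package, and hard-coded rows are hypothetical; it relies only on the interfaces shown here: createNewLine/send/flush on ILineSender, addField on ILine, and the IPluginMonitor counters.

package com.dp.nebula.wormhole.examples;

import com.dp.nebula.wormhole.common.AbstractPlugin;
import com.dp.nebula.wormhole.common.interfaces.ILine;
import com.dp.nebula.wormhole.common.interfaces.ILineSender;
import com.dp.nebula.wormhole.common.interfaces.IReader;

// Hypothetical reader that emits a fixed set of rows to illustrate the IReader contract.
public class InMemoryReader extends AbstractPlugin implements IReader {

    private static final String[][] ROWS = { { "1", "alice" }, { "2", "bob" } };

    @Override
    public void read(ILineSender lineSender) {
        for (String[] row : ROWS) {
            // createNewLine() hands back an empty ILine; fields are appended in column order.
            ILine line = lineSender.createNewLine();
            for (String field : row) {
                line.addField(field);
            }
            if (Boolean.TRUE.equals(lineSender.send(line)) && getMonitor() != null) {
                getMonitor().increaseSuccessLines();
            }
        }
        // Push any buffered lines into the storage queue before the reader thread exits.
        lineSender.flush();
    }
}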
/src/main/java/com/dp/nebula/wormhole/common/interfaces/ITargetCounter.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common.interfaces; 2 | 3 | public interface ITargetCounter { 4 | 5 | void setTargetLines(String name, long lines); 6 | 7 | } 8 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/engine/core/DefaultSplitter.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.core; 2 | 3 | import com.dp.nebula.wormhole.common.AbstractSplitter; 4 | 5 | class DefaultSplitter extends AbstractSplitter{} 6 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/interfaces/IWriterPeriphery.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common.interfaces; 2 | 3 | public interface IWriterPeriphery extends ITransmissionPeriphery{ 4 | 5 | void rollback(IParam param); 6 | 7 | } 8 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/interfaces/IWriter.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common.interfaces; 2 | 3 | public interface IWriter extends IPlugin{ 4 | 5 | void write(ILineReceiver lineReceiver); 6 | 7 | void commit(); 8 | 9 | } 10 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/interfaces/ILineSender.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common.interfaces; 2 | 3 | public interface ILineSender { 4 | 5 | ILine createNewLine(); 6 | 7 | Boolean send(ILine line); 8 | 9 | void flush(); 10 | 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/interfaces/ISplitter.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common.interfaces; 2 | 3 | import java.util.List; 4 | 5 | public interface ISplitter { 6 | 7 | void init(IParam jobParams); 8 | 9 | List split(); 10 | 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/interfaces/ITransformer.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common.interfaces; 2 | 3 | public interface ITransformer { 4 | 5 | ILine transform(ILine line); 6 | 7 | ILine transform(ILine line, String params); 8 | 9 | } 10 | 11 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/interfaces/ITransmissionPeriphery.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common.interfaces; 2 | 3 | 4 | interface ITransmissionPeriphery { 5 | 6 | void prepare(IParam param, ISourceCounter counter); 7 | 8 | void doPost(IParam param, ITargetCounter counter); 9 | 10 | } 11 | -------------------------------------------------------------------------------- /conf/hive-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 
| 5 | 6 | 7 | 8 | hive.hiveserver.kerberos.principal 9 | hadoop/10.1.1.161@DIANPING.COM 10 | 11 | 12 | 13 | hive.hiveserver.sasl.enabled 14 | true 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/interfaces/IPluginMonitor.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common.interfaces; 2 | 3 | public interface IPluginMonitor { 4 | 5 | long getSuccessLines(); 6 | 7 | long getFailedLines(); 8 | 9 | void increaseSuccessLines(); 10 | 11 | void increaseSuccessLine(long lines); 12 | 13 | void increaseFailedLines(); 14 | 15 | void increaseFailedLines(long lines); 16 | 17 | } 18 | -------------------------------------------------------------------------------- /src/test/java/com/dp/nebula/wormhole/transform/utils/FieldTransformUtilsTest.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.transform.utils; 2 | 3 | import static org.junit.Assert.*; 4 | 5 | 6 | import org.junit.Test; 7 | 8 | public class FieldTransformUtilsTest { 9 | 10 | @Test 11 | public void testFromUnixTime() { 12 | assertEquals("2012-07-18 00:00:00",FieldTransformUtils.fromUnixTime(1342540800L)); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/reader/hivereader/HiveReaderMode.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.reader.hivereader; 2 | 3 | public enum HiveReaderMode { 4 | READ_FROM_HIVESERVER ("READ_FROM_HIVESERVER"), 5 | READ_FROM_HDFS ("READ_FROM_HDFS"); 6 | 7 | private String mode = null; 8 | private HiveReaderMode(String mode) { 9 | this.mode = mode; 10 | } 11 | String getMode(){ 12 | return mode; 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /conf/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | hadoop.security.authentication 10 | kerberos 11 | 12 | 13 | 14 | hadoop.security.authorization 15 | true 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/interfaces/IPlugin.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common.interfaces; 2 | 3 | import java.util.Map; 4 | 5 | public interface IPlugin { 6 | 7 | void setParam(IParam param); 8 | 9 | IParam getParam(); 10 | 11 | void setMonitor(IPluginMonitor monitor); 12 | 13 | IPluginMonitor getMonitor(); 14 | 15 | void init(); 16 | 17 | void connection(); 18 | 19 | void finish(); 20 | 21 | Map getMonitorInfo(); 22 | 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/plugin/PluginStatus.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common.plugin; 2 | 3 | public enum PluginStatus { 4 | FAILURE(-1), 5 | SUCCESS(0), 6 | CONNECT(1), 7 | READ(2), 8 | READ_OVER(3), 9 | WRITE(4), 10 | WRITE_OVER(5), 11 | WAITING(6); 12 | 13 | private int status; 14 | 15 | private PluginStatus(int status) { 16 | this.status = status; 17 | } 18 | 19 | public int value(){ 20 | return this.status; 21 | } 22 | 23 | } 24 | 
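The IWriter side mirrors the reader sketch above: a writer drains an ILineReceiver and finalizes its target in commit(). The console writer below is a hypothetical sketch; it assumes, which the dump does not state verbatim, that receive() returns null once the upstream storage has been closed and drained.

package com.dp.nebula.wormhole.examples;

import com.dp.nebula.wormhole.common.AbstractPlugin;
import com.dp.nebula.wormhole.common.interfaces.ILine;
import com.dp.nebula.wormhole.common.interfaces.ILineReceiver;
import com.dp.nebula.wormhole.common.interfaces.IWriter;

// Hypothetical writer that prints each incoming line, to illustrate the IWriter contract.
public class ConsoleWriter extends AbstractPlugin implements IWriter {

    @Override
    public void write(ILineReceiver lineReceiver) {
        ILine line;
        // Assumption: receive() yields null once the reader side has closed the queue.
        while ((line = lineReceiver.receive()) != null) {
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < line.getFieldNum(); i++) {
                if (i > 0) {
                    sb.append('\t');
                }
                sb.append(line.getField(i));
            }
            System.out.println(sb);
            if (getMonitor() != null) {
                getMonitor().increaseSuccessLines();
            }
        }
    }

    @Override
    public void commit() {
        // Nothing to commit for a console sink; real writers finalize their target here.
    }
}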
-------------------------------------------------------------------------------- /conf/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=INFO 2 | log4j.logger.com.dp.nebula.wormhole.common=INFO,STDOUT 3 | log4j.logger.com.dp.nebula.wormhole.plugins=INFO,STDOUT 4 | log4j.logger.com.dp.nebula.wormhole.engine=INFO,STDOUT 5 | log4j.logger.org.apache=INFO,STDOUT 6 | log4j.logger.org.apache.zookeeper=ERROR,STDOUT 7 | 8 | log4j.appender.STDOUT=org.apache.log4j.ConsoleAppender 9 | log4j.appender.STDOUT.layout=org.apache.log4j.PatternLayout 10 | log4j.appender.STDOUT.layout.ConversionPattern=%d [%t] %l %-5p %c{2} - %m%n 11 | -------------------------------------------------------------------------------- /src/test/java/com/dp/nebula/wormhole/plugins/reader/sftpreader/MyPublickeyAuthenticator.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.reader.sftpreader; 2 | 3 | import org.apache.sshd.server.PublickeyAuthenticator; 4 | import org.apache.sshd.server.session.ServerSession; 5 | 6 | import java.security.PublicKey; 7 | 8 | public class MyPublickeyAuthenticator implements PublickeyAuthenticator { 9 | public boolean authenticate(String s, PublicKey publicKey, ServerSession serverSession) { 10 | return false; 11 | } 12 | } -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/engine/config/PluginConfParamKey.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.config; 2 | 3 | public class PluginConfParamKey { 4 | 5 | public static final String PLUGIN_CLASS_NAME = "PluginClassName"; 6 | 7 | public static final String PERIPHERY_CLASS_NAME = "PeripheryClassName"; 8 | 9 | public static final String SPLITTER_CLASS_NAME = "SplitterClassName"; 10 | 11 | public static final String PATH = "Path"; 12 | 13 | public static final String MAX_THREAD_NUMBER = "MaxThreadNumber"; 14 | 15 | } 16 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/common/ParamKey.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.common; 2 | 3 | public final class ParamKey { 4 | 5 | private ParamKey() { 6 | } 7 | 8 | public static final String connectProps = "connectProps"; 9 | 10 | public static final String ip = "ip"; 11 | 12 | public static final String port = "port"; 13 | 14 | public static final String dbname = "dbname"; 15 | 16 | public static final String username = "username"; 17 | 18 | public static final String password = "password"; 19 | 20 | } 21 | -------------------------------------------------------------------------------- /src/test/java/com/dp/nebula/wormhole/plugins/common/SFTPUtilsTest.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.common; 2 | 3 | import static org.junit.Assert.*; 4 | 5 | import org.apache.hadoop.conf.Configuration; 6 | import org.junit.Test; 7 | 8 | public class SFTPUtilsTest { 9 | 10 | @Test 11 | public void testGetConf() { 12 | Configuration cfg = SFTPUtils.getConf(); 13 | assertNotNull(cfg); 14 | assertNotNull(cfg.get("io.compression.codecs")); 15 | System.out.println(cfg.get("io.compression.codecs")); 16 | } 17 | 18 | } 19 | 
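The shared ParamKey constants above are read through the IParam interface that appears later in this dump (getValue, getIntValue, and so on). The holder class below is a hedged illustration of that pattern; the class itself, the port default, and the JDBC URL shape are not taken from the project.

package com.dp.nebula.wormhole.examples;

import com.dp.nebula.wormhole.common.interfaces.IParam;
import com.dp.nebula.wormhole.plugins.common.ParamKey;

// Hypothetical settings holder showing how a database plugin pulls connection
// parameters out of its IParam using the shared ParamKey constants.
public final class DbConnectionSettings {

    public final String ip;
    public final int port;
    public final String dbname;
    public final String username;
    public final String password;

    public DbConnectionSettings(IParam param) {
        this.ip = param.getValue(ParamKey.ip);
        this.port = param.getIntValue(ParamKey.port, 3306); // illustrative default
        this.dbname = param.getValue(ParamKey.dbname);
        this.username = param.getValue(ParamKey.username);
        this.password = param.getValue(ParamKey.password);
    }

    // Illustrative MySQL-style URL; the real readers and writers build their own URLs.
    public String jdbcUrl() {
        return "jdbc:mysql://" + ip + ":" + port + "/" + dbname;
    }
}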
-------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/transform/utils/FieldTransformUtils.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.transform.utils; 2 | 3 | import java.text.SimpleDateFormat; 4 | import java.util.Date; 5 | 6 | public final class FieldTransformUtils { 7 | 8 | private FieldTransformUtils () { 9 | 10 | } 11 | 12 | public static String fromUnixTime(long unixTime){ 13 | Date dateTime = new Date(unixTime*1000); 14 | SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); 15 | return dateFormat.format(dateTime); 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/AbstractTransformer.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common; 2 | 3 | import com.dp.nebula.wormhole.common.interfaces.ILine; 4 | import com.dp.nebula.wormhole.common.interfaces.ITransformer; 5 | 6 | public abstract class AbstractTransformer implements ITransformer{ 7 | 8 | @Override 9 | public ILine transform(ILine line, String params) { 10 | if(params == null || params.equals("")) 11 | return transform(line); 12 | else 13 | return line; 14 | } 15 | 16 | @Override 17 | public ILine transform(ILine line) { 18 | return line; 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/AbstractSplitter.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import com.dp.nebula.wormhole.common.interfaces.IParam; 7 | import com.dp.nebula.wormhole.common.interfaces.ISplitter; 8 | 9 | public abstract class AbstractSplitter implements ISplitter{ 10 | 11 | protected IParam param; 12 | 13 | @Override 14 | public void init(IParam jobParams){ 15 | param = jobParams; 16 | } 17 | 18 | @Override 19 | public List split(){ 20 | List result = new ArrayList(); 21 | result.add(param); 22 | return result; 23 | } 24 | 25 | 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/engine/core/DefaultReaderPeriphery.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.core; 2 | 3 | import com.dp.nebula.wormhole.common.interfaces.IParam; 4 | import com.dp.nebula.wormhole.common.interfaces.IReaderPeriphery; 5 | import com.dp.nebula.wormhole.common.interfaces.ISourceCounter; 6 | import com.dp.nebula.wormhole.common.interfaces.ITargetCounter; 7 | 8 | class DefaultReaderPeriphery implements IReaderPeriphery{ 9 | 10 | @Override 11 | public void prepare(IParam param, ISourceCounter counter) { 12 | //do nothing 13 | } 14 | 15 | @Override 16 | public void doPost(IParam param, ITargetCounter counter) { 17 | //do nothing 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/reader/salesforcereader/DescribeRefObject.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.reader.salesforcereader; 2 | 3 | import java.util.Map; 4 | 5 | import 
com.sforce.soap.partner.Field; 6 | 7 | public class DescribeRefObject { 8 | 9 | private String objectName; 10 | private Map fieldInfoMap; 11 | 12 | DescribeRefObject(String objectName, Map fieldInfoMap) { 13 | this.objectName = objectName; 14 | this.fieldInfoMap = fieldInfoMap; 15 | } 16 | 17 | public Map getFieldInfoMap() { 18 | return fieldInfoMap; 19 | } 20 | 21 | public String getObjectName() { 22 | return objectName; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/transform/common/TransformerFactory.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.transform.common; 2 | 3 | import com.dp.nebula.wormhole.common.interfaces.ITransformer; 4 | import com.dp.nebula.wormhole.engine.utils.JarLoader; 5 | import com.dp.nebula.wormhole.engine.utils.ReflectionUtil; 6 | 7 | 8 | 9 | public class TransformerFactory { 10 | 11 | public static final String JAR_PATH = "transformers/"; 12 | public static ITransformer create(String name){ 13 | ITransformer result = ReflectionUtil.createInstanceByDefaultConstructor( 14 | name, 15 | ITransformer.class, 16 | JarLoader.getInstance(JAR_PATH)); 17 | return result; 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/test/java/com/dp/nebula/wormhole/plugins/reader/sftpreader/MyPasswordAuthenticator.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.reader.sftpreader; 2 | 3 | import org.apache.sshd.server.PasswordAuthenticator; 4 | import org.apache.sshd.server.session.ServerSession; 5 | 6 | /** 7 | * Very basic PasswordAuthenticator used for unit tests. 
8 | */ 9 | public class MyPasswordAuthenticator implements PasswordAuthenticator { 10 | 11 | public boolean authenticate(String username, String password, ServerSession session) { 12 | boolean retour = false; 13 | 14 | if ("username".equals(username) && "password".equals(password)) { 15 | retour = true; 16 | } 17 | 18 | return retour; 19 | } 20 | } -------------------------------------------------------------------------------- /src/test/java/com/dp/nebula/wormhole/engine/common/FakeReader.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.common; 2 | 3 | import java.util.Map; 4 | 5 | import com.dp.nebula.wormhole.common.AbstractPlugin; 6 | import com.dp.nebula.wormhole.common.interfaces.ILineSender; 7 | import com.dp.nebula.wormhole.common.interfaces.IReader; 8 | 9 | public class FakeReader extends AbstractPlugin implements IReader{ 10 | 11 | @Override 12 | public void init() {} 13 | 14 | @Override 15 | public void connection() {} 16 | 17 | @Override 18 | public void finish() {} 19 | 20 | @Override 21 | public Map getMonitorInfo() { 22 | return null; 23 | } 24 | 25 | @Override 26 | public void read(ILineSender lineSender) {} 27 | 28 | } 29 | -------------------------------------------------------------------------------- /src/test/java/com/dp/nebula/wormhole/transform/impls/AddAndFiltTransformerTest.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.transform.impls; 2 | 3 | 4 | import org.junit.Test; 5 | 6 | import com.dp.nebula.wormhole.common.DefaultLine; 7 | import com.dp.nebula.wormhole.common.interfaces.ILine; 8 | 9 | public class AddAndFiltTransformerTest { 10 | @Test 11 | public void testTransform(){ 12 | ILine line = new DefaultLine(); 13 | line.addField("first"); 14 | line.addField("second"); 15 | line.addField("third"); 16 | 17 | ILine result = new DefaultLine(); 18 | result.addField("third"); 19 | result.addField("second"); 20 | result.addField("1,2"); 21 | result.addField("first"); 22 | 23 | 24 | result = new DefaultLine(); 25 | result.addField("good"); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/LineExchangerFactory.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common; 2 | 3 | import java.util.List; 4 | 5 | import com.dp.nebula.wormhole.common.interfaces.ILineReceiver; 6 | import com.dp.nebula.wormhole.common.interfaces.ILineSender; 7 | import com.dp.nebula.wormhole.engine.storage.IStorage; 8 | 9 | public class LineExchangerFactory { 10 | 11 | public static ILineSender createNewLineSender(IStorage storageForRead, List storageForWrite){ 12 | 13 | return new BufferedLineExchanger(storageForRead, storageForWrite); 14 | } 15 | 16 | public static ILineReceiver createNewLineReceiver(IStorage storageForRead, List storageForWrite){ 17 | 18 | return new BufferedLineExchanger(storageForRead, storageForWrite); 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/engine/core/DefaultWriterPeriphery.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.core; 2 | 3 | import com.dp.nebula.wormhole.common.interfaces.IParam; 4 | import com.dp.nebula.wormhole.common.interfaces.ISourceCounter; 5 | import 
com.dp.nebula.wormhole.common.interfaces.ITargetCounter; 6 | import com.dp.nebula.wormhole.common.interfaces.IWriterPeriphery; 7 | 8 | class DefaultWriterPeriphery implements IWriterPeriphery { 9 | 10 | @Override 11 | public void prepare(IParam param, ISourceCounter counter) { 12 | // do nothing 13 | } 14 | 15 | @Override 16 | public void doPost(IParam param, ITargetCounter counter) { 17 | // do nothing 18 | 19 | } 20 | 21 | @Override 22 | public void rollback(IParam param) { 23 | // do nothing 24 | } 25 | 26 | } 27 | -------------------------------------------------------------------------------- /src/test/java/com/dp/nebula/wormhole/engine/common/FakeWriter.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.common; 2 | 3 | import java.util.Map; 4 | 5 | import com.dp.nebula.wormhole.common.AbstractPlugin; 6 | import com.dp.nebula.wormhole.common.interfaces.ILineReceiver; 7 | import com.dp.nebula.wormhole.common.interfaces.IWriter; 8 | 9 | public class FakeWriter extends AbstractPlugin implements IWriter{ 10 | 11 | @Override 12 | public void init() {} 13 | 14 | @Override 15 | public void connection() {} 16 | 17 | @Override 18 | public void finish() {} 19 | 20 | @Override 21 | public Map getMonitorInfo() { 22 | return null; 23 | } 24 | 25 | @Override 26 | public void write(ILineReceiver lineReceiver) {} 27 | 28 | @Override 29 | public void commit() {} 30 | 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/engine/storage/AbstractStorage.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.storage; 2 | 3 | public abstract class AbstractStorage implements IStorage{ 4 | 5 | private Boolean pushClosed; 6 | private int destructLimit; 7 | private Statistics stat; 8 | 9 | public Boolean getPushClosed() { 10 | return pushClosed; 11 | } 12 | public void setPushClosed(Boolean pushClosed) { 13 | this.pushClosed = pushClosed; 14 | } 15 | public int getDestructLimit() { 16 | return destructLimit; 17 | } 18 | public void setDestructLimit(int destructLimit) { 19 | this.destructLimit = destructLimit; 20 | } 21 | public Statistics getStat() { 22 | return stat; 23 | } 24 | public void setStat(Statistics stat) { 25 | this.stat = stat; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /conf/engine.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | com.dp.nebula.wormhole.engine.storage.RAMStorage 4 | 1000 5 | 1000000 6 | 1 7 | 1000 8 | 9 | 10 10 | 12 | 600000 13 | true 14 | 600000 15 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/utils/Environment.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common.utils; 2 | 3 | public abstract class Environment { 4 | public static final String USER_DIR = System.getProperty("user.dir"); 5 | 6 | public static final String ENGINE_CONF = String.format("%s/conf/engine.xml", USER_DIR); 7 | public static final String PLUGINS_CONF = String.format("%s/conf/plugins.xml", USER_DIR); 8 | public static final String LOG4J_CONF = String.format("%s/conf/log4j.properties", USER_DIR); 9 | public static final String JOB_INFO_DB_PROP = String.format("%s/conf/jobInfoDB.properties", USER_DIR); 10 | public static final 
String READER_PLUGINS_DIR = String.format("%s/plugins/reader", USER_DIR); 11 | public static final String WRITER_PLUGINS_DIR = String.format("%s/plugins/writer", USER_DIR); 12 | 13 | } -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/engine/config/EngineConfParamKey.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.config; 2 | 3 | public class EngineConfParamKey { 4 | 5 | public static final String STORAGE_CLASS_NAME = "storageClassName"; 6 | 7 | public static final String STORAGE_LINE_LIMIT = "storageLineLimit"; 8 | 9 | public static final String STORAGE_BYTE_LIMIT = "storageByteLimit"; 10 | 11 | public static final String STORAGE_DESTRUCT_LIMIT = "storageDistructLimit"; 12 | 13 | public static final String STORAGE_WAIT_TIME = "storageWaitTime"; 14 | 15 | public static final String STATUS_CHECK_INTERVAL = "statusCheckInterval"; 16 | 17 | public static final String MONITOR_INFO_DISPLAY_PERIOD = "monitorInfoDisplayPeriod"; 18 | 19 | public static final String WRITER_CONSISTENCY = "writerConsistency"; 20 | 21 | public static final String READER_AND_WRITER_PERIPHERAL_TIMEOUT = "readerAndWriterPeripheralTimeout"; 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/DefaultPluginMonitor.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common; 2 | 3 | import com.dp.nebula.wormhole.common.interfaces.IPluginMonitor; 4 | 5 | public class DefaultPluginMonitor implements IPluginMonitor{ 6 | 7 | private long successLines; 8 | private long failedLines; 9 | 10 | @Override 11 | public long getSuccessLines() { 12 | return successLines; 13 | } 14 | 15 | @Override 16 | public long getFailedLines() { 17 | return failedLines; 18 | } 19 | 20 | @Override 21 | public void increaseSuccessLines() { 22 | increaseSuccessLine(1); 23 | } 24 | 25 | @Override 26 | public void increaseSuccessLine(long lines) { 27 | successLines += lines; 28 | } 29 | 30 | @Override 31 | public void increaseFailedLines() { 32 | increaseFailedLines(1); 33 | } 34 | 35 | @Override 36 | public void increaseFailedLines(long lines) { 37 | failedLines += lines; 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/test/java/com/dp/nebula/wormhole/engine/common/TestUtils.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.common; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | 5 | import java.io.File; 6 | 7 | import org.junit.Test; 8 | 9 | public class TestUtils { 10 | 11 | public static String getResourcePath(String[] paths){ 12 | StringBuilder sb = new StringBuilder(); 13 | sb.append("src").append(File.separator).append("test"). 
14 | append(File.separator).append("resources").append(File.separator); 15 | for(int i = 0; i < paths.length; i++){ 16 | if(i > 0){ 17 | sb.append(File.separator); 18 | } 19 | sb.append(paths[i]); 20 | } 21 | return sb.toString(); 22 | } 23 | @Test 24 | public void testGetResourcePath() { 25 | assertEquals("src"+File.separator+ "test"+File.separator+ "resources"+File.separator+ "jar"+File.separator+ "path01",getResourcePath(new String[] {"jar", "path01"})); 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | wormhole 2 | ======== 3 | 4 | Wormhole is Dianping's massive data transfer tool. It currently supports data sources and destinations such as HDFS, Hive, HBase, MySQL, Greenplum, SQL Server, MongoDB, SFTP, and Salesforce. 5 | 6 | Project contact: Yukang Chen (yukang.chen@dianping.com) 7 | 8 | 9 | Copyright and license 10 | --------------------- 11 | 12 | Copyright 2013 DianPing, Inc. 13 | 14 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this work except in compliance with the License. You may obtain a copy of the License in the LICENSE file, or at: 15 | 16 | http://www.apache.org/licenses/LICENSE-2.0 17 | 18 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 19 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/DefaultLineExchanger.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common; 2 | 3 | import com.dp.nebula.wormhole.common.interfaces.ILine; 4 | import com.dp.nebula.wormhole.common.interfaces.ILineReceiver; 5 | import com.dp.nebula.wormhole.common.interfaces.ILineSender; 6 | import com.dp.nebula.wormhole.engine.storage.IStorage; 7 | 8 | public class DefaultLineExchanger implements ILineSender, ILineReceiver{ 9 | 10 | private IStorage storage; 11 | 12 | public DefaultLineExchanger(IStorage storage) { 13 | super(); 14 | this.storage = storage; 15 | } 16 | 17 | @Override 18 | public ILine createNewLine() { 19 | return new DefaultLine(); 20 | } 21 | 22 | @Override 23 | public Boolean send(ILine line) { 24 | return storage.push(line); 25 | } 26 | 27 | @Override 28 | public void flush() { 29 | } 30 | 31 | @Override 32 | public ILine receive() { 33 | return storage.pull(); 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/engine/storage/StorageQueue.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.storage; 2 | 3 | import java.util.concurrent.TimeUnit; 4 | 5 | import com.dp.nebula.wormhole.common.interfaces.ILine; 6 | 7 | public abstract class StorageQueue implements java.io.Serializable{ 8 | 9 | private static final long serialVersionUID = -7334864414523350826L; 10 | 11 | public abstract boolean push(ILine line, long timeout, TimeUnit unit) throws InterruptedException; 12 | 13 | public abstract boolean push(ILine[] lines, int size, long timeout, TimeUnit unit) throws InterruptedException; 14 | 15 | public abstract ILine pull(long timeout, TimeUnit 
unit) throws InterruptedException; 16 | 17 | public abstract int pull(ILine[] ea, long timeout, TimeUnit unit) throws InterruptedException ; 18 | 19 | public abstract void close(); 20 | 21 | public abstract int size(); 22 | 23 | public abstract int getLineLimit(); 24 | 25 | public abstract String info(); 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/writer/mongowriter/MongoWriterSplitter.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.writer.mongowriter; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import org.apache.log4j.Logger; 7 | 8 | import com.dp.nebula.wormhole.common.AbstractSplitter; 9 | import com.dp.nebula.wormhole.common.interfaces.IParam; 10 | 11 | public class MongoWriterSplitter extends AbstractSplitter { 12 | private final static Logger log = Logger.getLogger(MongoWriterSplitter.class); 13 | 14 | @Override 15 | public void init(IParam jobParams){ 16 | param = jobParams; 17 | } 18 | 19 | @Override 20 | public List split(){ 21 | List result = new ArrayList(); 22 | int concurrency = param.getIntValue(ParamKey.concurrency); 23 | for (int i = 0; i < concurrency; i++){ 24 | IParam p = param.clone(); 25 | result.add(p); 26 | } 27 | log.info("the number of split: " + result.size()); 28 | return result; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/config/JobPluginConf.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common.config; 2 | 3 | import com.dp.nebula.wormhole.common.interfaces.IParam; 4 | 5 | public class JobPluginConf { 6 | 7 | private String pluginName; 8 | private String id; 9 | private IParam pluginParam; 10 | 11 | public JobPluginConf(String pluginName, IParam pluginParam, String id) { 12 | super(); 13 | this.pluginName = pluginName; 14 | this.pluginParam = pluginParam; 15 | this.id = id; 16 | } 17 | 18 | public JobPluginConf() { 19 | } 20 | 21 | public String getPluginName() { 22 | return pluginName; 23 | } 24 | 25 | public IParam getPluginParam() { 26 | return pluginParam; 27 | } 28 | 29 | public void setPluginName(String pluginName) { 30 | this.pluginName = pluginName; 31 | } 32 | 33 | public void setPluginParam(IParam pluginParam) { 34 | this.pluginParam = pluginParam; 35 | } 36 | 37 | public String getId() { 38 | return id; 39 | } 40 | 41 | public void setId(String id) { 42 | this.id = id; 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/utils/StringUtil.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common.utils; 2 | 3 | import java.util.regex.Matcher; 4 | import java.util.regex.Pattern; 5 | 6 | import org.apache.commons.lang.StringUtils; 7 | import org.apache.log4j.Logger; 8 | 9 | public final class StringUtil { 10 | private static final Logger logger = Logger.getLogger(StringUtil.class); 11 | 12 | private static final String VARIABLE_PATTERN = "(\\$)\\{(\\w+)\\}"; 13 | 14 | private StringUtil(){ 15 | } 16 | 17 | public static String replaceEnvironmentVariables(String text) { 18 | Pattern pattern = Pattern.compile(VARIABLE_PATTERN); 19 | Matcher matcher = pattern.matcher(text); 20 | 21 | while(matcher.find()){ 22 | 
logger.info("replace " + matcher.group(2) + 23 | " with " + System.getenv(matcher.group(2))); 24 | 25 | text = StringUtils.replace(text, matcher.group(), 26 | StringUtils.defaultString(System.getenv(matcher.group(2)), matcher.group())); 27 | } 28 | return text; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/writer/hbasewriter/HBaseSplitter.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.writer.hbasewriter; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import org.apache.log4j.Logger; 7 | 8 | import com.dp.nebula.wormhole.common.AbstractSplitter; 9 | import com.dp.nebula.wormhole.common.interfaces.IParam; 10 | import com.google.common.base.Preconditions; 11 | 12 | public class HBaseSplitter extends AbstractSplitter { 13 | private final static Logger LOG = Logger.getLogger(HBaseSplitter.class); 14 | 15 | @Override 16 | public List split() { 17 | List result = new ArrayList(); 18 | int concurrency = param.getIntValue(ParamKey.concurrency, 1); 19 | Preconditions.checkArgument((concurrency > 0 && concurrency <= 10), 20 | "illegal concurrency number argument " + concurrency); 21 | 22 | for (int i = 0; i < concurrency; i++) { 23 | IParam iParam = param.clone(); 24 | result.add(iParam); 25 | } 26 | LOG.info("the number of split: " + result.size()); 27 | return result; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /job.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | mysqlreader 4 | 5 | 10.1.77.85 6 | 3306 7 | TaurusCopy 8 | root 9 | 123456 10 | UTF-8 11 | 12 | 13 | select * from TaurusTaskAttempt 14 | 15 | 16 | 1 17 | 18 | 19 | 20 | false> 21 | 22 | 23 | sftpwriter 24 | sftp://root@10.1.77.85:58422/root/wormholeTest/ 25 | 12qwaszx 26 | \001 27 | \n 28 | UTF-8 29 | 4096 30 | \N 31 | 1 32 | 33 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/engine/utils/ReflectionUtil.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.utils; 2 | 3 | import org.apache.commons.logging.Log; 4 | import org.apache.commons.logging.LogFactory; 5 | 6 | public class ReflectionUtil { 7 | 8 | private static final Log s_logger = LogFactory.getLog(ReflectionUtil.class); 9 | 10 | @SuppressWarnings("unchecked") 11 | public static T createInstanceByDefaultConstructor(String className, Class type, 12 | JarLoader jarLoader){ 13 | try{ 14 | Class clazz = null; 15 | if(jarLoader != null){ 16 | clazz = (Class)jarLoader.loadClass(className); 17 | } 18 | if(clazz == null){ 19 | clazz = (Class) Class.forName(className); 20 | } 21 | 22 | return clazz.newInstance(); 23 | } 24 | catch(Exception e){ 25 | s_logger.error("Exception occurs when creating " + className , e); 26 | return null; 27 | } 28 | } 29 | 30 | 31 | public static T createInstanceByDefaultConstructor(String className, Class type){ 32 | return createInstanceByDefaultConstructor(className, type, null); 33 | } 34 | 35 | 36 | 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/reader/salesforcereader/ParamKey.java: -------------------------------------------------------------------------------- 1 | package 
com.dp.nebula.wormhole.plugins.reader.salesforcereader; 2 | 3 | public final class ParamKey { 4 | /* 5 | * @name:username 6 | * @description:username 7 | * @range: 8 | * @mandatory: 9 | * @default: 10 | */ 11 | public final static String username = "username"; 12 | /* 13 | * @name:password 14 | * @description:password 15 | * @range: 16 | * @mandatory: 17 | * @default: 18 | */ 19 | public final static String password = "password"; 20 | /* 21 | * @name:entity 22 | * @description:entity 23 | * @range: 24 | * @mandatory: 25 | * @default: 26 | */ 27 | public final static String entity = "entity"; 28 | /* 29 | * @name:extractionSOQL 30 | * @description:extractionSOQL 31 | * @range: 32 | * @mandatory: 33 | * @default: 34 | */ 35 | public final static String extractionSOQL = "extractionSOQL"; 36 | /* 37 | * @name:encryptionKeyFile 38 | * @description: 39 | * @range: 40 | * @mandatory: 41 | * @default: 42 | */ 43 | public final static String encryptionKeyFile = "encryptionKeyFile"; 44 | } 45 | -------------------------------------------------------------------------------- /src/test/java/com/dp/nebula/wormhole/common/utils/StringUtilTest.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common.utils; 2 | 3 | import static org.easymock.EasyMock.expect; 4 | import static org.powermock.api.easymock.PowerMock.mockStatic; 5 | import junit.framework.Assert; 6 | 7 | import org.junit.Test; 8 | import org.junit.runner.RunWith; 9 | import org.powermock.api.easymock.PowerMock; 10 | import org.powermock.core.classloader.annotations.PrepareForTest; 11 | import org.powermock.modules.junit4.PowerMockRunner; 12 | 13 | @RunWith(PowerMockRunner.class) 14 | @PrepareForTest({ StringUtil.class, System.class }) 15 | public class StringUtilTest { 16 | 17 | @Test 18 | public void testReplaceEnvironmentVariables() { 19 | String expectedText = "abcdedfg ${abc} abcdefg"; 20 | String actualText = "abcdedfg cde abcdefg"; 21 | 22 | mockStatic(System.class); 23 | expect(System.getenv("abc")).andReturn("cde").anyTimes(); 24 | PowerMock.replay(System.class); 25 | Assert.assertEquals(actualText, 26 | StringUtil.replaceEnvironmentVariables(expectedText)); 27 | PowerMock.verify(System.class); 28 | } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/interfaces/IParam.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common.interfaces; 2 | 3 | import java.util.Collection; 4 | import java.util.List; 5 | 6 | public interface IParam extends Cloneable{ 7 | 8 | String getValue(String key); 9 | 10 | String getValue(String key, String defaultValue); 11 | 12 | char getCharValue(String key); 13 | 14 | char getCharValue(String key, char defaultValue); 15 | 16 | int getIntValue(String key); 17 | 18 | int getIntValue(String key, int defaultValue); 19 | 20 | boolean getBooleanValue(String key); 21 | 22 | boolean getBooleanValue(String key, boolean defaultValue); 23 | 24 | long getLongValue(String key); 25 | 26 | long getLongValue(String key, long defaultValue); 27 | 28 | double getDoubleValue(String key); 29 | 30 | double getDoubleValue(String key, double defaultValue); 31 | 32 | List getCharList(String key); 33 | 34 | List getCharList(String key,List list); 35 | 36 | void putValue(String key, String value); 37 | 38 | void mergeTo(IParam param); 39 | 40 | void mergeTo(Collection paramCollection); 41 | 42 | IParam 
clone(); 43 | } -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/reader/hbasereader/ParamKey.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.reader.hbasereader; 2 | 3 | public final class ParamKey { 4 | /* 5 | * @name: htable 6 | * 7 | * @description:hbase table name 8 | * 9 | * @range: 10 | * 11 | * @mandatory: true 12 | * 13 | * @default: 14 | */ 15 | public final static String htable = "htable"; 16 | /* 17 | * @name: columns_key 18 | * 19 | * @description: indicate which CF:qualifier should be write, split by "," 20 | * 21 | * @range: 22 | * 23 | * @mandatory: true 24 | * 25 | * @default: 26 | */ 27 | public final static String columns_key = "columns_key"; 28 | /* 29 | * @name: rowkey_range, split by "," 30 | * 31 | * @description: range of rowkey 32 | * 33 | * @range: 34 | * 35 | * @mandatory: false 36 | * 37 | * @default: 38 | */ 39 | public final static String rowkey_range = "rowkey_range"; 40 | /* 41 | * @name:concurrency 42 | * 43 | * @description:concurrency of the job 44 | * 45 | * @range:1-10 46 | * 47 | * @mandatory: false 48 | * 49 | * @default:1 50 | */ 51 | public final static String concurrency = "concurrency"; 52 | } 53 | -------------------------------------------------------------------------------- /src/test/java/com/dp/nebula/wormhole/plugins/common/DFSUtilsTest.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.common; 2 | 3 | import static org.junit.Assert.assertNotNull; 4 | 5 | import java.io.IOException; 6 | 7 | import org.junit.After; 8 | import org.junit.AfterClass; 9 | import org.junit.Before; 10 | import org.junit.BeforeClass; 11 | import org.junit.Test; 12 | 13 | public class DFSUtilsTest { 14 | 15 | @BeforeClass 16 | public static void setUpBeforeClass() throws Exception { 17 | } 18 | 19 | @AfterClass 20 | public static void tearDownAfterClass() throws Exception { 21 | } 22 | 23 | @Before 24 | public void setUp() throws Exception { 25 | } 26 | 27 | @After 28 | public void tearDown() throws Exception { 29 | } 30 | 31 | @Test 32 | public void testGetTypeMap() throws IOException{ 33 | assertNotNull(DFSUtils.getTypeMap()); 34 | assertNotNull(DFSUtils.getCompressionSuffixMap()); 35 | } 36 | 37 | @Test 38 | public void testGetConfiguration() throws Exception{ 39 | DFSUtils.getConf("file:///data/home/workcron/imglog/", ""); 40 | // Assert 41 | // DFSUtils.getConf("/data/home/workcron/imglog/", ""); 42 | // Assert.fail("get conf failed"); 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/common/SFTPUtils.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.common; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | 6 | import org.apache.hadoop.conf.Configuration; 7 | 8 | 9 | public final class SFTPUtils { 10 | private static Map fileTypeSuffixMap = null; 11 | 12 | private SFTPUtils(){ 13 | } 14 | 15 | public enum FileType { 16 | TXT, COMP_TXT 17 | } 18 | 19 | static{ 20 | fileTypeSuffixMap = new HashMap(); 21 | fileTypeSuffixMap.put("gz", "org.apache.hadoop.io.compress.GzipCodec"); 22 | fileTypeSuffixMap.put("gzip", "org.apache.hadoop.io.compress.GzipCodec"); 23 | fileTypeSuffixMap.put("lzo", "com.hadoop.compression.lzo.LzopCodec"); 24 | } 25 | 
26 | public static Configuration getConf(){ 27 | Configuration cfg = new Configuration(); 28 | cfg.setClassLoader(SFTPUtils.class.getClassLoader()); 29 | cfg.set("io.compression.codecs", 30 | "org.apache.hadoop.io.compress.GzipCodec," + 31 | "org.apache.hadoop.io.compress.DefaultCodec," + 32 | "com.hadoop.compression.lzo.LzoCodec," + 33 | "com.hadoop.compression.lzo.LzopCodec," + 34 | "org.apache.hadoop.io.compress.BZip2Codec"); 35 | return cfg; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/transform/impls/AddAndFiltTransformer.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.transform.impls; 2 | 3 | //import org.apache.commons.logging.Log; 4 | //import org.apache.commons.logging.LogFactory; 5 | 6 | import com.dp.nebula.wormhole.common.AbstractTransformer; 7 | import com.dp.nebula.wormhole.common.DefaultLine; 8 | import com.dp.nebula.wormhole.common.interfaces.ILine; 9 | 10 | public class AddAndFiltTransformer extends AbstractTransformer{ 11 | // private final Log s_logger = LogFactory.getLog(HippoMongoToGPTransformer.class); 12 | 13 | @Override 14 | public ILine transform(ILine line, String params) { 15 | ILine result = new DefaultLine(); 16 | String [] idStrs = params.split(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)"); 17 | for(String idStr:idStrs) { 18 | int id = 0; 19 | try{ 20 | id = Integer.parseInt(idStr); 21 | } catch(NumberFormatException e){ 22 | if(idStr.length()>=2 && idStr.startsWith("\"") && idStr.endsWith("\"")) { 23 | result.addField(idStr.substring(1,idStr.length()-1)); 24 | } 25 | continue; 26 | } 27 | result.addField(line.getField(id)); 28 | } 29 | return result; 30 | } 31 | 32 | @Override 33 | public ILine transform(ILine line) { 34 | ILine result = new DefaultLine(); 35 | result.addField("good"); 36 | return result; 37 | } 38 | 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/test/java/com/dp/nebula/wormhole/engine/utils/JarLoaderTest.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.utils; 2 | 3 | import static org.junit.Assert.assertNotNull; 4 | import static org.junit.Assert.assertNull; 5 | 6 | import org.junit.Test; 7 | 8 | import com.dp.nebula.wormhole.engine.common.TestUtils; 9 | 10 | public class JarLoaderTest { 11 | 12 | @Test 13 | public void testGetInstance_with_one_path_success(){ 14 | JarLoader jl = JarLoader.getInstance(getPath(new String[] {"jar", "path01"})); 15 | assertNotNull(jl); 16 | } 17 | 18 | @Test 19 | public void testGetInstance_with_one_path_failed(){ 20 | JarLoader jl = JarLoader.getInstance("httq://123.123.12.cn"); 21 | assertNull(jl); 22 | } 23 | 24 | @Test 25 | public void testGetInstance_with_multiple_paths_success(){ 26 | JarLoader jl = JarLoader.getInstance( 27 | new String[] {getPath(new String[] {"jar", "path01"}), 28 | getPath(new String[] {"jar", "path02"})} 29 | ); 30 | assertNotNull(jl); 31 | } 32 | 33 | @Test 34 | public void testGetInstance_with_multiple_paths_failed(){ 35 | JarLoader jl = JarLoader.getInstance( 36 | new String[] {getPath(new String[] {"jar", "path01"}), 37 | "httq://aaaa"} 38 | ); 39 | assertNull(jl); 40 | } 41 | 42 | private String getPath(String[] paths){ 43 | return TestUtils.getResourcePath(paths); 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- 
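AddAndFiltTransformer, shown above, interprets its params string as a comma-separated mix of source field indexes and double-quoted literals; commas inside quotes survive the regex split. The demo class below is hypothetical, and the params value is inferred from the transformer's parsing logic and the expected line built in AddAndFiltTransformerTest rather than copied from the repo.

package com.dp.nebula.wormhole.examples;

import com.dp.nebula.wormhole.common.DefaultLine;
import com.dp.nebula.wormhole.common.interfaces.ILine;
import com.dp.nebula.wormhole.transform.impls.AddAndFiltTransformer;

public class AddAndFiltTransformerDemo {

    public static void main(String[] args) {
        ILine line = new DefaultLine();
        line.addField("first");
        line.addField("second");
        line.addField("third");

        AddAndFiltTransformer transformer = new AddAndFiltTransformer();

        // Indexes pick fields from the input line; quoted tokens are emitted as literals.
        ILine reordered = transformer.transform(line, "2,1,\"1,2\",0");
        // reordered now holds: "third", "second", "1,2", "first"

        // The single-argument overload ignores the input fields and returns one field, "good".
        ILine collapsed = transformer.transform(line);

        System.out.println(reordered.getField(2) + " | " + collapsed.getField(0));
    }
}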
/src/main/java/com/dp/nebula/wormhole/common/WormholeException.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common; 2 | 3 | public class WormholeException extends RuntimeException{ 4 | /** 5 | * 6 | */ 7 | private static final long serialVersionUID = -5961255124852822007L; 8 | private int statusCode; 9 | private String pluginID; 10 | 11 | 12 | public int getStatusCode() { 13 | return statusCode; 14 | } 15 | 16 | public void setStatusCode(int statusCode) { 17 | this.statusCode = statusCode; 18 | } 19 | 20 | public String getPluginID() { 21 | return pluginID; 22 | } 23 | 24 | public void setPluginID(String pluginID) { 25 | this.pluginID = pluginID; 26 | } 27 | 28 | public WormholeException(Exception e, int jobStatus) { 29 | super(e); 30 | this.statusCode = jobStatus; 31 | } 32 | 33 | public WormholeException(String m, int jobStatus) { 34 | super(m); 35 | this.statusCode = jobStatus; 36 | } 37 | 38 | public WormholeException(Exception e, int jobStatus, String pluginID) { 39 | super(e); 40 | this.statusCode = jobStatus; 41 | this.pluginID = pluginID; 42 | } 43 | 44 | public WormholeException(String m, int jobStatus, String pluginID) { 45 | super(m); 46 | this.statusCode = jobStatus; 47 | this.pluginID = pluginID; 48 | } 49 | 50 | public WormholeException(int jobStatus) { 51 | this.statusCode = jobStatus; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/engine/monitor/FailedInfo.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.monitor; 2 | 3 | public class FailedInfo { 4 | 5 | private String failedWriterID; 6 | private int failedLines; 7 | 8 | public FailedInfo(String failedWriterID, int failedLines) { 9 | super(); 10 | this.failedWriterID = failedWriterID; 11 | this.failedLines = failedLines; 12 | } 13 | 14 | public String getFailedWriterID() { 15 | return failedWriterID; 16 | } 17 | public int getFailedLines() { 18 | return failedLines; 19 | } 20 | 21 | @Override 22 | public int hashCode() { 23 | final int prime = 31; 24 | int result = 1; 25 | result = prime * result + failedLines; 26 | result = prime * result 27 | + ((failedWriterID == null) ? 
0 : failedWriterID.hashCode()); 28 | return result; 29 | } 30 | 31 | @Override 32 | public boolean equals(Object obj) { 33 | if (this == obj) { 34 | return true; 35 | } 36 | if (obj == null) { 37 | return false; 38 | } 39 | if (getClass() != obj.getClass()) { 40 | return false; 41 | } 42 | FailedInfo other = (FailedInfo) obj; 43 | if (failedLines != other.failedLines) { 44 | return false; 45 | } 46 | if (failedWriterID == null) { 47 | if (other.failedWriterID != null) { 48 | return false; 49 | } 50 | } else if (!failedWriterID.equals(other.failedWriterID)) { 51 | return false; 52 | } 53 | return true; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/engine/monitor/CompletedMonitorInfo.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.monitor; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | 6 | import com.dp.nebula.wormhole.engine.storage.Statistics; 7 | 8 | public class CompletedMonitorInfo extends RealtimeMonitorInfo{ 9 | 10 | private long sourceLines; 11 | private Map targetLinesMap; 12 | 13 | CompletedMonitorInfo(int writerNum){ 14 | super(writerNum); 15 | targetLinesMap = new HashMap(writerNum); 16 | } 17 | 18 | public long getSourceLines() { 19 | return sourceLines; 20 | } 21 | 22 | public void setSourceLines(long sourceLines) { 23 | this.sourceLines = sourceLines; 24 | } 25 | 26 | public void addTargetLines(String name, Long number){ 27 | targetLinesMap.put(name, number); 28 | } 29 | 30 | public void setTargetLinesMap(Map targetLinesMap) { 31 | this.targetLinesMap = targetLinesMap; 32 | } 33 | 34 | public Map getTargetLinesMap() { 35 | return targetLinesMap; 36 | } 37 | 38 | @Override 39 | public String getInfo(){ 40 | StringBuilder builder = new StringBuilder(); 41 | Map map = getStorageMonitorCriteriaMap(); 42 | builder.append("\n"); 43 | for(String key : map.keySet()){ 44 | Statistics stat = map.get(key); 45 | builder.append(key).append(":").append(stat.getTotalStat()).append("\n"); 46 | } 47 | return builder.toString(); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/common/DBResultSetReceiver.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.common; 2 | 3 | import java.sql.SQLException; 4 | 5 | import com.dp.nebula.wormhole.common.interfaces.ILineReceiver; 6 | import com.dp.nebula.wormhole.common.interfaces.IPluginMonitor; 7 | 8 | public class DBResultSetReceiver { 9 | 10 | //private static final Log s_logger = LogFactory.getLog(DBResultSetReceiver.class); 11 | 12 | private ILineReceiver receiver; 13 | 14 | private int columnCount; 15 | 16 | private IPluginMonitor monitor; 17 | 18 | public static DBResultSetReceiver newReceiver(ILineReceiver receiver) { 19 | return new DBResultSetReceiver(receiver); 20 | } 21 | 22 | public DBResultSetReceiver(ILineReceiver receiver) { 23 | this.receiver = receiver; 24 | } 25 | 26 | public ILineReceiver getReceiver() { 27 | return receiver; 28 | } 29 | 30 | public void setReceiver(ILineReceiver receiver) { 31 | this.receiver = receiver; 32 | } 33 | 34 | public int getColumnCount() { 35 | return columnCount; 36 | } 37 | 38 | public void setColumnCount(int columnCount) { 39 | this.columnCount = columnCount; 40 | } 41 | 42 | public IPluginMonitor getMonitor() { 43 | return monitor; 44 | } 45 | 
46 | public void setMonitor(IPluginMonitor monitor) { 47 | this.monitor = monitor; 48 | } 49 | 50 | public void getFromReader() throws SQLException{ 51 | 52 | } 53 | 54 | 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/transform/impls/HippoMongoToGPTransformer.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.transform.impls; 2 | 3 | import org.apache.commons.logging.Log; 4 | import org.apache.commons.logging.LogFactory; 5 | 6 | import com.dp.nebula.common.utils.TypeConvertionHelper; 7 | import com.dp.nebula.wormhole.common.AbstractTransformer; 8 | import com.dp.nebula.wormhole.common.DefaultLine; 9 | import com.dp.nebula.wormhole.common.interfaces.ILine; 10 | import com.dp.nebula.wormhole.transform.utils.FieldTransformUtils; 11 | 12 | public class HippoMongoToGPTransformer extends AbstractTransformer{ 13 | 14 | private final Log s_logger = LogFactory.getLog(HippoMongoToGPTransformer.class); 15 | 16 | @Override 17 | public ILine transform(ILine line) { 18 | ILine result = new DefaultLine(); 19 | String statDate = null; 20 | for(int i = 0; i < line.getFieldNum(); i++) { 21 | if(i==0 || i==2 || i==4) { 22 | long unixTime = TypeConvertionHelper.convertStringToInteger(line.getField(i), 0); 23 | if(unixTime == 0) { 24 | s_logger.error("Unix time error for:" + line.getField(i)); 25 | } 26 | //String dateStr = "2012-07-19 00:00:00"; 27 | String dateStr = FieldTransformUtils.fromUnixTime(unixTime); 28 | result.addField(dateStr); 29 | if(i==0) { 30 | statDate = dateStr.substring(0,dateStr.indexOf(' ')); 31 | } 32 | } else { 33 | result.addField(line.getField(i)); 34 | } 35 | } 36 | result.addField(statDate); 37 | return result; 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/engine/storage/StorageManager.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.storage; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashMap; 5 | import java.util.List; 6 | import java.util.Map; 7 | 8 | import com.dp.nebula.wormhole.engine.utils.ReflectionUtil; 9 | 10 | 11 | public class StorageManager { 12 | 13 | private Map storageMap = new HashMap(); 14 | 15 | public StorageManager(List confList){ 16 | for(StorageConf conf:confList) { 17 | if(conf == null || conf.getId() == null) { 18 | continue; 19 | } 20 | IStorage storage = ReflectionUtil.createInstanceByDefaultConstructor(conf.getStorageClassName(), IStorage.class); 21 | if(storage.init(conf.getId(), conf.getLineLimit(), conf.getByteLimit(), conf.getDestructLimit(),conf.getWaitTime())){ 22 | storage.getStat().setPeriodInSeconds(conf.getPeriod()); 23 | storageMap.put(conf.getId(), storage); 24 | } 25 | } 26 | } 27 | 28 | public Map getStorageMap() { 29 | return storageMap; 30 | } 31 | 32 | public List getStorageForReader() { 33 | List result = new ArrayList(); 34 | for(IStorage storage : storageMap.values()) { 35 | result.add(storage); 36 | } 37 | return result; 38 | } 39 | 40 | public IStorage getStorageForWriter(String id){ 41 | return storageMap.get(id); 42 | } 43 | 44 | public void closeInput(){ 45 | for(String key:storageMap.keySet()){ 46 | storageMap.get(key).close(); 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- 
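For orientation, the storage wiring that StorageManager encapsulates can be sketched end to end. The snippet below is illustrative only and is not a file in this repository; it mirrors the pattern exercised by BufferedLineExchangerTest further down, and the limit values plus the storageClassName argument stand in for settings that would normally be read from engine.xml via EngineConfParamKey.

package com.dp.nebula.wormhole.engine.storage.example;   // hypothetical package, for illustration only

import java.util.ArrayList;
import java.util.List;

import com.dp.nebula.wormhole.common.BufferedLineExchanger;
import com.dp.nebula.wormhole.engine.storage.StorageConf;
import com.dp.nebula.wormhole.engine.storage.StorageManager;

public final class StorageWiringSketch {

    // Builds one storage queue per writer and returns an exchanger bound to writer "0".
    // storageClassName is passed in here; in the engine it comes from engine.xml
    // (EngineConfParamKey.STORAGE_CLASS_NAME).
    public static BufferedLineExchanger wire(String storageClassName, int writerNum) {
        List<StorageConf> confs = new ArrayList<StorageConf>();
        for (int i = 0; i < writerNum; i++) {
            StorageConf conf = new StorageConf();
            conf.setId(String.valueOf(i));
            conf.setStorageClassName(storageClassName);
            conf.setLineLimit(10);          // illustrative limits; production values come from engine.xml
            conf.setByteLimit(1024 * 200);
            conf.setDestructLimit(5000);    // illustrative
            conf.setPeriod(10);             // monitor display period, in seconds (illustrative)
            conf.setWaitTime(1000);
            confs.add(conf);
        }
        // StorageManager instantiates one IStorage per conf via ReflectionUtil.
        StorageManager manager = new StorageManager(confs);
        // First argument: the queue for writer "0"; second: the full queue list used on the send side,
        // mirroring BufferedLineExchangerTest.
        return new BufferedLineExchanger(manager.getStorageForWriter("0"),
                manager.getStorageForReader());
    }
}

A reader thread would then call send() and flush() on the returned exchanger, while writer threads receive from their own queues.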
/src/main/java/com/dp/nebula/wormhole/common/AbstractPlugin.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common; 2 | 3 | import java.util.Map; 4 | 5 | import com.dp.nebula.wormhole.common.interfaces.IParam; 6 | import com.dp.nebula.wormhole.common.interfaces.IPlugin; 7 | import com.dp.nebula.wormhole.common.interfaces.IPluginMonitor; 8 | 9 | public abstract class AbstractPlugin implements IPlugin{ 10 | 11 | private IParam param; 12 | 13 | private IPluginMonitor monitor; 14 | 15 | private String pluginName; 16 | 17 | private String pluginVersion; 18 | 19 | public static final String PLUGINID = "pluginID"; 20 | 21 | public void setParam(IParam param){ 22 | this.param = param; 23 | } 24 | 25 | public IParam getParam(){ 26 | return param; 27 | } 28 | 29 | public void setMonitor(IPluginMonitor monitor){ 30 | this.monitor = monitor; 31 | } 32 | 33 | public IPluginMonitor getMonitor(){ 34 | return monitor; 35 | } 36 | 37 | public String getPluginName() { 38 | return pluginName; 39 | } 40 | 41 | public void setPluginName(String pluginName) { 42 | this.pluginName = pluginName; 43 | } 44 | 45 | public String getPluginVersion() { 46 | return pluginVersion; 47 | } 48 | 49 | public void setPluginVersion(String pluginVersion) { 50 | this.pluginVersion = pluginVersion; 51 | } 52 | 53 | @Override 54 | public void init() { 55 | } 56 | 57 | @Override 58 | public void connection() { 59 | } 60 | 61 | @Override 62 | public void finish() { 63 | } 64 | 65 | @Override 66 | public Map getMonitorInfo() { 67 | return null; 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /bin/wormhole.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | source /etc/profile 4 | export LD_LIBRARY_PATH=/usr/local/hadoop/hadoop-release/lib/native/Linux-amd64-64:/usr/local/hadoop/lzo/lib 5 | 6 | CURR_DIR=`pwd` 7 | cd `dirname "$0"`/.. 
8 | WORMHOLE_HOME=`pwd` 9 | 10 | #set JAVA_OPTS 11 | JAVA_OPTS=" -Xms1024m -Xmx4048m -Xmn256m -Xss2048k" 12 | 13 | #performance Options 14 | #JAVA_OPTS="$JAVA_OPTS -XX:+AggressiveOpts" 15 | #JAVA_OPTS="$JAVA_OPTS -XX:+UseBiasedLocking" 16 | #JAVA_OPTS="$JAVA_OPTS -XX:+UseFastAccessorMethods" 17 | #JAVA_OPTS="$JAVA_OPTS -XX:+DisableExplicitGC" 18 | #JAVA_OPTS="$JAVA_OPTS -XX:+UseParNewGC" 19 | #JAVA_OPTS="$JAVA_OPTS -XX:+UseConcMarkSweepGC" 20 | #JAVA_OPTS="$JAVA_OPTS -XX:+CMSParallelRemarkEnabled" 21 | #JAVA_OPTS="$JAVA_OPTS -XX:+UseCMSCompactAtFullCollection" 22 | #JAVA_OPTS="$JAVA_OPTS -XX:+UseCMSInitiatingOccupancyOnly" 23 | #JAVA_OPTS="$JAVA_OPTS -XX:CMSInitiatingOccupancyFraction=75" 24 | #JAVA_OPTS="$JAVA_OPTS -XX:LargePageSizeInBytes=128m" 25 | 26 | #log print Options 27 | #JAVA_OPTS="$JAVA_OPTS -XX:+PrintGCApplicationStoppedTime" 28 | #JAVA_OPTS="$JAVA_OPTS -XX:+PrintGCTimeStamps" 29 | #JAVA_OPTS="$JAVA_OPTS -XX:+PrintGCDetails" 30 | #========================================================================== 31 | 32 | #start 33 | RUN_CMD="/usr/local/jdk/bin/java -classpath \"${WORMHOLE_HOME}/lib/*:${WORMHOLE_HOME}/conf/*:${WORMHOLE_HOME}/lib/conf/\"" 34 | RUN_CMD="$RUN_CMD $JAVA_OPTS" 35 | RUN_CMD="$RUN_CMD com.dp.nebula.wormhole.engine.core.Engine $@" 36 | echo $RUN_CMD 37 | eval $RUN_CMD 38 | #========================================================================== -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/writer/mongowriter/ParamKey.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.writer.mongowriter; 2 | 3 | public final class ParamKey { 4 | /* 5 | * @name: outputUri 6 | * @description: mongo uri, format like: mongodb://[username:password@]host1[:port1][,host2[:port2],...[,hostN[:portN]]][/[database][?options] 7 | * @range: 8 | * @mandatory: true 9 | * @default:mongodb://127.0.0.1:27017/db.coll 10 | */ 11 | public final static String outputUri = "output_uri"; 12 | /* 13 | * @name: outputFields 14 | * @description: The fields, in JSON, to write, format like { _id:1, name:1, age:1, sex:1 } 15 | * @range: 16 | * @mandatory: true 17 | * @default:{ _id:1 } 18 | */ 19 | public final static String outputFields = "output_fields"; 20 | /* 21 | * @name: bulkInsertLine 22 | * @description: bulk insertion line count 23 | * @range: 24 | * @mandatory: false 25 | * @default:3000 26 | */ 27 | public final static String bulkInsertLine = "bulk_insert_line"; 28 | /* 29 | * @name: concurrency 30 | * @description: concurrency of the job 31 | * @range:1-10 32 | * @mandatory: false 33 | * @default:1 34 | */ 35 | public final static String concurrency = "concurrency"; 36 | /* 37 | * @name: dropCollectionBeforeInsertionSwitch 38 | * @description: whether to drop collection before insert data into collection 39 | * @range: true, false 40 | * @mandatory: false 41 | * @default:false 42 | */ 43 | public final static String dropCollectionBeforeInsertionSwitch = "dropCollectionBeforeInsertionSwitch"; 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/common/PCInfo.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.common; 2 | 3 | import com.jcraft.jsch.UserInfo; 4 | 5 | public class PCInfo implements UserInfo { 6 | private String ip; 7 | private int port; 8 | private String user; 
9 | private String pwd; 10 | private String path; 11 | 12 | public String getIp() { 13 | return ip; 14 | } 15 | 16 | public void setIp(String ip) { 17 | this.ip = ip; 18 | } 19 | 20 | public int getPort() { 21 | return port; 22 | } 23 | 24 | public void setPort(int port) { 25 | this.port = port; 26 | } 27 | 28 | public String getUser() { 29 | return user; 30 | } 31 | 32 | public void setUser(String user) { 33 | this.user = user; 34 | } 35 | 36 | public String getPwd() { 37 | return pwd; 38 | } 39 | 40 | public void setPwd(String pwd) { 41 | this.pwd = pwd; 42 | } 43 | 44 | public String getPath() { 45 | return path; 46 | } 47 | 48 | public void setPath(String path) { 49 | this.path = path; 50 | } 51 | 52 | @Override 53 | public String getPassphrase() { 54 | return null; 55 | } 56 | 57 | @Override 58 | public String getPassword() { 59 | return getPwd(); 60 | } 61 | 62 | @Override 63 | public boolean promptPassphrase(String message) { 64 | return true; 65 | } 66 | 67 | @Override 68 | public boolean promptPassword(String message) { 69 | return true; 70 | } 71 | 72 | @Override 73 | public boolean promptYesNo(String message) { 74 | return true; 75 | } 76 | 77 | @Override 78 | public void showMessage(String message) { 79 | } 80 | 81 | @Override 82 | public String toString() { 83 | return "PCInfo [ip=" + ip + ", path=" + path + ", port=" + port 84 | + ", pwd=" + pwd + ", user=" + user + "]"; 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/common/ErrorCodeUtils.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.common; 2 | 3 | import org.postgresql.util.PSQLException; 4 | 5 | import com.dp.nebula.wormhole.common.JobStatus; 6 | import com.dp.nebula.wormhole.common.WormholeException; 7 | 8 | public final class ErrorCodeUtils { 9 | 10 | private ErrorCodeUtils() { 11 | 12 | } 13 | 14 | public static void psqlWriterWrapper (PSQLException pe,WormholeException we) { 15 | if (pe.getSQLState().startsWith("57")) { 16 | we.setStatusCode(JobStatus.WRITE_OPERATOR_INTERVENTION.getStatus()); 17 | } else if (pe.getSQLState().startsWith("5")||pe.getSQLState().startsWith("F0") 18 | ||pe.getSQLState().startsWith("P0")||pe.getSQLState().startsWith("XX")) { 19 | we.setStatusCode(JobStatus.WRITE_SYSTEM_ERROR.getStatus()); 20 | } else if (pe.getSQLState().startsWith("2")||pe.getSQLState().startsWith("3")||pe.getSQLState().startsWith("4")) { 21 | we.setStatusCode(JobStatus.WRITE_DATA_EXCEPTION.getStatus()); 22 | } else { 23 | we.setStatusCode(JobStatus.WRITE_FAILED.getStatus()); 24 | } 25 | } 26 | public static void psqlReaderWrapper (PSQLException pe,WormholeException we) { 27 | if (pe.getSQLState().startsWith("57")) { 28 | we.setStatusCode(JobStatus.READ_OPERATOR_INTERVENTION.getStatus()); 29 | } else if (pe.getSQLState().startsWith("5")||pe.getSQLState().startsWith("F0") 30 | ||pe.getSQLState().startsWith("P0")||pe.getSQLState().startsWith("XX")) { 31 | we.setStatusCode(JobStatus.READ_SYSTEM_ERROR.getStatus()); 32 | } else if (pe.getSQLState().startsWith("2")||pe.getSQLState().startsWith("3")||pe.getSQLState().startsWith("4")) { 33 | we.setStatusCode(JobStatus.READ_DATA_EXCEPTION.getStatus()); 34 | } else { 35 | we.setStatusCode(JobStatus.READ_FAILED.getStatus()); 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/test/java/com/dp/nebula/wormhole/common/DefaultParamTest.java: 
-------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | 5 | import java.util.ArrayList; 6 | import java.util.HashMap; 7 | import java.util.List; 8 | import java.util.Map; 9 | 10 | import org.junit.Before; 11 | import org.junit.Test; 12 | 13 | 14 | import com.dp.nebula.wormhole.common.interfaces.IParam; 15 | 16 | public class DefaultParamTest { 17 | 18 | private IParam param; 19 | 20 | @Before 21 | public void init() { 22 | Map map = new HashMap (); 23 | param = new DefaultParam(map); 24 | map.put("booltrue", "true"); 25 | map.put("boolfalse", "error"); 26 | map.put("chart", "\\t"); 27 | map.put("char1", "\001"); 28 | map.put("char2", "\\u0051"); 29 | map.put("char3", "\\001"); 30 | map.put("arrayList", "1:2"); 31 | map.put("int", "31"); 32 | map.put("long", "31"); 33 | map.put("double", "31.13"); 34 | map.put("string", "system"); 35 | 36 | } 37 | 38 | @Test 39 | public void test() { 40 | assertEquals(true,param.getBooleanValue("booltrue")); 41 | assertEquals(false,param.getBooleanValue("boolfalse",false)); 42 | assertEquals('\t',param.getCharValue("chart")); 43 | assertEquals('\001',param.getCharValue("char1")); 44 | assertEquals('3',param.getCharValue("char2")); 45 | assertEquals('\001',param.getCharValue("char3")); 46 | 47 | assertEquals(31,param.getIntValue("int")); 48 | assertEquals(31L,param.getLongValue("long")); 49 | assertEquals((double)31.13,param.getDoubleValue("double"),0.1); 50 | 51 | List result = new ArrayList(); 52 | result.add('1'); 53 | result.add('2'); 54 | assertEquals(result,param.getCharList("arrayList")); 55 | assertEquals("system", param.getValue("string","see")); 56 | 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/common/MongoUtils.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.common; 2 | 3 | import org.apache.log4j.Logger; 4 | 5 | import com.mongodb.DB; 6 | import com.mongodb.DBCollection; 7 | import com.mongodb.DBObject; 8 | import com.mongodb.Mongo; 9 | import com.mongodb.MongoURI; 10 | import com.mongodb.util.JSON; 11 | 12 | public final class MongoUtils { 13 | private final static Logger log = Logger.getLogger(MongoUtils.class); 14 | 15 | private MongoUtils(){ 16 | throw new AssertionError(); 17 | } 18 | 19 | private static final Mongo.Holder mongos = new Mongo.Holder(); 20 | 21 | public static DBCollection getCollection( MongoURI uri ){ 22 | try { 23 | Mongo mongo = mongos.connect( uri ); 24 | DB myDb = mongo.getDB(uri.getDatabase()); 25 | //if there's a username and password 26 | if(uri.getUsername() != null && uri.getPassword() != null && !myDb.isAuthenticated()) { 27 | boolean auth = myDb.authenticate(uri.getUsername(), uri.getPassword()); 28 | if(auth) { 29 | log.info("Sucessfully authenticated with collection."); 30 | } 31 | else { 32 | throw new IllegalArgumentException( "Unable to connect to collection." ); 33 | } 34 | } 35 | return uri.connectCollection(mongo); 36 | } 37 | catch ( final Exception e ) { 38 | throw new IllegalArgumentException( "Unable to connect to collection." 
+ e.getMessage(), e ); 39 | } 40 | } 41 | 42 | public static DBObject convertStringToDBObject(String str){ 43 | DBObject bson = null; 44 | Object obj = JSON.parse(str); 45 | if (obj instanceof DBObject){ 46 | bson = (DBObject) obj; 47 | } 48 | return bson; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/JobStatus.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common; 2 | 3 | public enum JobStatus { 4 | 5 | RUNNING(-1), 6 | SUCCESS(0), 7 | //200-299 System status 8 | SUCCESS_WITH_ERROR(201), 9 | PARTIAL_FAILED(202), 10 | FAILED(203), 11 | CONF_FAILED(204), 12 | 13 | //300-599 Reader status 14 | READ_FAILED(300), 15 | READ_CONNECTION_FAILED(301), 16 | PRE_CHECK_FAILED(302), 17 | READ_OUT_OF_TIME(305), 18 | READ_DATA_EXCEPTION(306), 19 | READ_SYSTEM_ERROR(307), 20 | READ_OPERATOR_INTERVENTION(308), 21 | 22 | 23 | //600-999 Writer status 24 | WRITE_FAILED(600), 25 | WRITE_CONNECTION_FAILED(601), 26 | PRE_WRITE_FAILED(602), 27 | ROLL_BACK_FAILED(603), 28 | POST_WRITE_FAILED(604), 29 | WRITE_OUT_OF_TIME(605), 30 | WRITE_DATA_EXCEPTION(606), 31 | WRITE_SYSTEM_ERROR(607), 32 | WRITE_OPERATOR_INTERVENTION(608); 33 | 34 | private int status; 35 | 36 | public static final int PLUGIN_BASE = 1000; 37 | public static final int WRITER_BASE = 100000; 38 | 39 | public int getStatus() { 40 | return status; 41 | } 42 | 43 | private JobStatus(int status){ 44 | this.status = status; 45 | } 46 | 47 | public static JobStatus fromName (String v) { 48 | try { 49 | return valueOf(JobStatus.class,v.toUpperCase()); 50 | } catch (Exception e) { 51 | return null; 52 | } 53 | } 54 | 55 | public static JobStatus fromStatus (int status) { 56 | for(JobStatus jobStatus : JobStatus.values()) { 57 | if(jobStatus.status == status%PLUGIN_BASE) { 58 | return jobStatus; 59 | } 60 | } 61 | return null; 62 | } 63 | 64 | public boolean isFailed() { 65 | switch(this) 66 | { 67 | case RUNNING: 68 | return false; 69 | case SUCCESS: 70 | return false; 71 | case SUCCESS_WITH_ERROR: 72 | return false; 73 | default: 74 | return true; 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/test/java/com/dp/nebula/wormhole/engine/storage/SingleQueueTest.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.storage; 2 | 3 | import static org.junit.Assert.assertArrayEquals; 4 | import static org.junit.Assert.assertEquals; 5 | 6 | import java.util.concurrent.TimeUnit; 7 | 8 | import org.junit.Test; 9 | 10 | import com.dp.nebula.wormhole.common.DefaultLine; 11 | import com.dp.nebula.wormhole.common.interfaces.ILine; 12 | 13 | public class SingleQueueTest { 14 | @Test 15 | public void queueTest(){ 16 | SingleQueue dq = new SingleQueue(200,1024*200); 17 | ILine setLine,getLine = null; 18 | setLine = new DefaultLine(); 19 | setLine.addField("1"); 20 | setLine.addField("sunny"); 21 | try { 22 | dq.push(setLine, 1, TimeUnit.MILLISECONDS); 23 | getLine = dq.pull(1, TimeUnit.MILLISECONDS); 24 | } catch (InterruptedException e) { 25 | e.printStackTrace(); 26 | } 27 | try { 28 | Thread.sleep ( 1000L ) ; 29 | } catch (InterruptedException e) { 30 | e.printStackTrace(); 31 | } 32 | assertEquals(setLine,getLine); 33 | } 34 | @Test 35 | public void bufferedQueueTest(){ 36 | SingleQueue dq = new SingleQueue(200,1024*200); 37 | ILine[] setLines,getLines = null; 
38 | setLines = new ILine[10]; 39 | getLines = new ILine[10]; 40 | ILine item1 = new DefaultLine(); 41 | item1.addField("1"); 42 | item1.addField("sunny"); 43 | ILine item2 = new DefaultLine(); 44 | item2.addField("2"); 45 | item2.addField("jack"); 46 | setLines[0]=item1; 47 | setLines[1]=item2; 48 | try { 49 | dq.push(setLines,2, 1, TimeUnit.MILLISECONDS); 50 | dq.pull(getLines, 1, TimeUnit.MILLISECONDS); 51 | } catch (InterruptedException e) { 52 | e.printStackTrace(); 53 | } 54 | try { 55 | Thread.sleep ( 1000L ) ; 56 | } catch (InterruptedException e) { 57 | e.printStackTrace(); 58 | } 59 | assertArrayEquals(setLines,getLines); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/test/java/com/dp/nebula/wormhole/engine/utils/ReflectionUtilTest.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.utils; 2 | 3 | import static org.junit.Assert.assertNotNull; 4 | import static org.junit.Assert.assertNull; 5 | import static org.junit.Assert.assertTrue; 6 | 7 | import org.junit.Before; 8 | import org.junit.Test; 9 | 10 | import com.dp.nebula.wormhole.common.DefaultLine; 11 | import com.dp.nebula.wormhole.common.interfaces.ILine; 12 | import com.dp.nebula.wormhole.engine.common.TestUtils; 13 | 14 | public class ReflectionUtilTest { 15 | 16 | private JarLoader jarLoader; 17 | private String className; 18 | 19 | @Before 20 | public void init(){ 21 | jarLoader = JarLoader.getInstance( 22 | new String[] {getPath(new String[] {"jar", "path01"}), 23 | getPath(new String[] {"jar", "path02"})}); 24 | className = "com.dianping.ls.analysis.common.CommonAnalysisUtils"; 25 | 26 | } 27 | 28 | @Test 29 | public void testCreateInstanceByDefaultConstructor_with_class_loader_Success(){ 30 | Object o = ReflectionUtil.createInstanceByDefaultConstructor(className, Object.class, jarLoader); 31 | assertNotNull(o); 32 | } 33 | 34 | @Test 35 | public void testCreateInstanceByDefaultConstructor_with_class_loader_Failed(){ 36 | Object o = ReflectionUtil.createInstanceByDefaultConstructor("aaaaa", Object.class, jarLoader); 37 | assertNull(o); 38 | } 39 | 40 | @Test 41 | public void testCreateInstanceByDefaultConstructor_without_class_loader_Success(){ 42 | DefaultLine line = ReflectionUtil.createInstanceByDefaultConstructor("com.dp.nebula.wormhole.common.DefaultLine", DefaultLine.class); 43 | assertNotNull(line); 44 | assertTrue(line instanceof ILine); 45 | assertTrue(line instanceof DefaultLine); 46 | } 47 | 48 | @Test 49 | public void testCreateInstanceByDefaultConstructor_without_class_loader_Failed(){ 50 | Object o = ReflectionUtil.createInstanceByDefaultConstructor(className, Object.class); 51 | assertNull(o); 52 | } 53 | 54 | 55 | private String getPath(String[] paths){ 56 | return TestUtils.getResourcePath(paths); 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/reader/hdfsreader/ParamKey.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.reader.hdfsreader; 2 | 3 | public final class ParamKey { 4 | /* 5 | * @name: dir 6 | * @description: hdfs path, format like: hdfs://ip:port/path , file:////data/tmp/ 7 | * @range: 8 | * @mandatory: true 9 | * @default: 10 | */ 11 | public final static String dir = "dir"; 12 | /* 13 | * @name: fieldSplit 14 | * @description: field separator 15 | * @range: 16 | * @mandatory: 
false 17 | * @default:\t 18 | */ 19 | public final static String fieldSplit = "field_split"; 20 | /* 21 | * @name: encoding 22 | * @description: hdfs encode 23 | * @range:UTF-8|GBK|GB2312 24 | * @mandatory: false 25 | * @default:UTF-8 26 | */ 27 | public final static String encoding = "encoding"; 28 | /* 29 | * @name: bufferSize 30 | * @description: how large the buffer 31 | * @range: [1024-4194304] 32 | * @mandatory: false 33 | * @default: 4096 34 | */ 35 | public final static String bufferSize = "buffer_size"; 36 | 37 | /* 38 | * @name: nullString 39 | * @description: specify nullString and replace it to null 40 | * @range: 41 | * @mandatory: false 42 | * @default: \N 43 | */ 44 | public final static String nullString = "nullstring"; 45 | 46 | /* 47 | * @name: colFilter 48 | * @description:filter column 49 | * @range: 50 | * @mandatory: false 51 | * @default: 52 | */ 53 | public final static String colFilter = "col_filter"; 54 | 55 | /* 56 | * @name:concurrency 57 | * @description:concurrency of the job 58 | * @range:1-30 59 | * @mandatory: false 60 | * @default:1 61 | */ 62 | public final static String concurrency = "concurrency"; 63 | /* 64 | * @name: firstLineReadSwitch 65 | * @description: whether the first line to be read, if switch to false, the first line will be discarded 66 | * @range: true,false 67 | * @mandatory: false 68 | * @default: true 69 | */ 70 | public final static String firstLineReadSwitch = "first_line_read_switch"; 71 | } 72 | -------------------------------------------------------------------------------- /src/test/java/com/dp/nebula/wormhole/common/BufferedLineExchangerTest.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | 8 | import org.junit.Test; 9 | 10 | import com.dp.nebula.wormhole.common.interfaces.ILine; 11 | import com.dp.nebula.wormhole.common.interfaces.IParam; 12 | import com.dp.nebula.wormhole.common.utils.ParseXMLUtil; 13 | import com.dp.nebula.wormhole.engine.config.EngineConfParamKey; 14 | import com.dp.nebula.wormhole.engine.storage.StorageConf; 15 | import com.dp.nebula.wormhole.engine.storage.StorageManager; 16 | 17 | public class BufferedLineExchangerTest { 18 | public static BufferedLineExchanger getLineExchanger(){ 19 | IParam engineConf = null; 20 | engineConf = ParseXMLUtil.loadEngineConfig(); 21 | List result = new ArrayList(); 22 | 23 | for(int i = 0; i< 5; i++){ 24 | StorageConf storageConf = new StorageConf(); 25 | storageConf.setId(String.valueOf(i)); 26 | storageConf.setStorageClassName( 27 | engineConf.getValue(EngineConfParamKey.STORAGE_CLASS_NAME)); 28 | storageConf.setLineLimit( 29 | 10); 30 | storageConf.setByteLimit( 31 | engineConf.getIntValue(EngineConfParamKey.STORAGE_BYTE_LIMIT)); 32 | storageConf.setDestructLimit( 33 | engineConf.getIntValue(EngineConfParamKey.STORAGE_DESTRUCT_LIMIT)); 34 | storageConf.setPeriod( 35 | engineConf.getIntValue(EngineConfParamKey.MONITOR_INFO_DISPLAY_PERIOD)); 36 | storageConf.setWaitTime( 37 | 1000); 38 | result.add(storageConf); 39 | } 40 | StorageManager manager = new StorageManager(result); 41 | return new BufferedLineExchanger(manager.getStorageForWriter("1"), manager.getStorageForReader()); 42 | } 43 | @Test 44 | public void init(){ 45 | BufferedLineExchanger exchanger = getLineExchanger(); 46 | ILine line = new DefaultLine(); 47 | line.addField("this"); 48 | line.addField("that"); 49 | 
exchanger.send(line); 50 | exchanger.flush(); 51 | ILine getLine = exchanger.receive(); 52 | assertEquals(line,getLine); 53 | } 54 | } -------------------------------------------------------------------------------- /src/test/java/com/dp/nebula/wormhole/common/utils/ParseXMLUtilTest.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common.utils; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | import static org.junit.Assert.assertNotNull; 5 | import static org.junit.Assert.assertTrue; 6 | 7 | import java.util.Iterator; 8 | import java.util.List; 9 | import java.util.Map; 10 | 11 | import org.junit.Test; 12 | 13 | import com.dp.nebula.wormhole.common.config.JobConf; 14 | import com.dp.nebula.wormhole.common.config.JobPluginConf; 15 | import com.dp.nebula.wormhole.common.interfaces.IParam; 16 | 17 | public class ParseXMLUtilTest { 18 | 19 | @Test 20 | public void testLoadJobConf() { 21 | String fileName = "src/test/resources/wormhole_hivereader_to_hdfswriter_test.xml"; 22 | JobConf jobConf = ParseXMLUtil.loadJobConf(fileName); 23 | assertNotNull(jobConf); 24 | assertEquals("hivereader_to_hdfswriter_job", jobConf.getId()); 25 | 26 | JobPluginConf readerConf = jobConf.getReaderConf(); 27 | List writerConf = jobConf.getWriterConfs(); 28 | 29 | assertEquals("hivereader", readerConf.getPluginName()); 30 | IParam readerPluginParam = readerConf.getPluginParam(); 31 | assertNotNull(readerPluginParam); 32 | assertTrue(readerPluginParam instanceof IParam); 33 | 34 | assertNotNull(writerConf); 35 | assertEquals(1, writerConf.size()); 36 | assertEquals("hdfswriter", writerConf.get(0).getPluginName()); 37 | } 38 | 39 | @SuppressWarnings("unchecked") 40 | @Test 41 | public void testLoadPluginConf() { 42 | Map pluginMap = ParseXMLUtil.loadPluginConf(); 43 | assertNotNull(pluginMap); 44 | Iterator iter = pluginMap.entrySet().iterator(); 45 | while (iter.hasNext()) { 46 | Map.Entry entry = (Map.Entry) iter.next(); 47 | System.out.println(entry.getKey()); 48 | System.out.println(entry.getValue()); 49 | } 50 | } 51 | 52 | @Test 53 | public void testLoadEngineConfig() { 54 | IParam engineConf = ParseXMLUtil.loadEngineConfig(); 55 | assertNotNull(engineConf); 56 | assertNotNull(engineConf.getValue("storageClassName")); 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/engine/core/WriterThread.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.core; 2 | 3 | import java.util.concurrent.Callable; 4 | 5 | import org.apache.commons.logging.Log; 6 | import org.apache.commons.logging.LogFactory; 7 | 8 | import com.dp.nebula.wormhole.common.JobStatus; 9 | import com.dp.nebula.wormhole.common.WormholeException; 10 | import com.dp.nebula.wormhole.common.interfaces.ILineReceiver; 11 | import com.dp.nebula.wormhole.common.interfaces.IParam; 12 | import com.dp.nebula.wormhole.common.interfaces.IPluginMonitor; 13 | import com.dp.nebula.wormhole.common.interfaces.IWriter; 14 | import com.dp.nebula.wormhole.engine.utils.JarLoader; 15 | import com.dp.nebula.wormhole.engine.utils.ReflectionUtil; 16 | 17 | final class WriterThread implements Callable{ 18 | 19 | private static final Log s_logger = LogFactory.getLog(WriterThread.class); 20 | 21 | private IWriter writer; 22 | private ILineReceiver lineReceiver; 23 | 24 | public static WriterThread getInstance(ILineReceiver lineReceiver, 
IParam param, String writerClassName, 25 | String writerPath, IPluginMonitor pm){ 26 | try{ 27 | IWriter writer = ReflectionUtil.createInstanceByDefaultConstructor( 28 | writerClassName, IWriter.class, 29 | JarLoader.getInstance(new String[]{writerPath})); 30 | writer.setParam(param); 31 | writer.setMonitor(pm); 32 | return new WriterThread(lineReceiver, writer); 33 | } catch(Exception e){ 34 | s_logger.error("Error to create WriterThread: ", e); 35 | return null; 36 | } 37 | 38 | } 39 | 40 | private WriterThread(ILineReceiver lineReceiver, IWriter writer) { 41 | super(); 42 | this.lineReceiver = lineReceiver; 43 | this.writer = writer; 44 | } 45 | 46 | @Override 47 | public Integer call() throws Exception { 48 | try{ 49 | writer.init(); 50 | writer.connection(); 51 | writer.write(lineReceiver); 52 | writer.commit(); 53 | writer.finish(); 54 | return JobStatus.SUCCESS.getStatus(); 55 | } catch(WormholeException e){ 56 | s_logger.error("Exception occurs in writer thread!", e); 57 | return e.getStatusCode(); 58 | } catch(Exception e){ 59 | s_logger.error("Exception occurs in writer thread!", e); 60 | return JobStatus.FAILED.getStatus(); 61 | } 62 | } 63 | 64 | } 65 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/engine/storage/StorageConf.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.storage; 2 | 3 | public class StorageConf { 4 | 5 | private String id; 6 | private String storageClassName; 7 | private int lineLimit; 8 | private int byteLimit; 9 | private int destructLimit; 10 | private int period; 11 | private int waitTime; 12 | private int peripheralTimeout; 13 | 14 | public String getId() { 15 | return id; 16 | } 17 | 18 | public void setId(String id) { 19 | this.id = id; 20 | } 21 | 22 | public String getStorageClassName() { 23 | return storageClassName; 24 | } 25 | 26 | public void setStorageClassName(String storageClassName) { 27 | this.storageClassName = storageClassName; 28 | } 29 | 30 | public int getLineLimit() { 31 | return lineLimit; 32 | } 33 | 34 | public void setLineLimit(int lineLimit) { 35 | this.lineLimit = lineLimit; 36 | } 37 | 38 | public int getByteLimit() { 39 | return byteLimit; 40 | } 41 | 42 | public void setByteLimit(int byteLimit) { 43 | this.byteLimit = byteLimit; 44 | } 45 | 46 | public int getDestructLimit() { 47 | return destructLimit; 48 | } 49 | 50 | public void setDestructLimit(int destructLimit) { 51 | this.destructLimit = destructLimit; 52 | } 53 | 54 | public int getPeriod() { 55 | return period; 56 | } 57 | 58 | public void setPeriod(int period) { 59 | this.period = period; 60 | } 61 | 62 | public int getWaitTime() { 63 | return waitTime; 64 | } 65 | 66 | public void setWaitTime(int waitTime) { 67 | this.waitTime = waitTime; 68 | } 69 | 70 | public int getPeripheralTimeout() { 71 | return peripheralTimeout; 72 | } 73 | 74 | public void setPeripheralTimeout(int peripheralTimeout) { 75 | this.peripheralTimeout = peripheralTimeout; 76 | } 77 | 78 | public StorageConf(String id, String storageClassName, int lineLimit, 79 | int byteLimit, int destructLimit, int period, int waitTime) { 80 | super(); 81 | this.id = id; 82 | this.storageClassName = storageClassName; 83 | this.lineLimit = lineLimit; 84 | this.byteLimit = byteLimit; 85 | this.destructLimit = destructLimit; 86 | this.period = period; 87 | this.waitTime = waitTime; 88 | } 89 | 90 | public StorageConf() { 91 | } 92 | } 93 | 
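Read together with WriterThread.call() above, the write-side lifecycle (init, connection, write, commit, finish) is easiest to see against a trivial writer. The sketch below is illustrative only and not part of the repository; it assumes that IWriter adds just write(ILineReceiver) and commit() on top of IPlugin, which are exactly the calls WriterThread makes, and that ILineReceiver.receive() returns null once the storage queue is drained.

package com.dp.nebula.wormhole.plugins.writer.example;   // hypothetical package, for illustration only

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.dp.nebula.wormhole.common.AbstractPlugin;
import com.dp.nebula.wormhole.common.interfaces.ILine;
import com.dp.nebula.wormhole.common.interfaces.ILineReceiver;
import com.dp.nebula.wormhole.common.interfaces.IWriter;

public class LogWriter extends AbstractPlugin implements IWriter {

    private static final Log s_logger = LogFactory.getLog(LogWriter.class);

    @Override
    public void write(ILineReceiver lineReceiver) {
        ILine line;
        // Pull lines from the storage queue until it is drained (assumed null-on-empty contract).
        while ((line = lineReceiver.receive()) != null) {
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < line.getFieldNum(); i++) {
                sb.append(line.getField(i)).append('\t');
            }
            s_logger.info(sb.toString());
        }
    }

    @Override
    public void commit() {
        // nothing to commit for a log-only sink
    }
}

AbstractPlugin already supplies no-op init(), connection() and finish(), so only the two data-path methods need to be implemented here.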
-------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/utils/ClassNode.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common.utils; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashMap; 5 | import java.util.List; 6 | import java.util.Map; 7 | import java.util.Set; 8 | 9 | public final class ClassNode { 10 | private List members; 11 | 12 | private String name; 13 | 14 | public static ClassNode newInstance() { 15 | return new ClassNode(); 16 | } 17 | 18 | public static ClassMember createMember(String name, Map attrs) { 19 | return new ClassMember(name, attrs); 20 | } 21 | 22 | private ClassNode() { 23 | members = new ArrayList(); 24 | } 25 | 26 | public void addMember(ClassMember cm) { 27 | this.members.add(cm); 28 | } 29 | 30 | public List getAllMembers() { 31 | return this.members; 32 | } 33 | 34 | public String getName() { 35 | return name; 36 | } 37 | 38 | public void setName(String name) { 39 | this.name = name; 40 | } 41 | 42 | public String toString() { 43 | StringBuilder sb = new StringBuilder(); 44 | sb.append(String.format("Class name: %s\n", this.name)); 45 | for (ClassMember cm: this.members) { 46 | sb.append(cm.toString()); 47 | } 48 | return sb.toString(); 49 | } 50 | } 51 | 52 | class ClassMember { 53 | private String name; 54 | 55 | private Map attris = new HashMap(); 56 | 57 | ClassMember(String name, Map attrs) { 58 | this.name = name; 59 | this.attris = attrs; 60 | } 61 | 62 | public String getName() { 63 | return name; 64 | } 65 | 66 | public void setName(String name) { 67 | this.name = name; 68 | } 69 | 70 | public void addAttr(String key, String value) { 71 | this.attris.put(key, value); 72 | } 73 | 74 | public String getAttr(String key) { 75 | return this.attris.get(key); 76 | } 77 | 78 | public Set getAllKeys() { 79 | return this.attris.keySet(); 80 | } 81 | 82 | public String toString() { 83 | StringBuilder sb = new StringBuilder(); 84 | sb.append(String.format("name: %s\n", this.name)); 85 | for (String key: attris.keySet()) { 86 | sb.append(String.format("key: %s, value: %s\n", key, attris.get(key))); 87 | } 88 | return sb.toString(); 89 | } 90 | 91 | } 92 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/engine/monitor/RealtimeMonitorInfo.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.monitor; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | 6 | import com.dp.nebula.wormhole.engine.storage.Statistics; 7 | 8 | public class RealtimeMonitorInfo { 9 | 10 | private long readSuccessLines; 11 | private long readFailedLines; 12 | 13 | private Map smcMap; 14 | 15 | private Map writeSuccessLinesMap = new HashMap(); 16 | private Map writeFailedLinesMap = new HashMap(); 17 | 18 | RealtimeMonitorInfo(int writerNum){ 19 | smcMap = new HashMap(writerNum); 20 | writeSuccessLinesMap = new HashMap(writerNum); 21 | writeFailedLinesMap = new HashMap(writerNum); 22 | } 23 | 24 | public long getReadSuccessLines() { 25 | return readSuccessLines; 26 | } 27 | 28 | public void setReadSuccessLines(long readSuccessLines) { 29 | this.readSuccessLines = readSuccessLines; 30 | } 31 | 32 | public long getReadFailedLines() { 33 | return readFailedLines; 34 | } 35 | 36 | public void setReadFailedLines(long readFailedLines) { 37 | this.readFailedLines = readFailedLines; 38 | } 
39 | 40 | public void addStorageMonitorCriteria(String name, Statistics smc){ 41 | smcMap.put(name, smc); 42 | } 43 | 44 | public void addwriteSuccessLines(String name, long number){ 45 | writeSuccessLinesMap.put(name, Long.valueOf(number)); 46 | } 47 | 48 | public void addwriteFailedLines(String name, long number){ 49 | writeFailedLinesMap.put(name, number); 50 | } 51 | 52 | public Map getStorageMonitorCriteriaMap() { 53 | return smcMap; 54 | } 55 | 56 | public Map getWriteSuccessLinesMap() { 57 | return writeSuccessLinesMap; 58 | } 59 | 60 | public Map getWriteFailedLinesMap() { 61 | return writeFailedLinesMap; 62 | } 63 | 64 | public String getInfo(){ 65 | StringBuilder builder = new StringBuilder(); 66 | Map map = getStorageMonitorCriteriaMap(); 67 | builder.append("\n"); 68 | for(String key : map.keySet()){ 69 | Statistics stat = map.get(key); 70 | builder.append(key).append(" ").append(stat.getPeriodState()).append("\n"); 71 | stat.periodPass(); 72 | } 73 | return builder.toString(); 74 | } 75 | 76 | } 77 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/reader/hivereader/ParamKey.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.reader.hivereader; 2 | 3 | public final class ParamKey { 4 | /* 5 | * @name:path 6 | * @description:hive path ,format like "jdbc:hive://192.168.7.80:10000/default" 7 | * @range: 8 | * @mandatory:true 9 | * @default:jdbc:hive://10.1.1.161:10000/default 10 | */ 11 | public final static String path = "path"; 12 | /* 13 | * @name: username 14 | * @description: hive login name 15 | * @range: 16 | * @mandatory: false 17 | * @default: 18 | */ 19 | public final static String username = "username"; 20 | /* 21 | * @name: password 22 | * @description: hive login password 23 | * @range: 24 | * @mandatory: false 25 | * @default: 26 | */ 27 | public final static String password = "password"; 28 | /* 29 | * @name: sql 30 | * @description: self-defined sql statement 31 | * @range: 32 | * @mandatory: true 33 | * @default: 34 | */ 35 | public final static String sql = "sql"; 36 | /* 37 | * @name: mode 38 | * @description: query mode, READ_FROM_HIVESERVER: fetch data directly from hive server, READ_FROM_HDFS: insert the data into hdfs directory and fetch data directly from datanode 39 | * @range: READ_FROM_HIVESERVER,READ_FROM_HDFS 40 | * @mandatory: true 41 | * @default: READ_FROM_HIVESERVER 42 | */ 43 | public final static String mode = "mode"; 44 | /* 45 | * @name: dataDir 46 | * @description: the temporary data directory to fetch on hdfs if using mode 2 47 | * @range: 48 | * @mandatory: true 49 | * @default:hdfs://10.2.6.102/tmp/ 50 | */ 51 | public final static String dataDir = "dataDir"; 52 | /* 53 | * @name:reduceNumber 54 | * @description:reduce task number when doing insert query, when it set to -1, hive will automatically computer reduce number 55 | * @range:1-1000 56 | * @mandatory: true 57 | * @default:-1 58 | */ 59 | public final static String reduceNumber = "reduceNumber"; 60 | /* 61 | * @name:concurrency 62 | * @description:concurrency of the job 63 | * @range:1-10 64 | * @mandatory: false 65 | * @default:1 66 | */ 67 | public final static String concurrency = "concurrency"; 68 | 69 | } 70 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/reader/hivereader/HiveReaderSplitter.java: 
-------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.reader.hivereader; 2 | 3 | import java.io.IOException; 4 | import java.net.URI; 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | 8 | import org.apache.hadoop.conf.Configuration; 9 | import org.apache.hadoop.fs.FileStatus; 10 | import org.apache.hadoop.fs.FileSystem; 11 | import org.apache.hadoop.fs.Path; 12 | import org.apache.log4j.Logger; 13 | 14 | import com.dp.nebula.wormhole.common.AbstractSplitter; 15 | import com.dp.nebula.wormhole.common.JobStatus; 16 | import com.dp.nebula.wormhole.common.WormholeException; 17 | import com.dp.nebula.wormhole.common.interfaces.IParam; 18 | import com.dp.nebula.wormhole.plugins.common.DFSUtils; 19 | 20 | public class HiveReaderSplitter extends AbstractSplitter { 21 | private static final Logger LOG = Logger 22 | .getLogger(HiveReaderSplitter.class); 23 | 24 | private String mode = HiveReaderMode.READ_FROM_HIVESERVER.getMode(); 25 | private String sourceDir; 26 | 27 | @Override 28 | public void init(IParam jobParams) { 29 | super.init(jobParams); 30 | mode = param.getValue(ParamKey.mode, mode); 31 | sourceDir = param.getValue(ParamKey.dataDir); 32 | } 33 | 34 | @Override 35 | public List split() { 36 | List result = new ArrayList(); 37 | HiveReaderMode readerMode = HiveReaderMode.valueOf(mode); 38 | 39 | switch (readerMode) { 40 | case READ_FROM_HIVESERVER: 41 | result.add(param); 42 | break; 43 | case READ_FROM_HDFS: 44 | FileSystem fs = null; 45 | try { 46 | Configuration conf = DFSUtils.getConf(sourceDir, null); 47 | fs = DFSUtils.createFileSystem(new URI(sourceDir), conf); 48 | FileStatus[] files = fs.listStatus(new Path(sourceDir)); 49 | for (FileStatus fileStatus : files) { 50 | IParam p = param.clone(); 51 | p.putValue(ParamKey.dataDir, fileStatus.getPath() 52 | .toString()); 53 | result.add(p); 54 | } 55 | } catch (Exception e) { 56 | throw new WormholeException(e, 57 | JobStatus.READ_FAILED.getStatus()); 58 | } finally { 59 | if (fs != null) { 60 | try { 61 | fs.close(); 62 | } catch (IOException e) { 63 | } 64 | } 65 | } 66 | break; 67 | 68 | } 69 | LOG.info("splitted files num:" + result.size()); 70 | 71 | return result; 72 | } 73 | 74 | } 75 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/engine/core/ReaderThread.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.core; 2 | 3 | import java.util.concurrent.Callable; 4 | 5 | import org.apache.commons.logging.Log; 6 | import org.apache.commons.logging.LogFactory; 7 | 8 | import com.dp.nebula.wormhole.common.JobStatus; 9 | import com.dp.nebula.wormhole.common.WormholeException; 10 | import com.dp.nebula.wormhole.common.interfaces.ILineSender; 11 | import com.dp.nebula.wormhole.common.interfaces.IParam; 12 | import com.dp.nebula.wormhole.common.interfaces.IPluginMonitor; 13 | import com.dp.nebula.wormhole.common.interfaces.IReader; 14 | import com.dp.nebula.wormhole.engine.utils.JarLoader; 15 | import com.dp.nebula.wormhole.engine.utils.ReflectionUtil; 16 | 17 | final class ReaderThread implements Callable{ 18 | 19 | private static final Log s_logger = LogFactory.getLog(ReaderThread.class); 20 | 21 | private IReader reader; 22 | private ILineSender lineSender; 23 | 24 | public static ReaderThread getInstance(ILineSender lineSender, IParam param, String readerClassName, 25 | String readerPath, IPluginMonitor 
monitor){ 26 | try{ 27 | IReader reader = ReflectionUtil.createInstanceByDefaultConstructor( 28 | readerClassName, IReader.class, JarLoader.getInstance(readerPath)); 29 | reader.setParam(param); 30 | reader.setMonitor(monitor); 31 | return new ReaderThread(lineSender, reader); 32 | } catch(Exception e){ 33 | s_logger.error("Error to create Reader Thread!", e); 34 | return null; 35 | } 36 | } 37 | 38 | private ReaderThread(ILineSender lineSender, IReader reader){ 39 | super(); 40 | this.lineSender = lineSender; 41 | this.reader = reader; 42 | } 43 | 44 | 45 | 46 | @Override 47 | /** 48 | * invoke method init, connection, read & finish sequentially 49 | * any exception occurs is thrown to upper classes (Thread Pool) 50 | */ 51 | public Integer call() throws Exception { 52 | try{ 53 | reader.init(); 54 | reader.connection(); 55 | reader.read(lineSender); 56 | reader.finish(); 57 | return JobStatus.SUCCESS.getStatus(); 58 | } catch(WormholeException e){ 59 | s_logger.error("Exception occurs in reader thread!", e); 60 | return e.getStatusCode(); 61 | } catch(Exception e){ 62 | s_logger.error("Exception occurs in reader thread!", e); 63 | return JobStatus.FAILED.getStatus(); 64 | } 65 | } 66 | 67 | } 68 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/writer/sftpwriter/ParamKey.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.writer.sftpwriter; 2 | 3 | public final class ParamKey { 4 | /* 5 | * @name: dir 6 | * @description: sftp path format like: sftp://[@][:]// 7 | * @range: 8 | * @mandatory: true 9 | * @default: 10 | */ 11 | public final static String dir = "dir"; 12 | /* 13 | * @name: password 14 | * @description: password 15 | * @range: 16 | * @mandatory: true 17 | * @default: 18 | */ 19 | public final static String password = "password"; 20 | /* 21 | * @name: prefixname 22 | * @description: prefix filename 23 | * @range: 24 | * @mandatory: false 25 | * @default:part 26 | */ 27 | public final static String prefixname = "prefix_filename"; 28 | /* 29 | * @name: fileType 30 | * @description: fileType 31 | * @range: txt|gz|lzo 32 | * @mandatory: true 33 | * @default: txt 34 | */ 35 | public final static String fileType = "file_type"; 36 | /* 37 | * @name: fieldSplit 38 | * @description: field separator 39 | * @range: 40 | * @mandatory: false 41 | * @default:\t 42 | */ 43 | public final static String fieldSplit = "field_split"; 44 | /* 45 | * @name: lineSplit 46 | * @description: how to seperate fields 47 | * @range:\n 48 | * @mandatory: false 49 | * @default:\n 50 | */ 51 | public final static String lineSplit = "line_split"; 52 | /* 53 | * @name: encoding 54 | * @description: file encode 55 | * @range:UTF-8|GBK|GB2312 56 | * @mandatory: false 57 | * @default:UTF-8 58 | */ 59 | public final static String encoding = "encoding"; 60 | /* 61 | * @name: bufferSize 62 | * @description: how large the buffer 63 | * @range: [1024-4194304] 64 | * @mandatory: false 65 | * @default: 4096 66 | */ 67 | public final static String bufferSize = "buffer_size"; 68 | /* 69 | * @name: nullChar 70 | * @description: how to replace null in remote server 71 | * @range: 72 | * @mandatory: false 73 | * @default: 74 | */ 75 | public final static String nullChar = "nullchar"; 76 | /* 77 | * @name: concurrency 78 | * @description: concurrency of the job 79 | * @range:1-30 80 | * @mandatory: false 81 | * @default:1 82 | */ 83 | public final static String concurrency 
= "concurrency"; 84 | } -------------------------------------------------------------------------------- /src/test/java/com/dp/nebula/wormhole/engine/core/EngineTest.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.core; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | 5 | import java.util.Map; 6 | 7 | import org.apache.commons.logging.Log; 8 | import org.apache.commons.logging.LogFactory; 9 | import org.apache.log4j.PropertyConfigurator; 10 | import org.junit.Test; 11 | 12 | import com.dp.nebula.wormhole.common.JobStatus; 13 | import com.dp.nebula.wormhole.common.config.JobConf; 14 | import com.dp.nebula.wormhole.common.interfaces.IParam; 15 | import com.dp.nebula.wormhole.common.utils.Environment; 16 | import com.dp.nebula.wormhole.common.utils.JobConfGenDriver; 17 | import com.dp.nebula.wormhole.common.utils.ParseXMLUtil; 18 | 19 | public class EngineTest { 20 | private static final Log s_logger = LogFactory.getLog(EngineTest.class); 21 | @Test 22 | public void test() { 23 | String []args = {"job.xml"}; 24 | PropertyConfigurator.configure(Environment.LOG4J_CONF); 25 | 26 | String jobDescriptionXML = null; 27 | //if no parameters are passed in, 28 | //it generates job configure XML first 29 | if(args.length < 1){ 30 | try{ 31 | JobConfGenDriver.generateJobConfXml(); 32 | } catch(Exception e){ 33 | s_logger.error("Error in generating job configure XML: ", e); 34 | System.exit(JobStatus.FAILED.getStatus()); 35 | } 36 | System.exit(JobStatus.SUCCESS.getStatus()); 37 | } 38 | else if(args.length == 1){ 39 | jobDescriptionXML = args[0]; 40 | } 41 | //return usage information 42 | else{ 43 | s_logger.error("Usage: ./wormhole.sh job.xml ."); 44 | System.exit(JobStatus.FAILED.getStatus()); 45 | } 46 | JobConf jobConf = null; 47 | IParam engineConf = null; 48 | Map pluginConfs = null; 49 | try{ 50 | //read configurations from XML for engine & plugins 51 | jobConf = ParseXMLUtil.loadJobConf(jobDescriptionXML); 52 | engineConf = ParseXMLUtil.loadEngineConfig(); 53 | pluginConfs = ParseXMLUtil.loadPluginConf(); 54 | //start data transmission 55 | 56 | } catch (Exception e) { 57 | s_logger.error("Configure file error occurs: ",e); 58 | System.exit(JobStatus.CONF_FAILED.getStatus()); 59 | } 60 | 61 | Engine engine = new Engine(engineConf, pluginConfs); 62 | int jobStatus = engine.run(jobConf); 63 | s_logger.info("wormhole return code-" + jobStatus); 64 | assertEquals(0,jobStatus); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/writer/hbasewriter/ParamKey.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.writer.hbasewriter; 2 | 3 | public final class ParamKey { 4 | /* 5 | * @name: htable 6 | * @description: hbase table name 7 | * @range: 8 | * @mandatory: true 9 | * @default: 10 | */ 11 | public final static String htable = "htable"; 12 | /* 13 | * @name: autoFlush 14 | * @description: turn on/off autoFlush 15 | * @range: true/false 16 | * @mandatory: true 17 | * @default: false 18 | */ 19 | public final static String autoFlush = "autoFlush"; 20 | /* 21 | * @name: writebufferSize 22 | * @description: write buffer size 23 | * @range: [0-26214400] 24 | * @mandatory: true 25 | * @default: 1048576 26 | */ 27 | public final static String writebufferSize = "writebufferSize"; 28 | /* 29 | * @name: writeAheadLog 30 | * @description: turn on/off wal 
31 | * @range: true/false 32 | * @mandatory: true 33 | * @default: true 34 | */ 35 | public final static String writeAheadLog = "writeAheadLog"; 36 | /* 37 | * @name: rowKeyIndex 38 | * @description: specify the rowkey index number 39 | * @range: 40 | * @mandatory: true 41 | * @default: 42 | */ 43 | public final static String rowKeyIndex = "rowKeyIndex"; 44 | /* 45 | * @name: columnsName 46 | * @description: specify the column family and qualifier to write, split by comma, e.g."cf1:col1,cf2:col2" 47 | * @range: 48 | * @mandatory: true 49 | * @default: 50 | */ 51 | public final static String columnsName = "columnsName"; 52 | /* 53 | * @name: deleteMode 54 | * @description: deleteMode before write data into htable, 0:do nothing, 1:delete table data, 2.truncate and create table 55 | * @range: [0-2] 56 | * @mandatory: true 57 | * @default:0 58 | */ 59 | public final static String deleteMode = "deleteMode"; 60 | /* 61 | * @name: rollbackMode 62 | * @description: rollbackMode when writer failure, 0:do nothing, 1:delete table data, 2.truncate and create table 63 | * @range: [0-2] 64 | * @mandatory: true 65 | * @default:0 66 | */ 67 | public final static String rollbackMode = "rollbackMode"; 68 | /* 69 | * @name: concurrency 70 | * @description: concurrency of the job 71 | * @range:1-10 72 | * @mandatory: false 73 | * @default:1 74 | */ 75 | public final static String concurrency = "concurrency"; 76 | 77 | } 78 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/reader/sftpreader/ParamKey.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.reader.sftpreader; 2 | 3 | public final class ParamKey { 4 | /* 5 | * @name: dir 6 | * @description: sftp path format like: sftp://[@][:]// 7 | * @range: 8 | * @mandatory: true 9 | * @default: 10 | */ 11 | public final static String dir = "dir"; 12 | /* 13 | * @name: password 14 | * @description: password 15 | * @range: 16 | * @mandatory: true 17 | * @default: 18 | */ 19 | public final static String password = "password"; 20 | /* 21 | * @name: fileType 22 | * @description: fileType 23 | * @range:txt,gz,lzo 24 | * @mandatory: true 25 | * @default:txt 26 | */ 27 | public final static String fileType = "file_type"; 28 | /* 29 | * @name: fieldSplit 30 | * @description: field separator 31 | * @range: 32 | * @mandatory: false 33 | * @default:\t 34 | */ 35 | public final static String fieldSplit = "field_split"; 36 | /* 37 | * @name: encoding 38 | * @description: hdfs encode 39 | * @range:UTF-8|GBK|GB2312 40 | * @mandatory: false 41 | * @default:UTF-8 42 | */ 43 | public final static String encoding = "encoding"; 44 | /* 45 | * @name: bufferSize 46 | * @description: how large the buffer 47 | * @range: [1024-4194304] 48 | * @mandatory: false 49 | * @default: 4096 50 | */ 51 | public final static String bufferSize = "buffer_size"; 52 | 53 | /* 54 | * @name: nullString 55 | * @description: specify nullString and replace it to null 56 | * @range: 57 | * @mandatory: false 58 | * @default: \N 59 | */ 60 | public final static String nullString = "nullstring"; 61 | /* 62 | * @name: colFilter 63 | * @description: filter column 64 | * @range: 65 | * @mandatory: false 66 | * @default: 67 | */ 68 | public final static String colFilter = "col_filter"; 69 | /* 70 | * @name: concurrency 71 | * @description: concurrency of the job 72 | * @range:1-30 73 | * @mandatory: false 74 | * @default:1 75 | */ 76 | public final static String 
concurrency = "concurrency"; 77 | /* 78 | * @name: firstLineReadOrNot 79 | * @description: whether the first line to be read, if set to false, the first line will be discarded 80 | * @range: true,false 81 | * @mandatory: false 82 | * @default: true 83 | */ 84 | public final static String firstLineReadOrNot = "first_line_read_or_not"; 85 | 86 | } 87 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/engine/utils/JarLoader.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.utils; 2 | 3 | import java.io.File; 4 | import java.io.FileFilter; 5 | import java.net.MalformedURLException; 6 | import java.net.URL; 7 | import java.net.URLClassLoader; 8 | import java.util.ArrayList; 9 | import java.util.Arrays; 10 | import java.util.List; 11 | 12 | import org.apache.commons.lang.StringUtils; 13 | import org.apache.log4j.Logger; 14 | 15 | public final class JarLoader extends URLClassLoader { 16 | 17 | private static Logger logger = Logger.getLogger(JarLoader.class); 18 | 19 | public static JarLoader getInstance(String[] paths){ 20 | try{ 21 | return new JarLoader(paths); 22 | } catch(Exception e){ 23 | return null; 24 | } 25 | } 26 | 27 | public static JarLoader getInstance(String path){ 28 | return getInstance(new String[] {path}); 29 | } 30 | 31 | private JarLoader(String[] paths) { 32 | this(paths, JarLoader.class.getClassLoader()); 33 | } 34 | 35 | private JarLoader(String[] paths, ClassLoader parent) { 36 | super(getUrls(paths), parent); 37 | } 38 | 39 | private static URL[] getUrls(String[] paths) { 40 | if (null == paths || 0 == paths.length) { 41 | throw new IllegalArgumentException("Paths cannot be empty ."); 42 | } 43 | 44 | List urls = new ArrayList(); 45 | for (String path : paths) { 46 | urls.addAll(Arrays.asList(getUrl(path))); 47 | } 48 | 49 | return urls.toArray(new URL[0]); 50 | } 51 | 52 | private static URL[] getUrl(String path) { 53 | /* check path exist */ 54 | if (null == path || StringUtils.isBlank(path)) { 55 | throw new IllegalArgumentException("Path cannot be empty ."); 56 | } 57 | 58 | File jarPath = new File(path); 59 | if (!jarPath.exists() || !jarPath.isDirectory()) { 60 | throw new IllegalArgumentException("Path must be directory ."); 61 | } 62 | 63 | /* set filter */ 64 | FileFilter jarFilter = new FileFilter() { 65 | @Override 66 | public boolean accept(File pathname) { 67 | return pathname.getName().endsWith(".jar"); 68 | } 69 | }; 70 | 71 | /* iterate all jar */ 72 | File[] allJars = new File(path).listFiles(jarFilter); 73 | URL[] jarUrls = new URL[allJars.length]; 74 | 75 | for (int i = 0; i < allJars.length; i++) { 76 | try { 77 | jarUrls[i] = allJars[i].toURI().toURL(); 78 | } catch (MalformedURLException e) { 79 | logger.error("Error in getting jar URL!",e); 80 | throw new RuntimeException("Error in getting jar URL", e); 81 | } 82 | logger.debug(jarUrls[i]); 83 | } 84 | 85 | return jarUrls; 86 | } 87 | 88 | 89 | } 90 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/config/JobConf.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common.config; 2 | 3 | import java.util.List; 4 | 5 | public class JobConf { 6 | 7 | private String id; 8 | 9 | private JobPluginConf readerConf; 10 | 11 | private List writerConfs; 12 | 13 | /** 14 | * Get reader plugin's configuration. 
15 | * 16 | * @return 17 | * a {@link JobPluginConf}. 18 | * 19 | */ 20 | public JobPluginConf getReaderConf() { 21 | return readerConf; 22 | } 23 | 24 | /** 25 | * Set reader plugin's configuration. 26 | * 27 | * @param readerConf 28 | * a {@link JobPluginConf}. 29 | * 30 | */ 31 | public void setReaderConf(JobPluginConf readerConf) { 32 | this.readerConf = readerConf; 33 | } 34 | 35 | /** 36 | * Get writer plugin's configuration. The return value is a list, 37 | * this is designed to suit multiple data destination. 38 | * 39 | * @return 40 | * a list of {@link JobPluginConf}. 41 | * 42 | */ 43 | public List getWriterConfs() { 44 | return this.writerConfs; 45 | } 46 | 47 | /** 48 | * Set writer plugin's configuration. 49 | * 50 | * @param writerConfs 51 | * a list of {@link JobPluginConf}. 52 | * 53 | */ 54 | public void setWriterConfs(List writerConfs) { 55 | this.writerConfs = writerConfs; 56 | } 57 | 58 | /** 59 | * Get number of writers. 60 | * 61 | * @return 62 | * number of writers. 63 | * 64 | */ 65 | public int getWriterNum() { 66 | return this.writerConfs.size(); 67 | } 68 | 69 | /** 70 | * Set job id. 71 | * 72 | * @param id 73 | * job id. 74 | * 75 | */ 76 | public void setId(String id) { 77 | this.id = id; 78 | } 79 | 80 | /** 81 | * Get job id. 82 | * 83 | * @return 84 | * job id. 85 | * 86 | */ 87 | public String getId() { 88 | return id; 89 | } 90 | 91 | /** 92 | * Information of job configuration. 93 | * 94 | * @return 95 | * string of job configuration information. 96 | * 97 | */ 98 | @Override 99 | public String toString() { 100 | StringBuilder sb = new StringBuilder(300); 101 | sb.append(String.format("\njob:%s", this.getId())); 102 | sb.append("\nReader conf:"); 103 | sb.append(this.readerConf.toString()); 104 | sb.append(String.format("\n\nWriter conf [num %d]:", this.writerConfs.size())); 105 | for (JobPluginConf dpc : this.writerConfs) { 106 | sb.append(dpc.toString()); 107 | } 108 | return sb.toString(); 109 | } 110 | 111 | } 112 | -------------------------------------------------------------------------------- /src/test/java/com/dp/nebula/wormhole/engine/core/ReaderThreadTest.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.core; 2 | 3 | import static org.junit.Assert.assertNotNull; 4 | import static org.junit.Assert.assertNull; 5 | import static org.junit.Assert.assertTrue; 6 | import static org.mockito.Mockito.inOrder; 7 | import static org.mockito.Mockito.mock; 8 | 9 | import java.lang.reflect.Constructor; 10 | 11 | import org.junit.Before; 12 | import org.junit.Test; 13 | import org.mockito.InOrder; 14 | 15 | import com.dp.nebula.wormhole.common.interfaces.ILineSender; 16 | import com.dp.nebula.wormhole.common.interfaces.IParam; 17 | import com.dp.nebula.wormhole.common.interfaces.IPluginMonitor; 18 | import com.dp.nebula.wormhole.common.interfaces.IReader; 19 | 20 | 21 | public class ReaderThreadTest { 22 | 23 | private ILineSender lineSender; 24 | private IParam param; 25 | private IPluginMonitor monitor; 26 | 27 | @Before 28 | public void init(){ 29 | lineSender = mock(ILineSender.class); 30 | param = mock(IParam.class); 31 | monitor = mock(IPluginMonitor.class); 32 | } 33 | 34 | @Test 35 | public void testConstructor_Success(){ 36 | ReaderThread reader = createReaderThread("com.dp.nebula.wormhole.engine.common.FakeReader"); 37 | assertNotNull(reader); 38 | assertTrue(reader instanceof ReaderThread); 39 | } 40 | 41 | @Test 42 | public void testConstuctor_Failed(){ 43 | 
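// Editor's note (added comment): testRun below pins down the reader-plugin lifecycle the
// engine guarantees -- init() -> connection() -> read(lineSender) -> finish() -- while
// WriterThreadTest later in this dump checks the writer-side counterpart, which adds a
// commit() between write(lineReceiver) and finish().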
ReaderThread reader = createReaderThread("com.dp.nebula.wormhole.engine.common.NotExistReader"); 44 | assertNull(reader); 45 | } 46 | 47 | @Test 48 | public void testRun() throws Exception{ 49 | IReader reader = mock(IReader.class); 50 | ReaderThread readerThread = createReaderThread(reader); 51 | readerThread.call(); 52 | 53 | InOrder orderedExecution = inOrder(reader); 54 | orderedExecution.verify(reader).init(); 55 | orderedExecution.verify(reader).connection(); 56 | orderedExecution.verify(reader).read(lineSender); 57 | orderedExecution.verify(reader).finish(); 58 | } 59 | 60 | @SuppressWarnings("unchecked") 61 | private ReaderThread createReaderThread(IReader reader){ 62 | try { 63 | Class clazz = Class.forName("com.dp.nebula.wormhole.engine.core.ReaderThread"); 64 | Constructor con = clazz.getDeclaredConstructor(ILineSender.class, IReader.class); 65 | con.setAccessible(true); 66 | return (ReaderThread)con.newInstance(lineSender, reader); 67 | 68 | } catch (Exception e) { 69 | return null; 70 | } 71 | } 72 | 73 | private ReaderThread createReaderThread(String className){ 74 | return ReaderThread.getInstance(lineSender, param, 75 | className, null, monitor); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/engine/monitor/StorageMonitorInfo.java: -------------------------------------------------------------------------------- 1 | //package com.dp.nebula.wormhole.engine.monitor; 2 | // 3 | //@Deprecated 4 | //public class StorageMonitorInfo { 5 | // 6 | // private long storageSuccessReceivedLines; 7 | // private long storageFailedReceivedLines; 8 | // private long storageSuccessSentLines; 9 | // private long storageFailedSentLines; 10 | // private long storageSuccessReceivedBytes; 11 | // private long storageFailedReceivedBytes; 12 | // private long storageSuccessSentBytes; 13 | // private long storageFailedSentBytes; 14 | // 15 | // public long getStorageSuccessReceivedLines() { 16 | // return storageSuccessReceivedLines; 17 | // } 18 | // public void setStorageSuccessReceivedLines(long storageSuccessReceivedLines) { 19 | // this.storageSuccessReceivedLines = storageSuccessReceivedLines; 20 | // } 21 | // public long getStorageFailedReceivedLines() { 22 | // return storageFailedReceivedLines; 23 | // } 24 | // public void setStorageFailedReceivedLines(long storageFailedReceivedLines) { 25 | // this.storageFailedReceivedLines = storageFailedReceivedLines; 26 | // } 27 | // public long getStorageSuccessSentLines() { 28 | // return storageSuccessSentLines; 29 | // } 30 | // public void setStorageSuccessSentLines(long storageSuccessSentLines) { 31 | // this.storageSuccessSentLines = storageSuccessSentLines; 32 | // } 33 | // public long getStorageFailedSentLines() { 34 | // return storageFailedSentLines; 35 | // } 36 | // public void setStorageFailedSentLines(long storageFailedSentLines) { 37 | // this.storageFailedSentLines = storageFailedSentLines; 38 | // } 39 | // public long getStorageSuccessReceivedBytes() { 40 | // return storageSuccessReceivedBytes; 41 | // } 42 | // public void setStorageSuccessReceivedBytes(long storageSuccessReceivedBytes) { 43 | // this.storageSuccessReceivedBytes = storageSuccessReceivedBytes; 44 | // } 45 | // public long getStorageFailedReceivedBytes() { 46 | // return storageFailedReceivedBytes; 47 | // } 48 | // public void setStorageFailedReceivedBytes(long storageFailedReceivedBytes) { 49 | // this.storageFailedReceivedBytes = storageFailedReceivedBytes; 50 | // } 51 
| // public long getStorageSuccessSentBytes() { 52 | // return storageSuccessSentBytes; 53 | // } 54 | // public void setStorageSuccessSentBytes(long storageSuccessSentBytes) { 55 | // this.storageSuccessSentBytes = storageSuccessSentBytes; 56 | // } 57 | // public long getStorageFailedSentBytes() { 58 | // return storageFailedSentBytes; 59 | // } 60 | // public void setStorageFailedSentBytes(long storageFailedSentBytes) { 61 | // this.storageFailedSentBytes = storageFailedSentBytes; 62 | // } 63 | //} 64 | -------------------------------------------------------------------------------- /src/test/java/com/dp/nebula/wormhole/engine/core/WriterThreadTest.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.core; 2 | 3 | import static org.junit.Assert.assertNotNull; 4 | import static org.junit.Assert.assertNull; 5 | import static org.junit.Assert.assertTrue; 6 | import static org.mockito.Mockito.inOrder; 7 | import static org.mockito.Mockito.mock; 8 | 9 | import java.lang.reflect.Constructor; 10 | 11 | import org.junit.Before; 12 | import org.junit.Test; 13 | import org.mockito.InOrder; 14 | 15 | import com.dp.nebula.wormhole.common.interfaces.ILineReceiver; 16 | import com.dp.nebula.wormhole.common.interfaces.IParam; 17 | import com.dp.nebula.wormhole.common.interfaces.IPluginMonitor; 18 | import com.dp.nebula.wormhole.common.interfaces.IWriter; 19 | 20 | public class WriterThreadTest { 21 | 22 | private ILineReceiver lineReceiver; 23 | private IParam param; 24 | private IPluginMonitor monitor; 25 | 26 | @Before 27 | public void init(){ 28 | lineReceiver = mock(ILineReceiver.class); 29 | param = mock(IParam.class); 30 | monitor = mock(IPluginMonitor.class); 31 | } 32 | 33 | @Test 34 | public void testConstructor_Success(){ 35 | WriterThread writer = createWriterThread("com.dp.nebula.wormhole.engine.common.FakeWriter"); 36 | assertNotNull(writer); 37 | assertTrue(writer instanceof WriterThread); 38 | } 39 | 40 | @Test 41 | public void testConstuctor_Failed(){ 42 | WriterThread writer = createWriterThread("com.dp.nebula.wormhole.engine.common.NotExistWriter"); 43 | assertNull(writer); 44 | } 45 | 46 | @Test 47 | public void testRun() throws Exception{ 48 | IWriter writer = mock(IWriter.class); 49 | WriterThread writerThread = createWriterThread(writer); 50 | writerThread.call(); 51 | 52 | InOrder orderedExecution = inOrder(writer); 53 | orderedExecution.verify(writer).init(); 54 | orderedExecution.verify(writer).connection(); 55 | orderedExecution.verify(writer).write(lineReceiver); 56 | orderedExecution.verify(writer).commit(); 57 | orderedExecution.verify(writer).finish(); 58 | } 59 | 60 | @SuppressWarnings("unchecked") 61 | private WriterThread createWriterThread(IWriter writer){ 62 | try { 63 | Class clazz = Class.forName("com.dp.nebula.wormhole.engine.core.WriterThread"); 64 | Constructor con = clazz.getDeclaredConstructor(ILineReceiver.class, IWriter.class); 65 | con.setAccessible(true); 66 | return (WriterThread)con.newInstance(lineReceiver, writer); 67 | } catch (Exception e) { 68 | return null; 69 | } 70 | } 71 | 72 | private WriterThread createWriterThread(String className){ 73 | return WriterThread.getInstance(lineReceiver, param, 74 | className, null, monitor); 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/writer/sftpwriter/SftpDirSplitter.java: 
-------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.writer.sftpwriter; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import org.apache.commons.lang.StringUtils; 7 | import org.apache.log4j.Logger; 8 | 9 | import com.dp.nebula.wormhole.common.AbstractSplitter; 10 | import com.dp.nebula.wormhole.common.interfaces.IParam; 11 | 12 | public class SftpDirSplitter extends AbstractSplitter { 13 | private static final Logger logger = Logger.getLogger(SftpDirSplitter.class); 14 | 15 | private String dir = ""; 16 | private String prefixname = "part"; 17 | private String fileType = "txt"; 18 | private String suffix = ""; 19 | private int concurrency = 5; 20 | 21 | @Override 22 | public void init(IParam jobParams){ 23 | super.init(jobParams); 24 | 25 | dir = jobParams.getValue(ParamKey.dir, this.dir).trim(); 26 | if (StringUtils.isBlank(dir)){ 27 | logger.error("Can't find the param [" 28 | + ParamKey.dir + "] in sftp-spliter-param."); 29 | return; 30 | } 31 | if (dir.endsWith("*")){ 32 | dir = dir.substring(0, dir.lastIndexOf("*")); 33 | } 34 | if (dir.endsWith("/")){ 35 | dir = dir.substring(0, dir.lastIndexOf("/")); 36 | } 37 | 38 | concurrency = param.getIntValue(ParamKey.concurrency, this.concurrency); 39 | prefixname = jobParams.getValue(ParamKey.prefixname, this.prefixname).trim(); 40 | fileType = jobParams.getValue(ParamKey.fileType, this.fileType).trim(); 41 | 42 | if (fileType.equalsIgnoreCase("gz") || fileType.equalsIgnoreCase("gzip")){ 43 | suffix = "gz"; 44 | }else if (fileType.equalsIgnoreCase("lzo")){ 45 | suffix = "lzo"; 46 | } 47 | } 48 | 49 | @Override 50 | public List split(){ 51 | List v = new ArrayList(); 52 | String absolutePath = ""; 53 | if (1 == concurrency){ 54 | absolutePath = dir + "/" + prefixname; 55 | if (!StringUtils.isBlank(suffix)){ 56 | absolutePath = absolutePath + "." + suffix; 57 | } 58 | logger.info(String 59 | .format("SftpWriter set no splitting, Use %s as absolute filename.", 60 | absolutePath)); 61 | param.putValue(ParamKey.dir, absolutePath); 62 | v.add(param); 63 | }else{ 64 | logger.info(String.format("HdfsWriter splits file to %d sub-files .", 65 | concurrency)); 66 | for (int i = 0; i < concurrency; i++) { 67 | absolutePath = dir + "/" + prefixname + "-" + i; 68 | if (!StringUtils.isBlank(suffix)){ 69 | absolutePath = absolutePath + "." 
+ suffix; 70 | } 71 | IParam oParams = param.clone(); 72 | oParams.putValue(ParamKey.dir, absolutePath); 73 | v.add(oParams); 74 | } 75 | } 76 | return v; 77 | } 78 | 79 | } 80 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/reader/mongoreader/MongoReaderPeriphery.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.reader.mongoreader; 2 | 3 | import java.net.UnknownHostException; 4 | 5 | import org.apache.log4j.Logger; 6 | 7 | import com.dp.nebula.wormhole.common.interfaces.IParam; 8 | import com.dp.nebula.wormhole.common.interfaces.IReaderPeriphery; 9 | import com.dp.nebula.wormhole.common.interfaces.ISourceCounter; 10 | import com.dp.nebula.wormhole.common.interfaces.ITargetCounter; 11 | import com.mongodb.DB; 12 | import com.mongodb.Mongo; 13 | import com.mongodb.MongoURI; 14 | 15 | /** 16 | * @author yukang.chen 17 | * 18 | */ 19 | public class MongoReaderPeriphery implements IReaderPeriphery { 20 | private final static Logger log = Logger.getLogger(MongoReaderPeriphery.class); 21 | 22 | private String inputUri = ""; 23 | private String inputFields = ""; 24 | private String inputQuery = ""; 25 | private String inputSort = ""; 26 | private String inputLimit = ""; 27 | 28 | @Override 29 | public void doPost(IParam param, ITargetCounter counter) { 30 | // TODO Auto-generated method stub 31 | } 32 | 33 | @Override 34 | public void prepare(IParam param, ISourceCounter counter) { 35 | inputUri = param.getValue(ParamKey.inputUri, this.inputUri).trim(); 36 | inputFields = param.getValue(ParamKey.inputFields, this.inputFields).trim(); 37 | inputQuery = param.getValue(ParamKey.inputQuery, this.inputQuery).trim(); 38 | inputSort = param.getValue(ParamKey.inputSort, this.inputSort).trim(); 39 | inputLimit = param.getValue(ParamKey.inputLimit, this.inputLimit).trim(); 40 | 41 | MongoURI uri = new MongoURI(inputUri); 42 | Mongo mongo = null; 43 | try { 44 | mongo = new Mongo(uri); 45 | }catch (UnknownHostException e) { 46 | throw new IllegalStateException(" Unable to connect to MongoDB at '" + uri + "'", e); 47 | } 48 | DB db = mongo.getDB(uri.getDatabase()); 49 | 50 | //if there's a username and password 51 | if(uri.getUsername() != null && uri.getPassword() != null && !db.isAuthenticated()){ 52 | boolean auth = db.authenticate(uri.getUsername(), uri.getPassword()); 53 | if(auth) { 54 | log.info("Sucessfully authenticated with collection."); 55 | } 56 | else { 57 | throw new IllegalArgumentException("Unable to connect to collection. 
You have to check your username and password" ); 58 | } 59 | } 60 | 61 | param.putValue(ParamKey.inputUri, this.inputUri); 62 | param.putValue(ParamKey.inputFields, this.inputFields); 63 | param.putValue(ParamKey.inputQuery, this.inputQuery); 64 | param.putValue(ParamKey.inputSort, this.inputSort); 65 | param.putValue(ParamKey.inputLimit, this.inputLimit); 66 | 67 | } 68 | 69 | } 70 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/writer/hdfswriter/HdfsDirSplitter.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.writer.hdfswriter; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import org.apache.commons.lang.StringUtils; 7 | import org.apache.hadoop.fs.Path; 8 | import org.apache.log4j.Logger; 9 | 10 | import com.dp.nebula.wormhole.common.AbstractSplitter; 11 | import com.dp.nebula.wormhole.common.interfaces.IParam; 12 | import com.dp.nebula.wormhole.plugins.common.DFSUtils; 13 | 14 | public class HdfsDirSplitter extends AbstractSplitter { 15 | private Logger logger = Logger.getLogger(HdfsDirSplitter.class); 16 | 17 | private Path p = null; 18 | private String prefix = "prefix"; 19 | private int concurrency = 5; 20 | private String codecClass = ""; 21 | private String fileType = "TXT"; 22 | 23 | @Override 24 | public void init(IParam jobParams) { 25 | super.init(jobParams); 26 | 27 | String dir = param.getValue(ParamKey.dir); 28 | 29 | if (dir.endsWith("*")) { 30 | dir = dir.substring(0, dir.lastIndexOf('*')); 31 | } 32 | if (dir.endsWith("/")) { 33 | dir = dir.substring(0, dir.lastIndexOf('/')); 34 | } 35 | 36 | codecClass = param.getValue(ParamKey.codecClass, this.codecClass); 37 | 38 | p = new Path(dir); 39 | 40 | fileType = param.getValue(ParamKey.fileType, this.fileType); 41 | 42 | prefix = param.getValue(ParamKey.prefixname, prefix); 43 | 44 | //add '_' before filename, make it invisible for hdfs 45 | prefix = "_" + prefix; 46 | 47 | concurrency = param.getIntValue(ParamKey.concurrency, 48 | this.concurrency); 49 | } 50 | 51 | @Override 52 | public List split() { 53 | String suffix = ""; 54 | if (fileType.equalsIgnoreCase("TXT_COMP")){ 55 | suffix = DFSUtils.getCompressionSuffixMap().get(codecClass); 56 | if (StringUtils.isEmpty(suffix)){ 57 | suffix = "lzo"; 58 | } 59 | } 60 | 61 | List v = new ArrayList(); 62 | if (1 == concurrency){ 63 | if (!StringUtils.isEmpty(suffix)){ 64 | prefix = prefix + "." + suffix; 65 | } 66 | 67 | logger.info(String 68 | .format("HdfsWriter set no splitting, Use %s as absolute filename .", 69 | p.toString() + "/" + prefix)); 70 | param.putValue(ParamKey.dir, p.toString() + "/" + prefix); 71 | v.add(param); 72 | }else{ 73 | logger.info(String.format("HdfsWriter splits file to %d sub-files .", 74 | concurrency)); 75 | for (int i = 0; i < concurrency; i++) { 76 | String file = p.toString() + "/" + prefix + "-" + i; 77 | if (!StringUtils.isEmpty(suffix)){ 78 | file = file + "." 
+ suffix; 79 | } 80 | 81 | IParam oParams = param.clone(); 82 | oParams.putValue(ParamKey.dir, file); 83 | v.add(oParams); 84 | } 85 | } 86 | return v; 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/engine/storage/IStorage.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.storage; 2 | 3 | import com.dp.nebula.wormhole.common.interfaces.ILine; 4 | 5 | /** 6 | * A buffer where {@link IReader} and {@link IWriter} exchange data. 7 | * 8 | * */ 9 | public interface IStorage { 10 | 11 | /** 12 | * Initialization for {@link IStorage}. 13 | * 14 | * @param id 15 | * {@link IWriter} id. 16 | * 17 | * @param lineLimit 18 | * Limit of the line number the {@link IStorage} can hold. 19 | * 20 | * @param byteLimit 21 | * Limit of the bytes the {@link IStorage} can hold. 22 | * 23 | * @param destructLimit 24 | * Limit of the times the {@link IStorage} can fail. 25 | * 26 | * @return 27 | * true for OK, false for failed. 28 | * 29 | * */ 30 | public boolean init(String id, int lineLimit, int byteLimit, int destructLimit, int waitTime); 31 | 32 | /** 33 | * Push one line into {@link IStorage}, used by {@link IReader} 34 | * 35 | * @param line 36 | * One line of record, see {@link ILine} 37 | * 38 | * @return 39 | * true for OK, false for failure. 40 | * 41 | * */ 42 | public boolean push(ILine line); 43 | 44 | /** 45 | * Push multiple lines into {@link IStorage}, used by {@link IReader} 46 | * 47 | * @param lines 48 | * multiple lines of records, see {@link ILine} 49 | * 50 | * @param size 51 | * limit of line number to be pushed. 52 | * 53 | * @return 54 | * true for OK, false for failure. 55 | * 56 | * */ 57 | public boolean push(ILine[] lines, int size); 58 | 59 | /** 60 | * Pull one line from {@link IStorage}, used by {@link IWriter}. 61 | * 62 | * @return 63 | * one {@link ILine} of record. 64 | * 65 | * */ 66 | public ILine pull(); 67 | 68 | /** 69 | * Pull multiple lines from {@link IStorage}, used by {@link IWriter}. 70 | * 71 | * @param lines 72 | * an empty array which will be filled with multiple {@link ILine} as the result. 73 | * 74 | * @return 75 | * number of lines pulled。 76 | * 77 | * */ 78 | public int pull(ILine[] lines); 79 | 80 | 81 | public void close(); 82 | /** 83 | * Get size of {@link IStorage} in bytes 84 | * 85 | * @return 86 | * Storage size. 87 | * 88 | * */ 89 | public int size(); 90 | 91 | /** 92 | * Check {@link IStorage} is empty. 93 | * 94 | * @return 95 | * true if empty. 96 | * 97 | * */ 98 | public boolean empty(); 99 | 100 | /** 101 | * Get information about {@link IStorage}. 102 | * 103 | * @return 104 | * {@link IStorage} information. 
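(Editor's addition, placed at the tail of the IStorage javadoc.) A minimal sketch of the push/pull contract this interface documents, written against the interface only; the init(...) arguments, the helper name, and the fallback strategy are illustrative assumptions rather than engine behaviour.

```java
import com.dp.nebula.wormhole.common.interfaces.ILine;
import com.dp.nebula.wormhole.engine.storage.IStorage;

public class StorageSketch {
    // "storage" stands for whichever IStorage implementation the engine wires in;
    // the limits passed to init(...) below are placeholder values, not engine defaults.
    static void exchange(IStorage storage, ILine[] batch) {
        storage.init("writer-0", 10000, 1 << 20, 3, 3000);

        // reader side: try a batch push first, fall back to line-by-line on failure
        if (!storage.push(batch, batch.length)) {
            for (ILine line : batch) {
                storage.push(line);
            }
        }

        // writer side: drain until the buffer reports empty
        ILine[] out = new ILine[batch.length];
        while (!storage.empty()) {
            int pulled = storage.pull(out);
            if (pulled == 0) {
                break;   // nothing available right now; a real writer would retry or back off
            }
            // hand the first `pulled` entries of `out` to the writer plugin here
        }
        storage.close();
    }
}
```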
105 | * 106 | * */ 107 | public String info(); 108 | 109 | 110 | public Statistics getStat(); 111 | } 112 | -------------------------------------------------------------------------------- /src/test/java/com/dp/nebula/wormhole/engine/storage/DoubleQueueTest.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.storage; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | import static org.junit.Assert.assertArrayEquals; 5 | 6 | import java.util.concurrent.TimeUnit; 7 | 8 | import org.junit.Test; 9 | 10 | import com.dp.nebula.wormhole.common.DefaultLine; 11 | import com.dp.nebula.wormhole.common.interfaces.ILine; 12 | 13 | public class DoubleQueueTest { 14 | @Test 15 | public void queueTest(){ 16 | DoubleQueue dq = new DoubleQueue(200,1024*200); 17 | ILine setLine,getLine = null; 18 | setLine = new DefaultLine(); 19 | setLine.addField("1"); 20 | setLine.addField("sunny"); 21 | try { 22 | dq.push(setLine, 1, TimeUnit.MILLISECONDS); 23 | getLine = dq.pull(1, TimeUnit.MILLISECONDS); 24 | } catch (InterruptedException e) { 25 | e.printStackTrace(); 26 | } 27 | try { 28 | Thread.sleep ( 1000L ) ; 29 | } catch (InterruptedException e) { 30 | e.printStackTrace(); 31 | } 32 | assertEquals(setLine,getLine); 33 | } 34 | @Test 35 | public void bufferedQueueTest(){ 36 | DoubleQueue dq = new DoubleQueue(200,1024*200); 37 | ILine[] setLines,getLines = null; 38 | setLines = new ILine[10]; 39 | getLines = new ILine[10]; 40 | ILine item1 = new DefaultLine(); 41 | item1.addField("1"); 42 | item1.addField("sunny"); 43 | ILine item2 = new DefaultLine(); 44 | item2.addField("2"); 45 | item2.addField("jack"); 46 | setLines[0]=item1; 47 | setLines[1]=item2; 48 | try { 49 | dq.push(setLines,2, 1, TimeUnit.MILLISECONDS); 50 | dq.pull(getLines, 1, TimeUnit.MILLISECONDS); 51 | } catch (InterruptedException e) { 52 | e.printStackTrace(); 53 | } 54 | try { 55 | Thread.sleep ( 1000L ) ; 56 | } catch (InterruptedException e) { 57 | e.printStackTrace(); 58 | } 59 | assertArrayEquals(setLines,getLines); 60 | } 61 | @Test 62 | public void queueTestNeededSwitchQueue(){ 63 | DoubleQueue dq = new DoubleQueue(10,1024*10); 64 | ILine[] setLines,getLines = null; 65 | setLines = new ILine[10]; 66 | getLines = new ILine[10]; 67 | ILine item1 = new DefaultLine(); 68 | item1.addField("1"); 69 | item1.addField("sunny"); 70 | ILine item2 = new DefaultLine(); 71 | item2.addField("2"); 72 | item2.addField("jack"); 73 | setLines[0]=item1; 74 | setLines[1]=item2; 75 | try { 76 | dq.push(setLines,2, 1, TimeUnit.MILLISECONDS); 77 | dq.pull(getLines, 1, TimeUnit.MILLISECONDS); 78 | dq.pull(getLines, 1, TimeUnit.MILLISECONDS); 79 | dq.pull(getLines, 1, TimeUnit.MILLISECONDS); 80 | dq.push(setLines,2, 1, TimeUnit.MILLISECONDS); 81 | dq.push(setLines,2, 1, TimeUnit.MILLISECONDS); 82 | } catch (InterruptedException e) { 83 | e.printStackTrace(); 84 | } 85 | try { 86 | Thread.sleep ( 1000L ) ; 87 | } catch (InterruptedException e) { 88 | e.printStackTrace(); 89 | } 90 | assertArrayEquals(setLines,getLines); 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/engine/monitor/WormHoleJobInfo.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.engine.monitor; 2 | 3 | import java.util.Date; 4 | 5 | import com.dp.nebula.common.utils.DateHelper; 6 | 7 | public class WormHoleJobInfo { 8 | 9 | private String userName; 10 | 
private String dataSource; 11 | private String dataTarget; 12 | private Date startTime; 13 | private long totalLines; 14 | private long totalBytes; 15 | private long time; 16 | private int resultCode; 17 | 18 | public String getUserName() { 19 | return userName; 20 | } 21 | public void setUserName(String userName) { 22 | this.userName = userName; 23 | } 24 | public String getDataSource() { 25 | return dataSource; 26 | } 27 | public void setDataSource(String dataSource) { 28 | this.dataSource = dataSource; 29 | } 30 | public String getDataTarget() { 31 | return dataTarget; 32 | } 33 | public void setDataTarget(String dataTarget) { 34 | this.dataTarget = dataTarget; 35 | } 36 | public long getTotalLines() { 37 | return totalLines; 38 | } 39 | public void setTotalLines(long totalLines) { 40 | this.totalLines = totalLines; 41 | } 42 | public long getTotalBytes() { 43 | return totalBytes; 44 | } 45 | public void setTotalBytes(long totalBytes) { 46 | this.totalBytes = totalBytes; 47 | } 48 | public long getTime() { 49 | return time; 50 | } 51 | public void setTime(long time) { 52 | this.time = time; 53 | } 54 | public Date getStartTime() { 55 | return startTime; 56 | } 57 | public void setStartTime(Date startTime) { 58 | this.startTime = startTime; 59 | } 60 | public int getResultCode() { 61 | return resultCode; 62 | } 63 | public void setResultCode(int resultCode) { 64 | this.resultCode = resultCode; 65 | } 66 | public WormHoleJobInfo() { 67 | } 68 | 69 | @Override 70 | public String toString() { 71 | return "WormHoleJobInfo [dataSource=" + dataSource + ", dataTarget=" 72 | + dataTarget + ", resultCode=" + resultCode + ", startTime=" 73 | + startTime + ", time=" + time + ", totalBytes=" + totalBytes 74 | + ", totalLines=" + totalLines + ", userName=" + userName + "]"; 75 | } 76 | 77 | 78 | public String getString() { 79 | return "(\"" + dataSource + "\",\"" 80 | + dataTarget + "\"," + resultCode + "," 81 | + time + "," + totalBytes + "," 82 | + totalLines + ",\"" + userName +"\",\"" 83 | + DateHelper.format(startTime, DateHelper.DATE_FORMAT_PATTERN_YEAR_MONTH_DAY_HOUR_MINUTE_SECOND) + "\")"; 84 | } 85 | public WormHoleJobInfo(String userName, String dataSource, 86 | String dataTarget, Date startTime, long totalLines, 87 | long totalBytes, long time, int resultCode) { 88 | super(); 89 | this.userName = userName; 90 | this.dataSource = dataSource; 91 | this.dataTarget = dataTarget; 92 | this.startTime = startTime; 93 | this.totalLines = totalLines; 94 | this.totalBytes = totalBytes; 95 | this.time = time; 96 | this.resultCode = resultCode; 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/interfaces/ILine.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common.interfaces; 2 | 3 | 4 | public interface ILine { 5 | 6 | /** 7 | * Add a field into the {@link ILine}. 8 | * 9 | * @param field 10 | * Field added into {@link ILine}. 11 | * @return 12 | * true for OK, false for failure. 13 | * 14 | * */ 15 | boolean addField(String field); 16 | 17 | /** 18 | * Add a field into the {@link ILine}. 19 | * 20 | * @param field 21 | * field added into {@link ILine}. 22 | * 23 | * @param index 24 | * given position of field in the {@link ILine}. 25 | * 26 | * @return 27 | * true for OK, false for failure. 28 | * 29 | * */ 30 | boolean addField(String field, int index); 31 | 32 | /** 33 | * Get one field of the {@link ILine} indexed by the param. 
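(Editor's addition.) A minimal sketch of the ILine contract documented in this file, using the DefaultLine implementation already exercised by DoubleQueueTest above; the class name and sample values are placeholders, and the sketch only relies on the reference returned by fromString, not on whether it mutates the receiver.

```java
import com.dp.nebula.wormhole.common.DefaultLine;
import com.dp.nebula.wormhole.common.interfaces.ILine;

public class LineSketch {
    public static void main(String[] args) {
        ILine line = new DefaultLine();          // concrete ILine used throughout the engine tests
        line.addField("1");
        line.addField("sunny");

        String record = line.toString('\t');     // "1\tsunny" -- how a writer serializes a record
        ILine parsed = new DefaultLine().fromString(record, '\t');   // how a reader rebuilds one

        System.out.println(parsed.getFieldNum());        // -> 2
        System.out.println(parsed.getField(0));          // -> "1"
        System.out.println(parsed.checkAndGetField(5));  // index beyond field count -> null, no exception
    }
}
```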
34 | * 35 | * NOTE: 36 | * if index specifed by user beyond field number of {@link ILine} 37 | * it may throw runtime excepiton 38 | * 39 | * 40 | * @param idx 41 | * given position of the {@link ILine}. 42 | * 43 | * @return 44 | * field indexed by the param. 45 | * 46 | * */ 47 | String getField(int idx); 48 | 49 | /** 50 | * Get one field of the {@link ILine} indexed by the param. 51 | * if idx specified by user beyond field number of {@link ILine} 52 | * null will be returned 53 | * 54 | * @param idx 55 | * given position of the {@link ILine}. 56 | * 57 | * @return 58 | * field indexed by the param. 59 | * 60 | * */ 61 | String checkAndGetField(int idx); 62 | 63 | /** 64 | * Get number of total fields in the {@link ILine}. 65 | * 66 | * @return 67 | * number of total fields in {@link ILine}. 68 | * 69 | * */ 70 | int getFieldNum(); 71 | 72 | /** 73 | * Use param as separator of field, format the {@link ILine} into {@link StringBuffer}. 74 | * 75 | * @param separator 76 | * field separator. 77 | * 78 | * @return 79 | * {@link ILine} in {@link StringBuffer} style. 80 | * 81 | * */ 82 | StringBuffer toStringBuffer(char separator); 83 | 84 | /** 85 | * Use param as separator of field, translate the {@link ILine} into {@link String}. 86 | * 87 | * @param separator 88 | * field separator. 89 | * 90 | * @return 91 | * {@link ILine} in {@link String}. 92 | * 93 | * */ 94 | String toString(char separator); 95 | 96 | /** 97 | * Use param(separator) as separator of field, split param(linestr) and construct a {@link ILine}. 98 | * 99 | * @param lineStr 100 | * String will be translated into {@link ILine}. 101 | * 102 | * @param separator 103 | * field separate. 104 | * 105 | * @return 106 | * {@link ILine} 107 | * 108 | * */ 109 | ILine fromString(String lineStr, char separator); 110 | 111 | /** 112 | * Get length of all fields, exclude separate. 113 | * 114 | * @return 115 | * length of all fields. 116 | * 117 | * */ 118 | int length(); 119 | 120 | } 121 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/common/MetaData.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.common; 2 | 3 | import java.util.List; 4 | 5 | /** 6 | * MetaData records a source or sink Database meta information. 7 | * 8 | * */ 9 | public class MetaData { 10 | private String dataBaseName; 11 | 12 | private String dataBaseVersion; 13 | 14 | private String tableName = "default_table"; 15 | 16 | private List colInfo; 17 | 18 | /** 19 | * Get name of this table. 20 | * 21 | * @return 22 | * name of table. 23 | * 24 | */ 25 | public String getTableName() { 26 | return tableName; 27 | } 28 | 29 | /** 30 | * Set name of this table. 31 | * 32 | * @param tableName 33 | * name of this table. 34 | * 35 | * */ 36 | public void setTableName(String tableName) { 37 | this.tableName = tableName; 38 | } 39 | 40 | /** 41 | * Get name of database. 42 | * 43 | * @return 44 | * name of database. 45 | * 46 | * */ 47 | public String getDataBaseName() { 48 | return dataBaseName; 49 | } 50 | 51 | /** 52 | * Set name of database. 53 | * 54 | * @param dataBaseName 55 | * name of database. 56 | * 57 | * */ 58 | public void setDataBaseName(String dataBaseName) { 59 | this.dataBaseName = dataBaseName; 60 | } 61 | 62 | /** 63 | * Get version of database. 64 | * 65 | * @return 66 | * version of database. 
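(Editor's addition.) A minimal sketch of how this MetaData holder is populated in practice through DBUtils.genMetaData(...), shown later in this dump; the JDBC URL, credentials, and query are placeholders, and the loop assumes colInfo holds MetaData.Column elements as the setters suggest.

```java
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.util.List;

import com.dp.nebula.wormhole.plugins.common.DBUtils;
import com.dp.nebula.wormhole.plugins.common.MetaData;

public class MetaDataSketch {
    public static void main(String[] args) throws SQLException {
        // placeholder connection details -- not taken from any Wormhole config
        Connection conn = DriverManager.getConnection(
                "jdbc:mysql://127.0.0.1:3306/test", "user", "password");
        try {
            MetaData meta = DBUtils.genMetaData(conn, "select id, name from some_table");
            System.out.println("table: " + meta.getTableName());
            List<MetaData.Column> cols = meta.getColInfo();   // assumed element type
            for (MetaData.Column col : cols) {
                System.out.println(col.getColName() + " : " + col.getDataType());
            }
        } finally {
            conn.close();
        }
    }
}
```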
67 | * */ 68 | public String getDataBaseVersion() { 69 | return this.dataBaseVersion; 70 | } 71 | 72 | /** 73 | * Set version of database. 74 | * 75 | * @param dataBaseVersion 76 | * version of database. 77 | * 78 | * */ 79 | public void setDataBaseVersion(String dataBaseVersion) { 80 | this.dataBaseVersion = dataBaseVersion; 81 | } 82 | 83 | /** 84 | * Get information of all columns. 85 | * 86 | * @return 87 | * columns information. 88 | * 89 | * */ 90 | public List getColInfo() { 91 | return colInfo; 92 | } 93 | 94 | /** 95 | * Set column information. 96 | * 97 | * @param colInfo 98 | * a list of column information. 99 | * 100 | * */ 101 | public void setColInfo(List colInfo) { 102 | this.colInfo = colInfo; 103 | } 104 | 105 | public class Column { 106 | private boolean isText = false; 107 | 108 | private boolean isNum = false; 109 | 110 | private String colName; 111 | 112 | private String dataType; // no use now 113 | 114 | private boolean isPK; 115 | 116 | public String getDataType() { 117 | return dataType; 118 | } 119 | 120 | public String getColName() { 121 | return colName; 122 | } 123 | 124 | public void setDataType(String dataType) { 125 | this.dataType = dataType; 126 | } 127 | 128 | public boolean isPK() { 129 | return isPK; 130 | } 131 | 132 | public void setPK(boolean isPK) { 133 | this.isPK = isPK; 134 | } 135 | 136 | public boolean isText() { 137 | return isText; 138 | } 139 | 140 | public void setText(boolean isText) { 141 | this.isText = isText; 142 | } 143 | 144 | public boolean isNum() { 145 | return isNum; 146 | } 147 | 148 | public void setNum(boolean isNum) { 149 | this.isNum = isNum; 150 | } 151 | 152 | public void setColName(String name) { 153 | colName = name; 154 | } 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/reader/mysqlreader/MysqlReaderSplitter.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.reader.mysqlreader; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import org.apache.commons.logging.Log; 7 | import org.apache.commons.logging.LogFactory; 8 | 9 | import com.dp.nebula.wormhole.common.AbstractSplitter; 10 | import com.dp.nebula.wormhole.common.JobStatus; 11 | import com.dp.nebula.wormhole.common.WormholeException; 12 | import com.dp.nebula.wormhole.common.interfaces.IParam; 13 | 14 | 15 | public class MysqlReaderSplitter extends AbstractSplitter{ 16 | 17 | private static final String SQL_PATTEN = "%s limit %d, %d"; 18 | 19 | private Log logger = LogFactory.getLog(MysqlReaderSplitter.class); 20 | 21 | private String sql; 22 | 23 | private int blockSize; 24 | 25 | private int concurrency; 26 | 27 | private String tableName; 28 | 29 | private String columns; 30 | 31 | private String where; 32 | 33 | private static final int DEFAULT_BLOCK_SIZE = 1000; 34 | 35 | private static final String SQL_WITH_WHERE_PATTEN = "select %s from %s where %s"; 36 | 37 | private static final String SQL_WITHOUT_WHERE_PATTEN = "select %s from %s"; 38 | 39 | @Override 40 | public void init(IParam jobParams){ 41 | super.init(jobParams); 42 | sql = param.getValue(ParamKey.sql, ""); 43 | blockSize = param.getIntValue(ParamKey.blockSize, DEFAULT_BLOCK_SIZE); 44 | concurrency = param.getIntValue(ParamKey.concurrency,1); 45 | tableName = param.getValue(ParamKey.tableName, ""); 46 | columns = param.getValue(ParamKey.columns, ""); 47 | where = param.getValue(ParamKey.where, ""); 48 
| } 49 | 50 | @Override 51 | public List split() { 52 | List paramList = new ArrayList() ; 53 | if(sql.isEmpty()) { 54 | if(tableName.isEmpty()||columns.isEmpty()) { 55 | logger.error("Mysql reader sql is empty"); 56 | throw new WormholeException("Mysql reader sql is empty",JobStatus.CONF_FAILED.getStatus()+MysqlReader.ERROR_CODE_ADD); 57 | } 58 | if(!where.isEmpty()) { 59 | sql = String.format(SQL_WITH_WHERE_PATTEN, columns, tableName, where); 60 | } else { 61 | sql = String.format(SQL_WITHOUT_WHERE_PATTEN, columns, tableName); 62 | } 63 | } 64 | if(!sql.isEmpty()){ 65 | long size = param.getLongValue(MysqlReaderPeriphery.DATA_AMOUNT_KEY,-1); 66 | if (size == -1){ 67 | paramList.add(param); 68 | logger.warn("Cannot get data amount for mysql reader"); 69 | return paramList; 70 | } 71 | int amount = 0; 72 | StringBuilder []sqlArray = new StringBuilder[concurrency]; 73 | for(long i = 0; i <= size/blockSize; i++){ 74 | String sqlSplitted = String.format(SQL_PATTEN, sql, amount, blockSize); 75 | int index = (int) (i%concurrency); 76 | if(sqlArray[index] == null){ 77 | sqlArray[index] = new StringBuilder(); 78 | } 79 | sqlArray[index].append(sqlSplitted).append(";") ; 80 | amount += blockSize; 81 | } 82 | for(int j = 0; j < concurrency; j++){ 83 | if(sqlArray[j] == null) { 84 | continue; 85 | } 86 | IParam paramSplitted = param.clone(); 87 | paramSplitted.putValue(ParamKey.sql, sqlArray[j].toString()); 88 | paramList.add(paramSplitted); 89 | } 90 | } 91 | return paramList; 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/common/DBUtils.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.common; 2 | 3 | import java.sql.Connection; 4 | import java.sql.ResultSet; 5 | import java.sql.SQLException; 6 | import java.sql.Statement; 7 | import java.util.ArrayList; 8 | import java.util.List; 9 | 10 | import org.apache.commons.logging.Log; 11 | import org.apache.commons.logging.LogFactory; 12 | 13 | import com.dp.nebula.wormhole.common.JobStatus; 14 | import com.dp.nebula.wormhole.common.WormholeException; 15 | 16 | public final class DBUtils { 17 | 18 | private static Log logger = LogFactory.getLog(DBUtils.class); 19 | 20 | private DBUtils() { 21 | } 22 | 23 | public static void dbPreCheck(String preSql,Connection conn) { 24 | int flag = -1; 25 | ResultSet rs = null; 26 | try { 27 | logger.info("pre check sql:" + preSql); 28 | rs = DBUtils.query(conn, preSql); 29 | rs.next(); 30 | flag = rs.getInt(1); 31 | } catch (Exception e) { 32 | logger.error("Pre check sql has error" ); 33 | throw new WormholeException(e,JobStatus.PRE_CHECK_FAILED.getStatus()); 34 | } finally { 35 | if (null != rs) { 36 | try { 37 | DBUtils.closeResultSet(rs); 38 | } catch (SQLException e) { 39 | throw new WormholeException(e,JobStatus.PRE_CHECK_FAILED.getStatus()); 40 | } 41 | } 42 | } 43 | if(flag != 1) { 44 | logger.error("Pre check condition is not satisfied." 
); 45 | throw new WormholeException(JobStatus.PRE_CHECK_FAILED.getStatus()); 46 | } 47 | } 48 | 49 | public static ResultSet query(Connection conn, String sql) throws SQLException { 50 | Statement stmt = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, 51 | ResultSet.CONCUR_READ_ONLY); 52 | return stmt.executeQuery(sql); 53 | } 54 | 55 | public static int update(Connection conn, String sql) throws SQLException { 56 | Statement stmt = conn.createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE, 57 | ResultSet.CONCUR_UPDATABLE); 58 | int rs = stmt.executeUpdate(sql); 59 | stmt.close(); 60 | return rs; 61 | } 62 | 63 | 64 | public static void closeResultSet(ResultSet rs) throws SQLException { 65 | if (null != rs) { 66 | Statement stmt = rs.getStatement(); 67 | if (null != stmt) { 68 | stmt.close(); 69 | stmt = null; 70 | } 71 | rs.close(); 72 | } 73 | } 74 | 75 | public static MetaData genMetaData(Connection conn, String sql) 76 | throws SQLException { 77 | MetaData meta = new MetaData(); 78 | List columns = new ArrayList(); 79 | 80 | ResultSet resultSet = null; 81 | try { 82 | resultSet = query(conn, sql); 83 | int columnCount = resultSet.getMetaData().getColumnCount(); 84 | for (int i = 1; i <= columnCount; i++) { 85 | MetaData.Column col = meta.new Column(); 86 | col.setColName(resultSet.getMetaData().getColumnName(i) 87 | .toLowerCase().trim()); 88 | col.setDataType(resultSet.getMetaData().getColumnTypeName(i) 89 | .toLowerCase().trim()); 90 | columns.add(col); 91 | } 92 | meta.setColInfo(columns); 93 | meta.setTableName(resultSet.getMetaData().getTableName(1).toLowerCase()); 94 | } finally { 95 | closeResultSet(resultSet); 96 | } 97 | return meta; 98 | } 99 | 100 | } -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/writer/mongowriter/MongoWriterPeriphery.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.writer.mongowriter; 2 | 3 | import java.net.UnknownHostException; 4 | 5 | import org.apache.log4j.Logger; 6 | 7 | import com.dp.nebula.wormhole.common.interfaces.IParam; 8 | import com.dp.nebula.wormhole.common.interfaces.ISourceCounter; 9 | import com.dp.nebula.wormhole.common.interfaces.ITargetCounter; 10 | import com.dp.nebula.wormhole.common.interfaces.IWriterPeriphery; 11 | import com.dp.nebula.wormhole.plugins.common.MongoUtils; 12 | import com.mongodb.DB; 13 | import com.mongodb.DBCollection; 14 | import com.mongodb.Mongo; 15 | import com.mongodb.MongoURI; 16 | 17 | public class MongoWriterPeriphery implements IWriterPeriphery { 18 | private static final Logger log = Logger 19 | .getLogger(MongoWriterPeriphery.class); 20 | private static final int MAX_SPLIT_NUM = 10; 21 | 22 | private String outputUri = ""; 23 | private String outputFields = ""; 24 | private int concurrency = 1; 25 | private int bulkInsertLine = 100000; 26 | private Boolean dropCollectionBeforeInsertionSwitch = false; 27 | 28 | @Override 29 | public void rollback(IParam param) { 30 | // TODO Auto-generated method stub 31 | } 32 | 33 | @Override 34 | public void doPost(IParam param, ITargetCounter counter) { 35 | // TODO Auto-generated method stub 36 | } 37 | 38 | @Override 39 | public void prepare(IParam param, ISourceCounter counter) { 40 | outputUri = param.getValue(ParamKey.outputUri, this.outputUri).trim(); 41 | outputFields = param.getValue(ParamKey.outputFields, this.outputFields) 42 | .trim(); 43 | concurrency = Math.min( 44 | 
param.getIntValue(ParamKey.concurrency, this.concurrency), 45 | MAX_SPLIT_NUM); 46 | bulkInsertLine = param.getIntValue(ParamKey.bulkInsertLine, 47 | this.bulkInsertLine); 48 | dropCollectionBeforeInsertionSwitch = param.getBooleanValue(ParamKey.dropCollectionBeforeInsertionSwitch, false); 49 | 50 | MongoURI uri = new MongoURI(outputUri); 51 | Mongo mongo = null; 52 | try { 53 | mongo = new Mongo(uri); 54 | } catch (UnknownHostException e) { 55 | throw new IllegalStateException( 56 | " Unable to connect to MongoDB at '" + uri + "'", e); 57 | } 58 | DB db = mongo.getDB(uri.getDatabase()); 59 | 60 | // if there's a username and password 61 | if (uri.getUsername() != null && uri.getPassword() != null 62 | && !db.isAuthenticated()) { 63 | boolean auth = db 64 | .authenticate(uri.getUsername(), uri.getPassword()); 65 | if (auth) { 66 | log.info("Sucessfully authenticated with collection."); 67 | } else { 68 | throw new IllegalArgumentException( 69 | "Unable to connect to collection. You have to check your username and password"); 70 | } 71 | } 72 | 73 | if (dropCollectionBeforeInsertionSwitch){ 74 | log.info("start to drop collection " + uri.getCollection()); 75 | DBCollection coll = MongoUtils.getCollection(uri); 76 | coll.drop(); 77 | log.info("drop collection " + uri.getCollection() + " before insert data successfully"); 78 | } 79 | 80 | param.putValue(ParamKey.outputUri, this.outputUri); 81 | param.putValue(ParamKey.outputFields, this.outputFields); 82 | param.putValue(ParamKey.concurrency, String.valueOf(this.concurrency)); 83 | param.putValue(ParamKey.bulkInsertLine, 84 | String.valueOf(this.bulkInsertLine)); 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/writer/hbasewriter/HBaseWriterPeriphery.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.writer.hbasewriter; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.log4j.Logger; 6 | 7 | import com.dp.nebula.wormhole.common.JobStatus; 8 | import com.dp.nebula.wormhole.common.WormholeException; 9 | import com.dp.nebula.wormhole.common.interfaces.IParam; 10 | import com.dp.nebula.wormhole.common.interfaces.ISourceCounter; 11 | import com.dp.nebula.wormhole.common.interfaces.ITargetCounter; 12 | import com.dp.nebula.wormhole.common.interfaces.IWriterPeriphery; 13 | import com.dp.nebula.wormhole.plugins.common.HBaseClient; 14 | import com.google.common.base.Preconditions; 15 | 16 | public class HBaseWriterPeriphery implements IWriterPeriphery { 17 | private final static Logger LOG = Logger 18 | .getLogger(HBaseWriterPeriphery.class); 19 | 20 | private final static int DEFAULT_WRITE_BUFFER_SIZE = 1024 * 1024; 21 | 22 | private String htable; 23 | private int deleteMode; 24 | private int rollbackMode; 25 | private int concurrency; 26 | private Boolean autoFlush; 27 | private Boolean writeAheadLog; 28 | private int writebufferSize; 29 | private HBaseClient client; 30 | 31 | @Override 32 | public void prepare(IParam param, ISourceCounter counter) { 33 | htable = param.getValue(ParamKey.htable); 34 | concurrency = param.getIntValue(ParamKey.concurrency, 1); 35 | deleteMode = param.getIntValue(ParamKey.deleteMode, 0); 36 | Preconditions.checkArgument(deleteMode >= 0 && deleteMode <= 2, 37 | "deleteMode must be between 0 and 2"); 38 | rollbackMode = param.getIntValue(ParamKey.rollbackMode, 0); 39 | Preconditions.checkArgument(rollbackMode >= 0 && rollbackMode <= 2, 
40 | "rollbackMode must be between 0 and 2"); 41 | 42 | autoFlush = param.getBooleanValue(ParamKey.autoFlush, false); 43 | writeAheadLog = param.getBooleanValue(ParamKey.writeAheadLog, true); 44 | writebufferSize = param.getIntValue(ParamKey.writebufferSize, 45 | DEFAULT_WRITE_BUFFER_SIZE); 46 | Preconditions.checkArgument(writebufferSize > 0 47 | && writebufferSize <= 32 * 1024 * 1024, 48 | "write buffer size must be within 0-32MB"); 49 | 50 | client = HBaseClient.getInstance(); 51 | client.initialize(htable, autoFlush, writebufferSize, 52 | writeAheadLog); 53 | deleteTableByMode(deleteMode); 54 | } 55 | 56 | @Override 57 | public void doPost(IParam param, ITargetCounter counter) { 58 | LOG.info("start to close HBaseClient"); 59 | if (client != null) { 60 | try { 61 | client.close(); 62 | } catch (IOException e) { 63 | } 64 | } 65 | } 66 | 67 | @Override 68 | public void rollback(IParam param) { 69 | LOG.info("start to execute `delete table` by rollbackMode on rollback stage"); 70 | deleteTableByMode(rollbackMode); 71 | } 72 | 73 | private void deleteTableByMode(int mode) { 74 | if (0 == mode) { 75 | LOG.info("mode 0, do nothing with table data"); 76 | } else if (1 == mode) { 77 | try { 78 | LOG.info("mode 1, delete table data"); 79 | client.deleteTableData(htable); 80 | } catch (IOException e) { 81 | throw new WormholeException(e, 82 | JobStatus.PRE_CHECK_FAILED.getStatus()); 83 | } 84 | } else if (2 == mode) { 85 | try { 86 | LOG.info("mode 2, truncate and recreate table"); 87 | client.truncateTable(htable); 88 | } catch (IOException e) { 89 | throw new WormholeException(e, 90 | JobStatus.PRE_CHECK_FAILED.getStatus()); 91 | } 92 | } 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/reader/hbasereader/HBaseReader.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.reader.hbasereader; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.commons.lang.StringUtils; 6 | import org.apache.log4j.Logger; 7 | 8 | import com.dp.nebula.wormhole.common.AbstractPlugin; 9 | import com.dp.nebula.wormhole.common.JobStatus; 10 | import com.dp.nebula.wormhole.common.WormholeException; 11 | import com.dp.nebula.wormhole.common.interfaces.ILine; 12 | import com.dp.nebula.wormhole.common.interfaces.ILineSender; 13 | import com.dp.nebula.wormhole.common.interfaces.IReader; 14 | 15 | public class HBaseReader extends AbstractPlugin implements IReader { 16 | private static final Logger logger = Logger.getLogger(HBaseReader.class); 17 | 18 | private String tableName = null; 19 | private String columns = null; 20 | private String rowkeyRange = null; 21 | private HBaseProxy proxy = null; 22 | 23 | @Override 24 | public void init() { 25 | this.tableName = getParam().getValue(ParamKey.htable, ""); 26 | this.columns = getParam().getValue(ParamKey.columns_key, ""); 27 | this.rowkeyRange = getParam().getValue(ParamKey.rowkey_range, ""); 28 | 29 | try { 30 | proxy = HBaseProxy.newProxy(tableName); 31 | } catch (IOException e) { 32 | try { 33 | if (null != proxy) { 34 | proxy.close(); 35 | } 36 | } catch (IOException e1) { 37 | } 38 | logger.error(e); 39 | throw new WormholeException(e.getMessage(), 40 | JobStatus.READ_FAILED.getStatus()); 41 | } 42 | } 43 | 44 | @Override 45 | public void connection() { 46 | logger.info("HBaseReader start to connect to HBase ."); 47 | if (StringUtils.isBlank(rowkeyRange)) { 48 | logger.info("HBaseReader 
prepare to query all records . "); 49 | proxy.setStartEndRange(null, null); 50 | } else { 51 | rowkeyRange = " " + rowkeyRange + " "; 52 | String[] pair = rowkeyRange.split(","); 53 | if (null == pair || 0 == pair.length) { 54 | logger.info("HBaseReader prepare to query all records . "); 55 | proxy.setStartEndRange(null, null); 56 | } else { 57 | String start = StringUtils.isBlank(pair[0].trim()) ? null 58 | : pair[0].trim(); 59 | String end = StringUtils.isBlank(pair[1].trim()) ? null 60 | : pair[1].trim(); 61 | logger.info(String.format( 62 | "HBaseReader prepare to query records [%s, %s) .", 63 | (start == null ? "-infinite" : start), 64 | (end == null ? "+infinite" : end))); 65 | proxy.setStartEndRange( 66 | (start == null ? null : start.getBytes()), 67 | (end == null ? null : end.getBytes())); 68 | } 69 | } 70 | } 71 | 72 | @Override 73 | public void read(ILineSender lineSender) { 74 | try { 75 | proxy.prepare(columns.split(",")); 76 | ILine line = lineSender.createNewLine(); 77 | while (proxy.fetchLine(line)) { 78 | Boolean flag = lineSender.send(line); 79 | if(getMonitor() != null) { 80 | if (flag){ 81 | getMonitor().increaseSuccessLines(); 82 | }else{ 83 | getMonitor().increaseFailedLines(); 84 | } 85 | } 86 | line = lineSender.createNewLine(); 87 | } 88 | lineSender.flush(); 89 | } catch (IOException e) { 90 | logger.error("HBase Reader fetch line error " + e.toString()); 91 | throw new WormholeException(e, JobStatus.READ_DATA_EXCEPTION.getStatus()); 92 | } finally { 93 | try { 94 | proxy.close(); 95 | } catch (IOException e) { 96 | } 97 | } 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/common/DBResultSetSender.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.common; 2 | 3 | import java.sql.ResultSet; 4 | import java.sql.ResultSetMetaData; 5 | import java.sql.SQLException; 6 | import java.sql.Timestamp; 7 | import java.text.SimpleDateFormat; 8 | import java.util.HashMap; 9 | import java.util.Map; 10 | 11 | import org.apache.commons.logging.Log; 12 | import org.apache.commons.logging.LogFactory; 13 | 14 | import com.dp.nebula.wormhole.common.interfaces.ILine; 15 | import com.dp.nebula.wormhole.common.interfaces.ILineSender; 16 | import com.dp.nebula.wormhole.common.interfaces.IPluginMonitor; 17 | 18 | 19 | public class DBResultSetSender { 20 | 21 | private ILineSender sender; 22 | 23 | private int columnCount; 24 | 25 | private IPluginMonitor monitor; 26 | 27 | private Map dateFormatMap = new HashMap(); 28 | 29 | private SimpleDateFormat[] timeMap = null; 30 | 31 | private static final Log s_logger = LogFactory.getLog(DBResultSetSender.class); 32 | 33 | public static DBResultSetSender newSender(ILineSender sender) { 34 | return new DBResultSetSender(sender); 35 | } 36 | 37 | public DBResultSetSender(ILineSender lineSender) { 38 | this.sender = lineSender; 39 | } 40 | 41 | public void setMonitor(IPluginMonitor iMonitor) { 42 | this.monitor = iMonitor; 43 | } 44 | 45 | public void setDateFormatMap(Map dateFormatMap) { 46 | this.dateFormatMap = dateFormatMap; 47 | } 48 | 49 | public void sendToWriter(ResultSet resultSet) throws SQLException{ 50 | String item = null; 51 | Timestamp ts = null; 52 | setColumnCount(resultSet.getMetaData().getColumnCount()); 53 | setColumnTypes(resultSet); 54 | while (resultSet.next()) { 55 | ILine line = sender.createNewLine(); 56 | try { 57 | /* TODO: date format need to 
handle by transfomer plugin */ 58 | for (int i = 1; i <= columnCount; i++) { 59 | if (null != timeMap[i]) { 60 | ts = resultSet.getTimestamp(i); 61 | if (null != ts) { 62 | item = timeMap[i].format(ts); 63 | } else { 64 | item = null; 65 | } 66 | } else { 67 | item = resultSet.getString(i); 68 | } 69 | line.addField(item); 70 | } 71 | Boolean b = sender.send(line); 72 | if (null != monitor) { 73 | if (b) { 74 | monitor.increaseSuccessLines(); 75 | } else { 76 | monitor.increaseFailedLines(); 77 | } 78 | } 79 | } catch (SQLException e) { 80 | monitor.increaseFailedLines(); 81 | s_logger.error(e.getMessage() + "| One dirty line : " + line.toString('\t')); 82 | } 83 | } 84 | 85 | } 86 | 87 | public void flush() { 88 | if (sender != null) { 89 | sender.flush(); 90 | } 91 | } 92 | 93 | private void setColumnTypes(ResultSet resultSet) throws SQLException { 94 | timeMap = new SimpleDateFormat[columnCount + 1]; 95 | 96 | ResultSetMetaData rsmd = resultSet.getMetaData(); 97 | 98 | for (int i = 1; i <= columnCount; i++) { 99 | String type = rsmd.getColumnTypeName(i).toLowerCase().trim(); 100 | if (this.dateFormatMap.containsKey(type)) { 101 | timeMap[i] = this.dateFormatMap.get(type); 102 | } 103 | } 104 | } 105 | 106 | private void setColumnCount(int columnCount) { 107 | this.columnCount = columnCount; 108 | } 109 | 110 | } 111 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/reader/mongoreader/ParamKey.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.reader.mongoreader; 2 | 3 | public final class ParamKey { 4 | /* 5 | * @name: inputUri 6 | * @description: mongo uri, format like: mongodb://[username:password@]host1[:port1][,host2[:port2],...[,hostN[:portN]]][/[database][?options] 7 | * @range: 8 | * @mandatory: true 9 | * @default:mongodb://127.0.0.1:27017/db.coll 10 | */ 11 | public final static String inputUri = "input_uri"; 12 | /* 13 | * @name: inputFields 14 | * @description: The fields, in JSON, to read 15 | * @range: 16 | * @mandatory: true 17 | * @default:{ _id:1 } 18 | */ 19 | public final static String inputFields = "input_fields"; 20 | /* 21 | * @name: inputQuery 22 | * @description: The query, in JSON, to execute [OPTIONAL] 23 | * @range: 24 | * @mandatory: false 25 | * @default: 26 | */ 27 | public final static String inputQuery = "input_query"; 28 | /* 29 | * @name: inputSort 30 | * @description: A JSON sort specification for read [OPTIONAL], opposite operation due to the storage implementation 31 | * @range: 32 | * @mandatory: false 33 | * @default: 34 | */ 35 | public final static String inputSort = "input_sort"; 36 | /* 37 | * @name: inputLimit 38 | * @description: The number of documents to limit to for read 39 | * @range: 40 | * @mandatory: false 41 | * @default: 42 | */ 43 | public final static String inputLimit = "input_limit"; 44 | /* 45 | * @name:needSplit 46 | * @description: split switch 47 | * @range:true,false 48 | * @mandatory:true 49 | * @default:false 50 | */ 51 | public final static String needSplit = "need_split"; 52 | /* 53 | * @name:splitKeyPattern 54 | * @description: split key JSON pattern, it must be an index on the collection 55 | * @range: 56 | * @mandatory: false 57 | * @default:{ "_id": 1 } 58 | */ 59 | public final static String splitKeyPattern = "split_key_pattern"; 60 | /* 61 | * @name:splitSize 62 | * @description: if you want to control the split size for input, set it here, it should be an 
integer which refer to megabytes 63 | * @range:1-256 64 | * @mandatory: false 65 | * @default:8 66 | */ 67 | public final static String splitSize = "split_size"; 68 | /* 69 | * @name: concurrency 70 | * @description: concurrency of the job 71 | * @range: 1-10 72 | * @mandatory: false 73 | * @default: 1 74 | */ 75 | public final static String concurrency = "concurrency"; 76 | /* 77 | * @name: field_need_split 78 | * @description: In some mongo tables, all fields are integrated into one field. 79 | * This field indicates whether these fields should be split from the first field 80 | * @range: true,false 81 | * @mandatory: false 82 | * @default: 83 | */ 84 | public final static String fieldNeedSplit = "field_need_split"; 85 | /* 86 | * @name: field_split_char 87 | * @description: In some mongo tables, all fields are integrated into one field. 88 | * Plugin can split these fields use this character as splitter. 89 | * @range: eg. \t 90 | * @mandatory: false 91 | * @default: 92 | */ 93 | public final static String filedSplitChar = "field_split_char"; 94 | /* 95 | * @name: dataTransformClass 96 | * @description: data transformer class path 97 | * @range: 98 | * @mandatory: false 99 | * @default: 100 | */ 101 | public final static String dataTransformClass = "dataTransformClass"; 102 | 103 | } 104 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/reader/hdfsreader/HdfsDirSplitter.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.reader.hdfsreader; 2 | 3 | import java.io.IOException; 4 | import java.net.URI; 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | 8 | import org.apache.hadoop.conf.Configuration; 9 | import org.apache.hadoop.fs.FileStatus; 10 | import org.apache.hadoop.fs.FileSystem; 11 | import org.apache.hadoop.fs.Path; 12 | import org.apache.hadoop.fs.PathFilter; 13 | import org.apache.hadoop.io.IOUtils; 14 | import org.apache.log4j.Logger; 15 | 16 | import com.dp.nebula.wormhole.common.AbstractSplitter; 17 | import com.dp.nebula.wormhole.common.interfaces.IParam; 18 | import com.dp.nebula.wormhole.plugins.common.DFSUtils; 19 | 20 | public class HdfsDirSplitter extends AbstractSplitter { 21 | private static final Logger logger = Logger.getLogger(HdfsDirSplitter.class); 22 | 23 | private Path p = null; 24 | private FileSystem fs = null; 25 | private List paramsList = null; 26 | 27 | @Override 28 | public void init(IParam jobParams){ 29 | super.init(jobParams); 30 | 31 | String dir = jobParams.getValue(ParamKey.dir, null); 32 | if (dir == null) { 33 | logger.error("Can't find the param [" 34 | + ParamKey.dir + "] in hdfs-spliter-param."); 35 | return; 36 | } 37 | 38 | if (dir.endsWith("*")) { 39 | dir = dir.substring(0, dir.lastIndexOf("*")); 40 | } 41 | 42 | p = new Path(dir); 43 | 44 | try { 45 | Configuration cfg = DFSUtils.getConf(dir, null); 46 | logger.info("fs.default.name: " + cfg.get("fs.default.name", "Not Found")); 47 | 48 | fs = DFSUtils.createFileSystem(URI.create(dir), cfg); 49 | 50 | if (!fs.exists(p)) { 51 | IOUtils.closeStream(fs); 52 | throw new Exception("the path[" + dir 53 | + "] does not exist."); 54 | }else{ 55 | logger.info("file " + p.toString() + " exitsts;"); 56 | } 57 | } catch (Exception e) { 58 | e.printStackTrace(); 59 | throw new RuntimeException("Can't create the HDFS file system:" 60 | + e.getCause()); 61 | } 62 | } 63 | 64 | @Override 65 | public List split() { 66 | paramsList = new 
ArrayList(); 67 | splitFilesRecursively(p); 68 | IOUtils.closeStream(fs); 69 | logger.info("the number of splitted files: " + paramsList.size()); 70 | 71 | return paramsList; 72 | } 73 | 74 | private void splitFilesRecursively(Path path){ 75 | try { 76 | FileStatus[] status = fs.listStatus(path, fileFilter); 77 | for (FileStatus state : status) { 78 | logger.debug("FileStatus path: " + state.getPath().toString() + 79 | "\tlength: " + state.getLen() + 80 | "\tblock size: " + state.getBlockSize()); 81 | 82 | if (!state.isDir()) { 83 | String file = state.getPath().toString(); 84 | logger.info(ParamKey.dir + " split filename:" + file + "\tlength:" + state.getLen()); 85 | 86 | IParam oParams = param.clone(); 87 | oParams.putValue(ParamKey.dir, file); 88 | paramsList.add(oParams); 89 | } else { 90 | splitFilesRecursively(state.getPath()); 91 | } 92 | } 93 | } catch (IOException e) { 94 | throw new RuntimeException("some errors have happened in fetching the file-status:" 95 | + e.getCause()); 96 | } 97 | } 98 | 99 | /* filter hidden files and LZO index files */ 100 | private final PathFilter fileFilter = new PathFilter() { 101 | public boolean accept(Path p) { 102 | String name = p.getName(); 103 | return !name.startsWith("_") && !name.startsWith(".") && !name.endsWith(".index"); 104 | } 105 | }; 106 | } 107 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/writer/mongowriter/MongoWriter.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.writer.mongowriter; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import org.apache.log4j.Logger; 7 | 8 | import com.dp.nebula.wormhole.common.AbstractPlugin; 9 | import com.dp.nebula.wormhole.common.interfaces.ILine; 10 | import com.dp.nebula.wormhole.common.interfaces.ILineReceiver; 11 | import com.dp.nebula.wormhole.common.interfaces.IWriter; 12 | import com.dp.nebula.wormhole.plugins.common.MongoUtils; 13 | import com.mongodb.BasicDBObject; 14 | import com.mongodb.DBCollection; 15 | import com.mongodb.DBObject; 16 | import com.mongodb.MongoException; 17 | import com.mongodb.MongoURI; 18 | import com.mongodb.WriteConcern; 19 | import com.mongodb.util.JSON; 20 | 21 | public class MongoWriter extends AbstractPlugin implements IWriter { 22 | private static final Logger log = Logger.getLogger(MongoWriter.class); 23 | private static final int MONGO_INSERT_RETRY_TIMES = 1; 24 | 25 | private String outputUri = ""; 26 | private String outputFields = ""; 27 | private int bulkInsertLine = 3000; 28 | 29 | private String[] fieldNames; 30 | private DBCollection coll; 31 | 32 | @Override 33 | public void commit() { 34 | } 35 | 36 | @Override 37 | public void init() { 38 | outputUri = getParam().getValue(ParamKey.outputUri, this.outputUri).trim(); 39 | outputFields = getParam().getValue(ParamKey.outputFields, this.outputFields).trim(); 40 | bulkInsertLine = getParam().getIntValue(ParamKey.bulkInsertLine, this.bulkInsertLine); 41 | 42 | BasicDBObject json = (BasicDBObject) JSON.parse(outputFields); 43 | fieldNames = json.keySet().toArray(new String[json.keySet().size()]); 44 | } 45 | 46 | @Override 47 | public void connection() { 48 | MongoURI uri = new MongoURI(outputUri); 49 | log.info("try to connect " + uri.toString()); 50 | coll = MongoUtils.getCollection(uri); 51 | } 52 | 53 | @Override 54 | public void finish() { 55 | } 56 | 57 | @Override 58 | public void write(ILineReceiver 
lineReceiver) { 59 | List objList = new ArrayList(); 60 | ILine line = null; 61 | while ((line = lineReceiver.receive()) != null){ 62 | int fieldNum = line.getFieldNum(); 63 | DBObject obj = new BasicDBObject(); 64 | for (int i = 0; i < fieldNum; i++) { 65 | obj.put(fieldNames[i], line.getField(i)); 66 | } 67 | objList.add(obj); 68 | if (bulkInsertLine == objList.size()){ 69 | if (bulkInsertToMongo(coll, objList)){ 70 | getMonitor().increaseSuccessLine(objList.size()); 71 | log.debug(objList.size() + " lines have been inserted into mongodb."); 72 | } 73 | objList.clear(); 74 | } 75 | } 76 | if (bulkInsertLine != objList.size()){ 77 | if (bulkInsertToMongo(coll, objList)){ 78 | getMonitor().increaseSuccessLine(objList.size()); 79 | log.debug(objList.size() + " lines have been inserted into mongodb."); 80 | } 81 | } 82 | } 83 | 84 | private boolean bulkInsertToMongo(DBCollection coll, List dbObjectList){ 85 | int retryTimes = 0; 86 | boolean success = false; 87 | do { 88 | try { 89 | coll.insert(dbObjectList, WriteConcern.SAFE); 90 | success = true; 91 | } catch (MongoException mge){ 92 | log.warn("insert mongodb failed, retryTimes:" + retryTimes, mge); 93 | } 94 | } while (!success && ++retryTimes <= MONGO_INSERT_RETRY_TIMES); 95 | if (!success) { 96 | throw new RuntimeException( 97 | String.format("Miss %s log since insert mongo failed with max retry limit.", 98 | dbObjectList.size())); 99 | } 100 | return true; 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /src/test/resources/wormhole_hivereader_to_hdfswriter_test.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | hivereader 6 | 12 | jdbc:hive://10.1.1.161:10000/bi 13 | 18 | 19 | 24 | 25 | 32 | select dt,kw,imprsn_cnt,click_cnt from bi.dm_dp_rsa_kw_sd 33 | 34 | 35 | hdfswriter 36 | 41 | file:///tmp/yukang.chen/wormhole-hive-test 42 | 48 | prefix 49 | 56 | \t 57 | 64 | \n 65 | 72 | UTF-8 73 | 79 | 80 | 86 | 87 | 94 | com.hadoop.compression.lzo.LzopCodec 95 | 102 | 4096 103 | 110 | TXT 111 | 118 | 1 119 | 126 | false 127 | 133 | 134 | 135 | 136 | -------------------------------------------------------------------------------- /src/test/java/com/dp/nebula/wormhole/plugins/reader/mongoreader/MongoReaderTest.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.reader.mongoreader; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | import static org.mockito.Mockito.mock; 5 | 6 | import java.util.HashMap; 7 | import java.util.Map; 8 | 9 | import org.junit.After; 10 | import org.junit.Before; 11 | import org.junit.Test; 12 | 13 | import com.dp.nebula.wormhole.common.BufferedLineExchanger; 14 | import com.dp.nebula.wormhole.common.DefaultParam; 15 | import com.dp.nebula.wormhole.common.interfaces.ILine; 16 | import com.dp.nebula.wormhole.common.interfaces.IParam; 17 | import com.dp.nebula.wormhole.common.interfaces.IPluginMonitor; 18 | import com.dp.nebula.wormhole.plugins.common.MongoUtils; 19 | import com.dp.nebula.wormhole.plugins.reader.hivereader.HiveReaderTest; 20 | import com.mongodb.BasicDBObject; 21 | import com.mongodb.DB; 22 | import com.mongodb.DBCollection; 23 | import com.mongodb.DBCursor; 24 | import com.mongodb.Mongo; 25 | 26 | import de.flapdoodle.embed.mongo.MongodExecutable; 27 | import de.flapdoodle.embed.mongo.MongodProcess; 28 | import de.flapdoodle.embed.mongo.MongodStarter; 29 | import de.flapdoodle.embed.mongo.config.MongodConfig; 
30 | import de.flapdoodle.embed.mongo.distribution.Version; 31 | import de.flapdoodle.embed.process.runtime.Network; 32 | 33 | public class MongoReaderTest { 34 | private static final String DATABASE_NAME = "embedded"; 35 | private static final int MONGODB_PORT = 12348; 36 | 37 | private MongodExecutable mongodExe; 38 | private MongodProcess mongod; 39 | private Mongo mongo; 40 | 41 | @SuppressWarnings("deprecation") 42 | @Before 43 | public void beforeEach() throws Exception { 44 | MongodStarter runtime = MongodStarter .getDefaultInstance(); 45 | mongodExe = runtime.prepare(new MongodConfig(Version.V2_0_1, MONGODB_PORT, Network.localhostIsIPv6())); 46 | mongod = mongodExe.start(); 47 | mongo = new Mongo("localhost", MONGODB_PORT); 48 | } 49 | 50 | @After 51 | public void afterEach() throws Exception { 52 | mongo.close(); 53 | if (mongod != null) { 54 | mongod.stop(); 55 | } 56 | if (mongodExe != null){ 57 | mongodExe.stop(); 58 | } 59 | } 60 | 61 | @Test 62 | public void shouldCreateNewObjectInEmbeddedMongoDb() { 63 | // given 64 | DB db = mongo.getDB(DATABASE_NAME); 65 | DBCollection col = db.createCollection("testCollection", new BasicDBObject()); 66 | //col.save(new BasicDBObject("testDoc", new Date())); 67 | BasicDBObject document = new BasicDBObject(); 68 | document.put("id", 1001); 69 | document.put("msg", "hello world mongoDB in Java"); 70 | col.insert(document); 71 | 72 | DBCursor cursor = col.find(null, MongoUtils.convertStringToDBObject("{ msg:1}")); 73 | while (cursor.hasNext()){ 74 | System.out.println(cursor.next().toString()); 75 | } 76 | 77 | 78 | //assertEquals(col.getCount(), 1); 79 | 80 | MongoReader mongoReader = new MongoReader(); 81 | Map params = new HashMap(); 82 | params.put(ParamKey.inputUri, "mongodb://localhost:" + MONGODB_PORT + "/embedded.testCollection"); 83 | params.put(ParamKey.inputFields, "{ msg : 1 }"); 84 | IParam iParam = new DefaultParam(params); 85 | mongoReader.setParam(iParam); 86 | BufferedLineExchanger bufLineExchanger = HiveReaderTest.getBufferedLineExchangerInstance(); 87 | 88 | IPluginMonitor pluginMonitor = mock(IPluginMonitor.class); 89 | mongoReader.setMonitor(pluginMonitor); 90 | 91 | mongoReader.init(); 92 | mongoReader.connection(); 93 | mongoReader.read(bufLineExchanger); 94 | ILine line = bufLineExchanger.receive(); 95 | 96 | System.out.println(line.getField(0)); 97 | assertEquals("hello world mongoDB in Java", line.getField(0)); 98 | 99 | mongoReader.finish(); 100 | } 101 | 102 | } 103 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/writer/sftpwriter/SftpWriterPeriphery.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.writer.sftpwriter; 2 | 3 | import java.net.URI; 4 | 5 | import org.apache.commons.lang.StringUtils; 6 | import org.apache.log4j.Logger; 7 | 8 | import com.dp.nebula.wormhole.common.interfaces.IParam; 9 | import com.dp.nebula.wormhole.common.interfaces.ISourceCounter; 10 | import com.dp.nebula.wormhole.common.interfaces.ITargetCounter; 11 | import com.dp.nebula.wormhole.common.interfaces.IWriterPeriphery; 12 | import com.dp.nebula.wormhole.plugins.common.PCInfo; 13 | import com.jcraft.jsch.Channel; 14 | import com.jcraft.jsch.ChannelSftp; 15 | import com.jcraft.jsch.JSch; 16 | import com.jcraft.jsch.Session; 17 | import com.jcraft.jsch.SftpATTRS; 18 | 19 | public class SftpWriterPeriphery implements IWriterPeriphery { 20 | private static final Logger logger = 
Logger.getLogger(SftpWriterPeriphery.class); 21 | 22 | private String dir = ""; 23 | private String prefixname = "part"; 24 | 25 | private String scheme = ""; 26 | private String host = ""; 27 | private int port = 58422; 28 | private String path = ""; 29 | private String username = ""; 30 | private String password = ""; 31 | private URI uri = null; 32 | 33 | private JSch jsch = null; 34 | private Session session = null; 35 | private Channel channel = null; 36 | private ChannelSftp c = null; 37 | 38 | @Override 39 | public void rollback(IParam param) { 40 | } 41 | 42 | @Override 43 | public void doPost(IParam param, ITargetCounter counter) { 44 | logger.info("doPost stage do nothing"); 45 | } 46 | 47 | @Override 48 | public void prepare(IParam param, ISourceCounter counter) { 49 | dir = param.getValue(ParamKey.dir, this.dir); 50 | prefixname = param.getValue(ParamKey.prefixname, this.prefixname); 51 | password = param.getValue(ParamKey.password, this.password); 52 | if (dir.endsWith("*")) { 53 | dir = dir.substring(0, dir.lastIndexOf('*')); 54 | } 55 | if (dir.endsWith("/")) { 56 | dir = dir.substring(0, dir.lastIndexOf('/')); 57 | } 58 | 59 | uri = URI.create(dir); 60 | scheme = uri.getScheme(); 61 | host = uri.getHost(); 62 | port = uri.getPort(); 63 | path = uri.getPath(); 64 | username = uri.getUserInfo(); 65 | 66 | if (!scheme.equalsIgnoreCase("sftp") || StringUtils.isBlank(host) || 67 | -1 == port || StringUtils.isBlank(path) || 68 | StringUtils.isBlank(username) || StringUtils.isBlank(password)){ 69 | throw new IllegalArgumentException( 70 | "paramkey dir is not set properly, the correct sftp path format like: " + 71 | "sftp://[@][:]//"); 72 | } 73 | 74 | PCInfo pi = new PCInfo(); 75 | pi.setIp(host); 76 | pi.setPort(port); 77 | pi.setUser(username); 78 | pi.setPwd(password); 79 | pi.setPath(path); 80 | 81 | try { 82 | jsch = new JSch(); 83 | session = jsch.getSession(username, host, port); 84 | session.setUserInfo(pi); 85 | session.connect(); 86 | 87 | channel = session.openChannel("sftp"); 88 | channel.connect(); 89 | c = (ChannelSftp) channel; 90 | 91 | SftpATTRS sftpAttrs = c.lstat(path); 92 | if (sftpAttrs == null){ 93 | throw new IllegalArgumentException( 94 | "paramkey dir not found on the remote server: " + path); 95 | }else if (sftpAttrs.isDir() ){ 96 | logger.info("removing files under the " + path); 97 | c.rm(path + "/" + prefixname + "*"); 98 | }else{ 99 | logger.error(path + " is a file, please make sure it is only a directory. 
"); 100 | return; 101 | } 102 | } catch (Exception e) { 103 | closeAll(); 104 | throw new RuntimeException("something wrong with jsch:" 105 | + e.getCause()); 106 | } 107 | closeAll(); 108 | } 109 | 110 | private void closeAll(){ 111 | if (c != null) { 112 | c.disconnect(); 113 | } 114 | if (session != null) { 115 | session.disconnect(); 116 | } 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/reader/sqlserverreader/SqlserverReader.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.reader.sqlserverreader; 2 | 3 | import java.sql.Connection; 4 | import java.sql.ResultSet; 5 | import java.sql.SQLException; 6 | import java.text.SimpleDateFormat; 7 | import java.util.HashMap; 8 | import java.util.Map; 9 | 10 | import org.apache.commons.logging.Log; 11 | import org.apache.commons.logging.LogFactory; 12 | 13 | import com.dp.nebula.wormhole.common.AbstractPlugin; 14 | import com.dp.nebula.wormhole.common.JobStatus; 15 | import com.dp.nebula.wormhole.common.WormholeException; 16 | import com.dp.nebula.wormhole.common.interfaces.ILineSender; 17 | import com.dp.nebula.wormhole.common.interfaces.IReader; 18 | import com.dp.nebula.wormhole.plugins.common.DBResultSetSender; 19 | import com.dp.nebula.wormhole.plugins.common.DBSource; 20 | import com.dp.nebula.wormhole.plugins.common.DBUtils; 21 | 22 | public class SqlserverReader extends AbstractPlugin implements IReader{ 23 | 24 | private Connection conn; 25 | 26 | private String ip = ""; 27 | 28 | private String port = "1433"; 29 | 30 | private String dbname; 31 | 32 | private String sql; 33 | 34 | private Log logger = LogFactory.getLog(SqlserverReader.class); 35 | 36 | static final int PLUGIN_NO = 3; 37 | 38 | static final int ERROR_CODE_ADD = JobStatus.PLUGIN_BASE*PLUGIN_NO; 39 | 40 | 41 | @Override 42 | public void init() { 43 | /* for database connection */ 44 | this.ip = getParam().getValue(ParamKey.ip,""); 45 | this.port = getParam().getValue(ParamKey.port, this.port); 46 | this.dbname = getParam().getValue(ParamKey.dbname,""); 47 | this.sql = getParam().getValue(ParamKey.sql, "").trim(); 48 | } 49 | 50 | @Override 51 | public void connection() { 52 | try { 53 | conn = DBSource.getConnection(this.getClass(), ip, port, dbname); 54 | } catch (Exception e) { 55 | throw new WormholeException(e, JobStatus.READ_CONNECTION_FAILED.getStatus() + ERROR_CODE_ADD); 56 | } 57 | } 58 | 59 | @Override 60 | public void read(ILineSender lineSender){ 61 | DBResultSetSender proxy = DBResultSetSender.newSender(lineSender); 62 | proxy.setMonitor(getMonitor()); 63 | proxy.setDateFormatMap(genDateFormatMap()); 64 | if(sql.isEmpty()){ 65 | logger.error("Sql for SqlserverReader is empty."); 66 | throw new WormholeException("Sql for SqlserverReader is empty.",JobStatus.READ_FAILED.getStatus()+ERROR_CODE_ADD); 67 | } 68 | logger.debug(String.format("SqlserverReader start to query %s .", sql)); 69 | for(String sqlItem:sql.split(";")){ 70 | sqlItem = sqlItem.trim(); 71 | if(sqlItem.isEmpty()) { 72 | continue; 73 | } 74 | logger.debug(sqlItem); 75 | ResultSet rs = null; 76 | try { 77 | rs = DBUtils.query(conn, sqlItem); 78 | proxy.sendToWriter(rs); 79 | proxy.flush(); 80 | } catch (SQLException e) { 81 | logger.error(e.getMessage()); 82 | throw new WormholeException(e,JobStatus.READ_FAILED.getStatus()+ERROR_CODE_ADD); 83 | } catch (WormholeException e1) { 84 | e1.setStatusCode(e1.getStatusCode() + 
ERROR_CODE_ADD); 85 | throw e1; 86 | } finally { 87 | if (null != rs) { 88 | try { 89 | DBUtils.closeResultSet(rs); 90 | } catch (SQLException e) { 91 | logger.error("SqlserverReader close resultset error "); 92 | throw new WormholeException(e,JobStatus.READ_FAILED.getStatus()+ERROR_CODE_ADD); 93 | } 94 | } 95 | } 96 | } 97 | } 98 | 99 | @Override 100 | public void finish(){ 101 | try { 102 | if (conn != null) { 103 | conn.close(); 104 | } 105 | conn = null; 106 | } catch (SQLException e) { 107 | logger.error(e.getMessage(),e); 108 | } 109 | } 110 | 111 | private Map genDateFormatMap() { 112 | Map mapDateFormat = new HashMap(); 113 | mapDateFormat.put("datetime", new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")); 114 | mapDateFormat.put("smalldatetime", new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")); 115 | return mapDateFormat; 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/common/BufferedLineExchanger.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.common; 2 | 3 | import java.util.List; 4 | 5 | import com.dp.nebula.wormhole.common.interfaces.ILine; 6 | import com.dp.nebula.wormhole.common.interfaces.ILineReceiver; 7 | import com.dp.nebula.wormhole.common.interfaces.ILineSender; 8 | import com.dp.nebula.wormhole.engine.storage.IStorage; 9 | 10 | public class BufferedLineExchanger implements ILineSender, ILineReceiver{ 11 | 12 | static private final int DEFAULT_BUF_SIZE = 64; 13 | 14 | /** stores data which the reader puts into the storageForWrite area. */ 15 | private ILine[] writeBuf; 16 | 17 | /** stores data which the {@link IWriter} gets from the storageForRead area. */ 18 | private ILine[] readBuf; 19 | 20 | private int writeBufIdx = 0; 21 | 22 | private int readBufIdx = 0; 23 | 24 | private List storageForWrite; 25 | 26 | private IStorage storageForRead; 27 | 28 | /** 29 | * Construct a {@link BufferedLineExchanger}. 30 | * 31 | * @param storageForRead 32 | * Storage which the {@link IWriter} gets data from. 33 | * 34 | * @param storageForWrite 35 | * Storage which the {@link IReader} puts data to. 36 | * 37 | */ 38 | public BufferedLineExchanger(IStorage storageForRead, List storageForWrite) { 39 | this(storageForRead, storageForWrite, DEFAULT_BUF_SIZE); 40 | } 41 | 42 | /** 43 | * Construct a {@link BufferedLineExchanger}. 44 | * 45 | * @param storageForRead 46 | * Storage which the {@link IWriter} gets data from. 47 | * 48 | * @param storageForWrite 49 | * Storage which the {@link IReader} puts data to. 50 | * 51 | * @param bufSize 52 | * Storage buffer size. 53 | * 54 | */ 55 | public BufferedLineExchanger(IStorage storageForRead, 56 | List storageForWrite, int bufSize) { 57 | this.storageForRead = storageForRead; 58 | this.storageForWrite = storageForWrite; 59 | this.writeBuf = new ILine[bufSize]; 60 | this.readBuf = new ILine[bufSize]; 61 | } 62 | 63 | /** 64 | * Get the next line of data to be dumped to the data destination. 65 | * 66 | * @return 67 | * next {@link ILine}. 68 | * 69 | */ 70 | @Override 71 | public ILine receive() { 72 | if (readBufIdx == 0) { 73 | readBufIdx = storageForRead.pull(readBuf); 74 | if (readBufIdx == 0) { 75 | return null; 76 | } 77 | } 78 | return readBuf[--readBufIdx]; 79 | } 80 | 81 | /** 82 | * Construct one {@link ILine} of data in {@link Storage} which will be used to exchange data. 83 | * 84 | * @return 85 | * a new {@link ILine}.
86 | * 87 | * */ 88 | @Override 89 | public ILine createNewLine() { 90 | return new DefaultLine(); 91 | } 92 | 93 | /** 94 | * Put one {@link ILine} into {@link Storage}. 95 | * 96 | * @param line 97 | * {@link ILine} of data pushed into {@link Storage}. 98 | * 99 | * @return 100 | * true for OK, false for failure. 101 | * 102 | * */ 103 | @Override 104 | public Boolean send(ILine line) { 105 | boolean result = true; 106 | if (writeBufIdx >= writeBuf.length) { 107 | if(!writeAllStorage(writeBuf, writeBufIdx)) { 108 | result = false; 109 | } 110 | writeBufIdx = 0; 111 | } 112 | writeBuf[writeBufIdx++] = line; 113 | return result; 114 | } 115 | 116 | /** 117 | * Flush data in buffer (if exists) to {@link Storage}. 118 | * 119 | * */ 120 | @Override 121 | public void flush() { 122 | if (writeBufIdx > 0) { 123 | writeAllStorage(writeBuf, writeBufIdx); 124 | } 125 | writeBufIdx = 0; 126 | } 127 | 128 | /** 129 | * Write buffered data(in a line array) to all storages which offer data to {@link IWriter}. 130 | * This method is the base of double write(data dumped to multiple destinations). 131 | * 132 | * @param lines 133 | * A line array buffered data. 134 | * 135 | * @param size 136 | * Limit of the line array. 137 | * 138 | * @return 139 | * True or False represents write data to storages success or fail. 140 | * 141 | */ 142 | private boolean writeAllStorage(ILine[] lines, int size) { 143 | boolean result = true; 144 | for (IStorage s : this.storageForWrite) { 145 | if(!s.push(lines, size)) { 146 | result = false; 147 | } 148 | } 149 | return result; 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/reader/mysqlreader/MysqlReader.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.reader.mysqlreader; 2 | 3 | import org.apache.commons.logging.Log; 4 | import org.apache.commons.logging.LogFactory; 5 | 6 | import java.sql.Connection; 7 | import java.sql.ResultSet; 8 | import java.sql.SQLException; 9 | import java.text.SimpleDateFormat; 10 | import java.util.HashMap; 11 | import java.util.Map; 12 | 13 | import com.dp.nebula.wormhole.common.AbstractPlugin; 14 | import com.dp.nebula.wormhole.common.JobStatus; 15 | import com.dp.nebula.wormhole.common.WormholeException; 16 | import com.dp.nebula.wormhole.common.interfaces.ILineSender; 17 | import com.dp.nebula.wormhole.common.interfaces.IReader; 18 | import com.dp.nebula.wormhole.plugins.common.DBResultSetSender; 19 | import com.dp.nebula.wormhole.plugins.common.DBSource; 20 | import com.dp.nebula.wormhole.plugins.common.DBUtils; 21 | 22 | public class MysqlReader extends AbstractPlugin implements IReader{ 23 | 24 | static final int PLUGIN_NO = 1; 25 | 26 | static final int ERROR_CODE_ADD = JobStatus.PLUGIN_BASE*PLUGIN_NO; 27 | 28 | private Connection conn; 29 | 30 | private String ip = ""; 31 | 32 | private String port = "3306"; 33 | 34 | private String dbname; 35 | 36 | private String sql; 37 | 38 | private Log logger = LogFactory.getLog(MysqlReader.class); 39 | 40 | 41 | @Override 42 | public void init() { 43 | /* for database connection */ 44 | this.ip = getParam().getValue(ParamKey.ip,""); 45 | this.port = getParam().getValue(ParamKey.port, this.port); 46 | this.dbname = getParam().getValue(ParamKey.dbname,""); 47 | this.sql = getParam().getValue(ParamKey.sql, "").trim(); 48 | } 49 | 50 | @Override 51 | public void connection() { 52 | try { 53 | conn = 
DBSource.getConnection(this.getClass(), ip, port, dbname); 54 | } catch (Exception e) { 55 | throw new WormholeException(e, JobStatus.READ_CONNECTION_FAILED.getStatus() + ERROR_CODE_ADD); 56 | } 57 | } 58 | 59 | @Override 60 | public void read(ILineSender lineSender){ 61 | DBResultSetSender proxy = DBResultSetSender.newSender(lineSender); 62 | proxy.setMonitor(getMonitor()); 63 | proxy.setDateFormatMap(genDateFormatMap()); 64 | if(sql.isEmpty()){ 65 | logger.error("Sql for mysqlReader is empty."); 66 | throw new WormholeException("Sql for mysqlReader is empty.",JobStatus.READ_FAILED.getStatus()+ERROR_CODE_ADD); 67 | } 68 | logger.debug(String.format("MysqlReader start to query %s .", sql)); 69 | for(String sqlItem:sql.split(";")){ 70 | sqlItem = sqlItem.trim(); 71 | if(sqlItem.isEmpty()) { 72 | continue; 73 | } 74 | logger.debug(sqlItem); 75 | ResultSet rs = null; 76 | try { 77 | rs = DBUtils.query(conn, sqlItem); 78 | proxy.sendToWriter(rs); 79 | proxy.flush(); 80 | } catch (SQLException e) { 81 | logger.error("Mysql read failed",e); 82 | throw new WormholeException(e,JobStatus.READ_FAILED.getStatus()+ERROR_CODE_ADD); 83 | } catch (WormholeException e1) { 84 | e1.setStatusCode(e1.getStatusCode() + ERROR_CODE_ADD); 85 | throw e1; 86 | } finally { 87 | if (null != rs) { 88 | try { 89 | DBUtils.closeResultSet(rs); 90 | } catch (SQLException e) { 91 | logger.error("MysqlReader close resultset error "); 92 | throw new WormholeException(e,JobStatus.READ_FAILED.getStatus()+ERROR_CODE_ADD); 93 | } 94 | } 95 | } 96 | } 97 | } 98 | 99 | @Override 100 | public void finish(){ 101 | try { 102 | if (conn != null) { 103 | conn.close(); 104 | } 105 | conn = null; 106 | } catch (SQLException e) { 107 | logger.error("Close connection failed",e); 108 | } 109 | } 110 | 111 | private Map genDateFormatMap() { 112 | Map mapDateFormat = new HashMap(); 113 | mapDateFormat.clear(); 114 | mapDateFormat.put("datetime", new SimpleDateFormat( 115 | "yyyy-MM-dd HH:mm:ss")); 116 | mapDateFormat.put("timestamp", new SimpleDateFormat( 117 | "yyyy-MM-dd HH:mm:ss")); 118 | mapDateFormat.put("time", new SimpleDateFormat("HH:mm:ss")); 119 | mapDateFormat.put("date", new SimpleDateFormat( 120 | "yyyy-MM-dd")); 121 | return mapDateFormat; 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/writer/hdfswriter/ParamKey.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.writer.hdfswriter; 2 | 3 | public final class ParamKey { 4 | /* 5 | * @name: dir 6 | * @description: hdfs dir, hdfs://ip:port/path 7 | * @range: 8 | * @mandatory: true 9 | * @default: 10 | */ 11 | public final static String dir = "dir"; 12 | /* 13 | * @name: prefixname 14 | * @description: hdfs filename prefix 15 | * @range: 16 | * @mandatory: false 17 | * @default: prefix 18 | */ 19 | public final static String prefixname = "prefix_filename"; 20 | 21 | /* 22 | * @name: fieldSplit 23 | * @description: how to separate fields 24 | * @range:\t,\001,"," 25 | * @mandatory: false 26 | * @default:\t 27 | */ 28 | public final static String fieldSplit = "field_split"; 29 | /* 30 | * @name: lineSplit 31 | * @description: how to separate lines 32 | * @range:\n 33 | * @mandatory: false 34 | * @default:\n 35 | */ 36 | public final static String lineSplit = "line_split"; 37 | /* 38 | * @name: encoding 39 | * @description: file encoding 40 | * @range: UTF-8|GBK|GB2312 41 | * @mandatory: false 42 | * @default: UTF-8 43 | */ 44 | public final static String encoding = "encoding"; 45 | /* 46 | * @name: nullChar 47 | * @description: the character used to replace null in hdfs 48 | * @range: 49 | * @mandatory: false 50 | * @default: 51 | */ 52 | public final static String nullChar = "nullchar"; 53 | /* 54 | * @name: replaceChar 55 | * @description: characters to replace; if this parameter is not set, \r, \n and the field split character are replaced with ' ' by default 56 | * @range: e.g. \r\n:\001 means replace \r and \n with \001 57 | * @mandatory: false 58 | * @default: 59 | */ 60 | public final static String replaceChar = "replace_char"; 61 | /* 62 | * @name: codecClass 63 | * @description: compression codec 64 | * @range:com.hadoop.compression.lzo.LzopCodec|org.apache.hadoop.io.compress.BZip2Codec|org.apache.hadoop.io.compress.DefaultCodec|org.apache.hadoop.io.compress.GzipCodec 65 | * @mandatory: false 66 | * @default: com.hadoop.compression.lzo.LzopCodec 67 | */ 68 | public final static String codecClass = "codec_class"; 69 | /* 70 | * @name: bufferSize 71 | * @description: the buffer size to use 72 | * @range: [1024-4194304] 73 | * @mandatory: false 74 | * @default: 4096 75 | */ 76 | public final static String bufferSize = "buffer_size"; 77 | /* 78 | * @name: fileType 79 | * @description: TXT->TextFile, TXT_COMP->Compressed TextFile 80 | * @range: TXT|TXT_COMP 81 | * @mandatory: true 82 | * @default: TXT 83 | */ 84 | public final static String fileType = "file_type"; 85 | /* 86 | * @name:concurrency 87 | * @description:concurrency of the job; it also equals the split number 88 | * @range:1-100 89 | * @mandatory: false 90 | * @default:1 91 | */ 92 | public final static String concurrency = "concurrency"; 93 | /* 94 | * @name: hiveTableAddPartitionSwitch 95 | * @description: switch that controls whether to add a hive table partition 96 | * @range: true,false 97 | * @mandatory: false 98 | * @default: false 99 | */ 100 | public final static String hiveTableAddPartitionSwitch = "hive_table_add_partition_switch"; 101 | /* 102 | * @name: hiveTableAddPartitionCondition 103 | * @description: specify the table and partition condition; this parameter is valid only if hiveTableAddPartitionSwitch is set to true 104 | * @range: e.g:dt='2010-01-01'@sampleDatabase.sampleTable 105 | * @mandatory: false 106 | * @default: 107 | */ 108 | public final static String hiveTableAddPartitionCondition = "hive_table_add_partition_condition"; 109 | 110 | /* 111 | * @name: dataTransformClass 112 | * @description: data transformer class path 113 | * @range: 114 | * @mandatory: false 115 | * @default: 116 | */ 117 | public final static String dataTransformClass = "dataTransformClass"; 118 | 119 | /* 120 | * @name: dataTransformParams 121 | * @description: data transformer params 122 | * @range: 123 | * @mandatory: false 124 | * @default: 125 | */ 126 | public final static String dataTransformParams = "dataTransformParams"; 127 | 128 | /* 129 | * @name: createLzoIndexFile 130 | * @description: whether to create an lzo index file 131 | * @range: true,false 132 | * @mandatory: false 133 | * @default: true 134 | */ 135 | public final static String createLzoIndexFile = "createLzoIndexFile"; 136 | } 137 | -------------------------------------------------------------------------------- /src/main/java/com/dp/nebula/wormhole/plugins/reader/hivereader/HiveReaderPeriphery.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.reader.hivereader; 2 | 3 | import java.io.IOException; 4 | import java.net.URI; 5 | 6 | import
org.apache.commons.codec.digest.DigestUtils; 7 | import org.apache.commons.lang.StringUtils; 8 | import org.apache.hadoop.conf.Configuration; 9 | import org.apache.hadoop.fs.FileSystem; 10 | import org.apache.hadoop.fs.Path; 11 | import org.apache.log4j.Logger; 12 | 13 | import com.dp.nebula.wormhole.common.JobStatus; 14 | import com.dp.nebula.wormhole.common.WormholeException; 15 | import com.dp.nebula.wormhole.common.interfaces.IParam; 16 | import com.dp.nebula.wormhole.common.interfaces.IReaderPeriphery; 17 | import com.dp.nebula.wormhole.common.interfaces.ISourceCounter; 18 | import com.dp.nebula.wormhole.common.interfaces.ITargetCounter; 19 | import com.dp.nebula.wormhole.plugins.common.DFSUtils; 20 | import com.google.common.base.Preconditions; 21 | 22 | public class HiveReaderPeriphery implements IReaderPeriphery { 23 | private static final Logger LOG = Logger 24 | .getLogger(HiveReaderPeriphery.class); 25 | 26 | private static final String INSERT_SQL_PATTERN = "INSERT OVERWRITE DIRECTORY '%s' %s"; 27 | 28 | private String path = "jdbc:hive://10.1.1.161:10000/default"; 29 | private String username = ""; 30 | private String password = ""; 31 | private String sql = ""; 32 | private String dataDir = ""; 33 | private int reduceNumber = -1; 34 | public Path absolutePath; 35 | private String mode = HiveReaderMode.READ_FROM_HIVESERVER.getMode(); 36 | private HiveJdbcClient client; 37 | private Configuration conf; 38 | private FileSystem fs; 39 | 40 | @Override 41 | public void prepare(IParam param, ISourceCounter counter) { 42 | mode = param.getValue(ParamKey.mode, mode); 43 | Preconditions 44 | .checkArgument( 45 | mode.equals(HiveReaderMode.READ_FROM_HDFS.getMode()) 46 | || mode.equals(HiveReaderMode.READ_FROM_HIVESERVER 47 | .getMode()), 48 | "hive reader mode should be READ_FROM_HDFS or READ_FROM_HIVESERVER"); 49 | if (mode.equals(HiveReaderMode.READ_FROM_HDFS.getMode())) { 50 | path = param.getValue(ParamKey.path, path); 51 | username = param.getValue(ParamKey.username, username); 52 | password = param.getValue(ParamKey.password, password); 53 | reduceNumber = param.getIntValue(ParamKey.reduceNumber, 54 | reduceNumber); 55 | sql = param.getValue(ParamKey.sql, sql).trim(); 56 | if (sql.endsWith(";")) { 57 | sql = StringUtils.substring(sql, 0, sql.length() - 1); 58 | } 59 | dataDir = param.getValue(ParamKey.dataDir, dataDir); 60 | try { 61 | createTempDir(); 62 | param.putValue(ParamKey.dataDir, absolutePath.toString()); 63 | sql = String.format(INSERT_SQL_PATTERN, 64 | absolutePath.toString(), sql); 65 | client = new HiveJdbcClient.Builder(path).username(username) 66 | .password(password).sql(sql).build(); 67 | client.initialize(); 68 | client.processInsertQuery(reduceNumber); 69 | } catch (Exception e) { 70 | throw new WormholeException(e, 71 | JobStatus.READ_FAILED.getStatus()); 72 | } finally { 73 | if (client != null) { 74 | client.close(); 75 | } 76 | } 77 | } 78 | } 79 | 80 | private String createTempDir() throws Exception { 81 | conf = DFSUtils.getConf(dataDir, null); 82 | fs = DFSUtils.createFileSystem(new URI(dataDir), conf); 83 | absolutePath = new Path(dataDir, createFilename(sql)); 84 | fs = absolutePath.getFileSystem(conf); 85 | if (fs.mkdirs(absolutePath)) { 86 | LOG.info("create data temp directory successfully " 87 | + absolutePath.toString()); 88 | } else { 89 | LOG.error("Failed to mkdir " + absolutePath.toString()); 90 | throw new WormholeException(JobStatus.READ_FAILED.getStatus()); 91 | } 92 | return absolutePath.toString(); 93 | } 94 | 95 | private String 
createFilename(String sql) { 96 | return DigestUtils.md5Hex(sql + System.currentTimeMillis()); 97 | } 98 | 99 | @Override 100 | public void doPost(IParam param, ITargetCounter counter) { 101 | if (mode.equals(HiveReaderMode.READ_FROM_HDFS.getMode()) 102 | && absolutePath != null) { 103 | try { 104 | if (fs.exists(absolutePath)) { 105 | fs.delete(absolutePath, true); 106 | LOG.info(absolutePath.toString() 107 | + " has been deleted at dopost stage"); 108 | } 109 | } catch (IOException e) { 110 | LOG.info("Failed to delete " + absolutePath.toString()); 111 | } 112 | } 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /src/test/java/com/dp/nebula/wormhole/plugins/reader/greenplumreader/GreenplumReaderTest.java: -------------------------------------------------------------------------------- 1 | package com.dp.nebula.wormhole.plugins.reader.greenplumreader; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | import static org.mockito.Mockito.mock; 5 | 6 | import java.util.ArrayList; 7 | import java.util.HashMap; 8 | import java.util.List; 9 | import java.util.Map; 10 | 11 | import org.junit.Test; 12 | 13 | import com.dp.nebula.wormhole.common.BufferedLineExchanger; 14 | import com.dp.nebula.wormhole.common.DefaultParam; 15 | import com.dp.nebula.wormhole.common.interfaces.ILine; 16 | import com.dp.nebula.wormhole.common.interfaces.IParam; 17 | import com.dp.nebula.wormhole.common.interfaces.IPluginMonitor; 18 | import com.dp.nebula.wormhole.common.utils.ParseXMLUtil; 19 | import com.dp.nebula.wormhole.engine.config.EngineConfParamKey; 20 | import com.dp.nebula.wormhole.engine.storage.StorageConf; 21 | import com.dp.nebula.wormhole.engine.storage.StorageManager; 22 | import com.dp.nebula.wormhole.plugins.reader.greenplumreader.ParamKey; 23 | 24 | public class GreenplumReaderTest { 25 | 26 | public static BufferedLineExchanger getLineExchanger(){ 27 | IParam engineConf = null; 28 | engineConf = ParseXMLUtil.loadEngineConfig(); 29 | List result = new ArrayList(); 30 | 31 | for(int i = 0; i< 1; i++){ 32 | StorageConf storageConf = new StorageConf(); 33 | storageConf.setId(String.valueOf(i)); 34 | storageConf.setStorageClassName( 35 | engineConf.getValue(EngineConfParamKey.STORAGE_CLASS_NAME)); 36 | storageConf.setLineLimit( 37 | 10); 38 | storageConf.setByteLimit( 39 | engineConf.getIntValue(EngineConfParamKey.STORAGE_BYTE_LIMIT)); 40 | storageConf.setDestructLimit( 41 | engineConf.getIntValue(EngineConfParamKey.STORAGE_DESTRUCT_LIMIT)); 42 | storageConf.setPeriod( 43 | engineConf.getIntValue(EngineConfParamKey.MONITOR_INFO_DISPLAY_PERIOD)); 44 | storageConf.setWaitTime( 45 | 5000 46 | 47 | ); 48 | result.add(storageConf); 49 | } 50 | StorageManager manager = new StorageManager(result); 51 | return new BufferedLineExchanger(manager.getStorageForWriter("0"), manager.getStorageForReader()); 52 | } 53 | //@Test 54 | public void testGPReader() { 55 | GreenplumReaderPeriphery gpPeriphery = new GreenplumReaderPeriphery(); 56 | GreenplumReader gpReader = new GreenplumReader(); 57 | 58 | Map params = new HashMap(); 59 | 60 | params.put(ParamKey.ip, "10.1.21.57"); 61 | params.put(ParamKey.port, "5432"); 62 | params.put(ParamKey.dbname, "dianpingdw"); 63 | params.put(ParamKey.username, "tempuser_201302211841"); 64 | params.put(ParamKey.password,"dp!@g0Y8bRfvL"); 65 | 66 | params.put(ParamKey.sql, "copy (select * from dpmid.mid_dp_shop_info_his where cal_dt = '2012-01-01' limit 20) to stdout WITH DELIMITER E'\t' "); 67 | IParam iParam = new 
DefaultParam(params); 68 | 69 | gpPeriphery.prepare(iParam,null); 70 | gpReader.setParam(iParam); 71 | BufferedLineExchanger bufLineExchanger = getLineExchanger(); 72 | 73 | IPluginMonitor pluginMonitor = mock(IPluginMonitor.class); 74 | gpReader.setMonitor(pluginMonitor); 75 | 76 | gpReader.init(); 77 | gpReader.connection(); 78 | ReadRunnable t = new ReadRunnable(); 79 | t.init(gpReader,bufLineExchanger); 80 | new Thread(t).start(); 81 | ILine line = null; 82 | System.out.println("start"); 83 | int num = 0; 84 | try{ 85 | while((line = bufLineExchanger.receive())!=null) { 86 | num++; 87 | if(line.getFieldNum() != 14){ 88 | for(int i=0; i