├── image └── README.cn │ ├── 1765353174408.png │ └── 1765353763090.png ├── etl-engine ├── src │ └── main │ │ ├── java │ │ └── org │ │ │ └── liuneng │ │ │ ├── base │ │ │ ├── LogLevel.java │ │ │ ├── DataProcessingMetrics.java │ │ │ ├── InputNode.java │ │ │ ├── OutputNode.java │ │ │ ├── monitor │ │ │ │ └── DataflowMonitor.java │ │ │ ├── EtlLog.java │ │ │ ├── Row.java │ │ │ ├── Node.java │ │ │ ├── Pipe.java │ │ │ └── Dataflow.java │ │ │ ├── exception │ │ │ ├── DataflowPrestartException.java │ │ │ ├── DataflowStoppingException.java │ │ │ ├── NodeException.java │ │ │ ├── DataflowException.java │ │ │ ├── EtlEngineException.java │ │ │ ├── NodePrestartException.java │ │ │ ├── NodeReadingException.java │ │ │ └── NodeWritingException.java │ │ │ ├── node │ │ │ ├── UpsertTag.java │ │ │ ├── ValueConvertNode.java │ │ │ ├── DeleteOutputNode.java │ │ │ ├── FileOutputNode.java │ │ │ ├── SqlInputNode.java │ │ │ ├── InsertOutputNode.java │ │ │ └── UpsertOutputNode.java │ │ │ └── util │ │ │ ├── Tuple2.java │ │ │ ├── NodeHelper.java │ │ │ ├── Tuple3.java │ │ │ ├── StrUtil.java │ │ │ ├── CsvConverter.java │ │ │ ├── DBUtil.java │ │ │ └── DataflowHelper.java │ │ └── resources │ │ ├── META-INF │ │ └── maven │ │ │ └── archetype.xml │ │ ├── archetype-resources │ │ ├── src │ │ │ ├── main │ │ │ │ └── java │ │ │ │ │ └── App.java │ │ │ └── test │ │ │ │ └── java │ │ │ │ └── AppTest.java │ │ └── pom.xml │ │ └── log4j2.xml ├── .gitignore └── pom.xml ├── .gitignore ├── samples ├── src │ ├── main │ │ └── java │ │ │ └── io │ │ │ └── github │ │ │ └── add2ws │ │ │ ├── ValueMappingConverterNode.java │ │ │ ├── OracleToPG.java │ │ │ ├── util │ │ │ └── DataSourceUtil.java │ │ │ └── SamplesMain.java │ └── test │ │ └── java │ │ └── com │ │ └── test │ │ └── TestCase1.java └── pom.xml ├── README.md.txt ├── LICENSE ├── README.cn.md ├── README.md └── pom.xml /image/README.cn/1765353174408.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/add2ws/etl-engine-project/HEAD/image/README.cn/1765353174408.png -------------------------------------------------------------------------------- /image/README.cn/1765353763090.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/add2ws/etl-engine-project/HEAD/image/README.cn/1765353763090.png -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/base/LogLevel.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.base; 2 | 3 | public enum LogLevel { 4 | 5 | INFO, ERROR 6 | } 7 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/exception/DataflowPrestartException.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.exception; 2 | 3 | public class DataflowPrestartException extends DataflowException { 4 | public DataflowPrestartException(String message) { 5 | super(message); 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/exception/DataflowStoppingException.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.exception; 2 | 3 | public class DataflowStoppingException extends DataflowException { 4 | public DataflowStoppingException(String message) { 5 | super(message); 6 | } 7 | } 8 | 
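The classes in org.liuneng.exception form a small unchecked hierarchy rooted at EtlEngineException, with DataflowException and NodeException as its two branches (the prestart/reading/writing/stopping subclasses appear below). A minimal caller-side sketch, illustrative only and not a file in this repository, assuming syncStart is invoked the way the samples module does:

```java
// Illustrative sketch: branching on the org.liuneng.exception hierarchy.
// EtlEngineException is the unchecked root; NodeException and DataflowException
// narrow it. The final catch only exists in case syncStart declares a checked
// exception (its signature is not shown in this listing).
import org.liuneng.base.Dataflow;
import org.liuneng.exception.DataflowException;
import org.liuneng.exception.EtlEngineException;
import org.liuneng.exception.NodeException;

import java.util.concurrent.TimeUnit;

public class ExceptionHandlingSketch {

    static void runWithDiagnostics(Dataflow dataflow) {
        try {
            dataflow.syncStart(5, TimeUnit.MINUTES);
        } catch (NodeException e) {
            // a node failed while prestarting, reading or writing rows
            System.err.println("node failure: " + e.getMessage());
        } catch (DataflowException e) {
            // the dataflow itself failed to prestart or stop cleanly
            System.err.println("dataflow failure: " + e.getMessage());
        } catch (EtlEngineException e) {
            // any other engine-level failure
            System.err.println("engine failure: " + e.getMessage());
        } catch (Exception e) {
            // checked exceptions from syncStart, if it declares any
            throw new RuntimeException(e);
        }
    }
}
```

Since the whole hierarchy extends RuntimeException, callers can also let failures propagate and rely on the configurable retry behaviour the README describes.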
-------------------------------------------------------------------------------- /etl-engine/src/main/resources/META-INF/maven/archetype.xml: -------------------------------------------------------------------------------- 1 | 2 | etl-engine 3 | 4 | src/main/java/App.java 5 | 6 | 7 | src/test/java/AppTest.java 8 | 9 | 10 | -------------------------------------------------------------------------------- /etl-engine/src/main/resources/archetype-resources/src/main/java/App.java: -------------------------------------------------------------------------------- 1 | package $org.example; 2 | 3 | /** 4 | * Hello world! 5 | * 6 | */ 7 | public class App 8 | { 9 | public static void main( String[] args ) 10 | { 11 | System.out.println( "Hello World!" ); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/exception/NodeException.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.exception; 2 | 3 | public class NodeException extends EtlEngineException { 4 | public NodeException(String message) { 5 | super(message); 6 | } 7 | 8 | public NodeException(Throwable cause) { 9 | super(cause); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/exception/DataflowException.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.exception; 2 | 3 | public class DataflowException extends EtlEngineException { 4 | public DataflowException(String message) { 5 | super(message); 6 | } 7 | 8 | public DataflowException(Throwable cause) { 9 | super(cause); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/exception/EtlEngineException.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.exception; 2 | 3 | public class EtlEngineException extends RuntimeException { 4 | public EtlEngineException(String message) { 5 | super(message); 6 | } 7 | 8 | public EtlEngineException(Throwable cause) { 9 | super(cause); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/exception/NodePrestartException.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.exception; 2 | 3 | public class NodePrestartException extends NodeException { 4 | public NodePrestartException(String message) { 5 | super(message); 6 | } 7 | 8 | public NodePrestartException(Throwable cause) { 9 | super(cause); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/exception/NodeReadingException.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.exception; 2 | 3 | public class NodeReadingException extends NodeException { 4 | public NodeReadingException(String message) { 5 | super(message); 6 | } 7 | 8 | public NodeReadingException(Throwable cause) { 9 | super(cause); 10 | } 11 | 12 | } 13 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/exception/NodeWritingException.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.exception; 2 | 3 
| public class NodeWritingException extends NodeException { 4 | public NodeWritingException(String message) { 5 | super(message); 6 | } 7 | 8 | public NodeWritingException(Throwable cause) { 9 | super(cause); 10 | } 11 | 12 | } 13 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/base/DataProcessingMetrics.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.base; 2 | 3 | public interface DataProcessingMetrics { 4 | 5 | String getId(); 6 | 7 | long getStartTime(); 8 | 9 | long getProcessed(); 10 | 11 | long getProcessingRate(); 12 | 13 | long getInserted(); 14 | 15 | long getInsertingRate(); 16 | 17 | long getUpdated(); 18 | 19 | long getUpdatingRate(); 20 | 21 | long getDeleted(); 22 | 23 | long getDeletingRate(); 24 | 25 | 26 | } 27 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/base/InputNode.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.base; 2 | 3 | import org.liuneng.exception.NodeException; 4 | import org.liuneng.exception.NodeReadingException; 5 | 6 | public interface InputNode { 7 | 8 | long getProcessed(); 9 | 10 | long getProcessingRate(); 11 | 12 | long getStartTime(); 13 | 14 | Row read() throws NodeReadingException; 15 | 16 | String[] getInputColumns() throws NodeException; 17 | 18 | default Node asNode() { 19 | return (Node) this; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/base/OutputNode.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.base; 2 | 3 | import org.liuneng.exception.NodeException; 4 | import org.liuneng.exception.NodeWritingException; 5 | 6 | public interface OutputNode { 7 | 8 | long getProcessed(); 9 | 10 | long getProcessingRate(); 11 | 12 | long getStartTime(); 13 | 14 | void write(Row row) throws NodeWritingException; 15 | 16 | String[] getOutputColumns() throws NodeException; 17 | 18 | default Node asNode() { 19 | return (Node) this; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /etl-engine/src/main/resources/archetype-resources/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | $org.example 5 | $etl-engine 6 | $1.0-SNAPSHOT 7 | 8 | 9 | junit 10 | junit 11 | 3.8.1 12 | test 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | !**/src/main/**/target/ 4 | !**/src/test/**/target/ 5 | .kotlin 6 | 7 | ### IntelliJ IDEA ### 8 | .idea 9 | .idea/modules.xml 10 | .idea/jarRepositories.xml 11 | .idea/compiler.xml 12 | .idea/libraries/ 13 | *.iws 14 | *.iml 15 | *.ipr 16 | 17 | ### Eclipse ### 18 | .apt_generated 19 | .classpath 20 | .factorypath 21 | .project 22 | .settings 23 | .springBeans 24 | .sts4-cache 25 | 26 | ### NetBeans ### 27 | /nbproject/private/ 28 | /nbbuild/ 29 | /dist/ 30 | /nbdist/ 31 | /.nb-gradle/ 32 | build/ 33 | !**/src/main/**/build/ 34 | !**/src/test/**/build/ 35 | 36 | ### VS Code ### 37 | .vscode/ 38 | 39 | ### Mac OS ### 40 | .DS_Store -------------------------------------------------------------------------------- 
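The InputNode and OutputNode interfaces above, together with the abstract Node base class that appears further down in this listing, are the full contract a custom source or sink has to satisfy. Below is a minimal in-memory source sketch; it is hypothetical, not a file in this repository, and it assumes Row.fromMap accepts a Map<String, Object> and that returning Row.ofEnd() signals end of stream, as the isEnd() checks in the output nodes suggest.

```java
// Hypothetical custom source: serves rows from an in-memory list.
import org.liuneng.base.InputNode;
import org.liuneng.base.Node;
import org.liuneng.base.Row;
import org.liuneng.exception.NodeReadingException;

import java.util.Iterator;
import java.util.List;
import java.util.Map;

public class ListInputNode extends Node implements InputNode {

    private final Iterator<Map<String, Object>> iterator;
    private final String[] columns;
    private long processed;
    private long startTime;

    public ListInputNode(List<Map<String, Object>> rows, String[] columns) {
        this.iterator = rows.iterator();
        this.columns = columns;
    }

    @Override
    public Row read() throws NodeReadingException {
        if (startTime == 0) {
            startTime = System.currentTimeMillis();
        }
        if (!iterator.hasNext()) {
            return Row.ofEnd(); // assumed end-of-stream marker, mirroring the built-in nodes
        }
        processed++;
        return Row.fromMap(iterator.next());
    }

    @Override
    public String[] getInputColumns() {
        return columns;
    }

    @Override
    public long getProcessed() {
        return processed;
    }

    @Override
    public long getProcessingRate() {
        return 0; // metrics left out of this sketch
    }

    @Override
    public long getStartTime() {
        return startTime;
    }

    @Override
    protected void onDataflowStop() {
        // nothing to release for an in-memory source
    }
}
```

Wired to any OutputNode through a Pipe, such a node can head a Dataflow exactly like the built-in SqlInputNode.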
/etl-engine/.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | !**/src/main/**/target/ 4 | !**/src/test/**/target/ 5 | 6 | ### IntelliJ IDEA ### 7 | .idea/modules.xml 8 | .idea/jarRepositories.xml 9 | .idea/compiler.xml 10 | .idea/libraries/ 11 | .idea/ 12 | *.iws 13 | *.iml 14 | *.ipr 15 | 16 | ### Eclipse ### 17 | .apt_generated 18 | .classpath 19 | .factorypath 20 | .project 21 | .settings 22 | .springBeans 23 | .sts4-cache 24 | 25 | ### NetBeans ### 26 | /nbproject/private/ 27 | /nbbuild/ 28 | /dist/ 29 | /nbdist/ 30 | /.nb-gradle/ 31 | build/ 32 | !**/src/main/**/build/ 33 | !**/src/test/**/build/ 34 | 35 | ### VS Code ### 36 | .vscode/ 37 | 38 | ### Mac OS ### 39 | .DS_Store -------------------------------------------------------------------------------- /etl-engine/src/main/resources/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | %d{yyyy-MM-dd HH:mm:ss} %-5p %c{1} - %m%n 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/node/UpsertTag.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.node; 2 | 3 | 4 | public enum UpsertTag { 5 | 6 | COMPARE_ONLY(1), //仅比对 7 | UPDATE_ONLY(2), //仅更新 8 | COMPARE_AND_UPDATE(3); //比对且更新 9 | 10 | private final int code; 11 | 12 | UpsertTag(int code) { 13 | this.code = code; 14 | } 15 | 16 | public int getCode() { 17 | return code; 18 | } 19 | 20 | public static UpsertTag fromCode(int code) { 21 | for (UpsertTag upsertTag : UpsertTag.values()) { 22 | if (upsertTag.getCode() == code) { 23 | return upsertTag; 24 | } 25 | } 26 | return null; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/util/Tuple2.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.util; 2 | 3 | public class Tuple2 { 4 | 5 | private T1 partA; 6 | private T2 partB; 7 | 8 | public Tuple2(T1 partA, T2 partB) { 9 | this.partA = partA; 10 | this.partB = partB; 11 | } 12 | 13 | public T1 getPartA() { 14 | return partA; 15 | } 16 | 17 | public T2 getPartB() { 18 | return partB; 19 | } 20 | 21 | public void setPartA(T1 partA) { 22 | this.partA = partA; 23 | } 24 | 25 | public void setPartB(T2 partB) { 26 | this.partB = partB; 27 | } 28 | 29 | @Override 30 | public String toString() { 31 | return String.format("(%s, %s)", partA, partB); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/base/monitor/DataflowMonitor.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.base.monitor; 2 | 3 | import org.liuneng.base.Dataflow; 4 | 5 | public class DataflowMonitor { 6 | 7 | private Dataflow dataflowInstance; 8 | 9 | private DataflowMonitor() { 10 | 11 | } 12 | 13 | public static DataflowMonitor watch(Dataflow dataflow) { 14 | DataflowMonitor dataflowMonitor = new DataflowMonitor(); 15 | dataflowMonitor.setDataflowInstance(dataflow); 16 | 17 | return dataflowMonitor; 18 | } 19 | 20 | 21 | public Dataflow getDataflowInstance() { 22 | return dataflowInstance; 23 | } 24 | 25 | public void setDataflowInstance(Dataflow dataflowInstance) { 26 | this.dataflowInstance = 
dataflowInstance; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/util/NodeHelper.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.util; 2 | 3 | import org.liuneng.base.InputNode; 4 | import org.slf4j.Logger; 5 | import org.slf4j.LoggerFactory; 6 | 7 | import java.util.Arrays; 8 | import java.util.LinkedHashSet; 9 | import java.util.Set; 10 | 11 | public class NodeHelper { 12 | 13 | private final static Logger log = LoggerFactory.getLogger(NodeHelper.class); 14 | 15 | public static String[] getUpstreamColumns(InputNode inputNode) { 16 | Set columns = new LinkedHashSet<>(); 17 | InputNode current = inputNode; 18 | do { 19 | columns.addAll(Arrays.asList(current.getInputColumns())); 20 | current = current.asNode().getBeforeNode().orElse(null); 21 | } while (current != null); 22 | 23 | return columns.toArray(new String[0]); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/util/Tuple3.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.util; 2 | 3 | public class Tuple3 { 4 | 5 | private T1 partA; 6 | private T2 partB; 7 | private T3 partC; 8 | 9 | public Tuple3(T1 partA, T2 partB, T3 partC) { 10 | this.partA = partA; 11 | this.partB = partB; 12 | this.partC = partC; 13 | } 14 | 15 | public T1 getPartA() { 16 | return partA; 17 | } 18 | 19 | public T2 getPartB() { 20 | return partB; 21 | } 22 | 23 | public void setPartA(T1 partA) { 24 | this.partA = partA; 25 | } 26 | 27 | public void setPartB(T2 partB) { 28 | this.partB = partB; 29 | } 30 | 31 | public T3 getPartC() { 32 | return partC; 33 | } 34 | 35 | public void setPartC(T3 partC) { 36 | this.partC = partC; 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /etl-engine/src/main/resources/archetype-resources/src/test/java/AppTest.java: -------------------------------------------------------------------------------- 1 | package $org.example; 2 | 3 | import junit.framework.Test; 4 | import junit.framework.TestCase; 5 | import junit.framework.TestSuite; 6 | 7 | /** 8 | * Unit test for simple App. 
9 | */ 10 | public class AppTest 11 | extends TestCase 12 | { 13 | /** 14 | * Create the test case 15 | * 16 | * @param testName name of the test case 17 | */ 18 | public AppTest( String testName ) 19 | { 20 | super( testName ); 21 | } 22 | 23 | /** 24 | * @return the suite of tests being tested 25 | */ 26 | public static Test suite() 27 | { 28 | return new TestSuite( AppTest.class ); 29 | } 30 | 31 | /** 32 | * Rigourous Test :-) 33 | */ 34 | public void testApp() 35 | { 36 | assertTrue( true ); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /samples/src/main/java/io/github/add2ws/ValueMappingConverterNode.java: -------------------------------------------------------------------------------- 1 | package io.github.add2ws; 2 | 3 | import org.liuneng.base.Row; 4 | import org.liuneng.exception.NodeException; 5 | import org.liuneng.node.ValueConvertNode; 6 | 7 | public class ValueMappingConverterNode extends ValueConvertNode { 8 | @Override 9 | public Row convert(Row row) { 10 | 11 | // row.getData().get("jyzlb") 12 | 13 | return row; 14 | } 15 | 16 | @Override 17 | public long getProcessed() { 18 | return 0; 19 | } 20 | 21 | @Override 22 | public long getProcessingRate() { 23 | return 0; 24 | } 25 | 26 | @Override 27 | public long getStartTime() { 28 | return 0; 29 | } 30 | 31 | @Override 32 | public String[] getOutputColumns() throws NodeException { 33 | return new String[0]; 34 | } 35 | 36 | @Override 37 | protected void onDataflowStop() { 38 | 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/base/EtlLog.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.base; 2 | 3 | import lombok.Data; 4 | 5 | @Data 6 | public class EtlLog { 7 | 8 | private String id; 9 | 10 | private String dataflowID; 11 | 12 | private String nodeID; 13 | 14 | private long inputted; 15 | 16 | private long outputted; 17 | 18 | private long inserted; 19 | 20 | private long updated; 21 | 22 | private int currentBufferSize; 23 | 24 | private long timestamp; 25 | 26 | private LogLevel logLevel; 27 | 28 | private String message; 29 | 30 | 31 | public static EtlLog errorLog(String message) { 32 | EtlLog etlLog = new EtlLog(); 33 | etlLog.setLogLevel(LogLevel.ERROR); 34 | etlLog.setMessage(message); 35 | return etlLog; 36 | } 37 | 38 | public static EtlLog infoLog(String message) { 39 | EtlLog etlLog = new EtlLog(); 40 | etlLog.setLogLevel(LogLevel.INFO); 41 | etlLog.setMessage(message); 42 | return etlLog; 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /README.md.txt: -------------------------------------------------------------------------------- 1 | 帮我编写一个开源项目的readme.md文档,要求言简意赅,突出数据同步速度快、运行稳健、易扩展3大特性。 文档支持中文和英文: 2 | 项目名称:etl-engine 3 | 项目描述:一个快速、稳健、易扩展的ETL库(对标Kettle),实测200000数据,插入/更新速度是Kettle的2倍 4 | 使用代码示例: 5 | ```sql 6 | //获取数据源 7 | DataSource dataSourceOracle = DataSourceUtil.getOracleDataSource(); 8 | DataSource dataSourcePG = DataSourceUtil.getPostgresDataSource(); 9 | 10 | //创建表输入节点 11 | SqlInputNode sqlInputNode = new SqlInputNode(dataSourceOracle, "select * from t_resident_info"); 12 | 13 | //创建插入/更新节点 14 | UpsertOutputNode upsertOutputNode = new UpsertOutputNode(dataSourcePG, "t_resident_info", 1000); 15 | upsertOutputNode.setIdentityMapping(Arrays.asList(new Tuple2<>("ID", "ID"))); 16 | 17 | //创建管道 18 | Pipe pipe = new Pipe(1000); 19 | //连接表输入和输出节点 20 | 
pipe.connect(sqlInputNode, upsertOutputNode); 21 | 22 | //创建数据流实例 23 | Dataflow dataflow = new Dataflow(sqlInputNode); 24 | //启动数据流 25 | dataflow.syncStart(5, TimeUnit.MINUTES); 26 | 27 | ``` 28 | 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 LiuNeng 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/util/StrUtil.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.util; 2 | 3 | import java.math.BigDecimal; 4 | import java.util.Date; 5 | 6 | public class StrUtil { 7 | 8 | public static boolean isBlank(String str) { 9 | return str == null || str.trim().isEmpty(); 10 | } 11 | 12 | public static boolean isNotBlank(String str) { 13 | return !isBlank(str); 14 | } 15 | 16 | public static String objectToString(Object obj) { 17 | return obj == null ? 
"" : obj.toString(); 18 | } 19 | 20 | public static boolean isEqual(Object obj1, Object obj2) { 21 | if (obj1 instanceof BigDecimal) { 22 | obj1 = ((BigDecimal) obj1).stripTrailingZeros().toPlainString(); 23 | } 24 | 25 | if (obj2 instanceof BigDecimal) { 26 | obj2 = ((BigDecimal) obj2).stripTrailingZeros().toPlainString(); 27 | } 28 | 29 | if (obj1 instanceof String) { 30 | obj1 = ((String) obj1).trim(); 31 | } 32 | 33 | if (obj2 instanceof String) { 34 | obj2 = ((String) obj2).trim(); 35 | } 36 | 37 | if (obj1 instanceof Date) { 38 | obj1 = ((Date) obj1).getTime(); 39 | } 40 | 41 | if (obj2 instanceof Date) { 42 | obj2 = ((Date) obj2).getTime(); 43 | } 44 | 45 | return objectToString(obj1).equals(objectToString(obj2)); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/base/Row.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.base; 2 | 3 | //import cn.hutool.core.map.CaseInsensitiveLinkedMap; 4 | import cn.hutool.json.JSONUtil; 5 | import org.springframework.util.LinkedCaseInsensitiveMap; 6 | 7 | import java.util.Map; 8 | 9 | 10 | public class Row { 11 | 12 | private boolean end; 13 | 14 | private final Map data = new LinkedCaseInsensitiveMap<>(); 15 | 16 | private Row() {} 17 | 18 | public static Row ofEnd() { 19 | Row row = new Row(); 20 | row.setEnd(true); 21 | return row; 22 | } 23 | 24 | public static Row fromMap(Map d) { 25 | Row row = new Row(); 26 | row.data.putAll(d); 27 | row.end = false; 28 | return row; 29 | } 30 | 31 | public Map getData() { 32 | return data; 33 | } 34 | 35 | public Object get(String key) { 36 | return data.get(key); 37 | } 38 | 39 | 40 | public void put(String key, Object val) { 41 | this.data.put(key, val); 42 | } 43 | 44 | @Override 45 | public String toString() { 46 | return data.toString(); 47 | } 48 | 49 | public String toJSONString() { 50 | return JSONUtil.toJsonStr(data); 51 | } 52 | 53 | public boolean isEnd() { 54 | return end; 55 | } 56 | 57 | public void setEnd(boolean end) { 58 | this.end = end; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/util/CsvConverter.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.util; 2 | 3 | import java.util.Collection; 4 | import java.util.stream.Collectors; 5 | 6 | public class CsvConverter { 7 | 8 | public static String ListToCsvRow(Collection valueList) { 9 | if (valueList == null) { 10 | return ""; 11 | } 12 | 13 | String csvRow = valueList.stream().map(CsvConverter::formatCsvValue).collect(Collectors.joining(",")); 14 | return csvRow; 15 | } 16 | 17 | /** 18 | * 将任何对象转换为适合 CSV 格式的字符串,并处理特殊字符。 19 | * 遵循标准的 CSV 引用和转义规则: 20 | * - 如果值包含逗号、双引号或换行符,则整个值用双引号包裹。 21 | * - 值中的双引号需要转义为两个双引号 (""")。 22 | * 23 | * @param value 待格式化的对象 24 | * @return 格式化后的 CSV 字段字符串 25 | */ 26 | private static String formatCsvValue(Object value) { 27 | if (value == null) { 28 | return ""; 29 | } 30 | 31 | // 将值转换为字符串 32 | String stringValue = value.toString(); 33 | 34 | // 检查是否需要引用(即是否包含特殊字符:逗号, 双引号, 换行符) 35 | if (stringValue.contains(",") || stringValue.contains("\"") || stringValue.contains("\n")) { 36 | 37 | // 1. 转义内部的双引号:将 " 替换为 "" 38 | String escapedValue = stringValue.replace("\"", "\"\""); 39 | 40 | // 2. 
用双引号将整个值包裹起来 41 | return "\"" + escapedValue + "\""; 42 | } 43 | 44 | // 如果不包含特殊字符,则直接返回原字符串 45 | return stringValue; 46 | } 47 | } -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/node/ValueConvertNode.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.node; 2 | 3 | import org.liuneng.base.OutputNode; 4 | import org.liuneng.base.InputNode; 5 | import org.liuneng.base.Node; 6 | import org.liuneng.base.Row; 7 | import org.liuneng.exception.NodeException; 8 | import org.liuneng.exception.NodeReadingException; 9 | import org.liuneng.exception.NodeWritingException; 10 | import org.liuneng.util.Tuple2; 11 | 12 | import java.util.Collections; 13 | import java.util.List; 14 | import java.util.concurrent.BlockingQueue; 15 | import java.util.concurrent.SynchronousQueue; 16 | 17 | public abstract class ValueConvertNode extends Node implements InputNode, OutputNode { 18 | private final BlockingQueue list = new SynchronousQueue<>(); 19 | 20 | 21 | 22 | @Override 23 | public Row read() throws NodeReadingException { 24 | try { 25 | return list.take(); 26 | } catch (InterruptedException e) { 27 | throw new NodeReadingException(e); 28 | } 29 | } 30 | 31 | @Override 32 | public void write(Row row) throws NodeWritingException { 33 | row = convert(row); 34 | try { 35 | list.put(row); 36 | } catch (InterruptedException e) { 37 | throw new NodeWritingException(e); 38 | } 39 | } 40 | 41 | public abstract Row convert(Row row); 42 | 43 | 44 | 45 | public List> getColumnMapping() { 46 | return Collections.emptyList(); 47 | } 48 | 49 | public void setColumnMapping(List> columnsMapping) { 50 | 51 | } 52 | 53 | @Override 54 | public String[] getInputColumns() throws NodeException { 55 | return this.getBeforeNode().orElseThrow(() -> new NodeException("无法获得上个节点的列")).getInputColumns(); 56 | } 57 | 58 | @Override 59 | public Node asNode() { 60 | return this; 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/util/DBUtil.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.util; 2 | 3 | import org.springframework.jdbc.support.JdbcUtils; 4 | import org.springframework.util.LinkedCaseInsensitiveMap; 5 | 6 | import javax.sql.DataSource; 7 | import java.sql.*; 8 | import java.util.Map; 9 | 10 | public class DBUtil { 11 | 12 | public static String[] lookupColumns(DataSource ds, String tableName) throws SQLException { 13 | String[] columns = null; 14 | Connection connection = null; 15 | PreparedStatement statement = null; 16 | try { 17 | connection = ds.getConnection(); 18 | statement = connection.prepareStatement("select * from " + tableName); 19 | columns = new String[statement.getMetaData().getColumnCount()]; 20 | for (int i = 0; i < statement.getMetaData().getColumnCount(); i++) { 21 | String targetColumn = statement.getMetaData().getColumnLabel(i + 1); 22 | columns[i] = targetColumn; 23 | } 24 | 25 | } finally { 26 | if (statement != null) { 27 | statement.close(); 28 | } 29 | if (connection != null) { 30 | connection.close(); 31 | } 32 | } 33 | return columns; 34 | } 35 | 36 | public static Map mapRow(ResultSet rs) throws SQLException { 37 | 38 | ResultSetMetaData rsmd = rs.getMetaData(); 39 | int columnCount = rsmd.getColumnCount(); 40 | Map mapOfColumnValues = new LinkedCaseInsensitiveMap<>(columnCount); 41 | 42 | for(int i = 1; i <= columnCount; 
++i) { 43 | String column = JdbcUtils.lookupColumnName(rsmd, i); 44 | mapOfColumnValues.putIfAbsent(column, JdbcUtils.getResultSetValue(rs, i)); 45 | } 46 | 47 | return mapOfColumnValues; 48 | } 49 | 50 | public static int testReference() { 51 | return 9527; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /README.cn.md: -------------------------------------------------------------------------------- 1 | # Etl-engine 2 | 3 | **中文** | [English](README.en.md) 4 | 5 | ## 🚀 简介:高性能 ETL 引擎 6 | 7 | **`Etl-engine`** 是一个轻量、稳健、易扩展的面向开发者的 ETL(抽取、转换、加载)库,旨在成为 **Kettle (PDI) 的高性能替代方案。 8 | 9 | ----- 10 | 11 | ## 🔥 核心优势 12 | 13 | **`Etl-engine`** 提供以下三大核心特性: 14 | 15 | ### 1. 极致的速度 ⚡️ 16 | 17 | 通过批量操作和非阻塞的缓存管道设计,显著提升数据处理和数据库 I/O 速度。 18 | 19 | 📊 **实测数据:** 处理 $200,000$ 条数据的插入/更新任务,`etl-engine` 的速度是 **Kettle 的 $\mathbf{2}$ 倍左右**。 20 | 21 | **Kettle:** 22 | ![1765353174408](image/README.cn/1765353174408.png) 23 | 24 | **Etl-engine:** 25 | ![1765353763090](image/README.cn/1765353763090.png) 26 | 27 | ### 2. 运行稳健可靠 🛡️ 28 | 29 | 数据流传输过程中如果遇到异常不会马上停止,可以配置重试次数,自动尝试重新读取或写入数据。 30 | 31 | ### 3. 轻量且易于扩展 🧩 32 | 33 | 核心仅由 **Node(节点)** , **Pipe(管道)** , **Dataflow(数据流)** 3个主要组件构成,所有数据加载逻辑都抽象为可扩展的**节点**。除了内置的JDBC数据源节点,用户可以轻松继承基类,快速开发新的数据源(如 Http、Redis)或自定义转换逻辑,满足特定的业务需求。 34 | 35 | ----- 36 | 37 | ## 🛠️ 使用示例 38 | 39 | 以下代码展示了如何快速构建一个将 **Oracle 数据(抽取)** 通过 **Upsert 方式同步到 PostgreSQL(加载)** 的 ETL 任务。 40 | 41 | ### 1. 一个表输入到一个表输出 42 | 43 | ```mermaid 44 | flowchart LR 45 | sqlInputNode --pipe(10000)--> upsertOutputNode 46 | ``` 47 | 48 | ```java 49 | // 1. 获取数据源 50 | DataSource dataSourceOracle = DataSourceUtil.getOracleDataSource(); 51 | DataSource dataSourcePG = DataSourceUtil.getPostgresDataSource(); 52 | 53 | // 2. 创建输入节点 54 | SqlInputNode sqlInputNode = new SqlInputNode(dataSourceOracle, "select * from t_resident_info"); 55 | 56 | // 3. 创建插入/更新节点 57 | // 批量大小 1000 58 | UpsertOutputNode upsertOutputNode = new UpsertOutputNode(dataSourcePG, "t_resident_info", 1000); 59 | // 设置唯一标识映射,用于判断 Insert 或 Update 60 | upsertOutputNode.setIdentityMapping(Arrays.asList(new Tuple2<>("ID", "ID"))); 61 | 62 | // 4. 创建管道并连接节点 63 | Pipe pipe = new Pipe(1000); // 管道缓存大小 1000 64 | pipe.connect(sqlInputNode, upsertOutputNode); 65 | 66 | // 5. 启动数据流 67 | Dataflow dataflow = new Dataflow(sqlInputNode); 68 | dataflow.syncStart(5, TimeUnit.MINUTES); // 设置超时时间 69 | ``` 70 | 71 | ### 2. 一个表输入经过字段值转换到一个表输出 72 | 73 | ```mermaid 74 | flowchart LR 75 | sqlInputNode --pipe(10000)-->valueConverter --pipe(10000)--> upsertOutputNode 76 | ``` 77 | 78 | ```java 79 | //todo 80 | ``` 81 | 82 | ### 3. 
一个表输入到多个输出 83 | 84 | ```mermaid 85 | flowchart LR 86 | sqlInputNode --pipe(10000)-->valueConverter --pipe(10000)--> upsertOutputNode 87 | sqlInputNode --pipe(10000)--> csvOutputNode 88 | ``` 89 | 90 | ```java 91 | //todo 92 | ``` 93 | 94 | ----- 95 | 96 | ## 🏗️ 架构概览 97 | 98 | `Etl-engine` 核心仅由以下3个主要组件构成: 99 | 100 | * **Node (节点):** 数据的起点、终点和数据转换逻辑载体。 101 | * **Pipe (管道):** 负责在节点间传递数据的非阻塞缓存队列。 102 | * **Dataflow (数据流):** 任务的编排器和执行入口。 103 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/base/Node.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.base; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | import org.liuneng.exception.NodePrestartException; 6 | import org.liuneng.util.StrUtil; 7 | 8 | import java.util.ArrayList; 9 | import java.util.Collections; 10 | import java.util.List; 11 | import java.util.Optional; 12 | import java.util.stream.Collectors; 13 | 14 | public abstract class Node { 15 | 16 | @Setter 17 | @Getter 18 | private String id; 19 | 20 | @Setter 21 | private String name; 22 | 23 | protected Dataflow dataflowInstance; 24 | 25 | private Pipe beforePipe; 26 | 27 | @Getter 28 | private final List afterPipes = new ArrayList<>(); 29 | 30 | public Optional getBeforePipe() { 31 | return Optional.ofNullable(beforePipe); 32 | } 33 | 34 | public Optional getBeforeNode() { 35 | if (this.getBeforePipe().isPresent()) { 36 | return this.getBeforePipe().get().getFrom(); 37 | } else { 38 | return Optional.empty(); 39 | } 40 | } 41 | 42 | public List getAfterNodes() { 43 | if (this.getAfterPipes().isEmpty()) { 44 | return Collections.emptyList(); 45 | } else { 46 | return this.getAfterPipes().stream().map(pipe -> pipe.getTo().orElse(null)).collect(Collectors.toList()); 47 | } 48 | } 49 | 50 | protected void setBeforePipe(Pipe beforePipe) { 51 | this.beforePipe = beforePipe; 52 | } 53 | 54 | protected void setAfterPipes(List afterPipes) { 55 | this.afterPipes.clear(); 56 | this.afterPipes.addAll(afterPipes); 57 | } 58 | 59 | protected void addAfterPipe(Pipe pipe) { 60 | afterPipes.add(pipe); 61 | } 62 | 63 | public String getName() { 64 | return StrUtil.isBlank(name) ? 
id : name; 65 | } 66 | 67 | protected Dataflow getDataflowInstance() { 68 | return dataflowInstance; 69 | } 70 | 71 | protected void setDataflowInstance(Dataflow dataflowInstance) { 72 | this.dataflowInstance = dataflowInstance; 73 | } 74 | 75 | protected void prestart(Dataflow dataflow) throws NodePrestartException { 76 | this.dataflowInstance = dataflow; 77 | 78 | // dataflow.getDataTransferExecutor().execute(() -> { 79 | // try { 80 | // this.dataflowInstance.awaitStoppingSignal(); 81 | // } catch (InterruptedException e) { 82 | // throw new RuntimeException(e); 83 | // } finally { 84 | // this.onStop(); 85 | // } 86 | // }); 87 | 88 | }; 89 | 90 | protected abstract void onDataflowStop(); 91 | 92 | } -------------------------------------------------------------------------------- /samples/src/main/java/io/github/add2ws/OracleToPG.java: -------------------------------------------------------------------------------- 1 | package io.github.add2ws; 2 | 3 | import io.github.add2ws.util.DataSourceUtil; 4 | import lombok.extern.slf4j.Slf4j; 5 | import org.liuneng.base.Dataflow; 6 | import org.liuneng.base.Pipe; 7 | import org.liuneng.node.InsertOutputNode; 8 | import org.liuneng.node.SqlInputNode; 9 | import org.liuneng.node.UpsertOutputNode; 10 | import org.liuneng.util.DataflowHelper; 11 | import org.liuneng.util.Tuple2; 12 | 13 | import javax.sql.DataSource; 14 | import java.util.Arrays; 15 | import java.util.concurrent.TimeUnit; 16 | 17 | @Slf4j 18 | public class OracleToPG { 19 | 20 | static void oracleToPG() { 21 | DataSource dataSourceOracle = DataSourceUtil.getOracleDataSourcePool(); 22 | DataSource dataSourcePG = DataSourceUtil.getPostgresDataSourcePool(); 23 | 24 | SqlInputNode sqlInputNode = new SqlInputNode(dataSourceOracle, "select * from etl_base.t_resident_info where 1=1 and rownum<= 200000", 1000); 25 | UpsertOutputNode upsertOutputNode = new UpsertOutputNode(dataSourcePG, "t_resident_info", 1000); 26 | upsertOutputNode.setInsertOnly(true); 27 | upsertOutputNode.setIdentityMapping(Arrays.asList(new Tuple2<>("ID", "ID"))); 28 | Pipe pipe = new Pipe(10000); 29 | pipe.connect(sqlInputNode, upsertOutputNode); 30 | 31 | Dataflow dataflow = new Dataflow(sqlInputNode); 32 | dataflow.setProcessingThresholdLog(100); 33 | // DataflowHelper.logListener(dataflow, etlLog -> { 34 | // System.out.println(etlLog.getMessage()); 35 | // }); 36 | try { 37 | dataflow.syncStart(5, TimeUnit.MINUTES); 38 | } catch (Exception e) { 39 | throw new RuntimeException(e); 40 | } 41 | 42 | } 43 | 44 | static void oracleToPGInsert() { 45 | DataSource dataSourceOracle = DataSourceUtil.getOracleDataSource(); 46 | DataSource dataSourcePG = DataSourceUtil.getPostgresDataSourcePool(); 47 | log.info("使用连接池。。。。。。。。。。。。。。。。。。。。。。。。。。"); 48 | 49 | SqlInputNode sqlInputNode = new SqlInputNode(dataSourceOracle, "select * from etl_base.t_resident_info where 1=1 and rownum<= 200000", 1000); 50 | InsertOutputNode outputNode = new InsertOutputNode(dataSourcePG, "t_resident_info", 1000); 51 | Pipe pipe = new Pipe(10000); 52 | pipe.connect(sqlInputNode, outputNode); 53 | 54 | Dataflow dataflow = new Dataflow(sqlInputNode); 55 | dataflow.setProcessingThresholdLog(5000); 56 | DataflowHelper.logListener(dataflow, etlLog -> { 57 | System.out.println(etlLog.getMessage()); 58 | }); 59 | try { 60 | dataflow.syncStart(5, TimeUnit.MINUTES); 61 | } catch (Exception e) { 62 | throw new RuntimeException(e); 63 | } 64 | 65 | } 66 | 67 | public static void main(String[] args) { 68 | oracleToPGInsert(); 69 | 70 | try { 71 | 
Thread.sleep(1000 * 60 * 100); 72 | } catch (InterruptedException e) { 73 | throw new RuntimeException(e); 74 | } 75 | 76 | } 77 | 78 | } 79 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Etl-engine 2 | [中文](README.md) | **English** 3 | 4 | ## 🚀 Introduction: High-Performance ETL Engine 5 | 6 | **`etl-engine`** is a lightweight, developer-focused ETL (Extract, Transform, Load) library designed to be a **high-performance alternative to Kettle (PDI)**. It achieves ultimate data synchronization efficiency through optimized data flow and concurrency control. 7 | 8 | ----- 9 | 10 | ## 🔥 Core Advantages 11 | 12 | We focus on delivering three core characteristics: 13 | 14 | ### 1\. Extreme Speed ⚡️ 15 | 16 | Significantly improves data processing and database I/O speed through batch operations and an efficient in-memory pipeline design. 17 | 18 | > 📊 **Real-World Test:** For an insert/update task involving $200,000$ records, the speed of `etl-engine` is **$\mathbf{2}$ times faster than Kettle**. 19 | 20 | ### 2\. Robust and Stable Operation 🛡️ 21 | 22 | Built on a **Node** and **Pipe** architecture, it includes built-in backpressure mechanisms and transaction management, ensuring task stability and data consistency in high-concurrency and large-scale synchronization scenarios. 23 | 24 | ### 3\. Easy to Extend and Customize 🧩 25 | 26 | All features are abstracted as pluggable **Nodes**. Users can easily inherit base classes to quickly develop new data sources (e.g., MongoDB, Redis) or custom transformation logic, meeting specific business requirements. 27 | 28 | ----- 29 | 30 | ## 🛠️ Usage Example 31 | 32 | The following code demonstrates how to quickly build an ETL task that **extracts data from Oracle** and **synchronizes (Upsert) it to PostgreSQL (Load)**. 33 | 34 | ```java 35 | // 1. Get Data Sources 36 | DataSource dataSourceOracle = DataSourceUtil.getOracleDataSource(); 37 | DataSource dataSourcePG = DataSourceUtil.getPostgresDataSource(); 38 | 39 | // 2. Create Input Node (Extract) 40 | SqlInputNode sqlInputNode = new SqlInputNode(dataSourceOracle, "select * from t_resident_info"); 41 | 42 | // 3. Create Upsert/Update Node (Load) 43 | // Batch size 1000 44 | UpsertOutputNode upsertOutputNode = new UpsertOutputNode(dataSourcePG, "t_resident_info", 1000); 45 | // Set identity mapping for Insert or Update determination 46 | upsertOutputNode.setIdentityMapping(Arrays.asList(new Tuple2<>("ID", "ID"))); 47 | 48 | // 4. Create Pipe and Connect Nodes 49 | Pipe pipe = new Pipe(1000); // Pipe buffer size 1000 50 | pipe.connect(sqlInputNode, upsertOutputNode); 51 | 52 | // 5. Start Dataflow 53 | Dataflow dataflow = new Dataflow(sqlInputNode); 54 | dataflow.syncStart(5, TimeUnit.MINUTES); // Set timeout 55 | ``` 56 | 57 | ----- 58 | 59 | ## 🏗️ Architecture Overview 60 | 61 | `etl-engine` is based on **stream processing**, composed of the following core components: 62 | 63 | * **Node:** The carrier for the start point (`SqlInputNode`), end point (`UpsertOutputNode`), and all processing logic in the data flow. 64 | * **Pipe:** A high-performance, bounded queue responsible for efficiently transferring data records between nodes. 65 | * **Dataflow:** The orchestrator and execution entry point for the task. 66 | 67 | ----- 68 | 69 | ## 🤝 Contribution and Support 70 | 71 | We welcome community contributions\! 
If you find a bug or would like to add a new data source/transformation node, please feel free to submit a Pull Request. 72 | 73 | * **License:** [Insert License here, e.g., MIT or Apache 2.0] -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/util/DataflowHelper.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.util; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.liuneng.base.Dataflow; 5 | import org.liuneng.base.EtlLog; 6 | import org.liuneng.base.Node; 7 | import org.liuneng.base.Pipe; 8 | 9 | import java.util.ArrayList; 10 | import java.util.List; 11 | import java.util.concurrent.atomic.AtomicReference; 12 | import java.util.function.BiFunction; 13 | import java.util.function.Consumer; 14 | import java.util.function.Function; 15 | 16 | @Slf4j 17 | public class DataflowHelper { 18 | 19 | private DataflowHelper() {} 20 | 21 | public static void logListener(Dataflow dataflow, Consumer handler) { 22 | Runnable runnable = () -> { 23 | int cursor = 0; 24 | while (!dataflow.isStopped() || cursor < dataflow.getLogList().size()) { 25 | try { 26 | Thread.sleep(100); 27 | } catch (InterruptedException e) { 28 | throw new RuntimeException(e); 29 | } 30 | if (cursor < dataflow.getLogList().size()) { 31 | EtlLog etlLog = dataflow.getLogList().get(cursor); 32 | if (etlLog == null) { 33 | log.error("日志为空"); 34 | } 35 | handler.accept(etlLog); 36 | cursor++; 37 | } 38 | } 39 | 40 | }; 41 | new Thread(runnable, "LogListener-" + dataflow.getId()).start(); 42 | } 43 | 44 | public static List> getAllNodesAndPipes(Dataflow dataflow) { 45 | List> results = new ArrayList<>(); 46 | recurNodes(dataflow.getHead(), results::add); 47 | return results; 48 | } 49 | 50 | public static Node findNodeById(Dataflow dataflow, String id) { 51 | AtomicReference re = new AtomicReference<>(); 52 | forEachNodesOrPipes(dataflow, (node, pipe) -> { 53 | if (node != null && id.equals(node.getId())) { 54 | re.set(node); 55 | return false; 56 | } 57 | return true; 58 | }); 59 | 60 | return re.get(); 61 | } 62 | 63 | public static void forEachNodesOrPipes(Dataflow dataflow, BiFunction consumer) { 64 | recurNodes(dataflow.getHead(), nodePipeTuple2 -> consumer.apply(nodePipeTuple2.getPartA(), nodePipeTuple2.getPartB())); 65 | } 66 | 67 | 68 | private static void recurNodes(Object currentNode, Function, Boolean> handler) { 69 | if (currentNode instanceof Node) { 70 | Node node = (Node) currentNode; 71 | boolean isContinue = handler.apply(new Tuple2<>(node, null)); 72 | if (!isContinue) { 73 | return; 74 | } 75 | if (!node.getAfterPipes().isEmpty()) { 76 | for (Pipe afterPipe : node.getAfterPipes()) { 77 | recurNodes(afterPipe, handler); 78 | } 79 | } 80 | } else if (currentNode instanceof Pipe) { 81 | Pipe pipe = (Pipe) currentNode; 82 | boolean isContinue = handler.apply(new Tuple2<>(null, pipe)); 83 | if (!isContinue) { 84 | return; 85 | } 86 | if (pipe.getTo().isPresent()) { 87 | recurNodes(pipe.getTo().get(), handler); 88 | } 89 | } 90 | } 91 | 92 | } 93 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/base/Pipe.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.base; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | import org.slf4j.Logger; 6 | import org.slf4j.LoggerFactory; 7 | 8 | import java.util.Optional; 9 | import 
java.util.concurrent.ArrayBlockingQueue; 10 | import java.util.concurrent.BlockingQueue; 11 | 12 | public class Pipe { 13 | final static Logger log = LoggerFactory.getLogger(Dataflow.class); 14 | 15 | private Dataflow dataflowInstance; 16 | 17 | @Setter 18 | @Getter 19 | private String id; 20 | 21 | @Getter 22 | private long startTime = 0; 23 | 24 | private InputNode from; 25 | 26 | private OutputNode to; 27 | 28 | private final int bufferCapacity; 29 | 30 | private final BlockingQueue bufferQueue; 31 | 32 | private boolean isValid; 33 | 34 | private boolean closed = false; 35 | 36 | public Pipe(int bufferCapacity) { 37 | this.bufferCapacity = bufferCapacity; 38 | bufferQueue = new ArrayBlockingQueue<>(this.bufferCapacity); 39 | isValid = true; 40 | } 41 | 42 | public Optional getFrom() { 43 | return Optional.ofNullable(from); 44 | } 45 | 46 | public void setFrom(InputNode from) { 47 | this.from = from; 48 | from.asNode().addAfterPipe(this); 49 | } 50 | 51 | public Optional getTo() { 52 | return Optional.ofNullable(to); 53 | } 54 | 55 | public void setTo(OutputNode to) { 56 | this.to = to; 57 | to.asNode().setBeforePipe(this); 58 | } 59 | 60 | public void connect(InputNode inputNode, OutputNode outputNode) { 61 | this.setFrom(inputNode); 62 | this.setTo(outputNode); 63 | } 64 | 65 | public int getBufferCapacity() { 66 | return bufferCapacity; 67 | } 68 | 69 | public int getCurrentBufferSize() { 70 | return bufferQueue.size(); 71 | } 72 | 73 | public boolean isValid() { 74 | return isValid; 75 | } 76 | 77 | public void setValid(boolean valid) { 78 | isValid = valid; 79 | } 80 | 81 | public void beWritten(Row row) throws InterruptedException { 82 | if (startTime == 0) { 83 | startTime = System.currentTimeMillis(); 84 | } 85 | 86 | if (!closed) { 87 | bufferQueue.put(row); 88 | } 89 | } 90 | 91 | public Row beRead() throws InterruptedException { 92 | return bufferQueue.take(); 93 | } 94 | 95 | protected Dataflow getDataflowInstance() { 96 | return dataflowInstance; 97 | } 98 | 99 | protected void setDataflowInstance(Dataflow dataflowInstance) { 100 | this.dataflowInstance = dataflowInstance; 101 | } 102 | 103 | protected void initialize(Dataflow dataFlow) { 104 | 105 | this.dataflowInstance = dataFlow; 106 | // this.dataflowInstance.getDataTransferExecutor().execute(() -> { 107 | // try { 108 | // dataflowInstance.awaitStoppingSignal(); 109 | // } catch (InterruptedException e) { 110 | // throw new RuntimeException(e); 111 | // } finally { 112 | // bufferQueue.drainTo(new ArrayList<>()); 113 | // } 114 | // }); 115 | } 116 | 117 | protected void stop() { 118 | closed = true; 119 | bufferQueue.clear(); 120 | // bufferQueue.drainTo(new ArrayList<>()); 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | io.github.add2ws 8 | etl-engine-project 9 | 1.0-SNAPSHOT 10 | pom 11 | 12 | etl-engine 13 | samples 14 | 15 | 16 | 17 | 8 18 | 8 19 | UTF-8 20 | 21 | 22 | 23 | 24 | org.postgresql 25 | postgresql 26 | 42.7.2 27 | 28 | 29 | 30 | mysql 31 | mysql-connector-java 32 | 8.0.33 33 | 34 | 35 | cn.hutool 36 | hutool-json 37 | 5.8.26 38 | 39 | 40 | com.zaxxer 41 | HikariCP 42 | 4.0.3 43 | 44 | 45 | 46 | 47 | 48 | 49 | com.oracle.database.jdbc 50 | ojdbc8 51 | 21.1.0.0 52 | 53 | 54 | 55 | 56 | com.oracle.database.nls 57 | orai18n 58 | 21.1.0.0 59 | 60 | 61 | 62 | 63 | org.duckdb 64 | duckdb_jdbc 65 | 1.4.1.0 66 | 67 | 68 | 69 | 
org.apache.logging.log4j 70 | log4j-api 71 | 2.24.1 72 | 73 | 74 | org.apache.logging.log4j 75 | log4j-core 76 | 2.24.1 77 | 78 | 79 | org.apache.logging.log4j 80 | log4j-slf4j-impl 81 | 2.24.1 82 | 83 | 84 | 85 | org.projectlombok 86 | lombok 87 | 1.18.30 88 | true 89 | 90 | 91 | org.junit.jupiter 92 | junit-jupiter 93 | 5.10.2 94 | test 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /samples/src/main/java/io/github/add2ws/util/DataSourceUtil.java: -------------------------------------------------------------------------------- 1 | package io.github.add2ws.util; 2 | 3 | import com.zaxxer.hikari.HikariDataSource; 4 | import org.springframework.jdbc.datasource.DriverManagerDataSource; 5 | 6 | import javax.sql.DataSource; 7 | import java.util.Properties; 8 | 9 | public class DataSourceUtil { 10 | 11 | public static DataSource getOracleDataSource() { 12 | DriverManagerDataSource dataSource = new DriverManagerDataSource(); 13 | dataSource.setDriverClassName("oracle.jdbc.OracleDriver"); 14 | String host = "ppppp"; 15 | // String host = "127.0.0.1"; 16 | int port = 1521; 17 | String sid = "orcl"; 18 | String url = String.format("jdbc:oracle:thin:@%s:%d:%s", host, port, sid); 19 | dataSource.setUrl(url); 20 | dataSource.setUsername("etl_base"); 21 | dataSource.setPassword("etl_base"); 22 | Properties properties = new Properties(); 23 | properties.setProperty("oracle.jdbc.ReadTimeout", "500"); 24 | dataSource.setConnectionProperties(properties); 25 | return dataSource; 26 | } 27 | 28 | public static DataSource getMySqlDataSource() { 29 | DriverManagerDataSource dataSource = new DriverManagerDataSource(); 30 | dataSource.setDriverClassName("com.mysql.cj.jdbc.Driver"); 31 | String host = "127.0.0.1"; 32 | int port = 3308; 33 | String sid = "etl_base"; 34 | String url = String.format("jdbc:mysql://@%s:%d/%s", host, port, sid); 35 | dataSource.setUrl(url); 36 | dataSource.setUsername("root"); 37 | dataSource.setPassword("root"); 38 | return dataSource; 39 | } 40 | 41 | public static DataSource getOracleDataSourcePool() { 42 | HikariDataSource dataSource = new HikariDataSource(); 43 | dataSource.setDriverClassName("oracle.jdbc.OracleDriver"); 44 | String host = "127.0.0.1"; 45 | int port = 1521; 46 | String sid = "orcl"; 47 | String url = String.format("jdbc:oracle:thin:@%s:%d:%s", host, port, sid); 48 | dataSource.setJdbcUrl(url); 49 | dataSource.setUsername("etl_base"); 50 | dataSource.setPassword("etl_base"); 51 | // dataSource.setMaximumPoolSize(4); 52 | return dataSource; 53 | } 54 | 55 | public static DataSource getDuckDBDataSource(String filePath) { 56 | DriverManagerDataSource dataSource = new DriverManagerDataSource(); 57 | Properties connectionProperties = new Properties(); 58 | connectionProperties.put("duckdb.read_only", "true"); 59 | dataSource.setConnectionProperties(connectionProperties); 60 | dataSource.setDriverClassName("org.duckdb.DuckDBDriver"); 61 | String url = "jdbc:duckdb:" + filePath; 62 | dataSource.setUrl(url); 63 | return dataSource; 64 | } 65 | 66 | public static DataSource getPostgresDataSource() { 67 | DriverManagerDataSource dataSource = new DriverManagerDataSource(); 68 | dataSource.setDriverClassName("org.postgresql.Driver"); 69 | String host = "127.0.0.1"; 70 | int port = 5432; 71 | String sid = "postgres"; 72 | String url = String.format("jdbc:postgresql://%s:%d/%s", host, port, sid); 73 | dataSource.setUrl(url); 74 | dataSource.setUsername("postgres"); 75 | dataSource.setPassword("123"); 76 | return dataSource; 
77 | } 78 | 79 | public static DataSource getPostgresDataSourcePool() { 80 | HikariDataSource dataSource = new HikariDataSource(); 81 | dataSource.setDriverClassName("org.postgresql.Driver"); 82 | String host = "127.0.0.1"; 83 | int port = 5432; 84 | String sid = "postgres"; 85 | String url = String.format("jdbc:postgresql://%s:%d/%s", host, port, sid); 86 | dataSource.setJdbcUrl(url); 87 | dataSource.setUsername("postgres"); 88 | dataSource.setPassword("123"); 89 | // dataSource.setMaximumPoolSize(4); 90 | return dataSource; 91 | } 92 | 93 | } 94 | -------------------------------------------------------------------------------- /samples/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | io.github.add2ws 8 | etl-engine-project 9 | 1.0-SNAPSHOT 10 | 11 | 12 | samples 13 | 14 | 15 | 8 16 | 8 17 | UTF-8 18 | 19 | 20 | 21 | 22 | 23 | io.github.add2ws 24 | etl-engine 25 | 2.1.0-BETA 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | org.apache.maven.plugins 34 | maven-compiler-plugin 35 | 3.11.0 36 | 37 | 1.8 38 | 1.8 39 | UTF-8 40 | 41 | 42 | 43 | 44 | org.apache.maven.plugins 45 | maven-dependency-plugin 46 | 3.6.1 47 | 48 | 49 | copy-dependencies 50 | package 51 | 52 | copy-dependencies 53 | 54 | 55 | 56 | ${project.build.directory}/lib 57 | 58 | runtime 59 | 60 | 61 | 62 | 63 | 64 | false 65 | true 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | org.apache.maven.plugins 74 | maven-jar-plugin 75 | 3.3.0 76 | 77 | 78 | etl-sample 79 | 80 | 81 | 82 | io.github.add2ws.OracleToPG 83 | 84 | true 85 | 86 | lib/ 87 | 88 | 89 | 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/node/DeleteOutputNode.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.node; 2 | 3 | import org.liuneng.base.Dataflow; 4 | import org.liuneng.base.Node; 5 | import org.liuneng.base.OutputNode; 6 | import org.liuneng.base.Row; 7 | import org.liuneng.exception.NodeException; 8 | import org.liuneng.exception.NodePrestartException; 9 | import org.liuneng.exception.NodeWritingException; 10 | import org.liuneng.util.DBUtil; 11 | import org.liuneng.util.Tuple2; 12 | import org.slf4j.Logger; 13 | import org.slf4j.LoggerFactory; 14 | 15 | import javax.sql.DataSource; 16 | import java.sql.PreparedStatement; 17 | import java.sql.SQLException; 18 | import java.util.ArrayList; 19 | import java.util.List; 20 | import java.util.stream.Collectors; 21 | 22 | public class DeleteOutputNode extends Node implements OutputNode { 23 | final static Logger log = LoggerFactory.getLogger(DeleteOutputNode.class); 24 | 25 | private DataSource dataSource; 26 | 27 | private String table; 28 | 29 | private String[] columns; 30 | 31 | private List> columnsMapping = new ArrayList<>();//targetColumn map sourceColumn 32 | 33 | private PreparedStatement preparedStatement; 34 | 35 | private final int batchSize; 36 | 37 | private long processed; 38 | 39 | private long processingRate; 40 | 41 | public DeleteOutputNode(DataSource dataSource, String table, int batchSize) { 42 | this.dataSource = dataSource; 43 | this.table = table; 44 | this.batchSize = batchSize; 45 | } 46 | 47 | public DeleteOutputNode(DataSource dataSource, String table) { 48 | this.dataSource = dataSource; 49 | this.table = table; 50 | this.batchSize = 100; 51 | } 52 | 53 | @Override 54 | public long getProcessed() { 55 | return processed; 56 | } 57 | 58 | @Override 59 | public long getProcessingRate() { 
60 | return processingRate; 61 | } 62 | 63 | @Override 64 | public long getStartTime() { 65 | return dataflowInstance.getStartTime(); 66 | } 67 | 68 | @Override 69 | public void write(Row row) throws NodeWritingException { 70 | 71 | } 72 | 73 | @Override 74 | public String[] getOutputColumns() throws NodeException { 75 | return new String[0]; 76 | } 77 | 78 | public String[] getTableColumns() throws SQLException { 79 | if (columns == null) { 80 | columns = DBUtil.lookupColumns(dataSource, table); 81 | } 82 | return columns; 83 | } 84 | 85 | @Override 86 | protected void prestart(Dataflow dataflow) throws NodePrestartException { 87 | super.prestart(dataflow); 88 | try { 89 | this.getTableColumns(); 90 | 91 | if (this.columnsMapping.isEmpty()) { 92 | throw new NodePrestartException("未指定关联字段"); 93 | } 94 | 95 | String whereSql = this.columnsMapping.stream().map(tuple2 -> tuple2.getPartB() + "=?").collect(Collectors.joining(" and ")); 96 | preparedStatement = dataSource.getConnection().prepareStatement(String.format("delete from %s where %s", this.table, whereSql)); 97 | } catch (SQLException | NodePrestartException e) { 98 | throw new NodePrestartException(e); 99 | } 100 | } 101 | 102 | @Override 103 | protected void onDataflowStop() { 104 | try { 105 | if (preparedStatement != null && !preparedStatement.isClosed()) { 106 | preparedStatement.close(); 107 | } 108 | 109 | } catch (SQLException e) { 110 | throw new RuntimeException(e); 111 | } 112 | } 113 | 114 | public DataSource getDataSource() { 115 | return dataSource; 116 | } 117 | 118 | public void setDataSource(DataSource dataSource) { 119 | this.dataSource = dataSource; 120 | } 121 | 122 | public String getTable() { 123 | return table; 124 | } 125 | 126 | public void setTable(String table) { 127 | this.table = table; 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/node/FileOutputNode.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.node; 2 | 3 | import org.liuneng.base.Dataflow; 4 | import org.liuneng.base.Node; 5 | import org.liuneng.base.OutputNode; 6 | import org.liuneng.base.Row; 7 | import org.liuneng.exception.NodePrestartException; 8 | import org.liuneng.exception.NodeWritingException; 9 | import org.liuneng.util.CsvConverter; 10 | import org.slf4j.Logger; 11 | import org.slf4j.LoggerFactory; 12 | 13 | import java.io.File; 14 | import java.io.FileNotFoundException; 15 | import java.io.FileOutputStream; 16 | import java.io.IOException; 17 | import java.nio.charset.StandardCharsets; 18 | import java.util.Arrays; 19 | import java.util.Collection; 20 | 21 | public class FileOutputNode extends Node implements OutputNode { 22 | final static Logger log = LoggerFactory.getLogger(FileOutputNode.class); 23 | 24 | private final String filePath; 25 | 26 | private FileOutputStream fileOutputStream; 27 | 28 | private Format format; 29 | 30 | public enum Format { 31 | JSON, CSV, TXT 32 | } 33 | 34 | private boolean firstWrite = true; 35 | 36 | public FileOutputNode(String filePath, Format format) { 37 | this.filePath = filePath; 38 | this.format = format; 39 | } 40 | 41 | 42 | @Override 43 | public long getProcessed() { 44 | return 0; 45 | } 46 | 47 | @Override 48 | public long getProcessingRate() { 49 | return 0; 50 | } 51 | 52 | @Override 53 | public long getStartTime() { 54 | return 0; 55 | } 56 | 57 | @Override 58 | public void write(Row row) throws NodeWritingException { 59 | 60 | try { 
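// Format-specific serialization: JSON opens with '[' on the first write, appends
// each row as toJSONString() plus a trailing comma, and closes with ']' on the
// end-of-stream row; CSV first writes a header built from the row's keys, then one
// CsvConverter line per record; any other format falls back to Row.toString().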
61 | if (format == Format.JSON) { 62 | if (firstWrite) { 63 | fileOutputStream.write('['); 64 | } 65 | 66 | if (row.isEnd()) { 67 | fileOutputStream.write(']'); 68 | log.info("json文件写入完成:{}", filePath); 69 | return; 70 | } 71 | 72 | fileOutputStream.write((row.toJSONString()+",\n").getBytes(StandardCharsets.UTF_8)); 73 | 74 | 75 | } else if (format == Format.CSV) { 76 | if (firstWrite) { 77 | String csvRow = CsvConverter.ListToCsvRow(Arrays.asList(row.getData().keySet().toArray())) + "\n"; 78 | fileOutputStream.write(csvRow.getBytes(StandardCharsets.UTF_8)); 79 | } 80 | 81 | if (row.isEnd()) { 82 | fileOutputStream.write(']'); 83 | log.info("csv文件写入完成:{}", filePath); 84 | return; 85 | } 86 | 87 | String csvRow = CsvConverter.ListToCsvRow(row.getData().values()) + "\n"; 88 | fileOutputStream.write(csvRow.getBytes(StandardCharsets.UTF_8)); 89 | 90 | } else { 91 | fileOutputStream.write((row.toString()+"\n").getBytes(StandardCharsets.UTF_8)); 92 | } 93 | } catch (IOException e) { 94 | throw new NodeWritingException(e); 95 | } 96 | 97 | if (firstWrite) { 98 | firstWrite = false; 99 | } 100 | } 101 | 102 | @Override 103 | public String[] getOutputColumns() { 104 | return new String[0]; 105 | } 106 | 107 | 108 | @Override 109 | protected void prestart(Dataflow dataflow) throws NodePrestartException { 110 | super.prestart(dataflow); 111 | File file = new File(filePath); 112 | try { 113 | fileOutputStream = new FileOutputStream(file); 114 | } catch (FileNotFoundException e) { 115 | throw new NodePrestartException(e); 116 | } 117 | } 118 | 119 | @Override 120 | protected void onDataflowStop() { 121 | try { 122 | if (fileOutputStream != null) { 123 | fileOutputStream.close(); 124 | } 125 | } catch (IOException e) { 126 | log.error("关闭异常", e); 127 | } 128 | 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /samples/src/main/java/io/github/add2ws/SamplesMain.java: -------------------------------------------------------------------------------- 1 | package io.github.add2ws; 2 | 3 | import cn.hutool.json.JSONUtil; 4 | import io.github.add2ws.util.DataSourceUtil; 5 | import lombok.extern.slf4j.Slf4j; 6 | import org.liuneng.base.Dataflow; 7 | import org.liuneng.base.Pipe; 8 | import org.liuneng.node.FileOutputNode; 9 | import org.liuneng.node.SqlInputNode; 10 | import org.liuneng.node.UpsertOutputNode; 11 | import org.liuneng.node.UpsertTag; 12 | import org.liuneng.util.DBUtil; 13 | import org.liuneng.util.DataflowHelper; 14 | import org.liuneng.util.Tuple2; 15 | import org.liuneng.util.Tuple3; 16 | 17 | import javax.sql.DataSource; 18 | import java.util.ArrayList; 19 | import java.util.List; 20 | import java.util.Map; 21 | import java.util.concurrent.TimeUnit; 22 | 23 | @Slf4j 24 | public class SamplesMain { 25 | final static String src = ""; 26 | 27 | public static Dataflow testEtlTask() { 28 | // String sql = "select * from t_resident_info where 1=1 " + 29 | // "and substr(ywbjsj, 1, 7) between '2024-01' and '2024-03' " + 30 | // "and rownum<122283"; 31 | 32 | String sql = "select * from t_resident_info where 1=1\n" + 33 | "and rownum<=5000\n "; 34 | 35 | // DataSource oraclePool = DBUtil.getOracleDataSourcePool(); 36 | 37 | DataSource oracle = DataSourceUtil.getOracleDataSource(); 38 | DataSource postgresPool = DataSourceUtil.getPostgresDataSourcePool(); 39 | DataSource postgres = DataSourceUtil.getPostgresDataSource(); 40 | DataSource duckdb = DataSourceUtil.getDuckDBDataSource("E:/duck.db"); 41 | 42 | SqlInputNode sqlReaderNode_oracle = new 
SqlInputNode(oracle, sql, 100); 43 | FileOutputNode fileWriterNode = new FileOutputNode("D:/表数据.txt", FileOutputNode.Format.JSON); 44 | 45 | // InsertOutputNode insertWriterNode_pg = new InsertOutputNode(postgresPool, "t_resident_info", 200); 46 | // insertWriterNode_pg.setDeleteSql("truncate table t_resident_info"); 47 | 48 | // InsertOutputNode insertWriterNode_oracle = new InsertOutputNode(oraclePool, "t_resident_info_2", 200); 49 | // insertWriterNode_oracle.setDeleteSql("truncate table t_resident_info_2"); 50 | 51 | UpsertOutputNode upsertWriterNode_pg = new UpsertOutputNode(postgresPool, "t_resident_info", 500); 52 | upsertWriterNode_pg.addIdentityMapping(new Tuple2<>("XH", "XH")); 53 | // upsertWriterNode_pg.addIdentityMapping(new Tuple2<>("CITYNO", "cityNo")); 54 | 55 | List> columnMaps = JSONUtil.toBean(src, new cn.hutool.core.lang.TypeReference>>() {}, true); 56 | 57 | List> columnMapList = new ArrayList<>(); 58 | for (Map columnMap : columnMaps) { 59 | String ut = columnMap.get("upsertTag").toString(); 60 | UpsertTag upsertTag = null; 61 | if ("compareOnly".equalsIgnoreCase(ut)) { 62 | upsertTag = UpsertTag.COMPARE_ONLY; 63 | } else if ("updateOnly".equalsIgnoreCase(ut)) { 64 | upsertTag = UpsertTag.UPDATE_ONLY; 65 | } else if ("compareAndUpdate".equalsIgnoreCase(ut)) { 66 | upsertTag = UpsertTag.COMPARE_AND_UPDATE; 67 | } 68 | columnMapList.add(new Tuple3<>(columnMap.get("from").toString(), columnMap.get("to").toString(), upsertTag)); 69 | } 70 | 71 | UpsertOutputNode upsertWriterNode_oracle = new UpsertOutputNode(oracle, "t_resident_info_2", 200); 72 | upsertWriterNode_oracle.addIdentityMapping(new Tuple2<>("XH", "XH")); 73 | upsertWriterNode_oracle.setColumnMapping(columnMapList); 74 | Pipe pipe = new Pipe(10000); 75 | // Pipe pipe2 = new Pipe(10000); 76 | // Pipe pipe3 = new Pipe(10000); 77 | 78 | UpsertOutputNode upsertOutputNodeDuckDB = new UpsertOutputNode(duckdb, "t_resident_info", 100); 79 | 80 | pipe.connect(sqlReaderNode_oracle, upsertOutputNodeDuckDB); 81 | 82 | return new Dataflow(sqlReaderNode_oracle); 83 | } 84 | 85 | public static void testDataflow() throws InterruptedException { 86 | 87 | Dataflow dataflow = testEtlTask(); 88 | log.info("Dataflow initialized ... 
ID={}", dataflow.getId()); 89 | DataflowHelper.logListener(dataflow, log -> { 90 | System.out.println("logListener==============>" + log.getMessage()); 91 | }); 92 | dataflow.setProcessingThresholdLog(20000); 93 | 94 | new Thread(() -> { 95 | try { 96 | // log.info("after 5 seconds will stop........................................................................"); 97 | // Thread.sleep(5000); 98 | // log.info("执行手动停止。。。。。{}秒后强制关闭xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", 3); 99 | // dataflow.syncStop(3, TimeUnit.SECONDS); 100 | } catch (Exception e) { 101 | log.error(e.getMessage(), e); 102 | } 103 | }).start(); 104 | 105 | dataflow.syncStart(3600 * 2, TimeUnit.SECONDS); 106 | 107 | Thread.sleep(2000); //防止日志监听线程被过早关闭 108 | } 109 | 110 | public static void main(String[] args) { 111 | try { 112 | testDataflow(); 113 | } catch (InterruptedException e) { 114 | log.error(e.getMessage(), e); 115 | } 116 | } 117 | } -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/node/SqlInputNode.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.node; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | import org.liuneng.base.*; 6 | import org.liuneng.exception.NodeException; 7 | import org.liuneng.exception.NodePrestartException; 8 | import org.liuneng.exception.NodeReadingException; 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | 12 | import javax.sql.DataSource; 13 | import java.io.UnsupportedEncodingException; 14 | import java.sql.Connection; 15 | import java.sql.PreparedStatement; 16 | import java.sql.ResultSet; 17 | import java.sql.SQLException; 18 | import java.util.HashMap; 19 | import java.util.LinkedHashMap; 20 | import java.util.Map; 21 | 22 | public class SqlInputNode extends Node implements InputNode, DataProcessingMetrics { 23 | final static Logger log = LoggerFactory.getLogger(SqlInputNode.class); 24 | 25 | @Getter @Setter 26 | private String charset; 27 | 28 | private String[] columns; 29 | @Getter @Setter 30 | private DataSource dataSource; 31 | @Getter @Setter 32 | private String sql; 33 | @Getter @Setter 34 | private int fetchSize; 35 | @Getter @Setter 36 | private long processed; 37 | @Getter @Setter 38 | private long processingRate; 39 | 40 | @Getter 41 | private long startTime; 42 | 43 | private Connection connection; 44 | 45 | private PreparedStatement preparedStatement; 46 | 47 | private ResultSet resultSet; 48 | 49 | 50 | @Override 51 | public Row read() throws NodeReadingException { 52 | if (startTime == 0) { 53 | startTime = System.currentTimeMillis(); 54 | } 55 | 56 | try { 57 | if (resultSet == null) { 58 | resultSet = preparedStatement.executeQuery(); 59 | resultSet.setFetchSize(fetchSize); 60 | } 61 | 62 | long duration = System.currentTimeMillis() - startTime; 63 | if (resultSet.next()) { 64 | Map row = new LinkedHashMap<>(); 65 | for (String column : columns) { 66 | Object value = resultSet.getObject(column); 67 | if (charset != null && value instanceof String) { 68 | value = new String(((String) value).getBytes(charset)); 69 | } 70 | row.put(column, value); 71 | } 72 | processed ++; 73 | if (duration > 0) { 74 | processingRate = (long) (processed / (duration/1000.0)); 75 | } 76 | return Row.fromMap(row); 77 | } else { 78 | preparedStatement.close(); 79 | resultSet.close(); 80 | connection.close(); 81 | super.dataflowInstance.addInfoLog(String.format("%s completed, processed=%d, time 
consuming=%ds.", this.getName(), processed, duration /1000)); 82 | return Row.ofEnd(); 83 | } 84 | } catch (SQLException | UnsupportedEncodingException e) { 85 | throw new NodeReadingException(e); 86 | } 87 | } 88 | 89 | @Override 90 | public String[] getInputColumns() { 91 | try { 92 | if (columns == null) { 93 | connection = dataSource.getConnection(); 94 | preparedStatement = connection.prepareStatement(sql); 95 | preparedStatement.setFetchSize(fetchSize); 96 | columns = new String[preparedStatement.getMetaData().getColumnCount()]; 97 | for (int i = 0; i < preparedStatement.getMetaData().getColumnCount(); i++) { 98 | columns[i] = preparedStatement.getMetaData().getColumnLabel(i + 1); 99 | } 100 | } 101 | } catch (SQLException e) { 102 | throw new NodeException(e); 103 | } 104 | return columns; 105 | } 106 | 107 | public SqlInputNode() {} 108 | 109 | public SqlInputNode(DataSource dataSource, String sql) { 110 | this.dataSource = dataSource; 111 | this.sql = sql; 112 | this.fetchSize = 0; 113 | } 114 | 115 | public SqlInputNode(DataSource dataSource, String sql, String charset, int fetchSize) { 116 | this.dataSource = dataSource; 117 | this.sql = sql; 118 | this.fetchSize = fetchSize; 119 | this.charset = charset; 120 | } 121 | 122 | public SqlInputNode(DataSource dataSource, String sql, int fetchSize) { 123 | this.dataSource = dataSource; 124 | this.sql = sql; 125 | this.fetchSize = fetchSize; 126 | } 127 | 128 | @Override 129 | protected void prestart(Dataflow dataflow) throws NodePrestartException { 130 | startTime = System.currentTimeMillis(); 131 | log.info("{}[{}] start initializing...", this.getClass().getSimpleName(), super.getId()); 132 | super.prestart(dataflow); 133 | getInputColumns(); 134 | log.info("{}[{}] has been initialized.", this.getClass().getSimpleName(), super.getId()); 135 | } 136 | 137 | @Override 138 | protected void onDataflowStop() { 139 | try { 140 | if (preparedStatement != null && !preparedStatement.isClosed()) { 141 | preparedStatement.close(); 142 | } 143 | if (resultSet != null && !resultSet.isClosed()) { 144 | resultSet.close(); 145 | } 146 | } catch (SQLException e) { 147 | log.error("input node closing error!", e); 148 | } 149 | } 150 | 151 | @Override 152 | public long getInserted() { 153 | return 0; 154 | } 155 | 156 | @Override 157 | public long getInsertingRate() { 158 | return 0; 159 | } 160 | 161 | @Override 162 | public long getUpdated() { 163 | return 0; 164 | } 165 | 166 | @Override 167 | public long getUpdatingRate() { 168 | return 0; 169 | } 170 | 171 | @Override 172 | public long getDeleted() { 173 | return 0; 174 | } 175 | 176 | @Override 177 | public long getDeletingRate() { 178 | return 0; 179 | } 180 | } 181 | -------------------------------------------------------------------------------- /etl-engine/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | 12 | 13 | io.github.add2ws 14 | etl-engine 15 | 2.1.0-BETA 16 | ETL Engine 17 | https://github.com/add2ws/etl-engine 18 | 19 | 20 | The quick and stable data transfer tool. 
21 | 22 | 23 | 24 | 25 | add2ws 26 | add2ws 27 | 28 | Project Manager 29 | Developer 30 | 31 | add2ws@gmail.com 32 | https://github.com/add2ws 33 | 34 | 35 | 36 | 37 | GitHub 38 | https://github.com/add2ws/etl-engine/issues 39 | 40 | 41 | 2024 42 | 43 | 44 | 45 | Apache License 2.0 46 | https://www.apache.org/licenses/LICENSE-2.0.txt 47 | 48 | 49 | 50 | 51 | scm:git:git://github.com/add2ws/etl-engine.git 52 | scm:git:git@github.com:add2ws/etl-engine.git 53 | https://github.com/add2ws/etl-engine 54 | HEAD 55 | 56 | 57 | 58 | 59 | 8 60 | 8 61 | UTF-8 62 | 63 | 64 | 65 | 66 | org.springframework 67 | spring-jdbc 68 | 5.3.39 69 | 70 | 71 | 72 | cn.hutool 73 | hutool-core 74 | 5.8.41 75 | 76 | 77 | cn.hutool 78 | hutool-json 79 | 5.8.41 80 | 81 | 82 | 83 | org.apache.logging.log4j 84 | log4j-api 85 | 2.24.1 86 | 87 | 88 | org.apache.logging.log4j 89 | log4j-core 90 | 2.24.1 91 | 92 | 93 | org.apache.logging.log4j 94 | log4j-slf4j-impl 95 | 2.24.1 96 | 97 | 98 | 99 | org.projectlombok 100 | lombok 101 | 1.16.18 102 | provided 103 | 104 | 105 | 106 | 107 | ${project.artifactId} 108 | 109 | 110 | org.apache.maven.plugins 111 | maven-source-plugin 112 | 2.2.1 113 | 114 | 115 | attach-sources 116 | 117 | jar-no-fork 118 | 119 | 120 | 121 | 122 | 123 | 124 | org.apache.maven.plugins 125 | maven-javadoc-plugin 126 | 2.10.4 127 | 128 | 129 | attach-javadocs 130 | package 131 | 132 | jar 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | deploy2public 145 | 146 | 147 | 148 | 149 | org.apache.maven.plugins 150 | maven-gpg-plugin 151 | 1.6 152 | 153 | C:\Program Files (x86)\GnuPG\bin\gpg.exe 154 | 155 | 156 | 157 | verify 158 | 159 | sign 160 | 161 | 162 | 163 | 164 | 165 | org.sonatype.central 166 | central-publishing-maven-plugin 167 | 0.5.0 168 | true 169 | 170 | 171 | sonatype-add2ws 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/node/InsertOutputNode.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.node; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | import org.liuneng.base.*; 6 | import org.liuneng.exception.NodeException; 7 | import org.liuneng.exception.NodePrestartException; 8 | import org.liuneng.exception.NodeWritingException; 9 | import org.liuneng.util.DBUtil; 10 | import org.liuneng.util.NodeHelper; 11 | import org.liuneng.util.StrUtil; 12 | import org.liuneng.util.Tuple2; 13 | import org.slf4j.Logger; 14 | import org.slf4j.LoggerFactory; 15 | import org.springframework.jdbc.core.BatchPreparedStatementSetter; 16 | import org.springframework.jdbc.core.JdbcTemplate; 17 | 18 | import javax.sql.DataSource; 19 | import java.sql.PreparedStatement; 20 | import java.sql.SQLException; 21 | import java.util.ArrayList; 22 | import java.util.Collections; 23 | import java.util.List; 24 | import java.util.stream.Collectors; 25 | 26 | 27 | public class InsertOutputNode extends Node implements OutputNode, DataProcessingMetrics { 28 | final static Logger log = LoggerFactory.getLogger(InsertOutputNode.class); 29 | 30 | private String[] columns; 31 | 32 | private JdbcTemplate jdbcTemplate; 33 | 34 | @Getter @Setter 35 | private DataSource dataSource; 36 | @Getter @Setter 37 | private String table; 38 | @Getter @Setter 39 | private String deleteSql; 40 | @Getter @Setter 41 | private boolean isDeleted = false; 42 | @Getter @Setter 43 | private int batchSize; 44 | 45 | private final List> 
columnsMapping = new ArrayList<>(); 46 | 47 | private final List batchData = new ArrayList<>(); 48 | 49 | private PreparedStatement currentPreparedStatement; 50 | @Getter @Setter 51 | private long processed; 52 | @Getter @Setter 53 | private long processingRate; 54 | @Getter @Setter 55 | private long startTime; 56 | 57 | @Override 58 | public void write(Row row) throws NodeWritingException { 59 | if (!isDeleted && StrUtil.isNotBlank(deleteSql)) { 60 | jdbcTemplate.update(deleteSql); 61 | isDeleted = true; 62 | log.info("{} 删除表数据成功", this.getId()); 63 | } 64 | if (!row.isEnd()) { 65 | batchData.add(row); 66 | } else { 67 | commitBatch(); 68 | super.dataflowInstance.addInfoLog(String.format("InsertOutputNode[%s] completed, processed=%d, time consuming=%ds.", this.getName(), processed, (System.currentTimeMillis() - startTime)/1000)); 69 | } 70 | 71 | if (batchData.size() == batchSize) { 72 | commitBatch(); 73 | batchData.clear(); 74 | } 75 | } 76 | 77 | private String insertSql = null; 78 | private void commitBatch() { 79 | if (batchData.isEmpty()) return; 80 | 81 | long startTime = System.currentTimeMillis(); 82 | if (insertSql == null) { 83 | String columnsSql = columnsMapping.stream().map(Tuple2::getPartB).collect(Collectors.joining(",")); 84 | String valuesSql = String.join(",", Collections.nCopies(columnsMapping.size(), "?")); 85 | insertSql = String.format("insert into %s (%s)values(%s)", table, columnsSql, valuesSql); 86 | } 87 | jdbcTemplate.batchUpdate(insertSql, new BatchPreparedStatementSetter() { 88 | @Override 89 | public void setValues(PreparedStatement preparedStatement, int indexOfBatch) throws SQLException { 90 | currentPreparedStatement = preparedStatement; 91 | int i = 1; 92 | for (Tuple2 colMap : columnsMapping) { 93 | Object value = batchData.get(indexOfBatch).get(colMap.getPartA()); 94 | preparedStatement.setObject(i++, value); 95 | } 96 | } 97 | 98 | @Override 99 | public int getBatchSize() { 100 | return batchData.size(); 101 | } 102 | }); 103 | 104 | long elapsedMillis = System.currentTimeMillis() - startTime; 105 | if (elapsedMillis == 0) { 106 | processingRate = -1; 107 | } else { 108 | processingRate = (long) (1.0 * batchSize / elapsedMillis * 1000); 109 | } 110 | log.info("提交成功!,总量={}条 速度={}条/秒", batchData.size(), processingRate); 111 | processed += batchData.size(); 112 | } 113 | 114 | 115 | public List> getColumnMapping() { 116 | return this.columnsMapping; 117 | } 118 | 119 | public void setColumnMapping(List> columnsMapping) { 120 | this.columnsMapping.clear(); 121 | this.columnsMapping.addAll(columnsMapping); 122 | } 123 | 124 | 125 | public List> autoMapTargetColumns() throws Exception { 126 | log.info("{} 开始自动匹配列。。。。。。", this.getId()); 127 | InputNode from = this.getBeforePipe().orElseThrow(() -> new Exception("无法获得上一节点的列信息")).getFrom().orElseThrow(() -> new Exception("无法获得上一节点的列信息")); 128 | String[] sourceColumns = NodeHelper.getUpstreamColumns(from); 129 | 130 | this.columnsMapping.clear(); 131 | for (String targetColumn : this.getOutputColumns()) { 132 | for (String sourceColumn : sourceColumns) { 133 | if (sourceColumn.equalsIgnoreCase(targetColumn)) { 134 | this.columnsMapping.add(new Tuple2<>(sourceColumn, targetColumn)); 135 | break; 136 | } 137 | } 138 | } 139 | if (this.columnsMapping.isEmpty()) { 140 | throw new Exception("自动匹配列名失败!没有一个列能匹配上。"); 141 | } 142 | log.info("{} 自动匹配列完成", this.getId()); 143 | return columnsMapping; 144 | } 145 | 146 | public InsertOutputNode() { 147 | 148 | } 149 | 150 | 151 | public InsertOutputNode(DataSource 
dataSource, String table) { 152 | this.dataSource = dataSource; 153 | this.batchSize = 100; 154 | this.table = table; 155 | jdbcTemplate = new JdbcTemplate(dataSource); 156 | } 157 | 158 | public InsertOutputNode(DataSource dataSource, String table, int batchSize) { 159 | this.dataSource = dataSource; 160 | this.batchSize = batchSize; 161 | this.table = table; 162 | jdbcTemplate = new JdbcTemplate(dataSource); 163 | } 164 | 165 | public void ping() { 166 | } 167 | 168 | 169 | @Override 170 | public String[] getOutputColumns() { 171 | if (columns == null) { 172 | try { 173 | columns = DBUtil.lookupColumns(dataSource, table); 174 | } catch (SQLException e) { 175 | throw new NodeException(e); 176 | } 177 | } 178 | return columns; 179 | } 180 | 181 | @Override 182 | protected void prestart(Dataflow dataflow) throws NodePrestartException { 183 | startTime = System.currentTimeMillis(); 184 | super.prestart(dataflow); 185 | if (columnsMapping.isEmpty()) { 186 | try { 187 | autoMapTargetColumns(); 188 | } catch (Exception e) { 189 | throw new NodePrestartException(e); 190 | } 191 | } 192 | } 193 | 194 | @Override 195 | protected void onDataflowStop() { 196 | try { 197 | if (currentPreparedStatement != null && !currentPreparedStatement.isClosed()) { 198 | currentPreparedStatement.close(); 199 | } 200 | } catch (SQLException e) { 201 | throw new RuntimeException(e); 202 | } 203 | 204 | } 205 | 206 | 207 | 208 | @Override 209 | public long getStartTime() { 210 | return 0; 211 | } 212 | 213 | @Override 214 | public long getInserted() { 215 | return processed; 216 | } 217 | 218 | @Override 219 | public long getInsertingRate() { 220 | return processingRate; 221 | } 222 | 223 | @Override 224 | public long getUpdated() { 225 | return 0; 226 | } 227 | 228 | @Override 229 | public long getUpdatingRate() { 230 | return 0; 231 | } 232 | 233 | @Override 234 | public long getDeleted() { 235 | return 0; 236 | } 237 | 238 | @Override 239 | public long getDeletingRate() { 240 | return 0; 241 | } 242 | } 243 | -------------------------------------------------------------------------------- /samples/src/test/java/com/test/TestCase1.java: -------------------------------------------------------------------------------- 1 | package com.test; 2 | 3 | import io.github.add2ws.util.DataSourceUtil; 4 | import lombok.extern.slf4j.Slf4j; 5 | import org.junit.jupiter.api.Test; 6 | import org.liuneng.base.Dataflow; 7 | import org.liuneng.base.Pipe; 8 | import org.liuneng.exception.DataflowException; 9 | import org.liuneng.node.*; 10 | import org.liuneng.util.DataflowHelper; 11 | import org.liuneng.util.Tuple2; 12 | 13 | import javax.sql.DataSource; 14 | import java.math.BigDecimal; 15 | import java.math.MathContext; 16 | import java.sql.Connection; 17 | import java.sql.SQLException; 18 | import java.sql.Statement; 19 | import java.util.ArrayList; 20 | import java.util.Arrays; 21 | import java.util.concurrent.ExecutorService; 22 | import java.util.concurrent.Executors; 23 | import java.util.concurrent.TimeUnit; 24 | 25 | @Slf4j 26 | public class TestCase1 { 27 | 28 | @Test 29 | void pp() { 30 | long s = (long) (1.0 * 500 / 3832 * 1000); 31 | 32 | System.out.println(s); 33 | } 34 | 35 | @Test 36 | void PGToFile() { 37 | DataSource dataSourcePG = DataSourceUtil.getPostgresDataSource(); 38 | SqlInputNode sqlInputNode = new SqlInputNode(dataSourcePG, "select * from t_resident_info order by xh limit 103"); 39 | 40 | FileOutputNode fileOutputNode = new FileOutputNode("E:/output.csv", FileOutputNode.Format.CSV); 41 | 42 | Pipe pipe = 
new Pipe(1000); 43 | pipe.connect(sqlInputNode,fileOutputNode); 44 | 45 | Dataflow dataflow = new Dataflow(sqlInputNode); 46 | 47 | dataflow.syncStart(); 48 | 49 | } 50 | 51 | @Test 52 | void print1() throws SQLException { 53 | DataSource oracleDataSource = DataSourceUtil.getOracleDataSource(); 54 | Connection connection = oracleDataSource.getConnection(); 55 | System.out.println("connected !!!!!"); 56 | } 57 | 58 | @Test 59 | void MysqlAndPG() { 60 | //获取数据源 61 | DataSource dataSourceMysql = DataSourceUtil.getMySqlDataSource(); 62 | DataSource dataSourcePG = DataSourceUtil.getPostgresDataSource(); 63 | 64 | //创建表输入节点 65 | SqlInputNode sqlInputNode = new SqlInputNode(dataSourcePG, "select * from t_resident_info limit 20007"); 66 | 67 | //创建插入/更新节点 68 | // UpsertOutputNode outputNode = new UpsertOutputNode(dataSourceMysql, "t_resident_info", 500); 69 | // outputNode.setIdentityMapping(Arrays.asList(new Tuple2<>("XH", "XH"))); 70 | 71 | InsertOutputNode outputNode = new InsertOutputNode(dataSourceMysql, "t_resident_info", 500); 72 | 73 | 74 | //创建管道 75 | Pipe pipe = new Pipe(10000); 76 | //连接表输入和输出节点 77 | pipe.connect(sqlInputNode, outputNode); 78 | 79 | //创建数据流实例 80 | Dataflow dataflow = new Dataflow(sqlInputNode); 81 | dataflow.setProcessingThresholdLog(10000); 82 | //启动数据流 83 | dataflow.syncStart(5, TimeUnit.MINUTES); 84 | 85 | try { 86 | Thread.sleep(1000 * 60); 87 | } catch (InterruptedException e) { 88 | throw new RuntimeException(e); 89 | } 90 | } 91 | 92 | @Test 93 | void oracleToPG() { 94 | //获取数据源 95 | DataSource dataSourceOracle = DataSourceUtil.getOracleDataSource(); 96 | DataSource dataSourcePG = DataSourceUtil.getPostgresDataSource(); 97 | 98 | //创建表输入节点 99 | SqlInputNode sqlInputNode = new SqlInputNode(dataSourceOracle, "select * from t_resident_info"); 100 | 101 | //创建插入/更新节点 102 | UpsertOutputNode upsertOutputNode = new UpsertOutputNode(dataSourcePG, "t_resident_info", 1000); 103 | upsertOutputNode.setIdentityMapping(Arrays.asList(new Tuple2<>("ID", "ID"))); 104 | 105 | //创建管道 106 | Pipe pipe = new Pipe(1000); 107 | //连接表输入和输出节点 108 | pipe.connect(sqlInputNode, upsertOutputNode); 109 | 110 | //创建数据流实例 111 | Dataflow dataflow = new Dataflow(sqlInputNode); 112 | //启动数据流 113 | dataflow.syncStart(5, TimeUnit.MINUTES); 114 | 115 | try { 116 | Thread.sleep(1000 * 60); 117 | } catch (InterruptedException e) { 118 | throw new RuntimeException(e); 119 | } 120 | } 121 | 122 | @Test 123 | void testDuckDB() { 124 | DataSource dataSourceDuck = DataSourceUtil.getDuckDBDataSource("E:/duck.db"); 125 | 126 | /* 127 | try (Connection connection = dataSourceDuck.getConnection()) { 128 | Statement statement = connection.createStatement(); 129 | statement.executeUpdate("delete from main.t_resident_info where substr(ywbjsj, 1, 10) >= '2025-11-01'"); 130 | log.info("删除成功。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。"); 131 | } catch (SQLException e) { 132 | throw new RuntimeException(e); 133 | } 134 | */ 135 | 136 | 137 | DataSource dataSourcePG = DataSourceUtil.getPostgresDataSource(); 138 | 139 | String sql = "select * from main.t_resident_info where 1=1 limit 20000 "; 140 | // String sql = "SELECT xh::text, ywbh, llywlb, llywlbbm, sjywlb, yywbh, jyzt, jyywzmzsmc, jyywzmzsmcbm, jyywzmzsh, djywzmzsmc, djywzmzsmcbm, djywzmzsh, jyzlb, jyzlbbm, jyzqc, jyzzjmc, jyzzjmcbm, jyzzjhm, jyzxz, jyzxzbm, jyzhj, jyzhjxzqh, bdcdyh, fwbm, fwzt, xzqhdm, qx, xzjdb, jjxzqh, jjxzqhdm, ljx, xq, lh, szqsc, szzzc, myc, dy, fh, fwzl, hxjs, hxjsbm, hxjg, hxjgbm, fwcx, fwcxbm, jzmj, tnjzmj, gtjzmj, ghyt, jzjg, jzjgbm, fwyt, 
fwytbm, fwxz, fwxzbm, fwlx, fwlxbm, gyfs, gyfsbm, szfe, cjje, fklx, fklxbm, dkfs, dkfsbm, htsxrq, ywbjsj, ywbjrsfzh, ywblszxzqhdm, cityno, zlsj, zlid, del_status, insert_time, jfrq, sfk, ysxkzh, ybdczh, xhs\n" + 141 | // "FROM t_resident_info where 1=1 " + 142 | // "and substr(ywbjsj, 1, 10) > '2025-11-02'" + 143 | // "limit 124"; 144 | 145 | SqlInputNode sqlInputNode = new SqlInputNode(dataSourceDuck, sql); 146 | sqlInputNode.setFetchSize(1000); 147 | UpsertOutputNode upsertOutputNode = new UpsertOutputNode(dataSourcePG, "t_resident_info"); 148 | upsertOutputNode.setFindMatchInjection("xh=555555"); 149 | // upsertOutputNode.setIdentityMapping(Arrays.asList(new Tuple2<>("xh", "xh"))); 150 | 151 | Pipe pipe = new Pipe(10000); 152 | pipe.connect(sqlInputNode, upsertOutputNode); 153 | 154 | Dataflow dataflow = new Dataflow(sqlInputNode); 155 | // dataflow.setProcessingThresholdLog(100); 156 | DataflowHelper.logListener(dataflow, etlLog -> { 157 | System.out.println(etlLog.getMessage()); 158 | }); 159 | try { 160 | dataflow.syncStart(5, TimeUnit.MINUTES); 161 | } catch (Exception e) { 162 | throw new RuntimeException(e); 163 | } 164 | 165 | try { 166 | Thread.sleep(1000 * 60); 167 | } catch (InterruptedException e) { 168 | throw new RuntimeException(e); 169 | } 170 | } 171 | 172 | @Test 173 | void t1() { 174 | ArrayList list = new ArrayList<>(); 175 | Object object = list.get(3); 176 | System.out.println(object); 177 | } 178 | 179 | @Test 180 | void testB() { 181 | BigDecimal number = new BigDecimal("1234567890.000", new MathContext(0)); 182 | number = number.stripTrailingZeros(); 183 | System.out.println(number.toPlainString()); 184 | int precision = number.precision(); 185 | Integer i = 10; 186 | 187 | String string = number.toString(); 188 | String string1 = i.toString(); 189 | 190 | System.out.println(string1 + string); 191 | } 192 | 193 | /*@Test 194 | void test1() throws InterruptedException { 195 | 196 | 197 | ExecutorService executorService = Executors.newCachedThreadPool(); 198 | // ExecutorService executorService = Executors.newSingleThreadExecutor(); 199 | // ExecutorService executorService = Executors.newFixedThreadPool(5); 200 | 201 | 202 | WaitGroup waitGroup = new WaitGroup(); 203 | try { 204 | waitGroup.add(2); 205 | } catch (WaitGroup.NegativeCounterException e) { 206 | throw new RuntimeException(e); 207 | } 208 | 209 | CountDownLatch countDownLatch = new CountDownLatch(2); 210 | 211 | 212 | Future future = executorService.submit(() -> { 213 | for (int i = 0; i < 5; i++) { 214 | System.out.println("it is " + (5 - i) + " seconds left"); 215 | try { 216 | Thread.sleep(1000); 217 | } catch (InterruptedException e) { 218 | throw new RuntimeException(e); 219 | } 220 | } 221 | countDownLatch.countDown(); 222 | }); 223 | 224 | 225 | executorService.submit(() -> { 226 | for (int i = 0; i < 3; i++) { 227 | System.out.println("it is " + (500 - i) + " seconds left"); 228 | try { 229 | Thread.sleep(1000); 230 | } catch (InterruptedException e) { 231 | throw new RuntimeException(e); 232 | } 233 | } 234 | countDownLatch.countDown(); 235 | }); 236 | 237 | // boolean await = countDownLatch.await(2, TimeUnit.SECONDS); 238 | System.out.println("it is done!=>" + 1); 239 | 240 | }*/ 241 | 242 | @Test 243 | void test_wait() { 244 | ExecutorService executorService = Executors.newCachedThreadPool(); 245 | executorService.execute(() -> { 246 | int i = 3; 247 | try { 248 | while (i-- > 0) { 249 | log.info("{}秒后终结线程池", i); 250 | Thread.sleep(1000); 251 | } 252 | executorService.shutdown(); 253 | 
log.info("已发送终结信号"); 254 | boolean notTimeout = executorService.awaitTermination(2, TimeUnit.SECONDS); 255 | if (!notTimeout) { 256 | log.info("终结超时!准备强制终结"); 257 | executorService.shutdownNow(); 258 | } 259 | 260 | } catch (InterruptedException e) { 261 | log.error(e.getMessage(), e); 262 | } 263 | log.info("thread 0 ends."); 264 | }); 265 | 266 | 267 | executorService.execute(() -> { 268 | Boolean b = null; 269 | try { 270 | b = executorService.awaitTermination(8, TimeUnit.SECONDS); 271 | } catch (InterruptedException e) { 272 | log.error("thread 1 waited failed!", e); 273 | } 274 | boolean isTerminated = executorService.isTerminated(); 275 | log.info("thread 1: is terminated: {}", isTerminated); 276 | log.info("thread 1 ends....=>{}", b); 277 | }); 278 | 279 | executorService.execute(() -> { 280 | Boolean b = null; 281 | try { 282 | b = executorService.awaitTermination(10, TimeUnit.SECONDS); 283 | } catch (InterruptedException e) { 284 | log.error("thread 2 waited failed!", e); 285 | } 286 | boolean isTerminated = executorService.isTerminated(); 287 | log.info("thread 2: is terminated: {}", isTerminated); 288 | log.info("thread 2 ends....=>{}", b); 289 | }); 290 | 291 | boolean notTimeout = false; 292 | try { 293 | notTimeout = executorService.awaitTermination(30000, TimeUnit.MILLISECONDS); 294 | } catch (InterruptedException e) { 295 | throw new RuntimeException(e); 296 | } 297 | boolean isTerminated = executorService.isTerminated(); 298 | log.info("notTimeout:{} if terminated: {}", notTimeout, isTerminated); 299 | } 300 | } -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/node/UpsertOutputNode.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.node; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | import org.liuneng.base.*; 6 | import org.liuneng.exception.NodeException; 7 | import org.liuneng.exception.NodePrestartException; 8 | import org.liuneng.exception.NodeWritingException; 9 | import org.liuneng.util.*; 10 | import org.slf4j.Logger; 11 | import org.slf4j.LoggerFactory; 12 | 13 | import javax.sql.DataSource; 14 | import java.sql.Connection; 15 | import java.sql.PreparedStatement; 16 | import java.sql.ResultSet; 17 | import java.sql.SQLException; 18 | import java.util.ArrayList; 19 | import java.util.Collections; 20 | import java.util.List; 21 | import java.util.Map; 22 | import java.util.stream.Collectors; 23 | 24 | 25 | public class UpsertOutputNode extends Node implements OutputNode, DataProcessingMetrics { 26 | final static Logger log = LoggerFactory.getLogger(UpsertOutputNode.class); 27 | 28 | private String[] columns; 29 | 30 | @Getter 31 | @Setter 32 | private DataSource dataSource; 33 | 34 | private Connection connection; 35 | 36 | @Getter 37 | @Setter 38 | private String table; 39 | 40 | @Getter 41 | @Setter 42 | private String findMatchInjection; 43 | 44 | @Getter 45 | @Setter 46 | private String updateInjection; 47 | 48 | @Getter 49 | @Setter 50 | private boolean isIgnoreCase = true; 51 | 52 | @Getter 53 | @Setter 54 | private boolean insertOnly; 55 | 56 | @Getter 57 | @Setter 58 | private int batchSize; 59 | 60 | private final List> columnsMapping = new ArrayList<>();//targetColumn map sourceColumn 61 | 62 | private final List> identityMapping = new ArrayList<>();//targetColumn map sourceColumn 63 | 64 | private final List batchData = new ArrayList<>(); 65 | 66 | @Getter 67 | private long processed = 0; 68 | 69 | @Getter 70 | 
private long processingRate = 0; 71 | 72 | @Getter 73 | private long inserted = 0; 74 | 75 | @Getter 76 | private long insertingRate = 0; 77 | 78 | @Getter 79 | private long updated = 0; 80 | 81 | @Getter 82 | private long updatingRate = 0; 83 | 84 | 85 | @Getter 86 | private long startTime; 87 | 88 | private List uncleanData = new ArrayList<>(); 89 | 90 | @Override 91 | public void write(Row row) throws NodeWritingException { 92 | if (startTime == 0) { 93 | startTime = System.currentTimeMillis(); 94 | } 95 | 96 | if (row.isEnd()) { 97 | commitBatch(); 98 | batchData.clear(); 99 | super.dataflowInstance.addInfoLog(String.format("UpsertOutputNode[%s] completed, processed(inserted/updated)=%d(%d/%d), time consuming=%ds.", this.getName(), processed, inserted, updated, (System.currentTimeMillis() - startTime) / 1000)); 100 | return; 101 | } 102 | 103 | if (batchData.size() < batchSize) { 104 | batchData.add(row); 105 | } 106 | 107 | if (batchData.size() == batchSize) { 108 | commitBatch(); 109 | batchData.clear(); 110 | } 111 | } 112 | 113 | private void commitBatch() { 114 | if (batchData.isEmpty()) return; 115 | 116 | long startTime = System.currentTimeMillis(); 117 | int inserted = 0; 118 | int updated = 0; 119 | 120 | if (!this.identityMapping.isEmpty() || StrUtil.isNotBlank(findMatchInjection) ) {//判断主键映射是否为空,为空则全部insert 121 | List> matchedRows; 122 | try { 123 | matchedRows = retrieveMatchRows(); 124 | } catch (SQLException e) { 125 | log.error(e.getMessage(), e); 126 | throw new NodeWritingException("Finding match rows error: " + e.getMessage()); 127 | } 128 | log.debug("查询出来了 {} 条", matchedRows.size()); 129 | 130 | Tuple2, List> willInsertAndUpdateRows = findWillInsertAndUpdateRows(matchedRows); 131 | 132 | try { 133 | inserted = insertBatch(willInsertAndUpdateRows.getPartA()); 134 | } catch (SQLException e) { 135 | log.debug(e.getMessage(), e); 136 | throw new NodeWritingException("Insert rows error: " + e.getMessage()); 137 | } 138 | try { 139 | updated = updateBatch(willInsertAndUpdateRows.getPartB()); 140 | } catch (SQLException e) { 141 | log.debug(e.getMessage(), e); 142 | throw new NodeWritingException("Update rows error: " + e.getMessage()); 143 | } 144 | } else { 145 | try { 146 | inserted = insertBatch(batchData); 147 | } catch (SQLException e) { 148 | log.debug(e.getMessage(), e); 149 | throw new NodeWritingException("Insert rows error: " + e.getMessage()); 150 | } 151 | } 152 | 153 | 154 | this.inserted += inserted; 155 | this.updated += updated; 156 | this.processed += batchData.size(); 157 | 158 | long currentTimeMillis = System.currentTimeMillis(); 159 | double elapsedSeconds = (currentTimeMillis - startTime) / 1000.0; 160 | insertingRate = (long) (inserted / elapsedSeconds); 161 | updatingRate = (long) (updated / elapsedSeconds); 162 | processingRate = (long) (batchData.size() / elapsedSeconds); 163 | log.info("提交成功!,总量(新增/更新)={}({}/{}) 速度={}条/秒", batchData.size(), inserted, updated, this.processingRate); 164 | } 165 | 166 | // private PreparedStatement findMatchRowsPreparedstatement = null; 167 | 168 | private String findMatchSqlBase = null; 169 | private String findMatchSqlPredicateItem = null; 170 | 171 | /** 172 | * 从目标库中查询匹配的数据 173 | * 174 | * @return 175 | * @throws SQLException 176 | */ 177 | private List> retrieveMatchRows() throws SQLException { 178 | if (findMatchSqlBase == null) { 179 | findMatchSqlBase = String.format("select %s from %s", 180 | columnsMapping.stream().map(cp -> adaptStringCase(cp.getPartB())).collect(Collectors.joining(",")), 181 | 
adaptStringCase(table)); 182 | 183 | findMatchSqlPredicateItem = identityMapping.stream().map(identityPair -> String.format("%s=?", adaptStringCase(identityPair.getPartB()))) 184 | .collect(Collectors.joining(" and ")); 185 | if (StrUtil.isNotBlank(findMatchInjection)) { 186 | if (identityMapping.isEmpty()) { 187 | findMatchSqlPredicateItem = findMatchInjection; 188 | } else { 189 | findMatchSqlPredicateItem += " and " + findMatchInjection; 190 | } 191 | } 192 | } 193 | String predicates = String.join(" or ", Collections.nCopies(batchData.size(), "(" + findMatchSqlPredicateItem + ")")); 194 | String sql = findMatchSqlBase + " where " + predicates; 195 | log.debug("Find match rows sql: {}", sql); 196 | 197 | long startTime = System.currentTimeMillis(); 198 | log.info("准备检索匹配数据,待匹配条数: {}", batchData.size()); 199 | 200 | try (PreparedStatement findMatchRowsPreparedstatement = connection.prepareStatement(sql)) { 201 | int argIndex = 1; 202 | for (Row row : batchData) { 203 | for (Tuple2 inputMapOutput : identityMapping) { 204 | findMatchRowsPreparedstatement.setObject(argIndex++, row.get(inputMapOutput.getPartA())); 205 | } 206 | } 207 | 208 | List> result = new ArrayList<>(); 209 | ResultSet rs = findMatchRowsPreparedstatement.executeQuery(); 210 | while (rs.next()) { 211 | Map mapRow = DBUtil.mapRow(rs); 212 | result.add(mapRow); 213 | } 214 | log.info("匹配数据检索完成,共检索出{}条数据,耗时: {} ms", result.size(), System.currentTimeMillis() - startTime); 215 | return result; 216 | } 217 | 218 | } 219 | 220 | /** 221 | * 比对数据是否有差异 222 | * 223 | * @param rows 224 | * @return 225 | */ 226 | private Tuple2, List> findWillInsertAndUpdateRows(List> rows) { 227 | List willInsert = new ArrayList<>(batchData); 228 | List willUpdate = new ArrayList<>(); 229 | long startTime = System.currentTimeMillis(); 230 | log.debug("准备比对目标库数据,筛选待插入和待更新的数据,待比对条数: {}", rows.size()); 231 | for (Row inputRow : batchData) { 232 | for (Map outputRowData : rows) { 233 | boolean isFound = identityMapping.stream().allMatch(identityMappingPair -> { 234 | Object inputValue = inputRow.get(identityMappingPair.getPartA()); 235 | Object outputValue = outputRowData.get(identityMappingPair.getPartB()); 236 | return StrUtil.isEqual(inputValue, outputValue); 237 | }); 238 | 239 | if (isFound) { 240 | willInsert.remove(inputRow); 241 | if (insertOnly) { 242 | break; 243 | } 244 | 245 | if (!isSameRow(inputRow.getData(), outputRowData)) { 246 | willUpdate.add(inputRow); 247 | break; 248 | } 249 | } 250 | } 251 | } 252 | log.debug("目标库数据比对结束,筛选出待插入 {} 条,待更新 {} 条,耗时:{} ms", willInsert.size(), willUpdate.size(), System.currentTimeMillis() - startTime); 253 | 254 | return new Tuple2<>(willInsert, willUpdate); 255 | } 256 | 257 | private boolean isSameRow(Map inputRow, Map outputRow) { 258 | for (Tuple3 columnMap : columnsMapping) { 259 | if (columnMap.getPartC() == UpsertTag.UPDATE_ONLY) { 260 | continue; 261 | } 262 | 263 | Object inputValue = inputRow.get(columnMap.getPartA()); 264 | Object outputValue = outputRow.get(columnMap.getPartB()); 265 | if (!StrUtil.isEqual(inputValue, outputValue)) { 266 | return false; 267 | } 268 | } 269 | 270 | return true; 271 | } 272 | 273 | private PreparedStatement insertPreparedStatement = null; 274 | 275 | private int insertBatch(List rows) throws SQLException { 276 | if (rows.isEmpty()) { 277 | return 0; 278 | } 279 | if (insertPreparedStatement == null) { 280 | String columnsSql = columnsMapping.stream().map(cp -> adaptStringCase(cp.getPartB())).collect(Collectors.joining(",")); 281 | String valuesSql = 
String.join(",", Collections.nCopies(columnsMapping.size(), "?")); 282 | String insertSql = String.format("insert into %s (%s)values(%s)", table, columnsSql, valuesSql); 283 | insertPreparedStatement = connection.prepareStatement(insertSql); 284 | log.debug("Insert sql: {}", insertSql); 285 | } 286 | 287 | long startTime = System.currentTimeMillis(); 288 | log.info("待插入数据:{}条", rows.size()); 289 | for (Row row : rows) { 290 | for (int i = 0; i < columnsMapping.size(); i++) { 291 | Object obj = row.get(columnsMapping.get(i).getPartA()); 292 | insertPreparedStatement.setObject(i + 1, obj); 293 | } 294 | insertPreparedStatement.addBatch(); 295 | } 296 | int[] inserted = insertPreparedStatement.executeBatch(); 297 | log.info("数据插入完成,耗时:{} ms", System.currentTimeMillis() - startTime); 298 | 299 | return inserted.length; 300 | } 301 | 302 | private PreparedStatement updatePreparedStatement = null; 303 | 304 | private int updateBatch(List rows) throws SQLException { 305 | if (rows.isEmpty()) { 306 | return 0; 307 | } 308 | if (updatePreparedStatement == null) { 309 | String setClause = columnsMapping.stream().filter(mapping -> mapping.getPartC() != UpsertTag.COMPARE_ONLY) 310 | .map(cp -> adaptStringCase(cp.getPartB()) + "=?") 311 | .collect(Collectors.joining(", ")); 312 | if (StrUtil.isNotBlank(updateInjection)) { 313 | setClause += ", ".concat(updateInjection); 314 | } 315 | String updateSql = String.format("update %s set %s where ", adaptStringCase(table), setClause); 316 | updateSql += identityMapping.stream().map(identityPair -> String.format("%s=?", adaptStringCase(identityPair.getPartB()))).collect(Collectors.joining(" and ")); 317 | updatePreparedStatement = connection.prepareStatement(updateSql); 318 | log.debug("Update sql: {}", updateSql); 319 | } 320 | long startTime = System.currentTimeMillis(); 321 | log.debug("待更新数据:{}条", rows.size()); 322 | for (Row row : rows) { 323 | int i = 1; 324 | for (Tuple3 colMap : columnsMapping) { 325 | Object val = row.get(colMap.getPartA()); 326 | updatePreparedStatement.setObject(i++, val); 327 | } 328 | for (Tuple2 pair : identityMapping) { 329 | updatePreparedStatement.setObject(i++, row.get(pair.getPartA())); 330 | } 331 | updatePreparedStatement.addBatch(); 332 | } 333 | 334 | int[] updated = updatePreparedStatement.executeBatch(); 335 | log.debug("数据更新完成,耗时:{} ms", System.currentTimeMillis() - startTime); 336 | return updated.length; 337 | } 338 | 339 | public List> getColumnMapping() { 340 | return this.columnsMapping; 341 | } 342 | 343 | public void setColumnMapping(List> columnsMapping) { 344 | this.columnsMapping.clear(); 345 | this.columnsMapping.addAll(columnsMapping); 346 | } 347 | 348 | 349 | public List> autoMapTargetColumns() { 350 | log.info("{} 开始自动匹配列。。。。。。", this.getId()); 351 | long time = System.currentTimeMillis(); 352 | InputNode from = this.getBeforePipe().orElseThrow(() -> new NodeException("无法获得上一节点的列信息")).getFrom().orElseThrow(() -> new NodeException("无法获得上一节点的列信息")); 353 | String[] sourceColumns = NodeHelper.getUpstreamColumns(from); 354 | 355 | this.columnsMapping.clear(); 356 | for (String targetColumn : this.getOutputColumns()) { 357 | for (String sourceColumn : sourceColumns) { 358 | if (sourceColumn.equalsIgnoreCase(targetColumn)) { 359 | this.columnsMapping.add(new Tuple3<>(sourceColumn, targetColumn, UpsertTag.COMPARE_AND_UPDATE)); 360 | break; 361 | } 362 | } 363 | } 364 | if (this.columnsMapping.isEmpty()) { 365 | throw new NodeException("自动匹配列名失败!没有一个列能匹配上。"); 366 | } 367 | log.info("{}自动匹配列完成,耗时{}ms", 
this.getId(), System.currentTimeMillis() - time); 368 | return this.columnsMapping; 369 | } 370 | 371 | public UpsertOutputNode() { 372 | 373 | } 374 | 375 | public UpsertOutputNode(DataSource dataSource, String table) { 376 | this.dataSource = dataSource; 377 | this.batchSize = 100; 378 | this.table = table; 379 | this.updateInjection = ""; 380 | } 381 | 382 | public UpsertOutputNode(DataSource dataSource, String table, int batchSize) { 383 | this.dataSource = dataSource; 384 | this.batchSize = batchSize; 385 | this.table = table; 386 | this.updateInjection = ""; 387 | } 388 | 389 | public UpsertOutputNode(DataSource dataSource, String table, int batchSize, String updateInjection) { 390 | this.dataSource = dataSource; 391 | this.batchSize = batchSize; 392 | this.table = table; 393 | this.updateInjection = updateInjection; 394 | } 395 | 396 | public List> getIdentityMapping() { 397 | return identityMapping; 398 | } 399 | 400 | public void setIdentityMapping(List> identityMapping) { 401 | this.identityMapping.clear(); 402 | this.identityMapping.addAll(identityMapping); 403 | } 404 | 405 | public void addIdentityMapping(Tuple2 identityPair) { 406 | this.identityMapping.add(identityPair); 407 | } 408 | 409 | @Override 410 | public String[] getOutputColumns() throws NodeException { 411 | if (columns == null) { 412 | try { 413 | columns = DBUtil.lookupColumns(dataSource, table); 414 | } catch (SQLException e) { 415 | throw new NodeException(e); 416 | } 417 | } 418 | return columns; 419 | } 420 | 421 | @Override 422 | protected void prestart(Dataflow dataflow) throws NodePrestartException { 423 | super.prestart(dataflow); 424 | try { 425 | connection = dataSource.getConnection(); 426 | // connection.setAutoCommit(false); 427 | if (columnsMapping.isEmpty()) { 428 | autoMapTargetColumns(); 429 | } 430 | } catch (SQLException e) { 431 | throw new NodePrestartException(e); 432 | } 433 | 434 | } 435 | 436 | @Override 437 | protected void onDataflowStop() { 438 | try { 439 | // if (findMatchRowsPreparedstatement != null && !findMatchRowsPreparedstatement.isClosed()) { 440 | // findMatchRowsPreparedstatement.cancel(); 441 | // findMatchRowsPreparedstatement.close(); 442 | // } 443 | 444 | if (insertPreparedStatement != null && !insertPreparedStatement.isClosed()) { 445 | insertPreparedStatement.cancel(); 446 | insertPreparedStatement.close(); 447 | } 448 | 449 | if (updatePreparedStatement != null && !updatePreparedStatement.isClosed()) { 450 | updatePreparedStatement.cancel(); 451 | updatePreparedStatement.close(); 452 | } 453 | 454 | if (connection != null && !connection.isClosed()) { 455 | connection.close(); 456 | } 457 | } catch (SQLException e) { 458 | throw new RuntimeException(e); 459 | } 460 | 461 | } 462 | 463 | private String adaptStringCase(String src) { 464 | if (isIgnoreCase) { 465 | return src; 466 | } 467 | return String.format("\"%s\"", src); 468 | } 469 | 470 | @Override 471 | public long getDeleted() { 472 | return 0; 473 | } 474 | 475 | @Override 476 | public long getDeletingRate() { 477 | return 0; 478 | } 479 | } 480 | -------------------------------------------------------------------------------- /etl-engine/src/main/java/org/liuneng/base/Dataflow.java: -------------------------------------------------------------------------------- 1 | package org.liuneng.base; 2 | 3 | import cn.hutool.core.date.DateUtil; 4 | import cn.hutool.core.util.IdUtil; 5 | import lombok.Getter; 6 | import lombok.Setter; 7 | import lombok.extern.slf4j.Slf4j; 8 | import org.liuneng.exception.*; 9 | import 
org.liuneng.util.DataflowHelper; 10 | import org.liuneng.util.StrUtil; 11 | 12 | import java.time.LocalDateTime; 13 | import java.time.format.DateTimeFormatter; 14 | import java.util.ArrayList; 15 | import java.util.Date; 16 | import java.util.List; 17 | import java.util.concurrent.CountDownLatch; 18 | import java.util.concurrent.ExecutorService; 19 | import java.util.concurrent.Executors; 20 | import java.util.concurrent.TimeUnit; 21 | import java.util.concurrent.atomic.AtomicInteger; 22 | 23 | @Slf4j 24 | public class Dataflow { 25 | public enum Status { 26 | IDLE, RUNNING, STOPPING, STOPPED 27 | } 28 | 29 | @Getter 30 | @Setter 31 | private String id; 32 | 33 | @Getter 34 | private Status status; 35 | 36 | @Getter 37 | private final Node head; 38 | 39 | @Getter 40 | @Setter 41 | private int maxRetryCount = 3; 42 | 43 | @Getter 44 | @Setter 45 | private int processingThresholdLog = 20000; 46 | 47 | private final AtomicInteger allCompetedOutputCount = new AtomicInteger(0); 48 | 49 | private final LocalDateTime createTime; 50 | 51 | private int increasedId = 0; 52 | 53 | private final ExecutorService dataTransferExecutor; 54 | 55 | // protected final Object blockLocker = new Object();; 56 | 57 | @Getter 58 | private long startTime; 59 | 60 | @Getter 61 | private long endTime; 62 | 63 | @Getter 64 | private final List logList = new ArrayList<>(); 65 | 66 | /*private final List> logListeners = new ArrayList<>(); 67 | 68 | public void addLogListener(Consumer logListener) { 69 | logListeners.add(logListener); 70 | } 71 | 72 | public void removeLogListener(Consumer logListener) { 73 | logListeners.remove(logListener); 74 | }*/ 75 | 76 | public void addInfoLog(String message) { 77 | addLogByNodeID(null, LogLevel.INFO, message); 78 | } 79 | 80 | public void addErrorLog(String message) { 81 | addLogByNodeID(null, LogLevel.ERROR, message); 82 | } 83 | 84 | private void addLogByNodeID(String nodeID, LogLevel logLevel, String message) { 85 | EtlLog etlLog = new EtlLog(); 86 | etlLog.setId(IdUtil.fastSimpleUUID()); 87 | etlLog.setNodeID(nodeID); 88 | etlLog.setLogLevel(logLevel); 89 | etlLog.setMessage(message); 90 | etlLog.setTimestamp(System.currentTimeMillis()); 91 | logList.add(etlLog); 92 | /* 93 | for (Consumer logListener : this.logListeners) { 94 | logListenerExecutor.execute(() -> { 95 | try { 96 | logListener.accept(etlLog); 97 | } catch (Exception e) { 98 | log.error(e.getMessage(), e); 99 | } 100 | }); 101 | } 102 | */ 103 | } 104 | 105 | 106 | private void nodeReading(InputNode inputNode) { 107 | dataTransferExecutor.execute(() -> { 108 | String s1 = String.format("%s start reading...", inputNode.asNode().getName()); 109 | log.info(s1); 110 | addInfoLog(s1); 111 | 112 | long currentStartTime = System.currentTimeMillis(); 113 | int readTotal = 0; 114 | 115 | int errorCount = 0; 116 | while (this.isRunning()) { 117 | try { 118 | Row row; 119 | try { 120 | row = inputNode.read(); 121 | } catch (NodeReadingException e) { 122 | if (++errorCount <= maxRetryCount) { 123 | String msg = String.format("InputNode reading exception!%s will retry after 5 seconds...", e.getMessage()); 124 | log.error(msg); 125 | log.debug(msg, e); 126 | this.addLogByNodeID(inputNode.asNode().getId(), LogLevel.ERROR, msg); 127 | Thread.sleep(3000); 128 | continue; 129 | } else { 130 | log.error("节点【{}】读取时已重试{}次仍异常,数据流强制结束。", inputNode.asNode().getId(), maxRetryCount); 131 | this.addLogByNodeID(inputNode.asNode().getId(), LogLevel.ERROR, "已重试" + maxRetryCount + "次仍异常,数据流强制结束。"); 132 | break; 133 | } 134 | 135 | } 136 | 
errorCount = 0; 137 | 138 | if (row != null && !row.isEnd()) { 139 | readTotal++; 140 | } 141 | 142 | if (readTotal % processingThresholdLog == 0) { 143 | double elapsedSeconds = (System.currentTimeMillis() - currentStartTime) / 1000.0; 144 | elapsedSeconds = (elapsedSeconds == 0 ? 0.001 : elapsedSeconds); 145 | // double currentSpeed = ; 146 | double avgSpeed = (double) readTotal / (System.currentTimeMillis() - startTime) * 1000.0; 147 | String msg = String.format("输入节点[%s] 读取总量=%d, 当前速度=%.0f条/秒,平均速度%.0f条/秒", inputNode.asNode().getName(), readTotal, processingThresholdLog / elapsedSeconds, avgSpeed); 148 | log.info(msg); 149 | this.addLogByNodeID(inputNode.asNode().getId(), LogLevel.INFO, msg); 150 | currentStartTime = System.currentTimeMillis(); 151 | } 152 | 153 | CountDownLatch countDownLatch = new CountDownLatch(inputNode.asNode().getAfterPipes().size());//确保每个下游管道都接收到数据 154 | for (Pipe afterPipe : inputNode.asNode().getAfterPipes()) { 155 | if (!afterPipe.isValid()) { 156 | countDownLatch.countDown(); 157 | continue; 158 | } 159 | 160 | dataTransferExecutor.execute(() -> { 161 | try { 162 | log.trace("开始写入pipe。。。{}", row); 163 | afterPipe.beWritten(row); 164 | } catch (InterruptedException e) { 165 | log.error("Pipe 写入异常!"); 166 | throw new RuntimeException(e); 167 | } finally { 168 | countDownLatch.countDown(); 169 | } 170 | }); 171 | } 172 | countDownLatch.await();//确保每个下游管道都接收到数据 173 | 174 | if (row == null || row.isEnd()) { 175 | String msg = String.format("InputNode reading completed,%d total.", readTotal); 176 | log.info(msg); 177 | this.addLogByNodeID(inputNode.asNode().getId(), LogLevel.INFO, msg); 178 | break;//节点数据完全读取结束 179 | } 180 | 181 | } catch (InterruptedException e) { 182 | log.error("节点【{}】读取线程中断,异常消息:{}", inputNode.asNode().getId(), e.getMessage(), e); 183 | } 184 | } 185 | }); 186 | } 187 | 188 | private void nodeWriting(OutputNode outputNode) { 189 | Pipe beforePipe = outputNode.asNode().getBeforePipe().orElseThrow(() -> new RuntimeException("数据流配置有误,获取前一个管道为空!")); 190 | allCompetedOutputCount.addAndGet(1); 191 | dataTransferExecutor.execute(() -> { 192 | String logMessage = String.format("%s start writing...", outputNode.asNode().getName()); 193 | log.info(logMessage); 194 | addInfoLog(logMessage); 195 | 196 | long currentNodeStartTime = System.currentTimeMillis(); 197 | int writtenTotal = 0; 198 | int errorCount = 0; 199 | while (this.isRunning()) { 200 | try { 201 | Row row = null; 202 | if (errorCount == 0) { 203 | row = beforePipe.beRead(); 204 | } 205 | 206 | log.trace("开始输出"); 207 | try { 208 | outputNode.write(row); 209 | } catch (NodeWritingException e) { 210 | if (++errorCount <= maxRetryCount) { 211 | logMessage = String.format("输出节点[%s] 写入异常!%s 3秒后重试。。。", outputNode.asNode().getId(), e.getMessage()); 212 | log.debug(logMessage, e); 213 | this.addLogByNodeID(outputNode.asNode().getId(), LogLevel.ERROR, logMessage); 214 | Thread.sleep(3000); 215 | continue; 216 | } else { 217 | log.error("节点【{}】写入时已重试{}次仍异常,数据流强制结束。", outputNode.asNode().getId(), maxRetryCount); 218 | this.addLogByNodeID(outputNode.asNode().getId(), LogLevel.ERROR, "已重试" + maxRetryCount + "次仍异常,数据流强制结束。"); 219 | break; 220 | } 221 | } 222 | errorCount = 0; 223 | 224 | if (row == null || row.isEnd()) { 225 | logMessage = String.format("节点[%s] 写入结束,共%d条。", outputNode.asNode().getName(), writtenTotal); 226 | log.info(logMessage); 227 | this.addLogByNodeID(outputNode.asNode().getId(), LogLevel.INFO, logMessage); 228 | break; 229 | } 230 | 231 | if (++writtenTotal % processingThresholdLog == 
0) { 232 | double elapsed = System.currentTimeMillis() - currentNodeStartTime; 233 | // long currentSpeed; 234 | // long avgSpeed; 235 | // if (elapsed == 0) { 236 | // currentSpeed = -1; 237 | // } 238 | 239 | 240 | elapsed = (elapsed == 0 ? 0.001 : elapsed); 241 | logMessage = String.format("输出节点[%s] 输出总量=%d, 当前速度=%.0f条/秒,平均速度%.0f条/秒,当前管道(%d/%d)" 242 | , outputNode.asNode().getName() 243 | , writtenTotal 244 | , processingThresholdLog / elapsed * 1000 245 | , writtenTotal / ((System.currentTimeMillis() - startTime) / 1000.0) 246 | , beforePipe.getCurrentBufferSize() 247 | , beforePipe.getBufferCapacity() 248 | ); 249 | log.info(logMessage); 250 | this.addLogByNodeID(outputNode.asNode().getId(), LogLevel.INFO, logMessage); 251 | currentNodeStartTime = System.currentTimeMillis(); 252 | } 253 | } catch (InterruptedException e) { 254 | log.error("节点【{}】写入线程中断,异常消息:{}", outputNode.asNode().getId(), e.getMessage(), e); 255 | } 256 | } 257 | 258 | logMessage = String.format("节点[%s] 输出结束,输出总量=%d", outputNode.asNode().getId(), writtenTotal); 259 | log.info(logMessage); 260 | this.addLogByNodeID(outputNode.asNode().getId(), LogLevel.INFO, logMessage); 261 | if (allCompetedOutputCount.addAndGet(-1) == 0) { 262 | log.debug("所有输出节点已写入结束,准备结束数据流线程池。。。"); 263 | this.addInfoLog("所有输出节点已写入结束,准备结束数据流线程池。。。"); 264 | this.tryStop(); 265 | } 266 | }); 267 | } 268 | 269 | private void recursiveStartNodes(Node currentNode) { 270 | String timeStr = createTime.format(DateTimeFormatter.ofPattern("yyyyMMddHHmmss")); 271 | 272 | if (StrUtil.isBlank(currentNode.getId())) { 273 | String nodeID = String.format("%s-%d_%s", currentNode.getClass().getSimpleName(), ++increasedId, timeStr); 274 | currentNode.setId(nodeID); 275 | } 276 | this.addLogByNodeID(currentNode.getId(), LogLevel.INFO, String.format("节点[%s] 开始初始化...", currentNode.getName())); 277 | currentNode.prestart(this); 278 | // try { 279 | // } catch (NodePrestartException e) { 280 | // throw new Exception(String.format("节点[%s]初始化失败!error message: %s", currentNode.getId(), e.getMessage())); 281 | // } 282 | this.addLogByNodeID(currentNode.getId(), LogLevel.INFO, String.format("节点[%s] 初始化完成。", currentNode.getName())); 283 | 284 | if (currentNode instanceof InputNode) { 285 | nodeReading((InputNode) currentNode); 286 | } 287 | 288 | if (currentNode instanceof OutputNode) { 289 | nodeWriting((OutputNode) currentNode); 290 | } 291 | 292 | for (Pipe afterPipe : currentNode.getAfterPipes()) { 293 | if (afterPipe.isValid() && afterPipe.getTo().isPresent()) { 294 | if (StrUtil.isBlank(afterPipe.getId())) { 295 | String pipeID = String.format("Pipeline-%d_%s", ++increasedId, timeStr); 296 | afterPipe.setId(pipeID); 297 | } 298 | afterPipe.initialize(this); 299 | this.recursiveStartNodes(afterPipe.getTo().get().asNode()); 300 | } 301 | } 302 | } 303 | 304 | 305 | public Dataflow(Node headNode) { 306 | this.id = IdUtil.fastSimpleUUID(); 307 | this.head = headNode; 308 | this.createTime = LocalDateTime.now(); 309 | this.status = Status.IDLE; 310 | this.dataTransferExecutor = Executors.newCachedThreadPool(r -> new Thread(r, "DataTransmissionThread-" + this.id)); 311 | } 312 | 313 | public void syncStart() throws DataflowException { 314 | syncStart(999, TimeUnit.DAYS); 315 | } 316 | 317 | 318 | public boolean syncStart(long timeout, TimeUnit timeUnit) { 319 | if (status == Status.STOPPED || status == Status.STOPPING) { 320 | throw new DataflowPrestartException("启动失败,数据流已经结束!"); 321 | } 322 | 323 | 324 | Date now = new Date(); 325 | this.addInfoLog(String.format("%s 
        this.addInfoLog(String.format("%s Dataflow [%s] is starting...", DateUtil.format(now, "yyyy-MM-dd HH:mm:ss"), this.getId()));
        this.startTime = now.getTime();
        this.status = Status.RUNNING;
        try {
            this.recursiveStartNodes(head);
        } catch (Exception e) {
            this.tryStop();
            this.addErrorLog(e.getMessage());
        }
        boolean notTimeout = false;
        try {
            notTimeout = this.dataTransferExecutor.awaitTermination(timeout, timeUnit);
            endTime = System.currentTimeMillis();
            if (notTimeout) {
                this.addInfoLog(String.format("Dataflow [%s] finished, total elapsed time %.2fs", this.getId(), (endTime - startTime) / 1000f));
            } else {
                this.dataTransferExecutor.shutdownNow();
                this.addErrorLog(String.format("Dataflow [%s] timed out! A forced shutdown has been performed!", this.getId()));
            }
        } catch (InterruptedException e) {
            endTime = System.currentTimeMillis();
            this.dataTransferExecutor.shutdownNow();
            this.addErrorLog(String.format("Dataflow [%s] was unexpectedly interrupted, a forced shutdown has been performed! Total elapsed time %.2fs", this.getId(), (endTime - startTime) / 1000f));
        }

        if (this.dataTransferExecutor.isTerminated()) {
            this.addInfoLog("The dataflow thread pool shut down normally.");
            log.info("Thread pool of dataflow [{}] shut down normally.", this.getId());
        } else {
            this.addErrorLog("The dataflow thread pool did not shut down properly!");
            log.error("Thread pool of dataflow [{}] did not shut down properly!", this.getId());
        }
        this.status = Status.STOPPED;
        return notTimeout;
    }

    private void tryStop() {
        if (status == Status.STOPPING || status == Status.STOPPED) {
            return;
        }

        if (status == Status.IDLE) {
            status = Status.STOPPED;
            return;
        }

        this.status = Status.STOPPING;
        DataflowHelper.forEachNodesOrPipes(this, (node, pipe) -> {
            this.dataTransferExecutor.execute(() -> {
                if (node != null) {
                    node.onDataflowStop();
                } else if (pipe != null) {
                    pipe.stop();
                }
            });
            return true;
        });
        this.dataTransferExecutor.shutdown();
    }

    public void asyncStop(long timeoutMillis) {
        this.tryStop();
        new Thread(() -> {
            try {
                Thread.sleep(timeoutMillis);
                if (!dataTransferExecutor.isTerminated()) {
                    dataTransferExecutor.shutdownNow();
                }

            } catch (InterruptedException e) {
                throw new RuntimeException(e);
            } finally {
                status = Status.STOPPED;
            }
        }).start();
    }

    public void syncStop(long timeout, TimeUnit timeUnit) throws DataflowStoppingException {
        this.tryStop();
        try {
            boolean notTimeout = this.dataTransferExecutor.awaitTermination(timeout, timeUnit);
            if (notTimeout) {
                this.addInfoLog(String.format("Dataflow [%s] stopped successfully.", this.getId()));
            } else {
                this.dataTransferExecutor.shutdownNow();
                this.addErrorLog(String.format("Stopping of dataflow [%s] timed out, a forced shutdown has been performed!", this.getId()));
                throw new DataflowStoppingException(String.format("Stopping of dataflow [%s] timed out, a forced shutdown has been performed!", this.getId()));
            }
        } catch (InterruptedException e) {
            this.dataTransferExecutor.shutdownNow();
            this.addErrorLog(String.format("Stopping of dataflow [%s] was interrupted, a forced shutdown has been performed!", this.getId()));
            throw new DataflowStoppingException(String.format("Stopping of dataflow [%s] was interrupted, a forced shutdown has been performed!", this.getId()));
        }
    }

    public void syncStopWithError(long timeout, TimeUnit timeUnit, String errorMsg) throws DataflowStoppingException {
        this.addErrorLog(errorMsg);
        this.syncStop(timeout, timeUnit);
    }

    public void syncStopWithInfo(long timeout, TimeUnit timeUnit, String infoMsg) throws DataflowStoppingException {
        this.addInfoLog(infoMsg);
        this.syncStop(timeout, timeUnit);
    }


//    public void awaitStoppingSignal() throws InterruptedException {
//        synchronized (blockLocker) {
//            blockLocker.wait();
//        }
//    }

    public boolean isRunning() {
        return status == Status.RUNNING;
    }

    public boolean isStopped() {
        return status == Status.STOPPED;
    }

//    protected ExecutorService getDataTransferExecutor() {
//        return dataTransferExecutor;
//    }

}
--------------------------------------------------------------------------------
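
Usage note (editor's sketch): the Dataflow lifecycle shown above amounts to constructing the flow with its head node, calling one of the syncStart overloads (the no-argument overload simply delegates to syncStart(999, TimeUnit.DAYS)), and optionally stopping it with syncStop or asyncStop. The sketch below only relies on the Dataflow constructor, syncStart(long, TimeUnit), and isStopped() from the class above; how the head InputNode is built and wired to OutputNodes via Pipes is engine-specific, so buildHeadNode() is a hypothetical placeholder.

import java.util.concurrent.TimeUnit;

import org.liuneng.base.Dataflow;
import org.liuneng.base.Node;

public class DataflowUsageSketch {

    public static void main(String[] args) {
        // Hypothetical: obtain a head InputNode already wired to its OutputNodes via Pipes.
        Node head = buildHeadNode();

        Dataflow dataflow = new Dataflow(head);

        // Blocks until every output node finishes or the timeout expires;
        // on timeout the executor is shut down forcibly inside syncStart.
        boolean finishedInTime = dataflow.syncStart(2, TimeUnit.HOURS);

        System.out.println("finished in time: " + finishedInTime + ", stopped: " + dataflow.isStopped());
    }

    // Placeholder for illustration only; a real flow assembles concrete input/output nodes here.
    private static Node buildHeadNode() {
        throw new UnsupportedOperationException("wire up concrete input/output nodes here");
    }
}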