├── src ├── test │ ├── resources │ │ └── sql │ └── java │ │ └── com │ │ └── lan │ │ └── lineage │ │ └── druid │ │ └── DruidTest.java └── main │ └── java │ └── com │ └── lan │ └── lineage │ ├── common │ ├── EmptyUtils.java │ ├── LineageColumn.java │ ├── TreeNodeIterator.java │ └── TreeNode.java │ └── druid │ └── LineageUtils.java ├── pom.xml └── README.md /src/test/resources/sql: -------------------------------------------------------------------------------- 1 | ## 简单sql 2 | select user_id,user_name 3 | from user 4 | 5 | ## sub sql 6 | select 7 | user_id as uid 8 | ,user_name as uname 9 | from 10 | ( 11 | select user_id, concat("test",user_name) as user_name 12 | from user 13 | )t 14 | 15 | ## union sql 16 | select 17 | user_id 18 | ,user_name 19 | from user 20 | union all 21 | 22 | select 23 | sub_user_id as user_id 24 | ,sub_user_name as user_name 25 | from sub_user 26 | 27 | 28 | ## join sql 29 | select t1.user_id,t2.user_name,t2.sub_user_id,t2.sub_user_name 30 | from user t1 31 | left join sub_user t2 32 | on t1.user_id = t2.user_id 33 | 34 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | com.lan.lineage 8 | sql-lineage-parser 9 | 1.0-SNAPSHOT 10 | 11 | 12 | 13 | org.apache.maven.plugins 14 | maven-compiler-plugin 15 | 16 | 8 17 | 8 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | org.projectlombok 27 | lombok 28 | 1.16.18 29 | 30 | 31 | org.springframework 32 | spring-core 33 | 5.1.6.RELEASE 34 | 35 | 36 | com.alibaba 37 | druid 38 | 1.1.12 39 | 40 | 41 | com.alibaba 42 | fastjson 43 | 1.2.51 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /src/main/java/com/lan/lineage/common/EmptyUtils.java: -------------------------------------------------------------------------------- 1 | package com.lan.lineage.common; 2 | 3 | import org.springframework.util.ObjectUtils; 4 | 
/**
 * Null-safe emptiness checks for collections, maps, arrays and arbitrary objects.
 *
 * <p>All methods are static; the class cannot be instantiated.
 *
 * @author lanxueri
 */
public final class EmptyUtils {

    private EmptyUtils() {
        // utility class
    }

    /**
     * Tells whether a collection holds no elements.
     *
     * @param coll collection to test, may be {@code null}
     * @return {@code true} when {@code coll} is {@code null} or has size 0
     */
    public static boolean isEmpty(Collection<?> coll) {
        return coll == null || coll.isEmpty();
    }

    /**
     * Tells whether a collection holds at least one element.
     *
     * @param coll collection to test, may be {@code null}
     * @return the negation of {@link #isEmpty(Collection)}
     */
    public static boolean isNotEmpty(Collection<?> coll) {
        return !isEmpty(coll);
    }

    /**
     * Tells whether a map holds no entries.
     *
     * @param map map to test, may be {@code null}
     * @return {@code true} when {@code map} is {@code null} or has no entries
     */
    public static boolean isEmpty(Map<?, ?> map) {
        return map == null || map.isEmpty();
    }

    /**
     * Tells whether a map holds at least one entry.
     *
     * @param map map to test, may be {@code null}
     * @return the negation of {@link #isEmpty(Map)}
     */
    public static boolean isNotEmpty(Map<?, ?> map) {
        return !isEmpty(map);
    }

    /**
     * Tells whether an arbitrary object is "empty": it is {@code null}, or its
     * {@code toString()} is {@code null} or the empty string.
     *
     * @param t   object to test, may be {@code null}
     * @param <T> type of the object
     * @return {@code true} when the object is considered empty
     */
    public static <T> boolean isEmpty(T t) {
        if (t == null) {
            return true;
        }
        String text = t.toString();
        return text == null || text.isEmpty();
    }

    /**
     * Tells whether an array has at least one element.
     *
     * @param datas array to test, may be {@code null}
     * @param <T>   component type
     * @return the negation of {@link #isEmpty(Object[])}
     */
    public static <T> boolean isNotEmpty(T[] datas) {
        return !isEmpty(datas);
    }

    /**
     * Tells whether an array is empty.
     *
     * @param datas array to test, may be {@code null}
     * @param <T>   component type
     * @return {@code true} when {@code datas} is {@code null} or has length 0
     */
    public static <T> boolean isEmpty(T[] datas) {
        return datas == null || datas.length == 0;
    }

    /**
     * Tells whether an arbitrary object is not "empty".
     *
     * @param t   object to test, may be {@code null}
     * @param <T> type of the object
     * @return the negation of {@link #isEmpty(Object)}
     */
    public static <T> boolean isNotEmpty(T t) {
        return !isEmpty(t);
    }
}
14 | 15 | private String sourceDbName; 16 | 17 | private String sourceTableName; 18 | 19 | private String sourceColumnName; 20 | 21 | private String expression; 22 | 23 | private Boolean isEnd = false; 24 | 25 | 26 | public void setSourceTableName(String sourceTableName) { 27 | sourceTableName = EmptyUtils.isNotEmpty(sourceTableName) ? sourceTableName.replace("`","") : sourceTableName; 28 | if (sourceTableName.contains(".")){ 29 | this.sourceDbName = sourceTableName.substring(0,sourceTableName.indexOf(".")); 30 | this.sourceTableName = sourceTableName.substring(sourceTableName.indexOf(".")+1); 31 | }else { 32 | this.sourceTableName = sourceTableName; 33 | 34 | } 35 | } 36 | 37 | 38 | public int compareTo(LineageColumn o) { 39 | if (this.getTargetColumnName().equals(o.getTargetColumnName())) 40 | return 0; 41 | return -1; 42 | } 43 | 44 | 45 | @Override 46 | public boolean equals(Object o) { 47 | if (this == o) return true; 48 | if (o == null || getClass() != o.getClass()) return false; 49 | 50 | LineageColumn myColumn = (LineageColumn) o; 51 | 52 | if (!this.getTargetColumnName().equals(myColumn.getTargetColumnName())) return false; 53 | if (EmptyUtils.isNotEmpty(sourceTableName) && !sourceTableName.equals(myColumn.sourceTableName)) return false; 54 | if (EmptyUtils.isNotEmpty(sourceColumnName)) 55 | return sourceColumnName.equals(myColumn.sourceColumnName); 56 | return true; 57 | } 58 | 59 | @Override 60 | public int hashCode() { 61 | int result = getTargetColumnName().hashCode(); 62 | if (EmptyUtils.isNotEmpty(sourceTableName)){ 63 | result = 31 * result + sourceTableName.hashCode(); 64 | } 65 | if (EmptyUtils.isNotEmpty(sourceColumnName)){ 66 | result = 31 * result + sourceColumnName.hashCode(); 67 | } 68 | return result; 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/main/java/com/lan/lineage/common/TreeNodeIterator.java: -------------------------------------------------------------------------------- 1 
| package com.lan.lineage.common; 2 | 3 | import java.util.Iterator; 4 | 5 | /** 6 | * @author lanxueri 7 | * @ClassName TreeNodeIterator 8 | * @Description TODO 9 | * @createTime 2020-07-31 10 | */ 11 | public class TreeNodeIterator implements Iterator> { 12 | 13 | enum ProcessStages { 14 | ProcessParent, ProcessChildCurNode, ProcessChildSubNode 15 | } 16 | 17 | private ProcessStages doNext; 18 | 19 | private TreeNode next; 20 | 21 | private Iterator> childrenCurNodeIter; 22 | 23 | private Iterator> childrenSubNodeIter; 24 | 25 | private TreeNode treeNode; 26 | 27 | public TreeNodeIterator(TreeNode treeNode) { 28 | this.treeNode = treeNode; 29 | this.doNext = ProcessStages.ProcessParent; 30 | this.childrenCurNodeIter = treeNode.children.iterator(); 31 | } 32 | 33 | 34 | public boolean hasNext() { 35 | 36 | if (this.doNext == ProcessStages.ProcessParent) { 37 | this.next = this.treeNode; 38 | this.doNext = ProcessStages.ProcessChildCurNode; 39 | return true; 40 | } 41 | 42 | if (this.doNext == ProcessStages.ProcessChildCurNode) { 43 | if (childrenCurNodeIter.hasNext()) { 44 | TreeNode childDirect = childrenCurNodeIter.next(); 45 | childrenSubNodeIter = childDirect.iterator(); 46 | this.doNext = ProcessStages.ProcessChildSubNode; 47 | return hasNext(); 48 | } else { 49 | this.doNext = null; 50 | return false; 51 | } 52 | } 53 | 54 | if (this.doNext == ProcessStages.ProcessChildSubNode) { 55 | if (childrenSubNodeIter.hasNext()) { 56 | this.next = childrenSubNodeIter.next(); 57 | return true; 58 | } else { 59 | this.next = null; 60 | this.doNext = ProcessStages.ProcessChildCurNode; 61 | return hasNext(); 62 | } 63 | } 64 | 65 | return false; 66 | } 67 | 68 | 69 | public TreeNode next() { 70 | return this.next; 71 | } 72 | 73 | /** 74 | * 目前不支持删除节点 75 | */ 76 | public void remove() { 77 | throw new UnsupportedOperationException(); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- 
/src/test/java/com/lan/lineage/druid/DruidTest.java: -------------------------------------------------------------------------------- 1 | package com.lan.lineage.druid; 2 | 3 | import com.alibaba.fastjson.JSONObject; 4 | import com.lan.lineage.common.LineageColumn; 5 | import com.lan.lineage.common.TreeNode; 6 | import org.springframework.util.ResourceUtils; 7 | 8 | import java.io.BufferedReader; 9 | import java.io.File; 10 | import java.io.FileReader; 11 | import java.io.IOException; 12 | import java.util.Arrays; 13 | import java.util.LinkedHashMap; 14 | import java.util.List; 15 | import java.util.Map; 16 | import java.util.Set; 17 | import java.util.regex.Matcher; 18 | import java.util.regex.Pattern; 19 | 20 | /** 21 | * @author lanxueri 22 | * @ClassName DruidTest 23 | * @Description TODO 24 | * @createTime 2020-07-31 25 | */ 26 | public class DruidTest { 27 | String file = this.getClass().getResource("/sql").getFile(); 28 | public static void main(String[] args) throws IOException { 29 | 30 | File file = ResourceUtils.getFile(new DruidTest().file); 31 | BufferedReader bufferedReader = new BufferedReader(new FileReader(file)); 32 | String line = ""; 33 | StringBuilder sb = new StringBuilder(); 34 | while ((line = bufferedReader.readLine()) != null) { 35 | sb.append(line); 36 | sb.append("\n"); 37 | } 38 | bufferedReader.close(); 39 | 40 | Map map = new LinkedHashMap<>(); 41 | Pattern pattern = Pattern.compile("##\\s.*\\S"); 42 | Matcher matcher = pattern.matcher(sb.toString()); 43 | List list = Arrays.asList(sb.toString().split("##\\s.*\\S")); 44 | int j = 1; 45 | while (matcher.find()){ 46 | String key = matcher.group().replace("##","").trim(); 47 | map.put(key,list.get(j)); 48 | j++; 49 | } 50 | 51 | String key = "sub sql"; 52 | System.out.println("Begin parse:"+key+"\n"+"sql:"+map.get(key)); 53 | 54 | LineageColumn root = new LineageColumn(); 55 | TreeNode rootNode = new TreeNode<>(root); 56 | 57 | LineageUtils.columnLineageAnalyzer(map.get(key),rootNode); 
58 | 59 | for (TreeNode e : rootNode.getChildren()) { 60 | Set leafNodes = e.getAllLeafData(); 61 | for (LineageColumn f : leafNodes){ 62 | if (f.getIsEnd()){ 63 | System.out.println(e.getData().getTargetColumnName() + "\tfrom:"+ JSONObject.toJSONString(f)+"\n"); 64 | } 65 | 66 | } 67 | 68 | } 69 | 70 | for (TreeNode node : rootNode) { 71 | 72 | StringBuilder indent = new StringBuilder(); 73 | for (int i = 1; i < node.getLevel();i++){ 74 | indent.append(" "); 75 | } 76 | 77 | System.out.println(indent.toString() + JSONObject.toJSONString(node.getData())+"\n"); 78 | } 79 | 80 | 81 | 82 | 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## 简介 2 | 数据平台建设过程中需要获取数据血缘信息,通过对血缘数据的探索,就可以快速获取数据,加快数据开发的效率。 3 | 4 | 5 | 对于以hive为中心的数仓系统,简化架构可能为 6 | ![DBE7657D-731D-4D32-9200-B03012B01B01.png](http://ww1.sinaimg.cn/large/71f45afdgy1gha7grg147j218y0dudh9.jpg) 7 | 8 | - 数据同步阶段可以通过同步系统解决源与数仓的列级血缘信息 9 | - ETL阶段可以使用hive的hooks的得到ETL过程的的列级血缘信息,例如http://cxy7.com/articles/2017/11/10/1510310104765.html 10 | - 最后一个数据展示阶段则可以通过对页面SQL的解析获取报表页面与数据的关系,此阶段则为本例需要处理问题 11 | 12 | 通过这几个阶段血缘数据的打通,可以跟踪数据从产生到展示的整个过程,提高数据监控及使用的效率。 13 | 14 | 15 | ### 如何解析SQL 16 | 17 | 可以通过语法树的解析得到列级的血缘。那么怎么得到语法树呢? 
18 | 19 | - [alibaba-druid](https://github.com/alibaba/druid/wiki/Druid_SQL_AST) 20 | - [antlr4](https://github.com/antlr/antlr4) 21 | 22 | 23 | 可以通过以上两种工具的使用,得到语法树,那么剩下的问题就是如何将语法树解析为血缘信息。 24 | 25 | 此例使用一个简单的对象LineageColumn进行血缘存储 26 | 27 | name | desc 28 | ---|--- 29 | targetColumnName | 目标字段,即SELECT的列 30 | sourceDbName | 字段来源DB 31 | sourceTableName | 字段来源表 32 | sourceColumnName | 字段来源列 33 | expression | 表达式 34 | isEnd | 是否结束标识 35 | 36 | 37 | 通过递归迭代获取树结构,最终树的叶子节点即为最终的血缘信息,以SQL为例 38 | ``` 39 | select 40 | user_id as uid 41 | ,user_name as uname 42 | from 43 | ( 44 | select user_id, concat("test",user_name) as user_name 45 | from user 46 | )t 47 | ``` 48 | 需要经过两层循环,过程为 49 | ![DC10D4B3-A22E-40AD-AFB5-C12D95C2CEC1.png](http://ww1.sinaimg.cn/large/71f45afdgy1ghacfj01a8j212c10in0g.jpg) 50 | 51 | 52 | 最终血缘信息为 53 | ``` 54 | --- 输出列uid 数据来源于user表的user_id 列 55 | uid from:{"expression":"user_id","isEnd":true,"sourceTableName":"user","targetColumnName":"user_id"} 56 | 57 | 58 | -- 输出列uname 数据来源于常量字段test及 user表的user_name 列 59 | uname from:{"expression":"concat('test', user_name)","isEnd":true,"sourceTableName":"user","targetColumnName":"user_name"} 60 | 61 | uname from:{"expression":"concat('test', user_name)","isEnd":true,"sourceTableName":"user","targetColumnName":"'test'"} 62 | 63 | ``` 64 | 展开的树型结构为 65 | ``` 66 | {"isEnd":false} 67 | 68 | {"expression":"user_id","isEnd":false,"targetColumnName":"uid"} 69 | 70 | {"isEnd":false,"targetColumnName":"user_id"} 71 | 72 | {"expression":"user_id","isEnd":false,"targetColumnName":"user_id"} 73 | 74 | {"expression":"user_id","isEnd":true,"sourceTableName":"user","targetColumnName":"user_id"} 75 | 76 | {"expression":"user_name","isEnd":false,"targetColumnName":"uname"} 77 | 78 | {"isEnd":false,"targetColumnName":"user_name"} 79 | 80 | {"expression":"concat('test', user_name)","isEnd":false,"targetColumnName":"user_name"} 81 | 82 | {"expression":"concat('test', 
user_name)","isEnd":true,"sourceTableName":"user","targetColumnName":"'test'"} 83 | 84 | {"expression":"concat('test', user_name)","isEnd":true,"sourceTableName":"user","targetColumnName":"user_name"} 85 | 86 | ``` 87 | 88 | 叶子节点即为最终关联的目标表及列。 89 | ### 如何使用 90 | 参考DruidTest即可。 91 | 92 | # 可能遇到的问题 93 | #### interval 语句解析异常 94 | 如果数据可视化使用的是tableau,动态sql会生成很多interval语句,interval使用druid进行语法解析的时候可能会报错。 95 | ###### 原因 96 | druid对interval的词法解析存在缺陷 97 | ###### 解决方案 98 | 下载druid源码,参照https://github.com/alibaba/druid/pull/4368 进行代码修改,使用修改源码后的jar包。 99 | -------------------------------------------------------------------------------- /src/main/java/com/lan/lineage/common/TreeNode.java: -------------------------------------------------------------------------------- 1 | package com.lan.lineage.common; 2 | 3 | import java.util.HashSet; 4 | import java.util.Iterator; 5 | import java.util.LinkedList; 6 | import java.util.List; 7 | import java.util.Set; 8 | 9 | /** 10 | * @author lanxueri 11 | * @ClassName TreeNode 12 | * @Description TODO 13 | * @createTime 2020-07-31 14 | */ 15 | public class TreeNode implements Iterable> { 16 | /** 17 | * 树节点 18 | */ 19 | public T data; 20 | 21 | /** 22 | * 父节点,根没有父节点 23 | */ 24 | public TreeNode parent; 25 | 26 | /** 27 | * 子节点,叶子节点没有子节点 28 | */ 29 | public List> children; 30 | 31 | /** 32 | * 保存了当前节点及其所有子节点,方便查询 33 | */ 34 | private List> elementsIndex; 35 | 36 | 37 | public T getData() { 38 | return data; 39 | } 40 | 41 | /** 42 | * 构造函数 43 | * 44 | * @param data 45 | */ 46 | public TreeNode(T data) { 47 | this.data = data; 48 | this.children = new LinkedList>(); 49 | this.elementsIndex = new LinkedList>(); 50 | this.elementsIndex.add(this); 51 | } 52 | 53 | public List> getChildren() { 54 | return children; 55 | } 56 | 57 | /** 58 | * 判断是否为根:根没有父节点 59 | * 60 | * @return 61 | */ 62 | public boolean isRoot() { 63 | return parent == null; 64 | } 65 | 66 | /** 67 | * 判断是否为叶子节点:子节点没有子节点 68 | * 69 | * @return 70 | */ 71 | public boolean isLeaf() { 72 
| return children.size() == 0; 73 | } 74 | 75 | /** 76 | * 添加一个子节点 77 | * 78 | * @param child 79 | * @return 80 | */ 81 | public TreeNode addChild(T child) { 82 | TreeNode childNode = new TreeNode(child); 83 | 84 | childNode.parent = this; 85 | 86 | this.children.add(childNode); 87 | 88 | this.registerChildForSearch(childNode); 89 | 90 | return childNode; 91 | } 92 | 93 | 94 | public TreeNode addChild(TreeNode childNode) { 95 | childNode.parent = this; 96 | 97 | this.children.add(childNode); 98 | 99 | this.registerChildForSearch(childNode); 100 | 101 | return childNode; 102 | } 103 | 104 | /** 105 | * 获取当前节点的层 106 | * 107 | * @return 108 | */ 109 | public int getLevel() { 110 | if (this.isRoot()) { 111 | return 0; 112 | } else { 113 | return parent.getLevel() + 1; 114 | } 115 | } 116 | 117 | /** 118 | * 递归为当前节点以及当前节点的所有父节点增加新的节点 119 | * 120 | * @param node 121 | */ 122 | private void registerChildForSearch(TreeNode node) { 123 | elementsIndex.add(node); 124 | if (parent != null) { 125 | parent.registerChildForSearch(node); 126 | } 127 | } 128 | 129 | /** 130 | * 从当前节点及其所有子节点中搜索某节点 131 | * 132 | * @param cmp 133 | * @return 134 | */ 135 | public TreeNode findTreeNode(Comparable cmp) { 136 | for (TreeNode element : this.elementsIndex) { 137 | T elData = element.data; 138 | if (cmp.compareTo(elData) == 0) 139 | return element; 140 | } 141 | 142 | return null; 143 | } 144 | 145 | 146 | public TreeNode findChildNode(Comparable cmp) { 147 | for (TreeNode element : this.getChildren()) { 148 | T elData = element.data; 149 | if (cmp.compareTo(elData) == 0) 150 | return element; 151 | } 152 | 153 | return null; 154 | } 155 | 156 | 157 | 158 | /** 159 | * 获取当前节点的迭代器 160 | * 161 | * @return 162 | */ 163 | public Iterator> iterator() { 164 | TreeNodeIterator iterator = new TreeNodeIterator(this); 165 | return iterator; 166 | } 167 | 168 | @Override 169 | public String toString() { 170 | return data != null ? 
data.toString() : "[tree data null]"; 171 | } 172 | 173 | 174 | 175 | /** 176 | * 获取所有叶子节点的数据 177 | * @return 178 | */ 179 | public Set> getAllLeafs() { 180 | Set> leafNodes = new HashSet>(); 181 | if (this.children.isEmpty()) { 182 | leafNodes.add(this); 183 | } else { 184 | for (TreeNode child : this.children) { 185 | leafNodes.addAll(child.getAllLeafs()); 186 | } 187 | } 188 | return leafNodes; 189 | } 190 | 191 | /** 192 | * 获取所有叶子节点的数据 193 | * @return 194 | */ 195 | public Set getAllLeafData() { 196 | Set leafNodes = new HashSet(); 197 | if (this.children.isEmpty()) { 198 | leafNodes.add(this.data); 199 | } else { 200 | for (TreeNode child : this.children) { 201 | leafNodes.addAll(child.getAllLeafData()); 202 | } 203 | } 204 | return leafNodes; 205 | } 206 | } 207 | -------------------------------------------------------------------------------- /src/main/java/com/lan/lineage/druid/LineageUtils.java: -------------------------------------------------------------------------------- 1 | package com.lan.lineage.druid; 2 | 3 | import com.alibaba.druid.sql.SQLUtils; 4 | import com.alibaba.druid.sql.ast.SQLExpr; 5 | import com.alibaba.druid.sql.ast.SQLStatement; 6 | import com.alibaba.druid.sql.ast.expr.SQLAggregateExpr; 7 | import com.alibaba.druid.sql.ast.expr.SQLBinaryOpExpr; 8 | import com.alibaba.druid.sql.ast.expr.SQLCaseExpr; 9 | import com.alibaba.druid.sql.ast.expr.SQLCharExpr; 10 | import com.alibaba.druid.sql.ast.expr.SQLIdentifierExpr; 11 | import com.alibaba.druid.sql.ast.expr.SQLIntegerExpr; 12 | import com.alibaba.druid.sql.ast.expr.SQLMethodInvokeExpr; 13 | import com.alibaba.druid.sql.ast.expr.SQLNumberExpr; 14 | import com.alibaba.druid.sql.ast.expr.SQLPropertyExpr; 15 | import com.alibaba.druid.sql.ast.statement.SQLExprTableSource; 16 | import com.alibaba.druid.sql.ast.statement.SQLJoinTableSource; 17 | import com.alibaba.druid.sql.ast.statement.SQLSelectItem; 18 | import com.alibaba.druid.sql.ast.statement.SQLSelectQuery; 19 | import 
com.alibaba.druid.sql.ast.statement.SQLSelectQueryBlock; 20 | import com.alibaba.druid.sql.ast.statement.SQLSelectStatement; 21 | import com.alibaba.druid.sql.ast.statement.SQLSubqueryTableSource; 22 | import com.alibaba.druid.sql.ast.statement.SQLTableSource; 23 | import com.alibaba.druid.sql.ast.statement.SQLUnionQuery; 24 | import com.alibaba.druid.sql.ast.statement.SQLUnionQueryTableSource; 25 | import com.alibaba.druid.util.JdbcConstants; 26 | import com.lan.lineage.common.EmptyUtils; 27 | import com.lan.lineage.common.LineageColumn; 28 | import com.lan.lineage.common.TreeNode; 29 | 30 | import java.util.ArrayList; 31 | import java.util.List; 32 | import java.util.concurrent.atomic.AtomicReference; 33 | 34 | /** 35 | * @author lanxueri 36 | * @ClassName LineageUtils 37 | * @Description TODO 38 | * @createTime 2020-07-31 39 | */ 40 | public class LineageUtils { 41 | 42 | 43 | public static void columnLineageAnalyzer(String sql,TreeNode node) { 44 | if ( EmptyUtils.isEmpty(sql)){ 45 | return; 46 | } 47 | AtomicReference isContinue = new AtomicReference<>(false); 48 | List statements = new ArrayList<>(); 49 | // 解析 50 | try{ 51 | statements = SQLUtils.parseStatements(sql, JdbcConstants.MYSQL); 52 | }catch (Exception e){ 53 | System.out.println("can't parser by druid MYSQL"+e); 54 | } 55 | // 只考虑一条语句 56 | SQLStatement statement = statements.get(0); 57 | // 只考虑查询语句 58 | SQLSelectStatement sqlSelectStatement = (SQLSelectStatement) statement; 59 | SQLSelectQuery sqlSelectQuery = sqlSelectStatement.getSelect().getQuery(); 60 | // 非union的查询语句 61 | if (sqlSelectQuery instanceof SQLSelectQueryBlock) { 62 | SQLSelectQueryBlock sqlSelectQueryBlock = (SQLSelectQueryBlock) sqlSelectQuery; 63 | // 获取字段列表 64 | List selectItems = sqlSelectQueryBlock.getSelectList(); 65 | selectItems.forEach(x -> { 66 | // 处理--------------------- 67 | String column = EmptyUtils.isEmpty(x.getAlias()) ? 
x.toString() : x.getAlias(); 68 | 69 | if (column.contains(".")){ 70 | column = column.substring(column.indexOf(".")+1); 71 | } 72 | column = column.replace("`",""); 73 | 74 | String expr = x.getExpr().toString(); 75 | LineageColumn myColumn = new LineageColumn(); 76 | myColumn.setTargetColumnName(column); 77 | myColumn.setExpression(expr); 78 | 79 | TreeNode itemNode = new TreeNode<>(myColumn); 80 | SQLExpr expr1 = x.getExpr(); 81 | //解析表达式,添加解析结果子节点 82 | handlerExpr(expr1,itemNode); 83 | 84 | if (node.getLevel() == 0 || node.getData().getTargetColumnName().equals(column) ){ 85 | node.addChild(itemNode); 86 | isContinue.set(true); 87 | } 88 | 89 | }); 90 | if (isContinue.get()){ 91 | // 获取表 92 | SQLTableSource table = sqlSelectQueryBlock.getFrom(); 93 | // 普通单表 94 | if (table instanceof SQLExprTableSource) { 95 | // 处理最终表--------------------- 96 | handlerSQLExprTableSource(node, (SQLExprTableSource) table); 97 | 98 | } else if (table instanceof SQLJoinTableSource) { 99 | //处理join 100 | handlerSQLJoinTableSource(node, (SQLJoinTableSource) table); 101 | 102 | } else if (table instanceof SQLSubqueryTableSource) { 103 | // 处理 subquery --------------------- 104 | handlerSQLSubqueryTableSource(node, table); 105 | 106 | }else if (table instanceof SQLUnionQueryTableSource) { 107 | // 处理 union --------------------- 108 | handlerSQLUnionQueryTableSource(node, (SQLUnionQueryTableSource) table); 109 | } 110 | } 111 | 112 | 113 | // 处理--------------------- 114 | // union的查询语句 115 | } else if (sqlSelectQuery instanceof SQLUnionQuery) { 116 | // 处理--------------------- 117 | columnLineageAnalyzer(((SQLUnionQuery) sqlSelectQuery).getLeft().toString(),node); 118 | columnLineageAnalyzer(((SQLUnionQuery) sqlSelectQuery).getRight().toString(),node); 119 | 120 | } 121 | } 122 | 123 | /** 124 | * 处理UNION子句 125 | * @param node 126 | * @param table 127 | */ 128 | private static void handlerSQLUnionQueryTableSource(TreeNode node, SQLUnionQueryTableSource table) { 129 | 
node.getAllLeafs().stream().filter(e -> !e.getData().getIsEnd()).forEach(e->{ 130 | columnLineageAnalyzer(table.getUnion().toString(), e); 131 | }); 132 | } 133 | 134 | /** 135 | * 处理sub子句 136 | * @param node 137 | * @param table 138 | */ 139 | private static void handlerSQLSubqueryTableSource(TreeNode node, SQLTableSource table) { 140 | node.getAllLeafs().stream().filter(e -> !e.getData().getIsEnd()).forEach(e->{ 141 | columnLineageAnalyzer(table.toString(), e); 142 | }); 143 | } 144 | 145 | 146 | /** 147 | * 处理JOIN 148 | * @param node 149 | * @param table 150 | */ 151 | private static void handlerSQLJoinTableSource(TreeNode node,SQLJoinTableSource table){ 152 | // 处理--------------------- 153 | // 子查询作为表 154 | node.getAllLeafs().stream().filter(e -> !e.getData().getIsEnd()).forEach(e->{ 155 | if (table.getLeft() instanceof SQLJoinTableSource ){ 156 | handlerSQLJoinTableSource(node, (SQLJoinTableSource) table.getLeft()); 157 | }else if (table.getLeft() instanceof SQLExprTableSource){ 158 | handlerSQLExprTableSource(node, (SQLExprTableSource) table.getLeft()); 159 | }else if (table.getLeft() instanceof SQLSubqueryTableSource) { 160 | // 处理--------------------- 161 | handlerSQLSubqueryTableSource(node, table.getLeft()); 162 | } 163 | else if (table.getLeft() instanceof SQLUnionQueryTableSource) { 164 | // 处理--------------------- 165 | handlerSQLUnionQueryTableSource(node, (SQLUnionQueryTableSource) table.getLeft()); 166 | } 167 | }); 168 | 169 | 170 | node.getAllLeafs().stream().filter(e -> !e.getData().getIsEnd()).forEach(e->{ 171 | if (table.getRight() instanceof SQLJoinTableSource ){ 172 | handlerSQLJoinTableSource(node, (SQLJoinTableSource) table.getRight()); 173 | }else if (table.getRight() instanceof SQLExprTableSource){ 174 | handlerSQLExprTableSource(node, (SQLExprTableSource) table.getRight()); 175 | }else if (table.getRight() instanceof SQLSubqueryTableSource) { 176 | // 处理--------------------- 177 | handlerSQLSubqueryTableSource(node, table.getRight()); 
178 | } 179 | else if (table.getRight() instanceof SQLUnionQueryTableSource) { 180 | // 处理--------------------- 181 | handlerSQLUnionQueryTableSource(node, (SQLUnionQueryTableSource) table.getRight()); 182 | } 183 | }); 184 | } 185 | 186 | 187 | /** 188 | * 处理最终表 189 | * @param node 190 | * @param table 191 | */ 192 | private static void handlerSQLExprTableSource(TreeNode node, SQLExprTableSource table) { 193 | SQLExprTableSource tableSource = table; 194 | String db = tableSource.getExpr() instanceof SQLPropertyExpr ? ((SQLPropertyExpr) tableSource.getExpr()).getOwner().toString().replace("`","") : ""; 195 | String tableName = tableSource.getExpr() instanceof SQLPropertyExpr ? ((SQLPropertyExpr) tableSource.getExpr()).getName().replace("`","") : ""; 196 | String alias = EmptyUtils.isNotEmpty(tableSource.getAlias()) ? tableSource.getAlias().replace("`","") : ""; 197 | 198 | node.getChildren().forEach(e->{ 199 | e.getChildren().forEach(f->{ 200 | if (EmptyUtils.isNotEmpty(db)){ 201 | f.getData().setSourceDbName(db); 202 | } 203 | if (f.getData().getSourceTableName() == null || f.getData().getSourceTableName().equals(tableName) || f.getData().getSourceTableName().equals(alias)){ 204 | f.getData().setSourceTableName(tableSource.toString()); 205 | f.getData().setIsEnd(true); 206 | f.getData().setExpression(e.getData().getExpression()); 207 | } 208 | }); 209 | 210 | }); 211 | } 212 | 213 | /** 214 | * 处理表达式 215 | * @param sqlExpr 216 | * @param itemNode 217 | */ 218 | private static void handlerExpr(SQLExpr sqlExpr,TreeNode itemNode) { 219 | //方法 220 | if (sqlExpr instanceof SQLMethodInvokeExpr){ 221 | visitSQLMethodInvoke( (SQLMethodInvokeExpr) sqlExpr,itemNode); 222 | } 223 | //聚合 224 | else if (sqlExpr instanceof SQLAggregateExpr){ 225 | visitSQLAggregateExpr((SQLAggregateExpr) sqlExpr,itemNode); 226 | } 227 | //case 228 | else if (sqlExpr instanceof SQLCaseExpr){ 229 | visitSQLCaseExpr((SQLCaseExpr) sqlExpr,itemNode); 230 | } 231 | //比较 232 | else if (sqlExpr 
instanceof SQLBinaryOpExpr){ 233 | visitSQLBinaryOpExpr((SQLBinaryOpExpr) sqlExpr,itemNode); 234 | } 235 | //表达式 236 | else if (sqlExpr instanceof SQLPropertyExpr){ 237 | visitSQLPropertyExpr((SQLPropertyExpr) sqlExpr,itemNode); 238 | } 239 | //列 240 | else if (sqlExpr instanceof SQLIdentifierExpr){ 241 | visitSQLIdentifierExpr((SQLIdentifierExpr) sqlExpr,itemNode); 242 | } 243 | //赋值表达式 244 | else if (sqlExpr instanceof SQLIntegerExpr){ 245 | visitSQLIntegerExpr((SQLIntegerExpr) sqlExpr,itemNode); 246 | } 247 | //数字 248 | else if (sqlExpr instanceof SQLNumberExpr){ 249 | visitSQLNumberExpr((SQLNumberExpr) sqlExpr,itemNode); 250 | } 251 | //字符 252 | else if (sqlExpr instanceof SQLCharExpr){ 253 | visitSQLCharExpr((SQLCharExpr) sqlExpr,itemNode); 254 | } 255 | } 256 | 257 | 258 | /** 259 | * 方法 260 | * @param expr 261 | * @param node 262 | */ 263 | public static void visitSQLMethodInvoke(SQLMethodInvokeExpr expr,TreeNode node){ 264 | if (expr.getParameters().size() == 0){ 265 | //计算表达式,没有更多列,结束循环 266 | if (node.getData().getExpression().equals(expr.toString())){ 267 | node.getData().setIsEnd(true); 268 | } 269 | }else { 270 | expr.getParameters().forEach( expr1 -> { 271 | handlerExpr(expr1,node); 272 | }); 273 | } 274 | } 275 | 276 | 277 | /** 278 | * 聚合 279 | * @param expr 280 | * @param node 281 | */ 282 | public static void visitSQLAggregateExpr(SQLAggregateExpr expr,TreeNode node){ 283 | expr.getArguments().forEach( expr1 -> { 284 | handlerExpr(expr1,node); 285 | }); 286 | } 287 | 288 | 289 | /** 290 | * 选择 291 | * @param expr 292 | * @param node 293 | */ 294 | public static void visitSQLCaseExpr(SQLCaseExpr expr,TreeNode node){ 295 | expr.getItems().forEach( expr1 -> { 296 | handlerExpr(expr1.getConditionExpr(),node); 297 | 298 | }); 299 | } 300 | 301 | 302 | /** 303 | * 判断 304 | * @param expr 305 | * @param node 306 | */ 307 | public static void visitSQLBinaryOpExpr(SQLBinaryOpExpr expr,TreeNode node){ 308 | handlerExpr(expr.getLeft(),node); 309 | 
handlerExpr(expr.getRight(),node); 310 | } 311 | 312 | 313 | 314 | 315 | /** 316 | * 表达式列 317 | * @param expr 318 | * @param node 319 | */ 320 | public static void visitSQLPropertyExpr(SQLPropertyExpr expr,TreeNode node){ 321 | LineageColumn project = new LineageColumn(); 322 | String columnName = expr.getName().replace("`",""); 323 | project.setTargetColumnName(columnName); 324 | 325 | project.setSourceTableName(expr.getOwner().toString()); 326 | TreeNode search = node.findChildNode(project); 327 | 328 | if (EmptyUtils.isEmpty(search)){ 329 | node.addChild(project); 330 | } 331 | } 332 | 333 | /** 334 | * 列 335 | * @param expr 336 | * @param node 337 | */ 338 | public static void visitSQLIdentifierExpr(SQLIdentifierExpr expr,TreeNode node){ 339 | LineageColumn project = new LineageColumn(); 340 | project.setTargetColumnName(expr.getName()); 341 | 342 | TreeNode search = node.findChildNode(project); 343 | if (EmptyUtils.isEmpty(search)){ 344 | node.addChild(project); 345 | } 346 | } 347 | 348 | 349 | /** 350 | * 整型赋值 351 | * @param expr 352 | * @param node 353 | */ 354 | public static void visitSQLIntegerExpr(SQLIntegerExpr expr,TreeNode node){ 355 | LineageColumn project = new LineageColumn(); 356 | project.setTargetColumnName(expr.getNumber().toString()); 357 | //常量不设置表信息 358 | project.setSourceTableName(""); 359 | project.setIsEnd(true); 360 | TreeNode search = node.findChildNode(project); 361 | if (EmptyUtils.isEmpty(search)){ 362 | node.addChild(project); 363 | } 364 | } 365 | 366 | /** 367 | * 数字 368 | * @param expr 369 | * @param node 370 | */ 371 | public static void visitSQLNumberExpr(SQLNumberExpr expr, TreeNode node){ 372 | LineageColumn project = new LineageColumn(); 373 | project.setTargetColumnName(expr.getNumber().toString()); 374 | //常量不设置表信息 375 | project.setSourceTableName(""); 376 | project.setIsEnd(true); 377 | TreeNode search = node.findChildNode(project); 378 | if (EmptyUtils.isEmpty(search)){ 379 | node.addChild(project); 380 | } 381 | } 382 
| 383 | 384 | /** 385 | * 字符 386 | * @param expr 387 | * @param node 388 | */ 389 | public static void visitSQLCharExpr(SQLCharExpr expr, TreeNode node){ 390 | LineageColumn project = new LineageColumn(); 391 | project.setTargetColumnName(expr.toString()); 392 | //常量不设置表信息 393 | project.setSourceTableName(""); 394 | project.setIsEnd(true); 395 | TreeNode search = node.findChildNode(project); 396 | if (EmptyUtils.isEmpty(search)){ 397 | node.addChild(project); 398 | } 399 | } 400 | } 401 | --------------------------------------------------------------------------------