├── src
│   ├── test
│   │   ├── resources
│   │   │   └── sql
│   │   └── java
│   │       └── com
│   │           └── lan
│   │               └── lineage
│   │                   └── druid
│   │                       └── DruidTest.java
│   └── main
│       └── java
│           └── com
│               └── lan
│                   └── lineage
│                       ├── common
│                       │   ├── EmptyUtils.java
│                       │   ├── LineageColumn.java
│                       │   ├── TreeNodeIterator.java
│                       │   └── TreeNode.java
│                       └── druid
│                           └── LineageUtils.java
├── pom.xml
└── README.md
/src/test/resources/sql:
--------------------------------------------------------------------------------
1 | ## 简单sql
2 | select user_id,user_name
3 | from user
4 |
5 | ## sub sql
6 | select
7 | user_id as uid
8 | ,user_name as uname
9 | from
10 | (
11 | select user_id, concat("test",user_name) as user_name
12 | from user
13 | )t
14 |
15 | ## union sql
16 | select
17 | user_id
18 | ,user_name
19 | from user
20 | union all
21 |
22 | select
23 | sub_user_id as user_id
24 | ,sub_user_name as user_name
25 | from sub_user
26 |
27 |
28 | ## join sql
29 | select t1.user_id,t2.user_name,t2.sub_user_id,t2.sub_user_name
30 | from user t1
31 | left join sub_user t2
32 | on t1.user_id = t2.user_id
33 |
34 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
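1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <modelVersion>4.0.0</modelVersion>
6 |
7 |     <groupId>com.lan.lineage</groupId>
8 |     <artifactId>sql-lineage-parser</artifactId>
9 |     <version>1.0-SNAPSHOT</version>
10 |     <build>
11 |         <plugins>
12 |             <plugin>
13 |                 <groupId>org.apache.maven.plugins</groupId>
14 |                 <artifactId>maven-compiler-plugin</artifactId>
15 |                 <configuration>
16 |                     <source>8</source>
17 |                     <target>8</target>
18 |                 </configuration>
19 |             </plugin>
20 |         </plugins>
21 |     </build>
22 |
23 |
24 |     <dependencies>
25 |         <dependency>
26 |             <groupId>org.projectlombok</groupId>
27 |             <artifactId>lombok</artifactId>
28 |             <version>1.16.18</version>
29 |         </dependency>
30 |         <dependency>
31 |             <groupId>org.springframework</groupId>
32 |             <artifactId>spring-core</artifactId>
33 |             <version>5.1.6.RELEASE</version>
34 |         </dependency>
35 |         <dependency>
36 |             <groupId>com.alibaba</groupId>
37 |             <artifactId>druid</artifactId>
38 |             <version>1.1.12</version>
39 |         </dependency>
40 |         <dependency>
41 |             <groupId>com.alibaba</groupId>
42 |             <artifactId>fastjson</artifactId>
43 |             <version>1.2.51</version>
44 |         </dependency>
45 |     </dependencies>
46 |
47 | </project>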
--------------------------------------------------------------------------------
/src/main/java/com/lan/lineage/common/EmptyUtils.java:
--------------------------------------------------------------------------------
1 | package com.lan.lineage.common;
2 |
3 | import org.springframework.util.ObjectUtils;
4 | import org.springframework.util.StringUtils;
5 |
6 | import java.util.Collection;
7 | import java.util.Map;
8 |
/**
 * Null-safe emptiness checks for collections, maps, arrays and arbitrary objects.
 *
 * <p>All methods are static; the class cannot be instantiated.
 *
 * @author lanxueri
 */
public class EmptyUtils {

    private EmptyUtils() {
        // Static utility holder; not meant to be instantiated.
    }

    /**
     * Tells whether a collection is {@code null} or contains no elements.
     *
     * @param coll the collection to inspect, may be {@code null}
     * @return {@code true} when {@code coll} is {@code null} or empty
     */
    public static boolean isEmpty(Collection<?> coll) {
        return coll == null || coll.isEmpty();
    }

    /**
     * Tells whether a collection holds at least one element.
     *
     * @param coll the collection to inspect, may be {@code null}
     * @return {@code true} when {@code coll} is non-null and non-empty
     */
    public static boolean isNotEmpty(Collection<?> coll) {
        return !isEmpty(coll);
    }

    /**
     * Tells whether a map is {@code null} or contains no entries.
     *
     * @param map the map to inspect, may be {@code null}
     * @return {@code true} when {@code map} is {@code null} or empty
     */
    public static boolean isEmpty(Map<?, ?> map) {
        return map == null || map.isEmpty();
    }

    /**
     * Tells whether a map holds at least one entry.
     *
     * @param map the map to inspect, may be {@code null}
     * @return {@code true} when {@code map} is non-null and non-empty
     */
    public static boolean isNotEmpty(Map<?, ?> map) {
        return !isEmpty(map);
    }

    /**
     * Tells whether an object is {@code null} or has an empty string form.
     *
     * @param t   the object to inspect, may be {@code null}
     * @param <T> the object type
     * @return {@code true} when {@code t} is {@code null} or {@code t.toString()}
     *         is {@code null} or the empty string
     */
    public static <T> boolean isEmpty(T t) {
        if (t == null) {
            return true;
        }
        // Same rule as Spring's StringUtils.isEmpty(Object): null or "" counts as empty.
        String text = t.toString();
        return text == null || text.isEmpty();
    }

    /**
     * Tells whether an array holds at least one element.
     *
     * @param datas the array to inspect, may be {@code null}
     * @param <T>   the component type
     * @return {@code true} when {@code datas} is non-null and has length &gt; 0
     */
    public static <T> boolean isNotEmpty(T[] datas) {
        return !isEmpty(datas);
    }

    /**
     * Tells whether an array is {@code null} or has length zero.
     *
     * @param datas the array to inspect, may be {@code null}
     * @param <T>   the component type
     * @return {@code true} when {@code datas} is {@code null} or empty
     */
    public static <T> boolean isEmpty(T[] datas) {
        return datas == null || datas.length == 0;
    }

    /**
     * Tells whether an object is non-null and has a non-empty string form.
     *
     * @param t   the object to inspect, may be {@code null}
     * @param <T> the object type
     * @return the negation of {@link #isEmpty(Object)}
     */
    public static <T> boolean isNotEmpty(T t) {
        return !isEmpty(t);
    }
}
83 |
--------------------------------------------------------------------------------
/src/main/java/com/lan/lineage/common/LineageColumn.java:
--------------------------------------------------------------------------------
1 | package com.lan.lineage.common;
2 |
3 | import lombok.Data;
4 |
5 | /**
6 | * @author lanxueri
7 | * @ClassName LineageColumn
8 | * @Description TODO
9 | * @createTime 2020-07-31
10 | */
11 | @Data
12 | public class LineageColumn implements Comparable {
13 | private String targetColumnName;
14 |
15 | private String sourceDbName;
16 |
17 | private String sourceTableName;
18 |
19 | private String sourceColumnName;
20 |
21 | private String expression;
22 |
23 | private Boolean isEnd = false;
24 |
25 |
26 | public void setSourceTableName(String sourceTableName) {
27 | sourceTableName = EmptyUtils.isNotEmpty(sourceTableName) ? sourceTableName.replace("`","") : sourceTableName;
28 | if (sourceTableName.contains(".")){
29 | this.sourceDbName = sourceTableName.substring(0,sourceTableName.indexOf("."));
30 | this.sourceTableName = sourceTableName.substring(sourceTableName.indexOf(".")+1);
31 | }else {
32 | this.sourceTableName = sourceTableName;
33 |
34 | }
35 | }
36 |
37 |
38 | public int compareTo(LineageColumn o) {
39 | if (this.getTargetColumnName().equals(o.getTargetColumnName()))
40 | return 0;
41 | return -1;
42 | }
43 |
44 |
45 | @Override
46 | public boolean equals(Object o) {
47 | if (this == o) return true;
48 | if (o == null || getClass() != o.getClass()) return false;
49 |
50 | LineageColumn myColumn = (LineageColumn) o;
51 |
52 | if (!this.getTargetColumnName().equals(myColumn.getTargetColumnName())) return false;
53 | if (EmptyUtils.isNotEmpty(sourceTableName) && !sourceTableName.equals(myColumn.sourceTableName)) return false;
54 | if (EmptyUtils.isNotEmpty(sourceColumnName))
55 | return sourceColumnName.equals(myColumn.sourceColumnName);
56 | return true;
57 | }
58 |
59 | @Override
60 | public int hashCode() {
61 | int result = getTargetColumnName().hashCode();
62 | if (EmptyUtils.isNotEmpty(sourceTableName)){
63 | result = 31 * result + sourceTableName.hashCode();
64 | }
65 | if (EmptyUtils.isNotEmpty(sourceColumnName)){
66 | result = 31 * result + sourceColumnName.hashCode();
67 | }
68 | return result;
69 | }
70 | }
71 |
--------------------------------------------------------------------------------
/src/main/java/com/lan/lineage/common/TreeNodeIterator.java:
--------------------------------------------------------------------------------
1 | package com.lan.lineage.common;
2 |
3 | import java.util.Iterator;
4 |
5 | /**
6 | * @author lanxueri
7 | * @ClassName TreeNodeIterator
8 | * @Description TODO
9 | * @createTime 2020-07-31
10 | */
11 | public class TreeNodeIterator implements Iterator> {
12 |
13 | enum ProcessStages {
14 | ProcessParent, ProcessChildCurNode, ProcessChildSubNode
15 | }
16 |
17 | private ProcessStages doNext;
18 |
19 | private TreeNode next;
20 |
21 | private Iterator> childrenCurNodeIter;
22 |
23 | private Iterator> childrenSubNodeIter;
24 |
25 | private TreeNode treeNode;
26 |
27 | public TreeNodeIterator(TreeNode treeNode) {
28 | this.treeNode = treeNode;
29 | this.doNext = ProcessStages.ProcessParent;
30 | this.childrenCurNodeIter = treeNode.children.iterator();
31 | }
32 |
33 |
34 | public boolean hasNext() {
35 |
36 | if (this.doNext == ProcessStages.ProcessParent) {
37 | this.next = this.treeNode;
38 | this.doNext = ProcessStages.ProcessChildCurNode;
39 | return true;
40 | }
41 |
42 | if (this.doNext == ProcessStages.ProcessChildCurNode) {
43 | if (childrenCurNodeIter.hasNext()) {
44 | TreeNode childDirect = childrenCurNodeIter.next();
45 | childrenSubNodeIter = childDirect.iterator();
46 | this.doNext = ProcessStages.ProcessChildSubNode;
47 | return hasNext();
48 | } else {
49 | this.doNext = null;
50 | return false;
51 | }
52 | }
53 |
54 | if (this.doNext == ProcessStages.ProcessChildSubNode) {
55 | if (childrenSubNodeIter.hasNext()) {
56 | this.next = childrenSubNodeIter.next();
57 | return true;
58 | } else {
59 | this.next = null;
60 | this.doNext = ProcessStages.ProcessChildCurNode;
61 | return hasNext();
62 | }
63 | }
64 |
65 | return false;
66 | }
67 |
68 |
69 | public TreeNode next() {
70 | return this.next;
71 | }
72 |
73 | /**
74 | * 目前不支持删除节点
75 | */
76 | public void remove() {
77 | throw new UnsupportedOperationException();
78 | }
79 | }
80 |
--------------------------------------------------------------------------------
/src/test/java/com/lan/lineage/druid/DruidTest.java:
--------------------------------------------------------------------------------
1 | package com.lan.lineage.druid;
2 |
3 | import com.alibaba.fastjson.JSONObject;
4 | import com.lan.lineage.common.LineageColumn;
5 | import com.lan.lineage.common.TreeNode;
6 | import org.springframework.util.ResourceUtils;
7 |
8 | import java.io.BufferedReader;
9 | import java.io.File;
10 | import java.io.FileReader;
11 | import java.io.IOException;
12 | import java.util.Arrays;
13 | import java.util.LinkedHashMap;
14 | import java.util.List;
15 | import java.util.Map;
16 | import java.util.Set;
17 | import java.util.regex.Matcher;
18 | import java.util.regex.Pattern;
19 |
20 | /**
21 | * @author lanxueri
22 | * @ClassName DruidTest
23 | * @Description TODO
24 | * @createTime 2020-07-31
25 | */
26 | public class DruidTest {
27 | String file = this.getClass().getResource("/sql").getFile();
28 | public static void main(String[] args) throws IOException {
29 |
30 | File file = ResourceUtils.getFile(new DruidTest().file);
31 | BufferedReader bufferedReader = new BufferedReader(new FileReader(file));
32 | String line = "";
33 | StringBuilder sb = new StringBuilder();
34 | while ((line = bufferedReader.readLine()) != null) {
35 | sb.append(line);
36 | sb.append("\n");
37 | }
38 | bufferedReader.close();
39 |
40 | Map map = new LinkedHashMap<>();
41 | Pattern pattern = Pattern.compile("##\\s.*\\S");
42 | Matcher matcher = pattern.matcher(sb.toString());
43 | List list = Arrays.asList(sb.toString().split("##\\s.*\\S"));
44 | int j = 1;
45 | while (matcher.find()){
46 | String key = matcher.group().replace("##","").trim();
47 | map.put(key,list.get(j));
48 | j++;
49 | }
50 |
51 | String key = "sub sql";
52 | System.out.println("Begin parse:"+key+"\n"+"sql:"+map.get(key));
53 |
54 | LineageColumn root = new LineageColumn();
55 | TreeNode rootNode = new TreeNode<>(root);
56 |
57 | LineageUtils.columnLineageAnalyzer(map.get(key),rootNode);
58 |
59 | for (TreeNode e : rootNode.getChildren()) {
60 | Set leafNodes = e.getAllLeafData();
61 | for (LineageColumn f : leafNodes){
62 | if (f.getIsEnd()){
63 | System.out.println(e.getData().getTargetColumnName() + "\tfrom:"+ JSONObject.toJSONString(f)+"\n");
64 | }
65 |
66 | }
67 |
68 | }
69 |
70 | for (TreeNode node : rootNode) {
71 |
72 | StringBuilder indent = new StringBuilder();
73 | for (int i = 1; i < node.getLevel();i++){
74 | indent.append(" ");
75 | }
76 |
77 | System.out.println(indent.toString() + JSONObject.toJSONString(node.getData())+"\n");
78 | }
79 |
80 |
81 |
82 |
83 | }
84 | }
85 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## 简介
2 | 数据平台建设过程中需要获取数据血缘信息,通过对血缘数据的探索,就可以快速获取数据,加快数据开发的效率。
3 |
4 |
5 | 对于以hive为中心的数仓系统,简化架构可能为
6 | 
7 |
8 | - 数据同步阶段可以通过同步系统解决源与数仓的列级血缘信息
9 | - ETL阶段可以使用hive的hooks得到ETL过程的列级血缘信息,例如 http://cxy7.com/articles/2017/11/10/1510310104765.html
10 | - 最后一个数据展示阶段则可以通过对页面SQL的解析获取报表页面与数据的关系,此阶段即为本例需要处理的问题
11 |
12 | 通过这几个阶段血缘数据的打通,可以跟踪数据从产生到展示的整个过程,提高数据监控及使用的效率。
13 |
14 |
15 | ### 如何解析SQL
16 |
17 | 可以通过语法树的解析得到列级的血缘。那么怎么得到语法树呢?
18 |
19 | - [alibaba-druid](https://github.com/alibaba/druid/wiki/Druid_SQL_AST)
20 | - [antlr4](https://github.com/antlr/antlr4)
21 |
22 |
23 | 通过以上两种工具可以得到语法树,那么剩下的问题就是如何将语法树解析为血缘信息。
24 |
25 | 此例使用一个简单的对象LineageColumn进行血缘存储
26 |
27 | name | desc
28 | ---|---
29 | targetColumnName | 目标字段,即SELECT的列
30 | sourceDbName | 字段来源DB
31 | sourceTableName | 字段来源表
32 | sourceColumnName | 字段来源列
33 | expression | 表达式
34 | isEnd | 是否结束标识
35 |
36 |
37 | 通过递归迭代获取树结构,最终树的叶子节点即为最终的血缘信息,以SQL为例
38 | ```
39 | select
40 | user_id as uid
41 | ,user_name as uname
42 | from
43 | (
44 | select user_id, concat("test",user_name) as user_name
45 | from user
46 | )t
47 | ```
48 | 需要经过两层循环,过程为
49 | 
50 |
51 |
52 | 最终血缘信息为
53 | ```
54 | --- 输出列uid 数据来源于user表的user_id 列
55 | uid from:{"expression":"user_id","isEnd":true,"sourceTableName":"user","targetColumnName":"user_id"}
56 |
57 |
58 | -- 输出列uname 数据来源于常量字段test及 user表的user_name 列
59 | uname from:{"expression":"concat('test', user_name)","isEnd":true,"sourceTableName":"user","targetColumnName":"user_name"}
60 |
61 | uname from:{"expression":"concat('test', user_name)","isEnd":true,"sourceTableName":"user","targetColumnName":"'test'"}
62 |
63 | ```
64 | 展开的树型结构为
65 | ```
66 | {"isEnd":false}
67 |
68 | {"expression":"user_id","isEnd":false,"targetColumnName":"uid"}
69 |
70 | {"isEnd":false,"targetColumnName":"user_id"}
71 |
72 | {"expression":"user_id","isEnd":false,"targetColumnName":"user_id"}
73 |
74 | {"expression":"user_id","isEnd":true,"sourceTableName":"user","targetColumnName":"user_id"}
75 |
76 | {"expression":"user_name","isEnd":false,"targetColumnName":"uname"}
77 |
78 | {"isEnd":false,"targetColumnName":"user_name"}
79 |
80 | {"expression":"concat('test', user_name)","isEnd":false,"targetColumnName":"user_name"}
81 |
82 | {"expression":"concat('test', user_name)","isEnd":true,"sourceTableName":"user","targetColumnName":"'test'"}
83 |
84 | {"expression":"concat('test', user_name)","isEnd":true,"sourceTableName":"user","targetColumnName":"user_name"}
85 |
86 | ```
87 |
88 | 叶子节点即为最终关联的目标表及列。
89 | ### 如何使用
90 | 参考DruidTest即可。
91 |
92 | # 可能遇到的问题
93 | #### interval 语句解析异常
94 | 如果数据可视化使用的是tableau,动态sql会生成很多interval语句,interval使用druid进行语法解析的时候可能会报错。
95 | ###### 原因
96 | druid对interval的词法解析存在缺陷
97 | ###### 解决方案
98 | 下载druid源码,参照https://github.com/alibaba/druid/pull/4368 进行代码修改,使用修改源码后的jar包。
99 |
--------------------------------------------------------------------------------
/src/main/java/com/lan/lineage/common/TreeNode.java:
--------------------------------------------------------------------------------
1 | package com.lan.lineage.common;
2 |
3 | import java.util.HashSet;
4 | import java.util.Iterator;
5 | import java.util.LinkedList;
6 | import java.util.List;
7 | import java.util.Set;
8 |
9 | /**
10 | * @author lanxueri
11 | * @ClassName TreeNode
12 | * @Description TODO
13 | * @createTime 2020-07-31
14 | */
15 | public class TreeNode implements Iterable> {
16 | /**
17 | * 树节点
18 | */
19 | public T data;
20 |
21 | /**
22 | * 父节点,根没有父节点
23 | */
24 | public TreeNode parent;
25 |
26 | /**
27 | * 子节点,叶子节点没有子节点
28 | */
29 | public List> children;
30 |
31 | /**
32 | * 保存了当前节点及其所有子节点,方便查询
33 | */
34 | private List> elementsIndex;
35 |
36 |
37 | public T getData() {
38 | return data;
39 | }
40 |
41 | /**
42 | * 构造函数
43 | *
44 | * @param data
45 | */
46 | public TreeNode(T data) {
47 | this.data = data;
48 | this.children = new LinkedList>();
49 | this.elementsIndex = new LinkedList>();
50 | this.elementsIndex.add(this);
51 | }
52 |
53 | public List> getChildren() {
54 | return children;
55 | }
56 |
57 | /**
58 | * 判断是否为根:根没有父节点
59 | *
60 | * @return
61 | */
62 | public boolean isRoot() {
63 | return parent == null;
64 | }
65 |
66 | /**
67 | * 判断是否为叶子节点:子节点没有子节点
68 | *
69 | * @return
70 | */
71 | public boolean isLeaf() {
72 | return children.size() == 0;
73 | }
74 |
75 | /**
76 | * 添加一个子节点
77 | *
78 | * @param child
79 | * @return
80 | */
81 | public TreeNode addChild(T child) {
82 | TreeNode childNode = new TreeNode(child);
83 |
84 | childNode.parent = this;
85 |
86 | this.children.add(childNode);
87 |
88 | this.registerChildForSearch(childNode);
89 |
90 | return childNode;
91 | }
92 |
93 |
94 | public TreeNode addChild(TreeNode childNode) {
95 | childNode.parent = this;
96 |
97 | this.children.add(childNode);
98 |
99 | this.registerChildForSearch(childNode);
100 |
101 | return childNode;
102 | }
103 |
104 | /**
105 | * 获取当前节点的层
106 | *
107 | * @return
108 | */
109 | public int getLevel() {
110 | if (this.isRoot()) {
111 | return 0;
112 | } else {
113 | return parent.getLevel() + 1;
114 | }
115 | }
116 |
117 | /**
118 | * 递归为当前节点以及当前节点的所有父节点增加新的节点
119 | *
120 | * @param node
121 | */
122 | private void registerChildForSearch(TreeNode node) {
123 | elementsIndex.add(node);
124 | if (parent != null) {
125 | parent.registerChildForSearch(node);
126 | }
127 | }
128 |
129 | /**
130 | * 从当前节点及其所有子节点中搜索某节点
131 | *
132 | * @param cmp
133 | * @return
134 | */
135 | public TreeNode findTreeNode(Comparable cmp) {
136 | for (TreeNode element : this.elementsIndex) {
137 | T elData = element.data;
138 | if (cmp.compareTo(elData) == 0)
139 | return element;
140 | }
141 |
142 | return null;
143 | }
144 |
145 |
146 | public TreeNode findChildNode(Comparable cmp) {
147 | for (TreeNode element : this.getChildren()) {
148 | T elData = element.data;
149 | if (cmp.compareTo(elData) == 0)
150 | return element;
151 | }
152 |
153 | return null;
154 | }
155 |
156 |
157 |
158 | /**
159 | * 获取当前节点的迭代器
160 | *
161 | * @return
162 | */
163 | public Iterator> iterator() {
164 | TreeNodeIterator iterator = new TreeNodeIterator(this);
165 | return iterator;
166 | }
167 |
168 | @Override
169 | public String toString() {
170 | return data != null ? data.toString() : "[tree data null]";
171 | }
172 |
173 |
174 |
175 | /**
176 | * 获取所有叶子节点的数据
177 | * @return
178 | */
179 | public Set> getAllLeafs() {
180 | Set> leafNodes = new HashSet>();
181 | if (this.children.isEmpty()) {
182 | leafNodes.add(this);
183 | } else {
184 | for (TreeNode child : this.children) {
185 | leafNodes.addAll(child.getAllLeafs());
186 | }
187 | }
188 | return leafNodes;
189 | }
190 |
191 | /**
192 | * 获取所有叶子节点的数据
193 | * @return
194 | */
195 | public Set getAllLeafData() {
196 | Set leafNodes = new HashSet();
197 | if (this.children.isEmpty()) {
198 | leafNodes.add(this.data);
199 | } else {
200 | for (TreeNode child : this.children) {
201 | leafNodes.addAll(child.getAllLeafData());
202 | }
203 | }
204 | return leafNodes;
205 | }
206 | }
207 |
--------------------------------------------------------------------------------
/src/main/java/com/lan/lineage/druid/LineageUtils.java:
--------------------------------------------------------------------------------
1 | package com.lan.lineage.druid;
2 |
3 | import com.alibaba.druid.sql.SQLUtils;
4 | import com.alibaba.druid.sql.ast.SQLExpr;
5 | import com.alibaba.druid.sql.ast.SQLStatement;
6 | import com.alibaba.druid.sql.ast.expr.SQLAggregateExpr;
7 | import com.alibaba.druid.sql.ast.expr.SQLBinaryOpExpr;
8 | import com.alibaba.druid.sql.ast.expr.SQLCaseExpr;
9 | import com.alibaba.druid.sql.ast.expr.SQLCharExpr;
10 | import com.alibaba.druid.sql.ast.expr.SQLIdentifierExpr;
11 | import com.alibaba.druid.sql.ast.expr.SQLIntegerExpr;
12 | import com.alibaba.druid.sql.ast.expr.SQLMethodInvokeExpr;
13 | import com.alibaba.druid.sql.ast.expr.SQLNumberExpr;
14 | import com.alibaba.druid.sql.ast.expr.SQLPropertyExpr;
15 | import com.alibaba.druid.sql.ast.statement.SQLExprTableSource;
16 | import com.alibaba.druid.sql.ast.statement.SQLJoinTableSource;
17 | import com.alibaba.druid.sql.ast.statement.SQLSelectItem;
18 | import com.alibaba.druid.sql.ast.statement.SQLSelectQuery;
19 | import com.alibaba.druid.sql.ast.statement.SQLSelectQueryBlock;
20 | import com.alibaba.druid.sql.ast.statement.SQLSelectStatement;
21 | import com.alibaba.druid.sql.ast.statement.SQLSubqueryTableSource;
22 | import com.alibaba.druid.sql.ast.statement.SQLTableSource;
23 | import com.alibaba.druid.sql.ast.statement.SQLUnionQuery;
24 | import com.alibaba.druid.sql.ast.statement.SQLUnionQueryTableSource;
25 | import com.alibaba.druid.util.JdbcConstants;
26 | import com.lan.lineage.common.EmptyUtils;
27 | import com.lan.lineage.common.LineageColumn;
28 | import com.lan.lineage.common.TreeNode;
29 |
30 | import java.util.ArrayList;
31 | import java.util.List;
32 | import java.util.concurrent.atomic.AtomicReference;
33 |
34 | /**
35 | * @author lanxueri
36 | * @ClassName LineageUtils
37 | * @Description TODO
38 | * @createTime 2020-07-31
39 | */
40 | public class LineageUtils {
41 |
42 |
43 | public static void columnLineageAnalyzer(String sql,TreeNode node) {
44 | if ( EmptyUtils.isEmpty(sql)){
45 | return;
46 | }
47 | AtomicReference isContinue = new AtomicReference<>(false);
48 | List statements = new ArrayList<>();
49 | // 解析
50 | try{
51 | statements = SQLUtils.parseStatements(sql, JdbcConstants.MYSQL);
52 | }catch (Exception e){
53 | System.out.println("can't parser by druid MYSQL"+e);
54 | }
55 | // 只考虑一条语句
56 | SQLStatement statement = statements.get(0);
57 | // 只考虑查询语句
58 | SQLSelectStatement sqlSelectStatement = (SQLSelectStatement) statement;
59 | SQLSelectQuery sqlSelectQuery = sqlSelectStatement.getSelect().getQuery();
60 | // 非union的查询语句
61 | if (sqlSelectQuery instanceof SQLSelectQueryBlock) {
62 | SQLSelectQueryBlock sqlSelectQueryBlock = (SQLSelectQueryBlock) sqlSelectQuery;
63 | // 获取字段列表
64 | List selectItems = sqlSelectQueryBlock.getSelectList();
65 | selectItems.forEach(x -> {
66 | // 处理---------------------
67 | String column = EmptyUtils.isEmpty(x.getAlias()) ? x.toString() : x.getAlias();
68 |
69 | if (column.contains(".")){
70 | column = column.substring(column.indexOf(".")+1);
71 | }
72 | column = column.replace("`","");
73 |
74 | String expr = x.getExpr().toString();
75 | LineageColumn myColumn = new LineageColumn();
76 | myColumn.setTargetColumnName(column);
77 | myColumn.setExpression(expr);
78 |
79 | TreeNode itemNode = new TreeNode<>(myColumn);
80 | SQLExpr expr1 = x.getExpr();
81 | //解析表达式,添加解析结果子节点
82 | handlerExpr(expr1,itemNode);
83 |
84 | if (node.getLevel() == 0 || node.getData().getTargetColumnName().equals(column) ){
85 | node.addChild(itemNode);
86 | isContinue.set(true);
87 | }
88 |
89 | });
90 | if (isContinue.get()){
91 | // 获取表
92 | SQLTableSource table = sqlSelectQueryBlock.getFrom();
93 | // 普通单表
94 | if (table instanceof SQLExprTableSource) {
95 | // 处理最终表---------------------
96 | handlerSQLExprTableSource(node, (SQLExprTableSource) table);
97 |
98 | } else if (table instanceof SQLJoinTableSource) {
99 | //处理join
100 | handlerSQLJoinTableSource(node, (SQLJoinTableSource) table);
101 |
102 | } else if (table instanceof SQLSubqueryTableSource) {
103 | // 处理 subquery ---------------------
104 | handlerSQLSubqueryTableSource(node, table);
105 |
106 | }else if (table instanceof SQLUnionQueryTableSource) {
107 | // 处理 union ---------------------
108 | handlerSQLUnionQueryTableSource(node, (SQLUnionQueryTableSource) table);
109 | }
110 | }
111 |
112 |
113 | // 处理---------------------
114 | // union的查询语句
115 | } else if (sqlSelectQuery instanceof SQLUnionQuery) {
116 | // 处理---------------------
117 | columnLineageAnalyzer(((SQLUnionQuery) sqlSelectQuery).getLeft().toString(),node);
118 | columnLineageAnalyzer(((SQLUnionQuery) sqlSelectQuery).getRight().toString(),node);
119 |
120 | }
121 | }
122 |
123 | /**
124 | * 处理UNION子句
125 | * @param node
126 | * @param table
127 | */
128 | private static void handlerSQLUnionQueryTableSource(TreeNode node, SQLUnionQueryTableSource table) {
129 | node.getAllLeafs().stream().filter(e -> !e.getData().getIsEnd()).forEach(e->{
130 | columnLineageAnalyzer(table.getUnion().toString(), e);
131 | });
132 | }
133 |
134 | /**
135 | * 处理sub子句
136 | * @param node
137 | * @param table
138 | */
139 | private static void handlerSQLSubqueryTableSource(TreeNode node, SQLTableSource table) {
140 | node.getAllLeafs().stream().filter(e -> !e.getData().getIsEnd()).forEach(e->{
141 | columnLineageAnalyzer(table.toString(), e);
142 | });
143 | }
144 |
145 |
146 | /**
147 | * 处理JOIN
148 | * @param node
149 | * @param table
150 | */
151 | private static void handlerSQLJoinTableSource(TreeNode node,SQLJoinTableSource table){
152 | // 处理---------------------
153 | // 子查询作为表
154 | node.getAllLeafs().stream().filter(e -> !e.getData().getIsEnd()).forEach(e->{
155 | if (table.getLeft() instanceof SQLJoinTableSource ){
156 | handlerSQLJoinTableSource(node, (SQLJoinTableSource) table.getLeft());
157 | }else if (table.getLeft() instanceof SQLExprTableSource){
158 | handlerSQLExprTableSource(node, (SQLExprTableSource) table.getLeft());
159 | }else if (table.getLeft() instanceof SQLSubqueryTableSource) {
160 | // 处理---------------------
161 | handlerSQLSubqueryTableSource(node, table.getLeft());
162 | }
163 | else if (table.getLeft() instanceof SQLUnionQueryTableSource) {
164 | // 处理---------------------
165 | handlerSQLUnionQueryTableSource(node, (SQLUnionQueryTableSource) table.getLeft());
166 | }
167 | });
168 |
169 |
170 | node.getAllLeafs().stream().filter(e -> !e.getData().getIsEnd()).forEach(e->{
171 | if (table.getRight() instanceof SQLJoinTableSource ){
172 | handlerSQLJoinTableSource(node, (SQLJoinTableSource) table.getRight());
173 | }else if (table.getRight() instanceof SQLExprTableSource){
174 | handlerSQLExprTableSource(node, (SQLExprTableSource) table.getRight());
175 | }else if (table.getRight() instanceof SQLSubqueryTableSource) {
176 | // 处理---------------------
177 | handlerSQLSubqueryTableSource(node, table.getRight());
178 | }
179 | else if (table.getRight() instanceof SQLUnionQueryTableSource) {
180 | // 处理---------------------
181 | handlerSQLUnionQueryTableSource(node, (SQLUnionQueryTableSource) table.getRight());
182 | }
183 | });
184 | }
185 |
186 |
187 | /**
188 | * 处理最终表
189 | * @param node
190 | * @param table
191 | */
192 | private static void handlerSQLExprTableSource(TreeNode node, SQLExprTableSource table) {
193 | SQLExprTableSource tableSource = table;
194 | String db = tableSource.getExpr() instanceof SQLPropertyExpr ? ((SQLPropertyExpr) tableSource.getExpr()).getOwner().toString().replace("`","") : "";
195 | String tableName = tableSource.getExpr() instanceof SQLPropertyExpr ? ((SQLPropertyExpr) tableSource.getExpr()).getName().replace("`","") : "";
196 | String alias = EmptyUtils.isNotEmpty(tableSource.getAlias()) ? tableSource.getAlias().replace("`","") : "";
197 |
198 | node.getChildren().forEach(e->{
199 | e.getChildren().forEach(f->{
200 | if (EmptyUtils.isNotEmpty(db)){
201 | f.getData().setSourceDbName(db);
202 | }
203 | if (f.getData().getSourceTableName() == null || f.getData().getSourceTableName().equals(tableName) || f.getData().getSourceTableName().equals(alias)){
204 | f.getData().setSourceTableName(tableSource.toString());
205 | f.getData().setIsEnd(true);
206 | f.getData().setExpression(e.getData().getExpression());
207 | }
208 | });
209 |
210 | });
211 | }
212 |
213 | /**
214 | * 处理表达式
215 | * @param sqlExpr
216 | * @param itemNode
217 | */
218 | private static void handlerExpr(SQLExpr sqlExpr,TreeNode itemNode) {
219 | //方法
220 | if (sqlExpr instanceof SQLMethodInvokeExpr){
221 | visitSQLMethodInvoke( (SQLMethodInvokeExpr) sqlExpr,itemNode);
222 | }
223 | //聚合
224 | else if (sqlExpr instanceof SQLAggregateExpr){
225 | visitSQLAggregateExpr((SQLAggregateExpr) sqlExpr,itemNode);
226 | }
227 | //case
228 | else if (sqlExpr instanceof SQLCaseExpr){
229 | visitSQLCaseExpr((SQLCaseExpr) sqlExpr,itemNode);
230 | }
231 | //比较
232 | else if (sqlExpr instanceof SQLBinaryOpExpr){
233 | visitSQLBinaryOpExpr((SQLBinaryOpExpr) sqlExpr,itemNode);
234 | }
235 | //表达式
236 | else if (sqlExpr instanceof SQLPropertyExpr){
237 | visitSQLPropertyExpr((SQLPropertyExpr) sqlExpr,itemNode);
238 | }
239 | //列
240 | else if (sqlExpr instanceof SQLIdentifierExpr){
241 | visitSQLIdentifierExpr((SQLIdentifierExpr) sqlExpr,itemNode);
242 | }
243 | //赋值表达式
244 | else if (sqlExpr instanceof SQLIntegerExpr){
245 | visitSQLIntegerExpr((SQLIntegerExpr) sqlExpr,itemNode);
246 | }
247 | //数字
248 | else if (sqlExpr instanceof SQLNumberExpr){
249 | visitSQLNumberExpr((SQLNumberExpr) sqlExpr,itemNode);
250 | }
251 | //字符
252 | else if (sqlExpr instanceof SQLCharExpr){
253 | visitSQLCharExpr((SQLCharExpr) sqlExpr,itemNode);
254 | }
255 | }
256 |
257 |
258 | /**
259 | * 方法
260 | * @param expr
261 | * @param node
262 | */
263 | public static void visitSQLMethodInvoke(SQLMethodInvokeExpr expr,TreeNode node){
264 | if (expr.getParameters().size() == 0){
265 | //计算表达式,没有更多列,结束循环
266 | if (node.getData().getExpression().equals(expr.toString())){
267 | node.getData().setIsEnd(true);
268 | }
269 | }else {
270 | expr.getParameters().forEach( expr1 -> {
271 | handlerExpr(expr1,node);
272 | });
273 | }
274 | }
275 |
276 |
277 | /**
278 | * 聚合
279 | * @param expr
280 | * @param node
281 | */
282 | public static void visitSQLAggregateExpr(SQLAggregateExpr expr,TreeNode node){
283 | expr.getArguments().forEach( expr1 -> {
284 | handlerExpr(expr1,node);
285 | });
286 | }
287 |
288 |
289 | /**
290 | * 选择
291 | * @param expr
292 | * @param node
293 | */
294 | public static void visitSQLCaseExpr(SQLCaseExpr expr,TreeNode node){
295 | expr.getItems().forEach( expr1 -> {
296 | handlerExpr(expr1.getConditionExpr(),node);
297 |
298 | });
299 | }
300 |
301 |
302 | /**
303 | * 判断
304 | * @param expr
305 | * @param node
306 | */
307 | public static void visitSQLBinaryOpExpr(SQLBinaryOpExpr expr,TreeNode node){
308 | handlerExpr(expr.getLeft(),node);
309 | handlerExpr(expr.getRight(),node);
310 | }
311 |
312 |
313 |
314 |
315 | /**
316 | * 表达式列
317 | * @param expr
318 | * @param node
319 | */
320 | public static void visitSQLPropertyExpr(SQLPropertyExpr expr,TreeNode node){
321 | LineageColumn project = new LineageColumn();
322 | String columnName = expr.getName().replace("`","");
323 | project.setTargetColumnName(columnName);
324 |
325 | project.setSourceTableName(expr.getOwner().toString());
326 | TreeNode search = node.findChildNode(project);
327 |
328 | if (EmptyUtils.isEmpty(search)){
329 | node.addChild(project);
330 | }
331 | }
332 |
333 | /**
334 | * 列
335 | * @param expr
336 | * @param node
337 | */
338 | public static void visitSQLIdentifierExpr(SQLIdentifierExpr expr,TreeNode node){
339 | LineageColumn project = new LineageColumn();
340 | project.setTargetColumnName(expr.getName());
341 |
342 | TreeNode search = node.findChildNode(project);
343 | if (EmptyUtils.isEmpty(search)){
344 | node.addChild(project);
345 | }
346 | }
347 |
348 |
349 | /**
350 | * 整型赋值
351 | * @param expr
352 | * @param node
353 | */
354 | public static void visitSQLIntegerExpr(SQLIntegerExpr expr,TreeNode node){
355 | LineageColumn project = new LineageColumn();
356 | project.setTargetColumnName(expr.getNumber().toString());
357 | //常量不设置表信息
358 | project.setSourceTableName("");
359 | project.setIsEnd(true);
360 | TreeNode search = node.findChildNode(project);
361 | if (EmptyUtils.isEmpty(search)){
362 | node.addChild(project);
363 | }
364 | }
365 |
366 | /**
367 | * 数字
368 | * @param expr
369 | * @param node
370 | */
371 | public static void visitSQLNumberExpr(SQLNumberExpr expr, TreeNode node){
372 | LineageColumn project = new LineageColumn();
373 | project.setTargetColumnName(expr.getNumber().toString());
374 | //常量不设置表信息
375 | project.setSourceTableName("");
376 | project.setIsEnd(true);
377 | TreeNode search = node.findChildNode(project);
378 | if (EmptyUtils.isEmpty(search)){
379 | node.addChild(project);
380 | }
381 | }
382 |
383 |
384 | /**
385 | * 字符
386 | * @param expr
387 | * @param node
388 | */
389 | public static void visitSQLCharExpr(SQLCharExpr expr, TreeNode node){
390 | LineageColumn project = new LineageColumn();
391 | project.setTargetColumnName(expr.toString());
392 | //常量不设置表信息
393 | project.setSourceTableName("");
394 | project.setIsEnd(true);
395 | TreeNode search = node.findChildNode(project);
396 | if (EmptyUtils.isEmpty(search)){
397 | node.addChild(project);
398 | }
399 | }
400 | }
401 |
--------------------------------------------------------------------------------