├── README.md ├── dw-column-level-lineage-master.zip └── src ├── main ├── java │ └── com │ │ └── jd │ │ └── jr │ │ └── daat │ │ └── dw │ │ └── lineage │ │ ├── analysis │ │ ├── ColumnLineageAnalyzer.java │ │ └── GetColumnsLineage.java │ │ ├── domains │ │ ├── basic │ │ │ ├── Column.java │ │ │ ├── ColumnType.java │ │ │ ├── Database.java │ │ │ ├── DatabaseType.java │ │ │ ├── ForeignKeys.java │ │ │ ├── MethodInvoke.java │ │ │ ├── Table.java │ │ │ └── TableType.java │ │ └── lineage │ │ │ ├── column │ │ │ ├── ColumnLineageColumnNode.java │ │ │ ├── ColumnLineageColumnNodeType.java │ │ │ ├── ColumnLineageRelationNode.java │ │ │ ├── ColumnLineageRelationNodeType.java │ │ │ └── ColumnLineageUtils.java │ │ │ └── table │ │ │ └── TableColumnLineage.java │ │ └── utils │ │ ├── ProcessUnparsedSQL.java │ │ ├── SQLExtractor.java │ │ ├── SchemaExtractor.java │ │ └── SchemaLoader.java └── resources │ └── log4j2.xml └── test └── java └── com └── jd └── jr └── daat └── dw └── lineage ├── analysis ├── ColumnLineageAnalyzerTest.java ├── ColunmnTest.java └── GetPrimaryAndForeignKeyTest.java └── utils ├── GetUnparsedSQLTest.java ├── ProcessUnparsedSQLTest.java ├── SQLExtractorTest.java ├── SchemaExtractorTest.java └── SchemaLoaderTest.java /README.md: -------------------------------------------------------------------------------- 1 | # Hive_SQL_AST 2 | 利用Druid SQL Parser解析HiveSQL日志,自动构建字段级别的血缘关系及主外键的自动抽取 3 | 4 | # Druid 5 | Druid是alibaba开源的一个JDBC组件库,包括数据库连接池、SQL Parser等组件,DruidDataSource官方宣称是最好的数据库连接池,Druid能够提供强大的监控和扩展功能。 6 | 具体信息可参考官方wiki:https://github.com/alibaba/druid/wiki/%E9%A6%96%E9%A1%B5 7 | # Druid SQL Parser 8 | 在这个项目中,只用到SQL Parser组件,在这里主要详解我们是如何利用SQL Parser进行解析sql日志信息。 9 | ## 简介 10 | 首先了解下SQL Parser。SQL Parser是Druid的一个重要组成部分,Druid内置使用SQL Parser来实现防御SQL注入(WallFilter)、合并统计没有参数化的SQL(StatFilter的mergeSql)、SQL格式化、分库分表。 11 | 12 | 具体的wiki信息:https://github.com/alibaba/druid/wiki/SQL-Parser 13 | 14 | 先来了解一下Druid SQL parser的结构,它主要分三个模块: 15 | 16 | - Parser 17 | - AST 18 | - Visitor 19 | 20 
| ### Parser 21 | parser是将输入文本转换为ast(抽象语法树),parser包括两个部分,Parser和Lexer,其中Lexer实现词法分析,Parser实现语法分析。 22 | ### AST 23 | AST是Abstract Syntax Tree的缩写,也就是抽象语法树。AST是parser输出的结果。我们通过下面的语句来产生AST: 24 | ```java 25 | final String dbType = JdbcConstants.MYSQL; // 可以是ORACLE、POSTGRESQL、SQLSERVER、ODPS等 26 | String sql = "select * from t"; 27 | List<SQLStatement> stmtList = SQLUtils.parseStatements(sql, dbType); 28 | ``` 29 | 第一句指定数据库类型(只是选择SQL方言,并不会建立数据库连接),以便之后的解析,第二句是示例sql代码,然后我们利用SQLUtils的parseStatements产生List<SQLStatement>。 30 | 常用的SQLStatement包括SELECT/UPDATE/DELETE/INSERT,例如这个sql就是属于 SQLSelectStatement ,SQLStatement你可以简单理解为1条SQL语句。 31 | ```java 32 | class SQLSelectStatement implements SQLStatement { 33 | SQLSelect select; 34 | } 35 | ``` 36 | 在之后SQLSelectStatement包含一个SQLSelect,SQLSelect包含一个SQLSelectQuery,都是组成的关系。SQLSelectQuery有主要的两个派生类,分别是SQLSelectQueryBlock和SQLUnionQuery。 37 | 然后我们根据定义再继续看SQLSelect的定义, 38 | ```java 39 | class SQLSelect extends SQLObjectImpl { 40 | SQLWithSubqueryClause withSubQuery; 41 | SQLSelectQuery query; 42 | } 43 | 44 | interface SQLSelectQuery extends SQLObject {} 45 | 46 | class SQLSelectQueryBlock implements SQLSelectQuery { 47 | List<SQLSelectItem> selectList; 48 | SQLTableSource from; 49 | SQLExprTableSource into; 50 | SQLExpr where; 51 | SQLSelectGroupByClause groupBy; 52 | SQLOrderBy orderBy; 53 | SQLLimit limit; 54 | } 55 | 56 | class SQLUnionQuery implements SQLSelectQuery { 57 | SQLSelectQuery left; 58 | SQLSelectQuery right; 59 | SQLUnionOperator operator; // UNION/UNION_ALL/MINUS/INTERSECT 60 | } 61 | ``` 62 | SQLSelect包含一个SQLSelectQuery,在SQLSelectQuery里我们看到它包含selectList,from,into,where等关键字,我们就 63 | 可以将sql解析了,比如这个简单的sql= "select id from t",它的selectlist为 id,这里的from t是一个SQLExprTableSource, 64 | 其中expr是一个name=t的SQLIdentifierExpr。 65 | 66 | 当然我们可以清晰的知道id是属于t表的,但是在我们的sql日志中,sql语句没有这么简单,在数据仓库中通过一系列的 67 | 调用,比如在一张全量用户信息表中想得到所有用户年龄信息,我们提取出来构成一张表,然后在年龄信息表中我们又想得到不同年龄段的职业信息,再构成一张表,这样下去我们 68 | 会构成很多张临时表,方便进行一些业务操作。但是这些字段其实都是来源于最开始的用户信息表的,当我们想知道这临时表中某个字段到底是来源于哪个物理表的,我们 69 | 通过sql的语法解析树一层层解析,可以自动的找到它的血缘关系。 70 
| 71 | 比较复杂的sql示例:(为了保护公司数据,代码进行了修改) 72 | ```sql 73 | create table tmp.tmp_a_supp_achievement_an_mom_001 as 74 | select a1.dim_day_txdate 75 | ,a.a_pin 76 | ,sum(coalesce(b.amount,0)) as total_amount 77 | ,sum(coalesce(c.refund_amt,0)) as refund_amt 78 | ,sum(os_prcp_amt)os_prcp_amt 79 | from 80 | (select dim_day_txdate 81 | from dmv.dim_day 82 | where dim_day_txdate>=concat(cast(year('2018-05-15')-1 as string),'-',substring('2018-05-15',6,2),'-01') and dim_day_txdate<='2018-05-15' 83 | )a1 84 | join 85 | (select distinct a_pin 86 | ,product_type 87 | from dwd.dwd_as_qy_cust_account_s_d 88 | where dt ='2018-05-15' and product_type='20288' 89 | )a 90 | left outer join 91 | (select substring(tx_time,1,10) as time 92 | ,sum(order_amt) as amount 93 | ,a_pin 94 | from DWD.dwd_actv_as_qy_iou_receipt_s_d------- 95 | where a_order_type='20096' - 96 | and a_pin not in ('vep_test','VOPVSP测试','VOPVSP测试_1','测试号','2016联通测试号','pxpx01','pxpx02', 97 | 'i000','i001','测试','测试aa01','测试aa02','px01','px02', 98 | 'test','test01','px031901','px031902','多级审核测试admin','邮政测试2015','中石油积分兑换-测试','买卖宝测试王','mengmengda111','ZHAOGANGWANG1809','ZHAOGANGWANGC1000508', 99 | '差旅测试01','差旅测试03','差旅测试04','差旅测试02','差旅测试06','差旅测试05','jc_test1','大连航天测试','大客户金采测试','移动测试账号1','中国联通测试','云积分商城测试' 100 | ,'多级审核测试采购08','多级审核测试采购05','国电物流有限公司测试') 101 | and dt='2018-05-15' 102 | group by substring(tx_time,1,10),a_pin 103 | )b on cast(a.a_pin as string)=cast(b.a_pin as string) and a1.dim_day_txdate=b.time 104 | left outer join 105 | (select substring(refund_time,1,10) as refund_time 106 | ,a_pin 107 | ,sum(refund_amt)as refund_amt 108 | from DWD.dwd_as_qy_iou_refund_s_d 109 | where refund_status='20090' 110 | and dt='2018-05-15' 111 | and a_order_no <> '12467657248' 112 | and a_refund_no <> '1610230919767139947' 113 | group by substring(refund_time,1,10),a_pin 114 | )c on cast(a.a_pin as string)=cast(c.a_pin as string) and a1.dim_day_txdate=c.refund_time 115 | left outer join 116 | (select dt,a_pin,sum(os_prcp_amt) as 
os_prcp_amt from dwd.dwd_as_qy_cycle_detail_s_d where dt>=concat(substr('2018-05-15',1,7),'-01') and dt<='2018-05-15' group by dt,a_pin)e on cast(a.a_pin as string)=cast(e.a_pin as string) and a1.dim_day_txdate=e.dt 117 | group by a1.dim_day_txdate,a.a_pin 118 | ; 119 | ``` 120 | 121 | 所以从上面的sql中,经过解析可以得到dim_day_txdate字段来源于dmv.dim_day,a_pin来源于dwd.dwd_as_qy_cust_account_s_d,total_amount是 122 | 来源于DWD.dwd_actv_as_qy_iou_receipt_s_d中的(order_amt)经过sum之后得到amount 再通过sum和coalesce操作得到的,这系列的血缘关系变化我们将用链表方式进行存储,得到最终的血缘关系。 123 | 124 | ## 代码结构如下: 125 | ``` 126 | dw-column-level-lineage/ # 工程根目录 127 | ├── src/ 128 | | ├─ main 129 | | | ├─ lineage 130 | | | | ├─ analysis 131 | | | | | ├─ ColumnLineageAnalyzer.java #解析主函数 132 | | | | ├─ domains 133 | | | | | ├─ basic #数据结构的基本定义 134 | | | | | | ├─ Column.java 135 | | | | | | ├─ ColumnType.java 136 | | | | | | ├─ Database.java 137 | | | | | | ├─ DatabaseType.java 138 | | | | | | ├─ Table.java 139 | | | | | | ├─ TableType.java 140 | | | | | | ├─ ForeignKeys.java 141 | | | | | ├─ lineage # 血缘关系链表的定义 142 | | | | | | ├─ table 143 | | | | | | | ├─ TableColumnLineage.java 144 | | | | | | ├─ column 145 | | | | | | | ├─ ColumnLineageColumnNode.java 146 | | | | | | | ├─ ColumnLineageColumnNodeType.java 147 | | | | | | | ├─ ColumnLineageRelationNodeType.java 148 | | | | | | | ├─ ColumnLineageRelationNode.java 149 | | | | | | | ├─ ColumnLineageUtils.java 150 | | | | ├─ utils # 数据处理文件 151 | | | | | ├─ ProcessUnparsedSQL.java 152 | | | | | ├─ SchemaExtractor.java 153 | | | | | ├─ SchemaLoader.java 154 | | | | | ├─ SQLExtractor.java 155 | | | ├─ resources 156 | | | | ├─ log4j2.xml 157 | | ├─ test # 单元测试文件 158 | ├── .gitignore # Git Ignore 文件 159 | ├── pom.xml # maven配置文件 160 | 161 | ``` 162 | -------------------------------------------------------------------------------- /dw-column-level-lineage-master.zip: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lulumengyi/Hive_SQL_AST/2771242e4b5c117de8fbfb8102ac2c0fda027607/dw-column-level-lineage-master.zip -------------------------------------------------------------------------------- /src/main/java/com/jd/jr/daat/dw/lineage/analysis/ColumnLineageAnalyzer.java: -------------------------------------------------------------------------------- 1 | package com.jd.jr.daat.dw.lineage.analysis; 2 | 3 | import com.alibaba.druid.sql.SQLUtils; 4 | import com.alibaba.druid.sql.ast.SQLExpr; 5 | import com.alibaba.druid.sql.ast.SQLName; 6 | import com.alibaba.druid.sql.ast.SQLObject; 7 | import com.alibaba.druid.sql.ast.SQLStatement; 8 | import com.alibaba.druid.sql.ast.expr.*; 9 | import com.alibaba.druid.sql.ast.statement.*; 10 | import com.alibaba.druid.sql.repository.SchemaRepository; 11 | import com.google.common.collect.Lists; 12 | import com.jd.jr.daat.dw.lineage.domains.basic.*; 13 | import com.jd.jr.daat.dw.lineage.domains.lineage.column.*; 14 | import com.jd.jr.daat.dw.lineage.domains.lineage.table.TableColumnLineage; 15 | 16 | import java.io.File; 17 | import java.io.FileWriter; 18 | import java.io.IOException; 19 | import java.util.ArrayList; 20 | import java.util.List; 21 | 22 | public class ColumnLineageAnalyzer { 23 | /** 24 | * Schema Repository 25 | */ 26 | private SchemaRepository schemaRepository; 27 | 28 | public ColumnLineageAnalyzer(SchemaRepository schemaRepository) { 29 | this.schemaRepository = schemaRepository; 30 | } 31 | 32 | /** 33 | * Description: 34 | * 添加不支持的节点 35 | * 36 | * @param columnNode 字段级别血缘字段节点 37 | * @param sqlObject SQLObject 38 | */ 39 | public void putUnsupportedChildrenNodes( 40 | ColumnLineageColumnNode columnNode, 41 | SQLObject sqlObject) { 42 | String className = sqlObject.getClass().getName(); 43 | 44 | // 构建 Relation 节点,类型 [UNSUPPORTED] 45 | ColumnLineageRelationNode relationChildNode = new ColumnLineageRelationNode( 46 | ColumnLineageRelationNodeType.UNSUPPORTED); 47 | 48 | // 连接 Relation 节点和父 
Column 节点 49 | relationChildNode.setParent(columnNode); 50 | columnNode.addChildNode(relationChildNode); 51 | 52 | // 构建子 Column 节点 53 | ColumnLineageColumnNode columnChildNode = new ColumnLineageColumnNode(ColumnLineageColumnNodeType.STOP); 54 | 55 | // 连接 Relation 节点和子 Column 节点 56 | relationChildNode.addChildNode(columnChildNode); 57 | columnChildNode.setParent(relationChildNode); 58 | 59 | // 停止递归 60 | return; 61 | } 62 | /** 63 | * Description: 64 | * 添加 SQLPropertry 字段级别血缘 65 | * 为SQLCaseExpr设置 66 | * 67 | */ 68 | public void putChildrenNodes( 69 | ColumnLineageColumnNode columnNode, 70 | ColumnLineageColumnNode parentColumnNode, 71 | SQLPropertyExpr sqlPropertyExpr) { 72 | 73 | // 构建 Relation 节点,类型 [DIRECT] 74 | ColumnLineageRelationNode relationChildNode = new ColumnLineageRelationNode( 75 | ColumnLineageRelationNodeType.DIRECT); 76 | 77 | // 连接 Relation 节点和父 Column 节点 78 | relationChildNode.setParent(columnNode); 79 | columnNode.addChildNode(relationChildNode); 80 | // 获取子节点 81 | 82 | // 构建子 Column 节点 83 | ColumnLineageColumnNodeType columnChildNodeType = ColumnLineageUtils 84 | .getColumnLineageColumnNodeType(sqlPropertyExpr.getResolvedOwnerObject(), parentColumnNode); 85 | ColumnLineageColumnNode columnChildNode = new ColumnLineageColumnNode(columnChildNodeType); 86 | columnChildNode.setSqlObject(sqlPropertyExpr.getResolvedOwnerObject()); 87 | columnChildNode.setTableName(sqlPropertyExpr.getOwnernName()); 88 | columnChildNode.setColumnName(sqlPropertyExpr.getName()); 89 | // 连接 Relation 节点和子 Column 节点 90 | relationChildNode.addChildNode(columnChildNode); 91 | columnChildNode.setParent(relationChildNode); 92 | 93 | // 递归子 Column 节点 94 | 95 | putChildrenNodes(columnChildNode, columnNode, sqlPropertyExpr.getName()); 96 | 97 | } 98 | 99 | 100 | /** 101 | * Description: 102 | * 添加 SQLSelectItem 字段级别血缘 103 | * 104 | * @param columnNode 字段级别血缘字段节点 105 | * @param parentColumnNode 父字段节点 106 | * @param sqlSelectItem SQLSelectItem (即 Column) 107 | */ 108 | public 
void putChildrenNodes( 109 | ColumnLineageColumnNode columnNode, 110 | ColumnLineageColumnNode parentColumnNode, 111 | SQLSelectItem sqlSelectItem) { 112 | 113 | String columnNameOrAlias = null; 114 | String tablename =columnNode.getTableName(); 115 | String databasename = columnNode.getDatabaseName(); 116 | SQLObject sqlObject=null; 117 | if (sqlSelectItem.getExpr() instanceof SQLPropertyExpr) { 118 | // 构建 Relation 节点,类型 [DIRECT] 119 | ColumnLineageRelationNode relationChildNode = new ColumnLineageRelationNode( 120 | ColumnLineageRelationNodeType.DIRECT); 121 | 122 | // 获取子节点 123 | SQLPropertyExpr sqlPropertyExpr = (SQLPropertyExpr) sqlSelectItem.getExpr(); 124 | 125 | ColumnLineageColumnNodeType columnChildNodeType = ColumnLineageUtils 126 | .getColumnLineageColumnNodeType(sqlPropertyExpr.getResolvedOwnerObject(), parentColumnNode); 127 | ColumnLineageColumnNode columnChildNode = new ColumnLineageColumnNode(columnChildNodeType); 128 | 129 | columnNameOrAlias = sqlPropertyExpr.getName(); 130 | tablename =sqlPropertyExpr.getOwnernName(); 131 | databasename =columnNode.getDatabaseName(); 132 | sqlObject =sqlPropertyExpr.getResolvedOwnerObject(); 133 | 134 | // 调用函数 连接childNode 135 | setColumnChildNode(relationChildNode,columnChildNode,columnNode,columnNameOrAlias,databasename,tablename,sqlObject); 136 | 137 | // 递归子 Column 节点 138 | 139 | putChildrenNodes(columnChildNode, columnNode, sqlPropertyExpr.getName()); 140 | } else if (sqlSelectItem.getExpr() instanceof SQLIdentifierExpr){ 141 | // 构建 Relation 节点,类型 [DIRECT] 142 | ColumnLineageRelationNode relationChildNode = new ColumnLineageRelationNode( 143 | ColumnLineageRelationNodeType.DIRECT); 144 | SQLExpr selectItemExpr = sqlSelectItem.getExpr(); 145 | // 查询 Item 是 SELECT item 类型 146 | SQLIdentifierExpr sqlIdentifierExpr = (SQLIdentifierExpr) selectItemExpr; 147 | 148 | ColumnLineageColumnNodeType columnChildNodeType = ColumnLineageUtils 149 | 
.getColumnLineageColumnNodeType(sqlIdentifierExpr.getResolvedOwnerObject(), parentColumnNode); 150 | ColumnLineageColumnNode columnChildNode = new ColumnLineageColumnNode(columnChildNodeType); 151 | 152 | 153 | columnNameOrAlias = sqlIdentifierExpr.getName(); 154 | sqlObject = sqlIdentifierExpr.getResolvedOwnerObject(); 155 | 156 | //如果sqlobject为空,可能对应的schema中没有该字段 157 | if(sqlObject ==null){ 158 | SQLSelectQueryBlock sqlSelectQueryBlock = (SQLSelectQueryBlock) sqlIdentifierExpr.getParent().getParent(); 159 | sqlObject = sqlSelectQueryBlock.getFrom(); 160 | } 161 | 162 | //设置tablename和basename 163 | if (sqlObject instanceof SQLExprTableSource){ 164 | SQLExpr expr = ((SQLExprTableSource) sqlObject).getExpr(); 165 | tablename = GetTableDatabaseName(expr).split("\t")[0]; 166 | databasename = GetTableDatabaseName(expr).split("\t")[1]; 167 | } else if (sqlObject instanceof SQLUnionQueryTableSource){ 168 | tablename = ((SQLUnionQueryTableSource) sqlObject).getAlias(); 169 | databasename = columnNode.getDatabaseName(); 170 | } else if (sqlObject instanceof SQLSubqueryTableSource){ 171 | SQLObject sqlObject1= ((SQLSubqueryTableSource) sqlObject).getSelect().getQueryBlock().getFrom(); 172 | 173 | if(sqlObject1 instanceof SQLExprTableSource){ 174 | SQLExprTableSource sqlExprTableSource = (SQLExprTableSource) ((SQLSubqueryTableSource) sqlObject).getSelect().getQueryBlock().getFrom(); 175 | tablename = GetTableDatabaseName(sqlExprTableSource.getExpr()).split("\t")[0]; 176 | databasename = GetTableDatabaseName(sqlExprTableSource.getExpr()).split("\t")[1]; 177 | } else if (sqlObject1 instanceof SQLSubqueryTableSource){ 178 | SQLObject sqlObject2 =((SQLSubqueryTableSource) sqlObject1).getSelect().getQueryBlock().getFrom(); 179 | 180 | if (sqlObject2 instanceof SQLExprTableSource){ 181 | tablename = GetTableDatabaseName(((SQLExprTableSource) sqlObject2).getExpr()).split("\t")[0]; 182 | databasename = GetTableDatabaseName(((SQLExprTableSource) sqlObject2).getExpr()).split("\t")[1]; 
183 | } 184 | } 185 | 186 | } 187 | // 调用函数 连接childNode 188 | setColumnChildNode(relationChildNode, columnChildNode, columnNode, columnNameOrAlias, databasename, tablename, sqlObject); 189 | 190 | putChildrenNodes(columnChildNode, columnNode, columnNameOrAlias); 191 | } else if(sqlSelectItem.getExpr() instanceof SQLCastExpr){ 192 | //如果为SQLCastExpr类型 193 | 194 | // 构建 Relation 节点,类型 [FUNCTION] 195 | ColumnLineageRelationNode relationChildNode = new ColumnLineageRelationNode( 196 | ColumnLineageRelationNodeType.CAST); 197 | 198 | SQLExpr selectItemExpr = sqlSelectItem.getExpr(); 199 | SQLCastExpr sqlCastExpr = (SQLCastExpr) selectItemExpr; 200 | 201 | ColumnLineageColumnNodeType columnChildNodeType = ColumnLineageUtils 202 | .getColumnLineageColumnNodeType(sqlCastExpr, parentColumnNode); 203 | ColumnLineageColumnNode columnChildNode = new ColumnLineageColumnNode(columnChildNodeType); 204 | 205 | columnNameOrAlias =sqlSelectItem.getAlias(); 206 | tablename=columnNode.getTableName(); 207 | databasename =columnNode.getDatabaseName(); 208 | sqlObject = null; 209 | 210 | //设置sqlObject 211 | if(sqlCastExpr.getExpr() instanceof SQLPropertyExpr){ 212 | columnNameOrAlias = ((SQLPropertyExpr) sqlCastExpr.getExpr()).getName(); 213 | tablename =((SQLPropertyExpr) sqlCastExpr.getExpr()).getOwnernName(); 214 | sqlObject =((SQLPropertyExpr) sqlCastExpr.getExpr()).getResolvedOwnerObject(); 215 | }else if(sqlCastExpr.getExpr() instanceof SQLIdentifierExpr){ 216 | columnNameOrAlias = ((SQLIdentifierExpr) sqlCastExpr.getExpr()).getName(); 217 | sqlObject = ((SQLIdentifierExpr) sqlCastExpr.getExpr()).getResolvedOwnerObject(); 218 | } 219 | else if(sqlCastExpr.getExpr() instanceof SQLBinaryOpExpr){ 220 | SQLBinaryOpExpr sqlBinaryOpExpr = (SQLBinaryOpExpr) sqlCastExpr.getExpr(); 221 | sqlBinaryOpExpr.getLeft(); 222 | }else if(sqlCastExpr.getExpr() instanceof SQLAggregateExpr){ 223 | 224 | SQLAggregateExpr sqlAggregateExpr = (SQLAggregateExpr) sqlCastExpr.getExpr(); 225 | 226 | // 
调用GetAggregateExprColunmnSQLObject函数 设置sqlAggregateExpr的sqlobject和columnNameOrAlias 227 | MethodInvoke SqlObjectColumnName =GetAggregateExprColunmnSQLObject(sqlAggregateExpr); 228 | sqlObject=SqlObjectColumnName.getSqlObject(); 229 | columnNameOrAlias =SqlObjectColumnName.getColumnNameOrAlias(); 230 | 231 | }else if(sqlCastExpr.getExpr() instanceof SQLMethodInvokeExpr){ 232 | SQLMethodInvokeExpr sqlMethodInvokeExpr = (SQLMethodInvokeExpr) sqlCastExpr.getExpr(); 233 | // 调用GetMethodInvokeColumnSQLObjec函数 得到sqlobect和columnNameOrAlias 234 | MethodInvoke SqlObjectColumnName =GetMethodInvokeColumnSQLObject(sqlMethodInvokeExpr); 235 | sqlObject=SqlObjectColumnName.getSqlObject(); 236 | columnNameOrAlias =SqlObjectColumnName.getColumnNameOrAlias(); 237 | 238 | } 239 | 240 | // 调用函数 连接childNode 241 | setColumnChildNode(relationChildNode,columnChildNode,columnNode,columnNameOrAlias,databasename,tablename,sqlObject); 242 | 243 | putChildrenNodes(columnChildNode, columnNode, columnNameOrAlias); 244 | 245 | }//如果为SQLAggregateExpr类型 246 | else if(sqlSelectItem.getExpr() instanceof SQLAggregateExpr){ 247 | // 构建 Relation 节点,类型 [FUNCTION] 248 | ColumnLineageRelationNode relationChildNode = new ColumnLineageRelationNode( 249 | ColumnLineageRelationNodeType.FUNCTION); 250 | 251 | SQLExpr selectItemExpr = sqlSelectItem.getExpr(); 252 | // 查询 Item 是 SSQLAggregateExpr 类型 253 | SQLAggregateExpr sqlAggregateExpr = (SQLAggregateExpr) selectItemExpr; 254 | 255 | ColumnLineageColumnNodeType columnChildNodeType = ColumnLineageUtils 256 | .getColumnLineageColumnNodeType(sqlAggregateExpr, parentColumnNode); 257 | ColumnLineageColumnNode columnChildNode = new ColumnLineageColumnNode(columnChildNodeType); 258 | 259 | // 调用GetAggregateExprColunmnSQLObject函数 设置sqlAggregateExpr的sqlobject和columnNameOrAlias 260 | MethodInvoke SqlObjectColumnName =GetAggregateExprColunmnSQLObject(sqlAggregateExpr); 261 | sqlObject=SqlObjectColumnName.getSqlObject(); 262 | columnNameOrAlias 
=SqlObjectColumnName.getColumnNameOrAlias(); 263 | //如果sqlobject为空 264 | if(sqlObject ==null){ 265 | SQLSelectQueryBlock sqlSelectQueryBlock = (SQLSelectQueryBlock) sqlSelectItem.getParent(); 266 | sqlObject = sqlSelectQueryBlock.getFrom(); 267 | } 268 | tablename =columnNode.getTableName(); 269 | databasename = columnNode.getDatabaseName(); 270 | // 调用函数 连接childNode 271 | setColumnChildNode(relationChildNode,columnChildNode,columnNode,columnNameOrAlias,databasename,tablename,sqlObject); 272 | 273 | putChildrenNodes(columnChildNode, columnNode, columnNameOrAlias); 274 | 275 | }//如果是SQLMethodInvokeExpr函数嵌套关系 276 | else if(sqlSelectItem.getExpr() instanceof SQLMethodInvokeExpr){ 277 | ColumnLineageRelationNode relationChildNode = new ColumnLineageRelationNode( 278 | ColumnLineageRelationNodeType.FUNCTION); 279 | 280 | SQLMethodInvokeExpr sqlMethodInvokeExpr = (SQLMethodInvokeExpr)sqlSelectItem.getExpr(); 281 | ColumnLineageColumnNodeType columnChildNodeType = ColumnLineageUtils 282 | .getColumnLineageColumnNodeType(sqlMethodInvokeExpr, parentColumnNode); 283 | ColumnLineageColumnNode columnChildNode = new ColumnLineageColumnNode(columnChildNodeType); 284 | 285 | SQLSelectQueryBlock sqlSelectQueryBlock = (SQLSelectQueryBlock) sqlSelectItem.getParent(); 286 | sqlObject = sqlSelectQueryBlock.getFrom(); 287 | if(sqlObject instanceof SQLExprTableSource){ 288 | tablename =GetTableDatabaseName(((SQLExprTableSource) sqlObject).getExpr()).split("\t")[0]; 289 | databasename =GetTableDatabaseName(((SQLExprTableSource) sqlObject).getExpr()).split("\t")[1]; 290 | 291 | } 292 | // 获取sqlobect和columnNameOrAlias 293 | MethodInvoke SqlObjectColumnName =GetMethodInvokeColumnSQLObject(sqlMethodInvokeExpr); 294 | sqlObject=SqlObjectColumnName.getSqlObject(); 295 | columnNameOrAlias =SqlObjectColumnName.getColumnNameOrAlias(); 296 | if(columnNameOrAlias.equals("")){ 297 | if(sqlSelectItem.getAlias()!=null){ 298 | columnNameOrAlias =sqlSelectItem.getAlias(); 299 | } 300 | } 301 | 302 | // 
调用函数 连接childNode 303 | setColumnChildNode(relationChildNode,columnChildNode,columnNode,columnNameOrAlias,databasename,tablename,sqlObject); 304 | 305 | putChildrenNodes(columnChildNode, columnNode, columnNameOrAlias); 306 | 307 | }//添加SQLCharExpr 308 | else if(sqlSelectItem.getExpr() instanceof SQLCharExpr){ 309 | SQLCharExpr sqlCharExpr = (SQLCharExpr) sqlSelectItem.getExpr(); 310 | columnNameOrAlias =sqlCharExpr.getText(); 311 | ColumnLineageRelationNode relationChildNode = new ColumnLineageRelationNode( 312 | ColumnLineageRelationNodeType.DIRECT); 313 | 314 | ColumnLineageColumnNodeType columnChildNodeType = ColumnLineageUtils 315 | .getColumnLineageColumnNodeType(sqlCharExpr, parentColumnNode); 316 | ColumnLineageColumnNode columnChildNode = new ColumnLineageColumnNode(columnChildNodeType); 317 | SQLSelectQueryBlock sqlSelectQueryBlock = (SQLSelectQueryBlock) sqlSelectItem.getParent(); 318 | 319 | sqlObject = sqlSelectQueryBlock.getFrom(); 320 | // 调用函数 连接childNode 321 | setColumnChildNode(relationChildNode, columnChildNode, columnNode, columnNameOrAlias, databasename, tablename, sqlObject); 322 | 323 | } else if (sqlSelectItem.getExpr() instanceof SQLBinaryOpExpr){ 324 | // 构建 Relation 节点,类型 [FUNCTION] 325 | ColumnLineageRelationNode relationChildNode = new ColumnLineageRelationNode( 326 | ColumnLineageRelationNodeType.FUNCTION); 327 | SQLBinaryOpExpr sqlBinaryOpExpr = (SQLBinaryOpExpr) sqlSelectItem.getExpr(); 328 | SQLExpr sqlLeftExpr =sqlBinaryOpExpr.getLeft(); 329 | 330 | if (sqlLeftExpr instanceof SQLAggregateExpr){ 331 | // 查询 Item 是 SSQLAggregateExpr 类型 332 | SQLAggregateExpr sqlAggregateExpr = (SQLAggregateExpr) sqlLeftExpr; 333 | 334 | ColumnLineageColumnNodeType columnChildNodeType = ColumnLineageUtils 335 | .getColumnLineageColumnNodeType(sqlBinaryOpExpr, parentColumnNode); 336 | ColumnLineageColumnNode columnChildNode = new ColumnLineageColumnNode(columnChildNodeType); 337 | 338 | // 调用GetAggregateExprColunmnSQLObject函数 
设置sqlAggregateExpr的sqlobject和columnNameOrAlias 339 | MethodInvoke SqlObjectColumnName =GetAggregateExprColunmnSQLObject(sqlAggregateExpr); 340 | sqlObject=SqlObjectColumnName.getSqlObject(); 341 | columnNameOrAlias =SqlObjectColumnName.getColumnNameOrAlias(); 342 | 343 | //如果sqlobject为空 344 | if (sqlObject == null) { 345 | SQLSelectQueryBlock sqlSelectQueryBlock = (SQLSelectQueryBlock) sqlSelectItem.getParent(); 346 | sqlObject = sqlSelectQueryBlock.getFrom(); 347 | } 348 | 349 | tablename =columnNode.getTableName(); 350 | databasename = columnNode.getDatabaseName(); 351 | 352 | // 调用函数 连接childNode 353 | setColumnChildNode(relationChildNode,columnChildNode,columnNode,columnNameOrAlias,databasename,tablename,sqlObject); 354 | 355 | putChildrenNodes(columnChildNode, columnNode, columnNameOrAlias); 356 | 357 | } else { 358 | putUnsupportedChildrenNodes(columnNode, sqlSelectItem); 359 | } 360 | } else { 361 | putUnsupportedChildrenNodes(columnNode, sqlSelectItem); 362 | } 363 | 364 | 365 | } 366 | 367 | /** 368 | * Description:得到SQLAggregateExpr类型中的sqlobject和columnNameOrAlias 369 | * @param 370 | * @return 371 | */ 372 | public MethodInvoke GetExprColumnSQLObject(SQLExpr sqlExpr){ 373 | //在这设置一个新类型,用来返回函数中的sqlobject和columnOrAlias 374 | MethodInvoke methodInvoke =new MethodInvoke(); 375 | 376 | String columnNameOrAlias=""; 377 | SQLObject sqlObject=null; 378 | //如果为SQLPropertyExpr类型 379 | if(sqlExpr instanceof SQLIdentifierExpr){ 380 | SQLIdentifierExpr sqlIdentifierExpr = (SQLIdentifierExpr) (sqlExpr); 381 | sqlObject = sqlIdentifierExpr.getResolvedOwnerObject(); 382 | columnNameOrAlias = sqlIdentifierExpr.getName(); 383 | }//如果为SQLPropertyExpr 384 | else if(sqlExpr instanceof SQLPropertyExpr){ 385 | columnNameOrAlias = ((SQLPropertyExpr) sqlExpr).getName(); 386 | sqlObject = ((SQLPropertyExpr) sqlExpr).getResolvedOwnerObject(); 387 | }//如果是SQLMethodInvokeExpr类型 388 | else if(sqlExpr instanceof SQLMethodInvokeExpr){ 389 | SQLMethodInvokeExpr sqlMethodInvokeExpr = 
(SQLMethodInvokeExpr) sqlExpr; 390 | MethodInvoke SqlObjectColumnName =GetMethodInvokeColumnSQLObject(sqlMethodInvokeExpr); 391 | sqlObject=SqlObjectColumnName.getSqlObject(); 392 | columnNameOrAlias =SqlObjectColumnName.getColumnNameOrAlias(); 393 | }//如果是SQLBinaryOpExpr类型 394 | else if(sqlExpr instanceof SQLBinaryOpExpr){ 395 | SQLBinaryOpExpr sqlBinaryOpExpr = (SQLBinaryOpExpr) sqlExpr; 396 | SQLExpr sqlLeftExpr =sqlBinaryOpExpr.getLeft(); 397 | MethodInvoke SqlObjectColumnName1 =GetExprColumnSQLObject(sqlLeftExpr); 398 | sqlObject=SqlObjectColumnName1.getSqlObject(); 399 | columnNameOrAlias =SqlObjectColumnName1.getColumnNameOrAlias(); 400 | if(columnNameOrAlias.equals("")){ 401 | SQLExpr sqlRightExpr =sqlBinaryOpExpr.getRight(); 402 | MethodInvoke SqlObjectColumnName2 =GetExprColumnSQLObject(sqlRightExpr); 403 | sqlObject=SqlObjectColumnName2.getSqlObject(); 404 | columnNameOrAlias =SqlObjectColumnName2.getColumnNameOrAlias(); 405 | } 406 | } 407 | //如果是SQLCaseExpr类型 408 | else if(sqlExpr instanceof SQLCaseExpr){ 409 | SQLCaseExpr sqlCaseExpr = (SQLCaseExpr) sqlExpr; 410 | if(sqlCaseExpr.getValueExpr() !=null){ 411 | MethodInvoke SqlObjectColumnName =GetExprColumnSQLObject(sqlCaseExpr.getValueExpr() ); 412 | sqlObject=SqlObjectColumnName.getSqlObject(); 413 | columnNameOrAlias =SqlObjectColumnName.getColumnNameOrAlias(); 414 | } 415 | 416 | if(sqlCaseExpr.getItems().size()!=0){ 417 | SQLExpr sqlExpr1 =sqlCaseExpr.getItems().get(0).getValueExpr(); 418 | if(sqlCaseExpr.getItems().get(0).getConditionExpr() instanceof SQLBinaryOpExpr){ 419 | SQLBinaryOpExpr sqlBinaryOpExpr = (SQLBinaryOpExpr) sqlCaseExpr.getItems().get(0).getConditionExpr(); 420 | MethodInvoke SqlObjectColumnName1 =GetExprColumnSQLObject(sqlBinaryOpExpr); 421 | sqlObject=SqlObjectColumnName1.getSqlObject(); 422 | columnNameOrAlias =SqlObjectColumnName1.getColumnNameOrAlias(); 423 | } 424 | } 425 | } 426 | methodInvoke.setColumnNameOrAlias(columnNameOrAlias); 427 | 
methodInvoke.setSqlObject(sqlObject); 428 | return methodInvoke; 429 | } 430 | 431 | 432 | public MethodInvoke GetAggregateExprColunmnSQLObject(SQLAggregateExpr sqlAggregateExpr){ 433 | //在这设置一个新类型,用来返回函数中的sqlobject和columnOrAlias 434 | MethodInvoke methodInvoke =new MethodInvoke(); 435 | 436 | String columnNameOrAlias=""; 437 | SQLObject sqlObject=null; 438 | //找到字段和对应的sqlobject 439 | if(sqlAggregateExpr.getArguments().size()!=0){ 440 | SQLExpr sqlExpr =sqlAggregateExpr.getArguments().get(0); 441 | MethodInvoke SqlObjectColumnName =GetExprColumnSQLObject(sqlExpr); 442 | sqlObject=SqlObjectColumnName.getSqlObject(); 443 | columnNameOrAlias =SqlObjectColumnName.getColumnNameOrAlias(); 444 | } 445 | methodInvoke.setColumnNameOrAlias(columnNameOrAlias); 446 | methodInvoke.setSqlObject(sqlObject); 447 | 448 | return methodInvoke; 449 | 450 | 451 | } 452 | 453 | /** 454 | * Description:解决函数嵌套问题,循环得到最后的字段 如nvl(round(sum(awardvolumn),5),0) 得到awardvolumn这个字段列名和对应的sqlobject 455 | * @param sqlMethodInvokeExpr 函数嵌套表达式 456 | */ 457 | public MethodInvoke GetMethodInvokeColumnSQLObject(SQLMethodInvokeExpr sqlMethodInvokeExpr){ 458 | //在这设置一个新类型,用来返回嵌套函数中的sqlobject和columnOrAlias 459 | MethodInvoke methodInvoke =new MethodInvoke(); 460 | //得到第一个嵌套函数中的字段,如nvl(round(sum(awardvolumn),5),0),得到round(sum(awardvolumn),5) 461 | int parametersSize=sqlMethodInvokeExpr.getParameters().size(); 462 | //初始化sqlObject和columnOrAlias 463 | SQLObject sqlObject = null; 464 | String columnOrAlias =""; 465 | //循环得到最后一个字段 466 | while(parametersSize!=0){ 467 | SQLExpr SQLparameter = sqlMethodInvokeExpr.getParameters().get(parametersSize-1); 468 | MethodInvoke SqlObjectColumnName =GetExprColumnSQLObject(SQLparameter); 469 | sqlObject=SqlObjectColumnName.getSqlObject(); 470 | columnOrAlias =SqlObjectColumnName.getColumnNameOrAlias(); 471 | if(sqlObject!=null && !columnOrAlias.equals("")){ 472 | break; 473 | } 474 | else{ 475 | parametersSize-=1; 476 | continue; 477 | } 478 | } 479 | 480 | 
methodInvoke.setSqlObject(sqlObject); 481 | methodInvoke.setColumnNameOrAlias(columnOrAlias); 482 | return methodInvoke; 483 | } 484 | 485 | /** 486 | *Decription: 连接节点,将此操作合并为函数,此后都调用此函数即可。 487 | * @param relationChildNode 488 | * @param columnChildNode 489 | * @param columnNode 490 | * @param columnNameOrAlias 491 | * @param databasename 492 | * @param tablename 493 | * @param sqlObject 494 | */ 495 | public void setColumnChildNode( ColumnLineageRelationNode relationChildNode,ColumnLineageColumnNode columnChildNode,ColumnLineageColumnNode columnNode, 496 | String columnNameOrAlias, String databasename,String tablename,SQLObject sqlObject){ 497 | // 连接 Relation 节点和父 Column 节点 498 | relationChildNode.setParent(columnNode); 499 | columnNode.addChildNode(relationChildNode); 500 | 501 | // 构建子 Column 节点 502 | columnChildNode.setSqlObject(sqlObject); 503 | columnChildNode.setTableName(tablename); 504 | columnChildNode.setDatabaseName(databasename); 505 | columnChildNode.setColumnName(columnNameOrAlias); 506 | 507 | // 连接 Relation 节点和子 Column 节点 508 | relationChildNode.addChildNode(columnChildNode); 509 | columnChildNode.setParent(relationChildNode); 510 | 511 | } 512 | 513 | /** 514 | * Decription:将SQLTableSource各种类型 都调用此函数 515 | * @param columnNode 节点 516 | * @param parentColumnNode 父节点 517 | * @param sqlTableSource sqlTableSource 518 | * @param nearestAncestorColumnSpecificName 最近的节点,判断是否与selectItem是否相同 519 | * @param selectItems selectItems 520 | */ 521 | 522 | public void putChildrenNodes( ColumnLineageColumnNode columnNode, 523 | ColumnLineageColumnNode parentColumnNode,SQLTableSource sqlTableSource, 524 | String nearestAncestorColumnSpecificName,List selectItems){ 525 | //如果为SQLExprTableSource类型 526 | if(sqlTableSource instanceof SQLExprTableSource) { 527 | SQLExprTableSource sqlExprTableSource = (SQLExprTableSource) sqlTableSource; 528 | SQLExpr expr = sqlExprTableSource.getExpr(); 529 | String tablename = GetTableDatabaseName(expr).split("\t")[0]; 530 | String 
databasename = GetTableDatabaseName(expr).split("\t")[1]; 531 | SQLObject sqlObject =sqlExprTableSource; 532 | for (SQLSelectItem selectItem : selectItems) { 533 | SQLExpr selectItemExpr = selectItem.getExpr(); 534 | 535 | if (selectItemExpr instanceof SQLPropertyExpr) { 536 | // 查询 Item 是一般查询项 537 | SQLPropertyExpr sqlPropertyExpr = (SQLPropertyExpr) selectItemExpr; 538 | 539 | // 获取字段名或别名 540 | String columnNameOrAlias = sqlPropertyExpr.getName(); 541 | if (selectItem.getAlias() != null) { 542 | columnNameOrAlias = selectItem.getAlias(); 543 | } 544 | if (nearestAncestorColumnSpecificName.equalsIgnoreCase(columnNameOrAlias)) { 545 | // 构建 Relation 节点,类型 [DIRECT] 546 | ColumnLineageRelationNode relationChildNode = new ColumnLineageRelationNode( 547 | ColumnLineageRelationNodeType.DIRECT); 548 | ColumnLineageColumnNodeType columnChildNodeType = ColumnLineageUtils 549 | .getColumnLineageColumnNodeType(sqlPropertyExpr.getResolvedOwnerObject(), parentColumnNode); 550 | ColumnLineageColumnNode columnChildNode = new ColumnLineageColumnNode(columnChildNodeType); 551 | 552 | columnNameOrAlias =sqlPropertyExpr.getName(); 553 | 554 | setColumnChildNode(relationChildNode,columnChildNode,columnNode,columnNameOrAlias,databasename,tablename,sqlObject); 555 | 556 | // 递归子 Column 节点 557 | putChildrenNodes(columnChildNode, columnNode, columnNameOrAlias); 558 | 559 | // 发现同名 (字段名或别名相同),即退出 560 | break; 561 | } 562 | //添加select APF.*的情况 563 | else if (columnNameOrAlias.equals("*")) { 564 | // 查询 Item 是 SELECT * 类型 565 | // SQLAllColumnExpr sqlAllColumnExpr = (SQLAllColumnExpr) selectItemExpr; 566 | 567 | // 构建 Relation 节点,类型 [DIRECT] 568 | ColumnLineageRelationNode relationChildNode = new ColumnLineageRelationNode( 569 | ColumnLineageRelationNodeType.DIRECT); 570 | // 构建子 Column 节点 571 | ColumnLineageColumnNodeType columnChildNodeType = ColumnLineageUtils 572 | .getColumnLineageColumnNodeType(sqlPropertyExpr.getResolvedOwnerObject(), parentColumnNode); 573 | ColumnLineageColumnNode 
columnChildNode = new ColumnLineageColumnNode(columnChildNodeType); 574 | 575 | columnNameOrAlias ="*"; 576 | 577 | setColumnChildNode(relationChildNode,columnChildNode,columnNode,columnNameOrAlias,databasename,tablename,sqlObject); 578 | 579 | // 递归子 Column 节点 580 | putChildrenNodes(columnChildNode, columnNode, nearestAncestorColumnSpecificName); 581 | // 找到则跳出 582 | break; 583 | 584 | } 585 | } else if (selectItemExpr instanceof SQLIdentifierExpr) { 586 | // 查询 Item 是 SELECT item 类型 587 | SQLIdentifierExpr sqlIdentifierExpr = (SQLIdentifierExpr) selectItemExpr; 588 | 589 | // 获取字段名或别名 590 | String columnNameOrAlias = sqlIdentifierExpr.getName(); 591 | if (selectItem.getAlias() != null) { 592 | columnNameOrAlias = selectItem.getAlias(); 593 | } 594 | 595 | if (nearestAncestorColumnSpecificName.equalsIgnoreCase(columnNameOrAlias)) { 596 | // 构建 Relation 节点,类型 [DIRECT] 597 | ColumnLineageRelationNode relationChildNode = new ColumnLineageRelationNode( 598 | ColumnLineageRelationNodeType.DIRECT); 599 | // 构建子 Column 节点 600 | ColumnLineageColumnNodeType columnChildNodeType = ColumnLineageUtils 601 | .getColumnLineageColumnNodeType(sqlIdentifierExpr, parentColumnNode); 602 | ColumnLineageColumnNode columnChildNode = new ColumnLineageColumnNode(columnChildNodeType); 603 | 604 | setColumnChildNode(relationChildNode,columnChildNode,columnNode,columnNameOrAlias,databasename,tablename,sqlObject); 605 | 606 | // 递归子 Column 节点 607 | putChildrenNodes(columnChildNode, columnNode, nearestAncestorColumnSpecificName); 608 | 609 | // 找到则跳出 610 | break; 611 | } 612 | } //如果为SQLAllColumnExpr类型 613 | else if (selectItemExpr instanceof SQLAllColumnExpr) { 614 | // 查询 Item 是 SELECT * 类型 615 | SQLAllColumnExpr sqlAllColumnExpr = (SQLAllColumnExpr) selectItemExpr; 616 | 617 | // 构建 Relation 节点,类型 [DIRECT] 618 | ColumnLineageRelationNode relationChildNode = new ColumnLineageRelationNode( 619 | ColumnLineageRelationNodeType.DIRECT); 620 | 621 | // 构建子 Column 节点 622 | 
ColumnLineageColumnNodeType columnChildNodeType = ColumnLineageUtils 623 | .getColumnLineageColumnNodeType(sqlAllColumnExpr, parentColumnNode); 624 | ColumnLineageColumnNode columnChildNode = new ColumnLineageColumnNode(columnChildNodeType); 625 | String columnNameOrAlias ="*"; 626 | 627 | setColumnChildNode(relationChildNode,columnChildNode,columnNode,columnNameOrAlias,databasename,tablename,sqlObject); 628 | 629 | putChildrenNodes(columnChildNode, columnNode, nearestAncestorColumnSpecificName); 630 | // 找到则跳出 631 | break; 632 | } //如果为SQLMethodInvokeExpr函数嵌套类型 633 | else if (selectItemExpr instanceof SQLMethodInvokeExpr) { 634 | 635 | String columnNameOrAlias = selectItemExpr.toString(); 636 | if (selectItem.getAlias() != null) { 637 | columnNameOrAlias = selectItem.getAlias(); 638 | } 639 | if (nearestAncestorColumnSpecificName.equalsIgnoreCase(columnNameOrAlias)) { 640 | ColumnLineageRelationNode relationChildNode = new ColumnLineageRelationNode( 641 | ColumnLineageRelationNodeType.FUNCTION); 642 | // 连接 Relation 节点和父 Column 节点 643 | 644 | SQLMethodInvokeExpr sqlMethodInvokeExpr = (SQLMethodInvokeExpr) selectItem.getExpr(); 645 | 646 | ColumnLineageColumnNodeType columnChildNodeType = ColumnLineageUtils 647 | .getColumnLineageColumnNodeType(sqlMethodInvokeExpr, parentColumnNode); 648 | ColumnLineageColumnNode columnChildNode = new ColumnLineageColumnNode(columnChildNodeType); 649 | 650 | setColumnChildNode(relationChildNode,columnChildNode,columnNode,columnNameOrAlias,databasename,tablename,sqlObject); 651 | 652 | putChildrenNodes(columnChildNode, columnNode, nearestAncestorColumnSpecificName); 653 | break; 654 | } 655 | } //如果为SQLAggregateExpr函数类型 656 | else if (selectItemExpr instanceof SQLAggregateExpr) { 657 | 658 | // 查询 Item 是 SSQLAggregateExpr 类型 659 | SQLAggregateExpr sqlAggregateExpr = (SQLAggregateExpr) selectItemExpr; 660 | 661 | // 获取方法名 662 | String methodName = sqlAggregateExpr.getMethodName(); 663 | String columnNameOrAlias = 
selectItemExpr.toString(); 664 | if (selectItem.getAlias() != null) { 665 | columnNameOrAlias = selectItem.getAlias(); 666 | } 667 | if (nearestAncestorColumnSpecificName.equalsIgnoreCase(columnNameOrAlias)) { 668 | // 构建 Relation 节点,类型 [FUNCTION] 669 | ColumnLineageRelationNode relationChildNode = new ColumnLineageRelationNode( 670 | ColumnLineageRelationNodeType.FUNCTION); 671 | 672 | ColumnLineageColumnNodeType columnChildNodeType = ColumnLineageUtils 673 | .getColumnLineageColumnNodeType(sqlAggregateExpr, parentColumnNode); 674 | 675 | ColumnLineageColumnNode columnChildNode = new ColumnLineageColumnNode(columnChildNodeType); 676 | 677 | setColumnChildNode(relationChildNode,columnChildNode,columnNode,columnNameOrAlias,databasename,tablename,sqlObject); 678 | 679 | putChildrenNodes(columnChildNode, columnNode, columnNameOrAlias); 680 | break; 681 | 682 | } 683 | } //如果为SQLCharExpr类型 684 | else if(selectItemExpr instanceof SQLCharExpr){ 685 | String columnNameOrAlias = selectItem.getAlias(); 686 | if (nearestAncestorColumnSpecificName.equalsIgnoreCase(columnNameOrAlias)) { 687 | SQLCharExpr sqlCharExpr = (SQLCharExpr) selectItemExpr; 688 | ColumnLineageRelationNode relationChildNode = new ColumnLineageRelationNode( 689 | ColumnLineageRelationNodeType.DIRECT); 690 | 691 | ColumnLineageColumnNodeType columnChildNodeType = ColumnLineageUtils 692 | .getColumnLineageColumnNodeType(sqlCharExpr, parentColumnNode); 693 | ColumnLineageColumnNode columnChildNode = new ColumnLineageColumnNode(columnChildNodeType); 694 | 695 | setColumnChildNode(relationChildNode,columnChildNode,columnNode,columnNameOrAlias,databasename,tablename,sqlObject); 696 | 697 | putChildrenNodes(columnChildNode, columnNode, columnNameOrAlias); 698 | break; 699 | } 700 | }//如果为SQLCaseExpr类型 701 | else if (selectItemExpr instanceof SQLCaseExpr) { 702 | String columnNameOrAlias = selectItem.getAlias(); 703 | if (nearestAncestorColumnSpecificName.equalsIgnoreCase(columnNameOrAlias)) { 704 | SQLCaseExpr 
sqlCaseExpr = (SQLCaseExpr) selectItemExpr; 705 | ColumnLineageRelationNode relationChildNode = new ColumnLineageRelationNode( 706 | ColumnLineageRelationNodeType.CASE); 707 | 708 | ColumnLineageColumnNodeType columnChildNodeType = ColumnLineageUtils 709 | .getColumnLineageColumnNodeType(sqlCaseExpr, parentColumnNode); 710 | ColumnLineageColumnNode columnChildNode = new ColumnLineageColumnNode(columnChildNodeType); 711 | 712 | 713 | setColumnChildNode(relationChildNode,columnChildNode,columnNode,columnNameOrAlias,databasename,tablename,sqlObject); 714 | 715 | putChildrenNodes(columnChildNode, columnNode, columnNameOrAlias); 716 | break; 717 | } 718 | } //添加如:COALESCE(buyer_frozen,0)+COALESCE(erp_frozen,0) AS frozen 有运算符的表达式 719 | else if(selectItemExpr instanceof SQLBinaryOpExpr){ 720 | String columnNameOrAlias = selectItem.getAlias(); 721 | if (nearestAncestorColumnSpecificName.equalsIgnoreCase(columnNameOrAlias)) { 722 | ColumnLineageRelationNode relationChildNode = new ColumnLineageRelationNode( 723 | ColumnLineageRelationNodeType.FUNCTION); 724 | 725 | ColumnLineageColumnNodeType columnChildNodeType = ColumnLineageUtils 726 | .getColumnLineageColumnNodeType(selectItemExpr, parentColumnNode); 727 | ColumnLineageColumnNode columnChildNode = new ColumnLineageColumnNode(columnChildNodeType); 728 | 729 | setColumnChildNode(relationChildNode,columnChildNode,columnNode,columnNameOrAlias,databasename,tablename,sqlObject); 730 | 731 | putChildrenNodes(columnChildNode, columnNode, columnNameOrAlias); 732 | break; 733 | } 734 | } 735 | else { 736 | String columnNameOrAlias = selectItem.getAlias(); 737 | if (nearestAncestorColumnSpecificName.equalsIgnoreCase(columnNameOrAlias)) { 738 | putUnsupportedChildrenNodes(columnNode, selectItemExpr); 739 | } 740 | 741 | } 742 | } 743 | 744 | }//如果为SQLSubqueryTableSource类型 745 | else if(sqlTableSource instanceof SQLSubqueryTableSource){ 746 | SQLSubqueryTableSource sqlSubqueryTableSource = (SQLSubqueryTableSource) sqlTableSource; 
747 | if(sqlSubqueryTableSource.getSelect().getQuery() instanceof SQLSelectQueryBlock){ 748 | SQLSelectQueryBlock sqlSelectQueryBlock1 = (SQLSelectQueryBlock) sqlSubqueryTableSource.getSelect().getQuery(); 749 | SQLTableSource sqlTableSource1 =sqlSelectQueryBlock1.getFrom(); 750 | putChildrenNodes(columnNode,parentColumnNode,sqlTableSource1,nearestAncestorColumnSpecificName,selectItems); 751 | }else if(sqlSubqueryTableSource.getSelect().getQuery() instanceof SQLUnionQuery){ 752 | SQLUnionQuery sqlUnionQuery = (SQLUnionQuery) sqlSubqueryTableSource.getSelect().getQuery(); 753 | SQLSelectQuery sqlSelectQuery =sqlUnionQuery.getLeft(); 754 | } 755 | 756 | }//如果为SQLJoinTableSource类型 757 | else if(sqlTableSource instanceof SQLJoinTableSource){ 758 | SQLJoinTableSource sqlJoinTableSource = (SQLJoinTableSource) sqlTableSource; 759 | SQLTableSource sqlTableSourceLeft =sqlJoinTableSource.getLeft(); 760 | SQLTableSource sqlTableSourceRight =sqlJoinTableSource.getRight(); 761 | if(sqlTableSourceLeft instanceof SQLSubqueryTableSource){ 762 | SQLSubqueryTableSource sqlSubqueryTableSource = (SQLSubqueryTableSource) sqlTableSourceLeft; 763 | putChildrenNodes(columnNode,parentColumnNode,sqlSubqueryTableSource ,nearestAncestorColumnSpecificName,selectItems); 764 | } 765 | else if(sqlTableSourceLeft instanceof SQLExprTableSource){ 766 | SQLExprTableSource sqlExprTableSource = (SQLExprTableSource) sqlTableSourceLeft; 767 | putChildrenNodes(columnNode, parentColumnNode, nearestAncestorColumnSpecificName, sqlExprTableSource); 768 | }else if(sqlTableSourceLeft instanceof SQLJoinTableSource){ 769 | SQLJoinTableSource sqlJoinTableSource1 = (SQLJoinTableSource) sqlTableSourceLeft; 770 | putChildrenNodes(columnNode,parentColumnNode,sqlJoinTableSource1 ,nearestAncestorColumnSpecificName,selectItems); 771 | 772 | } 773 | 774 | }//如果为SQLUnionQueryTableSource类型 775 | else if(sqlTableSource instanceof SQLUnionQueryTableSource){ 776 | SQLUnionQueryTableSource sqlUnionQueryTableSource = 
(SQLUnionQueryTableSource)sqlTableSource; 777 | putChildrenNodes(columnNode, parentColumnNode, nearestAncestorColumnSpecificName, sqlUnionQueryTableSource); 778 | } 779 | 780 | } 781 | /** 782 | * Decription:SQLUnionQueryTableSource类型 783 | 784 | */ 785 | 786 | public void putChildrenNodes( 787 | ColumnLineageColumnNode columnNode, 788 | ColumnLineageColumnNode parentColumnNode, 789 | String nearestAncestorColumnSpecificName, 790 | SQLUnionQueryTableSource sqlUnionQueryTableSource) { 791 | SQLUnionQuery sqlUnionQuery = sqlUnionQueryTableSource.getUnion(); 792 | putSQLUnionQuery(columnNode,parentColumnNode,nearestAncestorColumnSpecificName,sqlUnionQuery); 793 | 794 | } 795 | 796 | /** 797 | * Desciprition: 对SQLUnionQuery进行解析 798 | * @param columnNode 799 | * @param parentColumnNode 800 | * @param nearestAncestorColumnSpecificName 801 | * @param sqlUnionQuery 802 | */ 803 | 804 | public void putSQLUnionQuery( ColumnLineageColumnNode columnNode, 805 | ColumnLineageColumnNode parentColumnNode, 806 | String nearestAncestorColumnSpecificName, 807 | SQLUnionQuery sqlUnionQuery){ 808 | List selectItemList = new ArrayList<>(); 809 | selectItemList = GetSQLUnionQuerySelectItemList(sqlUnionQuery,selectItemList); 810 | if(sqlUnionQuery.getLeft() instanceof SQLSelectQueryBlock){ 811 | SQLSelectQueryBlock sqlSelectQueryBlock = (SQLSelectQueryBlock) sqlUnionQuery.getLeft(); 812 | SQLTableSource sqlTableSource =sqlSelectQueryBlock.getFrom(); 813 | putChildrenNodes(columnNode,parentColumnNode,sqlTableSource,nearestAncestorColumnSpecificName,selectItemList); 814 | }else if(sqlUnionQuery.getRight() instanceof SQLSelectQueryBlock){ 815 | SQLSelectQueryBlock sqlSelectQueryBlock = (SQLSelectQueryBlock) sqlUnionQuery.getRight(); 816 | SQLTableSource sqlTableSource =sqlSelectQueryBlock.getFrom(); 817 | putChildrenNodes(columnNode,parentColumnNode,sqlTableSource,nearestAncestorColumnSpecificName,selectItemList); 818 | }else if(sqlUnionQuery.getLeft() instanceof SQLUnionQuery){ 819 | 
SQLUnionQuery sqlUnionQuery1 = (SQLUnionQuery) sqlUnionQuery.getLeft(); 820 | putSQLUnionQuery(columnNode,parentColumnNode,nearestAncestorColumnSpecificName,sqlUnionQuery1); 821 | }else if (sqlUnionQuery.getRight() instanceof SQLUnionQuery){ 822 | SQLUnionQuery sqlUnionQuery1 = (SQLUnionQuery) sqlUnionQuery.getRight(); 823 | putSQLUnionQuery(columnNode,parentColumnNode,nearestAncestorColumnSpecificName,sqlUnionQuery1); 824 | 825 | } 826 | 827 | } 828 | 829 | 830 | 831 | /** 832 | * Description: 833 | * SQLSubqueryTableSource 类型添加子查询字段级别血缘 834 | * 835 | * @param columnNode 字段级别血缘字段节点 836 | * @param parentColumnNode 父字段节点 837 | * @param nearestAncestorColumnSpecificName 最近的祖先节点的明确的列名 838 | * @param sqlSubqueryTableSource 子查询 SQLObject 839 | */ 840 | public void putChildrenNodes( 841 | ColumnLineageColumnNode columnNode, 842 | ColumnLineageColumnNode parentColumnNode, 843 | String nearestAncestorColumnSpecificName, 844 | SQLSubqueryTableSource sqlSubqueryTableSource) { 845 | if(sqlSubqueryTableSource.getSelect().getQuery() instanceof SQLSelectQueryBlock){ 846 | SQLSelectQueryBlock sqlSelectQueryBlock =sqlSubqueryTableSource.getSelect().getQueryBlock(); 847 | List selectItems =sqlSelectQueryBlock.getSelectList(); 848 | SQLTableSource sqlTableSource =sqlSelectQueryBlock.getFrom(); 849 | putChildrenNodes(columnNode,parentColumnNode,sqlTableSource,nearestAncestorColumnSpecificName,selectItems); 850 | 851 | } 852 | 853 | } 854 | 855 | /** 856 | * Description: 857 | * 添加物理表查询字段级别血缘 858 | * 859 | * @param columnNode 字段级别血缘字段节点 860 | * @param parentColumnNode 父字段节点 861 | * @param nearestAncestorColumnSpecificName 最近的祖先节点的明确的列名 862 | * @param sqlExprTableSource 物理表查询 SQLObject 863 | */ 864 | public void putChildrenNodes( 865 | ColumnLineageColumnNode columnNode, 866 | ColumnLineageColumnNode parentColumnNode, 867 | String nearestAncestorColumnSpecificName, 868 | SQLExprTableSource sqlExprTableSource) { 869 | // 构建 Relation 节点,类型 [DIRECT] 870 | ColumnLineageRelationNode 
relationChildNode = new ColumnLineageRelationNode( 871 | ColumnLineageRelationNodeType.DIRECT); 872 | relationChildNode.setParent(columnNode); 873 | columnNode.addChildNode(relationChildNode); 874 | 875 | // 构建子 Column 节点 876 | ColumnLineageColumnNodeType columnChildNodeType = ColumnLineageUtils 877 | .getColumnLineageColumnNodeType(sqlExprTableSource, parentColumnNode); 878 | ColumnLineageColumnNode columnChildNode = new ColumnLineageColumnNode(columnChildNodeType); 879 | 880 | // 设置属性 881 | SQLExpr sqlExprTableSourceExpr = sqlExprTableSource.getExpr(); 882 | 883 | if (sqlExprTableSourceExpr instanceof SQLPropertyExpr) { 884 | SQLPropertyExpr sqlPropertyExpr = (SQLPropertyExpr) sqlExprTableSourceExpr; 885 | 886 | columnChildNode.setSqlObject(sqlPropertyExpr.getResolvedOwnerObject()); 887 | columnChildNode.setDatabaseName(sqlPropertyExpr.getOwnernName()); 888 | columnChildNode.setTableName(sqlPropertyExpr.getName()); 889 | }else if(sqlExprTableSourceExpr instanceof SQLIdentifierExpr){ 890 | SQLIdentifierExpr sqlIdentifierExpr = (SQLIdentifierExpr) sqlExprTableSourceExpr; 891 | columnChildNode.setTableName(sqlIdentifierExpr.getName()); 892 | columnChildNode.setDatabaseName(columnNode.getDatabaseName()); 893 | columnChildNode.setSqlObject(sqlIdentifierExpr.getResolvedOwnerObject()); 894 | } 895 | 896 | columnChildNode.setColumnName(nearestAncestorColumnSpecificName); 897 | 898 | // 连接 Relation 节点和子 Column 节点 899 | relationChildNode.addChildNode(columnChildNode); 900 | columnChildNode.setParent(relationChildNode); 901 | 902 | // 结束递归 903 | return; 904 | } 905 | 906 | /** 907 | * Description: 908 | * 根据字段级别血缘字段节点和父字段节点添加子节点 909 | * 910 | * @param columnNode 字段级别血缘字段节点 911 | * @param parentColumnNode 父字段节点 912 | * @param nearestAncestorColumnSpecificName 最近的祖先节点的明确的列名 913 | */ 914 | public void putChildrenNodes( 915 | ColumnLineageColumnNode columnNode, 916 | ColumnLineageColumnNode parentColumnNode, 917 | String nearestAncestorColumnSpecificName) { 918 | SQLObject 
sqlObject = columnNode.getSqlObject(); 919 | if (sqlObject instanceof SQLSelectItem) { 920 | // 直接查询 921 | SQLSelectItem sqlSelectItem = (SQLSelectItem) sqlObject; 922 | SQLExpr sqlExpr =sqlSelectItem.getExpr(); 923 | putChildrenNodes(columnNode, parentColumnNode, sqlSelectItem); 924 | } 925 | else if(sqlObject instanceof SQLPropertyExpr){ 926 | SQLPropertyExpr sqlPropertyExpr =(SQLPropertyExpr) sqlObject; 927 | putChildrenNodes(columnNode, parentColumnNode, sqlPropertyExpr); 928 | } 929 | else if (sqlObject instanceof SQLSubqueryTableSource) { 930 | // 子查询 931 | SQLSubqueryTableSource sqlSubqueryTableSource = (SQLSubqueryTableSource) sqlObject; 932 | putChildrenNodes(columnNode, parentColumnNode, nearestAncestorColumnSpecificName, sqlSubqueryTableSource); 933 | 934 | } else if (sqlObject instanceof SQLExprTableSource) { 935 | // 物理表查询 936 | SQLExprTableSource sqlExprTableSource = (SQLExprTableSource) sqlObject; 937 | putChildrenNodes(columnNode, parentColumnNode, nearestAncestorColumnSpecificName, sqlExprTableSource); 938 | }//增加SQLUnionQueryTableSource类型 939 | else if(sqlObject instanceof SQLUnionQueryTableSource){ 940 | SQLUnionQueryTableSource sqlUnionQueryTableSource = (SQLUnionQueryTableSource) sqlObject; 941 | putChildrenNodes(columnNode, parentColumnNode, nearestAncestorColumnSpecificName, sqlUnionQueryTableSource); 942 | }//SQLJoinTableSource类型 943 | else if(sqlObject instanceof SQLJoinTableSource){ 944 | SQLJoinTableSource sqlJoinTableSource = (SQLJoinTableSource) sqlObject; 945 | } 946 | } 947 | 948 | /** 949 | * Description : 950 | * @param columnNode 951 | * @param parentColumnNode 952 | * @param nearestAncestorColumnSpecificName 953 | * @param sqlJoinTableSource 954 | */ 955 | 956 | public void putChildrenNodes( ColumnLineageColumnNode columnNode, 957 | ColumnLineageColumnNode parentColumnNode, 958 | String nearestAncestorColumnSpecificName, 959 | SQLJoinTableSource sqlJoinTableSource){ 960 | SQLTableSource sqlTableSourceLeft 
=sqlJoinTableSource.getLeft(); 961 | SQLTableSource sqlTableSourceRight =sqlJoinTableSource.getRight(); 962 | //如果为SQLSubqueryTableSource类型 963 | if(sqlTableSourceLeft instanceof SQLSubqueryTableSource){ 964 | SQLSubqueryTableSource sqlSubqueryTableSource = (SQLSubqueryTableSource) sqlTableSourceLeft; 965 | if(sqlSubqueryTableSource.getSelect().getQuery() instanceof SQLSelectQueryBlock) { 966 | putChildrenNodes(columnNode, parentColumnNode, nearestAncestorColumnSpecificName, sqlSubqueryTableSource); 967 | } 968 | 969 | }//如果为SQLExprTableSource 970 | else if(sqlTableSourceLeft instanceof SQLExprTableSource){ 971 | SQLExprTableSource sqlExprTableSource = (SQLExprTableSource) sqlTableSourceLeft; 972 | putChildrenNodes(columnNode, parentColumnNode, nearestAncestorColumnSpecificName, sqlExprTableSource); 973 | }//如果为SQLJoinTableSource 974 | else if(sqlTableSourceLeft instanceof SQLJoinTableSource){ 975 | SQLJoinTableSource sqlJoinTableSource1 = (SQLJoinTableSource) sqlTableSourceLeft; 976 | putChildrenNodes(columnNode, parentColumnNode, nearestAncestorColumnSpecificName,sqlJoinTableSource1); 977 | } 978 | 979 | 980 | 981 | } 982 | 983 | /** Description: 984 | * 添加别名关系的节点 985 | * @param columnNode 表的字段级别血缘 986 | * @param sqlSelectItem SQLSelectItem (即 Column) 987 | * @param table 对应的表 988 | * 989 | */ 990 | public void putSQLSelectItemAliasNode(ColumnLineageColumnNode columnNode, ColumnLineageColumnNode parentColumnNode, 991 | SQLSelectItem sqlSelectItem,Table table){ 992 | if (sqlSelectItem.getAlias()!=null){ 993 | SQLExpr selectItemExpr = sqlSelectItem.getExpr(); 994 | String columnNameOrAlias = selectItemExpr.toString(); 995 | ColumnLineageRelationNode relationChildNode = new ColumnLineageRelationNode( 996 | ColumnLineageRelationNodeType.ALIAS); 997 | ColumnLineageColumnNodeType columnNodeType = 998 | ColumnLineageUtils.getColumnLineageColumnNodeTypeByDatabaseName(table.getDatabase().getName()); 999 | 1000 | ColumnLineageColumnNode columnChildNode = new 
ColumnLineageColumnNode(columnNodeType); 1001 | columnChildNode.setSqlObject(sqlSelectItem); 1002 | columnChildNode.setDatabaseName(table.getDatabase().getName()); 1003 | columnChildNode.setTableName(table.getName()); 1004 | 1005 | //添加SQLCaseExpr类型 1006 | if(selectItemExpr instanceof SQLCaseExpr){ 1007 | relationChildNode = new ColumnLineageRelationNode( 1008 | ColumnLineageRelationNodeType.CASE); 1009 | //避免首尾一致,暂时不解析case情况 1010 | columnNameOrAlias = "CASE_"+sqlSelectItem.getAlias(); 1011 | columnChildNode.setSqlObject(null); 1012 | 1013 | } 1014 | // 连接 Relation 节点和父 Column 节点 1015 | relationChildNode.setParent(columnNode); 1016 | columnNode.addChildNode(relationChildNode); 1017 | // 获取子节点 1018 | 1019 | columnChildNode.setColumnName(columnNameOrAlias); 1020 | 1021 | relationChildNode.addChildNode(columnChildNode); 1022 | columnChildNode.setParent(relationChildNode); 1023 | putChildrenNodes(columnChildNode, columnNode, columnNameOrAlias); 1024 | 1025 | } 1026 | } 1027 | 1028 | 1029 | /** 1030 | * Description: 1031 | * 获取单个 SQLSelectItem (即 Column) 的血缘 1032 | * 1033 | * @param tableColumnLineage 表的字段级别血缘 1034 | * @param selectItem SQLSelectItem (即 Column) 1035 | * @param table 对应的表 1036 | * @return 表的字段级别血缘 1037 | */ 1038 | public void putSQLSelectItemColumnLineage( 1039 | TableColumnLineage tableColumnLineage, SQLSelectItem selectItem, Table table) { 1040 | //首先构建列名的节点,再构建列名的别名节点 1041 | SQLExpr selectItemExpr = selectItem.getExpr(); 1042 | String columnNameOrAlias = ""; 1043 | SQLObject sqlObject =selectItem; 1044 | 1045 | //如果别名不为空,我们先建一个别名的节点,再建一个原始名的节点。如 SUM(A.count) AS B 先建B节点再建SUM(A.count) 1046 | if (selectItemExpr instanceof SQLPropertyExpr) { 1047 | columnNameOrAlias = ((SQLPropertyExpr) selectItem.getExpr()).getName(); 1048 | }else if(selectItemExpr instanceof SQLIdentifierExpr){ 1049 | columnNameOrAlias = ((SQLIdentifierExpr) selectItemExpr).getName(); 1050 | }//如果是caseExpr 1051 | else if(selectItemExpr instanceof SQLCaseExpr){ 1052 | columnNameOrAlias = 
((SQLCaseExpr) selectItemExpr).getItems().get(0).getValueExpr().toString(); 1053 | }//select *情况 1054 | else if (selectItemExpr instanceof SQLAllColumnExpr){ 1055 | if(((SQLAllColumnExpr) selectItemExpr).getResolvedTableSource() instanceof SQLExprTableSource){ 1056 | SQLExprTableSource sqlExprTableSource = (SQLExprTableSource) ((SQLAllColumnExpr) selectItemExpr).getResolvedTableSource(); 1057 | columnNameOrAlias = "*"; 1058 | sqlObject =sqlExprTableSource; 1059 | }else if(((SQLAllColumnExpr) selectItemExpr).getResolvedTableSource() instanceof SQLSubqueryTableSource){ 1060 | SQLSubqueryTableSource sqlSubqueryTableSource = (SQLSubqueryTableSource) ((SQLAllColumnExpr) selectItemExpr).getResolvedTableSource(); 1061 | sqlObject = sqlSubqueryTableSource; 1062 | } 1063 | 1064 | } 1065 | if(selectItem.getAlias()!=null){ 1066 | columnNameOrAlias = selectItem.getAlias(); 1067 | } 1068 | 1069 | Column column = new Column(columnNameOrAlias, ColumnType.HIVE_COLUMN, table); 1070 | tableColumnLineage.addColumn(column); 1071 | ColumnLineageColumnNodeType columnNodeType = 1072 | ColumnLineageUtils.getColumnLineageColumnNodeTypeByDatabaseName(table.getDatabase().getName()); 1073 | ColumnLineageColumnNode columnRootNode = new ColumnLineageColumnNode(columnNodeType); 1074 | 1075 | columnRootNode.setSqlObject(sqlObject); 1076 | 1077 | columnRootNode.setDatabaseName(table.getDatabase().getName()); 1078 | columnRootNode.setTableName(table.getName()); 1079 | columnRootNode.setColumnName(columnNameOrAlias); 1080 | 1081 | tableColumnLineage.addColumnLineageColumnRootNode(column, columnRootNode); 1082 | if(selectItem.getAlias()==null){ 1083 | putChildrenNodes(columnRootNode, columnRootNode, columnNameOrAlias); 1084 | }//如果别名不为空,再建原名节点 1085 | else{ 1086 | putSQLSelectItemAliasNode(columnRootNode,columnRootNode,selectItem,table); 1087 | } 1088 | 1089 | } 1090 | 1091 | /** 1092 | * Description: 1093 | * 获取 SQLStatement 中表的字段级别血缘 1094 | * 1095 | * @param tableColumnLineage 表的字段级别血缘 1096 | * 
@param stmt SQLStatement 1097 | */ 1098 | public String putSQLStatementColumnLineage(TableColumnLineage tableColumnLineage, SQLStatement stmt,String databaseName) { 1099 | //从use语句中获得basename 1100 | String tableName =""; 1101 | if (stmt instanceof SQLUseStatement){ 1102 | databaseName= ((SQLUseStatement) stmt).getDatabase().getSimpleName(); 1103 | return databaseName; 1104 | } 1105 | // 仅针对有建表语句的 SQL 进行分析 1106 | if (stmt instanceof SQLCreateTableStatement) { 1107 | SQLCreateTableStatement stmtCT = (SQLCreateTableStatement) stmt; 1108 | 1109 | tableName =GetTableDatabaseName(stmtCT.getTableSource().getExpr()).split("\t")[0]; 1110 | if(databaseName.equals("")){ 1111 | databaseName =GetTableDatabaseName(stmtCT.getTableSource().getExpr()).split("\t")[1]; 1112 | } 1113 | Database database = new Database(databaseName, DatabaseType.HIVE_DB); 1114 | Table table = new Table(tableName, TableType.HIVE_TABLE, database); 1115 | 1116 | // 仅针对包含 SELECT 的建表 SQL 进行分析 1117 | SQLSelect select = stmtCT.getSelect(); 1118 | 1119 | if (select != null) { 1120 | List selectItemList=new ArrayList<>(); 1121 | selectItemList = GetSQLUnionQuerySelectItemList(select.getQuery(),selectItemList); 1122 | //增加SQLUnionQuery类 1123 | for (SQLSelectItem selectItem : selectItemList) { 1124 | putSQLSelectItemColumnLineage(tableColumnLineage, selectItem, table); 1125 | } 1126 | } 1127 | }//SQLInsertStatement类型 1128 | else if (stmt instanceof SQLInsertStatement){ 1129 | SQLInsertStatement stmtInsert = (SQLInsertStatement) stmt; 1130 | tableName =GetTableDatabaseName(stmtInsert.getTableSource().getExpr()).split("\t")[0]; 1131 | if(databaseName.equals("")){ 1132 | databaseName =GetTableDatabaseName(stmtInsert.getTableSource().getExpr()).split("\t")[1]; 1133 | } 1134 | Database database = new Database(databaseName, DatabaseType.HIVE_DB); 1135 | Table table = new Table(tableName, TableType.HIVE_TABLE, database); 1136 | List selectItemList=new ArrayList<>() ; 1137 | //增加SQLUnionQuery类型 1138 | 
if(stmtInsert.getValuesList().size()==0){ 1139 | selectItemList = GetSQLUnionQuerySelectItemList(stmtInsert.getQuery().getQuery(),selectItemList); 1140 | } 1141 | if(selectItemList!=null){ 1142 | for (SQLSelectItem selectItem : selectItemList) { 1143 | putSQLSelectItemColumnLineage(tableColumnLineage, selectItem, table); 1144 | } 1145 | } 1146 | 1147 | } 1148 | return databaseName; 1149 | } 1150 | 1151 | /** 1152 | * Description: 1153 | * 获取单个 SQL 中表的字段级别血缘 1154 | * 1155 | * @param sql SQL 1156 | * @return 表的字段级别血缘 1157 | */ 1158 | public TableColumnLineage getTableColumnLineage(String sql,String filename) throws IOException { 1159 | TableColumnLineage tableColumnLineage = new TableColumnLineage(); 1160 | String databaseName=""; 1161 | 1162 | List stmtList = SQLUtils.parseStatements(sql, schemaRepository.getDbType()); 1163 | 1164 | stmtList.forEach(schemaRepository::resolve); 1165 | 1166 | for (SQLStatement stmt : stmtList) { 1167 | if (stmt instanceof SQLUseStatement){ 1168 | databaseName= ((SQLUseStatement) stmt).getDatabase().getSimpleName(); 1169 | } 1170 | System.out.println("Dealing with one SQL statement ..."); 1171 | //CreateSchemaTable(stmt, filename,databaseName); 1172 | databaseName = putSQLStatementColumnLineage(tableColumnLineage, stmt, databaseName); 1173 | } 1174 | return tableColumnLineage; 1175 | 1176 | } 1177 | 1178 | /** 1179 | * Description: 1180 | * 创建上一个新建表的schema信息 1181 | * @param stmt SQLObject filename 文件 1182 | * @return tableColumnLineages 1183 | */ 1184 | 1185 | public String CreateSchemaTable(SQLStatement stmt,String filename,String databaseName) throws IOException { 1186 | //将生成的table schema SQL 写到 文件中 1187 | File file=new File(filename); 1188 | FileWriter writer = new FileWriter(file, true); 1189 | String tableName = null; 1190 | StringBuffer CreateTableSQL= new StringBuffer(); 1191 | int flag=0; 1192 | if (stmt instanceof SQLCreateTableStatement) { 1193 | SQLCreateTableStatement stmtCT = (SQLCreateTableStatement) stmt; 1194 | if 
(stmtCT.getTableSource().getExpr() instanceof SQLPropertyExpr){ 1195 | tableName = ((SQLPropertyExpr) stmtCT.getTableSource().getExpr()).getName(); 1196 | databaseName = ((SQLPropertyExpr) stmtCT.getTableSource().getExpr()).getOwnernName(); 1197 | String sql = "CREATE TABLE IF NOT EXISTS "+databaseName+"."+tableName+"\n"+"("; 1198 | CreateTableSQL.append(sql); 1199 | }else if(stmtCT.getTableSource().getExpr() instanceof SQLIdentifierExpr){ 1200 | tableName = ((SQLIdentifierExpr) stmtCT.getTableSource().getExpr()).getName(); 1201 | String sql = "CREATE TABLE IF NOT EXISTS "+ databaseName + "."+tableName+"\n"+"("; 1202 | CreateTableSQL.append(sql); 1203 | } 1204 | if (stmtCT.getTableSource().getSchemaObject()==null) { 1205 | SQLSelect select = stmtCT.getSelect(); 1206 | if (select != null) { 1207 | flag =1; 1208 | List selectItemList =new ArrayList<>(); 1209 | //增加SQLUnionQuery类型 1210 | selectItemList =GetSQLUnionQuerySelectItemList(select.getQuery(),selectItemList); 1211 | selectItemListToFile(selectItemList, CreateTableSQL, writer); 1212 | }//只有create语句 1213 | else { 1214 | List sqlTableElements =stmtCT.getTableElementList(); 1215 | if(sqlTableElements.size()!=0){ 1216 | flag =1; 1217 | for(SQLTableElement sqlTableElement:sqlTableElements ){ 1218 | if(sqlTableElement instanceof SQLColumnDefinition){ 1219 | SQLName sqlName = ((SQLColumnDefinition) sqlTableElement).getName(); 1220 | CreateTableSQL.append(sqlName.getSimpleName() +" STRING"+ ","+"\n"); 1221 | } 1222 | } 1223 | } 1224 | 1225 | } 1226 | }//只有在select不为空的情况下写入 1227 | if(flag ==1){ 1228 | CreateTableSQL.append(")"); 1229 | if(((SQLCreateTableStatement) stmt).getPartitionColumns().size()!=0){ 1230 | int size = ((SQLCreateTableStatement) stmt).getPartitionColumns().size(); 1231 | CreateTableSQL.append("partitioned by ("); 1232 | while (size!=0){ 1233 | String partitionColumn =((SQLCreateTableStatement) stmt).getPartitionColumns().get(size-1).toString(); 1234 | if(size ==1){ 1235 | 
CreateTableSQL.append(partitionColumn); 1236 | }else{ 1237 | CreateTableSQL.append(partitionColumn+","); 1238 | } 1239 | size-=1; 1240 | } 1241 | CreateTableSQL.append(");"); 1242 | } 1243 | else{ 1244 | CreateTableSQL.append(";"); 1245 | } 1246 | writer.write(CreateTableSQL.toString()+"\n"); 1247 | writer.close(); 1248 | } 1249 | 1250 | } 1251 | return CreateTableSQL.toString(); 1252 | } 1253 | 1254 | /** 1255 | * Description: 1256 | * 将SQLSelectItem 字段写到schema table中 1257 | * @param selectItemList selectItemList 1258 | * @param CreateTableSQL StringBuffer 写入文件的内容 1259 | * @param writer Filewriter 1260 | */ 1261 | public void selectItemListToFile(List selectItemList,StringBuffer CreateTableSQL,FileWriter writer) throws IOException { 1262 | for (SQLSelectItem selectItem : selectItemList) { 1263 | SQLExpr selectItemExpr = selectItem.getExpr(); 1264 | String columnNameOrAlias = selectItem.getAlias(); 1265 | if (selectItemExpr instanceof SQLPropertyExpr) { 1266 | columnNameOrAlias = ((SQLPropertyExpr) selectItem.getExpr()).getName(); 1267 | if(columnNameOrAlias.equals("*")){ 1268 | columnNameOrAlias ="type"; 1269 | } 1270 | if (selectItem.getAlias() != null) { 1271 | columnNameOrAlias = selectItem.getAlias(); 1272 | } 1273 | }else if (selectItemExpr instanceof SQLIdentifierExpr){ 1274 | columnNameOrAlias = ((SQLIdentifierExpr) selectItem.getExpr()).getName(); 1275 | }else if(selectItemExpr instanceof SQLAggregateExpr){ 1276 | if(selectItem.getAlias() !=null){ 1277 | columnNameOrAlias = selectItem.getAlias(); 1278 | }//不能解析带有函数的字段,如果没有别名,先用type代替 1279 | else { 1280 | columnNameOrAlias ="type"; 1281 | } 1282 | }else if (selectItemExpr instanceof SQLMethodInvokeExpr){ 1283 | if(selectItem.getAlias() !=null){ 1284 | columnNameOrAlias = selectItem.getAlias(); 1285 | }//不能解析带有函数的字段,如果没有别名,先用type代替 1286 | else { 1287 | columnNameOrAlias ="type"; 1288 | } 1289 | }else if(selectItemExpr instanceof SQLAllColumnExpr){ 1290 | //不能解析*,先用Type代替 1291 | columnNameOrAlias ="type"; 
1292 | } 1293 | CreateTableSQL.append(columnNameOrAlias +" STRING"+ ","+"\n"); 1294 | } 1295 | 1296 | 1297 | } 1298 | 1299 | 1300 | /** 1301 | * Description: 主外键分析 1302 | * @param sql 1303 | * @param filename 1304 | * @return 1305 | * @throws IOException 1306 | */ 1307 | public ForeignKeys getTableColumnForeignKeys(String sql, String filename) throws IOException { 1308 | 1309 | ForeignKeys foreignKeys = new ForeignKeys(); 1310 | TableColumnLineage tableColumnLineage = new TableColumnLineage(); 1311 | ArrayList primaryAndForeignKeyList = new ArrayList<>(); 1312 | String databaseName=""; 1313 | 1314 | List stmtList = SQLUtils.parseStatements(sql, schemaRepository.getDbType()); 1315 | 1316 | stmtList.forEach(schemaRepository::resolve); 1317 | 1318 | for (SQLStatement stmt : stmtList) { 1319 | // continue; 1320 | if (stmt instanceof SQLUseStatement){ 1321 | databaseName = ((SQLUseStatement) stmt).getDatabase().getSimpleName(); 1322 | } 1323 | //根据sql语句添加schma信息 1324 | //CreateSchemaTable(stmt, filename, databaseName); 1325 | 1326 | //得到主外键血缘分析链路 1327 | getPrimaryAndForeignKey(stmt, tableColumnLineage, primaryAndForeignKeyList); 1328 | 1329 | //正常的血缘分析(不可以缺少,因为可能后面拼接路径时会用到) 1330 | databaseName = putSQLStatementColumnLineage(tableColumnLineage, stmt,databaseName); 1331 | } 1332 | 1333 | foreignKeys.setTableColumnLineage(tableColumnLineage); 1334 | foreignKeys.setArrayList(primaryAndForeignKeyList); 1335 | return foreignKeys; 1336 | 1337 | } 1338 | 1339 | /** 1340 | * Description 1341 | * 得到主外键 1342 | * 1343 | * @param stmt SQL被解析后存在druid的SQLStatement结构中 1344 | * @param tableColumnLineage 表的字段级别血缘 1345 | * @return 一个sql语句返回一个主外键对儿列表(待修改)TODO 1346 | */ 1347 | public void getPrimaryAndForeignKey(SQLStatement stmt, TableColumnLineage tableColumnLineage, ArrayList primaryAndForeignKeyList) { 1348 | // ArrayList primaryAndExternalStart = new ArrayList(); 1349 | //只处理建表语句 1350 | if (stmt instanceof SQLCreateTableStatement) { 1351 | SQLCreateTableStatement stmtCT = 
(SQLCreateTableStatement) stmt; 1352 | SQLTableSource stmCTSQLTableSource = stmtCT.getTableSource(); 1353 | SQLSelect select = stmtCT.getSelect(); 1354 | //只处理select语句 1355 | selectSQLParse(tableColumnLineage, primaryAndForeignKeyList, stmCTSQLTableSource, select); 1356 | } 1357 | if (stmt instanceof SQLInsertStatement) { 1358 | SQLInsertStatement stmtIS = (SQLInsertStatement) stmt; 1359 | SQLTableSource stmCTSQLTableSource = stmtIS.getTableSource(); 1360 | SQLSelect select = stmtIS.getQuery(); 1361 | //只处理select语句 1362 | selectSQLParse(tableColumnLineage, primaryAndForeignKeyList, stmCTSQLTableSource, select); 1363 | } 1364 | } 1365 | 1366 | /** 1367 | * 1368 | * @param tableColumnLineage 表的血缘关系分析 1369 | * @param primaryAndForeignKeyList 存储主外键的列表 1370 | * @param stmCTSQLTableSource 建表语句源 1371 | * @param select select 1372 | */ 1373 | 1374 | private void selectSQLParse(TableColumnLineage tableColumnLineage, ArrayList primaryAndForeignKeyList, SQLTableSource stmCTSQLTableSource, SQLSelect select) { 1375 | if (select != null) { 1376 | //得到查询主体部分 1377 | SQLSelectQueryBlock queryBlock = select.getQueryBlock(); 1378 | if(queryBlock!= null) { 1379 | SQLTableSource queryBlockFrom = queryBlock.getFrom(); 1380 | //分情况对不同query进行分析 1381 | parseSQLTableSource(queryBlockFrom, tableColumnLineage, stmCTSQLTableSource, primaryAndForeignKeyList); } } 1382 | } 1383 | 1384 | /** 1385 | * 根据selcet下查询语句queryBlockForm的TableSource类型,选择不同的主外键解析模式 1386 | * 1387 | * @param queryBlockFrom 是有join关系的query的Form部分 1388 | */ 1389 | 1390 | private void parseSQLTableSource(SQLTableSource queryBlockFrom, TableColumnLineage tableColumnLineage, SQLTableSource stmCTSQLTableSource, ArrayList primaryAndForeignKeyLists) { 1391 | 1392 | // 模式一:SQLJoinTableSource,这里存在直接join关系 1393 | if (queryBlockFrom instanceof SQLJoinTableSource) { 1394 | SQLJoinTableSource queryBlockFromJoin = (SQLJoinTableSource) queryBlockFrom; 1395 | 1396 | SQLExpr conditionSQLExpr = queryBlockFromJoin.getCondition(); 1397 | 
SQLTableSource primaryKeySQLTableSource = queryBlockFromJoin.getLeft(); 1398 | SQLTableSource foreignKeySQLTableSource = queryBlockFromJoin.getRight(); 1399 | String basename =""; 1400 | //这里仍需要考虑,看是不是连接操作的condition一定是SQLBinaryOpExpr 1401 | if (conditionSQLExpr instanceof SQLBinaryOpExpr) { 1402 | SQLBinaryOpExpr conditionSQLBinaryOpExpr = (SQLBinaryOpExpr) conditionSQLExpr; 1403 | 1404 | //得到主键和外键的expression 1405 | SQLExpr primaryKeySQLExpr = conditionSQLBinaryOpExpr.getLeft(); 1406 | SQLExpr foreignKeySQLExpr = conditionSQLBinaryOpExpr.getRight(); 1407 | 1408 | //分析主外键,SQLExpr实际可能为SQLPropertyExpr(一般为查询字段列表)或者SQLIdentifierExpr(一般表示表名或别名) 1409 | parsePrimaryAndForeignKey(tableColumnLineage, stmCTSQLTableSource, basename, primaryKeySQLExpr, foreignKeySQLExpr,primaryAndForeignKeyLists); 1410 | } 1411 | 1412 | // primaryAndForeignKeyLists.add(primaryKeyAndForeignKeyList); 1413 | 1414 | //继续分析from的left语句和right语句,看是否包含子语句(递归) 1415 | parseSQLTableSource(primaryKeySQLTableSource,tableColumnLineage, stmCTSQLTableSource,primaryAndForeignKeyLists); 1416 | parseSQLTableSource(foreignKeySQLTableSource,tableColumnLineage, stmCTSQLTableSource,primaryAndForeignKeyLists); 1417 | 1418 | } 1419 | 1420 | // 模式二:SQLSubqueryTableSource 1421 | else if (queryBlockFrom instanceof SQLSubqueryTableSource) { 1422 | SQLSubqueryTableSource leftSQLSubqueryTabSource = (SQLSubqueryTableSource) queryBlockFrom; 1423 | SQLSelect select = leftSQLSubqueryTabSource.getSelect(); 1424 | //只处理select语句 1425 | selectSQLParse(tableColumnLineage, primaryAndForeignKeyLists, stmCTSQLTableSource, select); 1426 | } 1427 | 1428 | //模式三:SQLExprTableSource 1429 | else if (queryBlockFrom instanceof SQLExprTableSource) { 1430 | return; 1431 | } 1432 | 1433 | //模式四:SQLUnionQueryTableSource 1434 | else if (queryBlockFrom instanceof SQLUnionQueryTableSource) { 1435 | return; 1436 | } 1437 | } 1438 | 1439 | 1440 | /** 1441 | * 1442 | * @param tableColumnLineage 1443 | * @param stmCTSQLTableSource 1444 | * @param basename 
1445 | * @param primaryKeySQLExpr 1446 | * @param foreignKeySQLExpr 1447 | * @param primaryAndForeignKeyLists 1448 | * 分析join类型,可能是普通的join关系,可能是包含cast函数的join关系,可能是包含and情况(1个join下对应多个主外键) 1449 | */ 1450 | private void parsePrimaryAndForeignKey(TableColumnLineage tableColumnLineage, SQLTableSource stmCTSQLTableSource, String basename, SQLExpr primaryKeySQLExpr, SQLExpr foreignKeySQLExpr, ArrayList primaryAndForeignKeyLists) { 1451 | 1452 | //model 1:这里主要是存在and关系时会有多对儿主外键 1453 | if (primaryKeySQLExpr instanceof SQLBinaryOpExpr && foreignKeySQLExpr instanceof SQLBinaryOpExpr) { 1454 | //TOdo 1455 | SQLBinaryOpExpr conditionPrimarySQLBinaryOpExpr = (SQLBinaryOpExpr) primaryKeySQLExpr; 1456 | SQLExpr primaryKeyExpr = conditionPrimarySQLBinaryOpExpr.getLeft(); 1457 | SQLExpr foreignKeyExpr = conditionPrimarySQLBinaryOpExpr.getRight(); 1458 | parsePrimaryAndForeignKey(tableColumnLineage, stmCTSQLTableSource, basename, primaryKeyExpr, foreignKeyExpr, primaryAndForeignKeyLists); 1459 | SQLBinaryOpExpr conditionForeignSQLBinaryOpExpr = (SQLBinaryOpExpr) foreignKeySQLExpr; 1460 | SQLExpr primaryKeyExprRight = conditionForeignSQLBinaryOpExpr.getLeft(); 1461 | SQLExpr foreignKeyExperRight = conditionForeignSQLBinaryOpExpr.getRight(); 1462 | parsePrimaryAndForeignKey(tableColumnLineage, stmCTSQLTableSource, basename, primaryKeyExprRight, foreignKeyExperRight, primaryAndForeignKeyLists); 1463 | 1464 | } 1465 | 1466 | // model 2:正常的join关系的解析模式 1467 | if (foreignKeySQLExpr instanceof SQLPropertyExpr && primaryKeySQLExpr instanceof SQLPropertyExpr) { 1468 | 1469 | String[] primaryKeyAndForeignKeyList = new String[2]; 1470 | if (primaryKeySQLExpr instanceof SQLPropertyExpr) { 1471 | SQLPropertyExpr primaryKeySQLPropertyExpr = (SQLPropertyExpr) primaryKeySQLExpr; 1472 | 1473 | //得到主键字段(包含表名)的表名,并转化为string 1474 | SQLExpr tableNameOfPrimaryKeyKeySQLPropertyExpr = primaryKeySQLPropertyExpr.getOwner(); 1475 | String tableNameOfPrimaryKey = 
tableNameOfPrimaryKeyKeySQLPropertyExpr.toString(); 1476 | 1477 | //得到主键字段对应的表来源,并转换为string 1478 | SQLTableSource primaryKeyTableSource = primaryKeySQLPropertyExpr.getResolvedTableSource(); 1479 | String primaryKeySelectSQL = primaryKeyTableSource.toString(); 1480 | 1481 | //构建出主键对应的建表语句,这里是不是其实可以不写create这一步呢?直接从select开始 1482 | String primaryKeySQL = "CREATE TABLE IF NOT EXISTS " + stmCTSQLTableSource.toString() + tableNameOfPrimaryKey + " AS " + "\n" + " SELECT " + primaryKeySQLPropertyExpr.toString() + "\n" + " FROM " + primaryKeySelectSQL + " " + tableNameOfPrimaryKey; 1483 | 1484 | List stmtList = SQLUtils.parseStatements(primaryKeySQL, schemaRepository.getDbType()); 1485 | stmtList.forEach(schemaRepository::resolve); 1486 | for (SQLStatement stmt : stmtList) { 1487 | putSQLStatementColumnLineage(tableColumnLineage, stmt, basename); 1488 | } 1489 | 1490 | primaryKeyAndForeignKeyList[0] = stmCTSQLTableSource.toString() + tableNameOfPrimaryKey; 1491 | } 1492 | 1493 | // 分析外键 1494 | if (foreignKeySQLExpr instanceof SQLPropertyExpr) { 1495 | SQLPropertyExpr foreignKeySQLPropertyExpr = (SQLPropertyExpr) foreignKeySQLExpr; 1496 | 1497 | SQLExpr tableNameOfForeignKeySQLPropertyExpr = foreignKeySQLPropertyExpr.getOwner(); 1498 | String tableNameOfForeignKey = tableNameOfForeignKeySQLPropertyExpr.toString(); 1499 | 1500 | SQLTableSource foreignKeyTableSource = foreignKeySQLPropertyExpr.getResolvedTableSource(); 1501 | String foreignKeySelectSQL = foreignKeyTableSource.toString(); 1502 | 1503 | String foreignKeySQL = "CREATE TABLE IF NOT EXISTS " + stmCTSQLTableSource.toString() + tableNameOfForeignKey + " AS " + "\n" + " SELECT " + foreignKeySQLPropertyExpr.toString() + "\n" + " FROM " + foreignKeySelectSQL + " " + tableNameOfForeignKey; 1504 | 1505 | List stmtList = SQLUtils.parseStatements(foreignKeySQL, schemaRepository.getDbType()); 1506 | stmtList.forEach(schemaRepository::resolve); 1507 | for (SQLStatement stmt : stmtList) { 1508 | 
putSQLStatementColumnLineage(tableColumnLineage, stmt, basename); 1509 | } 1510 | 1511 | primaryKeyAndForeignKeyList[1] = stmCTSQLTableSource.toString() + tableNameOfForeignKey; 1512 | } 1513 | primaryAndForeignKeyLists.add(primaryKeyAndForeignKeyList); 1514 | } 1515 | 1516 | //model 3:主要针对join里包含cast函数的情况 1517 | if (foreignKeySQLExpr instanceof SQLCastExpr && primaryKeySQLExpr instanceof SQLCastExpr) { 1518 | 1519 | SQLCastExpr conditionPrimarySQLBinaryOpExpr = (SQLCastExpr) primaryKeySQLExpr; 1520 | SQLExpr primaryKeyExpr = conditionPrimarySQLBinaryOpExpr.getExpr(); 1521 | 1522 | SQLCastExpr conditionForeignSQLBinaryOpExpr = (SQLCastExpr) foreignKeySQLExpr; 1523 | SQLExpr foreignKeyExpr = conditionForeignSQLBinaryOpExpr.getExpr(); 1524 | 1525 | parsePrimaryAndForeignKey(tableColumnLineage, stmCTSQLTableSource, basename, primaryKeyExpr, foreignKeyExpr, primaryAndForeignKeyLists); 1526 | 1527 | } 1528 | } 1529 | 1530 | /** 1531 | * SQLUnionQuery类型得到SelectItemList 1532 | * @param sqlSelectQuery 1533 | * @param selectItemList 1534 | * @return 1535 | */ 1536 | public List GetSQLUnionQuerySelectItemList(SQLSelectQuery sqlSelectQuery,List selectItemList){ 1537 | if(sqlSelectQuery instanceof SQLUnionQuery) { 1538 | SQLUnionQuery sqlUnionQuery = (SQLUnionQuery) sqlSelectQuery; 1539 | if (sqlUnionQuery.getLeft() instanceof SQLUnionQuery){ 1540 | selectItemList =GetSQLUnionQuerySelectItemList(sqlUnionQuery.getLeft(),selectItemList); 1541 | 1542 | } 1543 | 1544 | if (sqlUnionQuery.getRight() instanceof SQLUnionQuery) { 1545 | selectItemList =GetSQLUnionQuerySelectItemList(sqlUnionQuery.getRight(),selectItemList); 1546 | 1547 | } 1548 | 1549 | if (sqlUnionQuery.getLeft() instanceof SQLSelectQueryBlock) { 1550 | SQLSelectQueryBlock sqlLeftSelectQuery = (SQLSelectQueryBlock) sqlUnionQuery.getLeft(); 1551 | List leftselectItemList = sqlLeftSelectQuery.getSelectList(); 1552 | selectItemList.addAll(leftselectItemList); 1553 | } 1554 | 1555 | if (sqlUnionQuery.getRight() instanceof 
SQLSelectQueryBlock) { 1556 | SQLSelectQueryBlock sqlRightSelectQuery = (SQLSelectQueryBlock) sqlUnionQuery.getRight(); 1557 | List rightselectItemList = sqlRightSelectQuery.getSelectList(); 1558 | selectItemList.addAll(rightselectItemList); 1559 | } 1560 | } else if (sqlSelectQuery instanceof SQLSelectQueryBlock) { 1561 | SQLSelectQueryBlock sqlSelectQueryBlock = (SQLSelectQueryBlock) sqlSelectQuery; 1562 | selectItemList =sqlSelectQueryBlock .getSelectList(); 1563 | if (selectItemList.get(0).getExpr() instanceof SQLAllColumnExpr) { 1564 | if (sqlSelectQueryBlock .getFrom() instanceof SQLExprTableSource) { 1565 | SQLExprTableSource sqlExprTableSource = (SQLExprTableSource) sqlSelectQueryBlock .getFrom(); 1566 | if (sqlExprTableSource.getSchemaObject() != null) { 1567 | 1568 | } 1569 | } 1570 | } 1571 | } 1572 | 1573 | return selectItemList; 1574 | } 1575 | 1576 | public List GetTableElementsList(ArrayList TableList){ 1577 | List sqlColumnDefinitions = Lists.newArrayList(); 1578 | for(Object column:TableList){ 1579 | if (column instanceof SQLColumnDefinition){ 1580 | SQLColumnDefinition sqlColumnDefinition = (SQLColumnDefinition) column; 1581 | sqlColumnDefinitions.add(sqlColumnDefinition); 1582 | } 1583 | } 1584 | return sqlColumnDefinitions; 1585 | 1586 | } 1587 | 1588 | public String GetTableDatabaseName(SQLExpr sqlExpr){ 1589 | List name = Lists.newArrayList(); 1590 | String databaseName =" "; 1591 | String tableName=" "; 1592 | 1593 | if (sqlExpr instanceof SQLPropertyExpr){ 1594 | tableName =((SQLPropertyExpr) sqlExpr).getName(); 1595 | databaseName =((SQLPropertyExpr) sqlExpr).getOwnernName(); 1596 | } else if (sqlExpr instanceof SQLIdentifierExpr){ 1597 | tableName=((SQLIdentifierExpr) sqlExpr).getName(); 1598 | databaseName = "tmp"; 1599 | } 1600 | 1601 | name.add(databaseName); 1602 | name.add(tableName); 1603 | 1604 | return tableName +"\t"+databaseName; 1605 | } 1606 | 1607 | 1608 | } 1609 | 1610 | 1611 | 
-------------------------------------------------------------------------------- /src/main/java/com/jd/jr/daat/dw/lineage/analysis/GetColumnsLineage.java: -------------------------------------------------------------------------------- 1 | package com.jd.jr.daat.dw.lineage.analysis; 2 | 3 | import com.alibaba.druid.sql.repository.SchemaRepository; 4 | import com.alibaba.druid.util.JdbcConstants; 5 | import com.google.common.collect.Lists; 6 | import com.jd.jr.daat.dw.lineage.domains.lineage.column.ColumnLineageUtils; 7 | import com.jd.jr.daat.dw.lineage.domains.lineage.table.TableColumnLineage; 8 | import com.jd.jr.daat.dw.lineage.utils.ProcessUnparsedSQL; 9 | import com.jd.jr.daat.dw.lineage.utils.SQLExtractor; 10 | import com.jd.jr.daat.dw.lineage.utils.SchemaLoader; 11 | import org.apache.commons.csv.CSVFormat; 12 | import org.apache.commons.csv.CSVPrinter; 13 | import org.apache.commons.csv.QuoteMode; 14 | import org.apache.commons.io.FileUtils; 15 | 16 | import java.io.*; 17 | import java.nio.file.Files; 18 | import java.nio.file.Paths; 19 | import java.util.ArrayList; 20 | import java.util.Arrays; 21 | import java.util.List; 22 | import java.util.regex.Matcher; 23 | import java.util.regex.Pattern; 24 | 25 | public class GetColumnsLineage { 26 | 27 | private List> Allpath = new ArrayList>(); 28 | 29 | private List results; 30 | 31 | public static void main(String[] args) throws IOException { 32 | SchemaLoader schemaLoader = new SchemaLoader(); 33 | GetColumnsLineage getColumnsLineage =new GetColumnsLineage(); 34 | 35 | // 获取当前编译好类的路径 36 | //resourcePath 37 | String resourcePath = args[0]; 38 | //tablePath 39 | String tableSchemaPath = args[1]; 40 | //日志文件 41 | String logPath = args[2]; 42 | //最后的输出文件,链路只包含头尾 43 | String LineageHeadTailPath = args[3]; 44 | //文件后缀命名 45 | String LineageName =args[4]; 46 | 47 | String sqlExtractorPath= resourcePath+"/process_sql/"+LineageName +"_sqlExtractor.sql"; 48 | 49 | System.out.println("=====extract sql from 
logs================"); 50 | getColumnsLineage.getSQLExtractorPath(logPath,sqlExtractorPath); 51 | System.out.println("======extracted !!================"); 52 | 53 | System.out.println("=========getParsedSQL============="); 54 | String parsedSQLPath = getColumnsLineage.getParsedSQLPath(resourcePath,sqlExtractorPath,LineageName); 55 | System.out.println("=========done!!==================="); 56 | 57 | System.out.println("===========getLineage================"); 58 | String sqlLineage = FileUtils.readFileToString(new File(parsedSQLPath), "UTF-8"); 59 | List sqlsLineageTables = Lists.newArrayList(FileUtils.readFileToString(new File(tableSchemaPath), "UTF-8").split(";")); 60 | SchemaRepository schemaRepository = schemaLoader.load(sqlsLineageTables, JdbcConstants.HIVE); 61 | ColumnLineageAnalyzer analyzer = new ColumnLineageAnalyzer(schemaRepository); 62 | getColumnsLineage.GetColumnsLineage(analyzer,sqlLineage,tableSchemaPath, LineageHeadTailPath); 63 | System.out.println("===========done!!=============="); 64 | } 65 | 66 | 67 | 68 | /** 69 | * 从SQLExtractor抽取sql 70 | * @param logsPath 71 | * @param writeSqlFileName 72 | * @return 73 | * @throws IOException 74 | */ 75 | public String getSQLExtractorPath(String logsPath,String writeSqlFileName) throws IOException { 76 | SQLExtractor sqlExtractor= new SQLExtractor(); 77 | BufferedWriter sqlWriter = new BufferedWriter (new OutputStreamWriter(new FileOutputStream(writeSqlFileName,true),"UTF-8")); 78 | 79 | sqlExtractor.extractPathLogSQLs(logsPath,sqlWriter); 80 | return writeSqlFileName; 81 | 82 | } 83 | 84 | public String getParsedSQLPath(String resourcesPath,String sqlFile,String Name) throws IOException { 85 | //从SQLExtractor提取出来的sql,进行过滤得到不能解析的SQL,送到SQLprocessUnparsed处理,处理后再用。 86 | ProcessUnparsedSQL processUnparsedSQL =new ProcessUnparsedSQL(); 87 | String getUnparsedSqlPath = resourcesPath + "/process_sql/"+Name+"_GetUnParsed.sql"; 88 | String getParsedSqlPath = resourcesPath + "/sql/"+Name+"_GetParsed.sql"; 89 | 
processUnparsedSQL.filterSQL(resourcesPath,sqlFile,getUnparsedSqlPath,getParsedSqlPath); 90 | //把不能解析的sql进行处理 91 | String getProcessedSQLPath = resourcesPath +"/process_sql/"+Name+"_ProcessedSQL.sql"; 92 | String getUnprocessedSQLPath = resourcesPath +"/process_sql/"+Name+"_UnprocessedSQL.sql"; 93 | String processedSQL = processUnparsedSQL.processUnParsedSQL(resourcesPath,getUnparsedSqlPath,getProcessedSQLPath,getUnprocessedSQLPath ); 94 | //将处理过的sql得到可以解析的 95 | processUnparsedSQL.filterSQL(resourcesPath,processedSQL,getUnparsedSqlPath,getParsedSqlPath); 96 | return getParsedSqlPath; 97 | 98 | } 99 | 100 | void GetColumnsLineage(ColumnLineageAnalyzer analyzer,String sqlLineageDemo01,String FileTableSchemaPath, String FileLineageHeadTail) throws IOException { 101 | List> Allpath =getAllpath(analyzer,sqlLineageDemo01,FileTableSchemaPath); 102 | List path = new ArrayList<>(); 103 | List> mergedPath ; 104 | 105 | //System.out.println(Allpath.size()); 106 | for (List path1 : Allpath) { 107 | path.add(path1.get(0));//得到所有path集合 108 | } 109 | 110 | mergedPath = mergePath(Allpath, Allpath, path); 111 | int i=0; 112 | //System.out.println("mergedpath "+ i); 113 | while(!mergedPath.isEmpty()){ 114 | i+=1; 115 | mergedPath = mergePath(Allpath,mergedPath,path); 116 | //System.out.println("mergedpath "+ i); 117 | } 118 | 119 | //System.out.println("--------"); 120 | //System.out.println("链路条数"+path.size()); 121 | getLineageHeadAndTail(path, FileLineageHeadTail); 122 | //path.stream().forEach(System.out::println); 123 | } 124 | 125 | 126 | 127 | public List> getAllpath(ColumnLineageAnalyzer analyzer,String sqlLineageDemo01,String FileTableSchemaPath) throws IOException { 128 | TableColumnLineage tableColumnLineage = analyzer.getTableColumnLineage(sqlLineageDemo01, FileTableSchemaPath); 129 | tableColumnLineage.getColumnLineages().values().forEach(rootColumnNode -> { 130 | results = ColumnLineageUtils.getFirstPathString(rootColumnNode); 131 | if(results!=null){ 132 | 
Allpath.add(results); 133 | } 134 | }); 135 | //System.out.println(Allpath.size()); 136 | return Allpath; 137 | 138 | //path.stream().forEach(System.out::println); 139 | } 140 | 141 | /** 142 | * Description: 得到合并的路径 143 | * @param Allpath1 存放所有血缘关系的直接路径 144 | * @param Allpath2 存放每次合并后的路径 145 | * @param path 存放最后合并好的路径,也就是最后打印的路径 146 | * @return 返回每次合并的路径 147 | */ 148 | public List> mergePath(List> Allpath1, List> Allpath2, List path) throws IOException { 149 | List mergePathItem; 150 | List> mergedPath = new ArrayList<>(); 151 | 152 | for (List path1 : Allpath1) { 153 | String end_1 = path1.get(2); 154 | String path_1 = path1.get(0); 155 | String start_1 = path1.get(1); 156 | for (List path2 : Allpath2) { 157 | String start_2 = path2.get(1); 158 | String path_2 = path2.get(0); 159 | String end_2 = path2.get(2); 160 | if (start_2.equals(end_1)) { 161 | String mergepath = path_1.substring(0, path_1.length() - 5) + 162 | path_2.substring(10, path_2.length()); 163 | String mergedPathStart = pathGetStart(mergepath).split("\t")[0] +pathGetStart(mergepath).split("\t")[1]+pathGetStart(mergepath).split("\t")[2]; 164 | String mergedPathEnd = pathGetStart(mergepath).split("\t")[3] +pathGetStart(mergepath).split("\t")[4]+pathGetStart(mergepath).split("\t")[5]; 165 | if (!path.contains(mergepath) && !mergedPathStart.equals(mergedPathEnd) && !start_1.equals(end_2)) { 166 | path.add(mergepath); 167 | mergePathItem = stringToList(mergepath, start_1, end_2); 168 | mergedPath.add(mergePathItem); 169 | } 170 | path.remove(path_1); 171 | } 172 | } 173 | } 174 | 175 | return mergedPath; 176 | 177 | } 178 | 179 | /** 180 | * Description: 得到一条路径的头尾 181 | * @param path 输入路径 182 | * @return 返回 路径的头尾 start databasename tablename columnname end databasename tablename columnname 183 | */ 184 | public String pathGetStart(String path){ 185 | String table_name=" " ; 186 | String column_name=" " ; 187 | String database_name =" "; 188 | String end_table_name= " "; 189 | String end_column_name =" 
"; 190 | String end_database_name=" "; 191 | String table_name_str = "(?<=, table: )[A-Za-z0-9_]{1,60}"; 192 | String column_name_str ="(?<=, column: )[A-Za-z()*0-9_-]{1,60}"; 193 | String database_name_str ="(?<=, database: )[A-Za-z_]{1,20}"; 194 | String end_column_name_str ="(?<=R, type: )UNSUPPORTED"; 195 | //String end_database_name_str ="(?<=, database: )[a-z_]{1,20}"; 196 | Pattern table_name_pattern = Pattern.compile(table_name_str); 197 | Pattern column_name_pattern = Pattern.compile(column_name_str); 198 | Pattern end_column_name_pattern = Pattern.compile(end_column_name_str); 199 | Pattern database_name_pattern = Pattern.compile(database_name_str); 200 | 201 | Matcher table_matcher = table_name_pattern.matcher(path); 202 | Matcher column_matcher = column_name_pattern.matcher(path); 203 | Matcher end_column_matcher = end_column_name_pattern.matcher(path); 204 | Matcher database_matcher = database_name_pattern.matcher(path); 205 | 206 | if(table_matcher.find()){ 207 | table_name = table_matcher.group(); 208 | } 209 | while(table_matcher.find()){ 210 | end_table_name = table_matcher.group(); 211 | } 212 | if(column_matcher.find()){ 213 | column_name = column_matcher.group(); 214 | } 215 | while (column_matcher.find()){ 216 | end_column_name = column_matcher.group(); 217 | } 218 | if(database_matcher.find()){ 219 | database_name = database_matcher.group(); 220 | } 221 | while (database_matcher.find()){ 222 | end_database_name = database_matcher.group(); 223 | } 224 | if(end_column_matcher.find()){ 225 | end_column_name = end_column_matcher.group(); 226 | } 227 | return database_name+"\t"+table_name+"\t"+column_name +"\t"+end_database_name+"\t"+end_table_name+"\t"+end_column_name; 228 | } 229 | 230 | /** 231 | * Description : 将路径头和尾合并成列表 232 | * @param path 233 | * @param start 234 | * @param end 235 | * @return 236 | */ 237 | public List stringToList(String path,String start,String end){ 238 | List pathItem =new ArrayList<>(); 239 | pathItem.add(path); 240 | 
pathItem.add(start); 241 | pathItem.add(end); 242 | return pathItem; 243 | } 244 | 245 | /** 246 | * Description : 得到路径的头尾,也即是最后的结果 247 | * @param pathList 所有合并好的路径 248 | * @throws IOException 249 | */ 250 | public void getLineageHeadAndTail(List pathList, String FileLineageHeadTail) throws IOException { 251 | 252 | File file=new File(FileLineageHeadTail); 253 | BufferedWriter bufferedWriter = Files.newBufferedWriter(Paths.get(FileLineageHeadTail)); 254 | CSVFormat csvFormat = CSVFormat.newFormat('\t').withRecordSeparator('\n'); 255 | CSVPrinter csvPrinter = new CSVPrinter(bufferedWriter, csvFormat); 256 | 257 | //System.out.println("------------------"); 258 | //System.out.println(pathList.size()); 259 | 260 | for(String path:pathList){ 261 | //只包含头尾都是物理表的路径 262 | if(!pathGetStart(path).split("\t")[0].equals("TMP") && !pathGetStart(path).split("\t")[0].equals("tmp")&&!pathGetStart(path).split("\t")[3].equals("tmp")&&!pathGetStart(path).split("\t")[3].equals("TMP")){ 263 | //System.out.println(pathGetStart(path)); 264 | String[] writeLine=pathGetStart(path).split("\t"); 265 | csvPrinter.printRecord(Arrays.asList(writeLine)); 266 | csvPrinter.flush(); 267 | } 268 | } 269 | 270 | csvPrinter.close(); 271 | bufferedWriter.close(); 272 | } 273 | } 274 | -------------------------------------------------------------------------------- /src/main/java/com/jd/jr/daat/dw/lineage/domains/basic/Column.java: -------------------------------------------------------------------------------- 1 | package com.jd.jr.daat.dw.lineage.domains.basic; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | 6 | @Data 7 | @AllArgsConstructor 8 | public class Column { 9 | /** 10 | * 字段名称 11 | */ 12 | private String name; 13 | 14 | /** 15 | * 字段类型 16 | */ 17 | private ColumnType type; 18 | 19 | /** 20 | * 对应的表 21 | */ 22 | private Table table; 23 | } 24 | -------------------------------------------------------------------------------- 
/**
 * Kinds of columns known to the lineage model.
 *
 * <p>Enum constants are shared by the whole application, so their attributes are immutable.
 */
public enum ColumnType {
    /** Hive column. */
    HIVE_COLUMN("HIVE_COLUMN", 1002),

    /** Hive partition column. */
    HIVE_PARTITION_COLUMN("HIVE_PARTITION_COLUMN", 1007),

    /** MySQL column. */
    MYSQL_COLUMN("MYSQL_COLUMN", 2002);

    /** Name of the column type. */
    public final String name;

    /** Numeric code of the column type. */
    public final int code;

    ColumnType(String name, int code) {
        this.name = name;
        this.code = code;
    }
}
-------------------------------------------------------------------------------- /src/main/java/com/jd/jr/daat/dw/lineage/domains/basic/ForeignKeys.java: -------------------------------------------------------------------------------- 1 | package com.jd.jr.daat.dw.lineage.domains.basic; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | 6 | import com.jd.jr.daat.dw.lineage.domains.lineage.table.TableColumnLineage; 7 | import lombok.NoArgsConstructor; 8 | 9 | import java.util.ArrayList; 10 | 11 | @Data 12 | @AllArgsConstructor 13 | @NoArgsConstructor 14 | public class ForeignKeys { 15 | private TableColumnLineage tableColumnLineage; 16 | private ArrayList arrayList; 17 | } 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /src/main/java/com/jd/jr/daat/dw/lineage/domains/basic/MethodInvoke.java: -------------------------------------------------------------------------------- 1 | package com.jd.jr.daat.dw.lineage.domains.basic; 2 | 3 | import com.alibaba.druid.sql.ast.SQLObject; 4 | import lombok.AllArgsConstructor; 5 | import lombok.Data; 6 | import lombok.NoArgsConstructor; 7 | 8 | @Data 9 | @AllArgsConstructor 10 | @NoArgsConstructor 11 | public class MethodInvoke { 12 | private SQLObject sqlObject; 13 | private String ColumnNameOrAlias; 14 | } 15 | -------------------------------------------------------------------------------- /src/main/java/com/jd/jr/daat/dw/lineage/domains/basic/Table.java: -------------------------------------------------------------------------------- 1 | package com.jd.jr.daat.dw.lineage.domains.basic; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | 6 | @Data 7 | @AllArgsConstructor 8 | public class Table { 9 | /** 10 | * 表名称 11 | */ 12 | private String name; 13 | 14 | /** 15 | * 表类型 16 | */ 17 | private TableType type; 18 | 19 | /** 20 | * 对应的数据库 21 | */ 22 | private Database database; 23 | } 24 | 
/**
 * Kinds of tables known to the lineage model.
 *
 * <p>Enum constants are shared by the whole application, so their attributes are immutable.
 */
public enum TableType {
    /** Hive table. */
    HIVE_TABLE("HIVE_TABLE", 1001),

    /** Hive view. */
    HIVE_VIEW("HIVE_VIEW", 1006),

    /** MySQL table. */
    MYSQL_TABLE("MYSQL_TABLE", 2001);

    /** Name of the table type. */
    public final String name;

    /** Numeric code of the table type. */
    public final int code;

    TableType(String name, int code) {
        this.name = name;
        this.code = code;
    }
}
/**
 * Types of column nodes in a column-level lineage graph.
 *
 * <p>Enum constants are shared by the whole application, so their attributes are immutable.
 */
public enum ColumnLineageColumnNodeType {
    /** Column of a Hive table. */
    HIVE_TABLE_COLUMN("HIVE_TABLE_COLUMN"),

    /** Column of a Hive table, obtained through a {@code *} query. */
    HIVE_TABLE_COLUMN_BY_ALL("HIVE_TABLE_COLUMN_BY_ALL"),

    /** Column of a Hive temporary table. */
    HIVE_TMP_TABLE_COLUMN("HIVE_TMP_TABLE_COLUMN"),

    /** Column of a Hive sub-query table. */
    HIVE_SUB_QUERY_TABLE_COLUMN("HIVE_SUB_QUERY_TABLE_COLUMN"),

    /** Column of a Hive sub-query table, obtained through a {@code *} query. */
    HIVE_SUB_QUERY_TABLE_COLUMN_BY_ALL("HIVE_SUB_QUERY_TABLE_COLUMN_BY_ALL"),

    /** Unknown. */
    UNKNOWN("UNKNOWN"),

    /** Stop node produced when an unsupported relation node ends the path. */
    STOP("STOP");

    /** Name of the node type. */
    public final String name;

    ColumnLineageColumnNodeType(String name) {
        this.name = name;
    }
}
!= 0; 59 | } 60 | 61 | public ColumnLineageColumnNode getFirstChild() { 62 | if (hasChild()) { 63 | return children.get(0); 64 | } else { 65 | return null; 66 | } 67 | } 68 | 69 | @Override 70 | public String toString() { 71 | return String.format( 72 | "[R, type: %s]", 73 | relationNodeType.toString()); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/main/java/com/jd/jr/daat/dw/lineage/domains/lineage/column/ColumnLineageRelationNodeType.java: -------------------------------------------------------------------------------- 1 | package com.jd.jr.daat.dw.lineage.domains.lineage.column; 2 | 3 | /** 4 | * 字段级别血缘关系节点类型 5 | */ 6 | public enum ColumnLineageRelationNodeType { 7 | /** 8 | * 直接关系 9 | * 10 | * 例如:CREATE TABLE table2 AS SELECT col FROM table1 (table2.col -> table1.col) 11 | */ 12 | DIRECT("DIRECT"), 13 | 14 | /** 15 | * 函数关系 16 | * 17 | * 例如:CREATE TABLE table2 AS SELECT TRIM(col) AS col_trim FROM table1; (table2.col_trim -> table1.col) 18 | */ 19 | FUNCTION("FUNCTION"), 20 | 21 | /** 22 | * 别名关系 23 | * 24 | * 例如 : select SUM(user_actual_pay_amount) ord_amount 25 | * 26 | */ 27 | ALIAS("ALIAS"), 28 | 29 | CASE("CASE"), 30 | 31 | CAST("CAST"), 32 | 33 | /** 34 | * 不支持的关系 35 | */ 36 | UNSUPPORTED("UNSUPPORTED"); 37 | 38 | ColumnLineageRelationNodeType(String name) { 39 | this.name = name; 40 | } 41 | 42 | /** 43 | * 名称 44 | */ 45 | public String name; 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/com/jd/jr/daat/dw/lineage/domains/lineage/column/ColumnLineageUtils.java: -------------------------------------------------------------------------------- 1 | package com.jd.jr.daat.dw.lineage.domains.lineage.column; 2 | 3 | import com.alibaba.druid.sql.ast.SQLObject; 4 | import com.alibaba.druid.sql.ast.expr.SQLAllColumnExpr; 5 | import com.alibaba.druid.sql.ast.expr.SQLIdentifierExpr; 6 | import com.alibaba.druid.sql.ast.statement.SQLExprTableSource; 
7 | import com.alibaba.druid.sql.ast.statement.SQLJoinTableSource; 8 | import com.alibaba.druid.sql.ast.statement.SQLSubqueryTableSource; 9 | import com.google.common.collect.Lists; 10 | import com.jd.jr.daat.dw.lineage.domains.basic.Database; 11 | 12 | import java.util.List; 13 | 14 | 15 | public class ColumnLineageUtils { 16 | /** 17 | * Description: 18 | * 根据数据库名称获取字段级别血缘字段节点类型 19 | * 20 | * @param databaseName 数据库名称 21 | * @return 字段级别血缘字段节点类型 22 | */ 23 | public static ColumnLineageColumnNodeType getColumnLineageColumnNodeTypeByDatabaseName(String databaseName) { 24 | ColumnLineageColumnNodeType columnNodeType = ColumnLineageColumnNodeType.HIVE_TABLE_COLUMN; 25 | 26 | for (String tmpDatabaseName : Database.HIVE_TMP_DATABASES) { 27 | if (databaseName.toUpperCase().equals(tmpDatabaseName)) { 28 | columnNodeType = ColumnLineageColumnNodeType.HIVE_TMP_TABLE_COLUMN; 29 | 30 | break; 31 | } 32 | } 33 | 34 | return columnNodeType; 35 | } 36 | 37 | /** 38 | * Description: 39 | * 根据子 Column 的 SQLObject 和父 Column 节点获取子 Column 节点类型 40 | * 41 | * @param sqlObject 子 Column 的 SQLObject 42 | * @param parentColumnNode 父 Column 节点 43 | * @return 子 Column 节点类型 44 | */ 45 | public static ColumnLineageColumnNodeType getColumnLineageColumnNodeType( 46 | SQLObject sqlObject, ColumnLineageColumnNode parentColumnNode) { 47 | if (sqlObject instanceof SQLSubqueryTableSource) { 48 | return ColumnLineageColumnNodeType.HIVE_SUB_QUERY_TABLE_COLUMN; 49 | } 50 | 51 | if (sqlObject instanceof SQLAllColumnExpr) { 52 | return ColumnLineageColumnNodeType.HIVE_SUB_QUERY_TABLE_COLUMN_BY_ALL; 53 | } 54 | 55 | if (sqlObject instanceof SQLIdentifierExpr) { 56 | return ColumnLineageColumnNodeType.HIVE_SUB_QUERY_TABLE_COLUMN; 57 | } 58 | 59 | if(sqlObject instanceof SQLJoinTableSource){ 60 | return ColumnLineageColumnNodeType.HIVE_SUB_QUERY_TABLE_COLUMN; 61 | } 62 | 63 | 64 | // 当前节点为物理表 65 | if (sqlObject instanceof SQLExprTableSource) { 66 | if (parentColumnNode.getTableName().equalsIgnoreCase("*")) 
{ 67 | // 父节点为 SELECT * 68 | return ColumnLineageColumnNodeType.HIVE_TABLE_COLUMN_BY_ALL; 69 | } else { 70 | // 父节点为 SELECT item 71 | return ColumnLineageColumnNodeType.HIVE_TABLE_COLUMN; 72 | } 73 | } 74 | 75 | return ColumnLineageColumnNodeType.UNKNOWN; 76 | } 77 | 78 | /** 79 | * Description: 80 | * 根据根 Column 节点,得到 Lineage 路径 81 | * 82 | * @param rootColumnNode 跟 Column 节点 83 | * @return Lineage 路径 84 | */ 85 | public static List getFirstPathString(ColumnLineageColumnNode rootColumnNode) { 86 | StringBuilder path = new StringBuilder(); 87 | 88 | List result = Lists.newArrayList(); 89 | 90 | ColumnLineageColumnNode currentColumnNode = rootColumnNode; 91 | 92 | List start = Lists.newArrayList(); 93 | start.add(currentColumnNode.getTableName() + "." + currentColumnNode.getColumnName()); 94 | 95 | path.append("[START]"); 96 | path.append(" -> "); 97 | 98 | while (currentColumnNode != null && currentColumnNode.hasChild()) { 99 | path.append(currentColumnNode.toString()); 100 | path.append(" -> "); 101 | 102 | ColumnLineageRelationNode currentRelationNode = currentColumnNode.getFirstChild(); 103 | path.append(currentRelationNode.toString()); 104 | path.append(" -> "); 105 | 106 | currentColumnNode = currentRelationNode.getFirstChild(); 107 | } 108 | path.append(currentColumnNode.toString()); 109 | 110 | 111 | List end = Lists.newArrayList(); 112 | end.add(currentColumnNode.getTableName() + "." 
+ currentColumnNode.getColumnName()); 113 | 114 | path.append(" -> "); 115 | path.append("[END]"); 116 | 117 | //避免首尾一样产生死循环 118 | if(!start.toString().equals(end.toString())){ 119 | result.add(path.toString()); 120 | result.add(start.toString()); 121 | result.add(end.toString()); 122 | return result; 123 | } else { 124 | return null; 125 | } 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /src/main/java/com/jd/jr/daat/dw/lineage/domains/lineage/table/TableColumnLineage.java: -------------------------------------------------------------------------------- 1 | package com.jd.jr.daat.dw.lineage.domains.lineage.table; 2 | 3 | import com.google.common.collect.Maps; 4 | import com.google.common.collect.Sets; 5 | import com.jd.jr.daat.dw.lineage.domains.lineage.column.ColumnLineageColumnNode; 6 | import com.jd.jr.daat.dw.lineage.domains.basic.Column; 7 | import com.jd.jr.daat.dw.lineage.domains.basic.Table; 8 | import lombok.Data; 9 | 10 | import java.util.Map; 11 | import java.util.Set; 12 | 13 | /** 14 | * 表的字段级别血缘 15 | */ 16 | @Data 17 | public class TableColumnLineage { 18 | /** 19 | * 表 20 | */ 21 | private Table table; 22 | 23 | /** 24 | * 字段 25 | */ 26 | private Set columns; 27 | 28 | /** 29 | * 字段血缘 30 | */ 31 | private Map columnLineages; 32 | 33 | public TableColumnLineage() { 34 | this.columns = Sets.newHashSet(); 35 | this.columnLineages = Maps.newHashMap(); 36 | } 37 | 38 | /** 39 | * Description: 40 | * 添加字段 41 | * 42 | * @param column 字段 43 | */ 44 | public void addColumn(Column column) { 45 | this.columns.add(column); 46 | } 47 | 48 | /** 49 | * Description: 50 | * 添加字段级别血缘字段根节点 51 | * 52 | * @param column 字段 53 | * @param columnNode 字段级别血缘字段根节点 54 | */ 55 | public void addColumnLineageColumnRootNode(Column column, ColumnLineageColumnNode columnNode) { 56 | this.columnLineages.put(column, columnNode); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- 
/src/main/java/com/jd/jr/daat/dw/lineage/utils/ProcessUnparsedSQL.java: -------------------------------------------------------------------------------- 1 | package com.jd.jr.daat.dw.lineage.utils; 2 | 3 | import com.alibaba.druid.sql.SQLUtils; 4 | import com.alibaba.druid.sql.ast.SQLStatement; 5 | import com.alibaba.druid.sql.repository.SchemaRepository; 6 | import com.alibaba.druid.util.JdbcConstants; 7 | import com.google.common.collect.Lists; 8 | import org.apache.commons.io.FileUtils; 9 | 10 | import java.io.*; 11 | import java.util.*; 12 | import java.util.regex.Matcher; 13 | import java.util.regex.Pattern; 14 | 15 | public class ProcessUnparsedSQL { 16 | 17 | /** 18 | * 过滤SQL 19 | * @param resourcesPath 20 | * @param sqlFile 从SQLExtractor抽出的sql文件 21 | * @throws IOException 22 | */ 23 | public void filterSQL(String resourcesPath, String sqlFile, String getUnparsedSqlPath, String getParsedSqlPath) throws IOException { 24 | SchemaLoader schemaLoader = new SchemaLoader(); 25 | SchemaRepository schemaRepository =new SchemaRepository(); 26 | List sqlLineageDemo01; 27 | List sqlsLineageDemo01Tables; 28 | 29 | // 获取当前编译好类的路径 30 | String logsFilePath = resourcesPath + "/logs"; 31 | String FileTableSchemaPath = resourcesPath + "/table/table"; 32 | 33 | int count=0; 34 | sqlLineageDemo01 = Lists.newArrayList(FileUtils.readFileToString(new File(sqlFile), "UTF-8").split(";")); 35 | sqlsLineageDemo01Tables = Lists.newArrayList(FileUtils.readFileToString(new File(FileTableSchemaPath), "UTF-8").split(";")); 36 | 37 | schemaRepository = schemaLoader.load(sqlsLineageDemo01Tables, JdbcConstants.HIVE); 38 | 39 | BufferedWriter unparsedSqlWriter = new BufferedWriter (new OutputStreamWriter(new FileOutputStream(getUnparsedSqlPath,true),"UTF-8")); 40 | BufferedWriter parsedSqlWriter = new BufferedWriter (new OutputStreamWriter(new FileOutputStream(getParsedSqlPath,true),"UTF-8")); 41 | 42 | // 
newStruct是一个存储结构,存储了血缘分析结果tableColumnLineage(原始sql的血缘分析和用主外键构造出sql的血缘分析结果)和最终链接到物理表的主外键对列表 43 | 44 | for (String sql : sqlLineageDemo01) { 45 | try { 46 | List stmtList = SQLUtils.parseStatements(sql, schemaRepository.getDbType()); 47 | 48 | stmtList.forEach(schemaRepository::resolve); 49 | 50 | parsedSqlWriter.write(sql+";"); 51 | continue; 52 | 53 | } catch (com.alibaba.druid.sql.parser.ParserException e) { 54 | //System.out.println(sql); 55 | count++; 56 | 57 | unparsedSqlWriter.write(sql+";"); 58 | } 59 | } 60 | 61 | unparsedSqlWriter.flush(); 62 | unparsedSqlWriter.close(); 63 | parsedSqlWriter.flush(); 64 | parsedSqlWriter.close(); 65 | 66 | } 67 | 68 | /** 69 | * 从文件中得到未解析的sql 70 | * @param 71 | */ 72 | public String processUnParsedSQL(String resourcesPath,String UnparsedSQLPath,String getProcessedSQLPath ,String getUnprocessedSQLPath ) throws IOException { 73 | // 获取当前编译好类的路径 74 | 75 | 76 | BufferedWriter processedWriter = new BufferedWriter (new OutputStreamWriter(new FileOutputStream(getProcessedSQLPath,true),"UTF-8")); 77 | BufferedWriter unprocessedWriter = new BufferedWriter (new OutputStreamWriter(new FileOutputStream(getUnprocessedSQLPath,true),"UTF-8")); 78 | 79 | List sqlLineageDemo01 = Lists.newArrayList(FileUtils.readFileToString(new File(UnparsedSQLPath), "UTF-8").split(";")); 80 | //过滤掉不能解析的sql语句 81 | for (String sql : sqlLineageDemo01){ 82 | String beforeSQL=sql; 83 | sql = processUnparsedCreateSQL(sql); 84 | sql = processUnparsedSplitSQL(sql); 85 | sql = processUnparsedSortSQL(sql); 86 | sql = processUnparsedGroupingSets(sql); 87 | sql = processUnparsedDistributeBy(sql); 88 | sql = processUnparsedBracketSQL(sql); 89 | sql = processUnparsedOver(sql); 90 | sql = processUnparsedOver1(sql); 91 | sql = processUnparseInteger(sql); 92 | sql = processUnparsedIndex(sql); 93 | 94 | //如果sql还是等于之前内容,说明该sql不符合以上能解析的情况。 95 | if (!sql.equals("")){ 96 | if (beforeSQL.equals(sql)){ 97 | unprocessedWriter.write(sql+";"+"\n"); 98 | } else { 99 | 
processedWriter.write(sql+";"+"\n"); 100 | } 101 | } 102 | 103 | } 104 | 105 | unprocessedWriter.flush(); 106 | unprocessedWriter.close(); 107 | processedWriter.flush(); 108 | processedWriter.close(); 109 | 110 | return getProcessedSQLPath; 111 | } 112 | 113 | /** 114 | * 处理split函数,把split函数去掉,留下中间的字段 115 | * 116 | */ 117 | 118 | public String processUnparsedSplitSQL(String sql) throws IOException { 119 | //String sql = "split(regexp_replace(requesturldecode,'^http[s]?://m.jr.jd.com/mjractivity/',''),'-')[0] id"; 120 | //String sql ="split(split(split(spoint2,'#')[2],'_')[1],'[*]')[1] as from_sec_page_card_name"; 121 | 122 | final String SQL_SPLIT_PREFIX_REGEX_STR ="split\\s*\\("; 123 | final String SQL_SPLIT_SUFFFIX_REGEX_STR ="\\,\\s*\'*[a-zA-Z\\d\\_\\-\"|%#&;.,:*?{}()=\\[\\]\\\\]{1,30}\\s*\'*\\s*\\)\\s*\\[\\d+\\]"; 124 | 125 | Pattern SQL_SPLIT_PREFIX_PATTERN=Pattern.compile(SQL_SPLIT_PREFIX_REGEX_STR); 126 | Matcher SQL_SPLIT_PREFIX_MATCHER = SQL_SPLIT_PREFIX_PATTERN.matcher(sql.toLowerCase()); 127 | 128 | Pattern SQL_SPLIT_SUFFIX_PATTERN=Pattern.compile(SQL_SPLIT_SUFFFIX_REGEX_STR); 129 | String s=""; 130 | String finalSQL=""; 131 | 132 | while(SQL_SPLIT_PREFIX_MATCHER.find()){ 133 | s =SQL_SPLIT_PREFIX_MATCHER.replaceAll(""); 134 | } 135 | 136 | Matcher SQL_SPLIT_SUFFIX_MATCHER = SQL_SPLIT_SUFFIX_PATTERN.matcher(s); 137 | while(SQL_SPLIT_SUFFIX_MATCHER.find()){ 138 | finalSQL = SQL_SPLIT_SUFFIX_MATCHER.replaceAll(""); 139 | } 140 | 141 | if(!finalSQL.equals("")){ 142 | sql =finalSQL; 143 | } 144 | 145 | return sql; 146 | } 147 | 148 | 149 | /** 150 | * 处理row_number 、rank、 count(1) Sum() over 语句 ,直接去掉,保留后面的别名 151 | */ 152 | 153 | public String processUnparsedSortSQL(String sql){ 154 | //String sql ="row_Number() over(distribute by item_first_cate_id,item_second_cate_id,item_third_cate_id sort by ratio desc,good_comment_ratio desc,price) rank_sku\n"; 155 | final String SQL_SORT_REGEX_STR 
="(row_number)\\s*\\(\\)\\s*over\\s*\\(.*\\)\\s*(as)*|(rank)\\(\\)\\s*over\\s*\\(.*\\)\\s*(as)*|(count)\\(1\\)\\s*over\\s*\\(.*\\)\\s*(as)*|(sum\\(\\w+\\))\\(\\)\\s*over\\s*\\(.*\\)\\s*(as)*"; 156 | 157 | String repalce_SQL=""; 158 | Pattern SQL_SORT_PATTERN=Pattern.compile(SQL_SORT_REGEX_STR); 159 | Matcher SQL_SORT_MATCHER = SQL_SORT_PATTERN.matcher(sql.toLowerCase()); 160 | 161 | while (SQL_SORT_MATCHER.find()){ 162 | repalce_SQL = (SQL_SORT_MATCHER.replaceAll("")); 163 | } 164 | 165 | if (!repalce_SQL.equals("")){ 166 | sql=repalce_SQL; 167 | } 168 | 169 | return sql; 170 | } 171 | 172 | /** 173 | * 174 | * @param sql 175 | * @return 176 | */ 177 | public String processUnparsedBracketSQL(String sql){ 178 | //String sql ="row_Number() over(distribute by item_first_cate_id,item_second_cate_id,item_third_cate_id sort by ratio desc,good_comment_ratio desc,price) rank_sku\n"; 179 | final String SQL_SORT_REGEX_STR ="\\[\\d+\\]"; 180 | 181 | String repalce_SQL=""; 182 | Pattern SQL_SORT_PATTERN=Pattern.compile(SQL_SORT_REGEX_STR); 183 | Matcher SQL_SORT_MATCHER = SQL_SORT_PATTERN.matcher(sql.toLowerCase()); 184 | 185 | while (SQL_SORT_MATCHER.find()){ 186 | repalce_SQL = (SQL_SORT_MATCHER.replaceAll("")); 187 | } 188 | 189 | if (!repalce_SQL.equals("")){ 190 | sql=repalce_SQL; 191 | } 192 | 193 | return sql; 194 | 195 | } 196 | /** 197 | * 处理含有DistributeBy 198 | */ 199 | public String processUnparsedDistributeBy(String sql){ 200 | 201 | String SQL_OVER_DISTRIBUTE_BY_REGEX_STR = "over\\s*\\(\\s*distribute by.*\\)"; 202 | Pattern SQL_OVER_DISTRIBUTE_BY_PATTERN= Pattern.compile(SQL_OVER_DISTRIBUTE_BY_REGEX_STR); 203 | Matcher SQL_OVER_DISTRIBUTE_BY_MATCHER = SQL_OVER_DISTRIBUTE_BY_PATTERN.matcher(sql.toLowerCase()); 204 | 205 | String SQL_DISTRIBUTE_BY_REGEX_STR = "distribute by\\s*\\w+"; 206 | Pattern SQL_DISTRIBUTE_BY_PATTERN= Pattern.compile(SQL_DISTRIBUTE_BY_REGEX_STR); 207 | Matcher SQL_DISTRIBUTE_BY_MATCHER = SQL_DISTRIBUTE_BY_PATTERN.matcher(sql.toLowerCase()); 
208 | 209 | while (SQL_OVER_DISTRIBUTE_BY_MATCHER.find()){ 210 | // System.out.println("ok"); 211 | // System.out.println(SQL_DISTRIBUTE_BY_MATCHER.group()); 212 | sql = SQL_OVER_DISTRIBUTE_BY_MATCHER.replaceAll(""); 213 | } 214 | 215 | while (SQL_DISTRIBUTE_BY_MATCHER.find()){ 216 | // System.out.println("ok"); 217 | // System.out.println(SQL_DISTRIBUTE_BY_MATCHER.group()); 218 | sql = SQL_DISTRIBUTE_BY_MATCHER.replaceAll(""); 219 | } 220 | 221 | 222 | //System.out.println(sql); 223 | return sql; 224 | 225 | } 226 | 227 | /** 228 | * 处理不能解析的含create语句,直接把create语句删掉 229 | */ 230 | 231 | public String processUnparsedCreateSQL(String sql){ 232 | 233 | final String SQL_CREATE_REGEX_STR ="create\\s*table\\s*(if not exists)*.*"; 234 | String repalce_SQL=""; 235 | Pattern SQL_CREATE_PATTERN=Pattern.compile(SQL_CREATE_REGEX_STR); 236 | Matcher SQL_CREATE_MATCHER = SQL_CREATE_PATTERN.matcher(sql.toLowerCase()); 237 | 238 | final String SQL_SELECT_REGEX_STR = "select\\s+"; 239 | Pattern SQL_SELECT_PATTERN=Pattern.compile(SQL_SELECT_REGEX_STR); 240 | Matcher SQL_SELECT_MATCHER = SQL_SELECT_PATTERN.matcher(sql.toLowerCase()); 241 | 242 | 243 | if (SQL_CREATE_MATCHER.find()&&!SQL_SELECT_MATCHER.find()){ 244 | sql = repalce_SQL; 245 | } 246 | 247 | return sql; 248 | 249 | } 250 | 251 | /** 252 | * 处理含有grouping sets语句,找到该语句用空格代替 253 | * @param 254 | * @return 255 | */ 256 | public String processUnparsedGroupingSets(String sql){ 257 | 258 | String SQL_GROUP_REGEX_STR = "grouping sets\\s*\\n*\\([^\\(\\)]*(\\([^\\(\\)]*(\\([^\\(\\)]*\\)[^\\(\\)]*)*\\)[^\\(\\)]*)*\\)"; 259 | 260 | Pattern SQL_GROUP_PATTERN= Pattern.compile(SQL_GROUP_REGEX_STR); 261 | Matcher SQL_GROUP_MATCHER = SQL_GROUP_PATTERN.matcher(sql.toLowerCase()); 262 | 263 | while (SQL_GROUP_MATCHER.find()){ 264 | 265 | sql = SQL_GROUP_MATCHER.replaceAll(""); 266 | } 267 | 268 | return sql; 269 | 270 | } 271 | 272 | /** 273 | * 处理over类型的SQL 274 | */ 275 | public String processUnparsedOver(String sql){ 276 | 277 | String 
SQL_OVER_REGEX_STR = "over\\(.*\\)"; 278 | 279 | Pattern SQL_OVER_PATTERN= Pattern.compile(SQL_OVER_REGEX_STR); 280 | Matcher SQL_OVER_MATCHER = SQL_OVER_PATTERN.matcher(sql.toLowerCase()); 281 | 282 | while (SQL_OVER_MATCHER.find()){ 283 | sql = SQL_OVER_MATCHER.replaceAll(""); 284 | } 285 | return sql; 286 | 287 | 288 | } 289 | 290 | /** 291 | * 处理含换行的over类型的SQL 292 | */ 293 | public String processUnparsedOver1(String sql){ 294 | 295 | String SQL_OVER_REGEX_STR = "over\\(\\n.*\\n.*\\)"; 296 | 297 | Pattern SQL_OVER_PATTERN= Pattern.compile(SQL_OVER_REGEX_STR); 298 | Matcher SQL_OVER_MATCHER = SQL_OVER_PATTERN.matcher(sql.toLowerCase()); 299 | 300 | while (SQL_OVER_MATCHER.find()){ 301 | sql = SQL_OVER_MATCHER.replaceAll(""); 302 | } 303 | 304 | return sql; 305 | 306 | 307 | } 308 | 309 | /** 310 | * 处理含数字的字段 311 | * @param sql 312 | * @return 313 | */ 314 | public String processUnparseInteger(String sql){ 315 | String SQL_Integer_REGEX_STR = "as\\s(\\d+_)+"; 316 | 317 | Pattern SQL_Integer_PATTERN= Pattern.compile(SQL_Integer_REGEX_STR); 318 | Matcher SQL_Integer_MATCHER = SQL_Integer_PATTERN.matcher(sql.toLowerCase()); 319 | 320 | while (SQL_Integer_MATCHER.find()){ 321 | sql = SQL_Integer_MATCHER.replaceAll(" "); 322 | } 323 | 324 | return sql; 325 | } 326 | 327 | /** 328 | * 处理包含关键字index的sql,替换成index_ 329 | * @param sql 330 | * @return 331 | */ 332 | public String processUnparsedIndex(String sql){ 333 | String SQL_Index_REGEX_STR = "index(?!\\w)"; 334 | 335 | Pattern SQL_Index_PATTERN= Pattern.compile(SQL_Index_REGEX_STR); 336 | Matcher SQL_Index_MATCHER = SQL_Index_PATTERN.matcher(sql.toLowerCase()); 337 | 338 | while (SQL_Index_MATCHER.find()){ 339 | sql = SQL_Index_MATCHER.replaceAll("index_"); 340 | } 341 | 342 | return sql; 343 | } 344 | 345 | /** 346 | * 合并两个文件 347 | * @param 348 | * @param 349 | */ 350 | public void mergeParsedSQLFile(String readFilePath,String writeFilePath) throws IOException { 351 | BufferedReader br = new BufferedReader (new 
InputStreamReader(new FileInputStream(readFilePath),"UTF-8")); 352 | BufferedWriter bw = new BufferedWriter (new OutputStreamWriter(new FileOutputStream(writeFilePath,true),"UTF-8")); 353 | 354 | String str =null; 355 | while ((str = br.readLine()) != null){ 356 | bw.write(str+"\n"); 357 | } 358 | 359 | bw.flush(); 360 | bw.close(); 361 | br.close(); 362 | } 363 | } 364 | -------------------------------------------------------------------------------- /src/main/java/com/jd/jr/daat/dw/lineage/utils/SQLExtractor.java: -------------------------------------------------------------------------------- 1 | package com.jd.jr.daat.dw.lineage.utils; 2 | 3 | import com.google.common.base.Joiner; 4 | import com.google.common.collect.Lists; 5 | import org.slf4j.Logger; 6 | import org.slf4j.LoggerFactory; 7 | 8 | import java.io.*; 9 | import java.util.Arrays; 10 | import java.util.List; 11 | import java.util.regex.Matcher; 12 | import java.util.regex.Pattern; 13 | import java.util.stream.Collectors; 14 | 15 | public class SQLExtractor { 16 | private static final Logger LOGGER = LoggerFactory.getLogger(SQLExtractor.class.getName()); 17 | 18 | // 日志前缀默认分隔符模式字符串 19 | static final String LOG_PREFIX_DELIMITER_PATTERN_STR = "\\|{3}"; 20 | 21 | // 日志前缀默认元素模式字符串 22 | static final String LOG_PREFIX_ELEMENT_PATTERN_STR = "[A-Za-z0-9_]{1,100}"; 23 | 24 | // 日志前缀默认日期模式字符串 25 | static final String LOG_PREFIX_DATE_PATTERN_STR = "\\d{8}"; 26 | 27 | // 日志前缀默认时间模式字符串 28 | static final String LOG_PREFIX_TIME_PATTERN_STR = "\\(\\d{2}:\\d{2}:\\d{5}\\)"; 29 | 30 | // 日志前缀默认 SESSION ID 模式字符串 31 | static final String LOG_PREFIX_SESSION_ID_PATTERN_STR = "\\d{1,10}"; 32 | 33 | // 日志前缀默认 SEQUENCE NO. 
模式字符串 34 | static final String LOG_PREFIX_SEQ_NO_PATTERN_STR = "\\d{1,10}"; 35 | 36 | // 日志前缀模式字符串 37 | static final String LOG_PREFIX_PATTERN_STR = String.format( 38 | "(?<=^%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\\s{1}).+$", 39 | LOG_PREFIX_ELEMENT_PATTERN_STR, LOG_PREFIX_DELIMITER_PATTERN_STR, 40 | LOG_PREFIX_ELEMENT_PATTERN_STR, LOG_PREFIX_DELIMITER_PATTERN_STR, 41 | LOG_PREFIX_ELEMENT_PATTERN_STR, LOG_PREFIX_DELIMITER_PATTERN_STR, 42 | LOG_PREFIX_ELEMENT_PATTERN_STR, LOG_PREFIX_DELIMITER_PATTERN_STR, 43 | LOG_PREFIX_DATE_PATTERN_STR, LOG_PREFIX_DELIMITER_PATTERN_STR, 44 | LOG_PREFIX_SESSION_ID_PATTERN_STR, LOG_PREFIX_DELIMITER_PATTERN_STR, 45 | LOG_PREFIX_SEQ_NO_PATTERN_STR, LOG_PREFIX_DELIMITER_PATTERN_STR, 46 | LOG_PREFIX_TIME_PATTERN_STR); 47 | 48 | // 日志前缀模式 49 | static final Pattern LOG_PREFIX_PATTERN = Pattern.compile(LOG_PREFIX_PATTERN_STR); 50 | 51 | // 默认日志换行符 52 | static final String LOG_DEFAULT_EOL = "\n"; 53 | 54 | // 日志内容 SQL 头部关键词 55 | static final String LOG_CONTENT_SQL_HEAD_KEYWORDS = "Beging Execute Hive Sql".toUpperCase(); 56 | 57 | // 日志内容 SQL 尾部关键词 58 | static final String LOG_CONTENT_SQL_TAIL_KEYWORDS = "End Execute Hive Sql".toUpperCase(); 59 | 60 | List> sqls = Lists.newArrayList(); 61 | String writeSqlFileName; 62 | String resourcesPath; 63 | 64 | /** 65 | * Description: 66 | * 从单行日志中抽取日志内容 67 | * 68 | * @param singleLineLog 单行日志 69 | * @return 日志内容 70 | */ 71 | public String extractSingleLineLogContent(String singleLineLog) { 72 | Matcher logPrefixMatcher = LOG_PREFIX_PATTERN.matcher(singleLineLog); 73 | 74 | String logContent = ""; 75 | 76 | if (logPrefixMatcher.find()) { 77 | logContent = logPrefixMatcher.group(); 78 | } 79 | 80 | return logContent; 81 | } 82 | 83 | /** 84 | * Description: 85 | * 从多行日志中抽取日志内容 86 | * 87 | * @param multiLinesLog 多行日志 88 | * @return 日志内容列表 89 | */ 90 | public List extractMultiLinesLogContent(String multiLinesLog) { 91 | String[] logs = multiLinesLog.split(LOG_DEFAULT_EOL); 92 | 93 | return Arrays.stream(logs) 94 
| .map(this::extractSingleLineLogContent) 95 | .collect(Collectors.toList()); 96 | } 97 | 98 | /** 99 | * Description: 100 | * 判断日志内容是否为 SQL 头部 101 | * 102 | * @param logContent 日志内容 103 | * @return 是否为 SQL 头部 104 | */ 105 | public boolean isSQLHead(String logContent) { 106 | return logContent.toUpperCase().contains(LOG_CONTENT_SQL_HEAD_KEYWORDS); 107 | } 108 | 109 | /** 110 | * Description: 111 | * 判断日志内容是否为 SQL 尾部 112 | * 113 | * @param logContent 日志内容 114 | * @return 是否为 SQL 尾部 115 | */ 116 | public boolean isSQLTail(String logContent) { 117 | return logContent.toUpperCase().contains(LOG_CONTENT_SQL_TAIL_KEYWORDS); 118 | } 119 | 120 | /** 121 | * Description: 122 | * 从日志内容集合中抽取 SQLs 123 | * 124 | * @param logContents 日志内容列表 125 | * @return SQLs 126 | */ 127 | public List extractLogContentsSQLs(List logContents) { 128 | List sqls = Lists.newArrayList(); 129 | 130 | boolean reachSQLHead = false; 131 | boolean reachSQLTail = false; 132 | StringBuffer sqlBuffer = new StringBuffer(); 133 | 134 | for (String logContent : logContents) { 135 | reachSQLTail = isSQLTail(logContent) || reachSQLTail; 136 | 137 | if (reachSQLHead && reachSQLTail) { 138 | sqls.add(sqlBuffer.toString()); 139 | sqlBuffer = new StringBuffer(); 140 | 141 | reachSQLHead = false; 142 | reachSQLTail = false; 143 | } 144 | 145 | if (reachSQLHead && !reachSQLTail) { 146 | sqlBuffer.append(logContent); 147 | sqlBuffer.append("\n"); 148 | } 149 | 150 | reachSQLHead = isSQLHead(logContent) || reachSQLHead; 151 | } 152 | 153 | return sqls.stream() 154 | .map(sql -> sql.split(";")) 155 | .flatMap(Arrays::stream) 156 | .collect(Collectors.toList()); 157 | } 158 | 159 | /** 160 | * Description: 161 | * 从多行日志中抽取 SQLs 162 | * 163 | * @param multiLinesLog 多行日志 164 | * @return SQLs 165 | */ 166 | public List extractLogSQLs(String multiLinesLog) { 167 | List logContents = extractMultiLinesLogContent(multiLinesLog); 168 | return extractLogContentsSQLs(logContents); 169 | } 170 | 171 | 172 | /** 173 | * 
Description: 174 | * 从该路径下的多个文件中抽取SQLs 175 | * @param path 文件路径 176 | * 177 | */ 178 | public void extractPathLogSQLs(String path ,BufferedWriter sqlWriter) throws IOException { 179 | //获取其file对象 180 | File file = new File(path); 181 | isFilePath(file,sqlWriter); 182 | sqlWriter.flush(); 183 | sqlWriter.close(); 184 | } 185 | 186 | public void isFilePath(File file,BufferedWriter sqlWriter) throws IOException { 187 | File[] fs = file.listFiles(); 188 | 189 | for(File f:fs){ 190 | if(f.isDirectory()){ 191 | //若是目录,则递归遍历该目录下的文件 192 | isFilePath(f,sqlWriter); 193 | } 194 | 195 | if(f.isFile()){ 196 | //若是文件,获取单个文件的sql 197 | extractSingleFilesLogSQLs(f.toString(),sqlWriter); 198 | System.out.println(String.format("extracting sql from log file [%s] ...", f.toString())); 199 | } 200 | 201 | } 202 | 203 | 204 | } 205 | 206 | /** 207 | * Decription: 208 | * 获取单个文件的SQL 209 | * @param filename 文件名 210 | * @return sql 211 | * @throws IOException 抛异常 212 | */ 213 | public List extractSingleFilesLogSQLs(String filename,BufferedWriter sqlWriter) throws IOException { 214 | //List logsEAPFLines = FileUtils.readLines(new File(filename), "UTF-8"); 215 | List logsEAPFLines = Lists.newArrayList(); 216 | BufferedReader br = new BufferedReader (new InputStreamReader(new FileInputStream (filename),"UTF-8")); 217 | String str = null; 218 | 219 | while ((str = br.readLine()) != null){ 220 | logsEAPFLines.add(str+""); 221 | } 222 | 223 | List sql = extractLogSQLs(Joiner.on("\n").join(logsEAPFLines)); 224 | sql.stream().forEach(s -> { 225 | try { 226 | sqlWriter.write(s+";"); 227 | } catch (IOException e) { 228 | e.printStackTrace(); 229 | } 230 | }); 231 | 232 | return sql; 233 | 234 | } 235 | } 236 | -------------------------------------------------------------------------------- /src/main/java/com/jd/jr/daat/dw/lineage/utils/SchemaExtractor.java: -------------------------------------------------------------------------------- 1 | package com.jd.jr.daat.dw.lineage.utils; 2 | 3 | import 
org.apache.commons.io.FileUtils; 4 | import org.apache.commons.io.LineIterator; 5 | 6 | import java.io.*; 7 | 8 | public class SchemaExtractor { 9 | public void createTableSchema(String schemaResourcePath ,String createSchemaTablePath ) throws IOException { 10 | File schemaColumnFile = new File(schemaResourcePath); 11 | // File schemaCreateTableFile =new File(createSchemaTablePath); 12 | // FileOutputStream schemaOutPut = new FileOutputStream(schemaCreateTableFile); 13 | BufferedWriter schemaWriter = new BufferedWriter (new OutputStreamWriter(new FileOutputStream (createSchemaTablePath,true),"UTF-8")); 14 | 15 | // FileWriter schemaWriter = new FileWriter(createSchemaTablePath); 16 | LineIterator it = FileUtils.lineIterator(schemaColumnFile, "UTF-8"); 17 | String beforeDatabaseNameTableName =""; 18 | // ArrayList schemaList = new ArrayList(); 19 | try { 20 | while (it.hasNext()) { 21 | String line = it.nextLine(); 22 | String[] column = line.split("\t"); 23 | String databaseNameTableName = column[9]+"."+column[7]; 24 | String columnName = column[1]; 25 | String columnType = column[3]; 26 | 27 | if (columnType==null){ 28 | columnType = "STRING"; 29 | } 30 | 31 | 32 | String columnItem = createColumnItem(columnName,columnType); 33 | 34 | //如果该表名等于上一个表,说明该表还有字段 35 | if (databaseNameTableName.equals(beforeDatabaseNameTableName)){ 36 | // FileUtils.write(schemaCreateTableFile,columnItem ,"UTF-8",true); 37 | schemaWriter.write(columnItem); 38 | } else { 39 | //如果该表名不等于上一个表,说明该表没有字段了或者该表只含有一个字段,结束上个表创建并创建一个新表。 40 | String tableInfoItem = createTableInfo(databaseNameTableName) ; 41 | String endSymbol = createEndSymbol(); 42 | String schema = endSymbol + tableInfoItem + columnItem; 43 | schemaWriter.write(schema); 44 | // FileUtils.write(schemaCreateTableFile, endSymbol , "UTF-8", true); 45 | // FileUtils.write(schemaCreateTableFile, tableInfoItem , "UTF-8", true); 46 | // FileUtils.write(schemaCreateTableFile, columnItem , "UTF-8", true); 47 | } 48 | 49 | 
beforeDatabaseNameTableName = databaseNameTableName ; 50 | } 51 | 52 | //结束最后一个表的创建。 53 | String endSymbol = createEndSymbol(); 54 | // FileUtils.write(schemaCreateTableFile, endSymbol, "UTF-8", true); 55 | schemaWriter.write(endSymbol); 56 | schemaWriter.flush(); 57 | schemaWriter.close(); 58 | } finally { 59 | LineIterator.closeQuietly(it); 60 | } 61 | } 62 | 63 | public String createColumnItem(String columnName,String columnType){ 64 | return columnName +" " + columnType + "," + "\n"; 65 | } 66 | 67 | public String createTableInfo(String databaseNameTableName){ 68 | return "CREATE TABLE IF NOT EXISTS " + databaseNameTableName + "\n" + "("+ "\n"; 69 | } 70 | 71 | public String createEndSymbol(){ 72 | return ");"+ "\n"; 73 | } 74 | } -------------------------------------------------------------------------------- /src/main/java/com/jd/jr/daat/dw/lineage/utils/SchemaLoader.java: -------------------------------------------------------------------------------- 1 | package com.jd.jr.daat.dw.lineage.utils; 2 | 3 | import com.alibaba.druid.sql.SQLUtils; 4 | import com.alibaba.druid.sql.ast.SQLStatement; 5 | import com.alibaba.druid.sql.dialect.hive.stmt.HiveCreateTableStatement; 6 | import com.alibaba.druid.sql.repository.SchemaRepository; 7 | import lombok.extern.slf4j.Slf4j; 8 | 9 | import java.util.List; 10 | import java.util.stream.Collectors; 11 | 12 | @Slf4j 13 | public class SchemaLoader { 14 | public SchemaRepository load(List sqls, String dbType) { 15 | SchemaRepository schemaRepository = new SchemaRepository(dbType); 16 | 17 | sqls.forEach(schemaRepository::console); 18 | 19 | 20 | // List stmtList = sqls.stream() 21 | // .map(sql -> SQLUtils.parseStatements(sql, dbType)) 22 | // .flatMap(List::stream) 23 | // .collect(Collectors.toList()); 24 | // 25 | // log.info("Loading table schemas ..."); 26 | // 27 | // long tableCounter = 0; 28 | // for (SQLStatement stmt : stmtList) { 29 | // if (stmt instanceof HiveCreateTableStatement) { 30 | // 
//schemaRepository.accept(stmt); 31 | // tableCounter++; 32 | // } 33 | // } 34 | // 35 | // log.info(String.format("%d table schemas loaded.", tableCounter)); 36 | 37 | return schemaRepository; 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/resources/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /src/test/java/com/jd/jr/daat/dw/lineage/analysis/ColumnLineageAnalyzerTest.java: -------------------------------------------------------------------------------- 1 | package com.jd.jr.daat.dw.lineage.analysis; 2 | 3 | import com.alibaba.druid.sql.repository.SchemaRepository; 4 | import com.alibaba.druid.util.JdbcConstants; 5 | import com.google.common.collect.Lists; 6 | import com.jd.jr.daat.dw.lineage.domains.lineage.column.ColumnLineageUtils; 7 | import com.jd.jr.daat.dw.lineage.domains.lineage.table.TableColumnLineage; 8 | import com.jd.jr.daat.dw.lineage.utils.SQLExtractor; 9 | import com.jd.jr.daat.dw.lineage.utils.SchemaLoader; 10 | import org.apache.commons.io.FileUtils; 11 | import org.junit.jupiter.api.BeforeEach; 12 | import org.junit.jupiter.api.Test; 13 | 14 | import java.io.File; 15 | import java.io.IOException; 16 | import java.util.*; 17 | import java.util.regex.Matcher; 18 | import java.util.regex.Pattern; 19 | 20 | public class ColumnLineageAnalyzerTest { 21 | private SchemaLoader schemaLoader; 22 | private SQLExtractor sqlExtractor; 23 | private SchemaRepository schemaRepository; 24 | private ColumnLineageAnalyzer analyzer; 25 | 26 | private String logsFilePath; 27 | private List> sqlsEAPF; 28 | private String sqlLineageDemo01; 29 | private List 
sqlsLineageDemo01Tables; 30 | private String FileTableSchemaPath; 31 | private List> Allpath = new ArrayList>(); 32 | 33 | private List results; 34 | 35 | 36 | @BeforeEach 37 | void init() throws IOException { 38 | schemaLoader = new SchemaLoader(); 39 | sqlExtractor = new SQLExtractor(); 40 | 41 | // 获取当前编译好类的路径 42 | String resourcesPath = getClass().getClassLoader().getResource("").getPath(); 43 | 44 | logsFilePath = resourcesPath + "/logs"; 45 | 46 | // sqlsEAPF = sqlExtractor.extractPathLogSQLs(logsFilePath,); 47 | 48 | FileTableSchemaPath = resourcesPath + "/tables/demo-01-table"; 49 | 50 | sqlLineageDemo01 = FileUtils.readFileToString(new File(resourcesPath + 51 | "SQLs/demo-01"), "UTF-8"); 52 | sqlsLineageDemo01Tables = Lists.newArrayList(FileUtils.readFileToString(new File(resourcesPath + 53 | "tables/demo-01-table"), "UTF-8").split(";")); 54 | schemaRepository = schemaLoader.load(sqlsLineageDemo01Tables, JdbcConstants.HIVE); 55 | analyzer = new ColumnLineageAnalyzer(schemaRepository); 56 | } 57 | 58 | List> testGetColumnsLineage() throws IOException { 59 | 60 | schemaRepository = schemaLoader.load(sqlsLineageDemo01Tables, JdbcConstants.HIVE); 61 | 62 | analyzer = new ColumnLineageAnalyzer(schemaRepository); 63 | 64 | TableColumnLineage tableColumnLineage = analyzer.getTableColumnLineage(sqlLineageDemo01, FileTableSchemaPath); 65 | 66 | tableColumnLineage.getColumnLineages().values().forEach(rootColumnNode -> { 67 | results = ColumnLineageUtils.getFirstPathString(rootColumnNode); 68 | Allpath.add(results); 69 | }); 70 | return Allpath; 71 | 72 | } 73 | 74 | 75 | @Test 76 | /** 77 | * Description: 78 | * 合并头尾连接的path 79 | * 80 | */ 81 | public void pathMerge() throws IOException { 82 | List> Allpath = testGetColumnsLineage(); 83 | List path = new ArrayList<>(); 84 | List> mergedPath ; 85 | for (List path1 : Allpath) { 86 | path.add(path1.get(0));//得到所有path集合 87 | } 88 | mergedPath = mergePath(Allpath, Allpath, path); 89 | while(!mergedPath.isEmpty()){ 90 | 
mergedPath= mergePath(Allpath,mergedPath,path) ; 91 | } 92 | System.out.println("--------------------"); 93 | path.stream().forEach(System.out::println); 94 | 95 | //循环遍历 判断当前path1的尾部是否是path2的头部,如果是连接,并删除对应的path, 96 | //一个问题:合并后的path可能还要和其他的path合并 97 | 98 | } 99 | 100 | 101 | public List> mergePath(List> Allpath1, List> Allpath2, List path) { 102 | List mergePathItem; 103 | List> mergedPath = new ArrayList<>(); 104 | for (List path1 : Allpath1) { 105 | String end_1 = path1.get(2); 106 | String path_1 = path1.get(0); 107 | String start_1 = path1.get(1); 108 | for (List path2 : Allpath2) { 109 | String start_2 = path2.get(1); 110 | String path_2 = path2.get(0); 111 | String end_2 = path2.get(2); 112 | if (start_2.equals(end_1)) { 113 | String mergepath = path_1.substring(0, path_1.length() - 5) + 114 | path_2.substring(10, path_2.length()); 115 | mergePathItem = stringToList(mergepath, start_1, end_2); 116 | mergepath = pathAddMergePath(mergepath, path); 117 | if (!path.contains(mergepath)) { 118 | path.add(mergepath); 119 | } 120 | mergedPath.add(mergePathItem); 121 | path.remove(path_1); 122 | 123 | } 124 | } 125 | } 126 | 127 | return mergedPath; 128 | 129 | } 130 | 131 | 132 | public String pathAddMergePath(String mergedpath ,List pathList){ 133 | String addMergePath="" ; 134 | Iterator it = pathList.iterator(); 135 | while(it.hasNext()){ 136 | String path = it.next(); 137 | String mergedPathStart = pathGetStart(mergedpath); 138 | String pathStart =pathGetStart(path); 139 | if(mergedPathStart.equals(pathStart) && mergedpath.length()>path.length()){ 140 | addMergePath=mergedpath; 141 | it.remove(); 142 | break; 143 | }else { 144 | 145 | addMergePath=path; 146 | } 147 | } 148 | return addMergePath; 149 | } 150 | public String pathGetStart(String path){ 151 | String table_name="" ; 152 | String column_name="" ; 153 | String table_name_str = "(?<=table: )[A-Za-z0-9_]{1,50}"; 154 | String column_name_str ="(?<=, column: )[a-z_]{1,20}"; 155 | String database_name_str = 
"(?<=, database: )[a-z_]{1,20}"; 156 | 157 | Pattern table_name_pattern = Pattern.compile(table_name_str); 158 | Pattern column_name_pattern = Pattern.compile(column_name_str); 159 | Matcher table_matcher = table_name_pattern.matcher(path); 160 | Matcher column_matcher = column_name_pattern.matcher(path); 161 | if(table_matcher.find()){ 162 | table_name = table_matcher.group(); 163 | } 164 | if(column_matcher.find()){ 165 | column_name = column_matcher.group(); 166 | } 167 | 168 | return table_name+"."+column_name; 169 | } 170 | 171 | public List stringToList(String path,String start,String end){ 172 | List pathItem =new ArrayList<>(); 173 | pathItem.add(path); 174 | pathItem.add(start); 175 | pathItem.add(end); 176 | return pathItem; 177 | } 178 | 179 | } 180 | -------------------------------------------------------------------------------- /src/test/java/com/jd/jr/daat/dw/lineage/analysis/ColunmnTest.java: -------------------------------------------------------------------------------- 1 | package com.jd.jr.daat.dw.lineage.analysis; 2 | 3 | import com.alibaba.druid.sql.repository.SchemaRepository; 4 | import com.alibaba.druid.util.JdbcConstants; 5 | import com.google.common.collect.Lists; 6 | import com.jd.jr.daat.dw.lineage.utils.SQLExtractor; 7 | import com.jd.jr.daat.dw.lineage.utils.SchemaLoader; 8 | import org.apache.commons.csv.CSVFormat; 9 | import org.apache.commons.csv.CSVPrinter; 10 | import org.apache.commons.csv.QuoteMode; 11 | import org.apache.commons.io.FileUtils; 12 | import org.junit.jupiter.api.BeforeEach; 13 | import org.junit.jupiter.api.Test; 14 | 15 | import java.io.File; 16 | import java.io.FileWriter; 17 | import java.io.IOException; 18 | import java.util.ArrayList; 19 | import java.util.Arrays; 20 | import java.util.List; 21 | import java.util.regex.Matcher; 22 | import java.util.regex.Pattern; 23 | 24 | public class ColunmnTest { 25 | private SchemaLoader schemaLoader; 26 | private SQLExtractor sqlExtractor; 27 | private SchemaRepository 
schemaRepository; 28 | private ColumnLineageAnalyzer analyzer; 29 | private GetColumnsLineage getColumnsLineage; 30 | private String FileLineagePath; 31 | private String FileMergePath; 32 | 33 | private String logsFilePath; 34 | private List> sqlsEAPF; 35 | private String sqlLineageDemo01; 36 | private List sqlsLineageDemo01Tables; 37 | private String FileTableSchemaPath; 38 | private String FileLineageHeadTail; 39 | private List> Allpath = new ArrayList>(); 40 | 41 | private List results; 42 | @BeforeEach 43 | void init() throws IOException { 44 | schemaLoader = new SchemaLoader(); 45 | sqlExtractor = new SQLExtractor(); 46 | getColumnsLineage =new GetColumnsLineage(); 47 | // 获取当前编译好类的路径 48 | String resourcesPath = getClass().getClassLoader().getResource("").getPath(); 49 | 50 | logsFilePath = resourcesPath + "/logs"; 51 | 52 | FileTableSchemaPath = resourcesPath + "/tables/demo-01-table"; 53 | FileLineagePath = resourcesPath +"path.sql"; 54 | FileMergePath = resourcesPath +"merge.sql"; 55 | FileLineageHeadTail = resourcesPath +"LineageHeadTail_ns_jrdw_internal_demoo.csv"; 56 | 57 | sqlLineageDemo01 = FileUtils.readFileToString(new File(resourcesPath + 58 | "process_unparsed/GetParsed.sql"), "UTF-8"); 59 | sqlsLineageDemo01Tables = Lists.newArrayList(FileUtils.readFileToString(new File(resourcesPath + 60 | "tables/demo-01-table"), "UTF-8").split(";")); 61 | schemaRepository = schemaLoader.load(sqlsLineageDemo01Tables, JdbcConstants.HIVE); 62 | analyzer = new ColumnLineageAnalyzer(schemaRepository); 63 | 64 | } 65 | /** 66 | * Description: 对链路进行合并,得到最后的头尾节点 67 | * 思想: 68 | * @param 69 | * @return 70 | */ 71 | @Test 72 | void testPathMerge() throws IOException { 73 | schemaRepository = schemaLoader.load(sqlsLineageDemo01Tables, JdbcConstants.HIVE); 74 | analyzer = new ColumnLineageAnalyzer(schemaRepository); 75 | List> Allpath =getColumnsLineage.getAllpath(analyzer,sqlLineageDemo01,FileTableSchemaPath); 76 | List path = new ArrayList<>(); 77 | List> mergedPath ; 78 
| 79 | System.out.println(Allpath.size()); 80 | for (List path1 : Allpath) { 81 | path.add(path1.get(0));//得到所有path集合 82 | } 83 | 84 | mergedPath = mergePath(Allpath, Allpath, path); 85 | int i=0; 86 | System.out.println("mergedpath "+ i); 87 | while(!mergedPath.isEmpty()){ 88 | i+=1; 89 | mergedPath = mergePath(Allpath,mergedPath,path); 90 | System.out.println("mergedpath "+ i); 91 | } 92 | 93 | System.out.println("--------"); 94 | System.out.println(path.size()); 95 | GetLineageHeadAndTail(path); 96 | 97 | //path.stream().forEach(System.out::println); 98 | } 99 | 100 | /** 101 | * Description: 得到合并的路径 102 | * @param Allpath1 存放所有血缘关系的直接路径 103 | * @param Allpath2 存放每次合并后的路径 104 | * @param path 存放最后合并好的路径,也就是最后打印的路径 105 | * @return 返回每次合并的路径 106 | */ 107 | public List> mergePath(List> Allpath1, List> Allpath2, List path) throws IOException { 108 | List mergePathItem; 109 | List> mergedPath = new ArrayList<>(); 110 | 111 | File mergefile=new File(FileMergePath); 112 | FileWriter mergewriter = new FileWriter(mergefile, true); 113 | 114 | // System.out.println("enter"); 115 | for (List path1 : Allpath1) { 116 | String end_1 = path1.get(2); 117 | String path_1 = path1.get(0); 118 | String start_1 = path1.get(1); 119 | for (List path2 : Allpath2) { 120 | String start_2 = path2.get(1); 121 | String path_2 = path2.get(0); 122 | String end_2 = path2.get(2); 123 | if (start_2.equals(end_1)) { 124 | String mergepath = path_1.substring(0, path_1.length() - 5) + 125 | path_2.substring(10, path_2.length()); 126 | String mergedPathStart = pathGetStart(mergepath).split("\t")[0] +pathGetStart(mergepath).split("\t")[1]+pathGetStart(mergepath).split("\t")[2]; 127 | String mergedPathEnd = pathGetStart(mergepath).split("\t")[3] +pathGetStart(mergepath).split("\t")[4]+pathGetStart(mergepath).split("\t")[5]; 128 | if (!path.contains(mergepath) && !mergedPathStart.equals(mergedPathEnd) && !start_1.equals(end_2)) { 129 | // mergewriter.write(mergepath+"\n"); 130 | // 
mergewriter.write(start_1+"\n"+end_2); 131 | // mergewriter.write("\n"+"\n"); 132 | path.add(mergepath); 133 | mergePathItem = stringToList(mergepath, start_1, end_2); 134 | mergedPath.add(mergePathItem); 135 | } 136 | path.remove(path_1); 137 | } 138 | } 139 | } 140 | 141 | return mergedPath; 142 | 143 | } 144 | 145 | /** 146 | * Description: 得到一条路径的头尾 147 | * @param path 输入路径 148 | * @return 返回 路径的头尾 start databasename tablename columnname end databasename tablename columnname 149 | */ 150 | public String pathGetStart(String path){ 151 | String table_name=" " ; 152 | String column_name=" " ; 153 | String database_name =" "; 154 | String end_table_name= " "; 155 | String end_column_name =" "; 156 | String end_database_name=" "; 157 | String table_name_str = "(?<=, table: )[A-Za-z0-9_]{1,60}"; 158 | String column_name_str ="(?<=, column: )[A-Za-z()*0-9_-]{1,60}"; 159 | String database_name_str ="(?<=, database: )[A-Za-z_]{1,20}"; 160 | String end_column_name_str ="(?<=R, type: )UNSUPPORTED"; 161 | //String end_database_name_str ="(?<=, database: )[a-z_]{1,20}"; 162 | Pattern table_name_pattern = Pattern.compile(table_name_str); 163 | Pattern column_name_pattern = Pattern.compile(column_name_str); 164 | Pattern end_column_name_pattern = Pattern.compile(end_column_name_str); 165 | Pattern database_name_pattern = Pattern.compile(database_name_str); 166 | 167 | Matcher table_matcher = table_name_pattern.matcher(path); 168 | Matcher column_matcher = column_name_pattern.matcher(path); 169 | Matcher end_column_matcher = end_column_name_pattern.matcher(path); 170 | Matcher database_matcher = database_name_pattern.matcher(path); 171 | 172 | if(table_matcher.find()){ 173 | table_name = table_matcher.group(); 174 | } 175 | while(table_matcher.find()){ 176 | end_table_name = table_matcher.group(); 177 | } 178 | if(column_matcher.find()){ 179 | column_name = column_matcher.group(); 180 | } 181 | while (column_matcher.find()){ 182 | end_column_name = column_matcher.group(); 183 
| } 184 | if(database_matcher.find()){ 185 | database_name = database_matcher.group(); 186 | } 187 | while (database_matcher.find()){ 188 | end_database_name = database_matcher.group(); 189 | } 190 | if(end_column_matcher.find()){ 191 | end_column_name = end_column_matcher.group(); 192 | } 193 | return database_name+"\t"+table_name+"\t"+column_name +"\t"+end_database_name+"\t"+end_table_name+"\t"+end_column_name; 194 | } 195 | 196 | /** 197 | * Description : 将路径头和尾合并成列表 198 | * @param path 199 | * @param start 200 | * @param end 201 | * @return 202 | */ 203 | public List stringToList(String path,String start,String end){ 204 | List pathItem =new ArrayList<>(); 205 | pathItem.add(path); 206 | pathItem.add(start); 207 | pathItem.add(end); 208 | return pathItem; 209 | } 210 | 211 | /** 212 | * Description : 得到路径的头尾,也即是最后的结果 213 | * @param pathList 所有合并好的路径 214 | * @throws IOException 215 | */ 216 | public void GetLineageHeadAndTail(List pathList) throws IOException { 217 | File file=new File(FileLineageHeadTail); 218 | FileWriter writer = new FileWriter(file, true); 219 | CSVFormat csvFormat = CSVFormat.newFormat('\t'); 220 | CSVPrinter csvPrinter = new CSVPrinter(writer, csvFormat); 221 | 222 | for(String path:pathList){ 223 | //只包含头尾都是物理表的路径 224 | if(!pathGetStart(path).split("\t")[0].equals("TMP") && !pathGetStart(path).split("\t")[0].equals("tmp")&&!pathGetStart(path).split("\t")[3].equals("tmp")&&!pathGetStart(path).split("\t")[3].equals("TMP")){ 225 | System.out.println(pathGetStart(path)); 226 | String[] writeLine=pathGetStart(path).split("\t"); 227 | csvPrinter.printRecord(Arrays.asList(writeLine)); 228 | csvPrinter.flush(); 229 | 230 | } 231 | 232 | } 233 | 234 | csvPrinter.close(); 235 | writer.close(); 236 | } 237 | } 238 | -------------------------------------------------------------------------------- /src/test/java/com/jd/jr/daat/dw/lineage/analysis/GetPrimaryAndForeignKeyTest.java: 
-------------------------------------------------------------------------------- 1 | package com.jd.jr.daat.dw.lineage.analysis; 2 | 3 | import com.alibaba.druid.sql.repository.SchemaRepository; 4 | import com.alibaba.druid.util.JdbcConstants; 5 | import com.google.common.collect.Lists; 6 | import com.jd.jr.daat.dw.lineage.domains.basic.ForeignKeys; 7 | import com.jd.jr.daat.dw.lineage.domains.lineage.column.ColumnLineageUtils; 8 | import com.jd.jr.daat.dw.lineage.domains.lineage.table.TableColumnLineage; 9 | import com.jd.jr.daat.dw.lineage.utils.SQLExtractor; 10 | import com.jd.jr.daat.dw.lineage.utils.SchemaLoader; 11 | import org.apache.commons.csv.CSVFormat; 12 | import org.apache.commons.csv.CSVPrinter; 13 | import org.apache.commons.csv.QuoteMode; 14 | import org.apache.commons.io.FileUtils; 15 | import org.junit.jupiter.api.BeforeEach; 16 | import org.junit.jupiter.api.Test; 17 | 18 | import java.io.File; 19 | import java.io.FileWriter; 20 | import java.io.IOException; 21 | import java.util.ArrayList; 22 | import java.util.Arrays; 23 | import java.util.List; 24 | import java.util.regex.Matcher; 25 | import java.util.regex.Pattern; 26 | 27 | 28 | public class GetPrimaryAndForeignKeyTest { 29 | private SchemaLoader schemaLoader; 30 | private SQLExtractor sqlExtractor; 31 | private SchemaRepository schemaRepository; 32 | private ColumnLineageAnalyzer analyzer; 33 | 34 | private String logsFilePath; 35 | private List> sqlsEAPF; 36 | private String sqlLineageDemo01; 37 | private List sqlsLineageDemo01Tables; 38 | private String FileTableSchemaPath; 39 | private String FilePrimaryForeignPath; 40 | 41 | private List> Allpath = new ArrayList>(); 42 | 43 | private List results; 44 | 45 | 46 | @BeforeEach 47 | void init() throws IOException { 48 | schemaLoader = new SchemaLoader(); 49 | sqlExtractor = new SQLExtractor(); 50 | 51 | // 获取当前编译好类的路径 52 | String resourcesPath = getClass().getClassLoader().getResource("").getPath(); 53 | 54 | logsFilePath = resourcesPath + 
"/logs"; 55 | 56 | //sqlsEAPF = sqlExtractor.extractPathLogSQLs(logsFilePath); 57 | 58 | FileTableSchemaPath = resourcesPath + "schemaTable.sql"; 59 | FilePrimaryForeignPath =resourcesPath +"LineagePrimaryAndForeign_2.csv"; 60 | 61 | sqlLineageDemo01 = FileUtils.readFileToString(new File(resourcesPath + 62 | "SQLs/ParsedSQL"), "UTF-8"); 63 | sqlsLineageDemo01Tables = Lists.newArrayList(FileUtils.readFileToString(new File(resourcesPath + 64 | "tables/demo-01-table"), "UTF-8").split(";")); 65 | schemaRepository = schemaLoader.load(sqlsLineageDemo01Tables, JdbcConstants.HIVE); 66 | analyzer = new ColumnLineageAnalyzer(schemaRepository); 67 | 68 | } 69 | 70 | 71 | @Test 72 | /** 73 | * Description: 74 | * 合并头尾连接的path,得到主外键结果 75 | * 76 | */ 77 | public void testGetPrimaryAndForeignKey() throws IOException { 78 | schemaRepository = schemaLoader.load(sqlsLineageDemo01Tables, JdbcConstants.HIVE); 79 | 80 | analyzer = new ColumnLineageAnalyzer(schemaRepository); 81 | 82 | // ForeignKeys是一个存储结构,存储了血缘分析结果tableColumnLineage(原始sql的血缘分析和用主外键构造出sql的血缘分析结果)和最终链接到物理表的主外键对列表 83 | // ForeignKeys storeStruct = analyzer.getTableColumnLineage(sqlLineageDemo01, FileTableSchemaPath); 84 | 85 | ForeignKeys storeStruct = analyzer.getTableColumnForeignKeys(sqlLineageDemo01, FileTableSchemaPath); 86 | 87 | //primaryAndforeignKeyList是最主外键对列表;tableColumnLineage是血缘分析结果 88 | ArrayList primaryAndforeignKeyList = storeStruct.getArrayList(); 89 | TableColumnLineage tableColumnLineage = storeStruct.getTableColumnLineage(); 90 | System.out.println("GetColumnLineage and primaryAndforeignKeyList"); 91 | 92 | //连接血缘分析路径,使最终链接到物理表的字段 93 | List path = getPathMerged(tableColumnLineage); 94 | 95 | System.out.println("--------------------"); 96 | int count =0; 97 | File file=new File(FilePrimaryForeignPath); 98 | FileWriter writer = new FileWriter(file, true); 99 | CSVFormat csvFormat = CSVFormat.newFormat('\t'); 100 | CSVPrinter csvPrinter = new CSVPrinter(writer, csvFormat); 101 | 102 | 
//多条sql语句一共有多少的join关系,这里就循环多少次 103 | for(Object primaryAndFreignKey:primaryAndforeignKeyList){ 104 | 105 | String[] primaryAndFreignKeyList = (String[])primaryAndFreignKey; 106 | String primaryKey=""; 107 | String foreignKey=""; 108 | 109 | //得到sql的join关系对应的主外键的初始字段 110 | try{ 111 | primaryKey = primaryAndFreignKeyList[0].split("\\.")[1]; 112 | foreignKey = primaryAndFreignKeyList[1].split("\\.")[1]; 113 | }catch (ArrayIndexOutOfBoundsException e){ 114 | count++; 115 | System.out.println(count); 116 | continue; 117 | } 118 | 119 | //primaryAndForeignRealList用于接收血缘分析后对应物理表的主外键对儿 120 | String[] primaryAndForeignRealList = new String[2]; 121 | for (String pathPer : path) { 122 | 123 | //根据血缘分析结果得到初始临时表字段名和最终物理表对应的字段名 124 | String[] tableNameS = pathPer.split("->"); 125 | String startTableName = tableNameS[1].split(",")[2].substring(8); 126 | String realColunmnName = tableNameS[tableNameS.length - 2]; 127 | 128 | //得到临时表join关系对应字段血缘关系分析后到最终物理表的字段,也就是主外键 129 | if (primaryKey.equals(startTableName) ) { 130 | primaryAndForeignRealList[0] = realColunmnName; 131 | } 132 | if (foreignKey.equals(startTableName)) { 133 | primaryAndForeignRealList[1] = realColunmnName; 134 | } 135 | } 136 | 137 | // 去除掉未映射到最终物理表的和主外键相同的,然后输出 138 | String notRealLabel = primaryAndForeignRealList[1].split(",")[1].substring(1,5); 139 | if (!"STOP".equals(notRealLabel) && !primaryAndForeignRealList[0].equals(primaryAndForeignRealList[1])){ 140 | String concatPrimaryForeignPath = primaryAndForeignRealList[0]+primaryAndForeignRealList[1]; 141 | String[] writeLine = pathGetStart(concatPrimaryForeignPath ).split("\t"); 142 | csvPrinter.printRecord(Arrays.asList(writeLine)); 143 | csvPrinter.flush(); 144 | //System.out.println(primaryAndForeignRealList[0] + " " + primaryAndForeignRealList[1]); 145 | } 146 | 147 | } 148 | csvPrinter.close(); 149 | writer.close(); 150 | 151 | } 152 | 153 | /** 154 | * Description: 对链路进行合并,得到最后的头尾节点 155 | * 思想: 156 | * @param tableColumnLineage :所有血缘关系的的链路 157 | * @return 
158 | */ 159 | 160 | private List getPathMerged(TableColumnLineage tableColumnLineage) throws IOException { 161 | //ALLpath存放的所有血缘关系的直接链路(未合并的) 162 | tableColumnLineage.getColumnLineages().values().forEach(rootColumnNode -> { 163 | results = ColumnLineageUtils.getFirstPathString(rootColumnNode); 164 | if(results!=null){ 165 | Allpath.add(results);} 166 | }); 167 | //path存放最后合并好的链路 168 | List path = new ArrayList<>(); 169 | //mergedPath存放每次合并的链路 170 | List> mergedPath ; 171 | 172 | for (List path1 : Allpath) { 173 | path.add(path1.get(0));//得到所有path集合 174 | } 175 | 176 | mergedPath = mergePath(Allpath, Allpath, path); 177 | int i=0; 178 | System.out.println("mergedpath "+ i); 179 | while(!mergedPath.isEmpty()){ 180 | i+=1; 181 | mergedPath = mergePath(Allpath,mergedPath,path); 182 | System.out.println("mergedpath "+ i); 183 | } 184 | 185 | System.out.println("--------"); 186 | System.out.println(path.size()); 187 | 188 | return path; 189 | } 190 | 191 | /** 192 | * Description: 得到合并的路径 193 | * @param Allpath1 存放所有血缘关系的直接路径 194 | * @param Allpath2 存放每次合并后的路径 195 | * @param path 存放最后合并好的路径,也就是最后打印的路径 196 | * @return 返回每次合并的路径 197 | */ 198 | 199 | /** 200 | * Description: 得到合并的路径 201 | * @param Allpath1 存放所有血缘关系的直接路径 202 | * @param Allpath2 存放每次合并后的路径 203 | * @param path 存放最后合并好的路径,也就是最后打印的路径 204 | * @return 返回每次合并的路径 205 | */ 206 | public List> mergePath(List> Allpath1, List> Allpath2, List path) throws IOException { 207 | List mergePathItem; 208 | List> mergedPath = new ArrayList<>(); 209 | 210 | for (List path1 : Allpath1) { 211 | String end_1 = path1.get(2); 212 | String path_1 = path1.get(0); 213 | String start_1 = path1.get(1); 214 | for (List path2 : Allpath2) { 215 | String start_2 = path2.get(1); 216 | String path_2 = path2.get(0); 217 | String end_2 = path2.get(2); 218 | if (start_2.equals(end_1)) { 219 | String mergepath = path_1.substring(0, path_1.length() - 5) + 220 | path_2.substring(10, path_2.length()); 221 | String mergedPathStart = 
pathGetStart(mergepath).split("\t")[0] +pathGetStart(mergepath).split("\t")[1]+pathGetStart(mergepath).split("\t")[2]; 222 | String mergedPathEnd = pathGetStart(mergepath).split("\t")[3] +pathGetStart(mergepath).split("\t")[4]+pathGetStart(mergepath).split("\t")[5]; 223 | if (!path.contains(mergepath) && !mergedPathStart.equals(mergedPathEnd) && !start_1.equals(end_2)) { 224 | path.add(mergepath); 225 | mergePathItem = stringToList(mergepath, start_1, end_2); 226 | mergedPath.add(mergePathItem); 227 | } 228 | path.remove(path_1); 229 | } 230 | } 231 | } 232 | 233 | return mergedPath; 234 | 235 | } 236 | 237 | /** 238 | * Description: 得到一条路径的头尾 239 | * @param path 输入路径 240 | * @return 返回 路径的头尾 start databasename tablename columnname end databasename tablename columnname 241 | */ 242 | public String pathGetStart(String path){ 243 | String table_name=" " ; 244 | String column_name=" " ; 245 | String database_name =" "; 246 | String end_table_name= " "; 247 | String end_column_name =" "; 248 | String end_database_name=" "; 249 | String table_name_str = "(?<=, table: )[A-Za-z0-9_]{1,60}"; 250 | String column_name_str ="(?<=, column: )[A-Za-z()*0-9_-]{1,60}"; 251 | String database_name_str ="(?<=, database: )[A-Za-z_]{1,20}"; 252 | String end_column_name_str ="(?<=R, type: )UNSUPPORTED"; 253 | //String end_database_name_str ="(?<=, database: )[a-z_]{1,20}"; 254 | Pattern table_name_pattern = Pattern.compile(table_name_str); 255 | Pattern column_name_pattern = Pattern.compile(column_name_str); 256 | Pattern end_column_name_pattern = Pattern.compile(end_column_name_str); 257 | Pattern database_name_pattern = Pattern.compile(database_name_str); 258 | 259 | Matcher table_matcher = table_name_pattern.matcher(path); 260 | Matcher column_matcher = column_name_pattern.matcher(path); 261 | Matcher end_column_matcher = end_column_name_pattern.matcher(path); 262 | Matcher database_matcher = database_name_pattern.matcher(path); 263 | 264 | if(table_matcher.find()){ 265 | table_name 
= table_matcher.group(); 266 | } 267 | while(table_matcher.find()){ 268 | end_table_name = table_matcher.group(); 269 | } 270 | if(column_matcher.find()){ 271 | column_name = column_matcher.group(); 272 | } 273 | while (column_matcher.find()){ 274 | end_column_name = column_matcher.group(); 275 | } 276 | if(database_matcher.find()){ 277 | database_name = database_matcher.group(); 278 | } 279 | while (database_matcher.find()){ 280 | end_database_name = database_matcher.group(); 281 | } 282 | if(end_column_matcher.find()){ 283 | end_column_name = end_column_matcher.group(); 284 | } 285 | return database_name+"\t"+table_name+"\t"+column_name +"\t"+end_database_name+"\t"+end_table_name+"\t"+end_column_name; 286 | } 287 | 288 | /** 289 | * Description : 将路径头和尾合并成列表 290 | * @param path 291 | * @param start 292 | * @param end 293 | * @return 294 | */ 295 | public List stringToList(String path,String start,String end){ 296 | List pathItem =new ArrayList<>(); 297 | pathItem.add(path); 298 | pathItem.add(start); 299 | pathItem.add(end); 300 | return pathItem; 301 | } 302 | 303 | 304 | 305 | 306 | } 307 | -------------------------------------------------------------------------------- /src/test/java/com/jd/jr/daat/dw/lineage/utils/GetUnparsedSQLTest.java: -------------------------------------------------------------------------------- 1 | package com.jd.jr.daat.dw.lineage.utils; 2 | 3 | import com.alibaba.druid.sql.SQLUtils; 4 | import com.alibaba.druid.sql.ast.SQLStatement; 5 | import com.alibaba.druid.sql.repository.SchemaRepository; 6 | import com.alibaba.druid.util.JdbcConstants; 7 | import com.google.common.collect.Lists; 8 | import com.jd.jr.daat.dw.lineage.analysis.ColumnLineageAnalyzer; 9 | import org.apache.commons.io.FileUtils; 10 | import org.junit.jupiter.api.BeforeEach; 11 | import org.junit.jupiter.api.Test; 12 | 13 | import java.io.*; 14 | import java.util.ArrayList; 15 | import java.util.List; 16 | 17 | public class GetUnparsedSQLTest { 18 | private 
SchemaLoader schemaLoader; 19 | private SQLExtractor sqlExtractor; 20 | private SchemaRepository schemaRepository; 21 | private ColumnLineageAnalyzer analyzer; 22 | 23 | private String logsFilePath; 24 | private List> sqlsEAPF; 25 | private List sqlLineageDemo01; 26 | private List sqlsLineageDemo01Tables; 27 | private String FileTableSchemaPath; 28 | private String unparsedSqlPath; 29 | private String parsedSqlPath; 30 | private String writeFilePath; 31 | private String readFilePath; 32 | 33 | private List> Allpath = new ArrayList>(); 34 | 35 | private List results; 36 | private int count; 37 | private int normalCount; 38 | 39 | 40 | @BeforeEach 41 | void init() throws IOException { 42 | schemaLoader = new SchemaLoader(); 43 | sqlExtractor = new SQLExtractor(); 44 | 45 | // 获取当前编译好类的路径 46 | String resourcesPath = getClass().getClassLoader().getResource("").getPath(); 47 | 48 | logsFilePath = resourcesPath + "/logs"; 49 | 50 | //sqlsEAPF = sqlExtractor.extractPathLogSQLs(logsFilePath); 51 | 52 | FileTableSchemaPath = resourcesPath + "schemaTable.sql"; 53 | 54 | unparsedSqlPath = resourcesPath + "process_unparsed/GetSubUnParsed.sql"; 55 | parsedSqlPath = resourcesPath + "process_unparsed/GetSubParsed.sql"; 56 | 57 | writeFilePath = resourcesPath + "process_unparsed/GetSubParsed.sql"; 58 | readFilePath = resourcesPath + "process_unparsed/GetParsed.sql"; 59 | 60 | sqlLineageDemo01 = Lists.newArrayList(FileUtils.readFileToString(new File(resourcesPath + 61 | "process_unparsed/SubParsed.sql"), "UTF-8").split(";")); 62 | sqlsLineageDemo01Tables = Lists.newArrayList(FileUtils.readFileToString(new File(resourcesPath + 63 | "tables/demo-01-table"), "UTF-8").split(";")); 64 | schemaRepository = schemaLoader.load(sqlsLineageDemo01Tables, JdbcConstants.HIVE); 65 | analyzer = new ColumnLineageAnalyzer(schemaRepository); 66 | 67 | } 68 | 69 | 70 | @Test 71 | /** 72 | * Description: 73 | * 合并头尾连接的path,得到主外键结果 74 | * 75 | */ 76 | public void testGetUnparsedSQL() throws IOException 
{ 77 | schemaRepository = schemaLoader.load(sqlsLineageDemo01Tables, JdbcConstants.HIVE); 78 | 79 | analyzer = new ColumnLineageAnalyzer(schemaRepository); 80 | 81 | BufferedWriter unparsedSqlWriter = new BufferedWriter (new OutputStreamWriter(new FileOutputStream(unparsedSqlPath,true),"UTF-8")); 82 | BufferedWriter parsedSqlWriter = new BufferedWriter (new OutputStreamWriter(new FileOutputStream(parsedSqlPath,true),"UTF-8")); 83 | 84 | // newStruct是一个存储结构,存储了血缘分析结果tableColumnLineage(原始sql的血缘分析和用主外键构造出sql的血缘分析结果)和最终链接到物理表的主外键对列表 85 | 86 | for (String sql : sqlLineageDemo01) { 87 | try { 88 | List stmtList = SQLUtils.parseStatements(sql, schemaRepository.getDbType()); 89 | 90 | stmtList.forEach(schemaRepository::resolve); 91 | for (SQLStatement stmt : stmtList) { 92 | parsedSqlWriter.write(sql+";"); 93 | continue; 94 | } 95 | } catch (com.alibaba.druid.sql.parser.ParserException e) { 96 | //System.out.println(sql); 97 | count++; 98 | System.out.println(count); 99 | unparsedSqlWriter.write(sql+";"); 100 | } 101 | } 102 | unparsedSqlWriter.flush(); 103 | unparsedSqlWriter.close(); 104 | parsedSqlWriter.flush(); 105 | parsedSqlWriter.close(); 106 | } 107 | 108 | /** 109 | * 合并两个文件 110 | * @param 111 | * @param 112 | */ 113 | @Test 114 | public void mergeFile() throws IOException { 115 | 116 | BufferedReader br = new BufferedReader (new InputStreamReader(new FileInputStream(readFilePath),"UTF-8")); 117 | BufferedWriter bw = new BufferedWriter (new OutputStreamWriter(new FileOutputStream(writeFilePath,true),"UTF-8")); 118 | String str =null; 119 | while((str = br.readLine()) != null){ 120 | bw.write(str+"\n"); 121 | } 122 | bw.flush(); 123 | bw.close(); 124 | br.close(); 125 | } 126 | 127 | } 128 | -------------------------------------------------------------------------------- /src/test/java/com/jd/jr/daat/dw/lineage/utils/ProcessUnparsedSQLTest.java: -------------------------------------------------------------------------------- 1 | package 
com.jd.jr.daat.dw.lineage.utils;

import com.google.common.collect.Lists;
import org.apache.commons.io.FileUtils;
import org.junit.jupiter.api.Test;

import java.io.*;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Experiments with regex-based rewrites that strip or neutralise Hive SQL constructs
 * before a statement is written to the "parsed" or "unparsed" output file.
 *
 * <p>The {@code processUnparsed*} helpers each apply one rewrite rule and return the
 * rewritten statement; {@link #getParsedSQL()} chains them over a file of statements.
 * The remaining {@code @Test} methods are scratch experiments that only print their
 * result and assert nothing.
 *
 * <p>All helpers match case-insensitively against the statement as given, so text that
 * is not removed (identifiers, string literals) keeps its original case.
 */
class ProcessUnparsedSQLTest {
    private static final String ENCODING = "UTF-8";

    /** Opening of a {@code split(} call. */
    private static final Pattern SPLIT_PREFIX_PATTERN =
            Pattern.compile("split\\s*\\(", Pattern.CASE_INSENSITIVE);

    /** Separator argument, closing parenthesis and {@code [n]} subscript of a split call. */
    private static final Pattern SPLIT_SUFFIX_PATTERN = Pattern.compile(
            "\\,\\s*\'*[a-zA-Z\\d\\_\\-\"|%#&;.,:*?{}()=\\[\\]\\\\]{1,30}\\s*\'*\\s*\\)\\s*\\[\\d+\\]");

    /**
     * {@code row_number() / rank() / count(1) ... over(...)} with an optional {@code as}.
     * NOTE(review): the last alternative requires the literal text {@code sum(x)() over},
     * so it never matches an ordinary {@code sum(x) over(...)}; left as written because
     * removing such an expression would also drop the summed source column.
     */
    private static final Pattern SORT_PATTERN = Pattern.compile(
            "(row_number)\\s*\\(\\)\\s*over\\s*\\(.*\\)\\s*(as)*|(rank)\\(\\)\\s*over\\s*\\(.*\\)\\s*(as)*|(count)\\(1\\)\\s*over\\s*\\(.*\\)\\s*(as)*|(sum\\(\\w+\\))\\(\\)\\s*over\\s*\\(.*\\)\\s*(as)*",
            Pattern.CASE_INSENSITIVE);

    /** Numeric array subscript such as {@code [0]}. */
    private static final Pattern BRACKET_PATTERN = Pattern.compile("\\[\\d+\\]");

    private static final Pattern OVER_DISTRIBUTE_BY_PATTERN =
            Pattern.compile("over\\s*\\(\\s*distribute by.*\\)", Pattern.CASE_INSENSITIVE);

    private static final Pattern DISTRIBUTE_BY_PATTERN =
            Pattern.compile("distribute by\\s*\\w+", Pattern.CASE_INSENSITIVE);

    private static final Pattern CREATE_PATTERN =
            Pattern.compile("create\\s*table\\s*(if not exists)*.*", Pattern.CASE_INSENSITIVE);

    private static final Pattern SELECT_PATTERN =
            Pattern.compile("select\\s+", Pattern.CASE_INSENSITIVE);

    /** {@code grouping sets (...)} with up to three levels of nested parentheses. */
    private static final Pattern GROUPING_SETS_PATTERN = Pattern.compile(
            "grouping sets\\s*\\n*\\([^\\(\\)]*(\\([^\\(\\)]*(\\([^\\(\\)]*\\)[^\\(\\)]*)*\\)[^\\(\\)]*)*\\)",
            Pattern.CASE_INSENSITIVE);

    /** Single-line {@code over(...)}; greedy, so it runs to the last ')' on the line. */
    private static final Pattern OVER_PATTERN =
            Pattern.compile("over\\(.*\\)", Pattern.CASE_INSENSITIVE);

    /** {@code over(} followed by exactly two line breaks before the closing ')'. */
    private static final Pattern OVER_MULTILINE_PATTERN =
            Pattern.compile("over\\(\\n.*\\n.*\\)", Pattern.CASE_INSENSITIVE);

    private static final Pattern INTEGER_ALIAS_PATTERN =
            Pattern.compile("as\\s(\\d+_)+", Pattern.CASE_INSENSITIVE);

    private static final Pattern INDEX_PATTERN =
            Pattern.compile("index(?!\\w)", Pattern.CASE_INSENSITIVE);

    private ProcessUnparsedSQL SQLProcess;
    String resourcesPath = getClass().getClassLoader().getResource("").getPath();
    private List<String> sqlLineageDemo01;

    String parsedSplitSQLPath = resourcesPath + "process_unparsed/SubParsed.sql";
    String unParsedSQLPath = resourcesPath + "process_unparsed/SubUnParsed.sql";

    /**
     * Reads {@code process_unparsed/GetUnParsed.sql}, splits it on ';', applies every
     * rewrite rule to each statement and appends the result to one of two files:
     * statements changed by at least one rule go to {@code SubParsed.sql}, unchanged
     * statements go to {@code SubUnParsed.sql}. Statements rewritten to the empty
     * string are dropped.
     *
     * @throws IOException if the input cannot be read or an output cannot be written
     */
    @Test
    void getParsedSQL() throws IOException {
        sqlLineageDemo01 = Lists.newArrayList(FileUtils.readFileToString(
                new File(resourcesPath + "process_unparsed/GetUnParsed.sql"), ENCODING).split(";"));

        // try-with-resources flushes and closes both writers even when a rule or a write fails.
        try (BufferedWriter parsedSplitSQLWriter = new BufferedWriter(new OutputStreamWriter(
                     new FileOutputStream(parsedSplitSQLPath, true), ENCODING));
             BufferedWriter unParsedWriter = new BufferedWriter(new OutputStreamWriter(
                     new FileOutputStream(unParsedSQLPath, true), ENCODING))) {
            for (String beforeSQL : sqlLineageDemo01) {
                String sql = processUnparsedCreateSQL(beforeSQL);
                sql = processUnparsedSplitSQL(sql);
                sql = processUnparsedSortSQL(sql);
                sql = processUnparsedGroupingSets(sql);
                sql = processUnparsedDistributeBy(sql);
                sql = processUnparsedBracketSQL(sql);
                sql = processUnparsedOver(sql);
                sql = processUnparsedOver1(sql);
                sql = processUnparseInteger(sql);
                sql = processUnparsedIndex(sql);

                if (sql.equals("")) {
                    continue;
                }
                // An unchanged statement means none of the rules above applied to it.
                BufferedWriter target = beforeSQL.equals(sql) ? unParsedWriter : parsedSplitSQLWriter;
                target.write(sql + ";" + "\n");
            }
        }
    }

    /**
     * Unwraps {@code split(expr, 'sep')[n]} calls, leaving only the inner expression.
     *
     * <p>The statement is returned unchanged unless both the {@code split(} prefix and
     * the {@code , 'sep')[n]} suffix are found, or when removing them leaves nothing.
     *
     * @param sql the statement to rewrite
     * @return the statement without split wrappers, or {@code sql} itself
     * @throws IOException never thrown by this implementation; kept for callers
     */
    public String processUnparsedSplitSQL(String sql) throws IOException {
        Matcher prefixMatcher = SPLIT_PREFIX_PATTERN.matcher(sql);
        if (!prefixMatcher.find()) {
            return sql;
        }
        String withoutPrefix = prefixMatcher.replaceAll("");

        Matcher suffixMatcher = SPLIT_SUFFIX_PATTERN.matcher(withoutPrefix);
        if (!suffixMatcher.find()) {
            return sql;
        }
        return orOriginalWhenEmpty(suffixMatcher.replaceAll(""), sql);
    }

    /**
     * Removes {@code row_number()/rank()/count(1) over(...)} expressions together with
     * a following {@code as}, so that only the alias after them remains.
     *
     * @param sql the statement to rewrite
     * @return the rewritten statement, or {@code sql} when nothing matched or the
     *         rewrite would leave an empty string
     */
    public String processUnparsedSortSQL(String sql) {
        return orOriginalWhenEmpty(SORT_PATTERN.matcher(sql).replaceAll(""), sql);
    }

    /**
     * Removes numeric array subscripts such as {@code [0]}.
     *
     * @param sql the statement to rewrite
     * @return the rewritten statement, or {@code sql} when nothing matched or the
     *         rewrite would leave an empty string
     */
    public String processUnparsedBracketSQL(String sql) {
        return orOriginalWhenEmpty(BRACKET_PATTERN.matcher(sql).replaceAll(""), sql);
    }

    /**
     * Removes {@code over(distribute by ...)} clauses and then any remaining
     * {@code distribute by <word>}.
     *
     * <p>The second rule runs on the output of the first; applying both to the original
     * input would let the second result overwrite the first and leave the
     * {@code over(...)} shell in place.
     *
     * @param sql the statement to rewrite
     * @return the statement without distribute-by constructs
     */
    public String processUnparsedDistributeBy(String sql) {
        String withoutOverClause = OVER_DISTRIBUTE_BY_PATTERN.matcher(sql).replaceAll("");
        return DISTRIBUTE_BY_PATTERN.matcher(withoutOverClause).replaceAll("");
    }

    /**
     * Drops a {@code create table} statement that contains no {@code select}.
     *
     * @param sql the statement to inspect
     * @return the empty string for a plain {@code create table}; otherwise {@code sql}
     */
    public String processUnparsedCreateSQL(String sql) {
        boolean isCreate = CREATE_PATTERN.matcher(sql).find();
        // "select" must be followed by whitespace so column names like select_pwd do not count.
        boolean hasSelect = SELECT_PATTERN.matcher(sql).find();
        return (isCreate && !hasSelect) ? "" : sql;
    }

    /**
     * Removes {@code grouping sets (...)} clauses.
     *
     * @param sql the statement to rewrite
     * @return the statement without grouping-sets clauses
     */
    public String processUnparsedGroupingSets(String sql) {
        return GROUPING_SETS_PATTERN.matcher(sql).replaceAll("");
    }

    /**
     * Removes single-line {@code over(...)} clauses.
     *
     * @param sql the statement to rewrite
     * @return the statement without those clauses
     */
    public String processUnparsedOver(String sql) {
        return OVER_PATTERN.matcher(sql).replaceAll("");
    }

    /**
     * Removes {@code over(...)} clauses whose body is spread over line breaks.
     *
     * @param sql the statement to rewrite
     * @return the statement without those clauses
     */
    public String processUnparsedOver1(String sql) {
        return OVER_MULTILINE_PATTERN.matcher(sql).replaceAll("");
    }

    /**
     * Replaces {@code as} plus a leading run of {@code <digits>_} groups with a single
     * space, so that an alias no longer starts with a digit.
     *
     * @param sql the statement to rewrite
     * @return the rewritten statement
     */
    public String processUnparseInteger(String sql) {
        return INTEGER_ALIAS_PATTERN.matcher(sql).replaceAll(" ");
    }

    /**
     * Replaces {@code index}, when not followed by a word character, with {@code index_}.
     *
     * @param sql the statement to rewrite
     * @return the rewritten statement
     */
    public String processUnparsedIndex(String sql) {
        return INDEX_PATTERN.matcher(sql).replaceAll("index_");
    }

    /** Returns {@code rewritten}, or {@code original} when the rewrite left nothing. */
    private static String orOriginalWhenEmpty(String rewritten, String original) {
        return rewritten.equals("") ? original : rewritten;
    }

    /**
     * Scratch experiment: shows that a bare {@code "select"} regex also hits the column
     * {@code select_pwd}, so this DDL is not treated as a plain create statement.
     */
    @Test
    public void processUnparsedCreateSQL1() {
        String sql = "CREATE TABLE IF NOT EXISTS DWD.DWD_O_MY1_MGR_USER_I_I_D(\n" +
                " etl_dt string comment 'ETL日期'\n" +
                " ,jrjt_del_dt string comment '删除日期'\n" +
                " ,id bigint comment '自增主键'\n" +
                " ,select_pwd string comment '数据源用户密码查询秘钥'\n" +
                " ,erp string comment '用户erp'\n" +
                " ,user_name string comment '用户姓名(中文)'\n" +
                " ,email string comment '用户邮箱'\n" +
                " ,tel string comment '联系电话'\n" +
                " ,designated_person string comment '任务指派人'\n" +
                " ,department_id string comment '关联部门id'\n" +
                " ,create_time string comment '用户创建时间')COMMENT '201801081433022 测试任务'\n" +
                "PARTITIONED BY (dt string COMMENT '日期分区')\n" +
                "ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'\n" +
                "STORED AS ORC\n" +
                ";";
        Matcher createMatcher = Pattern.compile("create table if not exists").matcher(sql.toLowerCase());
        Matcher selectMatcher = Pattern.compile("select").matcher(sql.toLowerCase());

        if (createMatcher.find() && !selectMatcher.find()) {
            System.out.println(createMatcher.group());
            System.out.println("ok");
            sql = "";
        }
        System.out.println(sql.toLowerCase());
    }

    /** Scratch experiment: prints the window expression found and what remains after removal. */
    @Test
    public void processUnparsedSortSQL1() {
        String sql = " count(1) over(distribute by new_type,sdtperiod sort by score_all desc) rsc,";
        final String sortRegex = "(row_number)\\(\\)\\sover\\(.*\\)\\s(as)*|(rank)\\(\\)\\sover\\(.*\\)\\s(as)*|(count)\\(1\\)\\sover\\(.*\\)\\s(as)*";
        Matcher sortMatcher = Pattern.compile(sortRegex).matcher(sql.toLowerCase());

        if (sortMatcher.find()) {
            System.out.println(sortMatcher.group());
            sql = sortMatcher.replaceAll("");
        }
        System.out.println(sql);
    }

    /**
     * Scratch experiment: runs a two-level and a three-level nesting variant of the
     * grouping-sets regex against the same input and prints each match.
     */
    @Test
    public void processUnparsedGroupingSets1() {
        String sql = "grouping sets ((dim_day),(dim_day,investor))";
        String twoLevelRegex = "grouping sets\\s*\\n*\\([^\\(\\)]*(\\([^\\(\\)]*\\)[^\\(\\)]*)*\\)";
        String threeLevelRegex = "grouping sets\\s*\\n*\\([^\\(\\)]*(\\([^\\(\\)]*(\\([^\\(\\)]*\\)[^\\(\\)]*)*\\)[^\\(\\)]*)*\\)";

        // Both matchers are bound to the original input, as in the first version of this experiment.
        Matcher twoLevelMatcher = Pattern.compile(twoLevelRegex).matcher(sql);
        Matcher threeLevelMatcher = Pattern.compile(threeLevelRegex).matcher(sql);

        if (twoLevelMatcher.find()) {
            System.out.println("ok");
            System.out.println(twoLevelMatcher.group());
            sql = twoLevelMatcher.replaceAll("");
        }
        if (threeLevelMatcher.find()) {
            System.out.println("ok");
            System.out.println(threeLevelMatcher.group());
            sql = threeLevelMatcher.replaceAll("");
        }
        System.out.println(sql);
    }

    /** Scratch experiment: unwraps split calls whose separator is an escaped regex. */
    @Test
    void processUnparsedSQL() throws IOException {
        String sql = "split(csl_col_parser(value,'jcd'), \"\\\\\\\\||\\\\%7C\")[0] in ('',' ','null','NULL') then null else split(csl_col_parser(value,'jcd'), \"\\\\\\\\||\\\\%7C\")[0] end as deviceid_base";
        final String prefixRegex = "split\\s*\\(";
        final String suffixRegex = "\\,\\s*\'*[a-zA-Z\\d\\_\\-\"|%#&.,:*?{}()=\\[\\]\\\\]{1,30}\\s*\'*\\s*\\)\\s*\\[\\d\\]";

        String withoutPrefix = "";
        Matcher prefixMatcher = Pattern.compile(prefixRegex).matcher(sql);
        if (prefixMatcher.find()) {
            System.out.println(prefixMatcher.group(0));
            withoutPrefix = prefixMatcher.replaceAll("");
        }

        Matcher suffixMatcher = Pattern.compile(suffixRegex).matcher(withoutPrefix);
        if (suffixMatcher.find()) {
            System.out.println(suffixMatcher.replaceAll(""));
        }
    }

    /** Scratch experiment: removes a bare {@code distribute by <column>}. */
    @Test
    public void processUnparsedDistributeBy1() {
        String sql = "distribute by dt";
        Matcher distributeByMatcher = Pattern.compile("distribute by\\s\\w+").matcher(sql);

        if (distributeByMatcher.find()) {
            System.out.println("ok");
            System.out.println(distributeByMatcher.group());
            sql = distributeByMatcher.replaceAll("");
        }
        System.out.println(sql);
    }

    /**
     * Unfinished placeholder (not annotated as a test): searches an empty statement for
     * {@code select}, so it currently only prints an empty line.
     */
    public void processUnparsedIntegerSQL() {
        String sql = "";
        Matcher selectMatcher = Pattern.compile("select").matcher(sql);

        if (selectMatcher.find()) {
            System.out.println("ok");
            System.out.println(selectMatcher.group());
            sql = selectMatcher.replaceAll("");
        }
        System.out.println(sql);
    }
}
--------------------------------------------------------------------------------
/src/test/java/com/jd/jr/daat/dw/lineage/utils/SQLExtractorTest.java:
--------------------------------------------------------------------------------
package com.jd.jr.daat.dw.lineage.utils;

import com.google.common.base.Joiner;
import org.apache.commons.io.FileUtils;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import java.io.*;
import java.util.List;

/**
 * Smoke tests for {@link SQLExtractor} driven by a sample job log on the test classpath.
 * The value assertions are currently disabled (kept below as comments), so each test
 * only verifies that the call completes without throwing.
 */
class SQLExtractorTest {
    private SQLExtractor sqlExtractor;
    private List<String> logsEAPFLines;
    private String filename;
    String writeSqlFileName;
    String resourcesPath;
    String logsPath;

    /** Creates the extractor and loads the sample log file line by line. */
    @BeforeEach
    void init() throws Exception {
        sqlExtractor = new SQLExtractor();

        resourcesPath = getClass().getClassLoader().getResource("").getPath();
        logsPath = resourcesPath + "/logs";
        filename = resourcesPath + "/JDW_DMT_WEIXIN_PAYUSER_SEARCH_ACTIVITY_7DAYS-2018-05-15-73.log";
        writeSqlFileName = resourcesPath + "SQLS/sqls_ns_jrdw_internal_1.sql";
        logsEAPFLines = FileUtils.readLines(new File(filename), "UTF-8");
    }

    @Test
    void testExtractSingleLogContent() {
        String logContent_ = "job exe Command...perl /export/jrdw/private/ns_jrdw_internal/" +
                "JDW_DMR_S_EAPF_ENTERPRISE_ADDRESS_RELATION_S_D/JDW_DMR_S_EAPF_ENTERPRISE_ADDRESS_RELATION_S_D.pl" +
                " JDW_JDW_DMR_S_EAPF_ENTERPRISE_ADDRESS_RELATION_S_D_20180515.dir";
        String logContent = sqlExtractor.extractSingleLineLogContent(logsEAPFLines.get(0));
        // Disabled: assertEquals(logContent_, logContent);
    }

    @Test
    void testExtractMultiLinesLogContent() {
        String multiLinesLog = String.format("%s\n%s", logsEAPFLines.get(3), logsEAPFLines.get(4));
        // Disabled:
        // List<String> logContents_ = Arrays.asList("Run start...pid:156912", " ");
        // List<String> logContents = sqlExtractor.extractMultiLinesLogContent(multiLinesLog);
        // assertEquals(logContents_, logContents);
    }

    @Test
    void testIsSQLHeadAndTail() {
        List<String> logContents = sqlExtractor.extractMultiLinesLogContent(
                Joiner.on("\n").join(logsEAPFLines));
        // Disabled:
        // assertEquals(13, logContents.size());
        //
        // String logContent0 = logContents.get(0);
        // assertFalse(sqlExtractor.isSQLHead(logContent0));
        // assertFalse(sqlExtractor.isSQLTail(logContent0));
        //
        // String logContent1 = logContents.get(1);
        // assertTrue(sqlExtractor.isSQLHead(logContent1));
        // assertFalse(sqlExtractor.isSQLTail(logContent1));

        // Still active: fails with IndexOutOfBoundsException when fewer than 12 entries are extracted.
        String logContent11 = logContents.get(11);
        // assertFalse(sqlExtractor.isSQLHead(logContent11));
        // assertTrue(sqlExtractor.isSQLTail(logContent11));
    }

    @Test
    void testExtractLogContentsSQLs() throws IOException {
        List<String> logContents = sqlExtractor.extractMultiLinesLogContent(
                Joiner.on("\n").join(logsEAPFLines));
        // Result intentionally ignored; the call itself is the check.
        sqlExtractor.extractLogContentsSQLs(logContents);
    }

    @Test
    void testExtractLogSQLs() throws IOException {
        // Result intentionally ignored; the call itself is the check.
        sqlExtractor.extractLogSQLs(Joiner.on("\n").join(logsEAPFLines));
    }

    /**
     * Extracts the SQL of every log under {@code logsPath} into {@code writeSqlFileName}
     * (opened in append mode).
     */
    @Test
    void testextractPathLogSQLs() throws IOException {
        // The writer is owned here, so it is closed here; closing an already-closed
        // BufferedWriter has no effect should the extractor close it as well.
        try (BufferedWriter sqlWriter = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(writeSqlFileName, true), "UTF-8"))) {
            sqlExtractor.extractPathLogSQLs(logsPath, sqlWriter);
        }
    }
}
--------------------------------------------------------------------------------
/src/test/java/com/jd/jr/daat/dw/lineage/utils/SchemaExtractorTest.java:
--------------------------------------------------------------------------------
package com.jd.jr.daat.dw.lineage.utils;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;


import java.io.IOException;

import static org.junit.jupiter.api.Assertions.*;

/**
 * Smoke test for {@link SchemaExtractor}: runs table-schema creation against the
 * column listing on the test classpath and asserts nothing beyond "does not throw".
 */
class SchemaExtractorTest {
    private SchemaExtractor schemaExtractor;
    private String resourcePath;
    private String schemaResourcePath;
    private String schemaExtractColunmnResourcePath;
    private String createSchemTablePath;

    /** Builds the extractor and derives the input and output paths from the classpath root. */
    @BeforeEach
    void init() {
        schemaExtractor = new SchemaExtractor();

        String classpathRoot = getClass().getClassLoader().getResource("").getPath();
        resourcePath = classpathRoot;
        schemaResourcePath = classpathRoot + "/nebulae_column.txt";
        // Disabled: schemaExtractColunmnResourcePath = resourcePath + "/nebulae_extract_column.txt";
        createSchemTablePath = classpathRoot + "tables/schemaTable";
    }

    /** Passes the column listing path and the output path to {@code createTableSchema}. */
    @Test
    void testcreateTableSchema() throws IOException {
        final String columnListing = schemaResourcePath;
        final String outputTarget = createSchemTablePath;
        schemaExtractor.createTableSchema(columnListing, outputTarget);
    }
}
--------------------------------------------------------------------------------
/src/test/java/com/jd/jr/daat/dw/lineage/utils/SchemaLoaderTest.java:
--------------------------------------------------------------------------------
package com.jd.jr.daat.dw.lineage.utils;

import com.alibaba.druid.sql.repository.SchemaRepository;
import com.alibaba.druid.util.JdbcConstants;
import org.apache.commons.io.FileUtils;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import java.io.File;
import java.io.IOException;
import java.util.List;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Placeholder test for {@link SchemaLoader}. The only test body is disabled, so the
 * class currently checks nothing beyond successful construction in {@link #init()}.
 */
public class SchemaLoaderTest {
    private SchemaLoader schemaLoader;
    private SQLExtractor sqlExtractor;
    private String logsFilePath;

    /** Instantiates the loader and extractor and resolves the logs directory path. */
    @BeforeEach
    void init() throws Exception {
        schemaLoader = new SchemaLoader();
        sqlExtractor = new SQLExtractor();

        logsFilePath = getClass().getClassLoader().getResource("").getPath() + "/logs";
    }

    /**
     * Disabled. The intended flow, kept for reference (generic arguments were lost in
     * this listing; presumably a list of SQL lists per log file):
     */
    @Test
    void testLoad() throws IOException {
        // List<List<String>> sqls = sqlExtractor.extractPathLogSQLs(logsFilePath);
        // SchemaRepository schemaRepository = schemaLoader.load(sqls, JdbcConstants.HIVE);
        //
        // assertEquals(4, schemaRepository.getSchemas().size());
        // assertEquals(1, schemaRepository.findSchema("dmr").getTableCount());
        // assertEquals(10, schemaRepository.findSchema("tmp").getTableCount());
    }
}
--------------------------------------------------------------------------------