├── .github
└── workflows
│ ├── maven-publish.yml
│ └── maven.yml
├── .gitignore
├── LICENSE
├── README.md
├── pom.xml
├── sqllineage4j-cli
├── pom.xml
└── src
│ ├── main
│ └── java
│ │ └── io
│ │ └── github
│ │ └── reata
│ │ └── sqllineage4j
│ │ └── cli
│ │ ├── SQLLineage4j.java
│ │ └── utils
│ │ └── Helper.java
│ └── test
│ └── java
│ └── io
│ └── github
│ └── reata
│ └── sqllineage4j
│ └── cli
│ └── SQLLineage4jTest.java
├── sqllineage4j-common
├── pom.xml
└── src
│ ├── main
│ └── java
│ │ └── io
│ │ └── github
│ │ └── reata
│ │ └── sqllineage4j
│ │ └── common
│ │ ├── constant
│ │ ├── EdgeType.java
│ │ └── NodeTag.java
│ │ ├── entity
│ │ ├── ColumnQualifierTuple.java
│ │ └── EdgeTuple.java
│ │ ├── model
│ │ ├── Column.java
│ │ ├── QuerySet.java
│ │ ├── Schema.java
│ │ ├── SubQuery.java
│ │ └── Table.java
│ │ └── utils
│ │ └── Helper.java
│ └── test
│ └── java
│ └── io
│ └── github
│ └── reata
│ └── sqllineage4j
│ └── common
│ └── ModelTest.java
├── sqllineage4j-core
├── pom.xml
└── src
│ ├── main
│ └── java
│ │ └── io
│ │ └── github
│ │ └── reata
│ │ └── sqllineage4j
│ │ └── core
│ │ ├── LineageAnalyzer.java
│ │ ├── LineageRunner.java
│ │ └── holder
│ │ ├── SQLLineageHolder.java
│ │ ├── StatementLineageHolder.java
│ │ └── SubQueryLineageHolder.java
│ └── test
│ └── java
│ └── io
│ └── github
│ └── reata
│ └── sqllineage4j
│ └── core
│ ├── CTETest.java
│ ├── ColumnTest.java
│ ├── CreateTest.java
│ ├── Helper.java
│ ├── InsertTest.java
│ ├── OtherTest.java
│ └── SelectTest.java
├── sqllineage4j-graph
├── pom.xml
└── src
│ └── main
│ └── java
│ └── io
│ └── github
│ └── reata
│ └── sqllineage4j
│ └── graph
│ ├── GremlinLineageGraph.java
│ └── LineageGraph.java
├── sqllineage4j-parser
├── pom.xml
└── src
│ └── main
│ ├── antlr4
│ └── io
│ │ └── github
│ │ └── reata
│ │ └── sqllineage4j
│ │ └── parser
│ │ └── SqlBase.g4
│ └── java
│ └── io
│ └── github
│ └── reata
│ └── sqllineage4j
│ └── parser
│ ├── LineageParser.java
│ └── StatementSplitter.java
└── sqllineage4j-test-coverage
└── pom.xml
/.github/workflows/maven-publish.yml:
--------------------------------------------------------------------------------
1 | name: Maven Package
2 |
3 | on:
4 | release:
5 | types: [created]
6 |
7 | jobs:
8 | build:
9 |
10 | runs-on: ubuntu-latest
11 | permissions:
12 | contents: read
13 | packages: write
14 |
15 | steps:
16 | - uses: actions/checkout@v3
17 | - name: Set up JDK 11
18 | uses: actions/setup-java@v3
19 | with:
20 | distribution: 'temurin'
21 | java-version: '11'
22 |
23 | - name: Build with Maven
24 | run: mvn -B package --file pom.xml
25 |
26 | - name: Set up Apache Maven Central
27 | uses: actions/setup-java@v3
28 | with: # running setup-java again overwrites the settings.xml
29 | distribution: 'temurin'
30 | java-version: '11'
31 | server-id: ossrh # Value of the distributionManagement/repository/id field of the pom.xml
32 | server-username: MAVEN_USERNAME # env variable for username in deploy
33 | server-password: MAVEN_CENTRAL_TOKEN # env variable for token in deploy
34 | gpg-private-key: ${{ secrets.MAVEN_GPG_PRIVATE_KEY }} # Value of the GPG private key to import
35 | gpg-passphrase: MAVEN_GPG_PASSPHRASE # env variable for GPG private key passphrase
36 |
37 | - name: Publish to Apache Maven Central
38 | run: mvn deploy
39 | env:
40 | MAVEN_USERNAME: reata
41 | MAVEN_CENTRAL_TOKEN: ${{ secrets.MAVEN_CENTRAL_TOKEN }}
42 | MAVEN_GPG_PASSPHRASE: ${{ secrets.MAVEN_GPG_PASSPHRASE }}
43 |
--------------------------------------------------------------------------------
/.github/workflows/maven.yml:
--------------------------------------------------------------------------------
1 | name: Java CI with Maven
2 |
3 | on:
4 | push:
5 | branches: [ "main" ]
6 | pull_request:
7 | branches: [ "main" ]
8 |
9 | jobs:
10 | build:
11 |
12 | runs-on: ubuntu-latest
13 |
14 | steps:
15 | - uses: actions/checkout@v3
16 | - name: Set up JDK 11
17 | uses: actions/setup-java@v3
18 | with:
19 | java-version: '11'
20 | distribution: 'temurin'
21 | cache: maven
22 | - name: Build with Maven
23 | run: mvn -B package --file pom.xml -Dmaven.javadoc.skip=true -Dmaven.source.skip=true
24 |
25 | # Optional: Uploads the full dependency graph to GitHub to improve the quality of Dependabot alerts this repository can receive
26 | - name: Update dependency graph
27 | uses: advanced-security/maven-dependency-submission-action@571e99aab1055c2e71a1e2309b9691de18d6b7d6
28 | - name: Upload coverage to Codecov
29 | uses: codecov/codecov-action@v3
30 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | target/
2 | !.mvn/wrapper/maven-wrapper.jar
3 | !**/src/main/**/target/
4 | !**/src/test/**/target/
5 | dependency-reduced-pom.xml
6 |
7 | ### IntelliJ IDEA ###
8 | .idea/modules.xml
9 | .idea/jarRepositories.xml
10 | .idea/compiler.xml
11 | .idea/libraries/
12 | *.iws
13 | *.iml
14 | *.ipr
15 |
16 | ### Eclipse ###
17 | .apt_generated
18 | .classpath
19 | .factorypath
20 | .project
21 | .settings
22 | .springBeans
23 | .sts4-cache
24 |
25 | ### NetBeans ###
26 | /nbproject/private/
27 | /nbbuild/
28 | /dist/
29 | /nbdist/
30 | /.nb-gradle/
31 | build/
32 | !**/src/main/**/build/
33 | !**/src/test/**/build/
34 |
35 | ### VS Code ###
36 | .vscode/
37 |
38 | ### Mac OS ###
39 | .DS_Store
40 |
41 | # Idea
42 | .idea/
43 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SQLLineage4J
2 |
3 | sqllineage4j is a proof of concept to implement [sqllineage](https://github.com/reata/sqllineage) with [antlr4](https://github.com/antlr/antlr4).
4 |
5 | [](https://central.sonatype.dev/search?q=sqllineage4j&namespace=io.github.reata)
6 | [](https://github.com/reata/sqllineage4j)
7 | [](https://github.com/reata/sqllineage4j/actions/workflows/maven.yml)
8 | [](https://codecov.io/gh/reata/sqllineage4j)
9 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
5 | 4.0.0
6 |
7 | io.github.reata
8 | sqllineage4j
9 | 1.0.1-SNAPSHOT
10 | pom
11 |
12 | ${project.groupId}:${project.artifactId}
13 | A Java implementation of sqllineage using ANTLR v4
14 | https://github.com/reata/sqllineage4j
15 |
16 |
17 |
18 | Apache License, Version 2.0
19 | https://www.apache.org/licenses/LICENSE-2.0.txt
20 | repo
21 |
22 |
23 |
24 |
25 |
26 | reata
27 | reddevil.hjw@gmail.com
28 | reata
29 | https://github.com/reata
30 |
31 |
32 |
33 |
34 | scm:git:git://github.com/reata/sqllineage4j.git
35 | scm:git:ssh://github.com:reata/sqllineage4j.git
36 | https://github.com/reata/sqllineage4j/tree/master
37 | HEAD
38 |
39 |
40 |
41 | UTF-8
42 | 11
43 | 11
44 | 4.7.1
45 | 1.10.1
46 |
47 |
48 |
49 | sqllineage4j-parser
50 | sqllineage4j-common
51 | sqllineage4j-graph
52 | sqllineage4j-core
53 | sqllineage4j-cli
54 | sqllineage4j-test-coverage
55 |
56 |
57 |
58 |
59 |
60 | io.github.reata
61 | sqllineage4j-cli
62 | ${project.version}
63 |
64 |
65 | io.github.reata
66 | sqllineage4j-core
67 | ${project.version}
68 |
69 |
70 | io.github.reata
71 | sqllineage4j-common
72 | ${project.version}
73 |
74 |
75 | io.github.reata
76 | sqllineage4j-graph
77 | ${project.version}
78 |
79 |
80 | io.github.reata
81 | sqllineage4j-parser
82 | ${project.version}
83 |
84 |
85 | commons-cli
86 | commons-cli
87 | 1.5.0
88 |
89 |
90 | com.github.stefanbirkner
91 | system-lambda
92 | 1.2.1
93 |
94 |
95 | com.google.auto.value
96 | auto-value-annotations
97 | ${dep.auto-value.version}
98 |
99 |
100 | com.google.code.findbugs
101 | annotations
102 | 3.0.1
103 |
104 |
105 | org.apache.tinkerpop
106 | tinkergraph-gremlin
107 | 3.6.4
108 |
109 |
110 | org.antlr
111 | antlr4
112 | ${dep.antlr.version}
113 |
114 |
115 |
116 |
117 |
118 |
119 | junit
120 | junit
121 | 4.13.2
122 | test
123 |
124 |
125 | org.junit.jupiter
126 | junit-jupiter-params
127 | 5.9.2
128 | test
129 |
130 |
131 |
132 |
133 |
134 | ossrh
135 | https://s01.oss.sonatype.org/content/repositories/snapshots
136 |
137 |
138 | ossrh
139 | https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 | maven-clean-plugin
149 | 3.1.0
150 |
151 |
152 |
153 | maven-resources-plugin
154 | 3.0.2
155 |
156 |
157 | maven-compiler-plugin
158 | 3.8.0
159 |
160 |
161 | maven-surefire-plugin
162 | 2.22.1
163 |
164 |
165 | maven-jar-plugin
166 | 3.0.2
167 |
168 |
169 | maven-install-plugin
170 | 2.5.2
171 |
172 |
173 | maven-deploy-plugin
174 | 2.8.2
175 |
176 |
177 |
178 | maven-site-plugin
179 | 3.7.1
180 |
181 |
182 | maven-project-info-reports-plugin
183 | 3.0.0
184 |
185 |
186 |
187 |
188 |
189 | org.apache.maven.plugins
190 | maven-source-plugin
191 | 3.1.0
192 |
193 |
194 | attach-sources
195 |
196 | jar-no-fork
197 |
198 |
199 |
200 |
201 |
202 | org.apache.maven.plugins
203 | maven-javadoc-plugin
204 | 2.9.1
205 |
206 |
207 | attach-javadocs
208 |
209 | jar
210 |
211 |
212 |
213 |
214 |
215 | org.apache.maven.plugins
216 | maven-gpg-plugin
217 | 3.0.1
218 |
219 |
220 | sign-artifacts
221 | verify
222 |
223 | sign
224 |
225 |
226 |
227 | --pinentry-mode
228 | loopback
229 |
230 |
231 |
232 |
233 |
234 |
235 | org.jacoco
236 | jacoco-maven-plugin
237 | 0.8.7
238 |
239 |
240 |
241 | prepare-agent
242 |
243 |
244 |
245 |
246 | report
247 | test
248 |
249 | report
250 |
251 |
252 |
253 |
254 |
255 |
256 |
257 |
--------------------------------------------------------------------------------
/sqllineage4j-cli/pom.xml:
--------------------------------------------------------------------------------
1 |
3 |
4 | sqllineage4j
5 | io.github.reata
6 | 1.0.1-SNAPSHOT
7 |
8 | 4.0.0
9 | sqllineage4j-cli
10 | sqllineage4j-cli
11 |
12 |
13 |
14 | commons-cli
15 | commons-cli
16 |
17 |
18 | io.github.reata
19 | sqllineage4j-core
20 |
21 |
22 | com.github.stefanbirkner
23 | system-lambda
24 | test
25 |
26 |
27 |
28 |
29 |
30 |
31 | org.apache.maven.plugins
32 | maven-shade-plugin
33 |
34 |
35 | package
36 |
37 | shade
38 |
39 |
40 | true
41 | executable
42 |
43 |
45 |
46 | io.github.reata.sqllineage4j.cli.SQLLineage4j
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 | org.skife.maven
57 | really-executable-jar-maven-plugin
58 |
59 | -Xmx1G
60 | executable
61 |
62 |
63 |
64 | package
65 |
66 | really-executable-jar
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
--------------------------------------------------------------------------------
/sqllineage4j-cli/src/main/java/io/github/reata/sqllineage4j/cli/SQLLineage4j.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.cli;
2 |
3 | import io.github.reata.sqllineage4j.core.LineageRunner;
4 | import org.apache.commons.cli.*;
5 |
6 | import static io.github.reata.sqllineage4j.cli.utils.Helper.extractSqlFromArgs;
7 |
8 | public class SQLLineage4j {
9 |
10 | public static void main(String[] args) {
11 | Options options = new Options();
12 | Option exec = Option.builder("e").argName("quoted-query-string").hasArg().desc("SQL from command line").build();
13 | Option file = Option.builder("f").argName("filename").hasArg().desc("SQL from files").build();
14 | Option verbose = Option.builder("v").longOpt("verbose").desc("increase output verbosity, show statement level lineage result").build();
15 | options.addOption(exec);
16 | options.addOption(file);
17 | options.addOption(verbose);
18 |
19 | CommandLineParser parser = new DefaultParser();
20 | try {
21 | CommandLine cmd = parser.parse(options, args);
22 | if (cmd.hasOption("e") && cmd.hasOption("f")) {
23 | System.out.println("Both -e and -f options are specified. -e option will be ignored");
24 | }
25 | if (cmd.hasOption("e") || cmd.hasOption("f")) {
26 | String sql = extractSqlFromArgs(cmd);
27 | LineageRunner runner = cmd.hasOption("v") ? LineageRunner.builder(sql).verbose().build() : LineageRunner.builder(sql).build();
28 | runner.printTableLineage();
29 | } else {
30 | HelpFormatter formatter = new HelpFormatter();
31 | formatter.printHelp("sqllineage4j", options);
32 | }
33 | } catch (ParseException e) {
34 | throw new RuntimeException(e);
35 | }
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/sqllineage4j-cli/src/main/java/io/github/reata/sqllineage4j/cli/utils/Helper.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.cli.utils;
2 |
3 | import org.apache.commons.cli.CommandLine;
4 |
5 | import java.io.BufferedReader;
6 | import java.io.FileNotFoundException;
7 | import java.io.FileReader;
8 | import java.io.IOException;
9 |
10 | public final class Helper {
11 | public static String extractSqlFromArgs(CommandLine cmd) {
12 | StringBuilder sql = new StringBuilder();
13 | if (cmd.getOptionValue("f") != null) {
14 | String file = cmd.getOptionValue("f");
15 | try {
16 | BufferedReader reader = new BufferedReader(new FileReader(file));
17 | String line = reader.readLine();
18 | while (line != null) {
19 | sql.append(line);
20 | sql.append(System.lineSeparator());
21 | line = reader.readLine();
22 | }
23 | } catch (FileNotFoundException e) {
24 | e.printStackTrace();
25 | System.exit(1);
26 | } catch (IOException e) {
27 | System.exit(1);
28 | }
29 | } else if (cmd.getOptionValue("e") != null) {
30 | sql.append(cmd.getOptionValue("e"));
31 | }
32 | return sql.toString();
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/sqllineage4j-cli/src/test/java/io/github/reata/sqllineage4j/cli/SQLLineage4jTest.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.cli;
2 |
3 | import com.github.stefanbirkner.systemlambda.SystemLambda;
4 | import org.junit.Test;
5 |
6 | import java.io.File;
7 | import java.io.FileWriter;
8 | import java.io.IOException;
9 |
10 | import static org.junit.Assert.assertEquals;
11 | import static org.junit.Assert.assertTrue;
12 |
13 | public class SQLLineage4jTest {
14 |
15 | @Test
16 | public void testCliDummy() {
17 | String testSql = "insert overwrite table foo select * from dual inner join laud; insert overwrite table bar select * from foo";
18 | SQLLineage4j.main(new String[]{});
19 | SQLLineage4j.main(new String[]{"-e", testSql});
20 | SQLLineage4j.main(new String[]{"-e", testSql, "-v"});
21 | try {
22 | File f = File.createTempFile("test", ".sql");
23 | FileWriter fw = new FileWriter(f);
24 | fw.write(testSql);
25 | fw.close();
26 | SQLLineage4j.main(new String[]{"-f", f.getAbsolutePath()});
27 | SQLLineage4j.main(new String[]{"-e", testSql, "-f", f.getAbsolutePath()});
28 | assertTrue(f.delete());
29 | } catch (IOException e) {
30 | e.printStackTrace();
31 | }
32 | }
33 |
34 | @Test
35 | public void testFileException() throws Exception {
36 | int statusCode = SystemLambda.catchSystemExit(() -> SQLLineage4j.main(new String[]{"-f", "nonexist_file"}));
37 | assertEquals(1, statusCode);
38 | }
39 |
40 | @Test
41 | public void testFilePermissionError() throws Exception {
42 | int statusCode = SystemLambda.catchSystemExit(() -> SQLLineage4j.main(new String[]{"-f", "/"}));
43 | assertEquals(1, statusCode);
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/sqllineage4j-common/pom.xml:
--------------------------------------------------------------------------------
1 |
3 |
4 | sqllineage4j
5 | io.github.reata
6 | 1.0.1-SNAPSHOT
7 |
8 | 4.0.0
9 | sqllineage4j-common
10 | sqllineage4j-common
11 |
12 |
13 |
14 | com.google.auto.value
15 | auto-value-annotations
16 | provided
17 |
18 |
19 | com.google.code.findbugs
20 | annotations
21 | provided
22 |
23 |
24 |
25 |
26 |
27 |
28 | maven-compiler-plugin
29 |
30 |
31 |
32 | com.google.auto.value
33 | auto-value
34 | ${dep.auto-value.version}
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
--------------------------------------------------------------------------------
/sqllineage4j-common/src/main/java/io/github/reata/sqllineage4j/common/constant/EdgeType.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.common.constant;
2 |
/**
 * String constants naming the kinds of edges.
 *
 * <p>NOTE(review): presumably these are the edge labels stored in the lineage graph; the
 * usages are not visible from this file.
 */
public class EdgeType {
    /** Edge type {@code "lineage"}. */
    public static final String LINEAGE = "lineage";
    /** Edge type {@code "rename"}. */
    public static final String RENAME = "rename";
    /** Edge type {@code "has_column"}. */
    public static final String HAS_COLUMN = "has_column";
    /** Edge type {@code "has_alias"}. */
    public static final String HAS_ALIAS = "has_alias";
}
12 |
--------------------------------------------------------------------------------
/sqllineage4j-common/src/main/java/io/github/reata/sqllineage4j/common/constant/NodeTag.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.common.constant;
2 |
/**
 * String constants naming the tags that can be attached to nodes.
 *
 * <p>NOTE(review): presumably these mark the role a table plays in a statement or in the
 * combined lineage; the usages are not visible from this file.
 */
public class NodeTag {
    /** Tag {@code "read"}. */
    public static final String READ = "read";
    /** Tag {@code "write"}. */
    public static final String WRITE = "write";
    /** Tag {@code "cte"}. */
    public static final String CTE = "cte";
    /** Tag {@code "drop"}. */
    public static final String DROP = "drop";
    /** Tag {@code "source_only"}. */
    public static final String SOURCE_ONLY = "source_only";
    /** Tag {@code "target_only"}. */
    public static final String TARGET_ONLY = "target_only";
    /** Tag {@code "selfloop"}. */
    public static final String SELFLOOP = "selfloop";
}
18 |
--------------------------------------------------------------------------------
/sqllineage4j-common/src/main/java/io/github/reata/sqllineage4j/common/entity/ColumnQualifierTuple.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.common.entity;
2 |
3 | import com.google.auto.value.AutoValue;
4 |
5 | import javax.annotation.Nullable;
6 |
7 | @AutoValue
8 | abstract public class ColumnQualifierTuple {
9 | public static ColumnQualifierTuple create(String column, @Nullable String qualifier) {
10 | return new AutoValue_ColumnQualifierTuple(column, qualifier);
11 | }
12 |
13 | abstract public String column();
14 |
15 | @Nullable
16 | abstract public String qualifier();
17 | }
18 |
--------------------------------------------------------------------------------
/sqllineage4j-common/src/main/java/io/github/reata/sqllineage4j/common/entity/EdgeTuple.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.common.entity;
2 |
3 | import com.google.auto.value.AutoValue;
4 |
5 | @AutoValue
6 | abstract public class EdgeTuple {
7 | public static EdgeTuple create(Object source, String label, Object target) {
8 | return new AutoValue_EdgeTuple(source, label, target);
9 | }
10 |
11 | abstract public Object source();
12 |
13 | abstract public String label();
14 |
15 | abstract public Object target();
16 | }
17 |
--------------------------------------------------------------------------------
/sqllineage4j-common/src/main/java/io/github/reata/sqllineage4j/common/model/Column.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.common.model;
2 |
3 | import io.github.reata.sqllineage4j.common.entity.ColumnQualifierTuple;
4 |
5 | import javax.annotation.Nullable;
6 | import java.util.*;
7 |
8 | import static io.github.reata.sqllineage4j.common.utils.Helper.escapeIdentifierName;
9 |
10 |
11 | public class Column {
12 | private final Set parent = new HashSet<>();
13 | private final String rawName;
14 | private final List sourceColumns = new ArrayList<>();
15 |
16 | public Column(String name) {
17 | this.rawName = escapeIdentifierName(name);
18 | }
19 |
20 | @Override
21 | public String toString() {
22 | if (getParent() != null) {
23 | return getParent().toString() + "." + rawName.toLowerCase();
24 | } else {
25 | return rawName.toLowerCase();
26 | }
27 | }
28 |
29 | @Override
30 | public boolean equals(Object obj) {
31 | return obj instanceof Column && this.toString().equals(obj.toString());
32 | }
33 |
34 | @Override
35 | public int hashCode() {
36 | return Objects.hash(this.toString());
37 | }
38 |
39 | public @Nullable QuerySet getParent() {
40 | return parent.size() == 1 ? List.copyOf(parent).get(0) : null;
41 | }
42 |
43 | public void setParent(QuerySet table) {
44 | parent.add(table);
45 | }
46 |
47 | public void setSourceColumns(ColumnQualifierTuple cqt) {
48 | sourceColumns.add(cqt);
49 | }
50 |
51 | public List toSourceColumns(Map aliasMapping) {
52 | List sourceColumns = new ArrayList<>();
53 | for (ColumnQualifierTuple columnQualifierTuple : this.sourceColumns) {
54 | String srcCol = columnQualifierTuple.column();
55 | String qualifier = columnQualifierTuple.qualifier();
56 | if (qualifier == null) {
57 | if (srcCol.equals("*")) {
58 | // SELECT *
59 | for (QuerySet dataSet : aliasMapping.values()) {
60 | sourceColumns.add(toSourceColumn(srcCol, dataSet));
61 | }
62 | } else {
63 | // select unqualified column
64 | Column source = new Column(srcCol);
65 | for (QuerySet dataSet : aliasMapping.values()) {
66 | // in case of only one table, we get the right answer
67 | // in case of multiple tables, a bunch of possible tables are set
68 | source.setParent(dataSet);
69 | }
70 | sourceColumns.add(source);
71 | }
72 | } else {
73 | if (aliasMapping.containsKey(qualifier)) {
74 | sourceColumns.add(toSourceColumn(srcCol, aliasMapping.get(qualifier)));
75 | } else {
76 | sourceColumns.add(toSourceColumn(srcCol, new Table(qualifier)));
77 | }
78 | }
79 | }
80 | return sourceColumns;
81 | }
82 |
83 | private Column toSourceColumn(String columnName, QuerySet parent) {
84 | Column col = new Column(columnName);
85 | if (parent != null) {
86 | col.setParent(parent);
87 | }
88 | return col;
89 | }
90 | }
91 |
--------------------------------------------------------------------------------
/sqllineage4j-common/src/main/java/io/github/reata/sqllineage4j/common/model/QuerySet.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.common.model;
2 |
/**
 * Common type for the data sets a query can read from or write to; in this module it is
 * implemented by {@code Table} and {@code SubQuery}.
 */
public interface QuerySet {

    /**
     * @return the alias under which this data set is referred to
     */
    String getAlias();
}
6 |
--------------------------------------------------------------------------------
/sqllineage4j-common/src/main/java/io/github/reata/sqllineage4j/common/model/Schema.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.common.model;
2 |
3 | import java.util.Objects;
4 |
5 | import static io.github.reata.sqllineage4j.common.utils.Helper.escapeIdentifierName;
6 |
7 | public final class Schema {
8 | private final String rawName;
9 |
10 | public Schema() {
11 | rawName = "";
12 | }
13 |
14 | public Schema(String name) {
15 | rawName = escapeIdentifierName(name);
16 | }
17 |
18 | @Override
19 | public String toString() {
20 | return rawName.toLowerCase();
21 | }
22 |
23 | @Override
24 | public boolean equals(Object obj) {
25 | return obj instanceof Schema && this.toString().equals(obj.toString());
26 | }
27 |
28 | @Override
29 | public int hashCode() {
30 | return Objects.hash(this.toString());
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/sqllineage4j-common/src/main/java/io/github/reata/sqllineage4j/common/model/SubQuery.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.common.model;
2 |
3 | public final class SubQuery implements QuerySet {
4 | private final String query;
5 | private final String alias;
6 |
7 | public SubQuery(String query, String alias) {
8 | this.query = query;
9 | if (alias == null) {
10 | alias = "subquery_" + query.hashCode();
11 | }
12 | this.alias = alias;
13 | }
14 |
15 | @Override
16 | public String toString() {
17 | return alias;
18 | }
19 |
20 | @Override
21 | public boolean equals(Object obj) {
22 | return obj instanceof SubQuery && this.query.equals(((SubQuery) obj).getQuery());
23 | }
24 |
25 | @Override
26 | public int hashCode() {
27 | return query.hashCode();
28 | }
29 |
30 | public String getQuery() {
31 | return query;
32 | }
33 |
34 | public String getAlias() {
35 | return alias;
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/sqllineage4j-common/src/main/java/io/github/reata/sqllineage4j/common/model/Table.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.common.model;
2 |
3 | import java.util.Objects;
4 |
5 | import static io.github.reata.sqllineage4j.common.utils.Helper.escapeIdentifierName;
6 |
7 | public final class Table implements QuerySet {
8 | private final String rawName;
9 |
10 | private final String alias;
11 | private Schema schema = new Schema();
12 |
13 | public Table(String name) {
14 | this(name, name);
15 | }
16 |
17 | public Table(String name, String alias) {
18 | if (name.contains(".")) {
19 | int pos = name.lastIndexOf(".");
20 | String schemaName = name.substring(0, pos + 1);
21 | String tableName = name.substring(pos + 1);
22 | this.schema = new Schema(schemaName);
23 | this.rawName = escapeIdentifierName(tableName);
24 | } else {
25 | this.rawName = escapeIdentifierName(name);
26 | }
27 | this.alias = alias;
28 | }
29 |
30 | @Override
31 | public String toString() {
32 | return schema.toString() + "." + rawName.toLowerCase();
33 | }
34 |
35 | @Override
36 | public boolean equals(Object obj) {
37 | return obj instanceof Table && this.toString().equals(obj.toString());
38 | }
39 |
40 | @Override
41 | public int hashCode() {
42 | return Objects.hash(this.toString());
43 | }
44 |
45 | @Override
46 | public String getAlias() {
47 | return alias;
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/sqllineage4j-common/src/main/java/io/github/reata/sqllineage4j/common/utils/Helper.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.common.utils;
2 |
/** Static helpers shared by the model classes. */
public final class Helper {

    /**
     * Removes every back-tick, single quote and double quote from {@code name}.
     *
     * @param name identifier as written in SQL
     * @return {@code name} without quoting characters
     */
    public static String escapeIdentifierName(String name) {
        String withoutBackticks = name.replace("`", "");
        String withoutSingleQuotes = withoutBackticks.replace("'", "");
        return withoutSingleQuotes.replace("\"", "");
    }
}
8 |
--------------------------------------------------------------------------------
/sqllineage4j-common/src/test/java/io/github/reata/sqllineage4j/common/ModelTest.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.common;
2 |
3 | import io.github.reata.sqllineage4j.common.model.Schema;
4 | import io.github.reata.sqllineage4j.common.model.Table;
5 | import org.junit.Test;
6 |
7 | import java.util.HashSet;
8 | import java.util.List;
9 |
10 | import static org.junit.Assert.assertEquals;
11 | import static org.junit.Assert.assertNotNull;
12 |
13 | public class ModelTest {
14 |
15 | @Test
16 | public void testDummy() {
17 | assertNotNull(new Schema().toString());
18 | assertNotNull(new Table("").toString());
19 | assertNotNull(new Table("a.b.c").toString());
20 | }
21 |
22 | @Test
23 | public void testHashEq() {
24 | assertEquals(new Schema("a"), new Schema("a"));
25 | assertEquals(1, new HashSet<>(List.of(new Schema("a"), new Schema("a"))).size());
26 | assertEquals(new Table("a"), new Table("a"));
27 | assertEquals(1, new HashSet<>(List.of(new Table("a"), new Table("a"))).size());
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/sqllineage4j-core/pom.xml:
--------------------------------------------------------------------------------
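<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>sqllineage4j</artifactId>
        <groupId>io.github.reata</groupId>
        <version>1.0.1-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>
    <artifactId>sqllineage4j-core</artifactId>
    <name>sqllineage4j-core</name>

    <dependencies>
        <dependency>
            <groupId>io.github.reata</groupId>
            <artifactId>sqllineage4j-common</artifactId>
        </dependency>
        <dependency>
            <groupId>io.github.reata</groupId>
            <artifactId>sqllineage4j-parser</artifactId>
        </dependency>
        <dependency>
            <groupId>io.github.reata</groupId>
            <artifactId>sqllineage4j-graph</artifactId>
        </dependency>
    </dependencies>
</project>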
27 |
--------------------------------------------------------------------------------
/sqllineage4j-core/src/main/java/io/github/reata/sqllineage4j/core/LineageAnalyzer.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.core;
2 |
3 | import io.github.reata.sqllineage4j.common.constant.NodeTag;
4 | import io.github.reata.sqllineage4j.common.entity.ColumnQualifierTuple;
5 | import io.github.reata.sqllineage4j.common.model.Column;
6 | import io.github.reata.sqllineage4j.common.model.QuerySet;
7 | import io.github.reata.sqllineage4j.common.model.SubQuery;
8 | import io.github.reata.sqllineage4j.common.model.Table;
9 | import io.github.reata.sqllineage4j.core.holder.StatementLineageHolder;
10 | import io.github.reata.sqllineage4j.core.holder.SubQueryLineageHolder;
11 | import io.github.reata.sqllineage4j.parser.SqlBaseBaseListener;
12 | import io.github.reata.sqllineage4j.parser.SqlBaseParser;
13 | import org.antlr.v4.runtime.CharStream;
14 | import org.antlr.v4.runtime.ParserRuleContext;
15 | import org.antlr.v4.runtime.misc.Interval;
16 | import org.antlr.v4.runtime.tree.ParseTree;
17 | import org.antlr.v4.runtime.tree.ParseTreeWalker;
18 |
19 | import java.util.*;
20 | import java.util.function.Function;
21 | import java.util.stream.Collectors;
22 |
23 | public class LineageAnalyzer {
24 |
25 | public StatementLineageHolder analyze(ParseTree stmt) {
26 | ParseTreeWalker walker = new ParseTreeWalker();
27 | LineageListener listener = new LineageListener();
28 | walker.walk(listener, stmt);
29 | return listener.getStatementLineageHolder();
30 | }
31 |
32 | public static class LineageListener extends SqlBaseBaseListener {
33 |
34 | private final StatementLineageHolder statementLineageHolder = new StatementLineageHolder();
35 | private final Map subQueryLineageHolders = new HashMap<>();
36 |
37 | public StatementLineageHolder getStatementLineageHolder() {
38 | return statementLineageHolder;
39 | }
40 |
41 | private String getOriginalText(ParserRuleContext parserRuleContext) {
42 | CharStream stream = parserRuleContext.start.getInputStream();
43 | return stream.getText(new Interval(parserRuleContext.start.getStartIndex(), parserRuleContext.stop.getStopIndex()));
44 | }
45 |
46 | private SubQueryLineageHolder getHolder(ParserRuleContext ctx) {
47 | while (ctx.getParent() != null) {
48 | ctx = ctx.getParent();
49 | if (ctx instanceof SqlBaseParser.RegularQuerySpecificationContext) {
50 | return subQueryLineageHolders.get(ctx.hashCode());
51 | }
52 | }
53 | return null;
54 | }
55 |
56 | @Override
57 | public void exitSingleStatement(SqlBaseParser.SingleStatementContext ctx) {
58 | for (SubQueryLineageHolder holder : subQueryLineageHolders.values()) {
59 | statementLineageHolder.union(holder);
60 | }
61 | }
62 |
63 | @Override
64 | public void enterInsertIntoTable(SqlBaseParser.InsertIntoTableContext ctx) {
65 | statementLineageHolder.addWrite(new Table(ctx.multipartIdentifier().getText()));
66 | }
67 |
68 | @Override
69 | public void enterInsertOverwriteTable(SqlBaseParser.InsertOverwriteTableContext ctx) {
70 | statementLineageHolder.addWrite(new Table(ctx.multipartIdentifier().getText()));
71 | }
72 |
73 | @Override
74 | public void enterCreateTableHeader(SqlBaseParser.CreateTableHeaderContext ctx) {
75 | statementLineageHolder.addWrite(new Table(ctx.multipartIdentifier().getText()));
76 | }
77 |
78 | @Override
79 | public void enterCreateTableLike(SqlBaseParser.CreateTableLikeContext ctx) {
80 | statementLineageHolder.addWrite(new Table(ctx.target.getText()));
81 | statementLineageHolder.addRead(new Table(ctx.source.getText()));
82 | }
83 |
84 | @Override
85 | public void enterUpdateTable(SqlBaseParser.UpdateTableContext ctx) {
86 | handleMultipartIdentifier(ctx.multipartIdentifier(), NodeTag.WRITE, null);
87 | }
88 |
89 | @Override
90 | public void enterDropTable(SqlBaseParser.DropTableContext ctx) {
91 | handleMultipartIdentifier(ctx.multipartIdentifier(), NodeTag.DROP, null);
92 | }
93 |
94 | @Override
95 | public void enterRenameTable(SqlBaseParser.RenameTableContext ctx) {
96 | statementLineageHolder.addRename(new Table(ctx.from.getText()), new Table(ctx.to.getText()));
97 | }
98 |
99 | @Override
100 | public void enterFailNativeCommand(SqlBaseParser.FailNativeCommandContext ctx) {
101 | SqlBaseParser.UnsupportedHiveNativeCommandsContext unsupportedHiveNativeCommandsContext = ctx.unsupportedHiveNativeCommands();
102 | if (unsupportedHiveNativeCommandsContext != null) {
103 | if (unsupportedHiveNativeCommandsContext.ALTER() != null
104 | && unsupportedHiveNativeCommandsContext.TABLE() != null
105 | && unsupportedHiveNativeCommandsContext.EXCHANGE() != null
106 | && unsupportedHiveNativeCommandsContext.PARTITION() != null) {
107 | statementLineageHolder.addWrite(new Table(unsupportedHiveNativeCommandsContext.tableIdentifier().getText()));
108 | statementLineageHolder.addRead(new Table(ctx.getChild(ctx.getChildCount() - 1).getText()));
109 | }
110 | }
111 | }
112 |
113 | @Override
114 | public void enterCtes(SqlBaseParser.CtesContext ctx) {
115 | for (SqlBaseParser.NamedQueryContext namedQueryContext : ctx.namedQuery()) {
116 | if (namedQueryContext.query() != null) {
117 | statementLineageHolder.addCTE(new SubQuery(
118 | namedQueryContext.query().getText(),
119 | namedQueryContext.errorCapturingIdentifier().getText()
120 | ));
121 | }
122 | }
123 | }
124 |
125 | @Override
126 | public void enterRegularQuerySpecification(SqlBaseParser.RegularQuerySpecificationContext ctx) {
127 | SubQueryLineageHolder holder = new SubQueryLineageHolder();
128 | subQueryLineageHolders.put(ctx.hashCode(), holder);
129 | ParserRuleContext parentCtx = ctx;
130 | boolean isSubQuery = false;
131 | while (parentCtx.getParent() != null) {
132 | parentCtx = parentCtx.getParent();
133 | if (parentCtx instanceof SqlBaseParser.AliasedQueryContext) {
134 | isSubQuery = true;
135 | SqlBaseParser.AliasedQueryContext aliasedQueryContext = (SqlBaseParser.AliasedQueryContext) parentCtx;
136 | SubQuery subQuery = new SubQuery(aliasedQueryContext.query().getText(), aliasedQueryContext.tableAlias().getText());
137 | holder.addWrite(subQuery);
138 | break;
139 | } else if (parentCtx instanceof SqlBaseParser.NamedQueryContext) {
140 | isSubQuery = true;
141 | SqlBaseParser.NamedQueryContext namedQueryContext = (SqlBaseParser.NamedQueryContext) parentCtx;
142 | SubQuery subQuery = new SubQuery(namedQueryContext.query().getText(), namedQueryContext.errorCapturingIdentifier().getText());
143 | holder.addWrite(subQuery);
144 | break;
145 | }
146 | }
147 | if (!isSubQuery) {
148 | if (statementLineageHolder.getWrite().size() > 0) {
149 | holder.addWrite(new ArrayList<>(statementLineageHolder.getWrite()).get(0));
150 | }
151 | }
152 | }
153 |
154 | @Override
155 | public void exitRegularQuerySpecification(SqlBaseParser.RegularQuerySpecificationContext ctx) {
156 | SubQueryLineageHolder holder = subQueryLineageHolders.get(ctx.hashCode());
157 | QuerySet tgtTbl = null;
158 | if (holder.getWrite().size() == 1) {
159 | tgtTbl = List.copyOf(holder.getWrite()).get(0);
160 | }
161 | if (tgtTbl != null) {
162 | for (Column tgtCol : holder.getSelectColumns()) {
163 | tgtCol.setParent(tgtTbl);
164 | Map aliasMapping = getAliasMappingFromTableGroup(holder);
165 | for (Column srcCol : tgtCol.toSourceColumns(aliasMapping)) {
166 | holder.addColumnLineage(srcCol, tgtCol);
167 | }
168 | }
169 | }
170 | }
171 |
172 | @Override
173 | public void enterSelectClause(SqlBaseParser.SelectClauseContext ctx) {
174 | for (SqlBaseParser.NamedExpressionContext namedExpressionContext : ctx.namedExpressionSeq().namedExpression()) {
175 | String alias = getIdentifierName(namedExpressionContext.errorCapturingIdentifier());
176 | SqlBaseParser.BooleanExpressionContext booleanExpressionContext = namedExpressionContext.expression().booleanExpression();
177 | handleBooleanExpression(booleanExpressionContext, alias);
178 | }
179 | }
180 |
181 | @Override
182 | public void enterFromClause(SqlBaseParser.FromClauseContext ctx) {
183 | for (SqlBaseParser.RelationContext relationContext : ctx.relation()) {
184 | handleRelationPrimary(relationContext.relationPrimary());
185 | for (SqlBaseParser.JoinRelationContext joinRelationContext : relationContext.joinRelation()) {
186 | handleRelationPrimary(joinRelationContext.relationPrimary());
187 | }
188 | }
189 | }
190 |
191 | @Override
192 | public void enterFunctionCall(SqlBaseParser.FunctionCallContext ctx) {
193 | if (ctx.functionName().getText().equalsIgnoreCase("swap_partitions_between_tables")) {
194 | List arguments = ctx.argument;
195 | if (arguments.size() == 4) {
196 | statementLineageHolder.addRead(new Table(arguments.get(0).getText().replace("'", "").replace("\"", "")));
197 | statementLineageHolder.addWrite(new Table(arguments.get(3).getText().replace("'", "").replace("\"", "")));
198 | }
199 | }
200 | }
201 |
202 | private void handleRelationPrimary(SqlBaseParser.RelationPrimaryContext relationPrimaryContext) {
203 | if (relationPrimaryContext instanceof SqlBaseParser.TableNameContext) {
204 | SqlBaseParser.TableNameContext tableNameContext = (SqlBaseParser.TableNameContext) relationPrimaryContext;
205 | String alias = null;
206 | if (tableNameContext.tableAlias().strictIdentifier() != null) {
207 | alias = getOriginalText(tableNameContext.tableAlias().strictIdentifier());
208 | }
209 | handleMultipartIdentifier(tableNameContext.multipartIdentifier(), NodeTag.READ, alias);
210 | } else if (relationPrimaryContext instanceof SqlBaseParser.AliasedRelationContext) {
211 | SqlBaseParser.AliasedRelationContext aliasedRelationContext = (SqlBaseParser.AliasedRelationContext) relationPrimaryContext;
212 | handleRelationPrimary(aliasedRelationContext.relation().relationPrimary());
213 | } else if (relationPrimaryContext instanceof SqlBaseParser.AliasedQueryContext) {
214 | SqlBaseParser.AliasedQueryContext aliasedQueryContext = (SqlBaseParser.AliasedQueryContext) relationPrimaryContext;
215 | SubQueryLineageHolder holder = getHolder(relationPrimaryContext);
216 | Objects.requireNonNull(holder).addRead(new SubQuery(aliasedQueryContext.query().getText(), aliasedQueryContext.tableAlias().getText()));
217 | }
218 | }
219 |
220 | private void handleMultipartIdentifier(SqlBaseParser.MultipartIdentifierContext multipartIdentifierContext, String type, String alias) {
221 | SubQueryLineageHolder holder = getHolder(multipartIdentifierContext);
222 | List unquotedParts = new ArrayList<>();
223 | for (SqlBaseParser.ErrorCapturingIdentifierContext errorCapturingIdentifierContext : multipartIdentifierContext.errorCapturingIdentifier()) {
224 | String identifier = getIdentifierName(errorCapturingIdentifierContext);
225 | if (!identifier.equals("")) {
226 | unquotedParts.add(identifier);
227 | }
228 | }
229 | String rawName = String.join(".", unquotedParts);
230 | Table table = alias == null ? new Table(rawName) : new Table(rawName, alias);
231 | switch (type) {
232 | case NodeTag.READ:
233 | Map cteMap = statementLineageHolder.getCTE().stream().collect(Collectors.toMap(SubQuery::getAlias, Function.identity()));
234 | if (cteMap.containsKey(rawName)) {
235 | SubQuery cte = cteMap.get(rawName);
236 | if (alias != null) {
237 | Objects.requireNonNull(holder).addRead(new SubQuery(cte.getQuery(), alias));
238 | }
239 | Objects.requireNonNull(holder).addRead(cte);
240 | } else {
241 | Objects.requireNonNull(holder).addRead(table);
242 | }
243 | break;
244 | case NodeTag.WRITE:
245 | Objects.requireNonNullElse(holder, statementLineageHolder).addWrite(table);
246 | break;
247 | case NodeTag.DROP:
248 | statementLineageHolder.addDrop(table);
249 | break;
250 | }
251 | }
252 |
253 | private void handleBooleanExpression(SqlBaseParser.BooleanExpressionContext booleanExpressionContext, String alias) {
254 | if (booleanExpressionContext instanceof SqlBaseParser.PredicatedContext) {
255 | SqlBaseParser.PredicatedContext predicatedContext = (SqlBaseParser.PredicatedContext) booleanExpressionContext;
256 | SqlBaseParser.ValueExpressionContext valueExpressionContext = predicatedContext.valueExpression();
257 | handleValueExpression(valueExpressionContext, alias);
258 | } else if (booleanExpressionContext instanceof SqlBaseParser.LogicalBinaryContext) {
259 | SqlBaseParser.LogicalBinaryContext logicalBinaryContext = (SqlBaseParser.LogicalBinaryContext) booleanExpressionContext;
260 | for (SqlBaseParser.BooleanExpressionContext subBooleanExpressionContext : logicalBinaryContext.booleanExpression()) {
261 | handleBooleanExpression(subBooleanExpressionContext, alias);
262 | }
263 | }
264 | }
265 |
266 | private void handleValueExpression(SqlBaseParser.ValueExpressionContext valueExpressionContext, String alias) {
267 | SubQueryLineageHolder holder = getHolder(valueExpressionContext);
268 | List selectColumns = Objects.requireNonNull(holder).getSelectColumns();
269 | if (valueExpressionContext instanceof SqlBaseParser.ValueExpressionDefaultContext) {
270 | SqlBaseParser.ValueExpressionDefaultContext valueExpressionDefaultContext = (SqlBaseParser.ValueExpressionDefaultContext) valueExpressionContext;
271 | SqlBaseParser.PrimaryExpressionContext primaryExpressionContext = valueExpressionDefaultContext.primaryExpression();
272 | if (primaryExpressionContext instanceof SqlBaseParser.ColumnReferenceContext) {
273 | SqlBaseParser.ColumnReferenceContext columnReferenceContext = (SqlBaseParser.ColumnReferenceContext) primaryExpressionContext;
274 | String columnName = columnReferenceContext.getText();
275 | Column column = new Column(alias.equals("") ? columnName : alias);
276 | column.setSourceColumns(ColumnQualifierTuple.create(columnName, null));
277 | selectColumns.add(column);
278 | } else if (primaryExpressionContext instanceof SqlBaseParser.DereferenceContext) {
279 | SqlBaseParser.DereferenceContext dereferenceContext = (SqlBaseParser.DereferenceContext) primaryExpressionContext;
280 | String columnName = dereferenceContext.identifier().strictIdentifier().getText();
281 | Column column = new Column(alias.equals("") ? columnName : alias);
282 | String qualifierName = dereferenceContext.primaryExpression().getText();
283 | column.setSourceColumns(ColumnQualifierTuple.create(columnName, qualifierName));
284 | selectColumns.add(column);
285 | } else if (primaryExpressionContext instanceof SqlBaseParser.StarContext) {
286 | SqlBaseParser.StarContext starContext = (SqlBaseParser.StarContext) primaryExpressionContext;
287 | String columnName = starContext.ASTERISK().getText();
288 | Column column = new Column(alias.equals("") ? columnName : alias);
289 | column.setSourceColumns(ColumnQualifierTuple.create(columnName, null));
290 | selectColumns.add(column);
291 | } else if (primaryExpressionContext instanceof SqlBaseParser.FunctionCallContext) {
292 | SqlBaseParser.FunctionCallContext functionCallContext = (SqlBaseParser.FunctionCallContext) primaryExpressionContext;
293 | for (SqlBaseParser.ExpressionContext expressionContext : functionCallContext.expression()) {
294 | handleBooleanExpression(expressionContext.booleanExpression(), alias.equals("") ? functionCallContext.getText() : alias);
295 | }
296 | if (functionCallContext.windowSpec() != null) {
297 | SqlBaseParser.WindowSpecContext windowSpecContext = functionCallContext.windowSpec();
298 | if (windowSpecContext instanceof SqlBaseParser.WindowDefContext) {
299 | SqlBaseParser.WindowDefContext windowDefContext = (SqlBaseParser.WindowDefContext) windowSpecContext;
300 | for (SqlBaseParser.ExpressionContext expressionContext : windowDefContext.expression()) {
301 | handleBooleanExpression(expressionContext.booleanExpression(), alias.equals("") ? functionCallContext.getText() : alias);
302 | }
303 | for (SqlBaseParser.SortItemContext sortItemContext : windowDefContext.sortItem()) {
304 | handleBooleanExpression(sortItemContext.expression().booleanExpression(), alias.equals("") ? functionCallContext.getText() : alias);
305 | }
306 | }
307 | }
308 | } else if (primaryExpressionContext instanceof SqlBaseParser.CastContext) {
309 | SqlBaseParser.CastContext castContext = (SqlBaseParser.CastContext) primaryExpressionContext;
310 | handleBooleanExpression(castContext.expression().booleanExpression(), alias.equals("") ? getOriginalText(castContext) : alias);
311 | } else if (primaryExpressionContext instanceof SqlBaseParser.ParenthesizedExpressionContext) {
312 | SqlBaseParser.ParenthesizedExpressionContext parenthesizedExpressionContext = (SqlBaseParser.ParenthesizedExpressionContext) primaryExpressionContext;
313 | handleBooleanExpression(parenthesizedExpressionContext.expression().booleanExpression(), alias);
314 | } else if (primaryExpressionContext instanceof SqlBaseParser.SearchedCaseContext) {
315 | SqlBaseParser.SearchedCaseContext searchedCaseContext = (SqlBaseParser.SearchedCaseContext) primaryExpressionContext;
316 | alias = alias.equals("") ? getOriginalText(searchedCaseContext) : alias;
317 | for (SqlBaseParser.WhenClauseContext whenClauseContext : searchedCaseContext.whenClause()) {
318 | for (SqlBaseParser.ExpressionContext expressionContext : whenClauseContext.expression()) {
319 | handleBooleanExpression(expressionContext.booleanExpression(), alias);
320 | }
321 | }
322 | if (searchedCaseContext.expression() != null) {
323 | handleBooleanExpression(searchedCaseContext.expression().booleanExpression(), alias);
324 | }
325 | }
326 | } else if (valueExpressionContext instanceof SqlBaseParser.ComparisonContext) {
327 | SqlBaseParser.ComparisonContext comparisonContext = (SqlBaseParser.ComparisonContext) valueExpressionContext;
328 | for (SqlBaseParser.ValueExpressionContext subValueExpressionContext : comparisonContext.valueExpression()) {
329 | handleValueExpression(subValueExpressionContext, alias);
330 | }
331 | } else if (valueExpressionContext instanceof SqlBaseParser.ArithmeticBinaryContext) {
332 | SqlBaseParser.ArithmeticBinaryContext arithmeticBinaryContext = (SqlBaseParser.ArithmeticBinaryContext) valueExpressionContext;
333 | alias = alias.equals("") ? getOriginalText(arithmeticBinaryContext) : alias;
334 | for (SqlBaseParser.ValueExpressionContext subValueExpressionContext : arithmeticBinaryContext.valueExpression()) {
335 | handleValueExpression(subValueExpressionContext, alias);
336 | }
337 | }
338 | }
339 |
340 | private String getIdentifierName(SqlBaseParser.ErrorCapturingIdentifierContext errorCapturingIdentifierContext) {
341 | String name = "";
342 | if (errorCapturingIdentifierContext != null) {
343 | SqlBaseParser.StrictIdentifierContext strictIdentifierContext = errorCapturingIdentifierContext.identifier().strictIdentifier();
344 | if (strictIdentifierContext instanceof SqlBaseParser.QuotedIdentifierAlternativeContext) {
345 | name = strictIdentifierContext.getText().replace("`", "");
346 | } else if (strictIdentifierContext instanceof SqlBaseParser.UnquotedIdentifierContext) {
347 | name = strictIdentifierContext.getText();
348 | }
349 | }
350 | return name;
351 | }
352 |
353 | private Map getAliasMappingFromTableGroup(SubQueryLineageHolder holder) {
354 | Map alias = holder.getQuerySetAlias();
355 | for (QuerySet dataset : holder.getRead()) {
356 | alias.put(dataset.toString(), dataset);
357 | // TODO: rawName -> dataset
358 | }
359 | return alias;
360 | }
361 | }
362 | }
363 |
--------------------------------------------------------------------------------
/sqllineage4j-core/src/main/java/io/github/reata/sqllineage4j/core/LineageRunner.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.core;
2 |
3 | import io.github.reata.sqllineage4j.common.model.Column;
4 | import io.github.reata.sqllineage4j.common.model.Table;
5 | import io.github.reata.sqllineage4j.core.holder.SQLLineageHolder;
6 | import io.github.reata.sqllineage4j.core.holder.StatementLineageHolder;
7 | import io.github.reata.sqllineage4j.parser.LineageParser;
8 | import io.github.reata.sqllineage4j.parser.StatementSplitter;
9 | import org.javatuples.Pair;
10 |
11 | import java.util.List;
12 | import java.util.stream.Collectors;
13 |
14 | public class LineageRunner {
15 |
16 | public static final class Builder {
17 | private final String sql;
18 | private boolean verbose = false;
19 |
20 | private Builder(final String sql) {
21 | this.sql = sql;
22 | }
23 |
24 | public Builder verbose() {
25 | this.verbose = true;
26 | return this;
27 | }
28 |
29 | public LineageRunner build() {
30 | if (sql == null) {
31 | throw new IllegalArgumentException("sql string must be specified");
32 | }
33 | return new LineageRunner(this);
34 | }
35 | }
36 |
37 |
38 | private final List statementLineageHolders;
39 | private final SQLLineageHolder sqlLineageHolder;
40 | private final List statements;
41 |
42 | private final boolean verbose;
43 |
44 | private LineageRunner(final Builder builder) {
45 | String sql = builder.sql;
46 | this.verbose = builder.verbose;
47 | statements = new StatementSplitter(sql).split();
48 | statementLineageHolders = statements.stream().map(x -> new LineageAnalyzer().analyze(LineageParser.parse(x))).collect(Collectors.toList());
49 | sqlLineageHolder = SQLLineageHolder.of(statementLineageHolders.toArray(StatementLineageHolder[]::new));
50 | }
51 |
52 | public List sourceTables() {
53 | return List.copyOf(sqlLineageHolder.getSourceTables());
54 | }
55 |
56 | public List targetTables() {
57 | return List.copyOf(sqlLineageHolder.getTargetTables());
58 | }
59 |
60 | public List intermediateTables() {
61 | return List.copyOf(sqlLineageHolder.getIntermediateTables());
62 | }
63 |
64 | public List> getColumnLineage() {
65 | return getColumnLineage(true);
66 | }
67 |
68 | public List> getColumnLineage(boolean excludeSubquery) {
69 | return sqlLineageHolder.getColumnLineage(excludeSubquery)
70 | .stream().map(path -> Pair.with(path.get(0), path.get(path.size() - 1)))
71 | .collect(Collectors.toList());
72 | }
73 |
74 | public void printTableLineage() {
75 | String sourceTables = sourceTables().stream().map(t -> " " + t.toString() + "\n").collect(Collectors.joining());
76 | String targetTables = targetTables().stream().map(t -> " " + t.toString() + "\n").collect(Collectors.joining());
77 | String combined = "Statements(#): " + statements.size() + "\n"
78 | + "Source Tables:\n"
79 | + sourceTables
80 | + "Target Tables:\n"
81 | + targetTables;
82 | if (intermediateTables().size() > 0) {
83 | String intermediateTables = intermediateTables().stream().map(t -> " " + t.toString() + "\n").collect(Collectors.joining());
84 | combined += "Intermediate Tables:\n" + intermediateTables;
85 | }
86 | if (verbose) {
87 | StringBuilder result = new StringBuilder();
88 | for (int i = 0; i < statementLineageHolders.size(); i++) {
89 | String stmtShort = statements.get(i).replace("\n", "");
90 | if (stmtShort.length() > 50) {
91 | stmtShort = stmtShort.substring(0, 50) + "...";
92 | }
93 | String content = statementLineageHolders.get(i).toString().replace("\n", "\n ");
94 | result.append("Statement #").append(i + 1).append(": ").append(stmtShort).append("\n ").append(content).append("\n");
95 | }
96 | combined = result + "==========\nSummary:\n" + combined;
97 | }
98 | System.out.println(combined);
99 | }
100 |
101 | public static Builder builder(final String sql) {
102 | return new Builder(sql);
103 | }
104 | }
105 |
--------------------------------------------------------------------------------
/sqllineage4j-core/src/main/java/io/github/reata/sqllineage4j/core/holder/SQLLineageHolder.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.core.holder;
2 |
3 | import io.github.reata.sqllineage4j.common.constant.EdgeType;
4 | import io.github.reata.sqllineage4j.common.constant.NodeTag;
5 | import io.github.reata.sqllineage4j.common.entity.EdgeTuple;
6 | import io.github.reata.sqllineage4j.common.model.Column;
7 | import io.github.reata.sqllineage4j.common.model.Table;
8 | import io.github.reata.sqllineage4j.graph.GremlinLineageGraph;
9 | import io.github.reata.sqllineage4j.graph.LineageGraph;
10 | import org.javatuples.Pair;
11 |
12 | import java.util.Collections;
13 | import java.util.HashSet;
14 | import java.util.List;
15 | import java.util.Set;
16 | import java.util.stream.Collectors;
17 |
18 | public class SQLLineageHolder {
19 | private final LineageGraph lineageGraph;
20 |
21 | public SQLLineageHolder(LineageGraph lineageGraph) {
22 | this.lineageGraph = lineageGraph;
23 | }
24 |
25 | public Set getSourceTables() {
26 | LineageGraph tableLineageGraph = getTableLineageGraph();
27 | Set sourceTables = tableLineageGraph.retrieveSourceOnlyVertices()
28 | .stream().map(Table.class::cast).collect(Collectors.toSet());
29 | Set sourceOnlyTables = retrieveTagTables(NodeTag.SOURCE_ONLY);
30 | Set selfLoopTables = retrieveTagTables(NodeTag.SELFLOOP);
31 | sourceTables.addAll(sourceOnlyTables);
32 | sourceTables.addAll(selfLoopTables);
33 | return sourceTables;
34 | }
35 |
36 | public Set getTargetTables() {
37 | LineageGraph tableLineageGraph = getTableLineageGraph();
38 | Set targetTables = tableLineageGraph.retrieveTargetOnlyVertices()
39 | .stream().map(Table.class::cast).collect(Collectors.toSet());
40 | Set targetOnlyTables = retrieveTagTables(NodeTag.TARGET_ONLY);
41 | Set selfLoopTables = retrieveTagTables(NodeTag.SELFLOOP);
42 | targetTables.addAll(targetOnlyTables);
43 | targetTables.addAll(selfLoopTables);
44 | return targetTables;
45 | }
46 |
47 | public Set getIntermediateTables() {
48 | LineageGraph tableLineageGraph = getTableLineageGraph();
49 | Set intermediateTables = tableLineageGraph.retrieveConnectedVertices()
50 | .stream().map(Table.class::cast).collect(Collectors.toSet());
51 | intermediateTables.removeAll(retrieveTagTables(NodeTag.SELFLOOP));
52 | return intermediateTables;
53 | }
54 |
55 | public Set> getColumnLineage(boolean excludeSubquery) {
56 | LineageGraph columnLineageGraph = getColumnLineageGraph();
57 | Set targetColumns = columnLineageGraph.retrieveTargetOnlyVertices()
58 | .stream().map(Column.class::cast).collect(Collectors.toSet());
59 | Set sourceColumns = columnLineageGraph.retrieveSourceOnlyVertices()
60 | .stream().map(Column.class::cast).collect(Collectors.toSet());
61 | if (excludeSubquery) {
62 | targetColumns = targetColumns.stream().filter(c -> c.getParent() instanceof Table).collect(Collectors.toSet());
63 | }
64 |
65 | Set> columns = new HashSet<>();
66 | for (Column sourceColumn : sourceColumns) {
67 | for (Column targetColumn : targetColumns) {
68 | columnLineageGraph.listPath(sourceColumn, targetColumn).forEach(
69 | path -> columns.add(path.stream().map(c -> (Column) c).collect(Collectors.toList()))
70 | );
71 | }
72 | }
73 | return columns;
74 | }
75 |
76 | private LineageGraph getTableLineageGraph() {
77 | return lineageGraph.getSubGraph(Table.class.getSimpleName());
78 | }
79 |
80 | private LineageGraph getColumnLineageGraph() {
81 | return lineageGraph.getSubGraph(Column.class.getSimpleName());
82 | }
83 |
84 | private Set retrieveTagTables(String tag) {
85 | return lineageGraph.retrieveVerticesByProps(Collections.singletonMap(tag, true))
86 | .stream().map(Table.class::cast).collect(Collectors.toSet());
87 | }
88 |
89 | public static SQLLineageHolder of(StatementLineageHolder... statementLineageHolders) {
90 | LineageGraph graph = buildDiGraph(statementLineageHolders);
91 | return new SQLLineageHolder(graph);
92 | }
93 |
94 | private static LineageGraph buildDiGraph(StatementLineageHolder... statementLineageHolders) {
95 | LineageGraph lineageGraph = new GremlinLineageGraph();
96 | for (StatementLineageHolder holder : statementLineageHolders) {
97 | lineageGraph.merge(holder.getGraph());
98 | if (holder.getDrop().size() > 0) {
99 | lineageGraph.dropVerticesIfOrphan(holder.getDrop().toArray());
100 | } else if (holder.getRename().size() > 0) {
101 | for (Pair p : holder.getRename()) {
102 | Table tableOld = p.getValue0();
103 | Table tableNew = p.getValue1();
104 | for (EdgeTuple edgeTuple : lineageGraph.retrieveEdgesByVertex(tableOld)) {
105 | if (edgeTuple.source().equals(tableOld)) {
106 | lineageGraph.addEdgeIfNotExist(edgeTuple.label(), tableNew, edgeTuple.target());
107 | } else if (edgeTuple.target().equals(tableOld)) {
108 | lineageGraph.addEdgeIfNotExist(edgeTuple.label(), edgeTuple.source(), tableNew);
109 | }
110 | }
111 | lineageGraph.dropVertices(tableOld);
112 | lineageGraph.dropSelfLoopEdge();
113 | lineageGraph.dropVerticesIfOrphan(tableNew);
114 | }
115 | } else {
116 | Set read = holder.getRead();
117 | Set write = holder.getWrite();
118 | if (read.size() > 0 && write.size() == 0) {
119 | // source only table comes from SELECT statement
120 | lineageGraph.updateVertices(Collections.singletonMap(NodeTag.SOURCE_ONLY, Boolean.TRUE), read.toArray());
121 | } else if (read.size() == 0 && write.size() > 0) {
122 | // target only table comes from case like: 1) INSERT/UPDATE constant values; 2) CREATE TABLE
123 | lineageGraph.updateVertices(Collections.singletonMap(NodeTag.TARGET_ONLY, Boolean.TRUE), write.toArray());
124 | } else {
125 | for (Table r : read) {
126 | for (Table w : write) {
127 | lineageGraph.addEdgeIfNotExist(EdgeType.LINEAGE, r, w);
128 | }
129 | }
130 | }
131 | }
132 | }
133 | lineageGraph.updateVertices(Collections.singletonMap(NodeTag.SELFLOOP, Boolean.TRUE),
134 | lineageGraph.retrieveSelfLoopVertices().stream().filter(x -> x instanceof Table).toArray());
135 | return lineageGraph;
136 | }
137 | }
138 |
--------------------------------------------------------------------------------
/sqllineage4j-core/src/main/java/io/github/reata/sqllineage4j/core/holder/StatementLineageHolder.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.core.holder;
2 |
3 | import io.github.reata.sqllineage4j.common.constant.EdgeType;
4 | import io.github.reata.sqllineage4j.common.constant.NodeTag;
5 | import io.github.reata.sqllineage4j.common.model.Table;
6 | import org.javatuples.Pair;
7 |
8 | import java.util.Set;
9 | import java.util.stream.Collectors;
10 |
11 | public class StatementLineageHolder extends SubQueryLineageHolder {
12 |
13 | @Override
14 | public Set getRead() {
15 | return super.getRead().stream().filter(x -> x instanceof Table).map(x -> (Table) x).collect(Collectors.toSet());
16 | }
17 |
18 | @Override
19 | public Set getWrite() {
20 | return super.getWrite().stream().filter(x -> x instanceof Table).map(x -> (Table) x).collect(Collectors.toSet());
21 | }
22 |
23 | public Set getDrop() {
24 | return propertyGetter(NodeTag.DROP).stream().map(x -> (Table) x).collect(Collectors.toSet());
25 | }
26 |
27 | public Set> getRename() {
28 | return lineageGraph.retrieveEdgesByLabel(EdgeType.RENAME).stream().map(
29 | e -> new Pair<>((Table) e.source(), (Table) e.target())
30 | ).collect(Collectors.toSet());
31 | }
32 |
33 | public void addDrop(Table drop) {
34 | propertySetter(drop, NodeTag.DROP);
35 | }
36 |
37 | public void addRename(Table src, Table tgt) {
38 | lineageGraph.addVertexIfNotExist(src);
39 | lineageGraph.addVertexIfNotExist(tgt);
40 | lineageGraph.addEdgeIfNotExist(EdgeType.RENAME, src, tgt);
41 | }
42 |
43 | @Override
44 | public String toString() {
45 | return super.toString() +
46 | "table drop: " + getDrop().toString() + "\n" +
47 | "table cte: " + getRename().toString();
48 | }
49 |
50 | public void union(SubQueryLineageHolder holder) {
51 | getGraph().merge(holder.getGraph());
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/sqllineage4j-core/src/main/java/io/github/reata/sqllineage4j/core/holder/SubQueryLineageHolder.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.core.holder;
2 |
3 | import io.github.reata.sqllineage4j.common.constant.EdgeType;
4 | import io.github.reata.sqllineage4j.common.constant.NodeTag;
5 | import io.github.reata.sqllineage4j.common.entity.EdgeTuple;
6 | import io.github.reata.sqllineage4j.common.model.Column;
7 | import io.github.reata.sqllineage4j.common.model.QuerySet;
8 | import io.github.reata.sqllineage4j.common.model.SubQuery;
9 | import io.github.reata.sqllineage4j.graph.GremlinLineageGraph;
10 | import io.github.reata.sqllineage4j.graph.LineageGraph;
11 |
12 | import java.util.*;
13 | import java.util.stream.Collectors;
14 |
15 | public class SubQueryLineageHolder {
16 | final LineageGraph lineageGraph = new GremlinLineageGraph();
17 |
18 | public LineageGraph getGraph() {
19 | return lineageGraph;
20 | }
21 |
22 | void propertySetter(QuerySet value, String prop) {
23 | lineageGraph.addVertexIfNotExist(value, Collections.singletonMap(prop, Boolean.TRUE));
24 | }
25 |
26 | Set propertyGetter(String prop) {
27 | return lineageGraph.retrieveVerticesByProps(Collections.singletonMap(prop, true))
28 | .stream().map(x -> (QuerySet) x).collect(Collectors.toSet());
29 | }
30 |
31 | public Set extends QuerySet> getRead() {
32 | return propertyGetter(NodeTag.READ);
33 | }
34 |
35 | public Set extends QuerySet> getWrite() {
36 | return propertyGetter(NodeTag.WRITE);
37 | }
38 |
39 | public Set getCTE() {
40 | return propertyGetter(NodeTag.CTE).stream().map(x -> (SubQuery) x).collect(Collectors.toSet());
41 | }
42 |
43 | public void addRead(QuerySet read) {
44 | propertySetter(read, NodeTag.READ);
45 | if (read.getAlias() != null) {
46 | lineageGraph.addVertexIfNotExist(read.getAlias());
47 | lineageGraph.addEdgeIfNotExist(EdgeType.HAS_ALIAS, read, read.getAlias());
48 | }
49 | }
50 |
51 | public void addWrite(QuerySet write) {
52 | propertySetter(write, NodeTag.WRITE);
53 | }
54 |
55 | public void addCTE(SubQuery cte) {
56 | propertySetter(cte, NodeTag.CTE);
57 | }
58 |
59 | public void addColumnLineage(Column src, Column tgt) {
60 | lineageGraph.addVertexIfNotExist(src);
61 | lineageGraph.addVertexIfNotExist(tgt);
62 | lineageGraph.addEdgeIfNotExist(EdgeType.LINEAGE, src, tgt);
63 | lineageGraph.addEdgeIfNotExist(EdgeType.HAS_COLUMN, Objects.requireNonNull(tgt.getParent()), tgt);
64 | if (src.getParent() != null) {
65 | lineageGraph.addEdgeIfNotExist(EdgeType.HAS_COLUMN, Objects.requireNonNull(src.getParent()), src);
66 | }
67 | }
68 |
69 | @Override
70 | public String toString() {
71 | return "table read: " + getRead().toString() + "\n" +
72 | "table write: " + getWrite().toString() + "\n" +
73 | "table cte: " + getCTE().toString();
74 | }
75 |
76 | private final List selectColumns = new ArrayList<>();
77 |
78 | public List getSelectColumns() {
79 | return selectColumns;
80 | }
81 |
82 | public Map getQuerySetAlias() {
83 | Map aliasMapping = new HashMap<>();
84 | for (EdgeTuple edgeTuple : lineageGraph.retrieveEdgesByLabel(EdgeType.HAS_ALIAS)) {
85 | aliasMapping.put((String) edgeTuple.target(), (QuerySet) edgeTuple.source());
86 | }
87 | return aliasMapping;
88 | }
89 | }
90 |
--------------------------------------------------------------------------------
/sqllineage4j-core/src/test/java/io/github/reata/sqllineage4j/core/CTETest.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.core;
2 |
3 | import org.junit.Test;
4 |
5 | import java.util.Set;
6 |
7 | import static io.github.reata.sqllineage4j.core.Helper.assertTableLineage;
8 |
9 | public class CTETest {
10 | @Test
11 | public void testWithSelect() {
12 | assertTableLineage("WITH tab1 AS (SELECT 1) SELECT * FROM tab1", Set.of());
13 | }
14 |
15 | @Test
16 | public void testWithSelectOne() {
17 | assertTableLineage("WITH wtab1 AS (SELECT * FROM schema1.tab1) SELECT * FROM wtab1", Set.of("schema1.tab1"));
18 | }
19 |
20 | @Test
21 | public void testWithSelectOneWithoutAs() {
22 | // AS in CTE is negligible in SparkSQL, however it is required in MySQL. See below reference
23 | // https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-cte.html
24 | // https://dev.mysql.com/doc/refman/8.0/en/with.html
25 | assertTableLineage("WITH wtab1 (SELECT * FROM schema1.tab1) SELECT * FROM wtab1", Set.of("schema1.tab1"));
26 | }
27 |
28 | @Test
29 | public void testWithSelectMany() {
30 | assertTableLineage("WITH\n" +
31 | "cte1 AS (SELECT a, b FROM table1),\n" +
32 | "cte2 AS (SELECT c, d FROM table2)\n" +
33 | "SELECT b, d FROM cte1 JOIN cte2\n" +
34 | "WHERE cte1.a = cte2.c", Set.of("table1", "table2"));
35 | }
36 |
37 | @Test
38 | public void testWithSelectManyReference() {
39 | assertTableLineage("WITH\n" +
40 | "cte1 AS (SELECT a, b FROM tab1),\n" +
41 | "cte2 AS (SELECT a, count(*) AS cnt FROM cte1 GROUP BY a)\n" +
42 | "SELECT a, b, cnt FROM cte1 JOIN cte2\n" +
43 | "WHERE cte1.a = cte2.a", Set.of("tab1"));
44 | }
45 |
46 | @Test
47 | public void testWithUsingAlias() {
48 | assertTableLineage("WITH wtab1 AS (SELECT * FROM schema1.tab1) SELECT * FROM wtab1 wt", Set.of("schema1.tab1"));
49 | }
50 |
51 | @Test
52 | public void testWithSelectJoinTableWithSameName() {
53 | assertTableLineage("WITH wtab1 AS (SELECT * FROM schema1.tab1) SELECT * FROM wtab1 CROSS JOIN db.wtab1", Set.of("schema1.tab1", "db.wtab1"));
54 | }
55 |
56 | @Test
57 | public void testWithInsert() {
58 | assertTableLineage("WITH tab1 AS (SELECT * FROM tab2) INSERT INTO tab3 SELECT * FROM tab1", Set.of("tab2"), Set.of("tab3"));
59 | }
60 |
61 | @Test
62 | public void testWithInsertOverwrite() {
63 | assertTableLineage("WITH tab1 AS (SELECT * FROM tab2) INSERT OVERWRITE tab3 SELECT * FROM tab1", Set.of("tab2"), Set.of("tab3"));
64 | }
65 |
66 | @Test
67 | public void testWithInsertPlusKeywordTable() {
68 | assertTableLineage("WITH tab1 AS (SELECT * FROM tab2) INSERT INTO TABLE tab3 SELECT * FROM tab1", Set.of("tab2"), Set.of("tab3"));
69 | }
70 |
71 | @Test
72 | public void testWithInsertOverwritePlusKeywordTable() {
73 | assertTableLineage("WITH tab1 AS (SELECT * FROM tab2) INSERT OVERWRITE TABLE tab3 SELECT * FROM tab1", Set.of("tab2"), Set.of("tab3"));
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/sqllineage4j-core/src/test/java/io/github/reata/sqllineage4j/core/ColumnTest.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.core;
2 |
3 | import io.github.reata.sqllineage4j.common.entity.ColumnQualifierTuple;
4 | import org.javatuples.Pair;
5 | import org.junit.Test;
6 | import org.junit.jupiter.params.ParameterizedTest;
7 | import org.junit.jupiter.params.provider.ValueSource;
8 |
9 | import java.util.Set;
10 |
11 | import static io.github.reata.sqllineage4j.core.Helper.assertColumnLineage;
12 |
13 |
14 | public class ColumnTest {
15 | @Test
16 | public void testSelectColumn() {
17 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
18 | "SELECT col1\n" +
19 | "FROM tab2",
20 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
21 | ColumnQualifierTuple.create("col1", "tab1"))));
22 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
23 | "SELECT col1 AS col2\n" +
24 | "FROM tab2",
25 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
26 | ColumnQualifierTuple.create("col2", "tab1"))));
27 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
28 | "SELECT tab2.col1 AS col2\n" +
29 | "FROM tab2",
30 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
31 | ColumnQualifierTuple.create("col2", "tab1"))));
32 | }
33 |
34 | @Test
35 | public void testSelectColumnWildcard() {
36 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
37 | "SELECT *\n" +
38 | "FROM tab2",
39 | Set.of(Pair.with(ColumnQualifierTuple.create("*", "tab2"),
40 | ColumnQualifierTuple.create("*", "tab1"))));
41 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
42 | "SELECT *\n" +
43 | "FROM tab2 a\n" +
44 | " INNER JOIN tab3 b\n" +
45 | " ON a.id = b.id",
46 | Set.of(Pair.with(ColumnQualifierTuple.create("*", "tab2"),
47 | ColumnQualifierTuple.create("*", "tab1")),
48 | Pair.with(ColumnQualifierTuple.create("*", "tab3"),
49 | ColumnQualifierTuple.create("*", "tab1"))));
50 | }
51 |
52 | @Test
53 | public void testSelectColumnUsingFunction() {
54 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
55 | "SELECT max(col1),\n" +
56 | " count(*)\n" +
57 | "FROM tab2",
58 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
59 | ColumnQualifierTuple.create("max(col1)", "tab1")),
60 | Pair.with(ColumnQualifierTuple.create("*", "tab2"),
61 | ColumnQualifierTuple.create("count(*)", "tab1"))));
62 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
63 | "SELECT max(col1) AS col2,\n" +
64 | " count(*) AS cnt\n" +
65 | "FROM tab2",
66 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
67 | ColumnQualifierTuple.create("col2", "tab1")),
68 | Pair.with(ColumnQualifierTuple.create("*", "tab2"),
69 | ColumnQualifierTuple.create("cnt", "tab1"))));
70 | }
71 |
72 | @Test
73 | public void testSelectColumnUsingFunctionWithComplexParameter() {
74 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
75 | "SELECT if(col1 = 'foo' AND col2 = 'bar', 1, 0) AS flag\n" +
76 | "FROM tab2",
77 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
78 | ColumnQualifierTuple.create("flag", "tab1")),
79 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"),
80 | ColumnQualifierTuple.create("flag", "tab1"))));
81 | }
82 |
83 | @Test
84 | public void testSelectColumnUsingWindowFunction() {
85 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
86 | "SELECT row_number() OVER (PARTITION BY col1 ORDER BY col2 DESC) AS rnum\n" +
87 | "FROM tab2",
88 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
89 | ColumnQualifierTuple.create("rnum", "tab1")),
90 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"),
91 | ColumnQualifierTuple.create("rnum", "tab1"))));
92 | }
93 |
94 | @Test
95 | public void testSelectColumnUsingWindowFunctionWithParameters() {
96 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
97 | "SELECT col0,\n" +
98 | " max(col3) OVER (PARTITION BY col1 ORDER BY col2 DESC) AS rnum,\n" +
99 | " col4\n" +
100 | "FROM tab2",
101 | Set.of(Pair.with(ColumnQualifierTuple.create("col0", "tab2"),
102 | ColumnQualifierTuple.create("col0", "tab1")),
103 | Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
104 | ColumnQualifierTuple.create("rnum", "tab1")),
105 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"),
106 | ColumnQualifierTuple.create("rnum", "tab1")),
107 | Pair.with(ColumnQualifierTuple.create("col3", "tab2"),
108 | ColumnQualifierTuple.create("rnum", "tab1")),
109 | Pair.with(ColumnQualifierTuple.create("col4", "tab2"),
110 | ColumnQualifierTuple.create("col4", "tab1"))));
111 | }
112 |
113 | @Test
114 | public void testSelectColumnUsingCast() {
115 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
116 | "SELECT cast(col1 as timestamp)\n" +
117 | "FROM tab2",
118 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
119 | ColumnQualifierTuple.create("cast(col1 as timestamp)", "tab1"))));
120 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
121 | "SELECT cast(col1 as timestamp) as col2\n" +
122 | "FROM tab2",
123 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
124 | ColumnQualifierTuple.create("col2", "tab1"))));
125 | }
126 |
127 | @Test
128 | public void testSelectColumnUsingExpression() {
129 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
130 | "SELECT col1 + col2\n" +
131 | "FROM tab2",
132 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
133 | ColumnQualifierTuple.create("col1 + col2", "tab1")),
134 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"),
135 | ColumnQualifierTuple.create("col1 + col2", "tab1"))));
136 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
137 | "SELECT col1 + col2 AS col3\n" +
138 | "FROM tab2",
139 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
140 | ColumnQualifierTuple.create("col3", "tab1")),
141 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"),
142 | ColumnQualifierTuple.create("col3", "tab1"))));
143 | }
144 |
145 | @Test
146 | public void testSelectColumnUsingExpressionInParenthesis() {
147 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
148 | "SELECT (col1 + col2) AS col3\n" +
149 | "FROM tab2",
150 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
151 | ColumnQualifierTuple.create("col3", "tab1")),
152 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"),
153 | ColumnQualifierTuple.create("col3", "tab1"))));
154 | }
155 |
156 | @Test
157 | public void testSelectColumnUsingBooleanExpressionInParenthesis() {
158 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
159 | "SELECT (col1 > 0 AND col2 > 0) AS col3\n" +
160 | "FROM tab2",
161 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
162 | ColumnQualifierTuple.create("col3", "tab1")),
163 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"),
164 | ColumnQualifierTuple.create("col3", "tab1"))));
165 | }
166 |
167 | @Test
168 | public void testSelectColumnUsingExpressionWithTableQualifierWithoutColumnAlias() {
169 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
170 | "SELECT a.col1 + a.col2 + a.col3 + a.col4\n" +
171 | "FROM tab2 a",
172 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
173 | ColumnQualifierTuple.create("a.col1 + a.col2 + a.col3 + a.col4", "tab1")),
174 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"),
175 | ColumnQualifierTuple.create("a.col1 + a.col2 + a.col3 + a.col4", "tab1")),
176 | Pair.with(ColumnQualifierTuple.create("col3", "tab2"),
177 | ColumnQualifierTuple.create("a.col1 + a.col2 + a.col3 + a.col4", "tab1")),
178 | Pair.with(ColumnQualifierTuple.create("col4", "tab2"),
179 | ColumnQualifierTuple.create("a.col1 + a.col2 + a.col3 + a.col4", "tab1"))));
180 | }
181 |
182 | @Test
183 | public void testSelectColumnUsingCaseWhen() {
184 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
185 | "SELECT CASE WHEN col1 = 1 THEN 'V1' WHEN col1 = 2 THEN 'V2' END\n" +
186 | "FROM tab2",
187 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
188 | ColumnQualifierTuple.create("CASE WHEN col1 = 1 THEN 'V1' WHEN col1 = 2 THEN 'V2' END", "tab1"))));
189 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
190 | "SELECT CASE WHEN col1 = 1 THEN 'V1' WHEN col1 = 2 THEN 'V2' END AS col2\n" +
191 | "FROM tab2",
192 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
193 | ColumnQualifierTuple.create("col2", "tab1"))));
194 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
195 | "SELECT CASE WHEN col1 = 1 THEN 'V1' WHEN col1 = 2 THEN 'V2' ELSE col_v END\n" +
196 | "FROM tab2",
197 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
198 | ColumnQualifierTuple.create("CASE WHEN col1 = 1 THEN 'V1' WHEN col1 = 2 THEN 'V2' ELSE col_v END", "tab1")),
199 | Pair.with(ColumnQualifierTuple.create("col_v", "tab2"),
200 | ColumnQualifierTuple.create("CASE WHEN col1 = 1 THEN 'V1' WHEN col1 = 2 THEN 'V2' ELSE col_v END", "tab1"))));
201 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
202 | "SELECT CASE WHEN col1 = 1 THEN 'V1' WHEN col1 = 2 THEN 'V2' ELSE col_v END AS col2\n" +
203 | "FROM tab2",
204 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
205 | ColumnQualifierTuple.create("col2", "tab1")),
206 | Pair.with(ColumnQualifierTuple.create("col_v", "tab2"),
207 | ColumnQualifierTuple.create("col2", "tab1"))));
208 | }
209 |
210 | // @Test
211 | // public void testSelectColumnUsingCaseWhenWithSubquery() {
212 | // assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
213 | // "SELECT CASE WHEN (SELECT avg(col1) FROM tab3) > 0 AND col2 = 1 THEN (SELECT avg(col1) FROM tab3) ELSE 0 END AS col1\n" +
214 | // "FROM tab4",
215 | // Set.of(Pair.with(ColumnQualifierTuple.create("col2", "tab4"),
216 | // ColumnQualifierTuple.create("col1", "tab1")),
217 | // Pair.with(ColumnQualifierTuple.create("col1", "tab3"),
218 | // ColumnQualifierTuple.create("col1", "tab1"))));
219 | // }
220 |
221 | @Test
222 | public void testSelectColumnWithTableQualifier() {
223 | // assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
224 | // "SELECT tab2.col1\n" +
225 | // "FROM tab2",
226 | // Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
227 | // ColumnQualifierTuple.create("col1", "tab1"))));
228 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
229 | "SELECT t.col1\n" +
230 | "FROM tab2 AS t",
231 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
232 | ColumnQualifierTuple.create("col1", "tab1"))));
233 | }
234 |
235 | @Test
236 | public void testSelectColumns() {
237 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
238 | "SELECT col1,\n" +
239 | "col2\n" +
240 | "FROM tab2",
241 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
242 | ColumnQualifierTuple.create("col1", "tab1")),
243 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"),
244 | ColumnQualifierTuple.create("col2", "tab1"))));
245 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
246 | "SELECT max(col1),\n" +
247 | "max(col2)\n" +
248 | "FROM tab2",
249 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
250 | ColumnQualifierTuple.create("max(col1)", "tab1")),
251 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"),
252 | ColumnQualifierTuple.create("max(col2)", "tab1"))));
253 | }
254 |
255 | @Test
256 | public void testSelectColumnInSubquery() {
257 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
258 | "SELECT col1\n" +
259 | "FROM (SELECT col1 FROM tab2) dt",
260 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
261 | ColumnQualifierTuple.create("col1", "tab1"))));
262 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
263 | "SELECT col1\n" +
264 | "FROM (SELECT col1, col2 FROM tab2) dt",
265 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
266 | ColumnQualifierTuple.create("col1", "tab1"))));
267 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
268 | "SELECT col1\n" +
269 | "FROM (SELECT col1 FROM tab2)",
270 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
271 | ColumnQualifierTuple.create("col1", "tab1"))));
272 | }
273 |
274 | @Test
275 | public void testSelectColumnInSubqueryWithTwoParenthesis() {
276 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
277 | "SELECT col1\n" +
278 | "FROM ((SELECT col1 FROM tab2)) dt",
279 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
280 | ColumnQualifierTuple.create("col1", "tab1"))));
281 | }
282 |
283 | @Test
284 | public void testSelectColumnInSubqueryWithTwoParenthesisAndBlankInBetween() {
285 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
286 | "SELECT col1\n" +
287 | "FROM (\n" +
288 | "(SELECT col1 FROM tab2)\n" +
289 | ") dt",
290 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
291 | ColumnQualifierTuple.create("col1", "tab1"))));
292 | }
293 |
294 | @Test
295 | public void testSelectColumnInSubqueryWithTwoParenthesisAndUnion() {
296 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
297 | "SELECT col1\n" +
298 | "FROM (\n" +
299 | " (SELECT col1 FROM tab2)\n" +
300 | " UNION ALL\n" +
301 | " (SELECT col1 FROM tab3)\n" +
302 | ") dt",
303 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
304 | ColumnQualifierTuple.create("col1", "tab1")),
305 | Pair.with(ColumnQualifierTuple.create("col1", "tab3"),
306 | ColumnQualifierTuple.create("col1", "tab1"))));
307 | }
308 |
309 | @Test
310 | public void testSelectColumnInSubqueryWithTwoParenthesisAndUnionV2() {
311 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
312 | "SELECT col1\n" +
313 | "FROM (\n" +
314 | " SELECT col1 FROM tab2\n" +
315 | " UNION ALL\n" +
316 | " SELECT col1 FROM tab3\n" +
317 | ") dt",
318 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
319 | ColumnQualifierTuple.create("col1", "tab1")),
320 | Pair.with(ColumnQualifierTuple.create("col1", "tab3"),
321 | ColumnQualifierTuple.create("col1", "tab1"))));
322 | }
323 |
324 | @Test
325 | public void testSelectColumnWithoutTableQualifierFromTableJoin() {
326 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
327 | "SELECT col1\n" +
328 | "FROM tab2 a\n" +
329 | " INNER JOIN tab3 b\n" +
330 | " ON a.id = b.id",
331 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", null),
332 | ColumnQualifierTuple.create("col1", "tab1"))));
333 | }
334 |
335 | @Test
336 | public void testSelectColumnFromSameTableMultipleTimeUsingDifferentAlias() {
337 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
338 | "SELECT a.col1 AS col2,\n" +
339 | " b.col1 AS col3\n" +
340 | "FROM tab2 a\n" +
341 | " JOIN tab2 b\n" +
342 | " ON a.parent_id = b.id",
343 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
344 | ColumnQualifierTuple.create("col2", "tab1")),
345 | Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
346 | ColumnQualifierTuple.create("col3", "tab1"))));
347 | }
348 |
349 | @Test
350 | public void testCommentAfterColumnCommaFirst() {
351 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
352 | "SELECT a.col1\n" +
353 | " --, a.col2\n" +
354 | " , a.col3\n" +
355 | "FROM tab2 a",
356 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
357 | ColumnQualifierTuple.create("col1", "tab1")),
358 | Pair.with(ColumnQualifierTuple.create("col3", "tab2"),
359 | ColumnQualifierTuple.create("col3", "tab1"))));
360 | }
361 |
362 | @Test
363 | public void testCommentAfterColumnCommaLast() {
364 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
365 | "SELECT a.col1,\n" +
366 | " -- a.col2,\n" +
367 | " a.col3\n" +
368 | "FROM tab2 a",
369 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
370 | ColumnQualifierTuple.create("col1", "tab1")),
371 | Pair.with(ColumnQualifierTuple.create("col3", "tab2"),
372 | ColumnQualifierTuple.create("col3", "tab1"))));
373 | }
374 |
375 | @Test
376 | public void testCastWithComparison() {
377 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
378 | "SELECT cast(col1 = 1 AS int) col1, col2 = col3 col2\n" +
379 | "FROM tab2",
380 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
381 | ColumnQualifierTuple.create("col1", "tab1")),
382 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"),
383 | ColumnQualifierTuple.create("col2", "tab1")),
384 | Pair.with(ColumnQualifierTuple.create("col3", "tab2"),
385 | ColumnQualifierTuple.create("col2", "tab1"))));
386 | }
387 |
388 | @ParameterizedTest
389 | @ValueSource(strings = {"string", "timestamp", "date", "datetime", "decimal(18, 0)"})
390 | public void testCastToDataType(String dtype) {
391 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
392 | "SELECT cast(col1 as " + dtype + ") AS col1\n" +
393 | "FROM tab2",
394 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
395 | ColumnQualifierTuple.create("col1", "tab1"))));
396 | }
397 |
398 | @ParameterizedTest
399 | @ValueSource(strings = {"string", "timestamp", "date", "datetime", "decimal(18, 0)"})
400 | public void testNestedCastToDataType(String dtype) {
401 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
402 | "SELECT cast(cast(col1 AS " + dtype + ") AS " + dtype + ") AS col1\n" +
403 | "FROM tab2",
404 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
405 | ColumnQualifierTuple.create("col1", "tab1"))));
406 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
407 | "SELECT cast(cast(cast(cast(cast(col1 AS " + dtype + ") AS " + dtype + ") AS " + dtype + ") AS " + dtype + ") AS " + dtype + ") AS col1\n" +
408 | "FROM tab2",
409 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
410 | ColumnQualifierTuple.create("col1", "tab1"))));
411 | }
412 |
413 | @ParameterizedTest
414 | @ValueSource(strings = {"string", "timestamp", "date", "datetime", "decimal(18, 0)"})
415 | public void testCastToDataTypeWithCaseWhen(String dtype) {
416 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
417 | "SELECT cast(case when col1 > 0 then col2 else col3 end as " + dtype + ") AS col1\n" +
418 | "FROM tab2",
419 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
420 | ColumnQualifierTuple.create("col1", "tab1")),
421 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"),
422 | ColumnQualifierTuple.create("col1", "tab1")),
423 | Pair.with(ColumnQualifierTuple.create("col3", "tab2"),
424 | ColumnQualifierTuple.create("col1", "tab1"))));
425 | }
426 |
427 | @Test
428 | public void testCastUsingConstant() {
429 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" +
430 | "SELECT cast('2012-12-21' as date) AS col2",
431 | Set.of());
432 | }
433 |
434 | @Test
435 | public void testWindowFunctionInSubquery() {
436 | assertColumnLineage("INSERT INTO tab1\n" +
437 | "SELECT rn FROM (\n" +
438 | " SELECT\n" +
439 | " row_number() OVER (PARTITION BY col1, col2) rn\n" +
440 | " FROM tab2\n" +
441 | ") sub\n" +
442 | "WHERE rn = 1",
443 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
444 | ColumnQualifierTuple.create("rn", "tab1")),
445 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"),
446 | ColumnQualifierTuple.create("rn", "tab1"))));
447 | }
448 |
449 | @Test
450 | public void testInvalidSyntaxAsWithoutAlias() {
451 | String sql = "INSERT OVERWRITE TABLE tab1\n" +
452 | "SELECT col1,\n" +
453 | " col2 as,\n" +
454 | " col3\n" +
455 | "FROM tab2";
456 | // just assure no exception, don't guarantee the result
457 | LineageRunner runner = LineageRunner.builder(sql).build();
458 | runner.getColumnLineage();
459 | }
460 |
461 | @Test
462 | public void testColumnReferenceFromCteUsingAlias() {
463 | assertColumnLineage("WITH wtab1 AS (SELECT col1 FROM tab2)\n" +
464 | "INSERT OVERWRITE TABLE tab1\n" +
465 | "SELECT wt.col1 FROM wtab1 wt",
466 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
467 | ColumnQualifierTuple.create("col1", "tab1"))));
468 | }
469 |
470 | @Test
471 | public void testColumnReferenceFromCteUsingQualifier() {
472 | assertColumnLineage("WITH wtab1 AS (SELECT col1 FROM tab2)\n" +
473 | "INSERT OVERWRITE TABLE tab1\n" +
474 | "SELECT wtab1.col1 FROM wtab1",
475 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
476 | ColumnQualifierTuple.create("col1", "tab1"))));
477 | }
478 |
479 | @Test
480 | public void testColumnReferenceFromPreviousDefinedCte() {
481 | assertColumnLineage("WITH\n" +
482 | "cte1 AS (SELECT a FROM tab1),\n" +
483 | "cte2 AS (SELECT a FROM cte1)\n" +
484 | "INSERT OVERWRITE TABLE tab2\n" +
485 | "SELECT a FROM cte2",
486 | Set.of(Pair.with(ColumnQualifierTuple.create("a", "tab1"),
487 | ColumnQualifierTuple.create("a", "tab2"))));
488 | }
489 |
490 | @Test
491 | public void testMultipleColumnReferencesFromPreviousDefinedCte() {
492 | assertColumnLineage("WITH\n" +
493 | "cte1 AS (SELECT a, b FROM tab1),\n" +
494 | "cte2 AS (SELECT a, max(b) AS b_max, count(b) AS b_cnt FROM cte1 GROUP BY a)\n" +
495 | "INSERT OVERWRITE TABLE tab2\n" +
496 | "SELECT cte1.a, cte2.b_max, cte2.b_cnt FROM cte1 JOIN cte2\n" +
497 | "WHERE cte1.a = cte2.a",
498 | Set.of(Pair.with(ColumnQualifierTuple.create("a", "tab1"),
499 | ColumnQualifierTuple.create("a", "tab2")),
500 | Pair.with(ColumnQualifierTuple.create("b", "tab1"),
501 | ColumnQualifierTuple.create("b_max", "tab2")),
502 | Pair.with(ColumnQualifierTuple.create("b", "tab1"),
503 | ColumnQualifierTuple.create("b_cnt", "tab2"))));
504 | }
505 |
506 | @Test
507 | public void testColumnReferenceWithAnsi89Join() {
508 | assertColumnLineage("INSERT OVERWRITE TABLE tab3\n" +
509 | "SELECT a.id,\n" +
510 | " a.name AS name1,\n" +
511 | " b.name AS name2\n" +
512 | "FROM (SELECT id, name\n" +
513 | " FROM tab1) a,\n" +
514 | " (SELECT id, name\n" +
515 | " FROM tab2) b\n" +
516 | "WHERE a.id = b.id",
517 | Set.of(Pair.with(ColumnQualifierTuple.create("id", "tab1"),
518 | ColumnQualifierTuple.create("id", "tab3")),
519 | Pair.with(ColumnQualifierTuple.create("name", "tab1"),
520 | ColumnQualifierTuple.create("name1", "tab3")),
521 | Pair.with(ColumnQualifierTuple.create("name", "tab2"),
522 | ColumnQualifierTuple.create("name2", "tab3"))));
523 | }
524 |
525 | // @Test
526 | // public void testSmarterColumnResolutionUsingQueryContext() {
527 | // assertColumnLineage("WITH\n" +
528 | // "cte1 AS (SELECT a, b FROM tab1),\n" +
529 | // "cte2 AS (SELECT c, d FROM tab2)\n" +
530 | // "INSERT OVERWRITE TABLE tab3\n" +
531 | // "SELECT b, d FROM cte1 JOIN cte2\n" +
532 | // "WHERE cte1.a = cte2.c",
533 | // Set.of(Pair.with(ColumnQualifierTuple.create("b", "tab1"),
534 | // ColumnQualifierTuple.create("b", "tab3")),
535 | // Pair.with(ColumnQualifierTuple.create("d", "tab2"),
536 | // ColumnQualifierTuple.create("d", "tab3"))));
537 | // }
538 |
539 | @Test
540 | public void testColumnReferenceUsingUnion() {
541 | assertColumnLineage("INSERT OVERWRITE TABLE tab3\n" +
542 | "SELECT col1\n" +
543 | "FROM tab1\n" +
544 | "UNION ALL\n" +
545 | "SELECT col1\n" +
546 | "FROM tab2",
547 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab1"),
548 | ColumnQualifierTuple.create("col1", "tab3")),
549 | Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
550 | ColumnQualifierTuple.create("col1", "tab3"))));
551 | assertColumnLineage("INSERT OVERWRITE TABLE tab3\n" +
552 | "SELECT col1\n" +
553 | "FROM tab1\n" +
554 | "UNION\n" +
555 | "SELECT col1\n" +
556 | "FROM tab2",
557 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab1"),
558 | ColumnQualifierTuple.create("col1", "tab3")),
559 | Pair.with(ColumnQualifierTuple.create("col1", "tab2"),
560 | ColumnQualifierTuple.create("col1", "tab3"))));
561 | }
562 |
563 | // @Test
564 | // public void testColumnLineageMultiplePathsForSameColumn() {
565 | // assertColumnLineage("INSERT OVERWRITE TABLE tab2\n" +
566 | // "SELECT tab1.id,\n" +
567 | // " coalesce(join_table_1.col1, join_table_2.col1, join_table_3.col1) AS col1\n" +
568 | // "FROM tab1\n" +
569 | // " LEFT JOIN (SELECT id, col1 FROM tab1 WHERE flag = 1) AS join_table_1\n" +
570 | // " ON tab1.id = join_table_1.id\n" +
571 | // " LEFT JOIN (SELECT id, col1 FROM tab1 WHERE flag = 2) AS join_table_2\n" +
572 | // " ON tab1.id = join_table_2.id\n" +
573 | // " LEFT JOIN (SELECT id, col1 FROM tab1 WHERE flag = 3) AS join_table_3\n" +
574 | // " ON tab1.id = join_table_3.id",
575 | // Set.of(Pair.with(ColumnQualifierTuple.create("id", "tab1"),
576 | // ColumnQualifierTuple.create("id", "tab2")),
577 | // Pair.with(ColumnQualifierTuple.create("col1", "tab1"),
578 | // ColumnQualifierTuple.create("col1", "tab2"))));
579 | // }
580 |
581 | // @ParameterizedTest
582 | // @ValueSource(strings = {"string", "timestamp", "date", "datetime", "decimal(18, 0)"})
583 | // public void testColumnTryCastWithFunc(String func) {
584 | // assertColumnLineage("INSERT OVERWRITE TABLE tab2\n" +
585 | // "SELECT try_cast(" + func + ") AS col2\n" +
586 | // "FROM tab1",
587 | // Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab1"),
588 | // ColumnQualifierTuple.create("col1", "tab2"))));
589 | // }
590 |
591 | @Test
592 | public void testColumnWithCtasAndFunc() {
593 | assertColumnLineage("CREATE TABLE tab2 AS\n" +
594 | "SELECT\n" +
595 | " coalesce(col1, 0) AS col1,\n" +
596 | " IF(\n" +
597 | " col1 IS NOT NULL,\n" +
598 | " 1,\n" +
599 | " NULL\n" +
600 | " ) AS col2\n" +
601 | "FROM\n" +
602 | " tab1",
603 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab1"),
604 | ColumnQualifierTuple.create("col1", "tab2")),
605 | Pair.with(ColumnQualifierTuple.create("col1", "tab1"),
606 | ColumnQualifierTuple.create("col2", "tab2"))));
607 | }
608 | }
609 |
--------------------------------------------------------------------------------
/sqllineage4j-core/src/test/java/io/github/reata/sqllineage4j/core/CreateTest.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.core;
2 |
3 | import org.junit.Test;
4 |
5 | import java.util.Set;
6 |
7 | import static io.github.reata.sqllineage4j.core.Helper.assertTableLineage;
8 |
9 | public class CreateTest {
10 | @Test
11 | public void testCreate() {
12 | assertTableLineage("CREATE TABLE tab1 (col1 STRING)", Set.of(), Set.of("tab1"));
13 | }
14 |
15 | @Test
16 | public void testCreateIfNotExist() {
17 | assertTableLineage("CREATE TABLE IF NOT EXISTS tab1 (col1 STRING)", Set.of(), Set.of("tab1"));
18 | }
19 |
20 | @Test
21 | public void testCreateBucketTable() {
22 | assertTableLineage("CREATE TABLE tab1 USING parquet CLUSTERED BY (col1) INTO 500 BUCKETS", Set.of(), Set.of("tab1"));
23 | }
24 |
25 | @Test
26 | public void testCreateAs() {
27 | assertTableLineage("CREATE TABLE tab1 AS SELECT * FROM tab2", Set.of("tab2"), Set.of("tab1"));
28 | }
29 |
30 | @Test
31 | public void testCreateAsWithParenthesisAroundSelectStatement() {
32 | assertTableLineage("CREATE TABLE tab1 AS (SELECT * FROM tab2)", Set.of("tab2"), Set.of("tab1"));
33 | }
34 |
35 | @Test
36 | public void testCreateAsWithParenthesisAroundTableName() {
37 | assertTableLineage("CREATE TABLE tab1 AS SELECT * FROM (tab2)", Set.of("tab2"), Set.of("tab1"));
38 | }
39 |
40 | @Test
41 | public void testCreateAsWithParenthesisAroundBoth() {
42 | assertTableLineage("CREATE TABLE tab1 AS (SELECT * FROM (tab2))", Set.of("tab2"), Set.of("tab1"));
43 | }
44 |
45 | @Test
46 | public void testCreateLike() {
47 | assertTableLineage("CREATE TABLE tab1 LIKE tab2", Set.of("tab2"), Set.of("tab1"));
48 | }
49 |
50 | @Test
51 | public void testCreateSelect() {
52 | assertTableLineage("CREATE TABLE tab1 SELECT * FROM tab2", Set.of("tab2"), Set.of("tab1"));
53 | }
54 |
55 | @Test
56 | public void testCreateUsingSerde() {
57 | // Check https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-RowFormats&SerDe
58 | // here with is not an indicator for CTE
59 | assertTableLineage("CREATE TABLE apachelog (\n" +
60 | " host STRING,\n" +
61 | " identity STRING,\n" +
62 | " user STRING,\n" +
63 | " time STRING,\n" +
64 | " request STRING,\n" +
65 | " status STRING,\n" +
66 | " size STRING,\n" +
67 | " referer STRING,\n" +
68 | " agent STRING)\n" +
69 | "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'\n" +
70 | "WITH SERDEPROPERTIES (\n" +
71 | " \"input.regex\" = \"([^]*) ([^]*) ([^]*) (-|\\\\[^\\\\]*\\\\]) ([^ \\\"]*|\\\"[^\\\"]*\\\") (-|[0-9]*) (-|[0-9]*)(?: ([^ \\\"]*|\\\".*\\\") ([^ \\\"]*|\\\".*\\\"))?\"\n" +
72 | ")\n" +
73 | "STORED AS TEXTFILE", Set.of(), Set.of("apachelog"));
74 | }
75 |
76 | @Test
77 | public void testBucketWithUsingParenthesis() {
78 | assertTableLineage("CREATE TABLE tbl1 (col1 VARCHAR)\n" +
79 | "WITH (bucketed_on = array['col1'], bucket_count = 256);", Set.of(), Set.of("tbl1"));
80 | }
81 | }
82 |
--------------------------------------------------------------------------------
/sqllineage4j-core/src/test/java/io/github/reata/sqllineage4j/core/Helper.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.core;
2 |
3 | import io.github.reata.sqllineage4j.common.entity.ColumnQualifierTuple;
4 | import io.github.reata.sqllineage4j.common.model.Column;
5 | import io.github.reata.sqllineage4j.common.model.Table;
6 | import org.javatuples.Pair;
7 |
8 | import java.util.HashSet;
9 | import java.util.Objects;
10 | import java.util.Set;
11 | import java.util.stream.Collectors;
12 |
13 | import static org.junit.Assert.assertEquals;
14 |
15 | public class Helper {
16 |
17 | public static void assertTableLineage(String sql) {
18 | assertTableLineage(sql, Set.of(), Set.of());
19 | }
20 |
21 | public static void assertTableLineage(String sql, Set sourceTables) {
22 | assertTableLineage(sql, sourceTables, Set.of());
23 | }
24 |
25 | public static void assertTableLineage(String sql, Set sourceTables, Set targetTables) {
26 | LineageRunner runner = LineageRunner.builder(sql).build();
27 | assertEquals("Source Table Equal", sourceTables.stream().map(Table::new).collect(Collectors.toSet()), Set.copyOf(runner.sourceTables()));
28 | assertEquals("Target Table Equal", targetTables.stream().map(Table::new).collect(Collectors.toSet()), Set.copyOf(runner.targetTables()));
29 | }
30 |
31 | public static void assertColumnLineage(String sql, Set> columnLineages) {
32 | Set> expected = new HashSet<>();
33 | for (Pair cqtPair : columnLineages) {
34 | ColumnQualifierTuple srcCqt = cqtPair.getValue0();
35 | ColumnQualifierTuple tgtCqt = cqtPair.getValue1();
36 | Column srcCol = new Column(srcCqt.column());
37 | if (srcCqt.qualifier() != null) {
38 | srcCol.setParent(new Table(srcCqt.qualifier()));
39 | }
40 | Column tgtCol = new Column(tgtCqt.column());
41 | tgtCol.setParent(new Table(Objects.requireNonNull(tgtCqt.qualifier())));
42 | expected.add(Pair.with(srcCol, tgtCol));
43 | }
44 | LineageRunner runner = LineageRunner.builder(sql).build();
45 | Set> actual = new HashSet<>(runner.getColumnLineage());
46 | assertEquals(expected, actual);
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/sqllineage4j-core/src/test/java/io/github/reata/sqllineage4j/core/InsertTest.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.core;
2 |
3 | import org.junit.Test;
4 |
5 | import java.util.Set;
6 |
7 | import static io.github.reata.sqllineage4j.core.Helper.assertTableLineage;
8 |
9 | public class InsertTest {
10 | @Test
11 | public void testInsertInto() {
12 | assertTableLineage("INSERT INTO tab1 VALUES (1, 2)", Set.of(), Set.of("tab1"));
13 | }
14 |
15 | @Test
16 | public void testInsertIntoWithKeywordTable() {
17 | assertTableLineage("INSERT INTO TABLE tab1 VALUES (1, 2)", Set.of(), Set.of("tab1"));
18 | }
19 |
20 | @Test
21 | public void testInsertIntoWithColumns() {
22 | assertTableLineage("INSERT INTO tab1 (col1, col2) SELECT * FROM tab2;", Set.of("tab2"), Set.of("tab1"));
23 | }
24 |
25 | @Test
26 | public void testInsertIntoWithColumnsAndSelect() {
27 | assertTableLineage("INSERT INTO tab1 (col1, col2) SELECT * FROM tab2", Set.of("tab2"), Set.of("tab1"));
28 | }
29 |
30 | @Test
31 | public void testInsertIntoWithColumnsAndSelectUnion() {
32 | assertTableLineage("INSERT INTO tab1 (col1, col2) SELECT * FROM tab2 UNION SELECT * FROM tab3", Set.of("tab2", "tab3"), Set.of("tab1"));
33 | assertTableLineage("INSERT INTO tab1 (col1, col2) (SELECT * FROM tab2 UNION SELECT * FROM tab3)", Set.of("tab2", "tab3"), Set.of("tab1"));
34 | }
35 |
36 | @Test
37 | public void testInsertIntoPartitions() {
38 | assertTableLineage("INSERT INTO TABLE tab1 PARTITION (par1=1) SELECT * FROM tab2", Set.of("tab2"), Set.of("tab1"));
39 | }
40 |
41 | @Test
42 | public void testInsertOverwrite() {
43 | assertTableLineage("INSERT OVERWRITE tab1 SELECT * FROM tab2", Set.of("tab2"), Set.of("tab1"));
44 | }
45 |
46 | @Test
47 | public void testInsertOverwriteWithKeywordTable() {
48 | assertTableLineage("INSERT OVERWRITE TABLE tab1 SELECT col1 FROM tab2", Set.of("tab2"), Set.of("tab1"));
49 | }
50 |
51 | @Test
52 | public void testInsertOverwriteValues() {
53 | assertTableLineage("INSERT OVERWRITE tab1 VALUES ('val1', 'val2'), ('val3', 'val4')", Set.of(), Set.of("tab1"));
54 | }
55 |
56 | @Test
57 | public void testInsertOverwriteFromSelf() {
58 | assertTableLineage("INSERT OVERWRITE TABLE foo\n" +
59 | "SELECT col from foo\n" +
60 | "WHERE flag IS NOT NULL", Set.of("foo"), Set.of("foo"));
61 | }
62 |
63 | @Test
64 | public void testInsertOverwriteFromSelfWithJoin() {
65 | assertTableLineage("INSERT OVERWRITE TABLE tab_1\n" +
66 | "SELECT tab2.col_a from tab_2\n" +
67 | "JOIN tab_1\n" +
68 | "ON tab_1.col_a = tab_2.cola", Set.of("tab_1", "tab_2"), Set.of("tab_1"));
69 | }
70 |
71 | @Test
72 | public void testInsertIntoQualifiedTableWithParenthesizedQuery() {
73 | assertTableLineage("INSERT INTO default.tab2\n" +
74 | " (SELECT *\n" +
75 | " FROM tab1)", Set.of("tab1"), Set.of("default.tab2"));
76 | }
77 | }
78 |
--------------------------------------------------------------------------------
/sqllineage4j-core/src/test/java/io/github/reata/sqllineage4j/core/OtherTest.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.core;
2 |
3 | import org.junit.Test;
4 |
5 | import java.util.Set;
6 |
7 | import static io.github.reata.sqllineage4j.core.Helper.assertTableLineage;
8 |
9 | public class OtherTest {
10 | @Test
11 | public void testUse() {
12 | assertTableLineage("USE db1", Set.of());
13 | }
14 |
15 | @Test
16 | public void testTableNameCase() {
17 | assertTableLineage("insert overwrite table tab_a\n" +
18 | "select * from tab_b\n" +
19 | "union all\n" +
20 | "select * from TAB_B", Set.of("tab_b"), Set.of("tab_a"));
21 | }
22 |
23 | @Test
24 | public void testCreateAfterDrop() {
25 | assertTableLineage("DROP TABLE IF EXISTS tab1; CREATE TABLE IF NOT EXISTS tab1 (col1 STRING)", Set.of(), Set.of("tab1"));
26 | }
27 |
28 | @Test
29 | public void testUpdate() {
30 | assertTableLineage("UPDATE tab1 SET col1='val1' WHERE col2='val2'", Set.of(), Set.of("tab1"));
31 | }
32 |
33 | // @Test
34 | // public void testUpdateWithJoin() {
35 | // // SparkSQL doesn't support this syntax
36 | // helper("UPDATE tab1 a INNER JOIN tab2 b ON a.col1=b.col1 SET a.col2=b.col2", Set.of("tab2"), Set.of("tab1"));
37 | // }
38 |
39 | // @Test
40 | // public void testCopyFromTable() {
41 | // // SparkSQL doesn't support this syntax
42 | // helper("COPY tab1 FROM tab2", Set.of("tab2"), Set.of("tab1"));
43 | // }
44 |
45 | @Test
46 | public void testDrop() {
47 | assertTableLineage("DROP TABLE IF EXISTS tab1", Set.of(), Set.of());
48 | }
49 |
50 | @Test
51 | public void testDropWithComment() {
52 | assertTableLineage("--comment\n" +
53 | "DROP TABLE IF EXISTS tab1", Set.of(), Set.of());
54 | }
55 |
56 | @Test
57 | public void testDropAfterCreate() {
58 | assertTableLineage("CREATE TABLE IF NOT EXISTS tab1 (col1 STRING);DROP TABLE IF EXISTS tab1", Set.of(), Set.of());
59 | }
60 |
61 | @Test
62 | public void testDropTmpTabAfterCreate() {
63 | assertTableLineage("create table tab_a as select * from tab_b;\n" +
64 | "insert overwrite table tab_c select * from tab_a;\n" +
65 | "drop table tab_a", Set.of("tab_b"), Set.of("tab_c"));
66 | }
67 |
68 | @Test
69 | public void testNewCreateTabAsTmpTable() {
70 | assertTableLineage("create table tab_a as select * from tab_b;\n" +
71 | "create table tab_c as select * from tab_a;", Set.of("tab_b"), Set.of("tab_c"));
72 | }
73 |
74 | @Test
75 | public void testAlterTableRename() {
76 | assertTableLineage("alter table tab1 rename to tab2", Set.of(), Set.of());
77 | }
78 |
79 | // /*
80 | // This syntax is MySQL specific:
81 | // https://dev.mysql.com/doc/refman/8.0/en/rename-table.html
82 | // */
83 | // @Test
84 | // public void testRenameTable() {
85 | // // SparkSQL doesn't support this syntax
86 | // helper("rename table tab1 to tab2", Set.of(), Set.of());
87 | // }
88 | //
89 | // @Test
90 | // public void testRenameTables() {
91 | // // SparkSQL doesn't support this syntax
92 | // helper("rename table tab1 to tab2, tab3 to tab4", Set.of(), Set.of());
93 | // }
94 |
95 | /*
96 | See https://cwiki.apache.org/confluence/display/Hive/Exchange+Partition for language manual
97 | */
98 | @Test
99 | public void testAlterTableExchangePartition() {
100 | assertTableLineage("alter table tab1 exchange partition(pt='part1') with table tab2", Set.of("tab2"), Set.of("tab1"));
101 | }
102 |
103 | /*
104 | See https://www.vertica.com/docs/10.0.x/HTML/Content/Authoring/AdministratorsGuide/Partitions/SwappingPartitions.htm
105 | for language specification
106 | */
107 | @Test
108 | public void testSwappingPartitions() {
109 | assertTableLineage("select swap_partitions_between_tables('staging', 'min-range-value', 'max-range-value', 'target')", Set.of("staging"), Set.of("target"));
110 | }
111 |
112 | @Test
113 | public void testAlterTargetTableName() {
114 | assertTableLineage("insert overwrite tab1 select * from tab2; alter table tab1 rename to tab3;", Set.of("tab2"), Set.of("tab3"));
115 | assertTableLineage("insert overwrite tab2 select * from tab1; alter table tab1 rename to tab3;", Set.of("tab3"), Set.of("tab2"));
116 | }
117 |
118 | @Test
119 | public void testRefreshTable() {
120 | assertTableLineage("refresh table tab1", Set.of(), Set.of());
121 | }
122 |
123 | @Test
124 | public void testCacheTable() {
125 | assertTableLineage("cache table tab1", Set.of(), Set.of());
126 | }
127 |
128 | @Test
129 | public void testUncacheTable() {
130 | assertTableLineage("uncache table tab1", Set.of(), Set.of());
131 | }
132 |
133 | @Test
134 | public void testUncacheTableIfExists() {
135 | assertTableLineage("uncache table if exists tab1", Set.of(), Set.of());
136 | }
137 |
138 | @Test
139 | public void testTruncateTable() {
140 | assertTableLineage("truncate table tab1", Set.of(), Set.of());
141 | }
142 |
143 | @Test
144 | public void testDeleteFromTable() {
145 | assertTableLineage("delete from table tab1", Set.of(), Set.of());
146 | }
147 |
148 | @Test
149 | public void testLateralViewUsingJsonTuple() {
150 | assertTableLineage("INSERT OVERWRITE TABLE foo\n" +
151 | "SELECT sc.id, q.item0, q.item1\n" +
152 | "FROM bar sc\n" +
153 | "LATERAL VIEW json_tuple(sc.json, 'key1', 'key2') q AS item0, item1", Set.of("bar"), Set.of("foo"));
154 | }
155 |
156 | @Test
157 | public void testLateralViewOuter() {
158 | assertTableLineage("INSERT OVERWRITE TABLE foo\n" +
159 | "SELECT sc.id, q.col1\n" +
160 | "FROM bar sc\n" +
161 | "LATERAL VIEW OUTER explode(sc.json_array) q AS col1", Set.of("bar"), Set.of("foo"));
162 | }
163 |
164 | @Test
165 | public void testShowCreateTable() {
166 | assertTableLineage("show create table tab1", Set.of());
167 | }
168 | }
169 |
--------------------------------------------------------------------------------
/sqllineage4j-core/src/test/java/io/github/reata/sqllineage4j/core/SelectTest.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.core;
2 |
3 | import org.junit.Test;
4 |
5 | import java.util.Set;
6 |
7 | import static io.github.reata.sqllineage4j.core.Helper.assertTableLineage;
8 |
9 | public class SelectTest {
10 | @Test
11 | public void testSelect() {
12 | assertTableLineage("SELECT col1 FROM tab1", Set.of("tab1"));
13 | }
14 |
15 | @Test
16 | public void testSelectWithSchema() {
17 | assertTableLineage("SELECT col1 FROM schema1.tab1", Set.of("schema1.tab1"));
18 | }
19 |
20 | @Test
21 | public void testSelectWithSchemaAndDatabase() {
22 | assertTableLineage("SELECT col1 FROM db1.schema1.tbl1", Set.of("db1.schema1.tbl1"));
23 | }
24 |
25 | @Test
26 | public void testSelectWithTableNameInBacktick() {
27 | assertTableLineage("SELECT * FROM `tab1`", Set.of("tab1"));
28 | }
29 |
30 | @Test
31 | public void testSelectWithSchemaInBacktick() {
32 | assertTableLineage("SELECT col1 FROM `schema1`.`tab1`", Set.of("schema1.tab1"));
33 | }
34 |
35 | @Test
36 | public void testSelectMultiLine() {
37 | assertTableLineage("SELECT col1 FROM\n" +
38 | "tab1", Set.of("tab1"));
39 | }
40 |
41 | @Test
42 | public void testSelectAsterisk() {
43 | assertTableLineage("SELECT * FROM tab1", Set.of("tab1"));
44 | }
45 |
46 | @Test
47 | public void testSelectValue() {
48 | assertTableLineage("SELECT 1");
49 | }
50 |
51 | @Test
52 | public void testSelectFunction() {
53 | assertTableLineage("SELECT NOW()");
54 | }
55 |
56 | @Test
57 | public void testSelectTrimFunctionWithFromKeyword() {
58 | assertTableLineage("SELECT trim(BOTH ' ' FROM ' abc ')");
59 | }
60 |
61 | @Test
62 | public void testSelectTrimFunctionWithFromKeywordFromSourceTable() {
63 | assertTableLineage("SELECT trim(BOTH ' ' FROM col1) FROM tab1", Set.of("tab1"));
64 | }
65 |
66 | @Test
67 | public void testSelectWithWhere() {
68 | assertTableLineage("SELECT * FROM tab1 WHERE col1 > val1 AND col2 = 'val2'", Set.of("tab1"));
69 | }
70 |
71 | @Test
72 | public void testSelectWithComment() {
73 | assertTableLineage("SELECT -- comment1\n col1 FROM tab1", Set.of("tab1"));
74 | }
75 |
76 | @Test
77 | public void testSelectWithCommentAfterFrom() {
78 | assertTableLineage("SELECT col1\nFROM -- comment\ntab1", Set.of("tab1"));
79 | }
80 |
81 | @Test
82 | public void testSelectWithCommentAfterJoin() {
83 | assertTableLineage("select * from tab1 join --comment\ntab2 on tab1.x = tab2.x", Set.of("tab1", "tab2"));
84 | }
85 |
86 | @Test
87 | public void testSelectKeywordAsColumnAlias() {
88 | // here `as` is the column alias
89 | assertTableLineage("SELECT 1 `as` FROM tab1", Set.of("tab1"));
90 | // the following is hive specific, MySQL doesn't allow this syntax. As of now, we don't test against it
91 | // helper("SELECT 1 as FROM tab1", Set.of("tab1"));
92 | }
93 |
94 | @Test
95 | public void testSelectWithTableAlias() {
96 | assertTableLineage("SELECT 1 FROM tab1 AS alias1", Set.of("tab1"));
97 | }
98 |
99 | @Test
100 | public void testSelectCount() {
101 | assertTableLineage("SELECT COUNT(*) FROM tab1", Set.of("tab1"));
102 | }
103 |
104 | @Test
105 | public void testSelectSubquery() {
106 | assertTableLineage("SELECT col1 FROM (SELECT col1 FROM tab1) dt", Set.of("tab1"));
107 | // with an extra space
108 | assertTableLineage("SELECT col1 FROM ( SELECT col1 FROM tab1) dt", Set.of("tab1"));
109 | }
110 |
111 | @Test
112 | public void testSelectSubqueryWithTwoParenthesis() {
113 | assertTableLineage("SELECT col1 FROM ((SELECT col1 FROM tab1)) dt", Set.of("tab1"));
114 | }
115 |
116 | @Test
117 | public void testSelectSubqueryWithMoreParenthesis() {
118 | assertTableLineage("SELECT col1 FROM (((((((SELECT col1 FROM tab1))))))) dt", Set.of("tab1"));
119 | }
120 |
121 | @Test
122 | public void testSelectSubqueryInCase() {
123 | assertTableLineage("SELECT\n" +
124 | "CASE WHEN (SELECT count(*) FROM tab1 WHERE col1 = 'tab2') = 1 THEN (SELECT count(*) FROM tab2) ELSE 0 END AS cnt",
125 | Set.of("tab1", "tab2"));
126 | assertTableLineage("SELECT\n" +
127 | "CASE WHEN 1 = (SELECT count(*) FROM tab1 WHERE col1 = 'tab2') THEN (SELECT count(*) FROM tab2) ELSE 0 END AS cnt",
128 | Set.of("tab1", "tab2"));
129 | }
130 |
131 | @Test
132 | public void testSelectSubqueryWithoutAlias() {
133 | // this syntax is valid in SparkSQL, not for MySQL
134 | assertTableLineage("SELECT col1 FROM (SELECT col1 FROM tab1)", Set.of("tab1"));
135 | }
136 |
137 | @Test
138 | public void testSelectSubqueryInWhereClause() {
139 | assertTableLineage("SELECT col1\n" +
140 | "FROM tab1\n" +
141 | "WHERE col1 IN (SELECT max(col1) FROM tab2)", Set.of("tab1", "tab2"));
142 | }
143 |
144 | @Test
145 | public void testSelectInnerJoin() {
146 | assertTableLineage("SELECT * FROM tab1 INNER JOIN tab2", Set.of("tab1", "tab2"));
147 | }
148 |
149 | @Test
150 | public void testSelectJoin() {
151 | assertTableLineage("SELECT * FROM tab1 JOIN tab2", Set.of("tab1", "tab2"));
152 | }
153 |
154 | @Test
155 | public void testSelectLeftJoin() {
156 | assertTableLineage("SELECT * FROM tab1 LEFT JOIN tab2", Set.of("tab1", "tab2"));
157 | }
158 |
159 | @Test
160 | public void testSelectLeftJoinWithExtraSpaceInMiddle() {
161 | assertTableLineage("SELECT * FROM tab1 LEFT JOIN tab2", Set.of("tab1", "tab2"));
162 | }
163 |
164 | @Test
165 | public void testSelectLeftSemiJoin() {
166 | assertTableLineage("SELECT * FROM tab1 LEFT SEMI JOIN tab2", Set.of("tab1", "tab2"));
167 | }
168 |
169 | @Test
170 | public void testSelectLeftSemiJoinWithOn() {
171 | assertTableLineage("SELECT * FROM tab1 LEFT SEMI JOIN tab2 ON (tab1.col1 = tab2.col2)", Set.of("tab1", "tab2"));
172 | }
173 |
174 | @Test
175 | public void testSelectRightJoin() {
176 | assertTableLineage("SELECT * FROM tab1 RIGHT JOIN tab2", Set.of("tab1", "tab2"));
177 | }
178 |
179 | @Test
180 | public void testSelectFullOuterJoin() {
181 | assertTableLineage("SELECT * FROM tab1 FULL OUTER JOIN tab2", Set.of("tab1", "tab2"));
182 | }
183 |
184 | // @Test
185 | // public void testSelectFullOuterJoinWithFullAsAlias() {
186 | // // SparkSQL can't handle this
187 | // helper("SELECT * FROM tab1 AS full FULL OUTER JOIN tab2", Set.of("tab1", "tab2"));
188 | // }
189 |
190 | @Test
191 | public void testSelectCrossJoin() {
192 | assertTableLineage("SELECT * FROM tab1 CROSS JOIN tab2", Set.of("tab1", "tab2"));
193 | }
194 |
195 | @Test
196 | public void testSelectCrossJoinWithOn() {
197 | assertTableLineage("SELECT * FROM tab1 CROSS JOIN tab2 on tab1.col1 = tab2.col2", Set.of("tab1", "tab2"));
198 | }
199 |
200 | @Test
201 | public void testSelectJoinWithSubquery() {
202 | assertTableLineage("SELECT col1 FROM tab1 AS a LEFT JOIN tab2 AS b ON a.id=b.tab1_id " +
203 | "WHERE col1 = (SELECT col1 FROM tab2 WHERE id = 1)", Set.of("tab1", "tab2"));
204 | }
205 |
206 | @Test
207 | public void testSelectJoinInAnsi89Syntax() {
208 | assertTableLineage("SELECT * FROM tab1 a, tab2 b", Set.of("tab1", "tab2"));
209 | }
210 |
211 | @Test
212 | public void testSelectJoinInAnsi89SyntaxWithSubquery() {
213 | assertTableLineage("SELECT * FROM (SELECT * FROM tab1) a, (SELECT * FROM tab2) b", Set.of("tab1", "tab2"));
214 | }
215 |
216 | @Test
217 | public void testSelectGroupBy() {
218 | assertTableLineage("SELECT col1, col2 FROM tab1 GROUP BY col1, col2", Set.of("tab1"));
219 | }
220 |
221 | @Test
222 | public void testSelectGroupByOrdinal() {
223 | assertTableLineage("SELECT col1, col2 FROM tab1 GROUP BY 1, 2", Set.of("tab1"));
224 | }
225 |
226 | @Test
227 | public void testSelectFromValues() {
228 | assertTableLineage("SELECT * FROM (VALUES (1, 2))");
229 | }
230 |
231 | @Test
232 | public void testSelectFromValuesNewline() {
233 | assertTableLineage("SELECT * FROM (\nVALUES (1, 2))");
234 | }
235 |
236 | @Test
237 | public void testSelectFromValuesWithAlias() {
238 | assertTableLineage("SELECT * FROM (VALUES (1, 2)) AS t(col1, col2)");
239 | }
240 |
241 | /*
242 | unnest function is Presto specific
243 | */
244 | @Test
245 | public void testSelectFromUnnest() {
246 | assertTableLineage("SELECT student, score FROM tests CROSS JOIN UNNEST(scores) AS t (score)", Set.of("tests"));
247 | }
248 |
249 | @Test
250 | public void testSelectFromUnnestParsedAsKeyword() {
251 | assertTableLineage("SELECT student, score FROM tests CROSS JOIN UNNEST (scores) AS t (score)", Set.of("tests"));
252 | }
253 |
254 | // @Test
255 | // public void testSelectFromUnnestWithOrdinality() {
256 | // // SparkSQL doesn't support this syntax
257 | // assertTableLineage("SELECT numbers, n, a\n" +
258 | // "FROM (\n" +
259 | // " VALUES\n" +
260 | // " (ARRAY[2, 5]),\n" +
261 | // " (ARRAY[7, 8, 9])\n" +
262 | // ") AS x (numbers)\n" +
263 | // "CROSS JOIN UNNEST(numbers) WITH ORDINALITY AS t (n, a);");
264 | // }
265 |
266 | /*
267 | generator is Snowflake specific
268 | */
269 | @Test
270 | public void testSelectFromGenerator() {
271 | assertTableLineage("SELECT seq4(), uniform(1, 10, random(12))\n" +
272 | "FROM table(generator()) v\n" +
273 | "ORDER BY 1;");
274 | }
275 | }
276 |
--------------------------------------------------------------------------------
/sqllineage4j-graph/pom.xml:
--------------------------------------------------------------------------------
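<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>sqllineage4j</artifactId>
        <groupId>io.github.reata</groupId>
        <version>1.0.1-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>
    <artifactId>sqllineage4j-graph</artifactId>
    <name>sqllineage4j-graph</name>

    <dependencies>
        <dependency>
            <groupId>org.apache.tinkerpop</groupId>
            <artifactId>tinkergraph-gremlin</artifactId>
        </dependency>
        <dependency>
            <groupId>io.github.reata</groupId>
            <artifactId>sqllineage4j-common</artifactId>
        </dependency>
    </dependencies>
</project>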
--------------------------------------------------------------------------------
/sqllineage4j-graph/src/main/java/io/github/reata/sqllineage4j/graph/GremlinLineageGraph.java:
--------------------------------------------------------------------------------
1 | package io.github.reata.sqllineage4j.graph;
2 |
3 | import io.github.reata.sqllineage4j.common.entity.EdgeTuple;
4 | import org.apache.tinkerpop.gremlin.process.traversal.P;
5 | import org.apache.tinkerpop.gremlin.process.traversal.Path;
6 | import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal;
7 | import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource;
8 | import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__;
9 | import org.apache.tinkerpop.gremlin.structure.*;
10 | import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerGraph;
11 |
12 | import java.util.*;
13 | import java.util.stream.Collectors;
14 |
15 | public class GremlinLineageGraph implements LineageGraph {
16 | private final GraphTraversalSource g;
17 |
18 | // public GraphTraversalSource getG() {
19 | // return g;
20 | // }
21 |
/** Creates a lineage graph backed by a freshly opened {@link TinkerGraph}. */
public GremlinLineageGraph() {
    this(TinkerGraph.open());
}
25 |
/**
 * Creates a lineage graph on top of an existing graph.
 *
 * @param graph the graph whose traversal source is used for all operations
 */
public GremlinLineageGraph(final Graph graph) {
    g = graph.traversal();
}
29 |
30 | public void addVertexIfNotExist(Object obj) {
31 | HashMap props = new HashMap<>();
32 | addVertexIfNotExist(obj, props);
33 | }
34 |
/**
 * Ensures a vertex for {@code obj} exists and (re)applies its properties.
 * The vertex is identified by the simple class name of {@code obj} as its label and by
 * {@code obj.hashCode()} as its id.
 *
 * @param obj   the object to represent as a vertex; also stored under the "obj" property
 * @param props additional properties set on the vertex, one per map entry
 */
public void addVertexIfNotExist(Object obj, Map props) {
    // NOTE(review): the id is the hash code, so distinct objects of the same class with
    // equal hash codes would share one vertex -- confirm this is acceptable for callers
    int id = obj.hashCode();
    String label = obj.getClass().getSimpleName();
    // fold() + coalesce(unfold(), addV(...)): reuse the matching vertex if the lookup
    // found one, otherwise add a new vertex with this label and id
    GraphTraversal step = g.V().hasLabel(label).hasId(id).fold()
            .coalesce(__.unfold(),
                    __.addV(label).property(T.id, id))
            .property("obj", obj);
    for (Map.Entry entry : props.entrySet()) {
        step = step.property(entry.getKey(), entry.getValue());
    }
    // the traversal is only built above; iterate() is what executes it
    step.iterate();
}
47 |
48 | public List