├── .github └── workflows │ ├── maven-publish.yml │ └── maven.yml ├── .gitignore ├── LICENSE ├── README.md ├── pom.xml ├── sqllineage4j-cli ├── pom.xml └── src │ ├── main │ └── java │ │ └── io │ │ └── github │ │ └── reata │ │ └── sqllineage4j │ │ └── cli │ │ ├── SQLLineage4j.java │ │ └── utils │ │ └── Helper.java │ └── test │ └── java │ └── io │ └── github │ └── reata │ └── sqllineage4j │ └── cli │ └── SQLLineage4jTest.java ├── sqllineage4j-common ├── pom.xml └── src │ ├── main │ └── java │ │ └── io │ │ └── github │ │ └── reata │ │ └── sqllineage4j │ │ └── common │ │ ├── constant │ │ ├── EdgeType.java │ │ └── NodeTag.java │ │ ├── entity │ │ ├── ColumnQualifierTuple.java │ │ └── EdgeTuple.java │ │ ├── model │ │ ├── Column.java │ │ ├── QuerySet.java │ │ ├── Schema.java │ │ ├── SubQuery.java │ │ └── Table.java │ │ └── utils │ │ └── Helper.java │ └── test │ └── java │ └── io │ └── github │ └── reata │ └── sqllineage4j │ └── common │ └── ModelTest.java ├── sqllineage4j-core ├── pom.xml └── src │ ├── main │ └── java │ │ └── io │ │ └── github │ │ └── reata │ │ └── sqllineage4j │ │ └── core │ │ ├── LineageAnalyzer.java │ │ ├── LineageRunner.java │ │ └── holder │ │ ├── SQLLineageHolder.java │ │ ├── StatementLineageHolder.java │ │ └── SubQueryLineageHolder.java │ └── test │ └── java │ └── io │ └── github │ └── reata │ └── sqllineage4j │ └── core │ ├── CTETest.java │ ├── ColumnTest.java │ ├── CreateTest.java │ ├── Helper.java │ ├── InsertTest.java │ ├── OtherTest.java │ └── SelectTest.java ├── sqllineage4j-graph ├── pom.xml └── src │ └── main │ └── java │ └── io │ └── github │ └── reata │ └── sqllineage4j │ └── graph │ ├── GremlinLineageGraph.java │ └── LineageGraph.java ├── sqllineage4j-parser ├── pom.xml └── src │ └── main │ ├── antlr4 │ └── io │ │ └── github │ │ └── reata │ │ └── sqllineage4j │ │ └── parser │ │ └── SqlBase.g4 │ └── java │ └── io │ └── github │ └── reata │ └── sqllineage4j │ └── parser │ ├── LineageParser.java │ └── StatementSplitter.java └── 
sqllineage4j-test-coverage └── pom.xml /.github/workflows/maven-publish.yml: -------------------------------------------------------------------------------- 1 | name: Maven Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | build: 9 | 10 | runs-on: ubuntu-latest 11 | permissions: 12 | contents: read 13 | packages: write 14 | 15 | steps: 16 | - uses: actions/checkout@v3 17 | - name: Set up JDK 11 18 | uses: actions/setup-java@v3 19 | with: 20 | distribution: 'temurin' 21 | java-version: '11' 22 | 23 | - name: Build with Maven 24 | run: mvn -B package --file pom.xml 25 | 26 | - name: Set up Apache Maven Central 27 | uses: actions/setup-java@v3 28 | with: # running setup-java again overwrites the settings.xml 29 | distribution: 'temurin' 30 | java-version: '11' 31 | server-id: ossrh # Value of the distributionManagement/repository/id field of the pom.xml 32 | server-username: MAVEN_USERNAME # env variable for username in deploy 33 | server-password: MAVEN_CENTRAL_TOKEN # env variable for token in deploy 34 | gpg-private-key: ${{ secrets.MAVEN_GPG_PRIVATE_KEY }} # Value of the GPG private key to import 35 | gpg-passphrase: MAVEN_GPG_PASSPHRASE # env variable for GPG private key passphrase 36 | 37 | - name: Publish to Apache Maven Central 38 | run: mvn deploy 39 | env: 40 | MAVEN_USERNAME: reata 41 | MAVEN_CENTRAL_TOKEN: ${{ secrets.MAVEN_CENTRAL_TOKEN }} 42 | MAVEN_GPG_PASSPHRASE: ${{ secrets.MAVEN_GPG_PASSPHRASE }} 43 | -------------------------------------------------------------------------------- /.github/workflows/maven.yml: -------------------------------------------------------------------------------- 1 | name: Java CI with Maven 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | - name: Set up JDK 11 17 | uses: actions/setup-java@v3 18 | with: 19 | java-version: '11' 20 | distribution: 
'temurin' 21 | cache: maven 22 | - name: Build with Maven 23 | run: mvn -B package --file pom.xml -Dmaven.javadoc.skip=true -Dmaven.source.skip=true 24 | 25 | # Optional: Uploads the full dependency graph to GitHub to improve the quality of Dependabot alerts this repository can receive 26 | - name: Update dependency graph 27 | uses: advanced-security/maven-dependency-submission-action@571e99aab1055c2e71a1e2309b9691de18d6b7d6 28 | - name: Upload coverage to Codecov 29 | uses: codecov/codecov-action@v3 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | !**/src/main/**/target/ 4 | !**/src/test/**/target/ 5 | dependency-reduced-pom.xml 6 | 7 | ### IntelliJ IDEA ### 8 | .idea/modules.xml 9 | .idea/jarRepositories.xml 10 | .idea/compiler.xml 11 | .idea/libraries/ 12 | *.iws 13 | *.iml 14 | *.ipr 15 | 16 | ### Eclipse ### 17 | .apt_generated 18 | .classpath 19 | .factorypath 20 | .project 21 | .settings 22 | .springBeans 23 | .sts4-cache 24 | 25 | ### NetBeans ### 26 | /nbproject/private/ 27 | /nbbuild/ 28 | /dist/ 29 | /nbdist/ 30 | /.nb-gradle/ 31 | build/ 32 | !**/src/main/**/build/ 33 | !**/src/test/**/build/ 34 | 35 | ### VS Code ### 36 | .vscode/ 37 | 38 | ### Mac OS ### 39 | .DS_Store 40 | 41 | # Idea 42 | .idea/ 43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SQLLineage4J 2 | 3 | sqllineage4j is a proof of concept to implement [sqllineage](https://github.com/reata/sqllineage) with [antlr4](https://github.com/antlr/antlr4). 
4 | 5 | [![Maven Central](https://img.shields.io/maven-central/v/io.github.reata/sqllineage4j)](https://central.sonatype.dev/search?q=sqllineage4j&namespace=io.github.reata) 6 | [![GitHub](https://img.shields.io/github/license/reata/sqllineage4j)](https://github.com/reata/sqllineage4j) 7 | [![Java CI with Maven](https://github.com/reata/sqllineage4j/actions/workflows/maven.yml/badge.svg)](https://github.com/reata/sqllineage4j/actions/workflows/maven.yml) 8 | [![codecov](https://codecov.io/gh/reata/sqllineage4j/branch/main/graph/badge.svg?token=2YbWOcmKnY)](https://codecov.io/gh/reata/sqllineage4j) 9 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 4.0.0 6 | 7 | io.github.reata 8 | sqllineage4j 9 | 1.0.1-SNAPSHOT 10 | pom 11 | 12 | ${project.groupId}:${project.artifactId} 13 | A Java implementation of sqllineage using ANTLR v4 14 | https://github.com/reata/sqllineage4j 15 | 16 | 17 | 18 | Apache License, Version 2.0 19 | https://www.apache.org/licenses/LICENSE-2.0.txt 20 | repo 21 | 22 | 23 | 24 | 25 | 26 | reata 27 | reddevil.hjw@gmail.com 28 | reata 29 | https://github.com/reata 30 | 31 | 32 | 33 | 34 | scm:git:git://github.com/reata/sqllineage4j.git 35 | scm:git:ssh://github.com:reata/sqllineage4j.git 36 | https://github.com/reata/sqllineage4j/tree/master 37 | HEAD 38 | 39 | 40 | 41 | UTF-8 42 | 11 43 | 11 44 | 4.7.1 45 | 1.10.1 46 | 47 | 48 | 49 | sqllineage4j-parser 50 | sqllineage4j-common 51 | sqllineage4j-graph 52 | sqllineage4j-core 53 | sqllineage4j-cli 54 | sqllineage4j-test-coverage 55 | 56 | 57 | 58 | 59 | 60 | io.github.reata 61 | sqllineage4j-cli 62 | ${project.version} 63 | 64 | 65 | io.github.reata 66 | sqllineage4j-core 67 | ${project.version} 68 | 69 | 70 | io.github.reata 71 | sqllineage4j-common 72 | ${project.version} 73 | 74 | 75 | io.github.reata 76 | sqllineage4j-graph 77 | ${project.version} 78 | 79 | 
80 | io.github.reata 81 | sqllineage4j-parser 82 | ${project.version} 83 | 84 | 85 | commons-cli 86 | commons-cli 87 | 1.5.0 88 | 89 | 90 | com.github.stefanbirkner 91 | system-lambda 92 | 1.2.1 93 | 94 | 95 | com.google.auto.value 96 | auto-value-annotations 97 | ${dep.auto-value.version} 98 | 99 | 100 | com.google.code.findbugs 101 | annotations 102 | 3.0.1 103 | 104 | 105 | org.apache.tinkerpop 106 | tinkergraph-gremlin 107 | 3.6.4 108 | 109 | 110 | org.antlr 111 | antlr4 112 | ${dep.antlr.version} 113 | 114 | 115 | 116 | 117 | 118 | 119 | junit 120 | junit 121 | 4.13.2 122 | test 123 | 124 | 125 | org.junit.jupiter 126 | junit-jupiter-params 127 | 5.9.2 128 | test 129 | 130 | 131 | 132 | 133 | 134 | ossrh 135 | https://s01.oss.sonatype.org/content/repositories/snapshots 136 | 137 | 138 | ossrh 139 | https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/ 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | maven-clean-plugin 149 | 3.1.0 150 | 151 | 152 | 153 | maven-resources-plugin 154 | 3.0.2 155 | 156 | 157 | maven-compiler-plugin 158 | 3.8.0 159 | 160 | 161 | maven-surefire-plugin 162 | 2.22.1 163 | 164 | 165 | maven-jar-plugin 166 | 3.0.2 167 | 168 | 169 | maven-install-plugin 170 | 2.5.2 171 | 172 | 173 | maven-deploy-plugin 174 | 2.8.2 175 | 176 | 177 | 178 | maven-site-plugin 179 | 3.7.1 180 | 181 | 182 | maven-project-info-reports-plugin 183 | 3.0.0 184 | 185 | 186 | 187 | 188 | 189 | org.apache.maven.plugins 190 | maven-source-plugin 191 | 3.1.0 192 | 193 | 194 | attach-sources 195 | 196 | jar-no-fork 197 | 198 | 199 | 200 | 201 | 202 | org.apache.maven.plugins 203 | maven-javadoc-plugin 204 | 2.9.1 205 | 206 | 207 | attach-javadocs 208 | 209 | jar 210 | 211 | 212 | 213 | 214 | 215 | org.apache.maven.plugins 216 | maven-gpg-plugin 217 | 3.0.1 218 | 219 | 220 | sign-artifacts 221 | verify 222 | 223 | sign 224 | 225 | 226 | 227 | --pinentry-mode 228 | loopback 229 | 230 | 231 | 232 | 233 | 234 | 235 | org.jacoco 236 | jacoco-maven-plugin 237 
| 0.8.7 238 | 239 | 240 | 241 | prepare-agent 242 | 243 | 244 | 245 | 246 | report 247 | test 248 | 249 | report 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | -------------------------------------------------------------------------------- /sqllineage4j-cli/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4 | sqllineage4j 5 | io.github.reata 6 | 1.0.1-SNAPSHOT 7 | 8 | 4.0.0 9 | sqllineage4j-cli 10 | sqllineage4j-cli 11 | 12 | 13 | 14 | commons-cli 15 | commons-cli 16 | 17 | 18 | io.github.reata 19 | sqllineage4j-core 20 | 21 | 22 | com.github.stefanbirkner 23 | system-lambda 24 | test 25 | 26 | 27 | 28 | 29 | 30 | 31 | org.apache.maven.plugins 32 | maven-shade-plugin 33 | 34 | 35 | package 36 | 37 | shade 38 | 39 | 40 | true 41 | executable 42 | 43 | 45 | 46 | io.github.reata.sqllineage4j.cli.SQLLineage4j 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | org.skife.maven 57 | really-executable-jar-maven-plugin 58 | 59 | -Xmx1G 60 | executable 61 | 62 | 63 | 64 | package 65 | 66 | really-executable-jar 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /sqllineage4j-cli/src/main/java/io/github/reata/sqllineage4j/cli/SQLLineage4j.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.cli; 2 | 3 | import io.github.reata.sqllineage4j.core.LineageRunner; 4 | import org.apache.commons.cli.*; 5 | 6 | import static io.github.reata.sqllineage4j.cli.utils.Helper.extractSqlFromArgs; 7 | 8 | public class SQLLineage4j { 9 | 10 | public static void main(String[] args) { 11 | Options options = new Options(); 12 | Option exec = Option.builder("e").argName("quoted-query-string").hasArg().desc("SQL from command line").build(); 13 | Option file = Option.builder("f").argName("filename").hasArg().desc("SQL from files").build(); 14 | Option verbose = 
Option.builder("v").longOpt("verbose").desc("increase output verbosity, show statement level lineage result").build(); 15 | options.addOption(exec); 16 | options.addOption(file); 17 | options.addOption(verbose); 18 | 19 | CommandLineParser parser = new DefaultParser(); 20 | try { 21 | CommandLine cmd = parser.parse(options, args); 22 | if (cmd.hasOption("e") && cmd.hasOption("f")) { 23 | System.out.println("Both -e and -f options are specified. -e option will be ignored"); 24 | } 25 | if (cmd.hasOption("e") || cmd.hasOption("f")) { 26 | String sql = extractSqlFromArgs(cmd); 27 | LineageRunner runner = cmd.hasOption("v") ? LineageRunner.builder(sql).verbose().build() : LineageRunner.builder(sql).build(); 28 | runner.printTableLineage(); 29 | } else { 30 | HelpFormatter formatter = new HelpFormatter(); 31 | formatter.printHelp("sqllineage4j", options); 32 | } 33 | } catch (ParseException e) { 34 | throw new RuntimeException(e); 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /sqllineage4j-cli/src/main/java/io/github/reata/sqllineage4j/cli/utils/Helper.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.cli.utils; 2 | 3 | import org.apache.commons.cli.CommandLine; 4 | 5 | import java.io.BufferedReader; 6 | import java.io.FileNotFoundException; 7 | import java.io.FileReader; 8 | import java.io.IOException; 9 | 10 | public final class Helper { 11 | /** 12 | * Returns the SQL text selected by the parsed command line: the contents of the file named by -f if that option has a value, otherwise the string given by -e, otherwise an empty string. 13 | * Every line read from the file is followed by System.lineSeparator(). If the file cannot be opened or read, the stack trace is printed and the JVM exits with status 1. 14 | * 15 | * @param cmd parsed command line carrying the optional "f" and "e" option values 16 | * @return the SQL text to analyze; empty when neither option has a value 17 | */ 18 | public static String extractSqlFromArgs(CommandLine cmd) { 19 | StringBuilder sql = new StringBuilder(); 20 | if (cmd.getOptionValue("f") != null) { 21 | String file = cmd.getOptionValue("f"); 22 | /* try-with-resources closes the reader (and the FileReader it wraps) on every exit path */ 23 | try (BufferedReader reader = new BufferedReader(new FileReader(file))) { 24 | String line = reader.readLine(); 25 | while (line != null) { 26 | sql.append(line); 27 | sql.append(System.lineSeparator()); 28 | line = reader.readLine(); 29 | } 30 | } catch (FileNotFoundException e) { 31 | 
e.printStackTrace(); 32 | System.exit(1); 33 | } catch (IOException e) { 34 | /* report the read failure instead of exiting silently */ 35 | e.printStackTrace(); 36 | System.exit(1); 37 | } 38 | } else if (cmd.getOptionValue("e") != null) { 39 | sql.append(cmd.getOptionValue("e")); 40 | } 41 | return sql.toString(); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /sqllineage4j-cli/src/test/java/io/github/reata/sqllineage4j/cli/SQLLineage4jTest.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.cli; 2 | 3 | import com.github.stefanbirkner.systemlambda.SystemLambda; 4 | import org.junit.Test; 5 | 6 | import java.io.File; 7 | import java.io.FileWriter; 8 | import java.io.IOException; 9 | 10 | import static org.junit.Assert.assertEquals; 11 | import static org.junit.Assert.assertTrue; 12 | 13 | public class SQLLineage4jTest { 14 | 15 | @Test 16 | public void testCliDummy() { 17 | String testSql = "insert overwrite table foo select * from dual inner join laud; insert overwrite table bar select * from foo"; 18 | SQLLineage4j.main(new String[]{}); 19 | SQLLineage4j.main(new String[]{"-e", testSql}); 20 | SQLLineage4j.main(new String[]{"-e", testSql, "-v"}); 21 | try { 22 | File f = File.createTempFile("test", ".sql"); 23 | FileWriter fw = new FileWriter(f); 24 | fw.write(testSql); 25 | fw.close(); 26 | SQLLineage4j.main(new String[]{"-f", f.getAbsolutePath()}); 27 | SQLLineage4j.main(new String[]{"-e", testSql, "-f", f.getAbsolutePath()}); 28 | assertTrue(f.delete()); 29 | } catch (IOException e) { 30 | e.printStackTrace(); 31 | } 32 | } 33 | 34 | @Test 35 | public void testFileException() throws Exception { 36 | int statusCode = SystemLambda.catchSystemExit(() -> SQLLineage4j.main(new String[]{"-f", "nonexist_file"})); 37 | assertEquals(1, statusCode); 38 | } 39 | 40 | @Test 41 | public void testFilePermissionError() throws Exception { 42 | int statusCode = SystemLambda.catchSystemExit(() -> SQLLineage4j.main(new String[]{"-f", 
"/"})); 43 | assertEquals(1, statusCode); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /sqllineage4j-common/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4 | sqllineage4j 5 | io.github.reata 6 | 1.0.1-SNAPSHOT 7 | 8 | 4.0.0 9 | sqllineage4j-common 10 | sqllineage4j-common 11 | 12 | 13 | 14 | com.google.auto.value 15 | auto-value-annotations 16 | provided 17 | 18 | 19 | com.google.code.findbugs 20 | annotations 21 | provided 22 | 23 | 24 | 25 | 26 | 27 | 28 | maven-compiler-plugin 29 | 30 | 31 | 32 | com.google.auto.value 33 | auto-value 34 | ${dep.auto-value.version} 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /sqllineage4j-common/src/main/java/io/github/reata/sqllineage4j/common/constant/EdgeType.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.common.constant; 2 | 3 | /** String constants naming the edge types: "lineage", "rename", "has_column" and "has_alias". */ public class EdgeType { 4 | public static final String LINEAGE = "lineage"; 5 | 6 | public static final String RENAME = "rename"; 7 | 8 | public static final String HAS_COLUMN = "has_column"; 9 | 10 | public static final String HAS_ALIAS = "has_alias"; 11 | } 12 | -------------------------------------------------------------------------------- /sqllineage4j-common/src/main/java/io/github/reata/sqllineage4j/common/constant/NodeTag.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.common.constant; 2 | 3 | /** String constants naming node tags. READ, WRITE and DROP are the values LineageAnalyzer passes as the type argument when it records a table identifier. */ public class NodeTag { 4 | public static final String READ = "read"; 5 | 6 | public static final String WRITE = "write"; 7 | 8 | public static final String CTE = "cte"; 9 | 10 | public static final String DROP = "drop"; 11 | 12 | public static final String SOURCE_ONLY = "source_only"; 13 | 14 | public static final String TARGET_ONLY = "target_only"; 15 | 16 | public
static final String SELFLOOP = "selfloop"; 17 | } 18 | -------------------------------------------------------------------------------- /sqllineage4j-common/src/main/java/io/github/reata/sqllineage4j/common/entity/ColumnQualifierTuple.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.common.entity; 2 | 3 | import com.google.auto.value.AutoValue; 4 | 5 | import javax.annotation.Nullable; 6 | 7 | /** AutoValue value type pairing a column name with a nullable qualifier (the qualifier is null for an unqualified reference). */ @AutoValue 8 | abstract public class ColumnQualifierTuple { 9 | /** Factory method; qualifier may be null. */ public static ColumnQualifierTuple create(String column, @Nullable String qualifier) { 10 | return new AutoValue_ColumnQualifierTuple(column, qualifier); 11 | } 12 | 13 | abstract public String column(); 14 | 15 | @Nullable 16 | abstract public String qualifier(); 17 | } 18 | -------------------------------------------------------------------------------- /sqllineage4j-common/src/main/java/io/github/reata/sqllineage4j/common/entity/EdgeTuple.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.common.entity; 2 | 3 | import com.google.auto.value.AutoValue; 4 | 5 | /** AutoValue value type holding a source object, an edge label and a target object. */ @AutoValue 6 | abstract public class EdgeTuple { 7 | public static EdgeTuple create(Object source, String label, Object target) { 8 | return new AutoValue_EdgeTuple(source, label, target); 9 | } 10 | 11 | abstract public Object source(); 12 | 13 | abstract public String label(); 14 | 15 | abstract public Object target(); 16 | } 17 | -------------------------------------------------------------------------------- /sqllineage4j-common/src/main/java/io/github/reata/sqllineage4j/common/model/Column.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.common.model; 2 | 3 | import io.github.reata.sqllineage4j.common.entity.ColumnQualifierTuple; 4 | 5 | import javax.annotation.Nullable; 6 | import java.util.*; 7 | 8 | import static
io.github.reata.sqllineage4j.common.utils.Helper.escapeIdentifierName; 9 | 10 | 11 | /** A column name with quote characters stripped. It keeps a set of candidate parent QuerySets and the (column, qualifier) tuples it was selected from. Equality and hashing go through toString(), so they are case-insensitive and include the parent when exactly one is known. */ public class Column { 12 | private final Set parent = new HashSet<>(); 13 | private final String rawName; 14 | private final List sourceColumns = new ArrayList<>(); 15 | 16 | public Column(String name) { 17 | this.rawName = escapeIdentifierName(name); 18 | } 19 | 20 | /** Returns parent.column in lower case when a single parent is known, otherwise just the lower-cased column name. */ @Override 21 | public String toString() { 22 | if (getParent() != null) { 23 | return getParent().toString() + "." + rawName.toLowerCase(); 24 | } else { 25 | return rawName.toLowerCase(); 26 | } 27 | } 28 | 29 | @Override 30 | public boolean equals(Object obj) { 31 | return obj instanceof Column && this.toString().equals(obj.toString()); 32 | } 33 | 34 | @Override 35 | public int hashCode() { 36 | return Objects.hash(this.toString()); 37 | } 38 | 39 | /** Returns the parent only when exactly one candidate has been set; null when there are none or several. */ public @Nullable QuerySet getParent() { 40 | return parent.size() == 1 ? List.copyOf(parent).get(0) : null; 41 | } 42 | 43 | /** Adds a candidate parent; despite the setter name, earlier candidates are kept. */ public void setParent(QuerySet table) { 44 | parent.add(table); 45 | } 46 | 47 | /** Appends one (column, qualifier) tuple; despite the setter name, earlier tuples are kept. */ public void setSourceColumns(ColumnQualifierTuple cqt) { 48 | sourceColumns.add(cqt); 49 | } 50 | 51 | /** Resolves every recorded tuple to a Column. aliasMapping is looked up by qualifier string and its values are QuerySets; a qualifier missing from the map is treated as a table name. */ public List toSourceColumns(Map aliasMapping) { 52 | List sourceColumns = new ArrayList<>(); 53 | for (ColumnQualifierTuple columnQualifierTuple : this.sourceColumns) { 54 | String srcCol = columnQualifierTuple.column(); 55 | String qualifier = columnQualifierTuple.qualifier(); 56 | if (qualifier == null) { 57 | if (srcCol.equals("*")) { 58 | // SELECT * 59 | for (QuerySet dataSet : aliasMapping.values()) { 60 | sourceColumns.add(toSourceColumn(srcCol, dataSet)); 61 | } 62 | } else { 63 | // select unqualified column 64 | Column source = new Column(srcCol); 65 | for (QuerySet dataSet : aliasMapping.values()) { 66 | // in case of only one table, we get the right answer 67 | // in case of multiple tables, a bunch of possible tables are set 68 | source.setParent(dataSet); 69 | } 70 | sourceColumns.add(source); 71 | } 72 | } else { 73 | if
(aliasMapping.containsKey(qualifier)) { 74 | sourceColumns.add(toSourceColumn(srcCol, aliasMapping.get(qualifier))); 75 | } else { 76 | sourceColumns.add(toSourceColumn(srcCol, new Table(qualifier))); 77 | } 78 | } 79 | } 80 | return sourceColumns; 81 | } 82 | 83 | /** Creates a Column named columnName and attaches parent to it unless parent is null. */ private Column toSourceColumn(String columnName, QuerySet parent) { 84 | Column col = new Column(columnName); 85 | if (parent != null) { 86 | col.setParent(parent); 87 | } 88 | return col; 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /sqllineage4j-common/src/main/java/io/github/reata/sqllineage4j/common/model/QuerySet.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.common.model; 2 | 3 | /** Something a Column can have as parent and that exposes an alias; implemented in this module by Table and SubQuery. */ public interface QuerySet { 4 | String getAlias(); 5 | } 6 | -------------------------------------------------------------------------------- /sqllineage4j-common/src/main/java/io/github/reata/sqllineage4j/common/model/Schema.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.common.model; 2 | 3 | import java.util.Objects; 4 | 5 | import static io.github.reata.sqllineage4j.common.utils.Helper.escapeIdentifierName; 6 | 7 | /** A schema name with quote characters stripped; printed, compared and hashed through its lower-cased form. */ public final class Schema { 8 | private final String rawName; 9 | 10 | /** Creates the default schema used when a table name carries no schema part. */ public Schema() { 11 | rawName = ""; 12 | } 13 | 14 | public Schema(String name) { 15 | rawName = escapeIdentifierName(name); 16 | } 17 | 18 | @Override 19 | public String toString() { 20 | return rawName.toLowerCase(); 21 | } 22 | 23 | @Override 24 | public boolean equals(Object obj) { 25 | return obj instanceof Schema && this.toString().equals(obj.toString()); 26 | } 27 | 28 | @Override 29 | public int hashCode() { 30 | return Objects.hash(this.toString()); 31 | } 32 | } 33 | --------------------------------------------------------------------------------
/sqllineage4j-common/src/main/java/io/github/reata/sqllineage4j/common/model/SubQuery.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.common.model; 2 | 3 | /** A sub-query identified by its query text. When no alias is given one is derived as "subquery_" + query.hashCode(). Equality and hashing use the query text only; toString() returns the alias. */ public final class SubQuery implements QuerySet { 4 | private final String query; 5 | private final String alias; 6 | 7 | public SubQuery(String query, String alias) { 8 | this.query = query; 9 | if (alias == null) { 10 | alias = "subquery_" + query.hashCode(); 11 | } 12 | this.alias = alias; 13 | } 14 | 15 | @Override 16 | public String toString() { 17 | return alias; 18 | } 19 | 20 | @Override 21 | public boolean equals(Object obj) { 22 | return obj instanceof SubQuery && this.query.equals(((SubQuery) obj).getQuery()); 23 | } 24 | 25 | @Override 26 | public int hashCode() { 27 | return query.hashCode(); 28 | } 29 | 30 | public String getQuery() { 31 | return query; 32 | } 33 | 34 | public String getAlias() { 35 | return alias; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /sqllineage4j-common/src/main/java/io/github/reata/sqllineage4j/common/model/Table.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.common.model; 2 | 3 | import java.util.Objects; 4 | 5 | import static io.github.reata.sqllineage4j.common.utils.Helper.escapeIdentifierName; 6 | 7 | /** A table name split into a schema part and a table part, both with quote characters stripped. Printing, equality and hashing all go through the lower-cased "schema.table" string. */ public final class Table implements QuerySet { 8 | private final String rawName; 9 | 10 | private final String alias; 11 | private Schema schema = new Schema(); 12 | 13 | /** Creates a table whose alias is the name as given. */ public Table(String name) { 14 | this(name, name); 15 | } 16 | 17 | /** Splits name at its last dot: the text before the dot becomes the schema and the text after it the table name. A name without a dot keeps the default Schema. */ public Table(String name, String alias) { 18 | if (name.contains(".")) { 19 | int pos = name.lastIndexOf("."); 20 | /* substring's end index is exclusive, so pos (not pos + 1) keeps the separating dot out of the schema name; with pos + 1 the name "db.tbl" was rendered as "db..tbl" */ String schemaName = name.substring(0, pos); 21 | String tableName = name.substring(pos + 1); 22 | this.schema = new Schema(schemaName); 23 | this.rawName = escapeIdentifierName(tableName); 24 | } else { 25
| this.rawName = escapeIdentifierName(name); 26 | } 27 | this.alias = alias; 28 | } 29 | 30 | /** Returns the lower-cased schema and table name joined by a single dot. */ @Override 31 | public String toString() { 32 | return schema.toString() + "." + rawName.toLowerCase(); 33 | } 34 | 35 | @Override 36 | public boolean equals(Object obj) { 37 | return obj instanceof Table && this.toString().equals(obj.toString()); 38 | } 39 | 40 | @Override 41 | public int hashCode() { 42 | return Objects.hash(this.toString()); 43 | } 44 | 45 | @Override 46 | public String getAlias() { 47 | return alias; 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /sqllineage4j-common/src/main/java/io/github/reata/sqllineage4j/common/utils/Helper.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.common.utils; 2 | 3 | public final class Helper { 4 | /** Removes every backtick, single quote and double quote from name. */ public static String escapeIdentifierName(String name) { 5 | return name.replaceAll("`", "").replaceAll("'", "").replaceAll("\"", ""); 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /sqllineage4j-common/src/test/java/io/github/reata/sqllineage4j/common/ModelTest.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.common; 2 | 3 | import io.github.reata.sqllineage4j.common.model.Schema; 4 | import io.github.reata.sqllineage4j.common.model.Table; 5 | import org.junit.Test; 6 | 7 | import java.util.HashSet; 8 | import java.util.List; 9 | 10 | import static org.junit.Assert.assertEquals; 11 | import static org.junit.Assert.assertNotNull; 12 | 13 | public class ModelTest { 14 | 15 | @Test 16 | public void testDummy() { 17 | assertNotNull(new Schema().toString()); 18 | assertNotNull(new Table("").toString()); 19 | assertNotNull(new Table("a.b.c").toString()); /* regression: the schema part must not keep the separating dot */ assertEquals("a.b.c", new Table("a.b.c").toString()); 20 | } 21 | 22 | @Test 23 | public void testHashEq() { 24 | assertEquals(new Schema("a"), new Schema("a")); 25 |
assertEquals(1, new HashSet<>(List.of(new Schema("a"), new Schema("a"))).size()); 26 | assertEquals(new Table("a"), new Table("a")); 27 | assertEquals(1, new HashSet<>(List.of(new Table("a"), new Table("a"))).size()); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /sqllineage4j-core/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4 | sqllineage4j 5 | io.github.reata 6 | 1.0.1-SNAPSHOT 7 | 8 | 4.0.0 9 | sqllineage4j-core 10 | sqllineage4j-core 11 | 12 | 13 | 14 | io.github.reata 15 | sqllineage4j-common 16 | 17 | 18 | io.github.reata 19 | sqllineage4j-parser 20 | 21 | 22 | io.github.reata 23 | sqllineage4j-graph 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /sqllineage4j-core/src/main/java/io/github/reata/sqllineage4j/core/LineageAnalyzer.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.core; 2 | 3 | import io.github.reata.sqllineage4j.common.constant.NodeTag; 4 | import io.github.reata.sqllineage4j.common.entity.ColumnQualifierTuple; 5 | import io.github.reata.sqllineage4j.common.model.Column; 6 | import io.github.reata.sqllineage4j.common.model.QuerySet; 7 | import io.github.reata.sqllineage4j.common.model.SubQuery; 8 | import io.github.reata.sqllineage4j.common.model.Table; 9 | import io.github.reata.sqllineage4j.core.holder.StatementLineageHolder; 10 | import io.github.reata.sqllineage4j.core.holder.SubQueryLineageHolder; 11 | import io.github.reata.sqllineage4j.parser.SqlBaseBaseListener; 12 | import io.github.reata.sqllineage4j.parser.SqlBaseParser; 13 | import org.antlr.v4.runtime.CharStream; 14 | import org.antlr.v4.runtime.ParserRuleContext; 15 | import org.antlr.v4.runtime.misc.Interval; 16 | import org.antlr.v4.runtime.tree.ParseTree; 17 | import org.antlr.v4.runtime.tree.ParseTreeWalker; 18 | 19 | import java.util.*; 20 |
import java.util.function.Function; 21 | import java.util.stream.Collectors; 22 | 23 | /** Runs a LineageListener over a parse tree and returns the StatementLineageHolder the listener filled in. */ public class LineageAnalyzer { 24 | 25 | public StatementLineageHolder analyze(ParseTree stmt) { 26 | ParseTreeWalker walker = new ParseTreeWalker(); 27 | LineageListener listener = new LineageListener(); 28 | walker.walk(listener, stmt); 29 | return listener.getStatementLineageHolder(); 30 | } 31 | 32 | /** Parse-tree listener that records reads, writes, CTEs, drops, renames and column lineage. Each RegularQuerySpecification gets its own SubQueryLineageHolder, stored under the context's hashCode(). */ public static class LineageListener extends SqlBaseBaseListener { 33 | 34 | private final StatementLineageHolder statementLineageHolder = new StatementLineageHolder(); 35 | private final Map subQueryLineageHolders = new HashMap<>(); 36 | 37 | public StatementLineageHolder getStatementLineageHolder() { 38 | return statementLineageHolder; 39 | } 40 | 41 | /** Returns the input-stream text from the first character of the context's start token through the last character of its stop token. */ private String getOriginalText(ParserRuleContext parserRuleContext) { 42 | CharStream stream = parserRuleContext.start.getInputStream(); 43 | return stream.getText(new Interval(parserRuleContext.start.getStartIndex(), parserRuleContext.stop.getStopIndex())); 44 | } 45 | 46 | /** Returns the holder stored for the closest RegularQuerySpecification ancestor of ctx (ctx itself is not tested), or null when no such ancestor exists. */ private SubQueryLineageHolder getHolder(ParserRuleContext ctx) { 47 | while (ctx.getParent() != null) { 48 | ctx = ctx.getParent(); 49 | if (ctx instanceof SqlBaseParser.RegularQuerySpecificationContext) { 50 | return subQueryLineageHolders.get(ctx.hashCode()); 51 | } 52 | } 53 | return null; 54 | } 55 | 56 | /** Merges every per-query holder into the statement-level holder once the whole statement has been walked. */ @Override 57 | public void exitSingleStatement(SqlBaseParser.SingleStatementContext ctx) { 58 | for (SubQueryLineageHolder holder : subQueryLineageHolders.values()) { 59 | statementLineageHolder.union(holder); 60 | } 61 | } 62 | 63 | @Override 64 | public void enterInsertIntoTable(SqlBaseParser.InsertIntoTableContext ctx) { 65 | statementLineageHolder.addWrite(new Table(ctx.multipartIdentifier().getText())); 66 | } 67 | 68 | @Override 69 | public void enterInsertOverwriteTable(SqlBaseParser.InsertOverwriteTableContext ctx) { 70 | statementLineageHolder.addWrite(new Table(ctx.multipartIdentifier().getText())); 71 | } 72 | 73 | @Override 74 | public void
enterCreateTableHeader(SqlBaseParser.CreateTableHeaderContext ctx) { 75 | statementLineageHolder.addWrite(new Table(ctx.multipartIdentifier().getText())); 76 | } 77 | 78 | /** Records the target of CREATE TABLE ... LIKE as a write and its source as a read. */ @Override 79 | public void enterCreateTableLike(SqlBaseParser.CreateTableLikeContext ctx) { 80 | statementLineageHolder.addWrite(new Table(ctx.target.getText())); 81 | statementLineageHolder.addRead(new Table(ctx.source.getText())); 82 | } 83 | 84 | @Override 85 | public void enterUpdateTable(SqlBaseParser.UpdateTableContext ctx) { 86 | handleMultipartIdentifier(ctx.multipartIdentifier(), NodeTag.WRITE, null); 87 | } 88 | 89 | @Override 90 | public void enterDropTable(SqlBaseParser.DropTableContext ctx) { 91 | handleMultipartIdentifier(ctx.multipartIdentifier(), NodeTag.DROP, null); 92 | } 93 | 94 | @Override 95 | public void enterRenameTable(SqlBaseParser.RenameTableContext ctx) { 96 | statementLineageHolder.addRename(new Table(ctx.from.getText()), new Table(ctx.to.getText())); 97 | } 98 | 99 | /** Handles a native command carrying the ALTER, TABLE, EXCHANGE and PARTITION tokens: its table identifier is recorded as a write and the text of the context's last child as a read. */ @Override 100 | public void enterFailNativeCommand(SqlBaseParser.FailNativeCommandContext ctx) { 101 | SqlBaseParser.UnsupportedHiveNativeCommandsContext unsupportedHiveNativeCommandsContext = ctx.unsupportedHiveNativeCommands(); 102 | if (unsupportedHiveNativeCommandsContext != null) { 103 | if (unsupportedHiveNativeCommandsContext.ALTER() != null 104 | && unsupportedHiveNativeCommandsContext.TABLE() != null 105 | && unsupportedHiveNativeCommandsContext.EXCHANGE() != null 106 | && unsupportedHiveNativeCommandsContext.PARTITION() != null) { 107 | statementLineageHolder.addWrite(new Table(unsupportedHiveNativeCommandsContext.tableIdentifier().getText())); 108 | statementLineageHolder.addRead(new Table(ctx.getChild(ctx.getChildCount() - 1).getText())); 109 | } 110 | } 111 | } 112 | 113 | /** Registers every named query that has a query body as a CTE, keyed by query text and named by its identifier. */ @Override 114 | public void enterCtes(SqlBaseParser.CtesContext ctx) { 115 | for (SqlBaseParser.NamedQueryContext namedQueryContext : ctx.namedQuery()) { 116 | if (namedQueryContext.query() != null) { 117 |
statementLineageHolder.addCTE(new SubQuery( 118 | namedQueryContext.query().getText(), 119 | namedQueryContext.errorCapturingIdentifier().getText() 120 | )); 121 | } 122 | } 123 | } 124 | 125 | /** Registers a fresh holder for this query block. Its write target is the nearest enclosing aliased query or named query when there is one; otherwise it is the first element of the statement holder's write collection, if that collection is not empty. */ @Override 126 | public void enterRegularQuerySpecification(SqlBaseParser.RegularQuerySpecificationContext ctx) { 127 | SubQueryLineageHolder holder = new SubQueryLineageHolder(); 128 | subQueryLineageHolders.put(ctx.hashCode(), holder); 129 | ParserRuleContext parentCtx = ctx; 130 | boolean isSubQuery = false; 131 | while (parentCtx.getParent() != null) { 132 | parentCtx = parentCtx.getParent(); 133 | if (parentCtx instanceof SqlBaseParser.AliasedQueryContext) { 134 | isSubQuery = true; 135 | SqlBaseParser.AliasedQueryContext aliasedQueryContext = (SqlBaseParser.AliasedQueryContext) parentCtx; 136 | SubQuery subQuery = new SubQuery(aliasedQueryContext.query().getText(), aliasedQueryContext.tableAlias().getText()); 137 | holder.addWrite(subQuery); 138 | break; 139 | } else if (parentCtx instanceof SqlBaseParser.NamedQueryContext) { 140 | isSubQuery = true; 141 | SqlBaseParser.NamedQueryContext namedQueryContext = (SqlBaseParser.NamedQueryContext) parentCtx; 142 | SubQuery subQuery = new SubQuery(namedQueryContext.query().getText(), namedQueryContext.errorCapturingIdentifier().getText()); 143 | holder.addWrite(subQuery); 144 | break; 145 | } 146 | } 147 | if (!isSubQuery) { 148 | if (statementLineageHolder.getWrite().size() > 0) { 149 | holder.addWrite(new ArrayList<>(statementLineageHolder.getWrite()).get(0)); 150 | } 151 | } 152 | } 153 | 154 | /** When the block has exactly one write target, attaches it as parent of each selected column and records a lineage edge from every resolved source column to that column. */ @Override 155 | public void exitRegularQuerySpecification(SqlBaseParser.RegularQuerySpecificationContext ctx) { 156 | SubQueryLineageHolder holder = subQueryLineageHolders.get(ctx.hashCode()); 157 | QuerySet tgtTbl = null; 158 | if (holder.getWrite().size() == 1) { 159 | tgtTbl = List.copyOf(holder.getWrite()).get(0); 160 | } 161 | if (tgtTbl != null) { 162 | for (Column tgtCol : holder.getSelectColumns()) { 163 |
tgtCol.setParent(tgtTbl); 164 | Map aliasMapping = getAliasMappingFromTableGroup(holder); 165 | for (Column srcCol : tgtCol.toSourceColumns(aliasMapping)) { 166 | holder.addColumnLineage(srcCol, tgtCol); 167 | } 168 | } 169 | } 170 | } 171 | 172 | /** Processes each named expression of the select list; the expression's identifier, or "" when it has none, is passed on as the output alias. */ @Override 173 | public void enterSelectClause(SqlBaseParser.SelectClauseContext ctx) { 174 | for (SqlBaseParser.NamedExpressionContext namedExpressionContext : ctx.namedExpressionSeq().namedExpression()) { 175 | String alias = getIdentifierName(namedExpressionContext.errorCapturingIdentifier()); 176 | SqlBaseParser.BooleanExpressionContext booleanExpressionContext = namedExpressionContext.expression().booleanExpression(); 177 | handleBooleanExpression(booleanExpressionContext, alias); 178 | } 179 | } 180 | 181 | /** Records the primary relation of every FROM relation and of each of its join relations. */ @Override 182 | public void enterFromClause(SqlBaseParser.FromClauseContext ctx) { 183 | for (SqlBaseParser.RelationContext relationContext : ctx.relation()) { 184 | handleRelationPrimary(relationContext.relationPrimary()); 185 | for (SqlBaseParser.JoinRelationContext joinRelationContext : relationContext.joinRelation()) { 186 | handleRelationPrimary(joinRelationContext.relationPrimary()); 187 | } 188 | } 189 | } 190 | 191 | /** Special-cases swap_partitions_between_tables called with exactly four arguments: the first argument is recorded as a table read and the fourth as a table write, with single and double quotes removed. */ @Override 192 | public void enterFunctionCall(SqlBaseParser.FunctionCallContext ctx) { 193 | if (ctx.functionName().getText().equalsIgnoreCase("swap_partitions_between_tables")) { 194 | List arguments = ctx.argument; 195 | if (arguments.size() == 4) { 196 | statementLineageHolder.addRead(new Table(arguments.get(0).getText().replace("'", "").replace("\"", ""))); 197 | statementLineageHolder.addWrite(new Table(arguments.get(3).getText().replace("'", "").replace("\"", ""))); 198 | } 199 | } 200 | } 201 | 202 | /** Records what a relation reads: a table name (with its alias, if any), the inner relation of an aliased relation, or an aliased sub-query. Other relation kinds are ignored. */ private void handleRelationPrimary(SqlBaseParser.RelationPrimaryContext relationPrimaryContext) { 203 | if (relationPrimaryContext instanceof SqlBaseParser.TableNameContext) { 204 | SqlBaseParser.TableNameContext tableNameContext = (SqlBaseParser.TableNameContext) relationPrimaryContext;
205 | String alias = null; 206 | if (tableNameContext.tableAlias().strictIdentifier() != null) { 207 | alias = getOriginalText(tableNameContext.tableAlias().strictIdentifier()); 208 | } 209 | handleMultipartIdentifier(tableNameContext.multipartIdentifier(), NodeTag.READ, alias); 210 | } else if (relationPrimaryContext instanceof SqlBaseParser.AliasedRelationContext) { 211 | SqlBaseParser.AliasedRelationContext aliasedRelationContext = (SqlBaseParser.AliasedRelationContext) relationPrimaryContext; 212 | handleRelationPrimary(aliasedRelationContext.relation().relationPrimary()); 213 | } else if (relationPrimaryContext instanceof SqlBaseParser.AliasedQueryContext) { 214 | SqlBaseParser.AliasedQueryContext aliasedQueryContext = (SqlBaseParser.AliasedQueryContext) relationPrimaryContext; 215 | SubQueryLineageHolder holder = getHolder(relationPrimaryContext); 216 | Objects.requireNonNull(holder).addRead(new SubQuery(aliasedQueryContext.query().getText(), aliasedQueryContext.tableAlias().getText())); 217 | } 218 | } 219 | 220 | /** Builds a Table from the dot-joined, unquoted identifier parts and records it according to type (NodeTag.READ, WRITE or DROP). A READ whose name matches a CTE alias records that CTE instead of a table. READ requires an enclosing query holder; WRITE falls back to the statement holder when there is none. */ private void handleMultipartIdentifier(SqlBaseParser.MultipartIdentifierContext multipartIdentifierContext, String type, String alias) { 221 | SubQueryLineageHolder holder = getHolder(multipartIdentifierContext); 222 | List unquotedParts = new ArrayList<>(); 223 | for (SqlBaseParser.ErrorCapturingIdentifierContext errorCapturingIdentifierContext : multipartIdentifierContext.errorCapturingIdentifier()) { 224 | String identifier = getIdentifierName(errorCapturingIdentifierContext); 225 | if (!identifier.equals("")) { 226 | unquotedParts.add(identifier); 227 | } 228 | } 229 | String rawName = String.join(".", unquotedParts); 230 | Table table = alias == null ?
new Table(rawName) : new Table(rawName, alias); 231 | switch (type) { 232 | case NodeTag.READ: 233 | Map cteMap = statementLineageHolder.getCTE().stream().collect(Collectors.toMap(SubQuery::getAlias, Function.identity())); 234 | if (cteMap.containsKey(rawName)) { 235 | SubQuery cte = cteMap.get(rawName); 236 | if (alias != null) { 237 | /* an aliased CTE reference is additionally recorded under the alias, sharing the CTE's query text */ Objects.requireNonNull(holder).addRead(new SubQuery(cte.getQuery(), alias)); 238 | } 239 | Objects.requireNonNull(holder).addRead(cte); 240 | } else { 241 | Objects.requireNonNull(holder).addRead(table); 242 | } 243 | break; 244 | case NodeTag.WRITE: 245 | Objects.requireNonNullElse(holder, statementLineageHolder).addWrite(table); 246 | break; 247 | case NodeTag.DROP: 248 | statementLineageHolder.addDrop(table); 249 | break; 250 | } 251 | } 252 | 253 | /** Descends through predicated and logical-binary boolean expressions to the value expressions underneath; other kinds are ignored. */ private void handleBooleanExpression(SqlBaseParser.BooleanExpressionContext booleanExpressionContext, String alias) { 254 | if (booleanExpressionContext instanceof SqlBaseParser.PredicatedContext) { 255 | SqlBaseParser.PredicatedContext predicatedContext = (SqlBaseParser.PredicatedContext) booleanExpressionContext; 256 | SqlBaseParser.ValueExpressionContext valueExpressionContext = predicatedContext.valueExpression(); 257 | handleValueExpression(valueExpressionContext, alias); 258 | } else if (booleanExpressionContext instanceof SqlBaseParser.LogicalBinaryContext) { 259 | SqlBaseParser.LogicalBinaryContext logicalBinaryContext = (SqlBaseParser.LogicalBinaryContext) booleanExpressionContext; 260 | for (SqlBaseParser.BooleanExpressionContext subBooleanExpressionContext : logicalBinaryContext.booleanExpression()) { 261 | handleBooleanExpression(subBooleanExpressionContext, alias); 262 | } 263 | } 264 | } 265 | 266 | /** Walks a select expression and appends one Column per column reference, dereference or star found to the enclosing holder's select columns. alias names the output column; when it is "" the column name or the text of the surrounding function, cast, CASE or arithmetic expression is used instead. Requires an enclosing query holder. */ private void handleValueExpression(SqlBaseParser.ValueExpressionContext valueExpressionContext, String alias) { 267 | SubQueryLineageHolder holder = getHolder(valueExpressionContext); 268 | List selectColumns = Objects.requireNonNull(holder).getSelectColumns(); 269 | if
(valueExpressionContext instanceof SqlBaseParser.ValueExpressionDefaultContext) { 270 | SqlBaseParser.ValueExpressionDefaultContext valueExpressionDefaultContext = (SqlBaseParser.ValueExpressionDefaultContext) valueExpressionContext; 271 | SqlBaseParser.PrimaryExpressionContext primaryExpressionContext = valueExpressionDefaultContext.primaryExpression(); 272 | if (primaryExpressionContext instanceof SqlBaseParser.ColumnReferenceContext) { 273 | SqlBaseParser.ColumnReferenceContext columnReferenceContext = (SqlBaseParser.ColumnReferenceContext) primaryExpressionContext; 274 | String columnName = columnReferenceContext.getText(); 275 | Column column = new Column(alias.equals("") ? columnName : alias); 276 | column.setSourceColumns(ColumnQualifierTuple.create(columnName, null)); 277 | selectColumns.add(column); 278 | } else if (primaryExpressionContext instanceof SqlBaseParser.DereferenceContext) { 279 | SqlBaseParser.DereferenceContext dereferenceContext = (SqlBaseParser.DereferenceContext) primaryExpressionContext; 280 | String columnName = dereferenceContext.identifier().strictIdentifier().getText(); 281 | Column column = new Column(alias.equals("") ? columnName : alias); 282 | String qualifierName = dereferenceContext.primaryExpression().getText(); 283 | column.setSourceColumns(ColumnQualifierTuple.create(columnName, qualifierName)); 284 | selectColumns.add(column); 285 | } else if (primaryExpressionContext instanceof SqlBaseParser.StarContext) { 286 | SqlBaseParser.StarContext starContext = (SqlBaseParser.StarContext) primaryExpressionContext; 287 | String columnName = starContext.ASTERISK().getText(); 288 | Column column = new Column(alias.equals("") ?
columnName : alias); 289 | column.setSourceColumns(ColumnQualifierTuple.create(columnName, null)); 290 | selectColumns.add(column); 291 | } else if (primaryExpressionContext instanceof SqlBaseParser.FunctionCallContext) { 292 | SqlBaseParser.FunctionCallContext functionCallContext = (SqlBaseParser.FunctionCallContext) primaryExpressionContext; 293 | for (SqlBaseParser.ExpressionContext expressionContext : functionCallContext.expression()) { 294 | handleBooleanExpression(expressionContext.booleanExpression(), alias.equals("") ? functionCallContext.getText() : alias); 295 | } 296 | if (functionCallContext.windowSpec() != null) { 297 | /* for a window definition, its expressions and sort items are walked under the same output name */ SqlBaseParser.WindowSpecContext windowSpecContext = functionCallContext.windowSpec(); 298 | if (windowSpecContext instanceof SqlBaseParser.WindowDefContext) { 299 | SqlBaseParser.WindowDefContext windowDefContext = (SqlBaseParser.WindowDefContext) windowSpecContext; 300 | for (SqlBaseParser.ExpressionContext expressionContext : windowDefContext.expression()) { 301 | handleBooleanExpression(expressionContext.booleanExpression(), alias.equals("") ? functionCallContext.getText() : alias); 302 | } 303 | for (SqlBaseParser.SortItemContext sortItemContext : windowDefContext.sortItem()) { 304 | handleBooleanExpression(sortItemContext.expression().booleanExpression(), alias.equals("") ? functionCallContext.getText() : alias); 305 | } 306 | } 307 | } 308 | } else if (primaryExpressionContext instanceof SqlBaseParser.CastContext) { 309 | SqlBaseParser.CastContext castContext = (SqlBaseParser.CastContext) primaryExpressionContext; 310 | handleBooleanExpression(castContext.expression().booleanExpression(), alias.equals("") ?
getOriginalText(castContext) : alias); 311 | } else if (primaryExpressionContext instanceof SqlBaseParser.ParenthesizedExpressionContext) { 312 | SqlBaseParser.ParenthesizedExpressionContext parenthesizedExpressionContext = (SqlBaseParser.ParenthesizedExpressionContext) primaryExpressionContext; 313 | handleBooleanExpression(parenthesizedExpressionContext.expression().booleanExpression(), alias); 314 | } else if (primaryExpressionContext instanceof SqlBaseParser.SearchedCaseContext) { 315 | SqlBaseParser.SearchedCaseContext searchedCaseContext = (SqlBaseParser.SearchedCaseContext) primaryExpressionContext; 316 | alias = alias.equals("") ? getOriginalText(searchedCaseContext) : alias; 317 | for (SqlBaseParser.WhenClauseContext whenClauseContext : searchedCaseContext.whenClause()) { 318 | for (SqlBaseParser.ExpressionContext expressionContext : whenClauseContext.expression()) { 319 | handleBooleanExpression(expressionContext.booleanExpression(), alias); 320 | } 321 | } 322 | if (searchedCaseContext.expression() != null) { 323 | handleBooleanExpression(searchedCaseContext.expression().booleanExpression(), alias); 324 | } 325 | } 326 | } else if (valueExpressionContext instanceof SqlBaseParser.ComparisonContext) { 327 | SqlBaseParser.ComparisonContext comparisonContext = (SqlBaseParser.ComparisonContext) valueExpressionContext; 328 | for (SqlBaseParser.ValueExpressionContext subValueExpressionContext : comparisonContext.valueExpression()) { 329 | handleValueExpression(subValueExpressionContext, alias); 330 | } 331 | } else if (valueExpressionContext instanceof SqlBaseParser.ArithmeticBinaryContext) { 332 | SqlBaseParser.ArithmeticBinaryContext arithmeticBinaryContext = (SqlBaseParser.ArithmeticBinaryContext) valueExpressionContext; 333 | alias = alias.equals("") ?
getOriginalText(arithmeticBinaryContext) : alias; 334 | for (SqlBaseParser.ValueExpressionContext subValueExpressionContext : arithmeticBinaryContext.valueExpression()) { 335 | handleValueExpression(subValueExpressionContext, alias); 336 | } 337 | } 338 | } 339 | 340 | /** Returns the identifier text, with backticks removed for a quoted identifier; returns "" when the context is null or the identifier is of another kind. */ private String getIdentifierName(SqlBaseParser.ErrorCapturingIdentifierContext errorCapturingIdentifierContext) { 341 | String name = ""; 342 | if (errorCapturingIdentifierContext != null) { 343 | SqlBaseParser.StrictIdentifierContext strictIdentifierContext = errorCapturingIdentifierContext.identifier().strictIdentifier(); 344 | if (strictIdentifierContext instanceof SqlBaseParser.QuotedIdentifierAlternativeContext) { 345 | name = strictIdentifierContext.getText().replace("`", ""); 346 | } else if (strictIdentifierContext instanceof SqlBaseParser.UnquotedIdentifierContext) { 347 | name = strictIdentifierContext.getText(); 348 | } 349 | } 350 | return name; 351 | } 352 | 353 | /** Takes the map returned by holder.getQuerySetAlias() and adds an entry keyed by toString() for every dataset the holder reads. NOTE(review): the entries are put into the returned map itself; whether that map is a copy or the holder's own state is not visible here. */ private Map getAliasMappingFromTableGroup(SubQueryLineageHolder holder) { 354 | Map alias = holder.getQuerySetAlias(); 355 | for (QuerySet dataset : holder.getRead()) { 356 | alias.put(dataset.toString(), dataset); 357 | // TODO: rawName -> dataset 358 | } 359 | return alias; 360 | } 361 | } 362 | } 363 | -------------------------------------------------------------------------------- /sqllineage4j-core/src/main/java/io/github/reata/sqllineage4j/core/LineageRunner.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.core; 2 | 3 | import io.github.reata.sqllineage4j.common.model.Column; 4 | import io.github.reata.sqllineage4j.common.model.Table; 5 | import io.github.reata.sqllineage4j.core.holder.SQLLineageHolder; 6 | import io.github.reata.sqllineage4j.core.holder.StatementLineageHolder; 7 | import io.github.reata.sqllineage4j.parser.LineageParser; 8 | import io.github.reata.sqllineage4j.parser.StatementSplitter; 9 | import org.javatuples.Pair; 10 | 11 |
import java.util.List; 12 | import java.util.stream.Collectors; 13 | 14 | public class LineageRunner { 15 | 16 | public static final class Builder { 17 | private final String sql; 18 | private boolean verbose = false; 19 | 20 | private Builder(final String sql) { 21 | this.sql = sql; 22 | } 23 | 24 | public Builder verbose() { 25 | this.verbose = true; 26 | return this; 27 | } 28 | 29 | public LineageRunner build() { 30 | if (sql == null) { 31 | throw new IllegalArgumentException("sql string must be specified"); 32 | } 33 | return new LineageRunner(this); 34 | } 35 | } 36 | 37 | 38 | private final List statementLineageHolders; 39 | private final SQLLineageHolder sqlLineageHolder; 40 | private final List statements; 41 | 42 | private final boolean verbose; 43 | 44 | private LineageRunner(final Builder builder) { 45 | String sql = builder.sql; 46 | this.verbose = builder.verbose; 47 | statements = new StatementSplitter(sql).split(); 48 | statementLineageHolders = statements.stream().map(x -> new LineageAnalyzer().analyze(LineageParser.parse(x))).collect(Collectors.toList()); 49 | sqlLineageHolder = SQLLineageHolder.of(statementLineageHolders.toArray(StatementLineageHolder[]::new)); 50 | } 51 | 52 | public List sourceTables() { 53 | return List.copyOf(sqlLineageHolder.getSourceTables()); 54 | } 55 | 56 | public List
targetTables() { 57 | return List.copyOf(sqlLineageHolder.getTargetTables()); 58 | } 59 | 60 | public List
intermediateTables() { 61 | return List.copyOf(sqlLineageHolder.getIntermediateTables()); 62 | } 63 | 64 | public List> getColumnLineage() { 65 | return getColumnLineage(true); 66 | } 67 | 68 | public List> getColumnLineage(boolean excludeSubquery) { 69 | return sqlLineageHolder.getColumnLineage(excludeSubquery) 70 | .stream().map(path -> Pair.with(path.get(0), path.get(path.size() - 1))) 71 | .collect(Collectors.toList()); 72 | } 73 | 74 | public void printTableLineage() { 75 | String sourceTables = sourceTables().stream().map(t -> " " + t.toString() + "\n").collect(Collectors.joining()); 76 | String targetTables = targetTables().stream().map(t -> " " + t.toString() + "\n").collect(Collectors.joining()); 77 | String combined = "Statements(#): " + statements.size() + "\n" 78 | + "Source Tables:\n" 79 | + sourceTables 80 | + "Target Tables:\n" 81 | + targetTables; 82 | if (intermediateTables().size() > 0) { 83 | String intermediateTables = intermediateTables().stream().map(t -> " " + t.toString() + "\n").collect(Collectors.joining()); 84 | combined += "Intermediate Tables:\n" + intermediateTables; 85 | } 86 | if (verbose) { 87 | StringBuilder result = new StringBuilder(); 88 | for (int i = 0; i < statementLineageHolders.size(); i++) { 89 | String stmtShort = statements.get(i).replace("\n", ""); 90 | if (stmtShort.length() > 50) { 91 | stmtShort = stmtShort.substring(0, 50) + "..."; 92 | } 93 | String content = statementLineageHolders.get(i).toString().replace("\n", "\n "); 94 | result.append("Statement #").append(i + 1).append(": ").append(stmtShort).append("\n ").append(content).append("\n"); 95 | } 96 | combined = result + "==========\nSummary:\n" + combined; 97 | } 98 | System.out.println(combined); 99 | } 100 | 101 | public static Builder builder(final String sql) { 102 | return new Builder(sql); 103 | } 104 | } 105 | -------------------------------------------------------------------------------- 
/sqllineage4j-core/src/main/java/io/github/reata/sqllineage4j/core/holder/SQLLineageHolder.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.core.holder; 2 | 3 | import io.github.reata.sqllineage4j.common.constant.EdgeType; 4 | import io.github.reata.sqllineage4j.common.constant.NodeTag; 5 | import io.github.reata.sqllineage4j.common.entity.EdgeTuple; 6 | import io.github.reata.sqllineage4j.common.model.Column; 7 | import io.github.reata.sqllineage4j.common.model.Table; 8 | import io.github.reata.sqllineage4j.graph.GremlinLineageGraph; 9 | import io.github.reata.sqllineage4j.graph.LineageGraph; 10 | import org.javatuples.Pair; 11 | 12 | import java.util.Collections; 13 | import java.util.HashSet; 14 | import java.util.List; 15 | import java.util.Set; 16 | import java.util.stream.Collectors; 17 | 18 | public class SQLLineageHolder { 19 | private final LineageGraph lineageGraph; 20 | 21 | public SQLLineageHolder(LineageGraph lineageGraph) { 22 | this.lineageGraph = lineageGraph; 23 | } 24 | 25 | public Set
getSourceTables() { 26 | LineageGraph tableLineageGraph = getTableLineageGraph(); 27 | Set
sourceTables = tableLineageGraph.retrieveSourceOnlyVertices() 28 | .stream().map(Table.class::cast).collect(Collectors.toSet()); 29 | Set
sourceOnlyTables = retrieveTagTables(NodeTag.SOURCE_ONLY); 30 | Set
selfLoopTables = retrieveTagTables(NodeTag.SELFLOOP); 31 | sourceTables.addAll(sourceOnlyTables); 32 | sourceTables.addAll(selfLoopTables); 33 | return sourceTables; 34 | } 35 | 36 | public Set
getTargetTables() { 37 | LineageGraph tableLineageGraph = getTableLineageGraph(); 38 | Set
targetTables = tableLineageGraph.retrieveTargetOnlyVertices() 39 | .stream().map(Table.class::cast).collect(Collectors.toSet()); 40 | Set
targetOnlyTables = retrieveTagTables(NodeTag.TARGET_ONLY); 41 | Set
selfLoopTables = retrieveTagTables(NodeTag.SELFLOOP); 42 | targetTables.addAll(targetOnlyTables); 43 | targetTables.addAll(selfLoopTables); 44 | return targetTables; 45 | } 46 | 47 | public Set
getIntermediateTables() { 48 | LineageGraph tableLineageGraph = getTableLineageGraph(); 49 | Set
intermediateTables = tableLineageGraph.retrieveConnectedVertices() 50 | .stream().map(Table.class::cast).collect(Collectors.toSet()); 51 | intermediateTables.removeAll(retrieveTagTables(NodeTag.SELFLOOP)); 52 | return intermediateTables; 53 | } 54 | 55 | public Set> getColumnLineage(boolean excludeSubquery) { 56 | LineageGraph columnLineageGraph = getColumnLineageGraph(); 57 | Set targetColumns = columnLineageGraph.retrieveTargetOnlyVertices() 58 | .stream().map(Column.class::cast).collect(Collectors.toSet()); 59 | Set sourceColumns = columnLineageGraph.retrieveSourceOnlyVertices() 60 | .stream().map(Column.class::cast).collect(Collectors.toSet()); 61 | if (excludeSubquery) { 62 | targetColumns = targetColumns.stream().filter(c -> c.getParent() instanceof Table).collect(Collectors.toSet()); 63 | } 64 | 65 | Set> columns = new HashSet<>(); 66 | for (Column sourceColumn : sourceColumns) { 67 | for (Column targetColumn : targetColumns) { 68 | columnLineageGraph.listPath(sourceColumn, targetColumn).forEach( 69 | path -> columns.add(path.stream().map(c -> (Column) c).collect(Collectors.toList())) 70 | ); 71 | } 72 | } 73 | return columns; 74 | } 75 | 76 | private LineageGraph getTableLineageGraph() { 77 | return lineageGraph.getSubGraph(Table.class.getSimpleName()); 78 | } 79 | 80 | private LineageGraph getColumnLineageGraph() { 81 | return lineageGraph.getSubGraph(Column.class.getSimpleName()); 82 | } 83 | 84 | private Set
retrieveTagTables(String tag) { 85 | return lineageGraph.retrieveVerticesByProps(Collections.singletonMap(tag, true)) 86 | .stream().map(Table.class::cast).collect(Collectors.toSet()); 87 | } 88 | 89 | public static SQLLineageHolder of(StatementLineageHolder... statementLineageHolders) { 90 | LineageGraph graph = buildDiGraph(statementLineageHolders); 91 | return new SQLLineageHolder(graph); 92 | } 93 | 94 | private static LineageGraph buildDiGraph(StatementLineageHolder... statementLineageHolders) { 95 | LineageGraph lineageGraph = new GremlinLineageGraph(); 96 | for (StatementLineageHolder holder : statementLineageHolders) { 97 | lineageGraph.merge(holder.getGraph()); 98 | if (holder.getDrop().size() > 0) { 99 | lineageGraph.dropVerticesIfOrphan(holder.getDrop().toArray()); 100 | } else if (holder.getRename().size() > 0) { 101 | for (Pair p : holder.getRename()) { 102 | Table tableOld = p.getValue0(); 103 | Table tableNew = p.getValue1(); 104 | for (EdgeTuple edgeTuple : lineageGraph.retrieveEdgesByVertex(tableOld)) { 105 | if (edgeTuple.source().equals(tableOld)) { 106 | lineageGraph.addEdgeIfNotExist(edgeTuple.label(), tableNew, edgeTuple.target()); 107 | } else if (edgeTuple.target().equals(tableOld)) { 108 | lineageGraph.addEdgeIfNotExist(edgeTuple.label(), edgeTuple.source(), tableNew); 109 | } 110 | } 111 | lineageGraph.dropVertices(tableOld); 112 | lineageGraph.dropSelfLoopEdge(); 113 | lineageGraph.dropVerticesIfOrphan(tableNew); 114 | } 115 | } else { 116 | Set
read = holder.getRead(); 117 | Set
write = holder.getWrite(); 118 | if (read.size() > 0 && write.size() == 0) { 119 | // source only table comes from SELECT statement 120 | lineageGraph.updateVertices(Collections.singletonMap(NodeTag.SOURCE_ONLY, Boolean.TRUE), read.toArray()); 121 | } else if (read.size() == 0 && write.size() > 0) { 122 | // target only table comes from case like: 1) INSERT/UPDATE constant values; 2) CREATE TABLE 123 | lineageGraph.updateVertices(Collections.singletonMap(NodeTag.TARGET_ONLY, Boolean.TRUE), write.toArray()); 124 | } else { 125 | for (Table r : read) { 126 | for (Table w : write) { 127 | lineageGraph.addEdgeIfNotExist(EdgeType.LINEAGE, r, w); 128 | } 129 | } 130 | } 131 | } 132 | } 133 | lineageGraph.updateVertices(Collections.singletonMap(NodeTag.SELFLOOP, Boolean.TRUE), 134 | lineageGraph.retrieveSelfLoopVertices().stream().filter(x -> x instanceof Table).toArray()); 135 | return lineageGraph; 136 | } 137 | } 138 | -------------------------------------------------------------------------------- /sqllineage4j-core/src/main/java/io/github/reata/sqllineage4j/core/holder/StatementLineageHolder.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.core.holder; 2 | 3 | import io.github.reata.sqllineage4j.common.constant.EdgeType; 4 | import io.github.reata.sqllineage4j.common.constant.NodeTag; 5 | import io.github.reata.sqllineage4j.common.model.Table; 6 | import org.javatuples.Pair; 7 | 8 | import java.util.Set; 9 | import java.util.stream.Collectors; 10 | 11 | public class StatementLineageHolder extends SubQueryLineageHolder { 12 | 13 | @Override 14 | public Set
getRead() { 15 | return super.getRead().stream().filter(x -> x instanceof Table).map(x -> (Table) x).collect(Collectors.toSet()); 16 | } 17 | 18 | @Override 19 | public Set
getWrite() { 20 | return super.getWrite().stream().filter(x -> x instanceof Table).map(x -> (Table) x).collect(Collectors.toSet()); 21 | } 22 | 23 | public Set
getDrop() { 24 | return propertyGetter(NodeTag.DROP).stream().map(x -> (Table) x).collect(Collectors.toSet()); 25 | } 26 | 27 | public Set> getRename() { 28 | return lineageGraph.retrieveEdgesByLabel(EdgeType.RENAME).stream().map( 29 | e -> new Pair<>((Table) e.source(), (Table) e.target()) 30 | ).collect(Collectors.toSet()); 31 | } 32 | 33 | public void addDrop(Table drop) { 34 | propertySetter(drop, NodeTag.DROP); 35 | } 36 | 37 | public void addRename(Table src, Table tgt) { 38 | lineageGraph.addVertexIfNotExist(src); 39 | lineageGraph.addVertexIfNotExist(tgt); 40 | lineageGraph.addEdgeIfNotExist(EdgeType.RENAME, src, tgt); 41 | } 42 | 43 | @Override 44 | public String toString() { 45 | return super.toString() + 46 | "table drop: " + getDrop().toString() + "\n" + 47 | "table cte: " + getRename().toString(); 48 | } 49 | 50 | public void union(SubQueryLineageHolder holder) { 51 | getGraph().merge(holder.getGraph()); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /sqllineage4j-core/src/main/java/io/github/reata/sqllineage4j/core/holder/SubQueryLineageHolder.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.core.holder; 2 | 3 | import io.github.reata.sqllineage4j.common.constant.EdgeType; 4 | import io.github.reata.sqllineage4j.common.constant.NodeTag; 5 | import io.github.reata.sqllineage4j.common.entity.EdgeTuple; 6 | import io.github.reata.sqllineage4j.common.model.Column; 7 | import io.github.reata.sqllineage4j.common.model.QuerySet; 8 | import io.github.reata.sqllineage4j.common.model.SubQuery; 9 | import io.github.reata.sqllineage4j.graph.GremlinLineageGraph; 10 | import io.github.reata.sqllineage4j.graph.LineageGraph; 11 | 12 | import java.util.*; 13 | import java.util.stream.Collectors; 14 | 15 | public class SubQueryLineageHolder { 16 | final LineageGraph lineageGraph = new GremlinLineageGraph(); 17 | 18 | public LineageGraph 
getGraph() { 19 | return lineageGraph; 20 | } 21 | 22 | void propertySetter(QuerySet value, String prop) { 23 | lineageGraph.addVertexIfNotExist(value, Collections.singletonMap(prop, Boolean.TRUE)); 24 | } 25 | 26 | Set propertyGetter(String prop) { 27 | return lineageGraph.retrieveVerticesByProps(Collections.singletonMap(prop, true)) 28 | .stream().map(x -> (QuerySet) x).collect(Collectors.toSet()); 29 | } 30 | 31 | public Set getRead() { 32 | return propertyGetter(NodeTag.READ); 33 | } 34 | 35 | public Set getWrite() { 36 | return propertyGetter(NodeTag.WRITE); 37 | } 38 | 39 | public Set getCTE() { 40 | return propertyGetter(NodeTag.CTE).stream().map(x -> (SubQuery) x).collect(Collectors.toSet()); 41 | } 42 | 43 | public void addRead(QuerySet read) { 44 | propertySetter(read, NodeTag.READ); 45 | if (read.getAlias() != null) { 46 | lineageGraph.addVertexIfNotExist(read.getAlias()); 47 | lineageGraph.addEdgeIfNotExist(EdgeType.HAS_ALIAS, read, read.getAlias()); 48 | } 49 | } 50 | 51 | public void addWrite(QuerySet write) { 52 | propertySetter(write, NodeTag.WRITE); 53 | } 54 | 55 | public void addCTE(SubQuery cte) { 56 | propertySetter(cte, NodeTag.CTE); 57 | } 58 | 59 | public void addColumnLineage(Column src, Column tgt) { 60 | lineageGraph.addVertexIfNotExist(src); 61 | lineageGraph.addVertexIfNotExist(tgt); 62 | lineageGraph.addEdgeIfNotExist(EdgeType.LINEAGE, src, tgt); 63 | lineageGraph.addEdgeIfNotExist(EdgeType.HAS_COLUMN, Objects.requireNonNull(tgt.getParent()), tgt); 64 | if (src.getParent() != null) { 65 | lineageGraph.addEdgeIfNotExist(EdgeType.HAS_COLUMN, Objects.requireNonNull(src.getParent()), src); 66 | } 67 | } 68 | 69 | @Override 70 | public String toString() { 71 | return "table read: " + getRead().toString() + "\n" + 72 | "table write: " + getWrite().toString() + "\n" + 73 | "table cte: " + getCTE().toString(); 74 | } 75 | 76 | private final List selectColumns = new ArrayList<>(); 77 | 78 | public List getSelectColumns() { 79 | return 
selectColumns; 80 | } 81 | 82 | public Map getQuerySetAlias() { 83 | Map aliasMapping = new HashMap<>(); 84 | for (EdgeTuple edgeTuple : lineageGraph.retrieveEdgesByLabel(EdgeType.HAS_ALIAS)) { 85 | aliasMapping.put((String) edgeTuple.target(), (QuerySet) edgeTuple.source()); 86 | } 87 | return aliasMapping; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /sqllineage4j-core/src/test/java/io/github/reata/sqllineage4j/core/CTETest.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.core; 2 | 3 | import org.junit.Test; 4 | 5 | import java.util.Set; 6 | 7 | import static io.github.reata.sqllineage4j.core.Helper.assertTableLineage; 8 | 9 | public class CTETest { 10 | @Test 11 | public void testWithSelect() { 12 | assertTableLineage("WITH tab1 AS (SELECT 1) SELECT * FROM tab1", Set.of()); 13 | } 14 | 15 | @Test 16 | public void testWithSelectOne() { 17 | assertTableLineage("WITH wtab1 AS (SELECT * FROM schema1.tab1) SELECT * FROM wtab1", Set.of("schema1.tab1")); 18 | } 19 | 20 | @Test 21 | public void testWithSelectOneWithoutAs() { 22 | // AS in CTE is negligible in SparkSQL, however it is required in MySQL. 
See below reference 23 | // https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-cte.html 24 | // https://dev.mysql.com/doc/refman/8.0/en/with.html 25 | assertTableLineage("WITH wtab1 (SELECT * FROM schema1.tab1) SELECT * FROM wtab1", Set.of("schema1.tab1")); 26 | } 27 | 28 | @Test 29 | public void testWithSelectMany() { 30 | assertTableLineage("WITH\n" + 31 | "cte1 AS (SELECT a, b FROM table1),\n" + 32 | "cte2 AS (SELECT c, d FROM table2)\n" + 33 | "SELECT b, d FROM cte1 JOIN cte2\n" + 34 | "WHERE cte1.a = cte2.c", Set.of("table1", "table2")); 35 | } 36 | 37 | @Test 38 | public void testWithSelectManyReference() { 39 | assertTableLineage("WITH\n" + 40 | "cte1 AS (SELECT a, b FROM tab1),\n" + 41 | "cte2 AS (SELECT a, count(*) AS cnt FROM cte1 GROUP BY a)\n" + 42 | "SELECT a, b, cnt FROM cte1 JOIN cte2\n" + 43 | "WHERE cte1.a = cte2.a", Set.of("tab1")); 44 | } 45 | 46 | @Test 47 | public void testWithUsingAlias() { 48 | assertTableLineage("WITH wtab1 AS (SELECT * FROM schema1.tab1) SELECT * FROM wtab1 wt", Set.of("schema1.tab1")); 49 | } 50 | 51 | @Test 52 | public void testWithSelectJoinTableWithSameName() { 53 | assertTableLineage("WITH wtab1 AS (SELECT * FROM schema1.tab1) SELECT * FROM wtab1 CROSS JOIN db.wtab1", Set.of("schema1.tab1", "db.wtab1")); 54 | } 55 | 56 | @Test 57 | public void testWithInsert() { 58 | assertTableLineage("WITH tab1 AS (SELECT * FROM tab2) INSERT INTO tab3 SELECT * FROM tab1", Set.of("tab2"), Set.of("tab3")); 59 | } 60 | 61 | @Test 62 | public void testWithInsertOverwrite() { 63 | assertTableLineage("WITH tab1 AS (SELECT * FROM tab2) INSERT OVERWRITE tab3 SELECT * FROM tab1", Set.of("tab2"), Set.of("tab3")); 64 | } 65 | 66 | @Test 67 | public void testWithInsertPlusKeywordTable() { 68 | assertTableLineage("WITH tab1 AS (SELECT * FROM tab2) INSERT INTO TABLE tab3 SELECT * FROM tab1", Set.of("tab2"), Set.of("tab3")); 69 | } 70 | 71 | @Test 72 | public void testWithInsertOverwritePlusKeywordTable() { 73 | assertTableLineage("WITH 
tab1 AS (SELECT * FROM tab2) INSERT OVERWRITE TABLE tab3 SELECT * FROM tab1", Set.of("tab2"), Set.of("tab3")); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /sqllineage4j-core/src/test/java/io/github/reata/sqllineage4j/core/ColumnTest.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.core; 2 | 3 | import io.github.reata.sqllineage4j.common.entity.ColumnQualifierTuple; 4 | import org.javatuples.Pair; 5 | import org.junit.Test; 6 | import org.junit.jupiter.params.ParameterizedTest; 7 | import org.junit.jupiter.params.provider.ValueSource; 8 | 9 | import java.util.Set; 10 | 11 | import static io.github.reata.sqllineage4j.core.Helper.assertColumnLineage; 12 | 13 | 14 | public class ColumnTest { 15 | @Test 16 | public void testSelectColumn() { 17 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 18 | "SELECT col1\n" + 19 | "FROM tab2", 20 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 21 | ColumnQualifierTuple.create("col1", "tab1")))); 22 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 23 | "SELECT col1 AS col2\n" + 24 | "FROM tab2", 25 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 26 | ColumnQualifierTuple.create("col2", "tab1")))); 27 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 28 | "SELECT tab2.col1 AS col2\n" + 29 | "FROM tab2", 30 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 31 | ColumnQualifierTuple.create("col2", "tab1")))); 32 | } 33 | 34 | @Test 35 | public void testSelectColumnWildcard() { 36 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 37 | "SELECT *\n" + 38 | "FROM tab2", 39 | Set.of(Pair.with(ColumnQualifierTuple.create("*", "tab2"), 40 | ColumnQualifierTuple.create("*", "tab1")))); 41 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 42 | "SELECT *\n" + 43 | "FROM tab2 a\n" + 44 | " INNER JOIN tab3 b\n" + 45 | " ON a.id = b.id", 
46 | Set.of(Pair.with(ColumnQualifierTuple.create("*", "tab2"), 47 | ColumnQualifierTuple.create("*", "tab1")), 48 | Pair.with(ColumnQualifierTuple.create("*", "tab3"), 49 | ColumnQualifierTuple.create("*", "tab1")))); 50 | } 51 | 52 | @Test 53 | public void testSelectColumnUsingFunction() { 54 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 55 | "SELECT max(col1),\n" + 56 | " count(*)\n" + 57 | "FROM tab2", 58 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 59 | ColumnQualifierTuple.create("max(col1)", "tab1")), 60 | Pair.with(ColumnQualifierTuple.create("*", "tab2"), 61 | ColumnQualifierTuple.create("count(*)", "tab1")))); 62 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 63 | "SELECT max(col1) AS col2,\n" + 64 | " count(*) AS cnt\n" + 65 | "FROM tab2", 66 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 67 | ColumnQualifierTuple.create("col2", "tab1")), 68 | Pair.with(ColumnQualifierTuple.create("*", "tab2"), 69 | ColumnQualifierTuple.create("cnt", "tab1")))); 70 | } 71 | 72 | @Test 73 | public void testSelectColumnUsingFunctionWithComplexParameter() { 74 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 75 | "SELECT if(col1 = 'foo' AND col2 = 'bar', 1, 0) AS flag\n" + 76 | "FROM tab2", 77 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 78 | ColumnQualifierTuple.create("flag", "tab1")), 79 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"), 80 | ColumnQualifierTuple.create("flag", "tab1")))); 81 | } 82 | 83 | @Test 84 | public void testSelectColumnUsingWindowFunction() { 85 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 86 | "SELECT row_number() OVER (PARTITION BY col1 ORDER BY col2 DESC) AS rnum\n" + 87 | "FROM tab2", 88 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 89 | ColumnQualifierTuple.create("rnum", "tab1")), 90 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"), 91 | ColumnQualifierTuple.create("rnum", "tab1")))); 92 | } 93 | 94 | @Test 95 | public 
void testSelectColumnUsingWindowFunctionWithParameters() { 96 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 97 | "SELECT col0,\n" + 98 | " max(col3) OVER (PARTITION BY col1 ORDER BY col2 DESC) AS rnum,\n" + 99 | " col4\n" + 100 | "FROM tab2", 101 | Set.of(Pair.with(ColumnQualifierTuple.create("col0", "tab2"), 102 | ColumnQualifierTuple.create("col0", "tab1")), 103 | Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 104 | ColumnQualifierTuple.create("rnum", "tab1")), 105 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"), 106 | ColumnQualifierTuple.create("rnum", "tab1")), 107 | Pair.with(ColumnQualifierTuple.create("col3", "tab2"), 108 | ColumnQualifierTuple.create("rnum", "tab1")), 109 | Pair.with(ColumnQualifierTuple.create("col4", "tab2"), 110 | ColumnQualifierTuple.create("col4", "tab1")))); 111 | } 112 | 113 | @Test 114 | public void testSelectColumnUsingCast() { 115 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 116 | "SELECT cast(col1 as timestamp)\n" + 117 | "FROM tab2", 118 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 119 | ColumnQualifierTuple.create("cast(col1 as timestamp)", "tab1")))); 120 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 121 | "SELECT cast(col1 as timestamp) as col2\n" + 122 | "FROM tab2", 123 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 124 | ColumnQualifierTuple.create("col2", "tab1")))); 125 | } 126 | 127 | @Test 128 | public void testSelectColumnUsingExpression() { 129 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 130 | "SELECT col1 + col2\n" + 131 | "FROM tab2", 132 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 133 | ColumnQualifierTuple.create("col1 + col2", "tab1")), 134 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"), 135 | ColumnQualifierTuple.create("col1 + col2", "tab1")))); 136 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 137 | "SELECT col1 + col2 AS col3\n" + 138 | "FROM tab2", 139 | 
Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 140 | ColumnQualifierTuple.create("col3", "tab1")), 141 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"), 142 | ColumnQualifierTuple.create("col3", "tab1")))); 143 | } 144 | 145 | @Test 146 | public void testSelectColumnUsingExpressionInParenthesis() { 147 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 148 | "SELECT (col1 + col2) AS col3\n" + 149 | "FROM tab2", 150 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 151 | ColumnQualifierTuple.create("col3", "tab1")), 152 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"), 153 | ColumnQualifierTuple.create("col3", "tab1")))); 154 | } 155 | 156 | @Test 157 | public void testSelectColumnUsingBooleanExpressionInParenthesis() { 158 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 159 | "SELECT (col1 > 0 AND col2 > 0) AS col3\n" + 160 | "FROM tab2", 161 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 162 | ColumnQualifierTuple.create("col3", "tab1")), 163 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"), 164 | ColumnQualifierTuple.create("col3", "tab1")))); 165 | } 166 | 167 | @Test 168 | public void testSelectColumnUsingExpressionWithTableQualifierWithoutColumnAlias() { 169 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 170 | "SELECT a.col1 + a.col2 + a.col3 + a.col4\n" + 171 | "FROM tab2 a", 172 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 173 | ColumnQualifierTuple.create("a.col1 + a.col2 + a.col3 + a.col4", "tab1")), 174 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"), 175 | ColumnQualifierTuple.create("a.col1 + a.col2 + a.col3 + a.col4", "tab1")), 176 | Pair.with(ColumnQualifierTuple.create("col3", "tab2"), 177 | ColumnQualifierTuple.create("a.col1 + a.col2 + a.col3 + a.col4", "tab1")), 178 | Pair.with(ColumnQualifierTuple.create("col4", "tab2"), 179 | ColumnQualifierTuple.create("a.col1 + a.col2 + a.col3 + a.col4", "tab1")))); 180 | } 181 | 182 | @Test 183 
| public void testSelectColumnUsingCaseWhen() { 184 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 185 | "SELECT CASE WHEN col1 = 1 THEN 'V1' WHEN col1 = 2 THEN 'V2' END\n" + 186 | "FROM tab2", 187 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 188 | ColumnQualifierTuple.create("CASE WHEN col1 = 1 THEN 'V1' WHEN col1 = 2 THEN 'V2' END", "tab1")))); 189 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 190 | "SELECT CASE WHEN col1 = 1 THEN 'V1' WHEN col1 = 2 THEN 'V2' END AS col2\n" + 191 | "FROM tab2", 192 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 193 | ColumnQualifierTuple.create("col2", "tab1")))); 194 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 195 | "SELECT CASE WHEN col1 = 1 THEN 'V1' WHEN col1 = 2 THEN 'V2' ELSE col_v END\n" + 196 | "FROM tab2", 197 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 198 | ColumnQualifierTuple.create("CASE WHEN col1 = 1 THEN 'V1' WHEN col1 = 2 THEN 'V2' ELSE col_v END", "tab1")), 199 | Pair.with(ColumnQualifierTuple.create("col_v", "tab2"), 200 | ColumnQualifierTuple.create("CASE WHEN col1 = 1 THEN 'V1' WHEN col1 = 2 THEN 'V2' ELSE col_v END", "tab1")))); 201 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 202 | "SELECT CASE WHEN col1 = 1 THEN 'V1' WHEN col1 = 2 THEN 'V2' ELSE col_v END AS col2\n" + 203 | "FROM tab2", 204 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 205 | ColumnQualifierTuple.create("col2", "tab1")), 206 | Pair.with(ColumnQualifierTuple.create("col_v", "tab2"), 207 | ColumnQualifierTuple.create("col2", "tab1")))); 208 | } 209 | 210 | // @Test 211 | // public void testSelectColumnUsingCaseWhenWithSubquery() { 212 | // assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 213 | // "SELECT CASE WHEN (SELECT avg(col1) FROM tab3) > 0 AND col2 = 1 THEN (SELECT avg(col1) FROM tab3) ELSE 0 END AS col1\n" + 214 | // "FROM tab4", 215 | // Set.of(Pair.with(ColumnQualifierTuple.create("col2", "tab4"), 216 | // 
ColumnQualifierTuple.create("col1", "tab1")), 217 | // Pair.with(ColumnQualifierTuple.create("col1", "tab3"), 218 | // ColumnQualifierTuple.create("col1", "tab1")))); 219 | // } 220 | 221 | @Test 222 | public void testSelectColumnWithTableQualifier() { 223 | // assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 224 | // "SELECT tab2.col1\n" + 225 | // "FROM tab2", 226 | // Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 227 | // ColumnQualifierTuple.create("col1", "tab1")))); 228 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 229 | "SELECT t.col1\n" + 230 | "FROM tab2 AS t", 231 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 232 | ColumnQualifierTuple.create("col1", "tab1")))); 233 | } 234 | 235 | @Test 236 | public void testSelectColumns() { 237 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 238 | "SELECT col1,\n" + 239 | "col2\n" + 240 | "FROM tab2", 241 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 242 | ColumnQualifierTuple.create("col1", "tab1")), 243 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"), 244 | ColumnQualifierTuple.create("col2", "tab1")))); 245 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 246 | "SELECT max(col1),\n" + 247 | "max(col2)\n" + 248 | "FROM tab2", 249 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 250 | ColumnQualifierTuple.create("max(col1)", "tab1")), 251 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"), 252 | ColumnQualifierTuple.create("max(col2)", "tab1")))); 253 | } 254 | 255 | @Test 256 | public void testSelectColumnInSubquery() { 257 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 258 | "SELECT col1\n" + 259 | "FROM (SELECT col1 FROM tab2) dt", 260 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 261 | ColumnQualifierTuple.create("col1", "tab1")))); 262 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 263 | "SELECT col1\n" + 264 | "FROM (SELECT col1, col2 FROM tab2) dt", 265 | 
Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 266 | ColumnQualifierTuple.create("col1", "tab1")))); 267 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 268 | "SELECT col1\n" + 269 | "FROM (SELECT col1 FROM tab2)", 270 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 271 | ColumnQualifierTuple.create("col1", "tab1")))); 272 | } 273 | 274 | @Test 275 | public void testSelectColumnInSubqueryWithTwoParenthesis() { 276 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 277 | "SELECT col1\n" + 278 | "FROM ((SELECT col1 FROM tab2)) dt", 279 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 280 | ColumnQualifierTuple.create("col1", "tab1")))); 281 | } 282 | 283 | @Test 284 | public void testSelectColumnInSubqueryWithTwoParenthesisAndBlankInBetween() { 285 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 286 | "SELECT col1\n" + 287 | "FROM (\n" + 288 | "(SELECT col1 FROM tab2)\n" + 289 | ") dt", 290 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 291 | ColumnQualifierTuple.create("col1", "tab1")))); 292 | } 293 | 294 | @Test 295 | public void testSelectColumnInSubqueryWithTwoParenthesisAndUnion() { 296 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 297 | "SELECT col1\n" + 298 | "FROM (\n" + 299 | " (SELECT col1 FROM tab2)\n" + 300 | " UNION ALL\n" + 301 | " (SELECT col1 FROM tab3)\n" + 302 | ") dt", 303 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 304 | ColumnQualifierTuple.create("col1", "tab1")), 305 | Pair.with(ColumnQualifierTuple.create("col1", "tab3"), 306 | ColumnQualifierTuple.create("col1", "tab1")))); 307 | } 308 | 309 | @Test 310 | public void testSelectColumnInSubqueryWithTwoParenthesisAndUnionV2() { 311 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 312 | "SELECT col1\n" + 313 | "FROM (\n" + 314 | " SELECT col1 FROM tab2\n" + 315 | " UNION ALL\n" + 316 | " SELECT col1 FROM tab3\n" + 317 | ") dt", 318 | 
Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 319 | ColumnQualifierTuple.create("col1", "tab1")), 320 | Pair.with(ColumnQualifierTuple.create("col1", "tab3"), 321 | ColumnQualifierTuple.create("col1", "tab1")))); 322 | } 323 | 324 | @Test 325 | public void testSelectColumnWithoutTableQualifierFromTableJoin() { 326 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 327 | "SELECT col1\n" + 328 | "FROM tab2 a\n" + 329 | " INNER JOIN tab3 b\n" + 330 | " ON a.id = b.id", 331 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", null), 332 | ColumnQualifierTuple.create("col1", "tab1")))); 333 | } 334 | 335 | @Test 336 | public void testSelectColumnFromSameTableMultipleTimeUsingDifferentAlias() { 337 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 338 | "SELECT a.col1 AS col2,\n" + 339 | " b.col1 AS col3\n" + 340 | "FROM tab2 a\n" + 341 | " JOIN tab2 b\n" + 342 | " ON a.parent_id = b.id", 343 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 344 | ColumnQualifierTuple.create("col2", "tab1")), 345 | Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 346 | ColumnQualifierTuple.create("col3", "tab1")))); 347 | } 348 | 349 | @Test 350 | public void testCommentAfterColumnCommaFirst() { 351 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 352 | "SELECT a.col1\n" + 353 | " --, a.col2\n" + 354 | " , a.col3\n" + 355 | "FROM tab2 a", 356 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 357 | ColumnQualifierTuple.create("col1", "tab1")), 358 | Pair.with(ColumnQualifierTuple.create("col3", "tab2"), 359 | ColumnQualifierTuple.create("col3", "tab1")))); 360 | } 361 | 362 | @Test 363 | public void testCommentAfterColumnCommaLast() { 364 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 365 | "SELECT a.col1,\n" + 366 | " -- a.col2,\n" + 367 | " a.col3\n" + 368 | "FROM tab2 a", 369 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 370 | ColumnQualifierTuple.create("col1", "tab1")), 371 | 
Pair.with(ColumnQualifierTuple.create("col3", "tab2"), 372 | ColumnQualifierTuple.create("col3", "tab1")))); 373 | } 374 | 375 | @Test 376 | public void testCastWithComparison() { 377 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 378 | "SELECT cast(col1 = 1 AS int) col1, col2 = col3 col2\n" + 379 | "FROM tab2", 380 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 381 | ColumnQualifierTuple.create("col1", "tab1")), 382 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"), 383 | ColumnQualifierTuple.create("col2", "tab1")), 384 | Pair.with(ColumnQualifierTuple.create("col3", "tab2"), 385 | ColumnQualifierTuple.create("col2", "tab1")))); 386 | } 387 | 388 | @ParameterizedTest 389 | @ValueSource(strings = {"string", "timestamp", "date", "datetime", "decimal(18, 0)"}) 390 | public void testCastToDataType(String dtype) { 391 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 392 | "SELECT cast(col1 as " + dtype + ") AS col1\n" + 393 | "FROM tab2", 394 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 395 | ColumnQualifierTuple.create("col1", "tab1")))); 396 | } 397 | 398 | @ParameterizedTest 399 | @ValueSource(strings = {"string", "timestamp", "date", "datetime", "decimal(18, 0)"}) 400 | public void testNestedCastToDataType(String dtype) { 401 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 402 | "SELECT cast(cast(col1 AS " + dtype + ") AS " + dtype + ") AS col1\n" + 403 | "FROM tab2", 404 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 405 | ColumnQualifierTuple.create("col1", "tab1")))); 406 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 407 | "SELECT cast(cast(cast(cast(cast(col1 AS " + dtype + ") AS " + dtype + ") AS " + dtype + ") AS " + dtype + ") AS " + dtype + ") AS col1\n" + 408 | "FROM tab2", 409 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 410 | ColumnQualifierTuple.create("col1", "tab1")))); 411 | } 412 | 413 | @ParameterizedTest 414 | @ValueSource(strings = 
{"string", "timestamp", "date", "datetime", "decimal(18, 0)"}) 415 | public void testCastToDataTypeWithCaseWhen(String dtype) { 416 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 417 | "SELECT cast(case when col1 > 0 then col2 else col3 end as " + dtype + ") AS col1\n" + 418 | "FROM tab2", 419 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 420 | ColumnQualifierTuple.create("col1", "tab1")), 421 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"), 422 | ColumnQualifierTuple.create("col1", "tab1")), 423 | Pair.with(ColumnQualifierTuple.create("col3", "tab2"), 424 | ColumnQualifierTuple.create("col1", "tab1")))); 425 | } 426 | 427 | @Test 428 | public void testCastUsingConstant() { 429 | assertColumnLineage("INSERT OVERWRITE TABLE tab1\n" + 430 | "SELECT cast('2012-12-21' as date) AS col2", 431 | Set.of()); 432 | } 433 | 434 | @Test 435 | public void testWindowFunctionInSubquery() { 436 | assertColumnLineage("INSERT INTO tab1\n" + 437 | "SELECT rn FROM (\n" + 438 | " SELECT\n" + 439 | " row_number() OVER (PARTITION BY col1, col2) rn\n" + 440 | " FROM tab2\n" + 441 | ") sub\n" + 442 | "WHERE rn = 1", 443 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 444 | ColumnQualifierTuple.create("rn", "tab1")), 445 | Pair.with(ColumnQualifierTuple.create("col2", "tab2"), 446 | ColumnQualifierTuple.create("rn", "tab1")))); 447 | } 448 | 449 | @Test 450 | public void testInvalidSyntaxAsWithoutAlias() { 451 | String sql = "INSERT OVERWRITE TABLE tab1\n" + 452 | "SELECT col1,\n" + 453 | " col2 as,\n" + 454 | " col3\n" + 455 | "FROM tab2"; 456 | // just assure no exception, don't guarantee the result 457 | LineageRunner runner = LineageRunner.builder(sql).build(); 458 | runner.getColumnLineage(); 459 | } 460 | 461 | @Test 462 | public void testColumnReferenceFromCteUsingAlias() { 463 | assertColumnLineage("WITH wtab1 AS (SELECT col1 FROM tab2)\n" + 464 | "INSERT OVERWRITE TABLE tab1\n" + 465 | "SELECT wt.col1 FROM wtab1 wt", 466 | 
Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 467 | ColumnQualifierTuple.create("col1", "tab1")))); 468 | } 469 | 470 | @Test 471 | public void testColumnReferenceFromCteUsingQualifier() { 472 | assertColumnLineage("WITH wtab1 AS (SELECT col1 FROM tab2)\n" + 473 | "INSERT OVERWRITE TABLE tab1\n" + 474 | "SELECT wtab1.col1 FROM wtab1", 475 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 476 | ColumnQualifierTuple.create("col1", "tab1")))); 477 | } 478 | 479 | @Test 480 | public void testColumnReferenceFromPreviousDefinedCte() { 481 | assertColumnLineage("WITH\n" + 482 | "cte1 AS (SELECT a FROM tab1),\n" + 483 | "cte2 AS (SELECT a FROM cte1)\n" + 484 | "INSERT OVERWRITE TABLE tab2\n" + 485 | "SELECT a FROM cte2", 486 | Set.of(Pair.with(ColumnQualifierTuple.create("a", "tab1"), 487 | ColumnQualifierTuple.create("a", "tab2")))); 488 | } 489 | 490 | @Test 491 | public void testMultipleColumnReferencesFromPreviousDefinedCte() { 492 | assertColumnLineage("WITH\n" + 493 | "cte1 AS (SELECT a, b FROM tab1),\n" + 494 | "cte2 AS (SELECT a, max(b) AS b_max, count(b) AS b_cnt FROM cte1 GROUP BY a)\n" + 495 | "INSERT OVERWRITE TABLE tab2\n" + 496 | "SELECT cte1.a, cte2.b_max, cte2.b_cnt FROM cte1 JOIN cte2\n" + 497 | "WHERE cte1.a = cte2.a", 498 | Set.of(Pair.with(ColumnQualifierTuple.create("a", "tab1"), 499 | ColumnQualifierTuple.create("a", "tab2")), 500 | Pair.with(ColumnQualifierTuple.create("b", "tab1"), 501 | ColumnQualifierTuple.create("b_max", "tab2")), 502 | Pair.with(ColumnQualifierTuple.create("b", "tab1"), 503 | ColumnQualifierTuple.create("b_cnt", "tab2")))); 504 | } 505 | 506 | @Test 507 | public void testColumnReferenceWithAnsi89Join() { 508 | assertColumnLineage("INSERT OVERWRITE TABLE tab3\n" + 509 | "SELECT a.id,\n" + 510 | " a.name AS name1,\n" + 511 | " b.name AS name2\n" + 512 | "FROM (SELECT id, name\n" + 513 | " FROM tab1) a,\n" + 514 | " (SELECT id, name\n" + 515 | " FROM tab2) b\n" + 516 | "WHERE a.id = b.id", 517 | 
Set.of(Pair.with(ColumnQualifierTuple.create("id", "tab1"), 518 | ColumnQualifierTuple.create("id", "tab3")), 519 | Pair.with(ColumnQualifierTuple.create("name", "tab1"), 520 | ColumnQualifierTuple.create("name1", "tab3")), 521 | Pair.with(ColumnQualifierTuple.create("name", "tab2"), 522 | ColumnQualifierTuple.create("name2", "tab3")))); 523 | } 524 | 525 | // @Test 526 | // public void testSmarterColumnResolutionUsingQueryContext() { 527 | // assertColumnLineage("WITH\n" + 528 | // "cte1 AS (SELECT a, b FROM tab1),\n" + 529 | // "cte2 AS (SELECT c, d FROM tab2)\n" + 530 | // "INSERT OVERWRITE TABLE tab3\n" + 531 | // "SELECT b, d FROM cte1 JOIN cte2\n" + 532 | // "WHERE cte1.a = cte2.c", 533 | // Set.of(Pair.with(ColumnQualifierTuple.create("b", "tab1"), 534 | // ColumnQualifierTuple.create("b", "tab3")), 535 | // Pair.with(ColumnQualifierTuple.create("d", "tab2"), 536 | // ColumnQualifierTuple.create("d", "tab3")))); 537 | // } 538 | 539 | @Test 540 | public void testColumnReferenceUsingUnion() { 541 | assertColumnLineage("INSERT OVERWRITE TABLE tab3\n" + 542 | "SELECT col1\n" + 543 | "FROM tab1\n" + 544 | "UNION ALL\n" + 545 | "SELECT col1\n" + 546 | "FROM tab2", 547 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab1"), 548 | ColumnQualifierTuple.create("col1", "tab3")), 549 | Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 550 | ColumnQualifierTuple.create("col1", "tab3")))); 551 | assertColumnLineage("INSERT OVERWRITE TABLE tab3\n" + 552 | "SELECT col1\n" + 553 | "FROM tab1\n" + 554 | "UNION\n" + 555 | "SELECT col1\n" + 556 | "FROM tab2", 557 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab1"), 558 | ColumnQualifierTuple.create("col1", "tab3")), 559 | Pair.with(ColumnQualifierTuple.create("col1", "tab2"), 560 | ColumnQualifierTuple.create("col1", "tab3")))); 561 | } 562 | 563 | // @Test 564 | // public void testColumnLineageMultiplePathsForSameColumn() { 565 | // assertColumnLineage("INSERT OVERWRITE TABLE tab2\n" + 566 | // "SELECT 
tab1.id,\n" + 567 | // " coalesce(join_table_1.col1, join_table_2.col1, join_table_3.col1) AS col1\n" + 568 | // "FROM tab1\n" + 569 | // " LEFT JOIN (SELECT id, col1 FROM tab1 WHERE flag = 1) AS join_table_1\n" + 570 | // " ON tab1.id = join_table_1.id\n" + 571 | // " LEFT JOIN (SELECT id, col1 FROM tab1 WHERE flag = 2) AS join_table_2\n" + 572 | // " ON tab1.id = join_table_2.id\n" + 573 | // " LEFT JOIN (SELECT id, col1 FROM tab1 WHERE flag = 3) AS join_table_3\n" + 574 | // " ON tab1.id = join_table_3.id", 575 | // Set.of(Pair.with(ColumnQualifierTuple.create("id", "tab1"), 576 | // ColumnQualifierTuple.create("id", "tab2")), 577 | // Pair.with(ColumnQualifierTuple.create("col1", "tab1"), 578 | // ColumnQualifierTuple.create("col1", "tab2")))); 579 | // } 580 | 581 | // @ParameterizedTest 582 | // @ValueSource(strings = {"string", "timestamp", "date", "datetime", "decimal(18, 0)"}) 583 | // public void testColumnTryCastWithFunc(String func) { 584 | // assertColumnLineage("INSERT OVERWRITE TABLE tab2\n" + 585 | // "SELECT try_cast(" + func + ") AS col2\n" + 586 | // "FROM tab1", 587 | // Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab1"), 588 | // ColumnQualifierTuple.create("col1", "tab2")))); 589 | // } 590 | 591 | @Test 592 | public void testColumnWithCtasAndFunc() { 593 | assertColumnLineage("CREATE TABLE tab2 AS\n" + 594 | "SELECT\n" + 595 | " coalesce(col1, 0) AS col1,\n" + 596 | " IF(\n" + 597 | " col1 IS NOT NULL,\n" + 598 | " 1,\n" + 599 | " NULL\n" + 600 | " ) AS col2\n" + 601 | "FROM\n" + 602 | " tab1", 603 | Set.of(Pair.with(ColumnQualifierTuple.create("col1", "tab1"), 604 | ColumnQualifierTuple.create("col1", "tab2")), 605 | Pair.with(ColumnQualifierTuple.create("col1", "tab1"), 606 | ColumnQualifierTuple.create("col2", "tab2")))); 607 | } 608 | } 609 | -------------------------------------------------------------------------------- /sqllineage4j-core/src/test/java/io/github/reata/sqllineage4j/core/CreateTest.java: 
-------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.core; 2 | 3 | import org.junit.Test; 4 | 5 | import java.util.Set; 6 | 7 | import static io.github.reata.sqllineage4j.core.Helper.assertTableLineage; 8 | 9 | public class CreateTest { 10 | @Test 11 | public void testCreate() { 12 | assertTableLineage("CREATE TABLE tab1 (col1 STRING)", Set.of(), Set.of("tab1")); 13 | } 14 | 15 | @Test 16 | public void testCreateIfNotExist() { 17 | assertTableLineage("CREATE TABLE IF NOT EXISTS tab1 (col1 STRING)", Set.of(), Set.of("tab1")); 18 | } 19 | 20 | @Test 21 | public void testCreateBucketTable() { 22 | assertTableLineage("CREATE TABLE tab1 USING parquet CLUSTERED BY (col1) INTO 500 BUCKETS", Set.of(), Set.of("tab1")); 23 | } 24 | 25 | @Test 26 | public void testCreateAs() { 27 | assertTableLineage("CREATE TABLE tab1 AS SELECT * FROM tab2", Set.of("tab2"), Set.of("tab1")); 28 | } 29 | 30 | @Test 31 | public void testCreateAsWithParenthesisAroundSelectStatement() { 32 | assertTableLineage("CREATE TABLE tab1 AS (SELECT * FROM tab2)", Set.of("tab2"), Set.of("tab1")); 33 | } 34 | 35 | @Test 36 | public void testCreateAsWithParenthesisAroundTableName() { 37 | assertTableLineage("CREATE TABLE tab1 AS SELECT * FROM (tab2)", Set.of("tab2"), Set.of("tab1")); 38 | } 39 | 40 | @Test 41 | public void testCreateAsWithParenthesisAroundBoth() { 42 | assertTableLineage("CREATE TABLE tab1 AS (SELECT * FROM (tab2))", Set.of("tab2"), Set.of("tab1")); 43 | } 44 | 45 | @Test 46 | public void testCreateLike() { 47 | assertTableLineage("CREATE TABLE tab1 LIKE tab2", Set.of("tab2"), Set.of("tab1")); 48 | } 49 | 50 | @Test 51 | public void testCreateSelect() { 52 | assertTableLineage("CREATE TABLE tab1 SELECT * FROM tab2", Set.of("tab2"), Set.of("tab1")); 53 | } 54 | 55 | @Test 56 | public void testCreateUsingSerde() { 57 | // Check 
https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-RowFormats&SerDe 58 | // here with is not an indicator for CTE 59 | assertTableLineage("CREATE TABLE apachelog (\n" + 60 | " host STRING,\n" + 61 | " identity STRING,\n" + 62 | " user STRING,\n" + 63 | " time STRING,\n" + 64 | " request STRING,\n" + 65 | " status STRING,\n" + 66 | " size STRING,\n" + 67 | " referer STRING,\n" + 68 | " agent STRING)\n" + 69 | "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'\n" + 70 | "WITH SERDEPROPERTIES (\n" + 71 | " \"input.regex\" = \"([^]*) ([^]*) ([^]*) (-|\\\\[^\\\\]*\\\\]) ([^ \\\"]*|\\\"[^\\\"]*\\\") (-|[0-9]*) (-|[0-9]*)(?: ([^ \\\"]*|\\\".*\\\") ([^ \\\"]*|\\\".*\\\"))?\"\n" + 72 | ")\n" + 73 | "STORED AS TEXTFILE", Set.of(), Set.of("apachelog")); 74 | } 75 | 76 | @Test 77 | public void testBucketWithUsingParenthesis() { 78 | assertTableLineage("CREATE TABLE tbl1 (col1 VARCHAR)\n" + 79 | "WITH (bucketed_on = array['col1'], bucket_count = 256);", Set.of(), Set.of("tbl1")); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /sqllineage4j-core/src/test/java/io/github/reata/sqllineage4j/core/Helper.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.core; 2 | 3 | import io.github.reata.sqllineage4j.common.entity.ColumnQualifierTuple; 4 | import io.github.reata.sqllineage4j.common.model.Column; 5 | import io.github.reata.sqllineage4j.common.model.Table; 6 | import org.javatuples.Pair; 7 | 8 | import java.util.HashSet; 9 | import java.util.Objects; 10 | import java.util.Set; 11 | import java.util.stream.Collectors; 12 | 13 | import static org.junit.Assert.assertEquals; 14 | 15 | public class Helper { 16 | 17 | public static void assertTableLineage(String sql) { 18 | assertTableLineage(sql, Set.of(), Set.of()); 19 | } 20 | 21 | public static void assertTableLineage(String sql, Set sourceTables) { 22 | 
assertTableLineage(sql, sourceTables, Set.of()); 23 | } 24 | 25 | /** Runs the lineage analysis on {@code sql} and asserts that the source and target tables reported by the runner equal the given table names. Each expected name is wrapped in a {@link Table} and both sides are compared as sets. */ public static void assertTableLineage(String sql, Set sourceTables, Set targetTables) { 26 | LineageRunner runner = LineageRunner.builder(sql).build(); 27 | assertEquals("Source Table Equal", sourceTables.stream().map(Table::new).collect(Collectors.toSet()), Set.copyOf(runner.sourceTables())); 28 | assertEquals("Target Table Equal", targetTables.stream().map(Table::new).collect(Collectors.toSet()), Set.copyOf(runner.targetTables())); 29 | } 30 | 31 | /** Asserts that the column-level lineage of {@code sql} equals the expected (source, target) pairs. Every tuple is turned into a {@link Column}: a source tuple with a null qualifier stays without a parent table, whereas the target qualifier is mandatory (requireNonNull throws a NullPointerException when it is missing). */ public static void assertColumnLineage(String sql, Set> columnLineages) { 32 | Set> expected = new HashSet<>(); 33 | for (Pair cqtPair : columnLineages) { 34 | ColumnQualifierTuple srcCqt = cqtPair.getValue0(); 35 | ColumnQualifierTuple tgtCqt = cqtPair.getValue1(); 36 | Column srcCol = new Column(srcCqt.column()); 37 | /* only a qualified source column gets a parent table attached */ if (srcCqt.qualifier() != null) { 38 | srcCol.setParent(new Table(srcCqt.qualifier())); 39 | } 40 | Column tgtCol = new Column(tgtCqt.column()); 41 | tgtCol.setParent(new Table(Objects.requireNonNull(tgtCqt.qualifier()))); 42 | expected.add(Pair.with(srcCol, tgtCol)); 43 | } 44 | LineageRunner runner = LineageRunner.builder(sql).build(); 45 | /* copied into a HashSet so the comparison ignores ordering and duplicates */ Set> actual = new HashSet<>(runner.getColumnLineage()); 46 | assertEquals(expected, actual); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /sqllineage4j-core/src/test/java/io/github/reata/sqllineage4j/core/InsertTest.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.core; 2 | 3 | import org.junit.Test; 4 | 5 | import java.util.Set; 6 | 7 | import static io.github.reata.sqllineage4j.core.Helper.assertTableLineage; 8 | 9 | /** Table-level lineage tests for INSERT statements. */ public class InsertTest { 10 | @Test 11 | public void testInsertInto() { 12 | assertTableLineage("INSERT INTO tab1 VALUES (1, 2)", Set.of(), Set.of("tab1")); 13 | } 14 | 15 | @Test 16 | public void testInsertIntoWithKeywordTable() { 17 |
assertTableLineage("INSERT INTO TABLE tab1 VALUES (1, 2)", Set.of(), Set.of("tab1")); 18 | } 19 | 20 | @Test 21 | public void testInsertIntoWithColumns() { 22 | assertTableLineage("INSERT INTO tab1 (col1, col2) SELECT * FROM tab2;", Set.of("tab2"), Set.of("tab1")); 23 | } 24 | 25 | @Test 26 | public void testInsertIntoWithColumnsAndSelect() { 27 | assertTableLineage("INSERT INTO tab1 (col1, col2) SELECT * FROM tab2", Set.of("tab2"), Set.of("tab1")); 28 | } 29 | 30 | @Test 31 | public void testInsertIntoWithColumnsAndSelectUnion() { 32 | assertTableLineage("INSERT INTO tab1 (col1, col2) SELECT * FROM tab2 UNION SELECT * FROM tab3", Set.of("tab2", "tab3"), Set.of("tab1")); 33 | assertTableLineage("INSERT INTO tab1 (col1, col2) (SELECT * FROM tab2 UNION SELECT * FROM tab3)", Set.of("tab2", "tab3"), Set.of("tab1")); 34 | } 35 | 36 | @Test 37 | public void testInsertIntoPartitions() { 38 | assertTableLineage("INSERT INTO TABLE tab1 PARTITION (par1=1) SELECT * FROM tab2", Set.of("tab2"), Set.of("tab1")); 39 | } 40 | 41 | @Test 42 | public void testInsertOverwrite() { 43 | assertTableLineage("INSERT OVERWRITE tab1 SELECT * FROM tab2", Set.of("tab2"), Set.of("tab1")); 44 | } 45 | 46 | @Test 47 | public void testInsertOverwriteWithKeywordTable() { 48 | assertTableLineage("INSERT OVERWRITE TABLE tab1 SELECT col1 FROM tab2", Set.of("tab2"), Set.of("tab1")); 49 | } 50 | 51 | @Test 52 | public void testInsertOverwriteValues() { 53 | assertTableLineage("INSERT OVERWRITE tab1 VALUES ('val1', 'val2'), ('val3', 'val4')", Set.of(), Set.of("tab1")); 54 | } 55 | 56 | @Test 57 | public void testInsertOverwriteFromSelf() { 58 | assertTableLineage("INSERT OVERWRITE TABLE foo\n" + 59 | "SELECT col from foo\n" + 60 | "WHERE flag IS NOT NULL", Set.of("foo"), Set.of("foo")); 61 | } 62 | 63 | @Test 64 | public void testInsertOverwriteFromSelfWithJoin() { 65 | assertTableLineage("INSERT OVERWRITE TABLE tab_1\n" + 66 | "SELECT tab2.col_a from tab_2\n" + 67 | "JOIN tab_1\n" + 68 | "ON 
tab_1.col_a = tab_2.cola", Set.of("tab_1", "tab_2"), Set.of("tab_1")); 69 | } 70 | 71 | @Test 72 | public void testInsertIntoQualifiedTableWithParenthesizedQuery() { 73 | assertTableLineage("INSERT INTO default.tab2\n" + 74 | " (SELECT *\n" + 75 | " FROM tab1)", Set.of("tab1"), Set.of("default.tab2")); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /sqllineage4j-core/src/test/java/io/github/reata/sqllineage4j/core/OtherTest.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.core; 2 | 3 | import org.junit.Test; 4 | 5 | import java.util.Set; 6 | 7 | import static io.github.reata.sqllineage4j.core.Helper.assertTableLineage; 8 | 9 | public class OtherTest { 10 | @Test 11 | public void testUse() { 12 | assertTableLineage("USE db1", Set.of()); 13 | } 14 | 15 | @Test 16 | public void testTableNameCase() { 17 | assertTableLineage("insert overwrite table tab_a\n" + 18 | "select * from tab_b\n" + 19 | "union all\n" + 20 | "select * from TAB_B", Set.of("tab_b"), Set.of("tab_a")); 21 | } 22 | 23 | @Test 24 | public void testCreateAfterDrop() { 25 | assertTableLineage("DROP TABLE IF EXISTS tab1; CREATE TABLE IF NOT EXISTS tab1 (col1 STRING)", Set.of(), Set.of("tab1")); 26 | } 27 | 28 | @Test 29 | public void testUpdate() { 30 | assertTableLineage("UPDATE tab1 SET col1='val1' WHERE col2='val2'", Set.of(), Set.of("tab1")); 31 | } 32 | 33 | // @Test 34 | // public void testUpdateWithJoin() { 35 | // // SparkSQL doesn't support this syntax 36 | // helper("UPDATE tab1 a INNER JOIN tab2 b ON a.col1=b.col1 SET a.col2=b.col2", Set.of("tab2"), Set.of("tab1")); 37 | // } 38 | 39 | // @Test 40 | // public void testCopyFromTable() { 41 | // // SparkSQL doesn't support this syntax 42 | // helper("COPY tab1 FROM tab2", Set.of("tab2"), Set.of("tab1")); 43 | // } 44 | 45 | @Test 46 | public void testDrop() { 47 | assertTableLineage("DROP TABLE IF EXISTS tab1", 
Set.of(), Set.of()); 48 | } 49 | 50 | @Test 51 | public void testDropWithComment() { 52 | assertTableLineage("--comment\n" + 53 | "DROP TABLE IF EXISTS tab1", Set.of(), Set.of()); 54 | } 55 | 56 | @Test 57 | public void testDropAfterCreate() { 58 | assertTableLineage("CREATE TABLE IF NOT EXISTS tab1 (col1 STRING);DROP TABLE IF EXISTS tab1", Set.of(), Set.of()); 59 | } 60 | 61 | @Test 62 | public void testDropTmpTabAfterCreate() { 63 | assertTableLineage("create table tab_a as select * from tab_b;\n" + 64 | "insert overwrite table tab_c select * from tab_a;\n" + 65 | "drop table tab_a", Set.of("tab_b"), Set.of("tab_c")); 66 | } 67 | 68 | @Test 69 | public void testNewCreateTabAsTmpTable() { 70 | assertTableLineage("create table tab_a as select * from tab_b;\n" + 71 | "create table tab_c as select * from tab_a;", Set.of("tab_b"), Set.of("tab_c")); 72 | } 73 | 74 | @Test 75 | public void testAlterTableRename() { 76 | assertTableLineage("alter table tab1 rename to tab2", Set.of(), Set.of()); 77 | } 78 | 79 | // /* 80 | // This syntax is MySQL specific: 81 | // https://dev.mysql.com/doc/refman/8.0/en/rename-table.html 82 | // */ 83 | // @Test 84 | // public void testRenameTable() { 85 | // // SparkSQL doesn't support this syntax 86 | // helper("rename table tab1 to tab2", Set.of(), Set.of()); 87 | // } 88 | // 89 | // @Test 90 | // public void testRenameTables() { 91 | // // SparkSQL doesn't support this syntax 92 | // helper("rename table tab1 to tab2, tab3 to tab4", Set.of(), Set.of()); 93 | // } 94 | 95 | /* 96 | See https://cwiki.apache.org/confluence/display/Hive/Exchange+Partition for language manual 97 | */ 98 | @Test 99 | public void testAlterTableExchangePartition() { 100 | assertTableLineage("alter table tab1 exchange partition(pt='part1') with table tab2", Set.of("tab2"), Set.of("tab1")); 101 | } 102 | 103 | /* 104 | See https://www.vertica.com/docs/10.0.x/HTML/Content/Authoring/AdministratorsGuide/Partitions/SwappingPartitions.htm 105 | for language 
specification 106 | */ 107 | @Test 108 | public void testSwappingPartitions() { 109 | assertTableLineage("select swap_partitions_between_tables('staging', 'min-range-value', 'max-range-value', 'target')", Set.of("staging"), Set.of("target")); 110 | } 111 | 112 | @Test 113 | public void testAlterTargetTableName() { 114 | assertTableLineage("insert overwrite tab1 select * from tab2; alter table tab1 rename to tab3;", Set.of("tab2"), Set.of("tab3")); 115 | assertTableLineage("insert overwrite tab2 select * from tab1; alter table tab1 rename to tab3;", Set.of("tab3"), Set.of("tab2")); 116 | } 117 | 118 | @Test 119 | public void testRefreshTable() { 120 | assertTableLineage("refresh table tab1", Set.of(), Set.of()); 121 | } 122 | 123 | @Test 124 | public void testCacheTable() { 125 | assertTableLineage("cache table tab1", Set.of(), Set.of()); 126 | } 127 | 128 | @Test 129 | public void testUncacheTable() { 130 | assertTableLineage("uncache table tab1", Set.of(), Set.of()); 131 | } 132 | 133 | @Test 134 | public void testUncacheTableIfExists() { 135 | assertTableLineage("uncache table if exists tab1", Set.of(), Set.of()); 136 | } 137 | 138 | @Test 139 | public void testTruncateTable() { 140 | assertTableLineage("truncate table tab1", Set.of(), Set.of()); 141 | } 142 | 143 | @Test 144 | public void testDeleteFromTable() { 145 | assertTableLineage("delete from table tab1", Set.of(), Set.of()); 146 | } 147 | 148 | @Test 149 | public void testLateralViewUsingJsonTuple() { 150 | assertTableLineage("INSERT OVERWRITE TABLE foo\n" + 151 | "SELECT sc.id, q.item0, q.item1\n" + 152 | "FROM bar sc\n" + 153 | "LATERAL VIEW json_tuple(sc.json, 'key1', 'key2') q AS item0, item1", Set.of("bar"), Set.of("foo")); 154 | } 155 | 156 | @Test 157 | public void testLateralViewOuter() { 158 | assertTableLineage("INSERT OVERWRITE TABLE foo\n" + 159 | "SELECT sc.id, q.col1\n" + 160 | "FROM bar sc\n" + 161 | "LATERAL VIEW OUTER explode(sc.json_array) q AS col1", Set.of("bar"), Set.of("foo")); 162 
| } 163 | 164 | @Test 165 | public void testShowCreateTable() { 166 | assertTableLineage("show create table tab1", Set.of()); 167 | } 168 | } 169 | -------------------------------------------------------------------------------- /sqllineage4j-core/src/test/java/io/github/reata/sqllineage4j/core/SelectTest.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.core; 2 | 3 | import org.junit.Test; 4 | 5 | import java.util.Set; 6 | 7 | import static io.github.reata.sqllineage4j.core.Helper.assertTableLineage; 8 | 9 | public class SelectTest { 10 | @Test 11 | public void testSelect() { 12 | assertTableLineage("SELECT col1 FROM tab1", Set.of("tab1")); 13 | } 14 | 15 | @Test 16 | public void testSelectWithSchema() { 17 | assertTableLineage("SELECT col1 FROM schema1.tab1", Set.of("schema1.tab1")); 18 | } 19 | 20 | @Test 21 | public void testSelectWithSchemaAndDatabase() { 22 | assertTableLineage("SELECT col1 FROM db1.schema1.tbl1", Set.of("db1.schema1.tbl1")); 23 | } 24 | 25 | @Test 26 | public void testSelectWithTableNameInBacktick() { 27 | assertTableLineage("SELECT * FROM `tab1`", Set.of("tab1")); 28 | } 29 | 30 | @Test 31 | public void testSelectWithSchemaInBacktick() { 32 | assertTableLineage("SELECT col1 FROM `schema1`.`tab1`", Set.of("schema1.tab1")); 33 | } 34 | 35 | @Test 36 | public void testSelectMultiLine() { 37 | assertTableLineage("SELECT col1 FROM\n" + 38 | "tab1", Set.of("tab1")); 39 | } 40 | 41 | @Test 42 | public void testSelectAsterisk() { 43 | assertTableLineage("SELECT * FROM tab1", Set.of("tab1")); 44 | } 45 | 46 | @Test 47 | public void testSelectValue() { 48 | assertTableLineage("SELECT 1"); 49 | } 50 | 51 | @Test 52 | public void testSelectFunction() { 53 | assertTableLineage("SELECT NOW()"); 54 | } 55 | 56 | @Test 57 | public void testSelectTrimFunctionWithFromKeyword() { 58 | assertTableLineage("SELECT trim(BOTH ' ' FROM ' abc ')"); 59 | } 60 | 61 | @Test 62 | public void 
testSelectTrimFunctionWithFromKeywordFromSourceTable() { 63 | assertTableLineage("SELECT trim(BOTH ' ' FROM col1) FROM tab1", Set.of("tab1")); 64 | } 65 | 66 | @Test 67 | public void testSelectWithWhere() { 68 | assertTableLineage("SELECT * FROM tab1 WHERE col1 > val1 AND col2 = 'val2'", Set.of("tab1")); 69 | } 70 | 71 | @Test 72 | public void testSelectWithComment() { 73 | assertTableLineage("SELECT -- comment1\n col1 FROM tab1", Set.of("tab1")); 74 | } 75 | 76 | @Test 77 | public void testSelectWithCommentAfterFrom() { 78 | assertTableLineage("SELECT col1\nFROM -- comment\ntab1", Set.of("tab1")); 79 | } 80 | 81 | @Test 82 | public void testSelectWithCommentAfterJoin() { 83 | assertTableLineage("select * from tab1 join --comment\ntab2 on tab1.x = tab2.x", Set.of("tab1", "tab2")); 84 | } 85 | 86 | @Test 87 | public void testSelectKeywordAsColumnAlias() { 88 | // here `as` is the column alias 89 | assertTableLineage("SELECT 1 `as` FROM tab1", Set.of("tab1")); 90 | // the following is hive specific, MySQL doesn't allow this syntax. 
As of now, we don't test against it 91 | // helper("SELECT 1 as FROM tab1", Set.of("tab1")); 92 | } 93 | 94 | @Test 95 | public void testSelectWithTableAlias() { 96 | assertTableLineage("SELECT 1 FROM tab1 AS alias1", Set.of("tab1")); 97 | } 98 | 99 | @Test 100 | public void testSelectCount() { 101 | assertTableLineage("SELECT COUNT(*) FROM tab1", Set.of("tab1")); 102 | } 103 | 104 | @Test 105 | public void testSelectSubquery() { 106 | assertTableLineage("SELECT col1 FROM (SELECT col1 FROM tab1) dt", Set.of("tab1")); 107 | // with an extra space 108 | assertTableLineage("SELECT col1 FROM ( SELECT col1 FROM tab1) dt", Set.of("tab1")); 109 | } 110 | 111 | @Test 112 | public void testSelectSubqueryWithTwoParenthesis() { 113 | assertTableLineage("SELECT col1 FROM ((SELECT col1 FROM tab1)) dt", Set.of("tab1")); 114 | } 115 | 116 | @Test 117 | public void testSelectSubqueryWithMoreParenthesis() { 118 | assertTableLineage("SELECT col1 FROM (((((((SELECT col1 FROM tab1))))))) dt", Set.of("tab1")); 119 | } 120 | 121 | @Test 122 | public void testSelectSubqueryInCase() { 123 | assertTableLineage("SELECT\n" + 124 | "CASE WHEN (SELECT count(*) FROM tab1 WHERE col1 = 'tab2') = 1 THEN (SELECT count(*) FROM tab2) ELSE 0 END AS cnt", 125 | Set.of("tab1", "tab2")); 126 | assertTableLineage("SELECT\n" + 127 | "CASE WHEN 1 = (SELECT count(*) FROM tab1 WHERE col1 = 'tab2') THEN (SELECT count(*) FROM tab2) ELSE 0 END AS cnt", 128 | Set.of("tab1", "tab2")); 129 | } 130 | 131 | @Test 132 | public void testSelectSubqueryWithoutAlias() { 133 | // this syntax is valid in SparkSQL, not for MySQL 134 | assertTableLineage("SELECT col1 FROM (SELECT col1 FROM tab1)", Set.of("tab1")); 135 | } 136 | 137 | @Test 138 | public void testSelectSubqueryInWhereClause() { 139 | assertTableLineage("SELECT col1\n" + 140 | "FROM tab1\n" + 141 | "WHERE col1 IN (SELECT max(col1) FROM tab2)", Set.of("tab1", "tab2")); 142 | } 143 | 144 | @Test 145 | public void testSelectInnerJoin() { 146 | 
assertTableLineage("SELECT * FROM tab1 INNER JOIN tab2", Set.of("tab1", "tab2")); 147 | } 148 | 149 | @Test 150 | public void testSelectJoin() { 151 | assertTableLineage("SELECT * FROM tab1 JOIN tab2", Set.of("tab1", "tab2")); 152 | } 153 | 154 | @Test 155 | public void testSelectLeftJoin() { 156 | assertTableLineage("SELECT * FROM tab1 LEFT JOIN tab2", Set.of("tab1", "tab2")); 157 | } 158 | 159 | @Test 160 | public void testSelectLeftJoinWithExtraSpaceInMiddle() { 161 | assertTableLineage("SELECT * FROM tab1 LEFT JOIN tab2", Set.of("tab1", "tab2")); 162 | } 163 | 164 | @Test 165 | public void testSelectLeftSemiJoin() { 166 | assertTableLineage("SELECT * FROM tab1 LEFT SEMI JOIN tab2", Set.of("tab1", "tab2")); 167 | } 168 | 169 | @Test 170 | public void testSelectLeftSemiJoinWithOn() { 171 | assertTableLineage("SELECT * FROM tab1 LEFT SEMI JOIN tab2 ON (tab1.col1 = tab2.col2)", Set.of("tab1", "tab2")); 172 | } 173 | 174 | @Test 175 | public void testSelectRightJoin() { 176 | assertTableLineage("SELECT * FROM tab1 RIGHT JOIN tab2", Set.of("tab1", "tab2")); 177 | } 178 | 179 | @Test 180 | public void testSelectFullOuterJoin() { 181 | assertTableLineage("SELECT * FROM tab1 FULL OUTER JOIN tab2", Set.of("tab1", "tab2")); 182 | } 183 | 184 | // @Test 185 | // public void testSelectFullOuterJoinWithFullAsAlias() { 186 | // // SparkSQL can't handle this 187 | // helper("SELECT * FROM tab1 AS full FULL OUTER JOIN tab2", Set.of("tab1", "tab2")); 188 | // } 189 | 190 | @Test 191 | public void testSelectCrossJoin() { 192 | assertTableLineage("SELECT * FROM tab1 CROSS JOIN tab2", Set.of("tab1", "tab2")); 193 | } 194 | 195 | @Test 196 | public void testSelectCrossJoinWithOn() { 197 | assertTableLineage("SELECT * FROM tab1 CROSS JOIN tab2 on tab1.col1 = tab2.col2", Set.of("tab1", "tab2")); 198 | } 199 | 200 | @Test 201 | public void testSelectJoinWithSubquery() { 202 | assertTableLineage("SELECT col1 FROM tab1 AS a LEFT JOIN tab2 AS b ON a.id=b.tab1_id " + 203 | "WHERE col1 = 
(SELECT col1 FROM tab2 WHERE id = 1)", Set.of("tab1", "tab2")); 204 | } 205 | 206 | @Test 207 | public void testSelectJoinInAnsi89Syntax() { 208 | assertTableLineage("SELECT * FROM tab1 a, tab2 b", Set.of("tab1", "tab2")); 209 | } 210 | 211 | @Test 212 | public void testSelectJoinInAnsi89SyntaxWithSubquery() { 213 | assertTableLineage("SELECT * FROM (SELECT * FROM tab1) a, (SELECT * FROM tab2) b", Set.of("tab1", "tab2")); 214 | } 215 | 216 | @Test 217 | public void testSelectGroupBy() { 218 | assertTableLineage("SELECT col1, col2 FROM tab1 GROUP BY col1, col2", Set.of("tab1")); 219 | } 220 | 221 | @Test 222 | public void testSelectGroupByOrdinal() { 223 | assertTableLineage("SELECT col1, col2 FROM tab1 GROUP BY 1, 2", Set.of("tab1")); 224 | } 225 | 226 | @Test 227 | public void testSelectFromValues() { 228 | assertTableLineage("SELECT * FROM (VALUES (1, 2))"); 229 | } 230 | 231 | @Test 232 | public void testSelectFromValuesNewline() { 233 | assertTableLineage("SELECT * FROM (\nVALUES (1, 2))"); 234 | } 235 | 236 | @Test 237 | public void testSelectFromValuesWithAlias() { 238 | assertTableLineage("SELECT * FROM (VALUES (1, 2)) AS t(col1, col2)"); 239 | } 240 | 241 | /* 242 | unnest function is Presto specific 243 | */ 244 | @Test 245 | public void testSelectFromUnnest() { 246 | assertTableLineage("SELECT student, score FROM tests CROSS JOIN UNNEST(scores) AS t (score)", Set.of("tests")); 247 | } 248 | 249 | @Test 250 | public void testSelectFromUnnestParsedAsKeyword() { 251 | assertTableLineage("SELECT student, score FROM tests CROSS JOIN UNNEST (scores) AS t (score)", Set.of("tests")); 252 | } 253 | 254 | // @Test 255 | // public void testSelectFromUnnestWithOrdinality() { 256 | // // SparkSQL doesn't support this syntax 257 | // assertTableLineage("SELECT numbers, n, a\n" + 258 | // "FROM (\n" + 259 | // " VALUES\n" + 260 | // " (ARRAY[2, 5]),\n" + 261 | // " (ARRAY[7, 8, 9])\n" + 262 | // ") AS x (numbers)\n" + 263 | // "CROSS JOIN UNNEST(numbers) WITH ORDINALITY 
AS t (n, a);"); 264 | // } 265 | 266 | /* 267 | generator is Snowflake specific 268 | */ 269 | @Test 270 | public void testSelectFromGenerator() { 271 | assertTableLineage("SELECT seq4(), uniform(1, 10, random(12))\n" + 272 | "FROM table(generator()) v\n" + 273 | "ORDER BY 1;"); 274 | } 275 | } 276 | -------------------------------------------------------------------------------- /sqllineage4j-graph/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4 | sqllineage4j 5 | io.github.reata 6 | 1.0.1-SNAPSHOT 7 | 8 | 4.0.0 9 | sqllineage4j-graph 10 | sqllineage4j-graph 11 | 12 | 13 | 14 | org.apache.tinkerpop 15 | tinkergraph-gremlin 16 | 17 | 18 | io.github.reata 19 | sqllineage4j-common 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /sqllineage4j-graph/src/main/java/io/github/reata/sqllineage4j/graph/GremlinLineageGraph.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.graph; 2 | 3 | import io.github.reata.sqllineage4j.common.entity.EdgeTuple; 4 | import org.apache.tinkerpop.gremlin.process.traversal.P; 5 | import org.apache.tinkerpop.gremlin.process.traversal.Path; 6 | import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; 7 | import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource; 8 | import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; 9 | import org.apache.tinkerpop.gremlin.structure.*; 10 | import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerGraph; 11 | 12 | import java.util.*; 13 | import java.util.stream.Collectors; 14 | 15 | public class GremlinLineageGraph implements LineageGraph { 16 | private final GraphTraversalSource g; 17 | 18 | // public GraphTraversalSource getG() { 19 | // return g; 20 | // } 21 | 22 | public GremlinLineageGraph() { 23 | this.g = TinkerGraph.open().traversal(); 24 | } 25 | 26 | 
// NOTE(review): generic type arguments (on Map, List, HashMap, GraphTraversal, ...) are absent in this
// listing; they look stripped by extraction rather than omitted by the author - confirm against the repository.

/**
 * Creates a lineage graph on top of an existing graph, using {@code graph.traversal()} as the traversal source.
 *
 * @param graph graph to wrap
 */
public GremlinLineageGraph(Graph graph) {
    this.g = graph.traversal();
}

/**
 * Adds a vertex for {@code obj} without extra properties; delegates to the two-argument overload
 * with an empty map.
 */
public void addVertexIfNotExist(Object obj) {
    HashMap props = new HashMap<>();
    addVertexIfNotExist(obj, props);
}

/**
 * Looks up the vertex whose label is the simple class name of {@code obj} and whose id is
 * {@code obj.hashCode()}, creating it when the lookup is empty, then writes the properties.
 *
 * The "obj" property and every entry of {@code props} are written after the coalesce step, i.e. on the
 * found vertex as well as on a newly created one.
 *
 * NOTE(review): the id is the hash code, so two objects of the same class with equal hash codes
 * share one vertex.
 *
 * @param obj   object stored under the "obj" property
 * @param props additional vertex properties to set
 */
public void addVertexIfNotExist(Object obj, Map props) {
    int id = obj.hashCode();
    String label = obj.getClass().getSimpleName();
    // fold() + coalesce(unfold(), addV(...)): reuse the vertex if the lookup found one, otherwise add it.
    GraphTraversal step = g.V().hasLabel(label).hasId(id).fold()
            .coalesce(__.unfold(),
                    __.addV(label).property(T.id, id))
            .property("obj", obj);
    for (Map.Entry entry : props.entrySet()) {
        step = step.property(entry.getKey(), entry.getValue());
    }
    // Nothing is executed until iterate() runs the traversal.
    step.iterate();
}

/**
 * Returns the "obj" values of all vertices that have every key/value pair in {@code props};
 * an empty map adds no filter.
 */
public List retrieveVerticesByProps(Map props) {
    GraphTraversal step = g.V();
    for (Map.Entry entry : props.entrySet()) {
        step = step.has(entry.getKey(), entry.getValue());
    }
    return step.values("obj").toList();
}

/** Returns the "obj" values of vertices that have an outgoing edge and no incoming edge. */
public List retrieveSourceOnlyVertices() {
    return retrieveVertices(g.V().where(__.outE()).not(__.inE()));
}

/** Returns the "obj" values of vertices that have an incoming edge and no outgoing edge. */
public List retrieveTargetOnlyVertices() {
    return retrieveVertices(g.V().where(__.inE()).not(__.outE()));
}

/** Returns the "obj" values of vertices that have both an incoming and an outgoing edge. */
public List retrieveConnectedVertices() {
    return retrieveVertices(g.V().where(__.inE()).where(__.outE()));
}

/** Returns the "obj" values of vertices with an outgoing edge whose other end is the vertex itself. */
public List retrieveSelfLoopVertices() {
    return retrieveVertices(g.V().as("src").where(__.outE().otherV().as("src")));
}

/** Finishes a vertex traversal by collecting the "obj" property values into a list. */
private List retrieveVertices(GraphTraversal vertexStep) {
    return vertexStep.values("obj").toList();
}

/**
 * Sets every entry of {@code props} on the vertices whose id equals the hash code of one of
 * {@code objects}. Selection is by id only; no label filter is applied here.
 */
public void updateVertices(Map props, Object... objects) {
    GraphTraversal step = g.V().hasId(Arrays.stream(objects).map(Object::hashCode).toArray());
    for (Map.Entry entry : props.entrySet()) {
        step = step.property(entry.getKey(), entry.getValue());
    }
    step.iterate();
}

/** Drops the vertices matching {@code objects}, whether or not they have edges. */
public void dropVertices(Object... objects) {
    dropVertices(false, objects);
}

/** Drops the vertices matching {@code objects}, but only those that have no edge at all. */
public void dropVerticesIfOrphan(Object... objects) {
    dropVertices(true, objects);
}

/**
 * Shared implementation of the two public drop methods.
 *
 * @param orphan  when true, vertices with any incident edge are filtered out before the drop
 * @param objects objects whose hash codes are used as vertex ids
 */
private void dropVertices(boolean orphan, Object... objects) {
    GraphTraversal step = g.V().hasId(Arrays.stream(objects).map(Object::hashCode).toArray());
    if (orphan) {
        step = step.not(__.bothE());
    }
    step.drop().iterate();
}

/**
 * Adds an edge labelled {@code label} from the vertex of {@code src} to the vertex of {@code tgt},
 * unless the target already has an incoming edge with that label coming from the source.
 * Both vertices are looked up by simple class name (label) and hash code (id).
 *
 * NOTE(review): presumably a no-op when either vertex is missing, since the traversal then has
 * nothing to continue from - confirm.
 */
public void addEdgeIfNotExist(String label, Object src, Object tgt) {
    g.V().hasLabel(src.getClass().getSimpleName()).hasId(src.hashCode()).as("src")
            .V().hasLabel(tgt.getClass().getSimpleName()).hasId(tgt.hashCode())
            .coalesce(__.inE(label).where(__.outV().as("src")),
                    __.addE(label).from("src")).iterate();
}

/** Returns the edges that have every key/value pair in {@code props}, converted by retrieveEdges. */
public List retrieveEdgesByProps(Map props) {
    GraphTraversal step = g.E();
    for (Map.Entry entry : props.entrySet()) {
        step = step.has(entry.getKey(), entry.getValue());
    }
    return retrieveEdges(step);
}

/** Returns all edges carrying the given label. */
public List retrieveEdgesByLabel(String label) {
    return retrieveEdges(g.E().hasLabel(label));
}

/** Returns the incoming and outgoing edges of the vertex whose id is {@code object.hashCode()}. */
public List retrieveEdgesByVertex(Object object) {
    return retrieveEdges(g.V().hasId(object.hashCode()).bothE());
}

/**
 * Finishes an edge traversal: projects each edge to the "obj" of its out-vertex, its label and the
 * "obj" of its in-vertex, then builds one EdgeTuple per edge through {@code EdgeTuple.create}.
 */
private List retrieveEdges(GraphTraversal edgeStep) {
    return edgeStep.project("from", "label", "to")
            .by(__.outV().values("obj"))
            .by(__.label())
            .by(__.inV().values("obj")).toList()
            .stream().map(x -> EdgeTuple.create(x.get("from"), (String) x.get("label"), x.get("to"))).collect(Collectors.toList());
}

/** Drops every edge whose in-vertex is the same vertex as its out-vertex. */
public void dropSelfLoopEdge() {
    // Walk src -e-> inV, keep only the cases where inV is src again, then step back onto e and drop it.
    g.V().as("src").outE().as("e").inV()
            .where(P.eq("src")).inE().where(P.eq("e")).drop().iterate();
}

/**
 * Builds a new GremlinLineageGraph from the edges whose in-vertex and out-vertex both carry
 * {@code label}, using the Gremlin subgraph side effect.
 *
 * NOTE(review): the selection is edge-driven, so a vertex with that label but no such edge
 * presumably does not appear in the result - confirm this is intended.
 */
public GremlinLineageGraph getSubGraph(String label) {
    return new GremlinLineageGraph((Graph) g.E().where(__.inV().hasLabel(label)).where(__.outV().hasLabel(label))
.subgraph("sg").cap("sg").next());
}

/**
 * Copies the content of {@code other} into this graph in three passes: vertices, then vertex
 * properties other than "obj", then edges.
 *
 * @param other graph to merge in; it is cast to GremlinLineageGraph, so another LineageGraph
 *              implementation fails with a ClassCastException
 */
public void merge(LineageGraph other) {
    GremlinLineageGraph graph = (GremlinLineageGraph) other;
    // Pass 1: an empty property map selects every vertex object of the other graph.
    for (Object vertex : other.retrieveVerticesByProps(Collections.emptyMap())) {
        addVertexIfNotExist(vertex);
    }
    // Pass 2: "obj" was already written in pass 1; copy the remaining properties onto the vertex
    // with the same id in this graph.
    for (Property p : graph.g.V().properties().toList()) {
        if (!p.key().equals("obj")) {
            g.V().hasId(p.element().id()).property(p.key(), p.value()).iterate();
        }
    }
    // Pass 3: re-create every edge between the merged vertices.
    for (EdgeTuple edgeTuple : other.retrieveEdgesByProps(Collections.emptyMap())) {
        addEdgeIfNotExist(edgeTuple.label(), edgeTuple.source(), edgeTuple.target());
    }
}

/**
 * Lists the paths that lead from the vertex of {@code source} to the vertex of {@code target}
 * along outgoing edges. Both ends are identified by simple class name (label) and hash code (id).
 *
 * @return one list per path, holding the "obj" value of each vertex on that path in path order
 */
public List> listPath(Object source, Object target) {
    List> result = new ArrayList<>();
    for (Path path : g.V().hasLabel(source.getClass().getSimpleName()).hasId(source.hashCode())
            .repeat(__.out().simplePath())
            .until(__.hasLabel(target.getClass().getSimpleName()).hasId(target.hashCode()))
            .path().toList()) {
        List singlePath = new ArrayList<>();
        for (Object x : path) {
            // Only Vertex elements of the path are translated back to their stored object.
            if (x instanceof Vertex) {
                Vertex v = (Vertex) x;
                // The stored object is fetched with a fresh lookup by vertex id.
                singlePath.add(g.V().hasId(v.id()).values("obj").toList().get(0));
            }
        }
        result.add(singlePath);
    }
    return result;
}
}
--------------------------------------------------------------------------------
/sqllineage4j-graph/src/main/java/io/github/reata/sqllineage4j/graph/LineageGraph.java:
--------------------------------------------------------------------------------
package io.github.reata.sqllineage4j.graph;

import io.github.reata.sqllineage4j.common.entity.EdgeTuple;

import java.util.List;
import java.util.Map;

/**
 * Operations of a lineage graph whose vertices wrap arbitrary objects and whose edges are labelled.
 * The behavioural notes below describe the GremlinLineageGraph implementation in the same module.
 */
public interface LineageGraph {

    /** Adds a vertex for {@code obj} with no extra properties. */
    void addVertexIfNotExist(Object obj);

    /** Adds a vertex for {@code obj} if absent and sets the given properties on it. */
    void addVertexIfNotExist(Object obj, Map props);

    /** Returns the objects of the vertices that have all the given properties. */
    List retrieveVerticesByProps(Map props);

    /** Returns the objects of vertices with outgoing but no incoming edges. */
    List retrieveSourceOnlyVertices();

    /** Returns the objects of vertices with incoming but no outgoing edges. */
    List retrieveTargetOnlyVertices();

    /** Returns the objects of vertices with both incoming and outgoing edges. */
    List retrieveConnectedVertices();

    /** Returns the objects of vertices that have an edge to themselves. */
    List retrieveSelfLoopVertices();

    /** Sets the given properties on the vertices of {@code objects}. */
    void updateVertices(Map props, Object... objects);

    /** Drops the vertices of {@code objects}. */
    void dropVertices(Object... objects);

    /** Drops the vertices of {@code objects} that have no edges. */
    void dropVerticesIfOrphan(Object... objects);

    /** Adds an edge {@code src -> tgt} with the given label unless it already exists. */
    void addEdgeIfNotExist(String label, Object src, Object tgt);

    /** Returns the edges that have all the given properties. */
    List retrieveEdgesByProps(Map props);

    /** Returns the edges with the given label. */
    List retrieveEdgesByLabel(String label);

    /** Returns the incoming and outgoing edges of the vertex of {@code object}. */
    List retrieveEdgesByVertex(Object object);

    /** Drops edges that start and end on the same vertex. */
    void dropSelfLoopEdge();

    /** Returns a graph restricted to edges between vertices carrying {@code label}. */
    LineageGraph getSubGraph(String label);

    /** Copies vertices, vertex properties and edges of {@code other} into this graph. */
    void merge(LineageGraph other);

    /** Lists the paths from {@code source} to {@code target} as lists of vertex objects. */
    List> listPath(Object source, Object target);
}
--------------------------------------------------------------------------------
/sqllineage4j-parser/pom.xml:
--------------------------------------------------------------------------------
1 | 2 | 3 | 5 | 6 | sqllineage4j 7 | io.github.reata 8 | 1.0.1-SNAPSHOT 9 | 10 | 4.0.0 11 | sqllineage4j-parser 12 | sqllineage4j-parser 13 | 14 | 15 | 16 | org.antlr 17 | antlr4 18 | 19 | 20 | 21 | 22 | 23 | 24 | org.antlr 25 | antlr4-maven-plugin 26 | ${dep.antlr.version} 27 | 28 | true 29 | true 30 | 31 | 32 | 33 | 34 | antlr4 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 |
--------------------------------------------------------------------------------
/sqllineage4j-parser/src/main/antlr4/io/github/reata/sqllineage4j/parser/SqlBase.g4:
--------------------------------------------------------------------------------
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * This file is an adaptation of Spark's sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 grammar.
 */

grammar SqlBase;

@parser::members {
  /**
   * Set-operation precedence switch. While this stays false, INTERSECT binds tighter than
   * UNION, EXCEPT and MINUS, as the SQL standard requires.
   * The name (including its spelling) is referenced by the predicates in queryTerm.
   */
  public boolean legacy_setops_precedence_enbled = false;

  /**
   * Exponent-literal switch. While this stays false, a literal written with an exponent
   * is converted to double rather than decimal.
   */
  public boolean legacy_exponent_literal_as_decimal_enabled = false;

  /**
   * Keyword-behaviour switch. When set to true, keywords behave as in ANSI SQL.
   */
  public boolean SQL_standard_keyword_behavior = false;
}

@lexer::members {
  /**
   * Decides whether the text just matched may stand as a decimal token (one containing a dot)
   * by peeking at the next character: the answer is false when that character is an
   * upper-case letter A-Z, a digit 0-9 or an underscore, and true otherwise.
   *
   * Examples:
   *   "2.3"   - "2." is rejected, the digit '3' follows.
   *   "2.3_"  - "2.3" is rejected, '_' follows.
   *   "2.3W"  - "2.3" is rejected, 'W' follows.
   *   "12.0D 34.E2+0.12 " - "12.0D" is accepted (a space follows) and so is
   *                          "34.E2" (the symbol '+' follows).
   *
   * NOTE(review): only A-Z is tested, so lower-case letters are not rejected here -
   * presumably the lexer is fed an upper-casing char stream; confirm in LineageParser.
   */
  public boolean isValidDecimal() {
    final int lookahead = _input.LA(1);
    final boolean upperLetter = lookahead >= 'A' && lookahead <= 'Z';
    final boolean digit = lookahead >= '0' && lookahead <= '9';
    return !(upperLetter || digit || lookahead == '_');
  }

  /**
   * Consulted once a bracketed-comment opener has been seen: if the very next character is '+',
   * the text has to be lexed as a hint later on and must not be consumed as a comment.
   *
   * Returns true exactly when the next character is '+'.
   */
  public boolean isHint() {
    return _input.LA(1) == '+';
  }
}

// Entry rules: each one wraps a single construct and requires EOF after it.
// singleStatement additionally tolerates any number of trailing semicolons.
singleStatement
    : statement ';'* EOF
    ;

singleExpression
    : namedExpression EOF
    ;

singleTableIdentifier
    : tableIdentifier EOF
    ;

singleMultipartIdentifier
    : multipartIdentifier EOF
    ;

singleFunctionIdentifier
    : functionIdentifier EOF
    ;

singleDataType
    : dataType EOF
    ;

singleTableSchema
    : colTypeList EOF
    ;

// Every statement form the grammar accepts; each alternative carries a #label.
statement
    : query                                                            #statementDefault
    | ctes? dmlStatementNoWith                                         #dmlStatement
    | USE NAMESPACE? multipartIdentifier                               #use
    | CREATE namespace (IF NOT EXISTS)? multipartIdentifier
        (commentSpec |
         locationSpec |
         (WITH (DBPROPERTIES | PROPERTIES) tablePropertyList))*        #createNamespace
    | ALTER namespace multipartIdentifier
        SET (DBPROPERTIES | PROPERTIES) tablePropertyList              #setNamespaceProperties
    | ALTER namespace multipartIdentifier
        SET locationSpec                                               #setNamespaceLocation
    | DROP namespace (IF EXISTS)? multipartIdentifier
        (RESTRICT | CASCADE)?                                          #dropNamespace
    | SHOW (DATABASES | NAMESPACES) ((FROM | IN) multipartIdentifier)?
        (LIKE? pattern=STRING)?                                        #showNamespaces
    | createTableHeader ('(' colTypeList ')')? tableProvider?
createTableClauses
        (AS? query)?                                                   #createTable
    | CREATE TABLE (IF NOT EXISTS)? target=tableIdentifier
        LIKE source=tableIdentifier
        (tableProvider |
        rowFormat |
        createFileFormat |
        locationSpec |
        (TBLPROPERTIES tableProps=tablePropertyList))*                 #createTableLike
    | replaceTableHeader ('(' colTypeList ')')? tableProvider?
        createTableClauses
        (AS? query)?                                                   #replaceTable
    | ANALYZE TABLE multipartIdentifier partitionSpec? COMPUTE STATISTICS
        (identifier | FOR COLUMNS identifierSeq | FOR ALL COLUMNS)?    #analyze
    // ALTER TABLE ... ADD COLUMN(S): the bare and the parenthesised column list share one label.
    | ALTER TABLE multipartIdentifier
        ADD (COLUMN | COLUMNS)
        columns=qualifiedColTypeWithPositionList                       #addTableColumns
    | ALTER TABLE multipartIdentifier
        ADD (COLUMN | COLUMNS)
        '(' columns=qualifiedColTypeWithPositionList ')'               #addTableColumns
    | ALTER TABLE table=multipartIdentifier
        RENAME COLUMN
        from=multipartIdentifier TO to=errorCapturingIdentifier        #renameTableColumn
    // ALTER TABLE ... DROP COLUMN(S): again two spellings under one label.
    | ALTER TABLE multipartIdentifier
        DROP (COLUMN | COLUMNS)
        '(' columns=multipartIdentifierList ')'                        #dropTableColumns
    | ALTER TABLE multipartIdentifier
        DROP (COLUMN | COLUMNS) columns=multipartIdentifierList        #dropTableColumns
    | ALTER (TABLE | VIEW) from=multipartIdentifier
        RENAME TO to=multipartIdentifier                               #renameTable
    | ALTER (TABLE | VIEW) multipartIdentifier
        SET TBLPROPERTIES tablePropertyList                            #setTableProperties
    | ALTER (TABLE | VIEW) multipartIdentifier
        UNSET TBLPROPERTIES (IF EXISTS)? tablePropertyList             #unsetTableProperties
    | ALTER TABLE table=multipartIdentifier
        (ALTER | CHANGE) COLUMN? column=multipartIdentifier
        alterColumnAction?                                             #alterTableAlterColumn
    | ALTER TABLE table=multipartIdentifier partitionSpec?
        CHANGE COLUMN?
        colName=multipartIdentifier colType colPosition?               #hiveChangeColumn
    | ALTER TABLE table=multipartIdentifier partitionSpec?
        REPLACE COLUMNS
        '(' columns=qualifiedColTypeWithPositionList ')'               #hiveReplaceColumns
    // Both SERDE forms are labelled #setTableSerDe.
    | ALTER TABLE multipartIdentifier (partitionSpec)?
        SET SERDE STRING (WITH SERDEPROPERTIES tablePropertyList)?     #setTableSerDe
    | ALTER TABLE multipartIdentifier (partitionSpec)?
        SET SERDEPROPERTIES tablePropertyList                          #setTableSerDe
    | ALTER (TABLE | VIEW) multipartIdentifier ADD (IF NOT EXISTS)?
        partitionSpecLocation+                                         #addTablePartition
    | ALTER TABLE multipartIdentifier
        from=partitionSpec RENAME TO to=partitionSpec                  #renameTablePartition
    | ALTER (TABLE | VIEW) multipartIdentifier
        DROP (IF EXISTS)? partitionSpec (',' partitionSpec)* PURGE?    #dropTablePartitions
    | ALTER TABLE multipartIdentifier
        (partitionSpec)? SET locationSpec                              #setTableLocation
    | ALTER TABLE multipartIdentifier RECOVER PARTITIONS               #recoverPartitions
    | DROP TABLE (IF EXISTS)? multipartIdentifier PURGE?               #dropTable
    | DROP VIEW (IF EXISTS)? multipartIdentifier                       #dropView
    // Views and functions.
    | CREATE (OR REPLACE)? (GLOBAL? TEMPORARY)?
        VIEW (IF NOT EXISTS)? multipartIdentifier
        identifierCommentList?
        (commentSpec |
         (PARTITIONED ON identifierList) |
         (TBLPROPERTIES tablePropertyList))*
        AS query                                                       #createView
    | CREATE (OR REPLACE)? GLOBAL? TEMPORARY VIEW
        tableIdentifier ('(' colTypeList ')')? tableProvider
        (OPTIONS tablePropertyList)?                                   #createTempViewUsing
    | ALTER VIEW multipartIdentifier AS? query                         #alterViewQuery
    | CREATE (OR REPLACE)? TEMPORARY? FUNCTION (IF NOT EXISTS)?
        multipartIdentifier AS className=STRING
        (USING resource (',' resource)*)?                              #createFunction
    | DROP TEMPORARY? FUNCTION (IF EXISTS)? multipartIdentifier        #dropFunction
    // EXPLAIN wraps another statement (the rule refers to itself here).
    | EXPLAIN (LOGICAL | FORMATTED | EXTENDED | CODEGEN | COST)?
        statement                                                      #explain
    // SHOW / DESCRIBE / COMMENT / REFRESH and other utility commands.
    | SHOW TABLES ((FROM | IN) multipartIdentifier)?
        (LIKE? pattern=STRING)?                                        #showTables
    | SHOW TABLE EXTENDED ((FROM | IN) ns=multipartIdentifier)?
        LIKE pattern=STRING partitionSpec?                             #showTable
    | SHOW TBLPROPERTIES table=multipartIdentifier
        ('(' key=tablePropertyKey ')')?                                #showTblProperties
    | SHOW COLUMNS (FROM | IN) table=multipartIdentifier
        ((FROM | IN) ns=multipartIdentifier)?                          #showColumns
    | SHOW VIEWS ((FROM | IN) multipartIdentifier)?
        (LIKE? pattern=STRING)?                                        #showViews
    | SHOW PARTITIONS multipartIdentifier partitionSpec?               #showPartitions
    | SHOW identifier? FUNCTIONS
        (LIKE? (multipartIdentifier | pattern=STRING))?                #showFunctions
    | SHOW CREATE TABLE multipartIdentifier (AS SERDE)?                #showCreateTable
    | SHOW CURRENT NAMESPACE                                           #showCurrentNamespace
    | (DESC | DESCRIBE) FUNCTION EXTENDED? describeFuncName            #describeFunction
    | (DESC | DESCRIBE) namespace EXTENDED?
        multipartIdentifier                                            #describeNamespace
    | (DESC | DESCRIBE) TABLE? option=(EXTENDED | FORMATTED)?
        multipartIdentifier partitionSpec? describeColName?            #describeRelation
    | (DESC | DESCRIBE) QUERY? query                                   #describeQuery
    | COMMENT ON namespace multipartIdentifier IS
        comment=(STRING | NULL)                                        #commentNamespace
    | COMMENT ON TABLE multipartIdentifier IS comment=(STRING | NULL)  #commentTable
    | REFRESH TABLE multipartIdentifier                                #refreshTable
    | REFRESH FUNCTION multipartIdentifier                             #refreshFunction
    | REFRESH (STRING | .*?)                                           #refreshResource
    | CACHE LAZY? TABLE multipartIdentifier
        (OPTIONS options=tablePropertyList)? (AS? query)?              #cacheTable
    | UNCACHE TABLE (IF EXISTS)? multipartIdentifier                   #uncacheTable
    | CLEAR CACHE                                                      #clearCache
    | LOAD DATA LOCAL? INPATH path=STRING OVERWRITE? INTO TABLE
        multipartIdentifier partitionSpec?                             #loadData
    | TRUNCATE TABLE multipartIdentifier partitionSpec?                #truncateTable
    | MSCK REPAIR TABLE multipartIdentifier                            #repairTable
    | op=(ADD | LIST) identifier (STRING | .*?)                        #manageResource
    // SET / RESET: listed from the more specific forms down to the catch-alls `SET .*?` and `RESET .*?`.
    | SET ROLE .*?                                                     #failNativeCommand
    | SET TIME ZONE interval                                           #setTimeZone
    | SET TIME ZONE timezone=(STRING | LOCAL)                          #setTimeZone
    | SET TIME ZONE .*?                                                #setTimeZone
    | SET configKey EQ configValue                                     #setQuotedConfiguration
    | SET configKey (EQ .*?)?                                          #setQuotedConfiguration
    | SET .*? EQ configValue                                           #setQuotedConfiguration
    | SET .*?                                                          #setConfiguration
    | RESET configKey                                                  #resetQuotedConfiguration
    | RESET .*?                                                        #resetConfiguration
    // Any input that opens with one of the unsupportedHiveNativeCommands keyword sequences;
    // shares the #failNativeCommand label with SET ROLE above.
    | unsupportedHiveNativeCommands .*?                                #failNativeCommand
    ;

// Configuration key of a SET / RESET statement: a single quotedIdentifier.
configKey
    : quotedIdentifier
    ;

// Configuration value of a SET statement: a single quotedIdentifier.
configValue
    : quotedIdentifier
    ;

// Leading keyword sequences of the commands that `statement` labels #failNativeCommand.
// Each keyword is captured under kw1..kw6 in order of appearance.
unsupportedHiveNativeCommands
    : kw1=CREATE kw2=ROLE
    | kw1=DROP kw2=ROLE
    | kw1=GRANT kw2=ROLE?
    | kw1=REVOKE kw2=ROLE?
    | kw1=SHOW kw2=GRANT
    | kw1=SHOW kw2=ROLE kw3=GRANT?
    | kw1=SHOW kw2=PRINCIPALS
    | kw1=SHOW kw2=ROLES
    | kw1=SHOW kw2=CURRENT kw3=ROLES
    | kw1=EXPORT kw2=TABLE
    | kw1=IMPORT kw2=TABLE
    | kw1=SHOW kw2=COMPACTIONS
    | kw1=SHOW kw2=CREATE kw3=TABLE
    | kw1=SHOW kw2=TRANSACTIONS
    | kw1=SHOW kw2=INDEXES
    | kw1=SHOW kw2=LOCKS
    | kw1=CREATE kw2=INDEX
    | kw1=DROP kw2=INDEX
    | kw1=ALTER kw2=INDEX
    | kw1=LOCK kw2=TABLE
    | kw1=LOCK kw2=DATABASE
    | kw1=UNLOCK kw2=TABLE
    | kw1=UNLOCK kw2=DATABASE
    | kw1=CREATE kw2=TEMPORARY kw3=MACRO
    | kw1=DROP kw2=TEMPORARY kw3=MACRO
    | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=CLUSTERED
    | kw1=ALTER kw2=TABLE tableIdentifier kw3=CLUSTERED kw4=BY
    | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=SORTED
    | kw1=ALTER kw2=TABLE tableIdentifier kw3=SKEWED kw4=BY
    | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=SKEWED
    | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=STORED kw5=AS kw6=DIRECTORIES
    | kw1=ALTER kw2=TABLE tableIdentifier kw3=SET kw4=SKEWED kw5=LOCATION
    | kw1=ALTER kw2=TABLE tableIdentifier kw3=EXCHANGE kw4=PARTITION
    | kw1=ALTER kw2=TABLE tableIdentifier kw3=ARCHIVE kw4=PARTITION
    | kw1=ALTER kw2=TABLE tableIdentifier kw3=UNARCHIVE kw4=PARTITION
    | kw1=ALTER kw2=TABLE tableIdentifier kw3=TOUCH
    | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=COMPACT
    | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=CONCATENATE
    | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=SET kw4=FILEFORMAT
    | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=REPLACE kw4=COLUMNS
    | kw1=START kw2=TRANSACTION
    | kw1=COMMIT
    | kw1=ROLLBACK
    | kw1=DFS
    ;

// CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] <name>; used by the #createTable alternative.
createTableHeader
    : CREATE TEMPORARY? EXTERNAL? TABLE (IF NOT EXISTS)? multipartIdentifier
    ;

// [CREATE OR] REPLACE TABLE <name>; used by the #replaceTable alternative.
replaceTableHeader
    : (CREATE OR)? REPLACE TABLE multipartIdentifier
    ;

// CLUSTERED BY (...) [SORTED BY (...)] INTO <n> BUCKETS.
bucketSpec
    : CLUSTERED BY identifierList
      (SORTED BY orderedIdentifierList)?
      INTO INTEGER_VALUE BUCKETS
    ;

// SKEWED BY (...) ON <constant list or nested constant list> [STORED AS DIRECTORIES].
skewSpec
    : SKEWED BY identifierList
      ON (constantList | nestedConstantList)
      (STORED AS DIRECTORIES)?
    ;

// LOCATION '<string>'.
locationSpec
    : LOCATION STRING
    ;

// COMMENT '<string>'.
commentSpec
    : COMMENT STRING
    ;

// A full query: optional WITH clause, the set-operation tree, then ordering / limit clauses.
query
    : ctes? queryTerm queryOrganization
    ;

// The INSERT header forms: into a table (OVERWRITE or INTO) or into a directory.
insertInto
    : INSERT OVERWRITE TABLE? multipartIdentifier (partitionSpec (IF NOT EXISTS)?)? identifierList?         #insertOverwriteTable
    | INSERT INTO TABLE? multipartIdentifier partitionSpec? (IF NOT EXISTS)? identifierList?                #insertIntoTable
    | INSERT OVERWRITE LOCAL? DIRECTORY path=STRING rowFormat? createFileFormat?                            #insertOverwriteHiveDir
    | INSERT OVERWRITE LOCAL? DIRECTORY (path=STRING)? tableProvider (OPTIONS options=tablePropertyList)?   #insertOverwriteDir
    ;

// A partition spec with an optional LOCATION; repeated in the #addTablePartition alternative.
partitionSpecLocation
    : partitionSpec locationSpec?
;

// PARTITION (<val> [, <val>]*).
partitionSpec
    : PARTITION '(' partitionVal (',' partitionVal)* ')'
    ;

// One partition entry: a column identifier, optionally followed by EQ and a constant.
partitionVal
    : identifier (EQ constant)?
    ;

// The three keywords accepted wherever a namespace keyword is expected.
namespace
    : NAMESPACE
    | DATABASE
    | SCHEMA
    ;

// What DESCRIBE FUNCTION may name: a qualified name, a string, or an operator.
describeFuncName
    : qualifiedName
    | STRING
    | comparisonOperator
    | arithmeticOperator
    | predicateOperator
    ;

// Dot-separated identifiers, collected into nameParts.
describeColName
    : nameParts+=identifier ('.' nameParts+=identifier)*
    ;

// WITH clause: one or more comma-separated named queries.
ctes
    : WITH namedQuery (',' namedQuery)*
    ;

// One common table expression: name, optional column aliases, optional AS, parenthesised query.
namedQuery
    : name=errorCapturingIdentifier (columnAliases=identifierList)? AS? '(' query ')'
    ;

// USING <provider>.
tableProvider
    : USING multipartIdentifier
    ;

// Zero or more table clauses in any order; the repetition also permits an empty match
// and the same clause appearing more than once.
createTableClauses
    :((OPTIONS options=tablePropertyList) |
     (PARTITIONED BY partitioning=partitionFieldList) |
     skewSpec |
     bucketSpec |
     rowFormat |
     createFileFormat |
     locationSpec |
     commentSpec |
     (TBLPROPERTIES tableProps=tablePropertyList))*
    ;

// Parenthesised, comma-separated properties.
tablePropertyList
    : '(' tableProperty (',' tableProperty)* ')'
    ;

// key [[=] value]: both the EQ sign and the value itself are optional.
tableProperty
    : key=tablePropertyKey (EQ? value=tablePropertyValue)?
    ;

// A property key: dot-separated identifiers or a string.
tablePropertyKey
    : identifier ('.' identifier)*
    | STRING
    ;

// A property value: integer, decimal, boolean or string.
tablePropertyValue
    : INTEGER_VALUE
    | DECIMAL_VALUE
    | booleanValue
    | STRING
    ;

// Parenthesised, comma-separated constants.
constantList
    : '(' constant (',' constant)* ')'
    ;

// Parenthesised, comma-separated constant lists.
nestedConstantList
    : '(' constantList (',' constantList)* ')'
    ;

// STORED AS <format> or STORED BY <storage handler>.
createFileFormat
    : STORED AS fileFormat
    | STORED BY storageHandler
    ;

// Either an explicit INPUTFORMAT / OUTPUTFORMAT pair or a single identifier.
fileFormat
    : INPUTFORMAT inFmt=STRING OUTPUTFORMAT outFmt=STRING    #tableFileFormat
    | identifier                                             #genericFileFormat
    ;

// A storage handler string with optional WITH SERDEPROPERTIES (...).
storageHandler
    : STRING (WITH SERDEPROPERTIES tablePropertyList)?
;

// A resource reference: an identifier followed by a string.
resource
    : identifier STRING
    ;

// DML statements without their WITH clause (`statement` puts the optional ctes in front).
dmlStatementNoWith
    : insertInto queryTerm queryOrganization                                       #singleInsertQuery
    | fromClause multiInsertQueryBody+                                             #multiInsertQuery
    | DELETE FROM multipartIdentifier tableAlias whereClause?                      #deleteFromTable
    | UPDATE multipartIdentifier tableAlias setClause whereClause?                 #updateTable
    | MERGE INTO target=multipartIdentifier targetAlias=tableAlias
        USING (source=multipartIdentifier |
          '(' sourceQuery=query')') sourceAlias=tableAlias
        ON mergeCondition=booleanExpression
        matchedClause*
        notMatchedClause*                                                          #mergeIntoTable
    ;

// Trailing clauses of a query, all optional and in this fixed order:
// ORDER BY, CLUSTER BY, DISTRIBUTE BY, SORT BY, window clause, LIMIT.
queryOrganization
    : (ORDER BY order+=sortItem (',' order+=sortItem)*)?
      (CLUSTER BY clusterBy+=expression (',' clusterBy+=expression)*)?
      (DISTRIBUTE BY distributeBy+=expression (',' distributeBy+=expression)*)?
      (SORT BY sort+=sortItem (',' sort+=sortItem)*)?
      windowClause?
      (LIMIT (ALL | limit=expression))?
    ;

// One INSERT target plus its select body inside a multi-insert (FROM ... INSERT ... INSERT ...).
multiInsertQueryBody
    : insertInto fromStatementBody
    ;

// Set-operation tree. The semantic predicates read the parser field
// legacy_setops_precedence_enbled: when it is true, a single alternative covers all four
// operators; when it is false, INTERSECT has its own alternative listed before
// UNION / EXCEPT / SETMINUS.
queryTerm
    : queryPrimary                                                                       #queryTermDefault
    | left=queryTerm {legacy_setops_precedence_enbled}?
        operator=(INTERSECT | UNION | EXCEPT | SETMINUS) setQuantifier? right=queryTerm  #setOperation
    | left=queryTerm {!legacy_setops_precedence_enbled}?
        operator=INTERSECT setQuantifier? right=queryTerm                                #setOperation
    | left=queryTerm {!legacy_setops_precedence_enbled}?
        operator=(UNION | EXCEPT | SETMINUS) setQuantifier? right=queryTerm              #setOperation
    ;

// The operands of a set operation.
queryPrimary
    : querySpecification                                                    #queryPrimaryDefault
    | fromStatement                                                         #fromStmt
    | TABLE multipartIdentifier                                             #table
    | inlineTable                                                           #inlineTableDefault1
    | '(' query ')'                                                         #subquery
    ;

// One ORDER BY / SORT BY item: expression, optional ASC | DESC, optional NULLS LAST | FIRST.
sortItem
    : expression ordering=(ASC | DESC)? (NULLS nullOrder=(LAST | FIRST))?
;

// FROM-first statement: one FROM clause followed by one or more bodies.
fromStatement
    : fromClause fromStatementBody+
    ;

// Body of a FROM-first statement: either a transform clause or a select clause, each with
// its optional filters and a trailing queryOrganization. No FROM clause appears here.
fromStatementBody
    : transformClause
      whereClause?
      queryOrganization
    | selectClause
      lateralView*
      whereClause?
      aggregationClause?
      havingClause?
      windowClause?
      queryOrganization
    ;

// A SELECT-first (or transform-first) query block; the FROM clause is optional in both forms.
querySpecification
    : transformClause
      fromClause?
      whereClause?                                                          #transformQuerySpecification
    | selectClause
      fromClause?
      lateralView*
      whereClause?
      aggregationClause?
      havingClause?
      windowClause?                                                         #regularQuerySpecification
    ;

// SELECT TRANSFORM(...) / MAP ... / REDUCE ... USING '<script>', with optional row formats,
// record writer / reader and an optional AS output column list.
transformClause
    : (SELECT kind=TRANSFORM '(' namedExpressionSeq ')'
            | kind=MAP namedExpressionSeq
            | kind=REDUCE namedExpressionSeq)
      inRowFormat=rowFormat?
      (RECORDWRITER recordWriter=STRING)?
      USING script=STRING
      (AS (identifierSeq | colTypeList | ('(' (identifierSeq | colTypeList) ')')))?
      outRowFormat=rowFormat?
      (RECORDREADER recordReader=STRING)?
    ;

// SELECT [hints] [DISTINCT | ALL] <select list>.
selectClause
    : SELECT (hints+=hint)* setQuantifier? namedExpressionSeq
    ;

// SET <assignments> of an UPDATE statement.
setClause
    : SET assignmentList
    ;

// MERGE: WHEN MATCHED [AND <cond>] THEN <action>.
matchedClause
    : WHEN MATCHED (AND matchedCond=booleanExpression)? THEN matchedAction
    ;
// MERGE: WHEN NOT MATCHED [AND <cond>] THEN <action>.
notMatchedClause
    : WHEN NOT MATCHED (AND notMatchedCond=booleanExpression)? THEN notMatchedAction
    ;

// Actions allowed in a matched clause.
matchedAction
    : DELETE
    | UPDATE SET ASTERISK
    | UPDATE SET assignmentList
    ;

// Actions allowed in a not-matched clause: INSERT * or INSERT (cols) VALUES (exprs).
notMatchedAction
    : INSERT ASTERISK
    | INSERT '(' columns=multipartIdentifierList ')'
        VALUES '(' expression (',' expression)* ')'
    ;

// Comma-separated assignments.
assignmentList
    : assignment (',' assignment)*
    ;

// <column> EQ <expression>.
assignment
    : key=multipartIdentifier EQ value=expression
    ;

// WHERE <condition>.
whereClause
    : WHERE booleanExpression
    ;

// HAVING <condition>.
havingClause
    : HAVING booleanExpression
    ;

// A hint comment; the comma between hint statements is optional.
hint
    : '/*+' hintStatements+=hintStatement (','? hintStatements+=hintStatement)* '*/'
    ;

// A hint name, optionally with a parenthesised parameter list.
hintStatement
    : hintName=identifier
    | hintName=identifier '(' parameters+=primaryExpression (',' parameters+=primaryExpression)* ')'
    ;

// FROM <relation> [, <relation>]* followed by optional lateral views and an optional PIVOT.
fromClause
    : FROM relation (',' relation)* lateralView* pivotClause?
    ;

// GROUP BY <exprs> with an optional WITH ROLLUP / WITH CUBE / GROUPING SETS (...) suffix,
// or GROUP BY GROUPING SETS (...) on its own.
aggregationClause
    : GROUP BY groupingExpressions+=expression (',' groupingExpressions+=expression)* (
      WITH kind=ROLLUP
    | WITH kind=CUBE
    | kind=GROUPING SETS '(' groupingSet (',' groupingSet)* ')')?
    | GROUP BY kind=GROUPING SETS '(' groupingSet (',' groupingSet)* ')'
    ;

// One grouping set: a (possibly empty) parenthesised expression list or a single expression.
groupingSet
    : '(' (expression (',' expression)*)? ')'
    | expression
    ;

// PIVOT (<aggregates> FOR <column(s)> IN (<values>)).
pivotClause
    : PIVOT '(' aggregates=namedExpressionSeq FOR pivotColumn IN '(' pivotValues+=pivotValue (',' pivotValues+=pivotValue)* ')' ')'
    ;

// The pivot column: one identifier or a parenthesised identifier list.
pivotColumn
    : identifiers+=identifier
    | '(' identifiers+=identifier (',' identifiers+=identifier)* ')'
    ;

// A pivot value with an optional alias.
pivotValue
    : expression (AS? identifier)?
    ;

// LATERAL VIEW [OUTER] <function>(<args>) <table alias> [[AS] <column aliases>].
lateralView
    : LATERAL VIEW (OUTER)? qualifiedName '(' (expression (',' expression)*)? ')' tblName=identifier (AS? colName+=identifier (',' colName+=identifier)*)?
;

// DISTINCT or ALL.
setQuantifier
    : DISTINCT
    | ALL
    ;

// A relation: a primary followed by any number of joins.
relation
    : relationPrimary joinRelation*
    ;

// One join step. The NATURAL form takes no join criteria.
joinRelation
    : (joinType) JOIN right=relationPrimary joinCriteria?
    | NATURAL joinType JOIN right=relationPrimary
    ;

// Join keywords. The first alternative is `INNER?`, so joinType can also match nothing
// (a bare JOIN).
joinType
    : INNER?
    | CROSS
    | LEFT OUTER?
    | LEFT? SEMI
    | RIGHT OUTER?
    | FULL OUTER?
    | LEFT? ANTI
    ;

// ON <condition> or USING (<columns>).
joinCriteria
    : ON booleanExpression
    | USING identifierList
    ;

// TABLESAMPLE ( [<method>] ).
sample
    : TABLESAMPLE '(' sampleMethod? ')'
    ;

// Sampling by percentage, row count, bucket, or a bare expression (labelled #sampleByBytes).
sampleMethod
    : negativeSign=MINUS? percentage=(INTEGER_VALUE | DECIMAL_VALUE) PERCENTLIT   #sampleByPercentile
    | expression ROWS                                                             #sampleByRows
    | sampleType=BUCKET numerator=INTEGER_VALUE OUT OF denominator=INTEGER_VALUE
        (ON (identifier | qualifiedName '(' ')'))?                                #sampleByBucket
    | bytes=expression                                                            #sampleByBytes
    ;

// Parenthesised identifier sequence.
identifierList
    : '(' identifierSeq ')'
    ;

// Comma-separated identifiers, collected into ident.
identifierSeq
    : ident+=errorCapturingIdentifier (',' ident+=errorCapturingIdentifier)*
    ;

// Parenthesised, comma-separated ordered identifiers.
orderedIdentifierList
    : '(' orderedIdentifier (',' orderedIdentifier)* ')'
    ;

// An identifier with an optional ASC | DESC.
orderedIdentifier
    : ident=errorCapturingIdentifier ordering=(ASC | DESC)?
    ;

// Parenthesised, comma-separated identifiers with optional comments.
identifierCommentList
    : '(' identifierComment (',' identifierComment)* ')'
    ;

// An identifier with an optional COMMENT '<string>'.
identifierComment
    : identifier commentSpec?
    ;

// The kinds of relation allowed in FROM: table name, parenthesised query, parenthesised
// relation, inline VALUES table, or table-valued function call.
relationPrimary
    : multipartIdentifier sample? tableAlias      #tableName
    | '(' query ')' sample? tableAlias            #aliasedQuery
    | '(' relation ')' sample? tableAlias         #aliasedRelation
    | inlineTable                                 #inlineTableDefault2
    | functionTable                               #tableValuedFunction
    ;

// VALUES <expr> [, <expr>]* followed by a table alias (which may be empty, see tableAlias).
inlineTable
    : VALUES expression (',' expression)* tableAlias
    ;

// <function>(<args>) followed by a table alias (which may be empty, see tableAlias).
functionTable
    : funcName=errorCapturingIdentifier '(' (expression (',' expression)*)? ')' tableAlias
    ;

// [[AS] <alias> [(<column aliases>)]]: the whole alias is optional, so this rule can match nothing.
tableAlias
    : (AS? strictIdentifier identifierList?)?
    ;

// ROW FORMAT SERDE '<name>' [WITH SERDEPROPERTIES (...)], or ROW FORMAT DELIMITED with its
// optional terminator clauses in this fixed order.
rowFormat
    : ROW FORMAT SERDE name=STRING (WITH SERDEPROPERTIES props=tablePropertyList)?       #rowFormatSerde
    | ROW FORMAT DELIMITED
      (FIELDS TERMINATED BY fieldsTerminatedBy=STRING (ESCAPED BY escapedBy=STRING)?)?
      (COLLECTION ITEMS TERMINATED BY collectionItemsTerminatedBy=STRING)?
      (MAP KEYS TERMINATED BY keysTerminatedBy=STRING)?
      (LINES TERMINATED BY linesSeparatedBy=STRING)?
      (NULL DEFINED AS nullDefinedAs=STRING)?                                            #rowFormatDelimited
    ;

// Comma-separated multipart identifiers.
multipartIdentifierList
    : multipartIdentifier (',' multipartIdentifier)*
    ;

// Dot-separated identifier with any number of parts, collected into parts.
multipartIdentifier
    : parts+=errorCapturingIdentifier ('.' parts+=errorCapturingIdentifier)*
    ;

// [db.]table - at most two parts.
tableIdentifier
    : (db=errorCapturingIdentifier '.')? table=errorCapturingIdentifier
    ;

// [db.]function - at most two parts.
functionIdentifier
    : (db=errorCapturingIdentifier '.')? function=errorCapturingIdentifier
    ;

// An expression with an optional alias: a single name or a parenthesised identifier list.
namedExpression
    : expression (AS? (name=errorCapturingIdentifier | identifierList))?
    ;

// Comma-separated named expressions.
namedExpressionSeq
    : namedExpression (',' namedExpression)*
    ;

// Parenthesised, comma-separated partition fields.
partitionFieldList
    : '(' fields+=partitionField (',' fields+=partitionField)* ')'
    ;

// A partition field: a transform or a typed column.
partitionField
    : transform  #partitionTransform
    | colType    #partitionColumn
    ;

// A partition transform: a plain qualified name or name(arg, ...).
transform
    : qualifiedName                                                           #identityTransform
    | transformName=identifier
      '(' argument+=transformArgument (',' argument+=transformArgument)* ')'  #applyTransform
    ;

// A transform argument: qualified name or constant.
transformArgument
    : qualifiedName
    | constant
    ;

// An expression is a boolean expression.
expression
    : booleanExpression
    ;

// Boolean layer: NOT, EXISTS (subquery), a value expression with an optional predicate,
// then AND listed before OR.
booleanExpression
    : NOT booleanExpression                                        #logicalNot
    | EXISTS '(' query ')'                                         #exists
    | valueExpression predicate?                                   #predicated
    | left=booleanExpression operator=AND right=booleanExpression  #logicalBinary
    | left=booleanExpression operator=OR right=booleanExpression   #logicalBinary
    ;

// The predicate that may follow a value expression; `kind` captures the defining keyword.
predicate
    : NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression
    | NOT? kind=IN '(' expression (',' expression)* ')'
    | NOT? kind=IN '(' query ')'
    | NOT? kind=RLIKE pattern=valueExpression
    | NOT? kind=LIKE quantifier=(ANY | SOME | ALL) ('('')' | '(' expression (',' expression)* ')')
    | NOT? kind=LIKE pattern=valueExpression (ESCAPE escapeChar=STRING)?
    | IS NOT? kind=NULL
    | IS NOT? kind=(TRUE | FALSE | UNKNOWN)
    | IS NOT? kind=DISTINCT FROM right=valueExpression
    ;

// Arithmetic / comparison layer. Alternatives are listed in this order: unary sign,
// * / % DIV, + - ||, &, ^, |, and comparison last.
valueExpression
    : primaryExpression                                                                      #valueExpressionDefault
    | operator=(MINUS | PLUS | TILDE) valueExpression                                        #arithmeticUnary
    | left=valueExpression operator=(ASTERISK | SLASH | PERCENT | DIV) right=valueExpression #arithmeticBinary
    | left=valueExpression operator=(PLUS | MINUS | CONCAT_PIPE) right=valueExpression       #arithmeticBinary
    | left=valueExpression operator=AMPERSAND right=valueExpression                          #arithmeticBinary
    | left=valueExpression operator=HAT right=valueExpression                                #arithmeticBinary
    | left=valueExpression operator=PIPE right=valueExpression                               #arithmeticBinary
    | left=valueExpression comparisonOperator right=valueExpression                          #comparison
    ;

primaryExpression
    : name=(CURRENT_DATE | CURRENT_TIMESTAMP)                                                  #currentDatetime
    | CASE whenClause+ (ELSE elseExpression=expression)? END                                   #searchedCase
    | CASE value=expression whenClause+ (ELSE elseExpression=expression)? END                  #simpleCase
    | CAST '(' expression AS dataType ')'                                                      #cast
    | STRUCT '(' (argument+=namedExpression (',' argument+=namedExpression)*)? ')'             #struct
    | FIRST '(' expression (IGNORE NULLS)? ')'                                                 #first
    | LAST '(' expression (IGNORE NULLS)?
')' #last 799 | | POSITION '(' substr=valueExpression IN str=valueExpression ')' #position 800 | | constant #constantDefault 801 | | ASTERISK #star 802 | | qualifiedName '.' ASTERISK #star 803 | | '(' namedExpression (',' namedExpression)+ ')' #rowConstructor 804 | | '(' query ')' #subqueryExpression 805 | | functionName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')' 806 | (FILTER '(' WHERE where=booleanExpression ')')? (OVER windowSpec)? #functionCall 807 | | identifier '->' expression #lambda 808 | | '(' identifier (',' identifier)+ ')' '->' expression #lambda 809 | | value=primaryExpression '[' index=valueExpression ']' #subscript 810 | | identifier #columnReference 811 | | base=primaryExpression '.' fieldName=identifier #dereference 812 | | '(' expression ')' #parenthesizedExpression 813 | | EXTRACT '(' field=identifier FROM source=valueExpression ')' #extract 814 | | (SUBSTR | SUBSTRING) '(' str=valueExpression (FROM | ',') pos=valueExpression 815 | ((FOR | ',') len=valueExpression)? ')' #substring 816 | | TRIM '(' trimOption=(BOTH | LEADING | TRAILING)? (trimStr=valueExpression)? 817 | FROM srcStr=valueExpression ')' #trim 818 | | OVERLAY '(' input=valueExpression PLACING replace=valueExpression 819 | FROM position=valueExpression (FOR length=valueExpression)? 
')' #overlay 820 | ; 821 | 822 | constant 823 | : NULL #nullLiteral 824 | | interval #intervalLiteral 825 | | identifier STRING #typeConstructor 826 | | number #numericLiteral 827 | | booleanValue #booleanLiteral 828 | | STRING+ #stringLiteral 829 | ; 830 | 831 | comparisonOperator 832 | : EQ | NEQ | NEQJ | LT | LTE | GT | GTE | NSEQ 833 | ; 834 | 835 | arithmeticOperator 836 | : PLUS | MINUS | ASTERISK | SLASH | PERCENT | DIV | TILDE | AMPERSAND | PIPE | CONCAT_PIPE | HAT 837 | ; 838 | 839 | predicateOperator 840 | : OR | AND | IN | NOT 841 | ; 842 | 843 | booleanValue 844 | : TRUE | FALSE 845 | ; 846 | 847 | interval 848 | : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)? 849 | ; 850 | 851 | errorCapturingMultiUnitsInterval 852 | : multiUnitsInterval unitToUnitInterval? 853 | ; 854 | 855 | multiUnitsInterval 856 | : (intervalValue unit+=identifier)+ 857 | ; 858 | 859 | errorCapturingUnitToUnitInterval 860 | : body=unitToUnitInterval (error1=multiUnitsInterval | error2=unitToUnitInterval)? 861 | ; 862 | 863 | unitToUnitInterval 864 | : value=intervalValue from=identifier TO to=identifier 865 | ; 866 | 867 | intervalValue 868 | : (PLUS | MINUS)? (INTEGER_VALUE | DECIMAL_VALUE) 869 | | STRING 870 | ; 871 | 872 | colPosition 873 | : position=FIRST | position=AFTER afterCol=errorCapturingIdentifier 874 | ; 875 | 876 | dataType 877 | : complex=ARRAY '<' dataType '>' #complexDataType 878 | | complex=MAP '<' dataType ',' dataType '>' #complexDataType 879 | | complex=STRUCT ('<' complexColTypeList? '>' | NEQ) #complexDataType 880 | | identifier ('(' INTEGER_VALUE (',' INTEGER_VALUE)* ')')? #primitiveDataType 881 | ; 882 | 883 | qualifiedColTypeWithPositionList 884 | : qualifiedColTypeWithPosition (',' qualifiedColTypeWithPosition)* 885 | ; 886 | 887 | qualifiedColTypeWithPosition 888 | : name=multipartIdentifier dataType (NOT NULL)? commentSpec? colPosition? 
889 | ; 890 | 891 | colTypeList 892 | : colType (',' colType)* 893 | ; 894 | 895 | colType 896 | : colName=errorCapturingIdentifier dataType (NOT NULL)? commentSpec? 897 | ; 898 | 899 | complexColTypeList 900 | : complexColType (',' complexColType)* 901 | ; 902 | 903 | complexColType 904 | : identifier ':' dataType (NOT NULL)? commentSpec? 905 | ; 906 | 907 | whenClause 908 | : WHEN condition=expression THEN result=expression 909 | ; 910 | 911 | windowClause 912 | : WINDOW namedWindow (',' namedWindow)* 913 | ; 914 | 915 | namedWindow 916 | : name=errorCapturingIdentifier AS windowSpec 917 | ; 918 | 919 | windowSpec 920 | : name=errorCapturingIdentifier #windowRef 921 | | '('name=errorCapturingIdentifier')' #windowRef 922 | | '(' 923 | ( CLUSTER BY partition+=expression (',' partition+=expression)* 924 | | ((PARTITION | DISTRIBUTE) BY partition+=expression (',' partition+=expression)*)? 925 | ((ORDER | SORT) BY sortItem (',' sortItem)*)?) 926 | windowFrame? 927 | ')' #windowDef 928 | ; 929 | 930 | windowFrame 931 | : frameType=RANGE start=frameBound 932 | | frameType=ROWS start=frameBound 933 | | frameType=RANGE BETWEEN start=frameBound AND end=frameBound 934 | | frameType=ROWS BETWEEN start=frameBound AND end=frameBound 935 | ; 936 | 937 | frameBound 938 | : UNBOUNDED boundType=(PRECEDING | FOLLOWING) 939 | | boundType=CURRENT ROW 940 | | expression boundType=(PRECEDING | FOLLOWING) 941 | ; 942 | 943 | qualifiedNameList 944 | : qualifiedName (',' qualifiedName)* 945 | ; 946 | 947 | functionName 948 | : qualifiedName 949 | | FILTER 950 | | LEFT 951 | | RIGHT 952 | ; 953 | 954 | qualifiedName 955 | : identifier ('.' 
identifier)* 956 | ; 957 | 958 | // this rule is used for explicitly capturing wrong identifiers such as test-table, which should actually be `test-table` 959 | // replace identifier with errorCapturingIdentifier where the immediate follow symbol is not an expression, otherwise 960 | // valid expressions such as "a-b" can be recognized as an identifier 961 | errorCapturingIdentifier 962 | : identifier errorCapturingIdentifierExtra 963 | ; 964 | 965 | // extra left-factoring grammar 966 | errorCapturingIdentifierExtra 967 | : (MINUS identifier)+ #errorIdent 968 | | #realIdent 969 | ; 970 | 971 | identifier 972 | : strictIdentifier 973 | | {!SQL_standard_keyword_behavior}? strictNonReserved 974 | ; 975 | 976 | strictIdentifier 977 | : IDENTIFIER #unquotedIdentifier 978 | | quotedIdentifier #quotedIdentifierAlternative 979 | | {SQL_standard_keyword_behavior}? ansiNonReserved #unquotedIdentifier 980 | | {!SQL_standard_keyword_behavior}? nonReserved #unquotedIdentifier 981 | ; 982 | 983 | quotedIdentifier 984 | : BACKQUOTED_IDENTIFIER 985 | ; 986 | 987 | number 988 | : {!legacy_exponent_literal_as_decimal_enabled}? MINUS? EXPONENT_VALUE #exponentLiteral 989 | | {!legacy_exponent_literal_as_decimal_enabled}? MINUS? DECIMAL_VALUE #decimalLiteral 990 | | {legacy_exponent_literal_as_decimal_enabled}? MINUS? (EXPONENT_VALUE | DECIMAL_VALUE) #legacyDecimalLiteral 991 | | MINUS? INTEGER_VALUE #integerLiteral 992 | | MINUS? BIGINT_LITERAL #bigIntLiteral 993 | | MINUS? SMALLINT_LITERAL #smallIntLiteral 994 | | MINUS? TINYINT_LITERAL #tinyIntLiteral 995 | | MINUS? DOUBLE_LITERAL #doubleLiteral 996 | | MINUS? FLOAT_LITERAL #floatLiteral 997 | | MINUS? BIGDECIMAL_LITERAL #bigDecimalLiteral 998 | ; 999 | 1000 | alterColumnAction 1001 | : TYPE dataType 1002 | | commentSpec 1003 | | colPosition 1004 | | setOrDrop=(SET | DROP) NOT NULL 1005 | ; 1006 | 1007 | // When `SQL_standard_keyword_behavior=true`, there are 2 kinds of keywords in Spark SQL. 
1008 | // - Reserved keywords: 1009 | // Keywords that are reserved and can't be used as identifiers for table, view, column, 1010 | // function, alias, etc. 1011 | // - Non-reserved keywords: 1012 | // Keywords that have a special meaning only in particular contexts and can be used as 1013 | // identifiers in other contexts. For example, `EXPLAIN SELECT ...` is a command, but EXPLAIN 1014 | // can be used as identifiers in other places. 1015 | // You can find the full keywords list by searching "Start of the keywords list" in this file. 1016 | // The non-reserved keywords are listed below. Keywords not in this list are reserved keywords. 1017 | ansiNonReserved 1018 | //--ANSI-NON-RESERVED-START 1019 | : ADD 1020 | | AFTER 1021 | | ALTER 1022 | | ANALYZE 1023 | | ANTI 1024 | | ARCHIVE 1025 | | ARRAY 1026 | | ASC 1027 | | AT 1028 | | BETWEEN 1029 | | BUCKET 1030 | | BUCKETS 1031 | | BY 1032 | | CACHE 1033 | | CASCADE 1034 | | CHANGE 1035 | | CLEAR 1036 | | CLUSTER 1037 | | CLUSTERED 1038 | | CODEGEN 1039 | | COLLECTION 1040 | | COLUMNS 1041 | | COMMENT 1042 | | COMMIT 1043 | | COMPACT 1044 | | COMPACTIONS 1045 | | COMPUTE 1046 | | CONCATENATE 1047 | | COST 1048 | | CUBE 1049 | | CURRENT 1050 | | DATA 1051 | | DATABASE 1052 | | DATABASES 1053 | | DBPROPERTIES 1054 | | DEFINED 1055 | | DELETE 1056 | | DELIMITED 1057 | | DESC 1058 | | DESCRIBE 1059 | | DFS 1060 | | DIRECTORIES 1061 | | DIRECTORY 1062 | | DISTRIBUTE 1063 | | DIV 1064 | | DROP 1065 | | ESCAPED 1066 | | EXCHANGE 1067 | | EXISTS 1068 | | EXPLAIN 1069 | | EXPORT 1070 | | EXTENDED 1071 | | EXTERNAL 1072 | | EXTRACT 1073 | | FIELDS 1074 | | FILEFORMAT 1075 | | FIRST 1076 | | FOLLOWING 1077 | | FORMAT 1078 | | FORMATTED 1079 | | FUNCTION 1080 | | FUNCTIONS 1081 | | GLOBAL 1082 | | GROUPING 1083 | | IF 1084 | | IGNORE 1085 | | IMPORT 1086 | | INDEX 1087 | | INDEXES 1088 | | INPATH 1089 | | INPUTFORMAT 1090 | | INSERT 1091 | | INTERVAL 1092 | | ITEMS 1093 | | KEYS 1094 | | LAST 1095 | | LATERAL 1096 | | LAZY 
1097 | | LIKE 1098 | | LIMIT 1099 | | LINES 1100 | | LIST 1101 | | LOAD 1102 | | LOCAL 1103 | | LOCATION 1104 | | LOCK 1105 | | LOCKS 1106 | | LOGICAL 1107 | | MACRO 1108 | | MAP 1109 | | MATCHED 1110 | | MERGE 1111 | | MSCK 1112 | | NAMESPACE 1113 | | NAMESPACES 1114 | | NO 1115 | | NULLS 1116 | | OF 1117 | | OPTION 1118 | | OPTIONS 1119 | | OUT 1120 | | OUTPUTFORMAT 1121 | | OVER 1122 | | OVERLAY 1123 | | OVERWRITE 1124 | | PARTITION 1125 | | PARTITIONED 1126 | | PARTITIONS 1127 | | PERCENTLIT 1128 | | PIVOT 1129 | | PLACING 1130 | | POSITION 1131 | | PRECEDING 1132 | | PRINCIPALS 1133 | | PROPERTIES 1134 | | PURGE 1135 | | QUERY 1136 | | RANGE 1137 | | RECORDREADER 1138 | | RECORDWRITER 1139 | | RECOVER 1140 | | REDUCE 1141 | | REFRESH 1142 | | RENAME 1143 | | REPAIR 1144 | | REPLACE 1145 | | RESET 1146 | | RESTRICT 1147 | | REVOKE 1148 | | RLIKE 1149 | | ROLE 1150 | | ROLES 1151 | | ROLLBACK 1152 | | ROLLUP 1153 | | ROW 1154 | | ROWS 1155 | | SCHEMA 1156 | | SEMI 1157 | | SEPARATED 1158 | | SERDE 1159 | | SERDEPROPERTIES 1160 | | SET 1161 | | SETMINUS 1162 | | SETS 1163 | | SHOW 1164 | | SKEWED 1165 | | SORT 1166 | | SORTED 1167 | | START 1168 | | STATISTICS 1169 | | STORED 1170 | | STRATIFY 1171 | | STRUCT 1172 | | SUBSTR 1173 | | SUBSTRING 1174 | | TABLES 1175 | | TABLESAMPLE 1176 | | TBLPROPERTIES 1177 | | TEMPORARY 1178 | | TERMINATED 1179 | | TOUCH 1180 | | TRANSACTION 1181 | | TRANSACTIONS 1182 | | TRANSFORM 1183 | | TRIM 1184 | | TRUE 1185 | | TRUNCATE 1186 | | TYPE 1187 | | UNARCHIVE 1188 | | UNBOUNDED 1189 | | UNCACHE 1190 | | UNLOCK 1191 | | UNSET 1192 | | UPDATE 1193 | | USE 1194 | | VALUES 1195 | | VIEW 1196 | | VIEWS 1197 | | WINDOW 1198 | | ZONE 1199 | //--ANSI-NON-RESERVED-END 1200 | ; 1201 | 1202 | // When `SQL_standard_keyword_behavior=false`, there are 2 kinds of keywords in Spark SQL. 1203 | // - Non-reserved keywords: 1204 | // Same definition as the one when `SQL_standard_keyword_behavior=true`. 
1205 | // - Strict-non-reserved keywords: 1206 | // A strict version of non-reserved keywords, which can not be used as table alias. 1207 | // You can find the full keywords list by searching "Start of the keywords list" in this file. 1208 | // The strict-non-reserved keywords are listed in `strictNonReserved`. 1209 | // The non-reserved keywords are listed in `nonReserved`. 1210 | // These 2 together contain all the keywords. 1211 | strictNonReserved 1212 | : ANTI 1213 | | CROSS 1214 | | EXCEPT 1215 | | FULL 1216 | | INNER 1217 | | INTERSECT 1218 | | JOIN 1219 | | LEFT 1220 | | NATURAL 1221 | | ON 1222 | | RIGHT 1223 | | SEMI 1224 | | SETMINUS 1225 | | UNION 1226 | | USING 1227 | ; 1228 | 1229 | nonReserved 1230 | //--DEFAULT-NON-RESERVED-START 1231 | : ADD 1232 | | AFTER 1233 | | ALL 1234 | | ALTER 1235 | | ANALYZE 1236 | | AND 1237 | | ANY 1238 | | ARCHIVE 1239 | | ARRAY 1240 | | AS 1241 | | ASC 1242 | | AT 1243 | | AUTHORIZATION 1244 | | BETWEEN 1245 | | BOTH 1246 | | BUCKET 1247 | | BUCKETS 1248 | | BY 1249 | | CACHE 1250 | | CASCADE 1251 | | CASE 1252 | | CAST 1253 | | CHANGE 1254 | | CHECK 1255 | | CLEAR 1256 | | CLUSTER 1257 | | CLUSTERED 1258 | | CODEGEN 1259 | | COLLATE 1260 | | COLLECTION 1261 | | COLUMN 1262 | | COLUMNS 1263 | | COMMENT 1264 | | COMMIT 1265 | | COMPACT 1266 | | COMPACTIONS 1267 | | COMPUTE 1268 | | CONCATENATE 1269 | | CONSTRAINT 1270 | | COST 1271 | | CREATE 1272 | | CUBE 1273 | | CURRENT 1274 | | CURRENT_DATE 1275 | | CURRENT_TIME 1276 | | CURRENT_TIMESTAMP 1277 | | CURRENT_USER 1278 | | DATA 1279 | | DATABASE 1280 | | DATABASES 1281 | | DBPROPERTIES 1282 | | DEFINED 1283 | | DELETE 1284 | | DELIMITED 1285 | | DESC 1286 | | DESCRIBE 1287 | | DFS 1288 | | DIRECTORIES 1289 | | DIRECTORY 1290 | | DISTINCT 1291 | | DISTRIBUTE 1292 | | DIV 1293 | | DROP 1294 | | ELSE 1295 | | END 1296 | | ESCAPE 1297 | | ESCAPED 1298 | | EXCHANGE 1299 | | EXISTS 1300 | | EXPLAIN 1301 | | EXPORT 1302 | | EXTENDED 1303 | | EXTERNAL 1304 | | EXTRACT 1305 | | 
FALSE 1306 | | FETCH 1307 | | FILTER 1308 | | FIELDS 1309 | | FILEFORMAT 1310 | | FIRST 1311 | | FOLLOWING 1312 | | FOR 1313 | | FOREIGN 1314 | | FORMAT 1315 | | FORMATTED 1316 | | FROM 1317 | | FUNCTION 1318 | | FUNCTIONS 1319 | | GLOBAL 1320 | | GRANT 1321 | | GROUP 1322 | | GROUPING 1323 | | HAVING 1324 | | IF 1325 | | IGNORE 1326 | | IMPORT 1327 | | IN 1328 | | INDEX 1329 | | INDEXES 1330 | | INPATH 1331 | | INPUTFORMAT 1332 | | INSERT 1333 | | INTERVAL 1334 | | INTO 1335 | | IS 1336 | | ITEMS 1337 | | KEYS 1338 | | LAST 1339 | | LATERAL 1340 | | LAZY 1341 | | LEADING 1342 | | LIKE 1343 | | LIMIT 1344 | | LINES 1345 | | LIST 1346 | | LOAD 1347 | | LOCAL 1348 | | LOCATION 1349 | | LOCK 1350 | | LOCKS 1351 | | LOGICAL 1352 | | MACRO 1353 | | MAP 1354 | | MATCHED 1355 | | MERGE 1356 | | MSCK 1357 | | NAMESPACE 1358 | | NAMESPACES 1359 | | NO 1360 | | NOT 1361 | | NULL 1362 | | NULLS 1363 | | OF 1364 | | ONLY 1365 | | OPTION 1366 | | OPTIONS 1367 | | OR 1368 | | ORDER 1369 | | OUT 1370 | | OUTER 1371 | | OUTPUTFORMAT 1372 | | OVER 1373 | | OVERLAPS 1374 | | OVERLAY 1375 | | OVERWRITE 1376 | | PARTITION 1377 | | PARTITIONED 1378 | | PARTITIONS 1379 | | PERCENTLIT 1380 | | PIVOT 1381 | | PLACING 1382 | | POSITION 1383 | | PRECEDING 1384 | | PRIMARY 1385 | | PRINCIPALS 1386 | | PROPERTIES 1387 | | PURGE 1388 | | QUERY 1389 | | RANGE 1390 | | RECORDREADER 1391 | | RECORDWRITER 1392 | | RECOVER 1393 | | REDUCE 1394 | | REFERENCES 1395 | | REFRESH 1396 | | RENAME 1397 | | REPAIR 1398 | | REPLACE 1399 | | RESET 1400 | | RESTRICT 1401 | | REVOKE 1402 | | RLIKE 1403 | | ROLE 1404 | | ROLES 1405 | | ROLLBACK 1406 | | ROLLUP 1407 | | ROW 1408 | | ROWS 1409 | | SCHEMA 1410 | | SELECT 1411 | | SEPARATED 1412 | | SERDE 1413 | | SERDEPROPERTIES 1414 | | SESSION_USER 1415 | | SET 1416 | | SETS 1417 | | SHOW 1418 | | SKEWED 1419 | | SOME 1420 | | SORT 1421 | | SORTED 1422 | | START 1423 | | STATISTICS 1424 | | STORED 1425 | | STRATIFY 1426 | | STRUCT 1427 | | SUBSTR 1428 | | 
SUBSTRING 1429 | | TABLE 1430 | | TABLES 1431 | | TABLESAMPLE 1432 | | TBLPROPERTIES 1433 | | TEMPORARY 1434 | | TERMINATED 1435 | | THEN 1436 | | TIME 1437 | | TO 1438 | | TOUCH 1439 | | TRAILING 1440 | | TRANSACTION 1441 | | TRANSACTIONS 1442 | | TRANSFORM 1443 | | TRIM 1444 | | TRUE 1445 | | TRUNCATE 1446 | | TYPE 1447 | | UNARCHIVE 1448 | | UNBOUNDED 1449 | | UNCACHE 1450 | | UNIQUE 1451 | | UNKNOWN 1452 | | UNLOCK 1453 | | UNSET 1454 | | UPDATE 1455 | | USE 1456 | | USER 1457 | | VALUES 1458 | | VIEW 1459 | | VIEWS 1460 | | WHEN 1461 | | WHERE 1462 | | WINDOW 1463 | | WITH 1464 | | ZONE 1465 | //--DEFAULT-NON-RESERVED-END 1466 | ; 1467 | 1468 | // NOTE: If you add a new token in the list below, you should update the list of keywords 1469 | // and reserved tag in `docs/sql-ref-ansi-compliance.md#sql-keywords`. 1470 | 1471 | //============================ 1472 | // Start of the keywords list 1473 | //============================ 1474 | //--SPARK-KEYWORD-LIST-START 1475 | ADD: 'ADD'; 1476 | AFTER: 'AFTER'; 1477 | ALL: 'ALL'; 1478 | ALTER: 'ALTER'; 1479 | ANALYZE: 'ANALYZE'; 1480 | AND: 'AND'; 1481 | ANTI: 'ANTI'; 1482 | ANY: 'ANY'; 1483 | ARCHIVE: 'ARCHIVE'; 1484 | ARRAY: 'ARRAY'; 1485 | AS: 'AS'; 1486 | ASC: 'ASC'; 1487 | AT: 'AT'; 1488 | AUTHORIZATION: 'AUTHORIZATION'; 1489 | BETWEEN: 'BETWEEN'; 1490 | BOTH: 'BOTH'; 1491 | BUCKET: 'BUCKET'; 1492 | BUCKETS: 'BUCKETS'; 1493 | BY: 'BY'; 1494 | CACHE: 'CACHE'; 1495 | CASCADE: 'CASCADE'; 1496 | CASE: 'CASE'; 1497 | CAST: 'CAST'; 1498 | CHANGE: 'CHANGE'; 1499 | CHECK: 'CHECK'; 1500 | CLEAR: 'CLEAR'; 1501 | CLUSTER: 'CLUSTER'; 1502 | CLUSTERED: 'CLUSTERED'; 1503 | CODEGEN: 'CODEGEN'; 1504 | COLLATE: 'COLLATE'; 1505 | COLLECTION: 'COLLECTION'; 1506 | COLUMN: 'COLUMN'; 1507 | COLUMNS: 'COLUMNS'; 1508 | COMMENT: 'COMMENT'; 1509 | COMMIT: 'COMMIT'; 1510 | COMPACT: 'COMPACT'; 1511 | COMPACTIONS: 'COMPACTIONS'; 1512 | COMPUTE: 'COMPUTE'; 1513 | CONCATENATE: 'CONCATENATE'; 1514 | CONSTRAINT: 'CONSTRAINT'; 1515 | COST: 
'COST'; 1516 | CREATE: 'CREATE'; 1517 | CROSS: 'CROSS'; 1518 | CUBE: 'CUBE'; 1519 | CURRENT: 'CURRENT'; 1520 | CURRENT_DATE: 'CURRENT_DATE'; 1521 | CURRENT_TIME: 'CURRENT_TIME'; 1522 | CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'; 1523 | CURRENT_USER: 'CURRENT_USER'; 1524 | DATA: 'DATA'; 1525 | DATABASE: 'DATABASE'; 1526 | DATABASES: 'DATABASES' | 'SCHEMAS'; 1527 | DBPROPERTIES: 'DBPROPERTIES'; 1528 | DEFINED: 'DEFINED'; 1529 | DELETE: 'DELETE'; 1530 | DELIMITED: 'DELIMITED'; 1531 | DESC: 'DESC'; 1532 | DESCRIBE: 'DESCRIBE'; 1533 | DFS: 'DFS'; 1534 | DIRECTORIES: 'DIRECTORIES'; 1535 | DIRECTORY: 'DIRECTORY'; 1536 | DISTINCT: 'DISTINCT'; 1537 | DISTRIBUTE: 'DISTRIBUTE'; 1538 | DIV: 'DIV'; 1539 | DROP: 'DROP'; 1540 | ELSE: 'ELSE'; 1541 | END: 'END'; 1542 | ESCAPE: 'ESCAPE'; 1543 | ESCAPED: 'ESCAPED'; 1544 | EXCEPT: 'EXCEPT'; 1545 | EXCHANGE: 'EXCHANGE'; 1546 | EXISTS: 'EXISTS'; 1547 | EXPLAIN: 'EXPLAIN'; 1548 | EXPORT: 'EXPORT'; 1549 | EXTENDED: 'EXTENDED'; 1550 | EXTERNAL: 'EXTERNAL'; 1551 | EXTRACT: 'EXTRACT'; 1552 | FALSE: 'FALSE'; 1553 | FETCH: 'FETCH'; 1554 | FIELDS: 'FIELDS'; 1555 | FILTER: 'FILTER'; 1556 | FILEFORMAT: 'FILEFORMAT'; 1557 | FIRST: 'FIRST'; 1558 | FOLLOWING: 'FOLLOWING'; 1559 | FOR: 'FOR'; 1560 | FOREIGN: 'FOREIGN'; 1561 | FORMAT: 'FORMAT'; 1562 | FORMATTED: 'FORMATTED'; 1563 | FROM: 'FROM'; 1564 | FULL: 'FULL'; 1565 | FUNCTION: 'FUNCTION'; 1566 | FUNCTIONS: 'FUNCTIONS'; 1567 | GLOBAL: 'GLOBAL'; 1568 | GRANT: 'GRANT'; 1569 | GROUP: 'GROUP'; 1570 | GROUPING: 'GROUPING'; 1571 | HAVING: 'HAVING'; 1572 | IF: 'IF'; 1573 | IGNORE: 'IGNORE'; 1574 | IMPORT: 'IMPORT'; 1575 | IN: 'IN'; 1576 | INDEX: 'INDEX'; 1577 | INDEXES: 'INDEXES'; 1578 | INNER: 'INNER'; 1579 | INPATH: 'INPATH'; 1580 | INPUTFORMAT: 'INPUTFORMAT'; 1581 | INSERT: 'INSERT'; 1582 | INTERSECT: 'INTERSECT'; 1583 | INTERVAL: 'INTERVAL'; 1584 | INTO: 'INTO'; 1585 | IS: 'IS'; 1586 | ITEMS: 'ITEMS'; 1587 | JOIN: 'JOIN'; 1588 | KEYS: 'KEYS'; 1589 | LAST: 'LAST'; 1590 | LATERAL: 'LATERAL'; 1591 | LAZY: 
'LAZY'; 1592 | LEADING: 'LEADING'; 1593 | LEFT: 'LEFT'; 1594 | LIKE: 'LIKE'; 1595 | LIMIT: 'LIMIT'; 1596 | LINES: 'LINES'; 1597 | LIST: 'LIST'; 1598 | LOAD: 'LOAD'; 1599 | LOCAL: 'LOCAL'; 1600 | LOCATION: 'LOCATION'; 1601 | LOCK: 'LOCK'; 1602 | LOCKS: 'LOCKS'; 1603 | LOGICAL: 'LOGICAL'; 1604 | MACRO: 'MACRO'; 1605 | MAP: 'MAP'; 1606 | MATCHED: 'MATCHED'; 1607 | MERGE: 'MERGE'; 1608 | MSCK: 'MSCK'; 1609 | NAMESPACE: 'NAMESPACE'; 1610 | NAMESPACES: 'NAMESPACES'; 1611 | NATURAL: 'NATURAL'; 1612 | NO: 'NO'; 1613 | NOT: 'NOT' | '!'; 1614 | NULL: 'NULL'; 1615 | NULLS: 'NULLS'; 1616 | OF: 'OF'; 1617 | ON: 'ON'; 1618 | ONLY: 'ONLY'; 1619 | OPTION: 'OPTION'; 1620 | OPTIONS: 'OPTIONS'; 1621 | OR: 'OR'; 1622 | ORDER: 'ORDER'; 1623 | OUT: 'OUT'; 1624 | OUTER: 'OUTER'; 1625 | OUTPUTFORMAT: 'OUTPUTFORMAT'; 1626 | OVER: 'OVER'; 1627 | OVERLAPS: 'OVERLAPS'; 1628 | OVERLAY: 'OVERLAY'; 1629 | OVERWRITE: 'OVERWRITE'; 1630 | PARTITION: 'PARTITION'; 1631 | PARTITIONED: 'PARTITIONED'; 1632 | PARTITIONS: 'PARTITIONS'; 1633 | PERCENTLIT: 'PERCENT'; 1634 | PIVOT: 'PIVOT'; 1635 | PLACING: 'PLACING'; 1636 | POSITION: 'POSITION'; 1637 | PRECEDING: 'PRECEDING'; 1638 | PRIMARY: 'PRIMARY'; 1639 | PRINCIPALS: 'PRINCIPALS'; 1640 | PROPERTIES: 'PROPERTIES'; 1641 | PURGE: 'PURGE'; 1642 | QUERY: 'QUERY'; 1643 | RANGE: 'RANGE'; 1644 | RECORDREADER: 'RECORDREADER'; 1645 | RECORDWRITER: 'RECORDWRITER'; 1646 | RECOVER: 'RECOVER'; 1647 | REDUCE: 'REDUCE'; 1648 | REFERENCES: 'REFERENCES'; 1649 | REFRESH: 'REFRESH'; 1650 | RENAME: 'RENAME'; 1651 | REPAIR: 'REPAIR'; 1652 | REPLACE: 'REPLACE'; 1653 | RESET: 'RESET'; 1654 | RESTRICT: 'RESTRICT'; 1655 | REVOKE: 'REVOKE'; 1656 | RIGHT: 'RIGHT'; 1657 | RLIKE: 'RLIKE' | 'REGEXP'; 1658 | ROLE: 'ROLE'; 1659 | ROLES: 'ROLES'; 1660 | ROLLBACK: 'ROLLBACK'; 1661 | ROLLUP: 'ROLLUP'; 1662 | ROW: 'ROW'; 1663 | ROWS: 'ROWS'; 1664 | SCHEMA: 'SCHEMA'; 1665 | SELECT: 'SELECT'; 1666 | SEMI: 'SEMI'; 1667 | SEPARATED: 'SEPARATED'; 1668 | SERDE: 'SERDE'; 1669 | SERDEPROPERTIES: 
'SERDEPROPERTIES'; 1670 | SESSION_USER: 'SESSION_USER'; 1671 | SET: 'SET'; 1672 | SETMINUS: 'MINUS'; 1673 | SETS: 'SETS'; 1674 | SHOW: 'SHOW'; 1675 | SKEWED: 'SKEWED'; 1676 | SOME: 'SOME'; 1677 | SORT: 'SORT'; 1678 | SORTED: 'SORTED'; 1679 | START: 'START'; 1680 | STATISTICS: 'STATISTICS'; 1681 | STORED: 'STORED'; 1682 | STRATIFY: 'STRATIFY'; 1683 | STRUCT: 'STRUCT'; 1684 | SUBSTR: 'SUBSTR'; 1685 | SUBSTRING: 'SUBSTRING'; 1686 | TABLE: 'TABLE'; 1687 | TABLES: 'TABLES'; 1688 | TABLESAMPLE: 'TABLESAMPLE'; 1689 | TBLPROPERTIES: 'TBLPROPERTIES'; 1690 | TEMPORARY: 'TEMPORARY' | 'TEMP'; 1691 | TERMINATED: 'TERMINATED'; 1692 | THEN: 'THEN'; 1693 | TIME: 'TIME'; 1694 | TO: 'TO'; 1695 | TOUCH: 'TOUCH'; 1696 | TRAILING: 'TRAILING'; 1697 | TRANSACTION: 'TRANSACTION'; 1698 | TRANSACTIONS: 'TRANSACTIONS'; 1699 | TRANSFORM: 'TRANSFORM'; 1700 | TRIM: 'TRIM'; 1701 | TRUE: 'TRUE'; 1702 | TRUNCATE: 'TRUNCATE'; 1703 | TYPE: 'TYPE'; 1704 | UNARCHIVE: 'UNARCHIVE'; 1705 | UNBOUNDED: 'UNBOUNDED'; 1706 | UNCACHE: 'UNCACHE'; 1707 | UNION: 'UNION'; 1708 | UNIQUE: 'UNIQUE'; 1709 | UNKNOWN: 'UNKNOWN'; 1710 | UNLOCK: 'UNLOCK'; 1711 | UNSET: 'UNSET'; 1712 | UPDATE: 'UPDATE'; 1713 | USE: 'USE'; 1714 | USER: 'USER'; 1715 | USING: 'USING'; 1716 | VALUES: 'VALUES'; 1717 | VIEW: 'VIEW'; 1718 | VIEWS: 'VIEWS'; 1719 | WHEN: 'WHEN'; 1720 | WHERE: 'WHERE'; 1721 | WINDOW: 'WINDOW'; 1722 | WITH: 'WITH'; 1723 | ZONE: 'ZONE'; 1724 | //--SPARK-KEYWORD-LIST-END 1725 | //============================ 1726 | // End of the keywords list 1727 | //============================ 1728 | 1729 | EQ : '=' | '=='; 1730 | NSEQ: '<=>'; 1731 | NEQ : '<>'; 1732 | NEQJ: '!='; 1733 | LT : '<'; 1734 | LTE : '<=' | '!>'; 1735 | GT : '>'; 1736 | GTE : '>=' | '!<'; 1737 | 1738 | PLUS: '+'; 1739 | MINUS: '-'; 1740 | ASTERISK: '*'; 1741 | SLASH: '/'; 1742 | PERCENT: '%'; 1743 | TILDE: '~'; 1744 | AMPERSAND: '&'; 1745 | PIPE: '|'; 1746 | CONCAT_PIPE: '||'; 1747 | HAT: '^'; 1748 | 1749 | STRING 1750 | : '\'' ( ~('\''|'\\') | ('\\' .) 
)* '\'' 1751 | | '"' ( ~('"'|'\\') | ('\\' .) )* '"' 1752 | ; 1753 | 1754 | BIGINT_LITERAL 1755 | : DIGIT+ 'L' 1756 | ; 1757 | 1758 | SMALLINT_LITERAL 1759 | : DIGIT+ 'S' 1760 | ; 1761 | 1762 | TINYINT_LITERAL 1763 | : DIGIT+ 'Y' 1764 | ; 1765 | 1766 | INTEGER_VALUE 1767 | : DIGIT+ 1768 | ; 1769 | 1770 | EXPONENT_VALUE 1771 | : DIGIT+ EXPONENT 1772 | | DECIMAL_DIGITS EXPONENT {isValidDecimal()}? 1773 | ; 1774 | 1775 | DECIMAL_VALUE 1776 | : DECIMAL_DIGITS {isValidDecimal()}? 1777 | ; 1778 | 1779 | FLOAT_LITERAL 1780 | : DIGIT+ EXPONENT? 'F' 1781 | | DECIMAL_DIGITS EXPONENT? 'F' {isValidDecimal()}? 1782 | ; 1783 | 1784 | DOUBLE_LITERAL 1785 | : DIGIT+ EXPONENT? 'D' 1786 | | DECIMAL_DIGITS EXPONENT? 'D' {isValidDecimal()}? 1787 | ; 1788 | 1789 | BIGDECIMAL_LITERAL 1790 | : DIGIT+ EXPONENT? 'BD' 1791 | | DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}? 1792 | ; 1793 | 1794 | IDENTIFIER 1795 | : (LETTER | DIGIT | '_')+ 1796 | ; 1797 | 1798 | BACKQUOTED_IDENTIFIER 1799 | : '`' ( ~'`' | '``' )* '`' 1800 | ; 1801 | 1802 | fragment DECIMAL_DIGITS 1803 | : DIGIT+ '.' DIGIT* 1804 | | '.' DIGIT+ 1805 | ; 1806 | 1807 | fragment EXPONENT 1808 | : 'E' [+-]? DIGIT+ 1809 | ; 1810 | 1811 | fragment DIGIT 1812 | : [0-9] 1813 | ; 1814 | 1815 | fragment LETTER 1816 | : [A-Z] 1817 | ; 1818 | 1819 | SIMPLE_COMMENT 1820 | : '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN) 1821 | ; 1822 | 1823 | BRACKETED_COMMENT 1824 | : '/*' {!isHint()}? (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN) 1825 | ; 1826 | 1827 | WS 1828 | : [ \r\n\t]+ -> channel(HIDDEN) 1829 | ; 1830 | 1831 | // Catch-all for anything we can't recognize. 1832 | // We use this to be able to ignore and recover all the text 1833 | // when splitting statements with DelimiterLexer 1834 | UNRECOGNIZED 1835 | : . 
1836 | ; 1837 | -------------------------------------------------------------------------------- /sqllineage4j-parser/src/main/java/io/github/reata/sqllineage4j/parser/LineageParser.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.parser; 2 | 3 | import org.antlr.v4.runtime.CharStream; 4 | import org.antlr.v4.runtime.CharStreams; 5 | import org.antlr.v4.runtime.CommonTokenStream; 6 | import org.antlr.v4.runtime.tree.ParseTree; 7 | 8 | /** Entry point that runs the ANTLR SqlBase lexer and parser (SqlBaseLexer / SqlBaseParser) over a SQL string. */ 9 | public class LineageParser { 10 | 11 | /** 12 | * Parses {@code sql} with the {@code singleStatement} rule and returns the resulting parse tree. 13 | * The text is upper-cased before lexing because the grammar's keyword tokens and LETTER fragment only match upper-case ASCII. 14 | * Locale.ROOT keeps the upper-casing independent of the JVM default locale (under tr-TR, 'i' would become a dotted capital I and keywords such as INSERT would no longer lex). 15 | */ 16 | public static ParseTree parse(String sql) { 17 | CharStream inputStream = CharStreams.fromString(sql.toUpperCase(java.util.Locale.ROOT)); 18 | SqlBaseLexer sqlBaseLexer = new SqlBaseLexer(inputStream); 19 | CommonTokenStream commonTokenStream = new CommonTokenStream(sqlBaseLexer); 20 | SqlBaseParser sqlBaseParser = new SqlBaseParser(commonTokenStream); 21 | return sqlBaseParser.singleStatement(); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /sqllineage4j-parser/src/main/java/io/github/reata/sqllineage4j/parser/StatementSplitter.java: -------------------------------------------------------------------------------- 1 | package io.github.reata.sqllineage4j.parser; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | /* 7 | This is from Spark's SparkSQLCLIDriver 8 | See https://github.com/apache/spark/blob/master/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala 9 | */ 10 | public class StatementSplitter { 11 | private boolean insideSingleQuote = false; 12 | private boolean insideDoubleQuote = false; 13 | private boolean insideSimpleComment = false; // true from "--" until the next unescaped newline 14 | private int bracketedCommentLevel = 0; // greater than 0 while the scan is inside a bracketed comment 15 | private boolean escape = false; // true when the previous character was an unescaped backslash 16 | private int beginIndex = 0; // start offset of the segment currently being scanned 17 | private boolean leavingBracketedComment = false; // set on the closing '/' of a bracketed comment, consumed on the next character 18 | private boolean isStatement = false; 19 | private final ArrayList<String> ret = new ArrayList<>(); // statements collected so far 20 | 21 | 
private final String line; 22 | 23 | public StatementSplitter(String sql) { 24 | this.line = sql; 25 | } 26 | 27 | /** Splits the SQL text on ';' characters that lie outside quotes and comments and returns the pieces in order, without the ';'. A piece is emitted only once statementInProgress has flagged it; trailing text after the last ';' is emitted under the same rule, or when a bracketed comment is still open. Scanner state lives in instance fields that are never reset, so use one instance per input. */ public List<String> split() { 28 | for (int index = 0; index < line.length(); index++) { 29 | char c = line.charAt(index); 30 | if (leavingBracketedComment) { 31 | bracketedCommentLevel -= 1; // undo the increment made when the comment was opened (was "= -1", which hid every second comment) 32 | leavingBracketedComment = false; 33 | } 34 | if (c == '\'' && !insideComment()) { 35 | if (!escape && !insideDoubleQuote) { 36 | insideSingleQuote = !insideSingleQuote; 37 | } 38 | } else if (c == '\"' && !insideComment()) { 39 | if (!escape && !insideSingleQuote) { 40 | insideDoubleQuote = !insideDoubleQuote; 41 | } 42 | } else if (c == '-') { 43 | boolean hasNext = index + 1 < line.length(); 44 | if (insideDoubleQuote || insideSingleQuote || insideComment()) { // '-' inside quotes or comments is ignored 45 | } else if (hasNext && line.charAt(index + 1) == '-') { 46 | insideSimpleComment = true; 47 | } 48 | } else if (c == ';') { 49 | if (insideSingleQuote || insideDoubleQuote || insideComment()) { // ';' inside quotes or comments does not split 50 | } else { 51 | if (isStatement) { 52 | ret.add(line.substring(beginIndex, index)); 53 | } 54 | beginIndex = index + 1; 55 | isStatement = false; 56 | } 57 | } else if (c == '\n') { 58 | if (!escape) { 59 | insideSimpleComment = false; 60 | } 61 | } else if (c == '/' && !insideSimpleComment) { 62 | boolean hasNext = index + 1 < line.length(); 63 | if (insideSingleQuote || insideDoubleQuote) { // '/' inside quotes is ignored 64 | } else if (insideBracketedComment() && line.charAt(index - 1) == '*') { 65 | leavingBracketedComment = true; // the level is decremented at the top of the next iteration 66 | } else if (hasNext && !insideBracketedComment() && line.charAt(index + 1) == '*') { 67 | bracketedCommentLevel += 1; 68 | } 69 | } 70 | if (escape) { 71 | escape = false; 72 | } else if (line.charAt(index) == '\\') { 73 | escape = true; 74 | } 75 | isStatement = statementInProgress(index); 76 | } 77 | boolean endOfBracketedComment = leavingBracketedComment && bracketedCommentLevel == 1; 78 | if (!endOfBracketedComment && (isStatement || insideBracketedComment())) { 79 | ret.add(line.substring(beginIndex)); 
80 | } 81 | return ret; 82 | } 83 | 84 | /** True while the scan is inside a bracketed comment. */ 85 | private boolean insideBracketedComment() { 86 | return bracketedCommentLevel >= 1; 87 | } 88 | 89 | /** True while the scan is inside either comment form, simple or bracketed. */ 90 | private boolean insideComment() { 91 | return insideBracketedComment() || insideSimpleComment; 92 | } 93 | 94 | /** Reports whether the segment starting at beginIndex counts as a statement once the character at index has been seen. */ 95 | private boolean statementInProgress(int index) { 96 | if (isStatement) { 97 | return true; 98 | } 99 | if (insideComment() || index <= beginIndex) { 100 | return false; 101 | } 102 | // Same test as "trim() leaves a non-empty string": trim() strips exactly the characters <= ' '. 103 | return line.charAt(index) > ' '; 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /sqllineage4j-test-coverage/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4 | sqllineage4j 5 | io.github.reata 6 | 1.0.1-SNAPSHOT 7 | 8 | 4.0.0 9 | sqllineage4j-test-coverage 10 | sqllineage4j-test-coverage 11 | 12 | 13 | true 14 | 15 | 16 | 17 | 18 | io.github.reata 19 | sqllineage4j-common 20 | 21 | 22 | io.github.reata 23 | sqllineage4j-parser 24 | 25 | 26 | io.github.reata 27 | sqllineage4j-core 28 | 29 | 30 | io.github.reata 31 | sqllineage4j-cli 32 | 33 | 34 | 35 | 36 | 37 | 38 | org.jacoco 39 | jacoco-maven-plugin 40 | 41 | 42 | report-aggregate 43 | verify 44 | 45 | report-aggregate 46 | 47 | 48 | 49 | 50 | 51 | io/github/reata/sqllineage4j/parser/SqlBase* 52 | 53 | 54 | 55 | 56 | 57 | 58 | --------------------------------------------------------------------------------