├── .gitignore ├── LICENSE ├── README.md ├── pom.xml ├── sql-parser.iml └── src ├── main └── java │ └── cn │ └── ganjiacheng │ ├── Application.java │ ├── SqlParserAbstract.java │ ├── SqlParserFactory.java │ ├── SqlParserService.java │ ├── antlr │ ├── HiveSql.g4 │ ├── MySqlLexer.g4 │ ├── MySqlParser.g4 │ ├── PrestoSql.g4 │ └── SparkSql.g4 │ ├── enums │ ├── SqlEngineEnum.java │ └── SqlTypeEnum.java │ ├── hive │ ├── HiveSqlFieldLineageParser.java │ ├── HiveSqlFormatterParser.java │ ├── HiveSqlMetadataParser.java │ ├── HiveSqlTableLineageParser.java │ ├── HiveSqlTypeParser.java │ └── MyHiveSqlParser.java │ ├── model │ ├── lineage │ │ ├── FieldLineageModel.java │ │ ├── FieldLineageSelectItemModel.java │ │ ├── FieldLineageSelectModel.java │ │ ├── FieldNameModel.java │ │ ├── FieldNameWithProcessModel.java │ │ ├── TableLineageModel.java │ │ └── TableNameModel.java │ └── metadata │ │ ├── FieldMetadataModel.java │ │ └── TableMetadataModel.java │ ├── mysql │ ├── MysqlSqlParser.java │ └── MysqlSqlTypeParser.java │ ├── presto │ ├── MyPresoSqlParser.java │ └── PrestoSqlTypeParser.java │ └── spark │ ├── MySparkSqlParser.java │ └── SparkSqlTypeParser.java └── test └── java └── cn └── ganjiacheng └── AppTest.java /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Java template 3 | # Compiled class file 4 | *.class 5 | 6 | # Log file 7 | *.log 8 | 9 | # BlueJ files 10 | *.ctxt 11 | 12 | # Mobile Tools for Java (J2ME) 13 | .mtj.tmp/ 14 | 15 | # Package Files # 16 | *.jar 17 | *.war 18 | *.nar 19 | *.ear 20 | *.zip 21 | *.tar.gz 22 | *.rar 23 | 24 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 25 | hs_err_pid* 26 | 27 | ### Maven template 28 | target/ 29 | pom.xml.tag 30 | pom.xml.releaseBackup 31 | pom.xml.versionsBackup 32 | pom.xml.next 33 | release.properties 34 | dependency-reduced-pom.xml 35 | buildNumber.properties 36 | 
.mvn/timing.properties 37 | # https://github.com/takari/maven-wrapper#usage-without-binary-jar 38 | .mvn/wrapper/maven-wrapper.jar 39 | 40 | .idea 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sql-parser 2 | ## 【学习使用】【仅做参考】 3 | 基于antlr4的sql解析,实现格式化,元数据,血缘等自定义解析,包括hive 4 | 5 | ## 说明文章 6 | 7 | [基于antlr4实现HQL的解析[元数据]](http://ganjiacheng.cn/article/2020/article_6_%E5%9F%BA%E4%BA%8Eantlr4%E5%AE%9E%E7%8E%B0HQL%E7%9A%84%E8%A7%A3%E6%9E%90-%E5%85%83%E6%95%B0%E6%8D%AE/) 8 | 9 | [基于antlr4实现hiveSQL的解析[表血缘和字段血缘]](http://ganjiacheng.cn/article/2020/article_14_%E5%9F%BA%E4%BA%8Eantlr4%E5%AE%9E%E7%8E%B0HQL%E7%9A%84%E8%A7%A3%E6%9E%90-%E8%A1%A8%E8%A1%80%E7%BC%98%E5%92%8C%E5%AD%97%E6%AE%B5%E8%A1%80%E7%BC%98/) 10 | 11 | [基于antlr4实现HQL的解析[格式化]](http://ganjiacheng.cn/article/2020/article_12_%E5%9F%BA%E4%BA%8Eantlr4%E5%AE%9E%E7%8E%B0HQL%E7%9A%84%E8%A7%A3%E6%9E%90-%E6%A0%BC%E5%BC%8F%E5%8C%96/) 12 | 13 | ## 使用说明 14 | 15 | 下载安装[antlr4](https://www.antlr.org/index.html) 16 | 17 | idea装antlr4插件调试 cn.ganjiacheng.antlr.xxx.g4 中的规则 18 | 19 | 拉下来项目代码,运行Application 20 | -------------------------------------------------------------------------------- /pom.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 4.0.0 6 | 7 | cn.ganjiacheng 8 | sql-parser 9 | 1.0-SNAPSHOT 10 | 11 | sql-parser 12 | 13 | http://www.example.com 14 | 15 | 16 | 4.9.1 17 | 18 | 19 | 20 | 21 | junit 22 | junit 23 | 4.11 24 | test 25 | 26 | 27 | com.alibaba 28 | fastjson 29 | 1.2.75 30 | 31 | 32 | org.slf4j 33 | slf4j-api 34 | 1.7.30 35 | 36 | 37 | org.slf4j 38 | slf4j-simple 39 | 1.7.30 40 | 41 | 42 | org.antlr 43 | antlr4-runtime 44 | ${antlr.version} 45 | 46 | 47 | org.antlr 48 | antlr4-maven-plugin 49 | ${antlr.version} 50 | 51 | 52 | 53 | 54 | 55 | 56 | org.antlr 57 | antlr4-maven-plugin 58 | ${antlr.version} 59 | 60 | src/main/java 61 | 62 | -visitor 63 | -listener 64 | 65 | 66 | 67 | 68 | 69 | antlr4 70 | 71 | 72 | 73 | 74 | 75 | org.apache.maven.plugins 76 | maven-compiler-plugin 77 | 78 | 8 79 | 8 80 | 81 | 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /sql-parser.iml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/main/java/cn/ganjiacheng/Application.java: -------------------------------------------------------------------------------- 1 | package cn.ganjiacheng; 2 | 3 | import cn.ganjiacheng.enums.SqlEngineEnum; 4 | import com.alibaba.fastjson.JSON; 5 | import com.alibaba.fastjson.serializer.SerializerFeature; 6 | 7 | /** 8 | * @author: again 9 | */ 10 | public class Application { 11 | public static void main(String[] args) { 12 | SqlParserService parserService = SqlParserFactory.getParser(SqlEngineEnum.HIVE); 13 | String sql = "INSERT INTO TABLE db_test.table_result SELECT t1.id, t2.name FROM ( SELECT id1 + id2 AS id FROM db_test.table1 ) t1 LEFT JOIN ( SELECT id, name FROM ( SELECT id, sourcename AS name FROM db_test.table2 ) ) t2 ON t1.id=t2.id"; 14 | // String sql = "SELECT F1 FROM TAB1"; 15 | 16 | 
System.out.println("sql类型"); 17 | System.out.println(parserService.parseSqlType(sql)); 18 | System.out.println("\n"); 19 | 20 | System.out.println("sql格式化"); 21 | System.out.println(parserService.parseSqlFormatter(sql)); 22 | System.out.println("\n"); 23 | 24 | System.out.println("表血缘"); 25 | System.out.println(JSON.toJSONString(parserService.parseSqlTableLineage(sql), SerializerFeature.WriteMapNullValue, SerializerFeature.PrettyFormat)); 26 | System.out.println("\n"); 27 | 28 | System.out.println("字段血源"); 29 | System.out.println(JSON.toJSONString(parserService.parseSqlFieldLineage(sql), SerializerFeature.WriteMapNullValue, SerializerFeature.PrettyFormat)); 30 | System.out.println("\n"); 31 | 32 | String createSql = "CREATE TABLE db1.table1 (id number comment 'id', name string comment '姓名', age number)"; 33 | System.out.println("元数据"); 34 | System.out.println(JSON.toJSONString(parserService.parseSqlMetadata(createSql), SerializerFeature.WriteMapNullValue, SerializerFeature.PrettyFormat)); 35 | System.out.println("\n"); 36 | 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/main/java/cn/ganjiacheng/SqlParserAbstract.java: -------------------------------------------------------------------------------- 1 | package cn.ganjiacheng; 2 | 3 | import cn.ganjiacheng.enums.SqlTypeEnum; 4 | import cn.ganjiacheng.model.lineage.FieldLineageModel; 5 | import cn.ganjiacheng.model.lineage.TableLineageModel; 6 | import cn.ganjiacheng.model.metadata.TableMetadataModel; 7 | import org.slf4j.Logger; 8 | import org.slf4j.LoggerFactory; 9 | 10 | import java.util.List; 11 | 12 | /** 13 | * @ClassName SqlParserAbstractFactory 14 | * @description: 15 | * @author: again 16 | * @Date: 2021/3/10 8:14 下午 17 | */ 18 | public abstract class SqlParserAbstract implements SqlParserService { 19 | 20 | public static final Logger logger = LoggerFactory.getLogger(SqlParserAbstract.class); 21 | 22 | private void notSupport(String msg) { 23 | 
logger.error("not support {}", msg); 24 | // throw new RuntimeException("not support"); 25 | } 26 | 27 | @Override 28 | public SqlTypeEnum parseSqlType(String sql) { 29 | notSupport("parseSqlType"); 30 | return null; 31 | } 32 | 33 | @Override 34 | public TableMetadataModel parseSqlMetadata(String sql) { 35 | notSupport("parseSqlMetadata"); 36 | return null; 37 | } 38 | 39 | @Override 40 | public String parseSqlFormatter(String sql) { 41 | notSupport("parseSqlFormatter"); 42 | return null; 43 | } 44 | 45 | @Override 46 | public TableLineageModel parseSqlTableLineage(String sql) { 47 | notSupport("parseSqlTableLineage"); 48 | return null; 49 | } 50 | 51 | @Override 52 | public List parseSqlFieldLineage(String sql) { 53 | notSupport("parseSqlFieldLineage"); 54 | return null; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/cn/ganjiacheng/SqlParserFactory.java: -------------------------------------------------------------------------------- 1 | package cn.ganjiacheng; 2 | 3 | import cn.ganjiacheng.enums.SqlEngineEnum; 4 | import cn.ganjiacheng.hive.MyHiveSqlParser; 5 | import cn.ganjiacheng.mysql.MysqlSqlParser; 6 | import cn.ganjiacheng.presto.MyPresoSqlParser; 7 | import cn.ganjiacheng.spark.MySparkSqlParser; 8 | 9 | /** 10 | * @ClassName SqlParserFactory 11 | * @description: 12 | * @author: again 13 | * @Date: 2021/3/10 4:21 下午 14 | */ 15 | public class SqlParserFactory { 16 | 17 | public static SqlParserService getParser(SqlEngineEnum sqlEngineEnum) { 18 | if (SqlEngineEnum.HIVE.equals(sqlEngineEnum)) { 19 | return new MyHiveSqlParser(); 20 | } else if (SqlEngineEnum.MYSQL.equals(sqlEngineEnum)) { 21 | return new MysqlSqlParser(); 22 | } else if (SqlEngineEnum.PRESTO.equals(sqlEngineEnum)) { 23 | return new MyPresoSqlParser(); 24 | } else if (SqlEngineEnum.SPARK.equals(sqlEngineEnum)) { 25 | return new MySparkSqlParser(); 26 | } 27 | throw new RuntimeException("db type is not support"); 28 | } 29 | 
} 30 | -------------------------------------------------------------------------------- /src/main/java/cn/ganjiacheng/SqlParserService.java: -------------------------------------------------------------------------------- 1 | package cn.ganjiacheng; 2 | 3 | import cn.ganjiacheng.enums.SqlTypeEnum; 4 | import cn.ganjiacheng.model.lineage.FieldLineageModel; 5 | import cn.ganjiacheng.model.lineage.TableLineageModel; 6 | import cn.ganjiacheng.model.metadata.TableMetadataModel; 7 | 8 | import java.util.List; 9 | 10 | /** 11 | * @ClassName SqlParserService 12 | * @description: sql解析接口 13 | * @author: again 14 | * @Date: 2021/3/10 4:05 下午 15 | */ 16 | public interface SqlParserService { 17 | /** 18 | * 获取sql类型 19 | */ 20 | SqlTypeEnum parseSqlType(String sql); 21 | 22 | /** 23 | * 获取创表语句元数据 24 | */ 25 | TableMetadataModel parseSqlMetadata(String sql); 26 | 27 | /** 28 | * sql格式化 29 | */ 30 | String parseSqlFormatter(String sql); 31 | 32 | /** 33 | * sql解析表元数据 34 | */ 35 | TableLineageModel parseSqlTableLineage(String sql); 36 | 37 | /** 38 | * sql解析字段元数据 39 | */ 40 | List parseSqlFieldLineage(String sql); 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/cn/ganjiacheng/antlr/PrestoSql.g4: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
// Top-level statement rule: one alternative per supported statement form.
// Every alternative carries a `#label`, so each form is distinguishable by its
// label in the generated parser. Some labels are shared on purpose:
//   - #use         covers both `USE schema` and `USE catalog.schema`
//   - #showColumns covers `SHOW COLUMNS FROM|IN x`, `DESCRIBE x` and `DESC x`
// The rule is recursive through #explain and #prepare, which wrap another statement.
statement
    : query                                                            #statementDefault
    | USE schema=identifier                                            #use
    | USE catalog=identifier '.' schema=identifier                     #use
    | CREATE SCHEMA (IF NOT EXISTS)? qualifiedName
        (WITH properties)?                                             #createSchema
    | DROP SCHEMA (IF EXISTS)? qualifiedName (CASCADE | RESTRICT)?     #dropSchema
    | ALTER SCHEMA qualifiedName RENAME TO identifier                  #renameSchema
    // CREATE TABLE ... AS <query>; the query may optionally be parenthesized
    | CREATE TABLE (IF NOT EXISTS)? qualifiedName columnAliases?
        (COMMENT string)?
        (WITH properties)? AS (query | '('query')')
        (WITH (NO)? DATA)?                                             #createTableAsSelect
    // CREATE TABLE with an explicit element list (column definitions / LIKE clauses)
    | CREATE TABLE (IF NOT EXISTS)? qualifiedName
        '(' tableElement (',' tableElement)* ')'
        (COMMENT string)?
        (WITH properties)?                                             #createTable
    | DROP TABLE (IF EXISTS)? qualifiedName                            #dropTable
    | INSERT INTO qualifiedName columnAliases? query                   #insertInto
    | DELETE FROM qualifiedName (WHERE booleanExpression)?             #delete
    | ALTER TABLE (IF EXISTS)? from=qualifiedName
        RENAME TO to=qualifiedName                                     #renameTable
    | ALTER TABLE (IF EXISTS)? tableName=qualifiedName
        RENAME COLUMN (IF EXISTS)? from=identifier TO to=identifier    #renameColumn
    | ALTER TABLE (IF EXISTS)? tableName=qualifiedName
        DROP COLUMN (IF EXISTS)? column=qualifiedName                  #dropColumn
    | ALTER TABLE (IF EXISTS)? tableName=qualifiedName
        ADD COLUMN (IF NOT EXISTS)? column=columnDefinition            #addColumn
    | ANALYZE qualifiedName (WITH properties)?                         #analyze
    | CREATE (OR REPLACE)? VIEW qualifiedName
        (SECURITY (DEFINER | INVOKER))? AS query                       #createView
    | DROP VIEW (IF EXISTS)? qualifiedName                             #dropView
    | CREATE MATERIALIZED VIEW (IF NOT EXISTS)? qualifiedName
        (COMMENT string)?
        (WITH properties)? AS (query | '('query')')                    #createMaterializedView
    | CREATE (OR REPLACE)? TEMPORARY? FUNCTION functionName=qualifiedName
        '(' (sqlParameterDeclaration (',' sqlParameterDeclaration)*)? ')'
        RETURNS returnType=type
        (COMMENT string)?
        routineCharacteristics routineBody                             #createFunction
    | ALTER FUNCTION qualifiedName types?
        alterRoutineCharacteristics                                    #alterFunction
    | DROP TEMPORARY? FUNCTION (IF EXISTS)? qualifiedName types?       #dropFunction
    | CALL qualifiedName '(' (callArgument (',' callArgument)*)? ')'   #call
    | CREATE ROLE name=identifier
        (WITH ADMIN grantor)?                                          #createRole
    | DROP ROLE name=identifier                                        #dropRole
    // GRANT/REVOKE of roles (as opposed to privileges, further below)
    | GRANT
        roles
        TO principal (',' principal)*
        (WITH ADMIN OPTION)?
        (GRANTED BY grantor)?                                          #grantRoles
    | REVOKE
        (ADMIN OPTION FOR)?
        roles
        FROM principal (',' principal)*
        (GRANTED BY grantor)?                                          #revokeRoles
    | SET ROLE (ALL | NONE | role=identifier)                          #setRole
    // GRANT/REVOKE of privileges on a table
    | GRANT
        (privilege (',' privilege)* | ALL PRIVILEGES)
        ON TABLE? qualifiedName TO grantee=principal
        (WITH GRANT OPTION)?                                           #grant
    | REVOKE
        (GRANT OPTION FOR)?
        (privilege (',' privilege)* | ALL PRIVILEGES)
        ON TABLE? qualifiedName FROM grantee=principal                 #revoke
    | SHOW GRANTS
        (ON TABLE? qualifiedName)?                                     #showGrants
    | EXPLAIN ANALYZE? VERBOSE?
        ('(' explainOption (',' explainOption)* ')')? statement        #explain
    | SHOW CREATE TABLE qualifiedName                                  #showCreateTable
    | SHOW CREATE VIEW qualifiedName                                   #showCreateView
    | SHOW CREATE FUNCTION qualifiedName types?                        #showCreateFunction
    | SHOW TABLES ((FROM | IN) qualifiedName)?
        (LIKE pattern=string (ESCAPE escape=string)?)?                 #showTables
    | SHOW SCHEMAS ((FROM | IN) identifier)?
        (LIKE pattern=string (ESCAPE escape=string)?)?                 #showSchemas
    | SHOW CATALOGS (LIKE pattern=string)?                             #showCatalogs
    | SHOW COLUMNS (FROM | IN) qualifiedName                           #showColumns
    | SHOW STATS FOR qualifiedName                                     #showStats
    | SHOW STATS FOR '(' querySpecification ')'                        #showStatsForQuery
    | SHOW CURRENT? ROLES ((FROM | IN) identifier)?                    #showRoles
    | SHOW ROLE GRANTS ((FROM | IN) identifier)?                       #showRoleGrants
    | DESCRIBE qualifiedName                                           #showColumns
    | DESC qualifiedName                                               #showColumns
    | SHOW FUNCTIONS
        (LIKE pattern=string (ESCAPE escape=string)?)?                 #showFunctions
    | SHOW SESSION                                                     #showSession
    | SET SESSION qualifiedName EQ expression                          #setSession
    | RESET SESSION qualifiedName                                      #resetSession
    | START TRANSACTION (transactionMode (',' transactionMode)*)?      #startTransaction
    | COMMIT WORK?                                                     #commit
    | ROLLBACK WORK?                                                   #rollback
    | PREPARE identifier FROM statement                                #prepare
    | DEALLOCATE PREPARE identifier                                    #deallocate
    | EXECUTE identifier (USING expression (',' expression)*)?         #execute
    | DESCRIBE INPUT identifier                                        #describeInput
    | DESCRIBE OUTPUT identifier                                       #describeOutput
    ;
// A single SELECT block: an optional DISTINCT/ALL quantifier, one or more select
// items, then optional FROM (comma-separated relations), WHERE, GROUP BY and
// HAVING clauses, in that order.
// ORDER BY and LIMIT are not part of this rule; they are attached by queryNoWith.
// The `where=` and `having=` labels name the two booleanExpression children so they
// can be told apart.
querySpecification
    : SELECT setQuantifier? selectItem (',' selectItem)*
      (FROM relation (',' relation)*)?
      (WHERE where=booleanExpression)?
      (GROUP BY groupBy)?
      (HAVING having=booleanExpression)?
    ;
#unnest 320 | | LATERAL '(' query ')' #lateral 321 | | '(' relation ')' #parenthesizedRelation 322 | ; 323 | 324 | expression 325 | : booleanExpression 326 | ; 327 | 328 | booleanExpression 329 | : valueExpression predicate[$valueExpression.ctx]? #predicated 330 | | NOT booleanExpression #logicalNot 331 | | left=booleanExpression operator=AND right=booleanExpression #logicalBinary 332 | | left=booleanExpression operator=OR right=booleanExpression #logicalBinary 333 | ; 334 | 335 | // workaround for https://github.com/antlr/antlr4/issues/780 336 | predicate[ParserRuleContext value] 337 | : comparisonOperator right=valueExpression #comparison 338 | | comparisonOperator comparisonQuantifier '(' query ')' #quantifiedComparison 339 | | NOT? BETWEEN lower=valueExpression AND upper=valueExpression #between 340 | | NOT? IN '(' expression (',' expression)* ')' #inList 341 | | NOT? IN '(' query ')' #inSubquery 342 | | NOT? LIKE pattern=valueExpression (ESCAPE escape=valueExpression)? #like 343 | | IS NOT? NULL #nullPredicate 344 | | IS NOT? DISTINCT FROM right=valueExpression #distinctFrom 345 | ; 346 | 347 | valueExpression 348 | : primaryExpression #valueExpressionDefault 349 | | valueExpression AT timeZoneSpecifier #atTimeZone 350 | | operator=(MINUS | PLUS) valueExpression #arithmeticUnary 351 | | left=valueExpression operator=(ASTERISK | SLASH | PERCENT) right=valueExpression #arithmeticBinary 352 | | left=valueExpression operator=(PLUS | MINUS) right=valueExpression #arithmeticBinary 353 | | left=valueExpression CONCAT right=valueExpression #concatenation 354 | ; 355 | 356 | primaryExpression 357 | : NULL #nullLiteral 358 | | interval #intervalLiteral 359 | | identifier string #typeConstructor 360 | | DOUBLE_PRECISION string #typeConstructor 361 | | number #numericLiteral 362 | | booleanValue #booleanLiteral 363 | | string #stringLiteral 364 | | BINARY_LITERAL #binaryLiteral 365 | | '?' 
#parameter 366 | | POSITION '(' valueExpression IN valueExpression ')' #position 367 | | '(' expression (',' expression)+ ')' #rowConstructor 368 | | ROW '(' expression (',' expression)* ')' #rowConstructor 369 | | qualifiedName '(' ASTERISK ')' filter? over? #functionCall 370 | | qualifiedName '(' (setQuantifier? expression (',' expression)*)? 371 | (ORDER BY sortItem (',' sortItem)*)? ')' filter? (nullTreatment? over)? #functionCall 372 | | identifier '->' expression #lambda 373 | | '(' (identifier (',' identifier)*)? ')' '->' expression #lambda 374 | | '(' query ')' #subqueryExpression 375 | // This is an extension to ANSI SQL, which considers EXISTS to be a predicate 376 | | EXISTS '(' query ')' #exists 377 | | CASE valueExpression whenClause+ (ELSE elseExpression=expression)? END #simpleCase 378 | | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase 379 | | CAST '(' expression AS type ')' #cast 380 | | TRY_CAST '(' expression AS type ')' #cast 381 | | ARRAY '[' (expression (',' expression)*)? ']' #arrayConstructor 382 | | value=primaryExpression '[' index=valueExpression ']' #subscript 383 | | identifier #columnReference 384 | | base=primaryExpression '.' fieldName=identifier #dereference 385 | | name=CURRENT_DATE #specialDateTimeFunction 386 | | name=CURRENT_TIME ('(' precision=INTEGER_VALUE ')')? #specialDateTimeFunction 387 | | name=CURRENT_TIMESTAMP ('(' precision=INTEGER_VALUE ')')? #specialDateTimeFunction 388 | | name=LOCALTIME ('(' precision=INTEGER_VALUE ')')? #specialDateTimeFunction 389 | | name=LOCALTIMESTAMP ('(' precision=INTEGER_VALUE ')')? #specialDateTimeFunction 390 | | name=CURRENT_USER #currentUser 391 | | SUBSTRING '(' valueExpression FROM valueExpression (FOR valueExpression)? ')' #substring 392 | | NORMALIZE '(' valueExpression (',' normalForm)?
')' #normalize 393 | | EXTRACT '(' identifier FROM valueExpression ')' #extract 394 | | '(' expression ')' #parenthesizedExpression 395 | | GROUPING '(' (qualifiedName (',' qualifiedName)*)? ')' #groupingOperation 396 | ; 397 | 398 | string 399 | : STRING #basicStringLiteral 400 | | UNICODE_STRING (UESCAPE STRING)? #unicodeStringLiteral 401 | ; 402 | 403 | nullTreatment 404 | : IGNORE NULLS 405 | | RESPECT NULLS 406 | ; 407 | 408 | timeZoneSpecifier 409 | : TIME ZONE interval #timeZoneInterval 410 | | TIME ZONE string #timeZoneString 411 | ; 412 | 413 | comparisonOperator 414 | : EQ | NEQ | LT | LTE | GT | GTE 415 | ; 416 | 417 | comparisonQuantifier 418 | : ALL | SOME | ANY 419 | ; 420 | 421 | booleanValue 422 | : TRUE | FALSE 423 | ; 424 | 425 | interval 426 | : INTERVAL sign=(PLUS | MINUS)? string from=intervalField (TO to=intervalField)? 427 | ; 428 | 429 | intervalField 430 | : YEAR | MONTH | DAY | HOUR | MINUTE | SECOND 431 | ; 432 | 433 | normalForm 434 | : NFD | NFC | NFKD | NFKC 435 | ; 436 | 437 | types 438 | : '(' (type (',' type)*)? ')' 439 | ; 440 | 441 | type 442 | : type ARRAY 443 | | ARRAY '<' type '>' 444 | | MAP '<' type ',' type '>' 445 | | ROW '(' identifier type (',' identifier type)* ')' 446 | | baseType ('(' typeParameter (',' typeParameter)* ')')? 447 | | INTERVAL from=intervalField TO to=intervalField 448 | ; 449 | 450 | typeParameter 451 | : INTEGER_VALUE | type 452 | ; 453 | 454 | baseType 455 | : TIME_WITH_TIME_ZONE 456 | | TIMESTAMP_WITH_TIME_ZONE 457 | | DOUBLE_PRECISION 458 | | qualifiedName 459 | ; 460 | 461 | whenClause 462 | : WHEN condition=expression THEN result=expression 463 | ; 464 | 465 | filter 466 | : FILTER '(' WHERE booleanExpression ')' 467 | ; 468 | 469 | over 470 | : OVER '(' 471 | (PARTITION BY partition+=expression (',' partition+=expression)*)? 472 | (ORDER BY sortItem (',' sortItem)*)? 473 | windowFrame? 
474 | ')' 475 | ; 476 | 477 | windowFrame 478 | : frameType=RANGE start=frameBound 479 | | frameType=ROWS start=frameBound 480 | | frameType=RANGE BETWEEN start=frameBound AND end=frameBound 481 | | frameType=ROWS BETWEEN start=frameBound AND end=frameBound 482 | ; 483 | 484 | frameBound 485 | : UNBOUNDED boundType=PRECEDING #unboundedFrame 486 | | UNBOUNDED boundType=FOLLOWING #unboundedFrame 487 | | CURRENT ROW #currentRowBound 488 | | expression boundType=(PRECEDING | FOLLOWING) #boundedFrame // expression should be unsignedLiteral 489 | ; 490 | 491 | 492 | explainOption 493 | : FORMAT value=(TEXT | GRAPHVIZ | JSON) #explainFormat 494 | | TYPE value=(LOGICAL | DISTRIBUTED | VALIDATE | IO) #explainType 495 | ; 496 | 497 | transactionMode 498 | : ISOLATION LEVEL levelOfIsolation #isolationLevel 499 | | READ accessMode=(ONLY | WRITE) #transactionAccessMode 500 | ; 501 | 502 | levelOfIsolation 503 | : READ UNCOMMITTED #readUncommitted 504 | | READ COMMITTED #readCommitted 505 | | REPEATABLE READ #repeatableRead 506 | | SERIALIZABLE #serializable 507 | ; 508 | 509 | callArgument 510 | : expression #positionalArgument 511 | | identifier '=>' expression #namedArgument 512 | ; 513 | 514 | privilege 515 | : SELECT | DELETE | INSERT | identifier 516 | ; 517 | 518 | qualifiedName 519 | : identifier ('.' 
identifier)* 520 | ; 521 | 522 | grantor 523 | : CURRENT_USER #currentUserGrantor 524 | | CURRENT_ROLE #currentRoleGrantor 525 | | principal #specifiedPrincipal 526 | ; 527 | 528 | principal 529 | : USER identifier #userPrincipal 530 | | ROLE identifier #rolePrincipal 531 | | identifier #unspecifiedPrincipal 532 | ; 533 | 534 | roles 535 | : identifier (',' identifier)* 536 | ; 537 | 538 | identifier 539 | : IDENTIFIER #unquotedIdentifier 540 | | QUOTED_IDENTIFIER #quotedIdentifier 541 | | nonReserved #unquotedIdentifier 542 | | BACKQUOTED_IDENTIFIER #backQuotedIdentifier 543 | | DIGIT_IDENTIFIER #digitIdentifier 544 | ; 545 | 546 | number 547 | : DECIMAL_VALUE #decimalLiteral 548 | | DOUBLE_VALUE #doubleLiteral 549 | | INTEGER_VALUE #integerLiteral 550 | ; 551 | 552 | nonReserved 553 | // IMPORTANT: this rule must only contain tokens. Nested rules are not supported. See SqlParser.exitNonReserved 554 | : ADD | ADMIN | ALL | ANALYZE | ANY | ARRAY | ASC | AT 555 | | BERNOULLI 556 | | CALL | CALLED | CASCADE | CATALOGS | COLUMN | COLUMNS | COMMENT | COMMIT | COMMITTED | CURRENT | CURRENT_ROLE 557 | | DATA | DATE | DAY | DEFINER | DESC | DETERMINISTIC | DISTRIBUTED 558 | | EXCLUDING | EXPLAIN | EXTERNAL 559 | | FILTER | FIRST | FOLLOWING | FORMAT | FUNCTION | FUNCTIONS 560 | | GRANT | GRANTED | GRANTS | GRAPHVIZ 561 | | HOUR 562 | | IF | IGNORE | INCLUDING | INPUT | INTERVAL | INVOKER | IO | ISOLATION 563 | | JSON 564 | | LANGUAGE | LAST | LATERAL | LEVEL | LIMIT | LOGICAL 565 | | MAP | MATERIALIZED | MINUTE | MONTH 566 | | NAME | NFC | NFD | NFKC | NFKD | NO | NONE | NULLIF | NULLS 567 | | ONLY | OPTION | ORDINALITY | OUTPUT | OVER 568 | | PARTITION | PARTITIONS | POSITION | PRECEDING | PRIVILEGES | PROPERTIES 569 | | RANGE | READ | RENAME | REPEATABLE | REPLACE | RESET | RESPECT | RESTRICT | RETURN | RETURNS | REVOKE | ROLE | ROLES | ROLLBACK | ROW | ROWS 570 | | SCHEMA | SCHEMAS | SECOND | SECURITY | SERIALIZABLE | SESSION | SET | SETS | SQL 571 | | SHOW | SOME | 
START | STATS | SUBSTRING | SYSTEM 572 | | TABLES | TABLESAMPLE | TEMPORARY | TEXT | TIME | TIMESTAMP | TO | TRANSACTION | TRY_CAST | TYPE 573 | | UNBOUNDED | UNCOMMITTED | USE | USER 574 | | VALIDATE | VERBOSE | VIEW 575 | | WORK | WRITE 576 | | YEAR 577 | | ZONE 578 | ; 579 | 580 | ADD: 'ADD'; 581 | ADMIN: 'ADMIN'; 582 | ALL: 'ALL'; 583 | ALTER: 'ALTER'; 584 | ANALYZE: 'ANALYZE'; 585 | AND: 'AND'; 586 | ANY: 'ANY'; 587 | ARRAY: 'ARRAY'; 588 | AS: 'AS'; 589 | ASC: 'ASC'; 590 | AT: 'AT'; 591 | BERNOULLI: 'BERNOULLI'; 592 | BETWEEN: 'BETWEEN'; 593 | BY: 'BY'; 594 | CALL: 'CALL'; 595 | CALLED: 'CALLED'; 596 | CASCADE: 'CASCADE'; 597 | CASE: 'CASE'; 598 | CAST: 'CAST'; 599 | CATALOGS: 'CATALOGS'; 600 | COLUMN: 'COLUMN'; 601 | COLUMNS: 'COLUMNS'; 602 | COMMENT: 'COMMENT'; 603 | COMMIT: 'COMMIT'; 604 | COMMITTED: 'COMMITTED'; 605 | CONSTRAINT: 'CONSTRAINT'; 606 | CREATE: 'CREATE'; 607 | CROSS: 'CROSS'; 608 | CUBE: 'CUBE'; 609 | CURRENT: 'CURRENT'; 610 | CURRENT_DATE: 'CURRENT_DATE'; 611 | CURRENT_ROLE: 'CURRENT_ROLE'; 612 | CURRENT_TIME: 'CURRENT_TIME'; 613 | CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'; 614 | CURRENT_USER: 'CURRENT_USER'; 615 | DATA: 'DATA'; 616 | DATE: 'DATE'; 617 | DAY: 'DAY'; 618 | DEALLOCATE: 'DEALLOCATE'; 619 | DEFINER: 'DEFINER'; 620 | DELETE: 'DELETE'; 621 | DESC: 'DESC'; 622 | DESCRIBE: 'DESCRIBE'; 623 | DETERMINISTIC: 'DETERMINISTIC'; 624 | DISTINCT: 'DISTINCT'; 625 | DISTRIBUTED: 'DISTRIBUTED'; 626 | DROP: 'DROP'; 627 | ELSE: 'ELSE'; 628 | END: 'END'; 629 | ESCAPE: 'ESCAPE'; 630 | EXCEPT: 'EXCEPT'; 631 | EXCLUDING: 'EXCLUDING'; 632 | EXECUTE: 'EXECUTE'; 633 | EXISTS: 'EXISTS'; 634 | EXPLAIN: 'EXPLAIN'; 635 | EXTRACT: 'EXTRACT'; 636 | EXTERNAL: 'EXTERNAL'; 637 | FALSE: 'FALSE'; 638 | FILTER: 'FILTER'; 639 | FIRST: 'FIRST'; 640 | FOLLOWING: 'FOLLOWING'; 641 | FOR: 'FOR'; 642 | FORMAT: 'FORMAT'; 643 | FROM: 'FROM'; 644 | FULL: 'FULL'; 645 | FUNCTION: 'FUNCTION'; 646 | FUNCTIONS: 'FUNCTIONS'; 647 | GRANT: 'GRANT'; 648 | GRANTED: 'GRANTED'; 649 | 
/* Keyword lexer tokens, GRANTS through ROWS (roughly alphabetical). Each rule matches only the exact upper-case spelling of its own name. NOTE(review): lower-case SQL keywords are presumably handled by upper-casing the character stream before lexing - confirm against the parser setup. Keywords that may also be used as identifiers must additionally be listed in the nonReserved parser rule. */ GRANTS: 'GRANTS'; 650 | GRAPHVIZ: 'GRAPHVIZ'; 651 | GROUP: 'GROUP'; 652 | GROUPING: 'GROUPING'; 653 | HAVING: 'HAVING'; 654 | HOUR: 'HOUR'; 655 | IF: 'IF'; 656 | IGNORE: 'IGNORE'; 657 | IN: 'IN'; 658 | INCLUDING: 'INCLUDING'; 659 | INNER: 'INNER'; 660 | INPUT: 'INPUT'; 661 | INSERT: 'INSERT'; 662 | INTERSECT: 'INTERSECT'; 663 | INTERVAL: 'INTERVAL'; 664 | INTO: 'INTO'; 665 | INVOKER: 'INVOKER'; 666 | IO: 'IO'; 667 | IS: 'IS'; 668 | ISOLATION: 'ISOLATION'; 669 | JSON: 'JSON'; 670 | JOIN: 'JOIN'; 671 | LANGUAGE: 'LANGUAGE'; 672 | LAST: 'LAST'; 673 | LATERAL: 'LATERAL'; 674 | LEFT: 'LEFT'; 675 | LEVEL: 'LEVEL'; 676 | LIKE: 'LIKE'; 677 | LIMIT: 'LIMIT'; 678 | LOCALTIME: 'LOCALTIME'; 679 | LOCALTIMESTAMP: 'LOCALTIMESTAMP'; 680 | LOGICAL: 'LOGICAL'; 681 | MAP: 'MAP'; 682 | MATERIALIZED: 'MATERIALIZED'; 683 | MINUTE: 'MINUTE'; 684 | MONTH: 'MONTH'; 685 | NAME: 'NAME'; 686 | NATURAL: 'NATURAL'; 687 | NFC : 'NFC'; 688 | NFD : 'NFD'; 689 | NFKC : 'NFKC'; 690 | NFKD : 'NFKD'; 691 | NO: 'NO'; 692 | NONE: 'NONE'; 693 | NORMALIZE: 'NORMALIZE'; 694 | NOT: 'NOT'; 695 | NULL: 'NULL'; 696 | NULLIF: 'NULLIF'; 697 | NULLS: 'NULLS'; 698 | ON: 'ON'; 699 | ONLY: 'ONLY'; 700 | OPTION: 'OPTION'; 701 | OR: 'OR'; 702 | ORDER: 'ORDER'; 703 | ORDINALITY: 'ORDINALITY'; 704 | OUTER: 'OUTER'; 705 | OUTPUT: 'OUTPUT'; 706 | OVER: 'OVER'; 707 | PARTITION: 'PARTITION'; 708 | PARTITIONS: 'PARTITIONS'; 709 | POSITION: 'POSITION'; 710 | PRECEDING: 'PRECEDING'; 711 | PREPARE: 'PREPARE'; 712 | PRIVILEGES: 'PRIVILEGES'; 713 | PROPERTIES: 'PROPERTIES'; 714 | RANGE: 'RANGE'; 715 | READ: 'READ'; 716 | RECURSIVE: 'RECURSIVE'; 717 | RENAME: 'RENAME'; 718 | REPEATABLE: 'REPEATABLE'; 719 | REPLACE: 'REPLACE'; 720 | RESET: 'RESET'; 721 | RESPECT: 'RESPECT'; 722 | RESTRICT: 'RESTRICT'; 723 | RETURN: 'RETURN'; 724 | RETURNS: 'RETURNS'; 725 | REVOKE: 'REVOKE'; 726 | RIGHT: 'RIGHT'; 727 | ROLE: 'ROLE'; 728 | ROLES: 'ROLES'; 729 | ROLLBACK: 'ROLLBACK'; 730 | ROLLUP: 'ROLLUP'; 731 | ROW: 'ROW'; 732 | ROWS: 'ROWS'; 733 |
SCHEMA: 'SCHEMA'; 734 | SCHEMAS: 'SCHEMAS'; 735 | SECOND: 'SECOND'; 736 | SECURITY: 'SECURITY'; 737 | SELECT: 'SELECT'; 738 | SERIALIZABLE: 'SERIALIZABLE'; 739 | SESSION: 'SESSION'; 740 | SET: 'SET'; 741 | SETS: 'SETS'; 742 | SHOW: 'SHOW'; 743 | SOME: 'SOME'; 744 | SQL: 'SQL'; 745 | START: 'START'; 746 | STATS: 'STATS'; 747 | SUBSTRING: 'SUBSTRING'; 748 | SYSTEM: 'SYSTEM'; 749 | TABLE: 'TABLE'; 750 | TABLES: 'TABLES'; 751 | TABLESAMPLE: 'TABLESAMPLE'; 752 | TEMPORARY: 'TEMPORARY'; 753 | TEXT: 'TEXT'; 754 | THEN: 'THEN'; 755 | TIME: 'TIME'; 756 | TIMESTAMP: 'TIMESTAMP'; 757 | TO: 'TO'; 758 | TRANSACTION: 'TRANSACTION'; 759 | TRUE: 'TRUE'; 760 | TRY_CAST: 'TRY_CAST'; 761 | TYPE: 'TYPE'; 762 | UESCAPE: 'UESCAPE'; 763 | UNBOUNDED: 'UNBOUNDED'; 764 | UNCOMMITTED: 'UNCOMMITTED'; 765 | UNION: 'UNION'; 766 | UNNEST: 'UNNEST'; 767 | USE: 'USE'; 768 | USER: 'USER'; 769 | USING: 'USING'; 770 | VALIDATE: 'VALIDATE'; 771 | VALUES: 'VALUES'; 772 | VERBOSE: 'VERBOSE'; 773 | VIEW: 'VIEW'; 774 | WHEN: 'WHEN'; 775 | WHERE: 'WHERE'; 776 | WITH: 'WITH'; 777 | WORK: 'WORK'; 778 | WRITE: 'WRITE'; 779 | YEAR: 'YEAR'; 780 | ZONE: 'ZONE'; 781 | 782 | EQ : '='; 783 | NEQ : '<>' | '!='; 784 | LT : '<'; 785 | LTE : '<='; 786 | GT : '>'; 787 | GTE : '>='; 788 | 789 | PLUS: '+'; 790 | MINUS: '-'; 791 | ASTERISK: '*'; 792 | SLASH: '/'; 793 | PERCENT: '%'; 794 | CONCAT: '||'; 795 | 796 | STRING 797 | : '\'' ( ~'\'' | '\'\'' )* '\'' 798 | ; 799 | 800 | UNICODE_STRING 801 | : 'U&\'' ( ~'\'' | '\'\'' )* '\'' 802 | ; 803 | 804 | // Note: we allow any character inside the binary literal and validate 805 | // it's a correct literal when the AST is being constructed. This 806 | // allows us to provide more meaningful error messages to the user 807 | BINARY_LITERAL 808 | : 'X\'' (~'\'')* '\'' 809 | ; 810 | 811 | INTEGER_VALUE 812 | : DIGIT+ 813 | ; 814 | 815 | DECIMAL_VALUE 816 | : DIGIT+ '.' DIGIT* 817 | | '.' DIGIT+ 818 | ; 819 | 820 | DOUBLE_VALUE 821 | : DIGIT+ ('.' DIGIT*)? EXPONENT 822 | | '.'
DIGIT+ EXPONENT 823 | ; 824 | 825 | IDENTIFIER 826 | : (LETTER | '_') (LETTER | DIGIT | '_' | '@' | ':')* 827 | ; 828 | 829 | DIGIT_IDENTIFIER 830 | : DIGIT (LETTER | DIGIT | '_' | '@' | ':')+ 831 | ; 832 | 833 | QUOTED_IDENTIFIER 834 | : '"' ( ~'"' | '""' )* '"' 835 | ; 836 | 837 | BACKQUOTED_IDENTIFIER 838 | : '`' ( ~'`' | '``' )* '`' 839 | ; 840 | 841 | TIME_WITH_TIME_ZONE 842 | : 'TIME' WS 'WITH' WS 'TIME' WS 'ZONE' 843 | ; 844 | 845 | TIMESTAMP_WITH_TIME_ZONE 846 | : 'TIMESTAMP' WS 'WITH' WS 'TIME' WS 'ZONE' 847 | ; 848 | 849 | DOUBLE_PRECISION 850 | : 'DOUBLE' WS 'PRECISION' 851 | ; 852 | 853 | fragment EXPONENT 854 | : 'E' [+-]? DIGIT+ 855 | ; 856 | 857 | fragment DIGIT 858 | : [0-9] 859 | ; 860 | 861 | fragment LETTER 862 | : [A-Z] 863 | ; 864 | 865 | SIMPLE_COMMENT 866 | : '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN) 867 | ; 868 | 869 | BRACKETED_COMMENT 870 | : '/*' .*? '*/' -> channel(HIDDEN) 871 | ; 872 | 873 | WS 874 | : [ \r\n\t]+ -> channel(HIDDEN) 875 | ; 876 | 877 | // Catch-all for anything we can't recognize. 878 | // We use this to be able to ignore and recover all the text 879 | // when splitting statements with DelimiterLexer 880 | UNRECOGNIZED 881 | : . 882 | ; 883 | -------------------------------------------------------------------------------- /src/main/java/cn/ganjiacheng/antlr/SparkSql.g4: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | * 14 | * This file is an adaptation of Presto's presto-parser/src/main/antlr4/com/facebook/presto/sql/parser/SqlBase.g4 grammar. 15 | */ 16 | 17 | grammar SparkSql; 18 | 19 | @parser::members { 20 | /** 21 | * When false, INTERSECT is given the greater precedence over the other set 22 | * operations (UNION, EXCEPT and MINUS) as per the SQL standard. 23 | */ 24 | public boolean legacy_setops_precedence_enabled = false; 25 | /** 26 | * When false, a literal with an exponent would be converted into 27 | * double type rather than decimal type. 28 | */ 29 | public boolean legacy_exponent_literal_as_decimal_enabled = false; 30 | /** 31 | * When true, the behavior of keywords follows ANSI SQL standard. 32 | */ 33 | public boolean SQL_standard_keyword_behavior = false; 34 | } 35 | 36 | @lexer::members { 37 | /** 38 | * Verify whether current token is a valid decimal token (which contains dot). 39 | * Returns true if the character that follows the token is not a digit or letter or underscore. 40 | * 41 | * For example: 42 | * For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'. 43 | * For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'. 44 | * For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'. 45 | * For char stream "12.0D 34.E2+0.12 " 12.0D is a valid decimal token because it is followed 46 | * by a space. 34.E2 is a valid decimal token because it is followed by symbol '+' 47 | * which is not a digit or letter or underscore.
48 | */ 49 | public boolean isValidDecimal() { 50 | int nextChar = _input.LA(1); 51 | if (nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' || 52 | nextChar == '_') { 53 | return false; 54 | } else { 55 | return true; 56 | } 57 | } 58 | 59 | /** 60 | * This method will be called when we see '/*' and try to match it as a bracketed comment. 61 | * If the next character is '+', it should be parsed as hint later, and we cannot match 62 | * it as a bracketed comment. 63 | * 64 | * Returns true if the next character is '+'. 65 | */ 66 | public boolean isHint() { 67 | int nextChar = _input.LA(1); 68 | if (nextChar == '+') { 69 | return true; 70 | } else { 71 | return false; 72 | } 73 | } 74 | } 75 | 76 | singleStatement 77 | : statement ';'* EOF 78 | ; 79 | 80 | singleExpression 81 | : namedExpression EOF 82 | ; 83 | 84 | singleTableIdentifier 85 | : tableIdentifier EOF 86 | ; 87 | 88 | singleMultipartIdentifier 89 | : multipartIdentifier EOF 90 | ; 91 | 92 | singleFunctionIdentifier 93 | : functionIdentifier EOF 94 | ; 95 | 96 | singleDataType 97 | : dataType EOF 98 | ; 99 | 100 | singleTableSchema 101 | : colTypeList EOF 102 | ; 103 | 104 | statement 105 | : query #statementDefault 106 | | ctes? dmlStatementNoWith #dmlStatement 107 | | USE NAMESPACE? multipartIdentifier #use 108 | | CREATE namespace (IF NOT EXISTS)? multipartIdentifier 109 | (commentSpec | 110 | locationSpec | 111 | (WITH (DBPROPERTIES | PROPERTIES) tablePropertyList))* #createNamespace 112 | | ALTER namespace multipartIdentifier 113 | SET (DBPROPERTIES | PROPERTIES) tablePropertyList #setNamespaceProperties 114 | | ALTER namespace multipartIdentifier 115 | SET locationSpec #setNamespaceLocation 116 | | DROP namespace (IF EXISTS)? multipartIdentifier 117 | (RESTRICT | CASCADE)? #dropNamespace 118 | | SHOW (DATABASES | NAMESPACES) ((FROM | IN) multipartIdentifier)? 119 | (LIKE? pattern=STRING)? #showNamespaces 120 | | createTableHeader ('(' colTypeList ')')? tableProvider? 
121 | createTableClauses 122 | (AS? query)? #createTable 123 | | CREATE TABLE (IF NOT EXISTS)? target=tableIdentifier 124 | LIKE source=tableIdentifier 125 | (tableProvider | 126 | rowFormat | 127 | createFileFormat | 128 | locationSpec | 129 | (TBLPROPERTIES tableProps=tablePropertyList))* #createTableLike 130 | | replaceTableHeader ('(' colTypeList ')')? tableProvider? 131 | createTableClauses 132 | (AS? query)? #replaceTable 133 | | ANALYZE TABLE multipartIdentifier partitionSpec? COMPUTE STATISTICS 134 | (identifier | FOR COLUMNS identifierSeq | FOR ALL COLUMNS)? #analyze 135 | | ANALYZE TABLES ((FROM | IN) multipartIdentifier)? COMPUTE STATISTICS 136 | (identifier)? #analyzeTables 137 | | ALTER TABLE multipartIdentifier 138 | ADD (COLUMN | COLUMNS) 139 | columns=qualifiedColTypeWithPositionList #addTableColumns 140 | | ALTER TABLE multipartIdentifier 141 | ADD (COLUMN | COLUMNS) 142 | '(' columns=qualifiedColTypeWithPositionList ')' #addTableColumns 143 | | ALTER TABLE table=multipartIdentifier 144 | RENAME COLUMN 145 | from=multipartIdentifier TO to=errorCapturingIdentifier #renameTableColumn 146 | | ALTER TABLE multipartIdentifier 147 | DROP (COLUMN | COLUMNS) 148 | '(' columns=multipartIdentifierList ')' #dropTableColumns 149 | | ALTER TABLE multipartIdentifier 150 | DROP (COLUMN | COLUMNS) columns=multipartIdentifierList #dropTableColumns 151 | | ALTER (TABLE | VIEW) from=multipartIdentifier 152 | RENAME TO to=multipartIdentifier #renameTable 153 | | ALTER (TABLE | VIEW) multipartIdentifier 154 | SET TBLPROPERTIES tablePropertyList #setTableProperties 155 | | ALTER (TABLE | VIEW) multipartIdentifier 156 | UNSET TBLPROPERTIES (IF EXISTS)? tablePropertyList #unsetTableProperties 157 | | ALTER TABLE table=multipartIdentifier 158 | (ALTER | CHANGE) COLUMN? column=multipartIdentifier 159 | alterColumnAction? #alterTableAlterColumn 160 | | ALTER TABLE table=multipartIdentifier partitionSpec? 161 | CHANGE COLUMN? 
162 | colName=multipartIdentifier colType colPosition? #hiveChangeColumn 163 | | ALTER TABLE table=multipartIdentifier partitionSpec? 164 | REPLACE COLUMNS 165 | '(' columns=qualifiedColTypeWithPositionList ')' #hiveReplaceColumns 166 | | ALTER TABLE multipartIdentifier (partitionSpec)? 167 | SET SERDE STRING (WITH SERDEPROPERTIES tablePropertyList)? #setTableSerDe 168 | | ALTER TABLE multipartIdentifier (partitionSpec)? 169 | SET SERDEPROPERTIES tablePropertyList #setTableSerDe 170 | | ALTER (TABLE | VIEW) multipartIdentifier ADD (IF NOT EXISTS)? 171 | partitionSpecLocation+ #addTablePartition 172 | | ALTER TABLE multipartIdentifier 173 | from=partitionSpec RENAME TO to=partitionSpec #renameTablePartition 174 | | ALTER (TABLE | VIEW) multipartIdentifier 175 | DROP (IF EXISTS)? partitionSpec (',' partitionSpec)* PURGE? #dropTablePartitions 176 | | ALTER TABLE multipartIdentifier 177 | (partitionSpec)? SET locationSpec #setTableLocation 178 | | ALTER TABLE multipartIdentifier RECOVER PARTITIONS #recoverPartitions 179 | | DROP TABLE (IF EXISTS)? multipartIdentifier PURGE? #dropTable 180 | | DROP VIEW (IF EXISTS)? multipartIdentifier #dropView 181 | | CREATE (OR REPLACE)? (GLOBAL? TEMPORARY)? 182 | VIEW (IF NOT EXISTS)? multipartIdentifier 183 | identifierCommentList? 184 | (commentSpec | 185 | (PARTITIONED ON identifierList) | 186 | (TBLPROPERTIES tablePropertyList))* 187 | AS query #createView 188 | | CREATE (OR REPLACE)? GLOBAL? TEMPORARY VIEW 189 | tableIdentifier ('(' colTypeList ')')? tableProvider 190 | (OPTIONS tablePropertyList)? #createTempViewUsing 191 | | ALTER VIEW multipartIdentifier AS? query #alterViewQuery 192 | | CREATE (OR REPLACE)? TEMPORARY? FUNCTION (IF NOT EXISTS)? 193 | multipartIdentifier AS className=STRING 194 | (USING resource (',' resource)*)? #createFunction 195 | | DROP TEMPORARY? FUNCTION (IF EXISTS)? multipartIdentifier #dropFunction 196 | | EXPLAIN (LOGICAL | FORMATTED | EXTENDED | CODEGEN | COST)? 
197 | statement #explain 198 | | SHOW TABLES ((FROM | IN) multipartIdentifier)? 199 | (LIKE? pattern=STRING)? #showTables 200 | | SHOW TABLE EXTENDED ((FROM | IN) ns=multipartIdentifier)? 201 | LIKE pattern=STRING partitionSpec? #showTableExtended 202 | | SHOW TBLPROPERTIES table=multipartIdentifier 203 | ('(' key=tablePropertyKey ')')? #showTblProperties 204 | | SHOW COLUMNS (FROM | IN) table=multipartIdentifier 205 | ((FROM | IN) ns=multipartIdentifier)? #showColumns 206 | | SHOW VIEWS ((FROM | IN) multipartIdentifier)? 207 | (LIKE? pattern=STRING)? #showViews 208 | | SHOW PARTITIONS multipartIdentifier partitionSpec? #showPartitions 209 | | SHOW identifier? FUNCTIONS 210 | (LIKE? (multipartIdentifier | pattern=STRING))? #showFunctions 211 | | SHOW CREATE TABLE multipartIdentifier (AS SERDE)? #showCreateTable 212 | | SHOW CURRENT NAMESPACE #showCurrentNamespace 213 | | (DESC | DESCRIBE) FUNCTION EXTENDED? describeFuncName #describeFunction 214 | | (DESC | DESCRIBE) namespace EXTENDED? 215 | multipartIdentifier #describeNamespace 216 | | (DESC | DESCRIBE) TABLE? option=(EXTENDED | FORMATTED)? 217 | multipartIdentifier partitionSpec? describeColName? #describeRelation 218 | | (DESC | DESCRIBE) QUERY? query #describeQuery 219 | | COMMENT ON namespace multipartIdentifier IS 220 | comment=(STRING | NULL) #commentNamespace 221 | | COMMENT ON TABLE multipartIdentifier IS comment=(STRING | NULL) #commentTable 222 | | REFRESH TABLE multipartIdentifier #refreshTable 223 | | REFRESH FUNCTION multipartIdentifier #refreshFunction 224 | | REFRESH (STRING | .*?) #refreshResource 225 | | CACHE LAZY? TABLE multipartIdentifier 226 | (OPTIONS options=tablePropertyList)? (AS? query)? #cacheTable 227 | | UNCACHE TABLE (IF EXISTS)? multipartIdentifier #uncacheTable 228 | | CLEAR CACHE #clearCache 229 | | LOAD DATA LOCAL? INPATH path=STRING OVERWRITE? INTO TABLE 230 | multipartIdentifier partitionSpec? #loadData 231 | | TRUNCATE TABLE multipartIdentifier partitionSpec? 
#truncateTable 232 | | MSCK REPAIR TABLE multipartIdentifier 233 | (option=(ADD|DROP|SYNC) PARTITIONS)? #repairTable 234 | | op=(ADD | LIST) identifier (STRING | .*?) #manageResource 235 | | SET ROLE .*? #failNativeCommand 236 | | SET TIME ZONE interval #setTimeZone 237 | | SET TIME ZONE timezone=(STRING | LOCAL) #setTimeZone 238 | | SET TIME ZONE .*? #setTimeZone 239 | | SET configKey EQ configValue #setQuotedConfiguration 240 | | SET configKey (EQ .*?)? #setQuotedConfiguration 241 | | SET .*? EQ configValue #setQuotedConfiguration 242 | | SET .*? #setConfiguration 243 | | RESET configKey #resetQuotedConfiguration 244 | | RESET .*? #resetConfiguration 245 | | unsupportedHiveNativeCommands .*? #failNativeCommand 246 | ; 247 | 248 | configKey 249 | : quotedIdentifier 250 | ; 251 | 252 | configValue 253 | : quotedIdentifier 254 | ; 255 | 256 | unsupportedHiveNativeCommands 257 | : kw1=CREATE kw2=ROLE 258 | | kw1=DROP kw2=ROLE 259 | | kw1=GRANT kw2=ROLE? 260 | | kw1=REVOKE kw2=ROLE? 261 | | kw1=SHOW kw2=GRANT 262 | | kw1=SHOW kw2=ROLE kw3=GRANT? 
263 | | kw1=SHOW kw2=PRINCIPALS 264 | | kw1=SHOW kw2=ROLES 265 | | kw1=SHOW kw2=CURRENT kw3=ROLES 266 | | kw1=EXPORT kw2=TABLE 267 | | kw1=IMPORT kw2=TABLE 268 | | kw1=SHOW kw2=COMPACTIONS 269 | | kw1=SHOW kw2=CREATE kw3=TABLE 270 | | kw1=SHOW kw2=TRANSACTIONS 271 | | kw1=SHOW kw2=INDEXES 272 | | kw1=SHOW kw2=LOCKS 273 | | kw1=CREATE kw2=INDEX 274 | | kw1=DROP kw2=INDEX 275 | | kw1=ALTER kw2=INDEX 276 | | kw1=LOCK kw2=TABLE 277 | | kw1=LOCK kw2=DATABASE 278 | | kw1=UNLOCK kw2=TABLE 279 | | kw1=UNLOCK kw2=DATABASE 280 | | kw1=CREATE kw2=TEMPORARY kw3=MACRO 281 | | kw1=DROP kw2=TEMPORARY kw3=MACRO 282 | | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=CLUSTERED 283 | | kw1=ALTER kw2=TABLE tableIdentifier kw3=CLUSTERED kw4=BY 284 | | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=SORTED 285 | | kw1=ALTER kw2=TABLE tableIdentifier kw3=SKEWED kw4=BY 286 | | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=SKEWED 287 | | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=STORED kw5=AS kw6=DIRECTORIES 288 | | kw1=ALTER kw2=TABLE tableIdentifier kw3=SET kw4=SKEWED kw5=LOCATION 289 | | kw1=ALTER kw2=TABLE tableIdentifier kw3=EXCHANGE kw4=PARTITION 290 | | kw1=ALTER kw2=TABLE tableIdentifier kw3=ARCHIVE kw4=PARTITION 291 | | kw1=ALTER kw2=TABLE tableIdentifier kw3=UNARCHIVE kw4=PARTITION 292 | | kw1=ALTER kw2=TABLE tableIdentifier kw3=TOUCH 293 | | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=COMPACT 294 | | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=CONCATENATE 295 | | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=SET kw4=FILEFORMAT 296 | | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=REPLACE kw4=COLUMNS 297 | | kw1=START kw2=TRANSACTION 298 | | kw1=COMMIT 299 | | kw1=ROLLBACK 300 | | kw1=DFS 301 | ; 302 | 303 | createTableHeader 304 | : CREATE TEMPORARY? EXTERNAL? TABLE (IF NOT EXISTS)? multipartIdentifier 305 | ; 306 | 307 | replaceTableHeader 308 | : (CREATE OR)? 
REPLACE TABLE multipartIdentifier 309 | ; 310 | 311 | bucketSpec 312 | : CLUSTERED BY identifierList 313 | (SORTED BY orderedIdentifierList)? 314 | INTO INTEGER_VALUE BUCKETS 315 | ; 316 | 317 | skewSpec 318 | : SKEWED BY identifierList 319 | ON (constantList | nestedConstantList) 320 | (STORED AS DIRECTORIES)? 321 | ; 322 | 323 | locationSpec 324 | : LOCATION STRING 325 | ; 326 | 327 | commentSpec 328 | : COMMENT STRING 329 | ; 330 | 331 | query 332 | : ctes? queryTerm queryOrganization 333 | ; 334 | 335 | insertInto 336 | : INSERT OVERWRITE TABLE? multipartIdentifier (partitionSpec (IF NOT EXISTS)?)? identifierList? #insertOverwriteTable 337 | | INSERT INTO TABLE? multipartIdentifier partitionSpec? (IF NOT EXISTS)? identifierList? #insertIntoTable 338 | | INSERT OVERWRITE LOCAL? DIRECTORY path=STRING rowFormat? createFileFormat? #insertOverwriteHiveDir 339 | | INSERT OVERWRITE LOCAL? DIRECTORY (path=STRING)? tableProvider (OPTIONS options=tablePropertyList)? #insertOverwriteDir 340 | ; 341 | 342 | partitionSpecLocation 343 | : partitionSpec locationSpec? 344 | ; 345 | 346 | partitionSpec 347 | : PARTITION '(' partitionVal (',' partitionVal)* ')' 348 | ; 349 | 350 | partitionVal 351 | : identifier (EQ constant)? 352 | ; 353 | 354 | namespace 355 | : NAMESPACE 356 | | DATABASE 357 | | SCHEMA 358 | ; 359 | 360 | describeFuncName 361 | : qualifiedName 362 | | STRING 363 | | comparisonOperator 364 | | arithmeticOperator 365 | | predicateOperator 366 | ; 367 | 368 | describeColName 369 | : nameParts+=identifier ('.' nameParts+=identifier)* 370 | ; 371 | 372 | ctes 373 | : WITH namedQuery (',' namedQuery)* 374 | ; 375 | 376 | namedQuery 377 | : name=errorCapturingIdentifier (columnAliases=identifierList)? AS? 
'(' query ')' 378 | ; 379 | 380 | tableProvider 381 | : USING multipartIdentifier 382 | ; 383 | 384 | createTableClauses 385 | :((OPTIONS options=tablePropertyList) | 386 | (PARTITIONED BY partitioning=partitionFieldList) | 387 | skewSpec | 388 | bucketSpec | 389 | rowFormat | 390 | createFileFormat | 391 | locationSpec | 392 | commentSpec | 393 | (TBLPROPERTIES tableProps=tablePropertyList))* 394 | ; 395 | 396 | tablePropertyList 397 | : '(' tableProperty (',' tableProperty)* ')' 398 | ; 399 | 400 | tableProperty 401 | : key=tablePropertyKey (EQ? value=tablePropertyValue)? 402 | ; 403 | 404 | tablePropertyKey 405 | : identifier ('.' identifier)* 406 | | STRING 407 | ; 408 | 409 | tablePropertyValue 410 | : INTEGER_VALUE 411 | | DECIMAL_VALUE 412 | | booleanValue 413 | | STRING 414 | ; 415 | 416 | constantList 417 | : '(' constant (',' constant)* ')' 418 | ; 419 | 420 | nestedConstantList 421 | : '(' constantList (',' constantList)* ')' 422 | ; 423 | 424 | createFileFormat 425 | : STORED AS fileFormat 426 | | STORED BY storageHandler 427 | ; 428 | 429 | fileFormat 430 | : INPUTFORMAT inFmt=STRING OUTPUTFORMAT outFmt=STRING #tableFileFormat 431 | | identifier #genericFileFormat 432 | ; 433 | 434 | storageHandler 435 | : STRING (WITH SERDEPROPERTIES tablePropertyList)? 436 | ; 437 | 438 | resource 439 | : identifier STRING 440 | ; 441 | 442 | dmlStatementNoWith 443 | : insertInto queryTerm queryOrganization #singleInsertQuery 444 | | fromClause multiInsertQueryBody+ #multiInsertQuery 445 | | DELETE FROM multipartIdentifier tableAlias whereClause? #deleteFromTable 446 | | UPDATE multipartIdentifier tableAlias setClause whereClause? 
#updateTable 447 | | MERGE INTO target=multipartIdentifier targetAlias=tableAlias 448 | USING (source=multipartIdentifier | 449 | '(' sourceQuery=query')') sourceAlias=tableAlias 450 | ON mergeCondition=booleanExpression 451 | matchedClause* 452 | notMatchedClause* #mergeIntoTable 453 | ; 454 | 455 | queryOrganization 456 | : (ORDER BY order+=sortItem (',' order+=sortItem)*)? 457 | (CLUSTER BY clusterBy+=expression (',' clusterBy+=expression)*)? 458 | (DISTRIBUTE BY distributeBy+=expression (',' distributeBy+=expression)*)? 459 | (SORT BY sort+=sortItem (',' sort+=sortItem)*)? 460 | windowClause? 461 | (LIMIT (ALL | limit=expression))? 462 | ; 463 | 464 | multiInsertQueryBody 465 | : insertInto fromStatementBody 466 | ; 467 | 468 | queryTerm 469 | : queryPrimary #queryTermDefault 470 | | left=queryTerm {legacy_setops_precedence_enabled}? 471 | operator=(INTERSECT | UNION | EXCEPT | SETMINUS) setQuantifier? right=queryTerm #setOperation 472 | | left=queryTerm {!legacy_setops_precedence_enabled}? 473 | operator=INTERSECT setQuantifier? right=queryTerm #setOperation 474 | | left=queryTerm {!legacy_setops_precedence_enabled}? 475 | operator=(UNION | EXCEPT | SETMINUS) setQuantifier? right=queryTerm #setOperation 476 | ; 477 | 478 | queryPrimary 479 | : querySpecification #queryPrimaryDefault 480 | | fromStatement #fromStmt 481 | | TABLE multipartIdentifier #table 482 | | inlineTable #inlineTableDefault1 483 | | '(' query ')' #subquery 484 | ; 485 | 486 | sortItem 487 | : expression ordering=(ASC | DESC)? (NULLS nullOrder=(LAST | FIRST))? 488 | ; 489 | 490 | fromStatement 491 | : fromClause fromStatementBody+ 492 | ; 493 | 494 | fromStatementBody 495 | : transformClause 496 | whereClause? 497 | queryOrganization 498 | | selectClause 499 | lateralView* 500 | whereClause? 501 | aggregationClause? 502 | havingClause? 503 | windowClause? 504 | queryOrganization 505 | ; 506 | 507 | querySpecification 508 | : transformClause 509 | fromClause? 510 | whereClause? 
#transformQuerySpecification 511 | | selectClause 512 | fromClause? 513 | lateralView* 514 | whereClause? 515 | aggregationClause? 516 | havingClause? 517 | windowClause? #regularQuerySpecification 518 | ; 519 | /* Script transform: SELECT TRANSFORM(...), MAP ... or REDUCE ..., followed by USING script; row formats, record writer/reader and the AS output column list are optional. */ 520 | transformClause 521 | : (SELECT kind=TRANSFORM '(' namedExpressionSeq ')' 522 | | kind=MAP namedExpressionSeq 523 | | kind=REDUCE namedExpressionSeq) 524 | inRowFormat=rowFormat? 525 | (RECORDWRITER recordWriter=STRING)? 526 | USING script=STRING 527 | (AS (identifierSeq | colTypeList | ('(' (identifierSeq | colTypeList) ')')))? 528 | outRowFormat=rowFormat? 529 | (RECORDREADER recordReader=STRING)? 530 | ; 531 | 532 | selectClause 533 | : SELECT (hints+=hint)* setQuantifier? namedExpressionSeq 534 | ; 535 | 536 | setClause 537 | : SET assignmentList 538 | ; 539 | /* WHEN MATCHED / WHEN NOT MATCHED branches used by the MERGE INTO alternative of dmlStatementNoWith. */ 540 | matchedClause 541 | : WHEN MATCHED (AND matchedCond=booleanExpression)? THEN matchedAction 542 | ; 543 | notMatchedClause 544 | : WHEN NOT MATCHED (AND notMatchedCond=booleanExpression)? THEN notMatchedAction 545 | ; 546 | 547 | matchedAction 548 | : DELETE 549 | | UPDATE SET ASTERISK 550 | | UPDATE SET assignmentList 551 | ; 552 | 553 | notMatchedAction 554 | : INSERT ASTERISK 555 | | INSERT '(' columns=multipartIdentifierList ')' 556 | VALUES '(' expression (',' expression)* ')' 557 | ; 558 | 559 | assignmentList 560 | : assignment (',' assignment)* 561 | ; 562 | 563 | assignment 564 | : key=multipartIdentifier EQ value=expression 565 | ; 566 | 567 | whereClause 568 | : WHERE booleanExpression 569 | ; 570 | 571 | havingClause 572 | : HAVING booleanExpression 573 | ; 574 | /* Hint block: one or more hint statements; the comma between them is optional. */ 575 | hint 576 | : '/*+' hintStatements+=hintStatement (','? hintStatements+=hintStatement)* '*/' 577 | ; 578 | 579 | hintStatement 580 | : hintName=identifier 581 | | hintName=identifier '(' parameters+=primaryExpression (',' parameters+=primaryExpression)* ')' 582 | ; 583 | 584 | fromClause 585 | : FROM relation (',' relation)* lateralView* pivotClause?
586 | ; 587 | /* GROUP BY expressions with an optional WITH ROLLUP, WITH CUBE or GROUPING SETS (...) suffix, or GROUP BY GROUPING SETS (...) on its own. */ 588 | aggregationClause 589 | : GROUP BY groupingExpressions+=expression (',' groupingExpressions+=expression)* ( 590 | WITH kind=ROLLUP 591 | | WITH kind=CUBE 592 | | kind=GROUPING SETS '(' groupingSet (',' groupingSet)* ')')? 593 | | GROUP BY kind=GROUPING SETS '(' groupingSet (',' groupingSet)* ')' 594 | ; 595 | 596 | groupingSet 597 | : '(' (expression (',' expression)*)? ')' 598 | | expression 599 | ; 600 | 601 | pivotClause 602 | : PIVOT '(' aggregates=namedExpressionSeq FOR pivotColumn IN '(' pivotValues+=pivotValue (',' pivotValues+=pivotValue)* ')' ')' 603 | ; 604 | 605 | pivotColumn 606 | : identifiers+=identifier 607 | | '(' identifiers+=identifier (',' identifiers+=identifier)* ')' 608 | ; 609 | 610 | pivotValue 611 | : expression (AS? identifier)? 612 | ; 613 | 614 | lateralView 615 | : LATERAL VIEW (OUTER)? qualifiedName '(' (expression (',' expression)*)? ')' tblName=identifier (AS? colName+=identifier (',' colName+=identifier)*)? 616 | ; 617 | 618 | setQuantifier 619 | : DISTINCT 620 | | ALL 621 | ; 622 | 623 | relation 624 | : relationPrimary joinRelation* 625 | ; 626 | 627 | joinRelation 628 | : (joinType) JOIN right=relationPrimary joinCriteria? 629 | | NATURAL joinType JOIN right=relationPrimary 630 | ; 631 | /* The first alternative (INNER?) can match nothing, so a bare JOIN is accepted. */ 632 | joinType 633 | : INNER? 634 | | CROSS 635 | | LEFT OUTER? 636 | | LEFT? SEMI 637 | | RIGHT OUTER? 638 | | FULL OUTER? 639 | | LEFT? ANTI 640 | ; 641 | 642 | joinCriteria 643 | : ON booleanExpression 644 | | USING identifierList 645 | ; 646 | 647 | sample 648 | : TABLESAMPLE '(' sampleMethod? ')' 649 | ; 650 | 651 | sampleMethod 652 | : negativeSign=MINUS? percentage=(INTEGER_VALUE | DECIMAL_VALUE) PERCENTLIT #sampleByPercentile 653 | | expression ROWS #sampleByRows 654 | | sampleType=BUCKET numerator=INTEGER_VALUE OUT OF denominator=INTEGER_VALUE 655 | (ON (identifier | qualifiedName '(' ')'))?
#sampleByBucket 656 | | bytes=expression #sampleByBytes 657 | ; 658 | 659 | identifierList 660 | : '(' identifierSeq ')' 661 | ; 662 | 663 | identifierSeq 664 | : ident+=errorCapturingIdentifier (',' ident+=errorCapturingIdentifier)* 665 | ; 666 | 667 | orderedIdentifierList 668 | : '(' orderedIdentifier (',' orderedIdentifier)* ')' 669 | ; 670 | 671 | orderedIdentifier 672 | : ident=errorCapturingIdentifier ordering=(ASC | DESC)? 673 | ; 674 | 675 | identifierCommentList 676 | : '(' identifierComment (',' identifierComment)* ')' 677 | ; 678 | 679 | identifierComment 680 | : identifier commentSpec? 681 | ; 682 | /* One FROM item: table name, parenthesized query, parenthesized relation, inline VALUES table, or table-valued function call. */ 683 | relationPrimary 684 | : multipartIdentifier sample? tableAlias #tableName 685 | | '(' query ')' sample? tableAlias #aliasedQuery 686 | | '(' relation ')' sample? tableAlias #aliasedRelation 687 | | inlineTable #inlineTableDefault2 688 | | functionTable #tableValuedFunction 689 | ; 690 | 691 | inlineTable 692 | : VALUES expression (',' expression)* tableAlias 693 | ; 694 | 695 | functionTable 696 | : funcName=functionName '(' (expression (',' expression)*)? ')' tableAlias 697 | ; 698 | /* The whole alias is optional, so this rule can match the empty string; the alias name must be a strictIdentifier. */ 699 | tableAlias 700 | : (AS? strictIdentifier identifierList?)? 701 | ; 702 | 703 | rowFormat 704 | : ROW FORMAT SERDE name=STRING (WITH SERDEPROPERTIES props=tablePropertyList)? #rowFormatSerde 705 | | ROW FORMAT DELIMITED 706 | (FIELDS TERMINATED BY fieldsTerminatedBy=STRING (ESCAPED BY escapedBy=STRING)?)? 707 | (COLLECTION ITEMS TERMINATED BY collectionItemsTerminatedBy=STRING)? 708 | (MAP KEYS TERMINATED BY keysTerminatedBy=STRING)? 709 | (LINES TERMINATED BY linesSeparatedBy=STRING)? 710 | (NULL DEFINED AS nullDefinedAs=STRING)? #rowFormatDelimited 711 | ; 712 | 713 | multipartIdentifierList 714 | : multipartIdentifier (',' multipartIdentifier)* 715 | ; 716 | 717 | multipartIdentifier 718 | : parts+=errorCapturingIdentifier ('.' parts+=errorCapturingIdentifier)* 719 | ; 720 | 721 | tableIdentifier 722 | : (db=errorCapturingIdentifier '.')?
table=errorCapturingIdentifier 723 | ; 724 | 725 | functionIdentifier 726 | : (db=errorCapturingIdentifier '.')? function=errorCapturingIdentifier 727 | ; 728 | /* Expression with an optional alias: a single name or a parenthesized identifier list; AS is optional. */ 729 | namedExpression 730 | : expression (AS? (name=errorCapturingIdentifier | identifierList))? 731 | ; 732 | 733 | namedExpressionSeq 734 | : namedExpression (',' namedExpression)* 735 | ; 736 | 737 | partitionFieldList 738 | : '(' fields+=partitionField (',' fields+=partitionField)* ')' 739 | ; 740 | 741 | partitionField 742 | : transform #partitionTransform 743 | | colType #partitionColumn 744 | ; 745 | 746 | transform 747 | : qualifiedName #identityTransform 748 | | transformName=identifier 749 | '(' argument+=transformArgument (',' argument+=transformArgument)* ')' #applyTransform 750 | ; 751 | 752 | transformArgument 753 | : qualifiedName 754 | | constant 755 | ; 756 | 757 | expression 758 | : booleanExpression 759 | ; 760 | /* Boolean expressions. AND is listed before OR, so AND binds tighter than OR. */ 761 | booleanExpression 762 | : NOT booleanExpression #logicalNot 763 | | EXISTS '(' query ')' #exists 764 | | valueExpression predicate? #predicated 765 | | left=booleanExpression operator=AND right=booleanExpression #logicalBinary 766 | | left=booleanExpression operator=OR right=booleanExpression #logicalBinary 767 | ; 768 | 769 | predicate 770 | : NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression 771 | | NOT? kind=IN '(' expression (',' expression)* ')' 772 | | NOT? kind=IN '(' query ')' 773 | | NOT? kind=RLIKE pattern=valueExpression 774 | | NOT? kind=LIKE quantifier=(ANY | SOME | ALL) ('('')' | '(' expression (',' expression)* ')') 775 | | NOT? kind=LIKE pattern=valueExpression (ESCAPE escapeChar=STRING)? 776 | | IS NOT? kind=NULL 777 | | IS NOT? kind=(TRUE | FALSE | UNKNOWN) 778 | | IS NOT?
kind=DISTINCT FROM right=valueExpression 779 | ; 780 | /* Value expressions. Binary operators from tightest to loosest binding, following alternative order: * / % DIV; + - ||; &; ^; |; comparison. */ 781 | valueExpression 782 | : primaryExpression #valueExpressionDefault 783 | | operator=(MINUS | PLUS | TILDE) valueExpression #arithmeticUnary 784 | | left=valueExpression operator=(ASTERISK | SLASH | PERCENT | DIV) right=valueExpression #arithmeticBinary 785 | | left=valueExpression operator=(PLUS | MINUS | CONCAT_PIPE) right=valueExpression #arithmeticBinary 786 | | left=valueExpression operator=AMPERSAND right=valueExpression #arithmeticBinary 787 | | left=valueExpression operator=HAT right=valueExpression #arithmeticBinary 788 | | left=valueExpression operator=PIPE right=valueExpression #arithmeticBinary 789 | | left=valueExpression comparisonOperator right=valueExpression #comparison 790 | ; 791 | /* Expression atoms: literals, column references, function calls, CASE, CAST, subqueries, lambdas and the special-syntax functions (EXTRACT, SUBSTRING, TRIM, OVERLAY, POSITION). */ 792 | primaryExpression 793 | : name=(CURRENT_DATE | CURRENT_TIMESTAMP) #currentDatetime 794 | | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase 795 | | CASE value=expression whenClause+ (ELSE elseExpression=expression)? END #simpleCase 796 | | CAST '(' expression AS dataType ')' #cast 797 | | STRUCT '(' (argument+=namedExpression (',' argument+=namedExpression)*)? ')' #struct 798 | | FIRST '(' expression (IGNORE NULLS)? ')' #first 799 | | LAST '(' expression (IGNORE NULLS)? ')' #last 800 | | POSITION '(' substr=valueExpression IN str=valueExpression ')' #position 801 | | constant #constantDefault 802 | | ASTERISK #star 803 | | qualifiedName '.' ASTERISK #star 804 | | '(' namedExpression (',' namedExpression)+ ')' #rowConstructor 805 | | '(' query ')' #subqueryExpression 806 | | functionName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')' 807 | (FILTER '(' WHERE where=booleanExpression ')')? 808 | (nullsOption=(IGNORE | RESPECT) NULLS)? ( OVER windowSpec)?
#functionCall 809 | | identifier '->' expression #lambda 810 | | '(' identifier (',' identifier)+ ')' '->' expression #lambda 811 | | value=primaryExpression '[' index=valueExpression ']' #subscript 812 | | identifier #columnReference 813 | | base=primaryExpression '.' fieldName=identifier #dereference 814 | | '(' expression ')' #parenthesizedExpression 815 | | EXTRACT '(' field=identifier FROM source=valueExpression ')' #extract 816 | | (SUBSTR | SUBSTRING) '(' str=valueExpression (FROM | ',') pos=valueExpression 817 | ((FOR | ',') len=valueExpression)? ')' #substring 818 | | TRIM '(' trimOption=(BOTH | LEADING | TRAILING)? (trimStr=valueExpression)? 819 | FROM srcStr=valueExpression ')' #trim 820 | | OVERLAY '(' input=valueExpression PLACING replace=valueExpression 821 | FROM position=valueExpression (FOR length=valueExpression)? ')' #overlay 822 | ; 823 | /* Literal values. STRING+ groups consecutive string tokens under a single stringLiteral node. */ 824 | constant 825 | : NULL #nullLiteral 826 | | interval #intervalLiteral 827 | | identifier STRING #typeConstructor 828 | | number #numericLiteral 829 | | booleanValue #booleanLiteral 830 | | STRING+ #stringLiteral 831 | ; 832 | 833 | comparisonOperator 834 | : EQ | NEQ | NEQJ | LT | LTE | GT | GTE | NSEQ 835 | ; 836 | 837 | arithmeticOperator 838 | : PLUS | MINUS | ASTERISK | SLASH | PERCENT | DIV | TILDE | AMPERSAND | PIPE | CONCAT_PIPE | HAT 839 | ; 840 | 841 | predicateOperator 842 | : OR | AND | IN | NOT 843 | ; 844 | 845 | booleanValue 846 | : TRUE | FALSE 847 | ; 848 | /* INTERVAL literal; the body after the INTERVAL keyword is optional in this rule. */ 849 | interval 850 | : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)? 851 | ; 852 | 853 | errorCapturingMultiUnitsInterval 854 | : multiUnitsInterval unitToUnitInterval? 855 | ; 856 | 857 | multiUnitsInterval 858 | : (intervalValue unit+=identifier)+ 859 | ; 860 | 861 | errorCapturingUnitToUnitInterval 862 | : body=unitToUnitInterval (error1=multiUnitsInterval | error2=unitToUnitInterval)?
863 | ; 864 | 865 | unitToUnitInterval 866 | : value=intervalValue from=identifier TO to=identifier 867 | ; 868 | 869 | intervalValue 870 | : (PLUS | MINUS)? (INTEGER_VALUE | DECIMAL_VALUE) 871 | | STRING 872 | ; 873 | 874 | colPosition 875 | : position=FIRST | position=AFTER afterCol=errorCapturingIdentifier 876 | ; 877 | /* Type syntax. In the STRUCT alternative, NEQ covers an empty field list written as STRUCT followed by '<>' with no space, which the lexer emits as the single NEQ token. */ 878 | dataType 879 | : complex=ARRAY '<' dataType '>' #complexDataType 880 | | complex=MAP '<' dataType ',' dataType '>' #complexDataType 881 | | complex=STRUCT ('<' complexColTypeList? '>' | NEQ) #complexDataType 882 | | identifier ('(' INTEGER_VALUE (',' INTEGER_VALUE)* ')')? #primitiveDataType 883 | ; 884 | 885 | qualifiedColTypeWithPositionList 886 | : qualifiedColTypeWithPosition (',' qualifiedColTypeWithPosition)* 887 | ; 888 | 889 | qualifiedColTypeWithPosition 890 | : name=multipartIdentifier dataType (NOT NULL)? commentSpec? colPosition? 891 | ; 892 | 893 | colTypeList 894 | : colType (',' colType)* 895 | ; 896 | 897 | colType 898 | : colName=errorCapturingIdentifier dataType (NOT NULL)? commentSpec? 899 | ; 900 | 901 | complexColTypeList 902 | : complexColType (',' complexColType)* 903 | ; 904 | 905 | complexColType 906 | : identifier ':' dataType (NOT NULL)? commentSpec? 907 | ; 908 | 909 | whenClause 910 | : WHEN condition=expression THEN result=expression 911 | ; 912 | /* WINDOW clause: comma-separated named window definitions. */ 913 | windowClause 914 | : WINDOW namedWindow (',' namedWindow)* 915 | ; 916 | 917 | namedWindow 918 | : name=errorCapturingIdentifier AS windowSpec 919 | ; 920 | 921 | windowSpec 922 | : name=errorCapturingIdentifier #windowRef 923 | | '('name=errorCapturingIdentifier')' #windowRef 924 | | '(' 925 | ( CLUSTER BY partition+=expression (',' partition+=expression)* 926 | | ((PARTITION | DISTRIBUTE) BY partition+=expression (',' partition+=expression)*)? 927 | ((ORDER | SORT) BY sortItem (',' sortItem)*)?) 928 | windowFrame?
929 | ')' #windowDef 930 | ; 931 | /* RANGE or ROWS frame with a single start bound, or BETWEEN start AND end. */ 932 | windowFrame 933 | : frameType=RANGE start=frameBound 934 | | frameType=ROWS start=frameBound 935 | | frameType=RANGE BETWEEN start=frameBound AND end=frameBound 936 | | frameType=ROWS BETWEEN start=frameBound AND end=frameBound 937 | ; 938 | 939 | frameBound 940 | : UNBOUNDED boundType=(PRECEDING | FOLLOWING) 941 | | boundType=CURRENT ROW 942 | | expression boundType=(PRECEDING | FOLLOWING) 943 | ; 944 | 945 | qualifiedNameList 946 | : qualifiedName (',' qualifiedName)* 947 | ; 948 | /* FILTER, LEFT and RIGHT are keyword tokens that are additionally allowed as function names. */ 949 | functionName 950 | : qualifiedName 951 | | FILTER 952 | | LEFT 953 | | RIGHT 954 | ; 955 | 956 | qualifiedName 957 | : identifier ('.' identifier)* 958 | ; 959 | 960 | // This rule explicitly captures malformed identifiers such as test-table, which should have been written as `test-table`. 961 | // Use errorCapturingIdentifier instead of identifier only where the symbol that immediately follows cannot be an expression; otherwise 962 | // a valid expression such as "a-b" could be recognized as a single identifier. 963 | errorCapturingIdentifier 964 | : identifier errorCapturingIdentifierExtra 965 | ; 966 | 967 | // Left-factored tail of errorCapturingIdentifier: one or more MINUS-identifier suffixes (errorIdent) or nothing (realIdent). 968 | errorCapturingIdentifierExtra 969 | : (MINUS identifier)+ #errorIdent 970 | | #realIdent 971 | ; 972 | /* strictNonReserved keywords are accepted as identifiers only when SQL_standard_keyword_behavior is false. */ 973 | identifier 974 | : strictIdentifier 975 | | {!SQL_standard_keyword_behavior}? strictNonReserved 976 | ; 977 | 978 | strictIdentifier 979 | : IDENTIFIER #unquotedIdentifier 980 | | quotedIdentifier #quotedIdentifierAlternative 981 | | {SQL_standard_keyword_behavior}? ansiNonReserved #unquotedIdentifier 982 | | {!SQL_standard_keyword_behavior}? nonReserved #unquotedIdentifier 983 | ; 984 | 985 | quotedIdentifier 986 | : BACKQUOTED_IDENTIFIER 987 | ; 988 | 989 | number 990 | : {!legacy_exponent_literal_as_decimal_enabled}? MINUS? EXPONENT_VALUE #exponentLiteral 991 | | {!legacy_exponent_literal_as_decimal_enabled}? MINUS? DECIMAL_VALUE #decimalLiteral 992 | | {legacy_exponent_literal_as_decimal_enabled}? MINUS?
(EXPONENT_VALUE | DECIMAL_VALUE) #legacyDecimalLiteral 993 | | MINUS? INTEGER_VALUE #integerLiteral 994 | | MINUS? BIGINT_LITERAL #bigIntLiteral 995 | | MINUS? SMALLINT_LITERAL #smallIntLiteral 996 | | MINUS? TINYINT_LITERAL #tinyIntLiteral 997 | | MINUS? DOUBLE_LITERAL #doubleLiteral 998 | | MINUS? FLOAT_LITERAL #floatLiteral 999 | | MINUS? BIGDECIMAL_LITERAL #bigDecimalLiteral 1000 | ; 1001 | /* One column alteration: a new TYPE, a new comment, a new position, or SET / DROP NOT NULL. */ 1002 | alterColumnAction 1003 | : TYPE dataType 1004 | | commentSpec 1005 | | colPosition 1006 | | setOrDrop=(SET | DROP) NOT NULL 1007 | ; 1008 | 1009 | // When `SQL_standard_keyword_behavior=true`, there are 2 kinds of keywords in Spark SQL. 1010 | // - Reserved keywords: 1011 | // Keywords that are reserved and can't be used as identifiers for table, view, column, 1012 | // function, alias, etc. 1013 | // - Non-reserved keywords: 1014 | // Keywords that have a special meaning only in particular contexts and can be used as 1015 | // identifiers in other contexts. For example, `EXPLAIN SELECT ...` is a command, but EXPLAIN 1016 | // can be used as an identifier in other places. 1017 | // You can find the full keywords list by searching "Start of the keywords list" in this file. 1018 | // The non-reserved keywords are listed below. Keywords not in this list are reserved keywords.
1019 | /* Referenced from strictIdentifier when SQL_standard_keyword_behavior is true. */ ansiNonReserved 1020 | //--ANSI-NON-RESERVED-START 1021 | : ADD 1022 | | AFTER 1023 | | ALTER 1024 | | ANALYZE 1025 | | ANTI 1026 | | ARCHIVE 1027 | | ARRAY 1028 | | ASC 1029 | | AT 1030 | | BETWEEN 1031 | | BUCKET 1032 | | BUCKETS 1033 | | BY 1034 | | CACHE 1035 | | CASCADE 1036 | | CHANGE 1037 | | CLEAR 1038 | | CLUSTER 1039 | | CLUSTERED 1040 | | CODEGEN 1041 | | COLLECTION 1042 | | COLUMNS 1043 | | COMMENT 1044 | | COMMIT 1045 | | COMPACT 1046 | | COMPACTIONS 1047 | | COMPUTE 1048 | | CONCATENATE 1049 | | COST 1050 | | CUBE 1051 | | CURRENT 1052 | | DATA 1053 | | DATABASE 1054 | | DATABASES 1055 | | DBPROPERTIES 1056 | | DEFINED 1057 | | DELETE 1058 | | DELIMITED 1059 | | DESC 1060 | | DESCRIBE 1061 | | DFS 1062 | | DIRECTORIES 1063 | | DIRECTORY 1064 | | DISTRIBUTE 1065 | | DIV 1066 | | DROP 1067 | | ESCAPED 1068 | | EXCHANGE 1069 | | EXISTS 1070 | | EXPLAIN 1071 | | EXPORT 1072 | | EXTENDED 1073 | | EXTERNAL 1074 | | EXTRACT 1075 | | FIELDS 1076 | | FILEFORMAT 1077 | | FIRST 1078 | | FOLLOWING 1079 | | FORMAT 1080 | | FORMATTED 1081 | | FUNCTION 1082 | | FUNCTIONS 1083 | | GLOBAL 1084 | | GROUPING 1085 | | IF 1086 | | IGNORE 1087 | | IMPORT 1088 | | INDEX 1089 | | INDEXES 1090 | | INPATH 1091 | | INPUTFORMAT 1092 | | INSERT 1093 | | INTERVAL 1094 | | ITEMS 1095 | | KEYS 1096 | | LAST 1097 | | LATERAL 1098 | | LAZY 1099 | | LIKE 1100 | | LIMIT 1101 | | LINES 1102 | | LIST 1103 | | LOAD 1104 | | LOCAL 1105 | | LOCATION 1106 | | LOCK 1107 | | LOCKS 1108 | | LOGICAL 1109 | | MACRO 1110 | | MAP 1111 | | MATCHED 1112 | | MERGE 1113 | | MSCK 1114 | | NAMESPACE 1115 | | NAMESPACES 1116 | | NO 1117 | | NULLS 1118 | | OF 1119 | | OPTION 1120 | | OPTIONS 1121 | | OUT 1122 | | OUTPUTFORMAT 1123 | | OVER 1124 | | OVERLAY 1125 | | OVERWRITE 1126 | | PARTITION 1127 | | PARTITIONED 1128 | | PARTITIONS 1129 | | PERCENTLIT 1130 | | PIVOT 1131 | | PLACING 1132 | | POSITION 1133 | | PRECEDING 1134 | | PRINCIPALS 1135 | | PROPERTIES 1136 | | PURGE 1137 | | QUERY 1138 | |
RANGE 1139 | | RECORDREADER 1140 | | RECORDWRITER 1141 | | RECOVER 1142 | | REDUCE 1143 | | REFRESH 1144 | | RENAME 1145 | | REPAIR 1146 | | REPLACE 1147 | | RESET 1148 | | RESPECT 1149 | | RESTRICT 1150 | | REVOKE 1151 | | RLIKE 1152 | | ROLE 1153 | | ROLES 1154 | | ROLLBACK 1155 | | ROLLUP 1156 | | ROW 1157 | | ROWS 1158 | | SCHEMA 1159 | | SEMI 1160 | | SEPARATED 1161 | | SERDE 1162 | | SERDEPROPERTIES 1163 | | SET 1164 | | SETMINUS 1165 | | SETS 1166 | | SHOW 1167 | | SKEWED 1168 | | SORT 1169 | | SORTED 1170 | | START 1171 | | STATISTICS 1172 | | STORED 1173 | | STRATIFY 1174 | | STRUCT 1175 | | SUBSTR 1176 | | SUBSTRING 1177 | | SYNC 1178 | | TABLES 1179 | | TABLESAMPLE 1180 | | TBLPROPERTIES 1181 | | TEMPORARY 1182 | | TERMINATED 1183 | | TOUCH 1184 | | TRANSACTION 1185 | | TRANSACTIONS 1186 | | TRANSFORM 1187 | | TRIM 1188 | | TRUE 1189 | | TRUNCATE 1190 | | TYPE 1191 | | UNARCHIVE 1192 | | UNBOUNDED 1193 | | UNCACHE 1194 | | UNLOCK 1195 | | UNSET 1196 | | UPDATE 1197 | | USE 1198 | | VALUES 1199 | | VIEW 1200 | | VIEWS 1201 | | WINDOW 1202 | | ZONE 1203 | //--ANSI-NON-RESERVED-END 1204 | ; 1205 | 1206 | // When `SQL_standard_keyword_behavior=false`, there are 2 kinds of keywords in Spark SQL. 1207 | // - Non-reserved keywords: 1208 | // Same definition as the one when `SQL_standard_keyword_behavior=true`. 1209 | // - Strict-non-reserved keywords: 1210 | // A strict version of non-reserved keywords, which cannot be used as a table alias. 1211 | // You can find the full keywords list by searching "Start of the keywords list" in this file. 1212 | // The strict-non-reserved keywords are listed in `strictNonReserved`. 1213 | // The non-reserved keywords are listed in `nonReserved`. 1214 | // These 2 together contain all the keywords.
1215 | /* Join and set-operation keywords. Only the identifier rule accepts them (when SQL_standard_keyword_behavior is false); strictIdentifier, which tableAlias uses, does not. */ strictNonReserved 1216 | : ANTI 1217 | | CROSS 1218 | | EXCEPT 1219 | | FULL 1220 | | INNER 1221 | | INTERSECT 1222 | | JOIN 1223 | | LEFT 1224 | | NATURAL 1225 | | ON 1226 | | RIGHT 1227 | | SEMI 1228 | | SETMINUS 1229 | | UNION 1230 | | USING 1231 | ; 1232 | /* Referenced from strictIdentifier when SQL_standard_keyword_behavior is false. */ 1233 | nonReserved 1234 | //--DEFAULT-NON-RESERVED-START 1235 | : ADD 1236 | | AFTER 1237 | | ALL 1238 | | ALTER 1239 | | ANALYZE 1240 | | AND 1241 | | ANY 1242 | | ARCHIVE 1243 | | ARRAY 1244 | | AS 1245 | | ASC 1246 | | AT 1247 | | AUTHORIZATION 1248 | | BETWEEN 1249 | | BOTH 1250 | | BUCKET 1251 | | BUCKETS 1252 | | BY 1253 | | CACHE 1254 | | CASCADE 1255 | | CASE 1256 | | CAST 1257 | | CHANGE 1258 | | CHECK 1259 | | CLEAR 1260 | | CLUSTER 1261 | | CLUSTERED 1262 | | CODEGEN 1263 | | COLLATE 1264 | | COLLECTION 1265 | | COLUMN 1266 | | COLUMNS 1267 | | COMMENT 1268 | | COMMIT 1269 | | COMPACT 1270 | | COMPACTIONS 1271 | | COMPUTE 1272 | | CONCATENATE 1273 | | CONSTRAINT 1274 | | COST 1275 | | CREATE 1276 | | CUBE 1277 | | CURRENT 1278 | | CURRENT_DATE 1279 | | CURRENT_TIME 1280 | | CURRENT_TIMESTAMP 1281 | | CURRENT_USER 1282 | | DATA 1283 | | DATABASE 1284 | | DATABASES 1285 | | DBPROPERTIES 1286 | | DEFINED 1287 | | DELETE 1288 | | DELIMITED 1289 | | DESC 1290 | | DESCRIBE 1291 | | DFS 1292 | | DIRECTORIES 1293 | | DIRECTORY 1294 | | DISTINCT 1295 | | DISTRIBUTE 1296 | | DIV 1297 | | DROP 1298 | | ELSE 1299 | | END 1300 | | ESCAPE 1301 | | ESCAPED 1302 | | EXCHANGE 1303 | | EXISTS 1304 | | EXPLAIN 1305 | | EXPORT 1306 | | EXTENDED 1307 | | EXTERNAL 1308 | | EXTRACT 1309 | | FALSE 1310 | | FETCH 1311 | | FILTER 1312 | | FIELDS 1313 | | FILEFORMAT 1314 | | FIRST 1315 | | FOLLOWING 1316 | | FOR 1317 | | FOREIGN 1318 | | FORMAT 1319 | | FORMATTED 1320 | | FROM 1321 | | FUNCTION 1322 | | FUNCTIONS 1323 | | GLOBAL 1324 | | GRANT 1325 | | GROUP 1326 | | GROUPING 1327 | | HAVING 1328 | | IF 1329 | | IGNORE 1330 | | IMPORT 1331 | | IN 1332 | | INDEX 1333 | | INDEXES 1334 | | INPATH 1335 | | INPUTFORMAT
1336 | | INSERT 1337 | | INTERVAL 1338 | | INTO 1339 | | IS 1340 | | ITEMS 1341 | | KEYS 1342 | | LAST 1343 | | LATERAL 1344 | | LAZY 1345 | | LEADING 1346 | | LIKE 1347 | | LIMIT 1348 | | LINES 1349 | | LIST 1350 | | LOAD 1351 | | LOCAL 1352 | | LOCATION 1353 | | LOCK 1354 | | LOCKS 1355 | | LOGICAL 1356 | | MACRO 1357 | | MAP 1358 | | MATCHED 1359 | | MERGE 1360 | | MSCK 1361 | | NAMESPACE 1362 | | NAMESPACES 1363 | | NO 1364 | | NOT 1365 | | NULL 1366 | | NULLS 1367 | | OF 1368 | | ONLY 1369 | | OPTION 1370 | | OPTIONS 1371 | | OR 1372 | | ORDER 1373 | | OUT 1374 | | OUTER 1375 | | OUTPUTFORMAT 1376 | | OVER 1377 | | OVERLAPS 1378 | | OVERLAY 1379 | | OVERWRITE 1380 | | PARTITION 1381 | | PARTITIONED 1382 | | PARTITIONS 1383 | | PERCENTLIT 1384 | | PIVOT 1385 | | PLACING 1386 | | POSITION 1387 | | PRECEDING 1388 | | PRIMARY 1389 | | PRINCIPALS 1390 | | PROPERTIES 1391 | | PURGE 1392 | | QUERY 1393 | | RANGE 1394 | | RECORDREADER 1395 | | RECORDWRITER 1396 | | RECOVER 1397 | | REDUCE 1398 | | REFERENCES 1399 | | REFRESH 1400 | | RENAME 1401 | | REPAIR 1402 | | REPLACE 1403 | | RESET 1404 | | RESPECT 1405 | | RESTRICT 1406 | | REVOKE 1407 | | RLIKE 1408 | | ROLE 1409 | | ROLES 1410 | | ROLLBACK 1411 | | ROLLUP 1412 | | ROW 1413 | | ROWS 1414 | | SCHEMA 1415 | | SELECT 1416 | | SEPARATED 1417 | | SERDE 1418 | | SERDEPROPERTIES 1419 | | SESSION_USER 1420 | | SET 1421 | | SETS 1422 | | SHOW 1423 | | SKEWED 1424 | | SOME 1425 | | SORT 1426 | | SORTED 1427 | | START 1428 | | STATISTICS 1429 | | STORED 1430 | | STRATIFY 1431 | | STRUCT 1432 | | SUBSTR 1433 | | SUBSTRING 1434 | | SYNC 1435 | | TABLE 1436 | | TABLES 1437 | | TABLESAMPLE 1438 | | TBLPROPERTIES 1439 | | TEMPORARY 1440 | | TERMINATED 1441 | | THEN 1442 | | TIME 1443 | | TO 1444 | | TOUCH 1445 | | TRAILING 1446 | | TRANSACTION 1447 | | TRANSACTIONS 1448 | | TRANSFORM 1449 | | TRIM 1450 | | TRUE 1451 | | TRUNCATE 1452 | | TYPE 1453 | | UNARCHIVE 1454 | | UNBOUNDED 1455 | | UNCACHE 1456 | | UNIQUE 1457 | |
UNKNOWN 1458 | | UNLOCK 1459 | | UNSET 1460 | | UPDATE 1461 | | USE 1462 | | USER 1463 | | VALUES 1464 | | VIEW 1465 | | VIEWS 1466 | | WHEN 1467 | | WHERE 1468 | | WINDOW 1469 | | WITH 1470 | | ZONE 1471 | //--DEFAULT-NON-RESERVED-END 1472 | ; 1473 | 1474 | // NOTE: If you add a new token in the list below, you should update the list of keywords 1475 | // and reserved tag in `docs/sql-ref-ansi-compliance.md#sql-keywords`. 1476 | 1477 | //============================ 1478 | // Start of the keywords list 1479 | //============================ 1480 | //--SPARK-KEYWORD-LIST-START 1481 | ADD: 'ADD'; 1482 | AFTER: 'AFTER'; 1483 | ALL: 'ALL'; 1484 | ALTER: 'ALTER'; 1485 | ANALYZE: 'ANALYZE'; 1486 | AND: 'AND'; 1487 | ANTI: 'ANTI'; 1488 | ANY: 'ANY'; 1489 | ARCHIVE: 'ARCHIVE'; 1490 | ARRAY: 'ARRAY'; 1491 | AS: 'AS'; 1492 | ASC: 'ASC'; 1493 | AT: 'AT'; 1494 | AUTHORIZATION: 'AUTHORIZATION'; 1495 | BETWEEN: 'BETWEEN'; 1496 | BOTH: 'BOTH'; 1497 | BUCKET: 'BUCKET'; 1498 | BUCKETS: 'BUCKETS'; 1499 | BY: 'BY'; 1500 | CACHE: 'CACHE'; 1501 | CASCADE: 'CASCADE'; 1502 | CASE: 'CASE'; 1503 | CAST: 'CAST'; 1504 | CHANGE: 'CHANGE'; 1505 | CHECK: 'CHECK'; 1506 | CLEAR: 'CLEAR'; 1507 | CLUSTER: 'CLUSTER'; 1508 | CLUSTERED: 'CLUSTERED'; 1509 | CODEGEN: 'CODEGEN'; 1510 | COLLATE: 'COLLATE'; 1511 | COLLECTION: 'COLLECTION'; 1512 | COLUMN: 'COLUMN'; 1513 | COLUMNS: 'COLUMNS'; 1514 | COMMENT: 'COMMENT'; 1515 | COMMIT: 'COMMIT'; 1516 | COMPACT: 'COMPACT'; 1517 | COMPACTIONS: 'COMPACTIONS'; 1518 | COMPUTE: 'COMPUTE'; 1519 | CONCATENATE: 'CONCATENATE'; 1520 | CONSTRAINT: 'CONSTRAINT'; 1521 | COST: 'COST'; 1522 | CREATE: 'CREATE'; 1523 | CROSS: 'CROSS'; 1524 | CUBE: 'CUBE'; 1525 | CURRENT: 'CURRENT'; 1526 | CURRENT_DATE: 'CURRENT_DATE'; 1527 | CURRENT_TIME: 'CURRENT_TIME'; 1528 | CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'; 1529 | CURRENT_USER: 'CURRENT_USER'; 1530 | DATA: 'DATA'; 1531 | DATABASE: 'DATABASE'; 1532 | DATABASES: 'DATABASES' | 'SCHEMAS'; /* the word SCHEMAS is lexed as this same DATABASES token */ 1533 | DBPROPERTIES: 'DBPROPERTIES'; 1534 |
DEFINED: 'DEFINED'; /* Keyword literals are upper case only. NOTE(review): presumably the character stream is upper-cased before lexing; confirm in the parser wrapper. */ 1535 | DELETE: 'DELETE'; 1536 | DELIMITED: 'DELIMITED'; 1537 | DESC: 'DESC'; 1538 | DESCRIBE: 'DESCRIBE'; 1539 | DFS: 'DFS'; 1540 | DIRECTORIES: 'DIRECTORIES'; 1541 | DIRECTORY: 'DIRECTORY'; 1542 | DISTINCT: 'DISTINCT'; 1543 | DISTRIBUTE: 'DISTRIBUTE'; 1544 | DIV: 'DIV'; 1545 | DROP: 'DROP'; 1546 | ELSE: 'ELSE'; 1547 | END: 'END'; 1548 | ESCAPE: 'ESCAPE'; 1549 | ESCAPED: 'ESCAPED'; 1550 | EXCEPT: 'EXCEPT'; 1551 | EXCHANGE: 'EXCHANGE'; 1552 | EXISTS: 'EXISTS'; 1553 | EXPLAIN: 'EXPLAIN'; 1554 | EXPORT: 'EXPORT'; 1555 | EXTENDED: 'EXTENDED'; 1556 | EXTERNAL: 'EXTERNAL'; 1557 | EXTRACT: 'EXTRACT'; 1558 | FALSE: 'FALSE'; 1559 | FETCH: 'FETCH'; 1560 | FIELDS: 'FIELDS'; 1561 | FILTER: 'FILTER'; 1562 | FILEFORMAT: 'FILEFORMAT'; 1563 | FIRST: 'FIRST'; 1564 | FOLLOWING: 'FOLLOWING'; 1565 | FOR: 'FOR'; 1566 | FOREIGN: 'FOREIGN'; 1567 | FORMAT: 'FORMAT'; 1568 | FORMATTED: 'FORMATTED'; 1569 | FROM: 'FROM'; 1570 | FULL: 'FULL'; 1571 | FUNCTION: 'FUNCTION'; 1572 | FUNCTIONS: 'FUNCTIONS'; 1573 | GLOBAL: 'GLOBAL'; 1574 | GRANT: 'GRANT'; 1575 | GROUP: 'GROUP'; 1576 | GROUPING: 'GROUPING'; 1577 | HAVING: 'HAVING'; 1578 | IF: 'IF'; 1579 | IGNORE: 'IGNORE'; 1580 | IMPORT: 'IMPORT'; 1581 | IN: 'IN'; 1582 | INDEX: 'INDEX'; 1583 | INDEXES: 'INDEXES'; 1584 | INNER: 'INNER'; 1585 | INPATH: 'INPATH'; 1586 | INPUTFORMAT: 'INPUTFORMAT'; 1587 | INSERT: 'INSERT'; 1588 | INTERSECT: 'INTERSECT'; 1589 | INTERVAL: 'INTERVAL'; 1590 | INTO: 'INTO'; 1591 | IS: 'IS'; 1592 | ITEMS: 'ITEMS'; 1593 | JOIN: 'JOIN'; 1594 | KEYS: 'KEYS'; 1595 | LAST: 'LAST'; 1596 | LATERAL: 'LATERAL'; 1597 | LAZY: 'LAZY'; 1598 | LEADING: 'LEADING'; 1599 | LEFT: 'LEFT'; 1600 | LIKE: 'LIKE'; 1601 | LIMIT: 'LIMIT'; 1602 | LINES: 'LINES'; 1603 | LIST: 'LIST'; 1604 | LOAD: 'LOAD'; 1605 | LOCAL: 'LOCAL'; 1606 | LOCATION: 'LOCATION'; 1607 | LOCK: 'LOCK'; 1608 | LOCKS: 'LOCKS'; 1609 | LOGICAL: 'LOGICAL'; 1610 | MACRO: 'MACRO'; 1611 | MAP: 'MAP'; 1612 | MATCHED: 'MATCHED'; 1613 | MERGE: 'MERGE'; 1614 | MSCK: 'MSCK';
1615 | NAMESPACE: 'NAMESPACE'; 1616 | NAMESPACES: 'NAMESPACES'; 1617 | NATURAL: 'NATURAL'; 1618 | NO: 'NO'; 1619 | NOT: 'NOT' | '!'; /* a lone '!' is lexed as NOT as well */ 1620 | NULL: 'NULL'; 1621 | NULLS: 'NULLS'; 1622 | OF: 'OF'; 1623 | ON: 'ON'; 1624 | ONLY: 'ONLY'; 1625 | OPTION: 'OPTION'; 1626 | OPTIONS: 'OPTIONS'; 1627 | OR: 'OR'; 1628 | ORDER: 'ORDER'; 1629 | OUT: 'OUT'; 1630 | OUTER: 'OUTER'; 1631 | OUTPUTFORMAT: 'OUTPUTFORMAT'; 1632 | OVER: 'OVER'; 1633 | OVERLAPS: 'OVERLAPS'; 1634 | OVERLAY: 'OVERLAY'; 1635 | OVERWRITE: 'OVERWRITE'; 1636 | PARTITION: 'PARTITION'; 1637 | PARTITIONED: 'PARTITIONED'; 1638 | PARTITIONS: 'PARTITIONS'; 1639 | PERCENTLIT: 'PERCENT'; /* the word PERCENT; the '%' operator is the separate PERCENT token */ 1640 | PIVOT: 'PIVOT'; 1641 | PLACING: 'PLACING'; 1642 | POSITION: 'POSITION'; 1643 | PRECEDING: 'PRECEDING'; 1644 | PRIMARY: 'PRIMARY'; 1645 | PRINCIPALS: 'PRINCIPALS'; 1646 | PROPERTIES: 'PROPERTIES'; 1647 | PURGE: 'PURGE'; 1648 | QUERY: 'QUERY'; 1649 | RANGE: 'RANGE'; 1650 | RECORDREADER: 'RECORDREADER'; 1651 | RECORDWRITER: 'RECORDWRITER'; 1652 | RECOVER: 'RECOVER'; 1653 | REDUCE: 'REDUCE'; 1654 | REFERENCES: 'REFERENCES'; 1655 | REFRESH: 'REFRESH'; 1656 | RENAME: 'RENAME'; 1657 | REPAIR: 'REPAIR'; 1658 | REPLACE: 'REPLACE'; 1659 | RESET: 'RESET'; 1660 | RESPECT: 'RESPECT'; 1661 | RESTRICT: 'RESTRICT'; 1662 | REVOKE: 'REVOKE'; 1663 | RIGHT: 'RIGHT'; 1664 | RLIKE: 'RLIKE' | 'REGEXP'; /* the word REGEXP is lexed as this same RLIKE token */ 1665 | ROLE: 'ROLE'; 1666 | ROLES: 'ROLES'; 1667 | ROLLBACK: 'ROLLBACK'; 1668 | ROLLUP: 'ROLLUP'; 1669 | ROW: 'ROW'; 1670 | ROWS: 'ROWS'; 1671 | SCHEMA: 'SCHEMA'; 1672 | SELECT: 'SELECT'; 1673 | SEMI: 'SEMI'; 1674 | SEPARATED: 'SEPARATED'; 1675 | SERDE: 'SERDE'; 1676 | SERDEPROPERTIES: 'SERDEPROPERTIES'; 1677 | SESSION_USER: 'SESSION_USER'; 1678 | SET: 'SET'; 1679 | SETMINUS: 'MINUS'; /* the word MINUS (set difference); the '-' operator is the separate MINUS token */ 1680 | SETS: 'SETS'; 1681 | SHOW: 'SHOW'; 1682 | SKEWED: 'SKEWED'; 1683 | SOME: 'SOME'; 1684 | SORT: 'SORT'; 1685 | SORTED: 'SORTED'; 1686 | START: 'START'; 1687 | STATISTICS: 'STATISTICS'; 1688 | STORED: 'STORED'; 1689 | STRATIFY: 'STRATIFY'; 1690 | STRUCT: 'STRUCT'; 1691 |
SUBSTR: 'SUBSTR'; 1692 | SUBSTRING: 'SUBSTRING'; 1693 | SYNC: 'SYNC'; 1694 | TABLE: 'TABLE'; 1695 | TABLES: 'TABLES'; 1696 | TABLESAMPLE: 'TABLESAMPLE'; 1697 | TBLPROPERTIES: 'TBLPROPERTIES'; 1698 | TEMPORARY: 'TEMPORARY' | 'TEMP'; /* the word TEMP is lexed as this same TEMPORARY token */ 1699 | TERMINATED: 'TERMINATED'; 1700 | THEN: 'THEN'; 1701 | TIME: 'TIME'; 1702 | TO: 'TO'; 1703 | TOUCH: 'TOUCH'; 1704 | TRAILING: 'TRAILING'; 1705 | TRANSACTION: 'TRANSACTION'; 1706 | TRANSACTIONS: 'TRANSACTIONS'; 1707 | TRANSFORM: 'TRANSFORM'; 1708 | TRIM: 'TRIM'; 1709 | TRUE: 'TRUE'; 1710 | TRUNCATE: 'TRUNCATE'; 1711 | TYPE: 'TYPE'; 1712 | UNARCHIVE: 'UNARCHIVE'; 1713 | UNBOUNDED: 'UNBOUNDED'; 1714 | UNCACHE: 'UNCACHE'; 1715 | UNION: 'UNION'; 1716 | UNIQUE: 'UNIQUE'; 1717 | UNKNOWN: 'UNKNOWN'; 1718 | UNLOCK: 'UNLOCK'; 1719 | UNSET: 'UNSET'; 1720 | UPDATE: 'UPDATE'; 1721 | USE: 'USE'; 1722 | USER: 'USER'; 1723 | USING: 'USING'; 1724 | VALUES: 'VALUES'; 1725 | VIEW: 'VIEW'; 1726 | VIEWS: 'VIEWS'; 1727 | WHEN: 'WHEN'; 1728 | WHERE: 'WHERE'; 1729 | WINDOW: 'WINDOW'; 1730 | WITH: 'WITH'; 1731 | ZONE: 'ZONE'; 1732 | //--SPARK-KEYWORD-LIST-END 1733 | //============================ 1734 | // End of the keywords list 1735 | //============================ 1736 | /* Comparison operators; several tokens accept more than one spelling. */ 1737 | EQ : '=' | '=='; 1738 | NSEQ: '<=>'; 1739 | NEQ : '<>'; 1740 | NEQJ: '!='; 1741 | LT : '<'; 1742 | LTE : '<=' | '!>'; 1743 | GT : '>'; 1744 | GTE : '>=' | '!<'; 1745 | /* Arithmetic, bitwise and concatenation operators. */ 1746 | PLUS: '+'; 1747 | MINUS: '-'; 1748 | ASTERISK: '*'; 1749 | SLASH: '/'; 1750 | PERCENT: '%'; 1751 | TILDE: '~'; 1752 | AMPERSAND: '&'; 1753 | PIPE: '|'; 1754 | CONCAT_PIPE: '||'; 1755 | HAT: '^'; 1756 | /* Single- or double-quoted string; a backslash consumes the character that follows it. */ 1757 | STRING 1758 | : '\'' ( ~('\''|'\\') | ('\\' .) )* '\'' 1759 | | '"' ( ~('"'|'\\') | ('\\' .)
)* '"' 1760 | ; 1761 | /* Suffixed numeric literals: L (bigint), S (smallint), Y (tinyint); further below F (float), D (double) and BD (big decimal). */ 1762 | BIGINT_LITERAL 1763 | : DIGIT+ 'L' 1764 | ; 1765 | 1766 | SMALLINT_LITERAL 1767 | : DIGIT+ 'S' 1768 | ; 1769 | 1770 | TINYINT_LITERAL 1771 | : DIGIT+ 'Y' 1772 | ; 1773 | 1774 | INTEGER_VALUE 1775 | : DIGIT+ 1776 | ; 1777 | 1778 | EXPONENT_VALUE 1779 | : DIGIT+ EXPONENT 1780 | | DECIMAL_DIGITS EXPONENT {isValidDecimal()}? 1781 | ; 1782 | 1783 | DECIMAL_VALUE 1784 | : DECIMAL_DIGITS {isValidDecimal()}? 1785 | ; 1786 | 1787 | FLOAT_LITERAL 1788 | : DIGIT+ EXPONENT? 'F' 1789 | | DECIMAL_DIGITS EXPONENT? 'F' {isValidDecimal()}? 1790 | ; 1791 | 1792 | DOUBLE_LITERAL 1793 | : DIGIT+ EXPONENT? 'D' 1794 | | DECIMAL_DIGITS EXPONENT? 'D' {isValidDecimal()}? 1795 | ; 1796 | 1797 | BIGDECIMAL_LITERAL 1798 | : DIGIT+ EXPONENT? 'BD' 1799 | | DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}? 1800 | ; 1801 | /* Unquoted identifier: letters, digits and underscores in any position. LETTER is [A-Z] only. NOTE(review): presumably the input is upper-cased before lexing; confirm in the parser wrapper. */ 1802 | IDENTIFIER 1803 | : (LETTER | DIGIT | '_')+ 1804 | ; 1805 | /* Backquoted identifier; a doubled backquote may appear inside it. */ 1806 | BACKQUOTED_IDENTIFIER 1807 | : '`' ( ~'`' | '``' )* '`' 1808 | ; 1809 | 1810 | fragment DECIMAL_DIGITS 1811 | : DIGIT+ '.' DIGIT* 1812 | | '.' DIGIT+ 1813 | ; 1814 | 1815 | fragment EXPONENT 1816 | : 'E' [+-]? DIGIT+ 1817 | ; 1818 | 1819 | fragment DIGIT 1820 | : [0-9] 1821 | ; 1822 | 1823 | fragment LETTER 1824 | : [A-Z] 1825 | ; 1826 | /* Comments and whitespace are routed to the HIDDEN channel rather than dropped. */ 1827 | SIMPLE_COMMENT 1828 | : '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN) 1829 | ; 1830 | 1831 | BRACKETED_COMMENT 1832 | : '/*' {!isHint()}? (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN) 1833 | ; 1834 | 1835 | WS 1836 | : [ \r\n\t]+ -> channel(HIDDEN) 1837 | ; 1838 | 1839 | // Catch-all for anything we can't recognize. 1840 | // We use this to be able to ignore and recover all the text 1841 | // when splitting statements with DelimiterLexer 1842 | UNRECOGNIZED 1843 | : .
1844 | ; 1845 | -------------------------------------------------------------------------------- /src/main/java/cn/ganjiacheng/enums/SqlEngineEnum.java: -------------------------------------------------------------------------------- 1 | package cn.ganjiacheng.enums; 2 | 3 | /** 4 | * @ClassName SqlTypeEnum 5 | * @description: 数据库类型 6 | * @author: again 7 | * @Date: 2021/3/10 4:08 下午 8 | */ 9 | public enum SqlEngineEnum { 10 | MYSQL, HIVE, SPARK, PRESTO; 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/cn/ganjiacheng/enums/SqlTypeEnum.java: -------------------------------------------------------------------------------- 1 | package cn.ganjiacheng.enums; 2 | 3 | /** 4 | * @ClassName SqlTypeEnum 5 | * @description: sql类型枚举 6 | * @author: again 7 | * @Date: 2021/3/10 4:08 下午 8 | */ 9 | public enum SqlTypeEnum { 10 | SELECT, CREATE, INSERT, UPDATE, DELETE; 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/cn/ganjiacheng/hive/HiveSqlFieldLineageParser.java: -------------------------------------------------------------------------------- 1 | package cn.ganjiacheng.hive; 2 | 3 | import cn.ganjiacheng.antlr.HiveSqlBaseVisitor; 4 | import cn.ganjiacheng.antlr.HiveSqlParser; 5 | import cn.ganjiacheng.model.lineage.*; 6 | import org.antlr.v4.runtime.ParserRuleContext; 7 | import org.antlr.v4.runtime.RuleContext; 8 | import org.antlr.v4.runtime.tree.ParseTree; 9 | import org.codehaus.plexus.util.StringUtils; 10 | 11 | import java.util.*; 12 | import java.util.stream.Collectors; 13 | 14 | /** 15 | * @ClassName HiveSqlFieldLineage 16 | * @description: 17 | * @author: again 18 | * @Date: 2021/3/10 8:54 下午 19 | */ 20 | public class HiveSqlFieldLineageParser extends HiveSqlBaseVisitor { 21 | 22 | private TableNameModel outputTable; 23 | 24 | private final HashMap hiveFieldSelects = new LinkedHashMap<>(); 25 | 26 | private final Map selectParentKeyMap = new 
package cn.ganjiacheng.hive;

import cn.ganjiacheng.antlr.HiveSqlBaseVisitor;
import cn.ganjiacheng.antlr.HiveSqlParser;
import cn.ganjiacheng.model.lineage.*;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.RuleContext;
import org.antlr.v4.runtime.tree.ParseTree;
import org.codehaus.plexus.util.StringUtils;

import java.util.*;
import java.util.stream.Collectors;

/**
 * Parse-tree visitor that derives field-level (column) lineage from a Hive SQL statement.
 *
 * <p>While walking the tree it records one {@link FieldLineageSelectModel} per data source
 * (table or sub-select) of every select, keyed by {@code "<select start index>_<alias>"},
 * together with the select items that read from that source. After the walk,
 * {@link #getHiveFieldLineage()} resolves every output column of the outermost select
 * down to the physical table columns it is computed from.
 *
 * <p>Instances are stateful and not thread-safe; use one instance per statement.
 *
 * @author again
 * @Date 2021/3/10 8:54 PM
 */
public class HiveSqlFieldLineageParser extends HiveSqlBaseVisitor<Object> {

    /** Target table of the INSERT statement; {@code null} when the statement has none. */
    private TableNameModel outputTable;

    /** All sources found so far, by source key, in discovery order. */
    private final HashMap<String, FieldLineageSelectModel> hiveFieldSelects = new LinkedHashMap<>();

    /** Start index of a nested select item -> key of the source (sub-select) that contains it. */
    private final Map<Integer, String> selectParentKeyMap = new HashMap<>();

    /** Start index (as text) of the sub-select currently being visited. */
    private String thisSelectId;

    private final String sourceSQL;

    /** Select item currently being collected. */
    private FieldLineageSelectItemModel selectItemModel;

    /** Select items of the sub-select currently being visited. */
    private List<FieldLineageSelectItemModel> selectFields = new ArrayList<>();

    /** True between entering a select item and reaching the FROM clause. */
    private boolean startSelectItem = false;

    /** List view of {@link #hiveFieldSelects}, rebuilt on every lineage computation. */
    private final List<FieldLineageSelectModel> hiveFieldSelectList = new ArrayList<>();

    /** Accumulator for the source columns of the target field being resolved. */
    private HashSet<FieldNameWithProcessModel> sourceFields;

    /** Longest expression text met while resolving the current target field. */
    private String fieldProcess = "";

    /**
     * @param sql the exact SQL text the parse tree was built from; token offsets are
     *            used to cut expression text out of it
     */
    public HiveSqlFieldLineageParser(String sql) {
        this.sourceSQL = sql;
    }

    /**
     * Returns the original SQL text covered by the given context, whitespace included.
     * Falls back to {@code getText()} when the context has no usable token range.
     */
    private String subSourceSql(ParserRuleContext parserRuleContext) {
        if (parserRuleContext.getStart() == null || parserRuleContext.getStop() == null) {
            return parserRuleContext.getText();
        }
        int begin = parserRuleContext.getStart().getStartIndex();
        int end = parserRuleContext.getStop().getStopIndex() + 1;
        if (begin < 0 || end < begin || end > sourceSQL.length()) {
            return parserRuleContext.getText();
        }
        return sourceSQL.substring(begin, end);
    }

    /**
     * INSERT statement: remembers the target table.
     */
    @Override
    public Object visitInsert_stmt(HiveSqlParser.Insert_stmtContext ctx) {
        outputTable = Optional.ofNullable(ctx)
                .map(HiveSqlParser.Insert_stmtContext::table_name)
                .map(RuleContext::getText)
                .map(TableNameModel::parseTableName)
                .orElse(null);
        return super.visitInsert_stmt(ctx);
    }

    /**
     * Inside a select item, collects every identifier the expression refers to.
     * Purely numeric atoms are skipped.
     */
    @Override
    public Object visitExpr(HiveSqlParser.ExprContext ctx) {
        if (startSelectItem && selectItemModel != null) {
            Optional.ofNullable(ctx)
                    .map(HiveSqlParser.ExprContext::expr_atom)
                    .map(HiveSqlParser.Expr_atomContext::ident)
                    .map(ParseTree::getText)
                    .filter(name -> !StringUtils.isNumeric(name))
                    .ifPresent(name -> selectItemModel.getFieldNames().add(TableNameModel.dealNameMark(name)));
        }
        return super.visitExpr(ctx);
    }

    /**
     * Select item: starts a new item model, records its alias and its expression text,
     * then lets {@link #visitExpr} fill in the referenced field names.
     */
    @Override
    public Object visitSelect_list_item(HiveSqlParser.Select_list_itemContext ctx) {
        startSelectItem = true;
        selectItemModel = new FieldLineageSelectItemModel();
        selectItemModel.setFieldNames(new HashSet<>());
        Optional.ofNullable(ctx)
                .map(HiveSqlParser.Select_list_itemContext::expr)
                .map(this::subSourceSql)
                .ifPresent(selectItemModel::setProcess);
        Optional.ofNullable(ctx)
                .map(HiveSqlParser.Select_list_itemContext::select_list_alias)
                .map(HiveSqlParser.Select_list_aliasContext::ident)
                .map(RuleContext::getText)
                .ifPresent(selectItemModel::setAlias);
        Object visit = super.visitSelect_list_item(ctx);
        selectFields.add(selectItemModel);
        return visit;
    }

    /**
     * FROM clause: reached once all select items of the current sub-select are collected.
     * Splits every item by the source it reads from (the {@code alias.} prefix of each
     * field name) and attaches the per-source items to the matching source model.
     */
    @Override
    public Object visitFrom_clause(HiveSqlParser.From_clauseContext ctx) {
        startSelectItem = false;
        String selectPrefix = thisSelectId + "_";
        Collection<String> unqualifiedKeys = unqualifiedSourceKeys(selectPrefix);

        Map<String, List<FieldLineageSelectItemModel>> itemsBySource = new LinkedHashMap<>();
        for (FieldLineageSelectItemModel item : selectFields) {
            Map<String, HashSet<String>> fieldsBySource = new LinkedHashMap<>();
            for (String field : item.getFieldNames()) {
                String[] parts = field.split("\\.");
                if (parts.length == 2) {
                    // alias.column
                    fieldsBySource.computeIfAbsent(selectPrefix + parts[0], k -> new HashSet<>()).add(parts[1]);
                } else if (parts.length == 1) {
                    for (String key : unqualifiedKeys) {
                        fieldsBySource.computeIfAbsent(key, k -> new HashSet<>()).add(parts[0]);
                    }
                }
                // Names with more than one dot are not resolved.
            }
            for (Map.Entry<String, HashSet<String>> entry : fieldsBySource.entrySet()) {
                FieldLineageSelectItemModel perSource = new FieldLineageSelectItemModel();
                perSource.setFieldNames(entry.getValue());
                perSource.setAlias(item.getAlias());
                perSource.setProcess(item.getProcess());
                // An un-aliased item reading a single column is named after that column.
                if (perSource.getAlias() == null && entry.getValue().size() == 1) {
                    perSource.setAlias(entry.getValue().iterator().next());
                }
                itemsBySource.computeIfAbsent(entry.getKey(), k -> new ArrayList<>()).add(perSource);
            }
        }
        itemsBySource.forEach((key, items) -> {
            FieldLineageSelectModel source = hiveFieldSelects.get(key);
            if (source != null) {
                source.setSelectItems(items);
            }
        });
        return super.visitFrom_clause(ctx);
    }

    /**
     * Source keys an unqualified column of the current select is attributed to.
     *
     * <ul>
     *   <li>every sub-select source of this select, when there is at least one;</li>
     *   <li>otherwise the only registered source of this select, when exactly one exists
     *       and the un-aliased key is not registered (single aliased table), so the
     *       column is no longer dropped;</li>
     *   <li>otherwise the un-aliased key {@code "<id>_"}.</li>
     * </ul>
     */
    private Collection<String> unqualifiedSourceKeys(String selectPrefix) {
        Set<String> subSelectSources = new LinkedHashSet<>();
        for (String key : selectParentKeyMap.values()) {
            if (key.startsWith(selectPrefix)) {
                subSelectSources.add(key);
            }
        }
        if (!subSelectSources.isEmpty()) {
            return subSelectSources;
        }
        if (!hiveFieldSelects.containsKey(selectPrefix)) {
            List<String> candidates = hiveFieldSelects.keySet().stream()
                    .filter(key -> key.startsWith(selectPrefix))
                    .collect(Collectors.toList());
            if (candidates.size() == 1) {
                return candidates;
            }
        }
        return Collections.singletonList(selectPrefix);
    }

    /**
     * Before descending into a select: registers every source (FROM table or sub-select,
     * plus each JOIN) of every select item, and records parent/child links for nested
     * sub-selects.
     */
    @Override
    public Object visitSelect_stmt(HiveSqlParser.Select_stmtContext ctx) {
        List<HiveSqlParser.Fullselect_stmt_itemContext> selectItems = ctx.fullselect_stmt().fullselect_stmt_item();
        for (HiveSqlParser.Fullselect_stmt_itemContext selectItem : selectItems) {
            Integer thisId = selectItem.getStart().getStartIndex();
            HiveSqlParser.From_clauseContext fromClause = Optional.ofNullable(selectItem.subselect_stmt())
                    .map(HiveSqlParser.Subselect_stmtContext::from_clause)
                    .orElse(null);
            // The primary source is registered even without a FROM clause (key "<id>_").
            registerSource(thisId, fromClause == null ? null : fromClause.from_table_clause());
            if (fromClause != null) {
                for (HiveSqlParser.From_join_clauseContext join : fromClause.from_join_clause()) {
                    registerSource(thisId, join.from_table_clause());
                }
            }
        }
        return super.visitSelect_stmt(ctx);
    }

    /**
     * Registers one source of the select starting at {@code selectId}.
     *
     * @param selectId    start index of the owning select item
     * @param tableClause the table clause of the FROM or JOIN; may be {@code null}
     */
    private void registerSource(Integer selectId, HiveSqlParser.From_table_clauseContext tableClause) {
        HiveSqlParser.From_table_name_clauseContext nameClause = Optional.ofNullable(tableClause)
                .map(HiveSqlParser.From_table_clauseContext::from_table_name_clause)
                .orElse(null);
        HiveSqlParser.From_subselect_clauseContext subSelectClause = Optional.ofNullable(tableClause)
                .map(HiveSqlParser.From_table_clauseContext::from_subselect_clause)
                .orElse(null);

        FieldLineageSelectModel source = new FieldLineageSelectModel();
        Optional.ofNullable(nameClause)
                .map(HiveSqlParser.From_table_name_clauseContext::table_name)
                .map(RuleContext::getText)
                .map(TableNameModel::parseTableName)
                .ifPresent(source::setFromTable);
        Optional.ofNullable(nameClause)
                .map(HiveSqlParser.From_table_name_clauseContext::from_alias_clause)
                .map(HiveSqlParser.From_alias_clauseContext::ident)
                .map(RuleContext::getText)
                .ifPresent(source::setTableAlias);
        // Read the identifier only, exactly as for table aliases: getText() on the whole
        // alias clause would glue any other token of the clause onto the name.
        Optional.ofNullable(subSelectClause)
                .map(HiveSqlParser.From_subselect_clauseContext::from_alias_clause)
                .map(HiveSqlParser.From_alias_clauseContext::ident)
                .map(RuleContext::getText)
                .ifPresent(source::setTableAlias);

        String alias = source.getTableAlias();
        String key = selectId + "_" + (alias == null ? "" : alias);
        source.setId(key);
        source.setParentId(selectParentKeyMap.get(selectId));
        source.setSelectItems(new ArrayList<>());
        hiveFieldSelects.put(key, source);

        // Every select item nested in this sub-select has this source as its parent.
        Optional.ofNullable(subSelectClause)
                .map(HiveSqlParser.From_subselect_clauseContext::select_stmt)
                .map(HiveSqlParser.Select_stmtContext::fullselect_stmt)
                .map(HiveSqlParser.Fullselect_stmtContext::fullselect_stmt_item)
                .ifPresent(children -> children.forEach(
                        child -> selectParentKeyMap.put(child.getStart().getStartIndex(), key)));
    }

    /**
     * Entering a sub-select: resets the per-select item state.
     */
    @Override
    public Object visitSubselect_stmt(HiveSqlParser.Subselect_stmtContext ctx) {
        thisSelectId = ctx.getStart().getStartIndex() + "";
        selectFields = new ArrayList<>();
        return super.visitSubselect_stmt(ctx);
    }

    /**
     * Rebuilds the list view of the collected sources. The list is cleared first so that
     * repeated lineage computations do not accumulate duplicates.
     */
    private void transSelectToList() {
        hiveFieldSelectList.clear();
        hiveFieldSelectList.addAll(hiveFieldSelects.values());
    }

    /**
     * Target fields: the distinct aliases of the select items of the outermost selects
     * (sources without a parent). Items without any alias are skipped.
     */
    private List<FieldNameModel> getTargetFields() {
        List<String> aliases = hiveFieldSelectList.stream()
                .filter(select -> select.getParentId() == null)
                .map(FieldLineageSelectModel::getSelectItems)
                .filter(Objects::nonNull)
                .flatMap(Collection::stream)
                .map(FieldLineageSelectItemModel::getAlias)
                .filter(Objects::nonNull)
                .distinct()
                .collect(Collectors.toList());
        List<FieldNameModel> targets = new ArrayList<>();
        for (String alias : aliases) {
            FieldNameModel target = new FieldNameModel();
            if (outputTable != null) {
                target.setDbName(outputTable.getDbName());
                target.setTableName(outputTable.getTableName());
            }
            target.setFieldName(alias);
            targets.add(target);
        }
        return targets;
    }

    /**
     * Resolves one field from the outside in. Among the sources whose parent is
     * {@code parentId}, every select item named {@code targetField} is followed: when the
     * source is a table its columns are recorded, when it is a sub-select the search
     * recurses into it.
     */
    private void findFieldSource(String targetField, String parentId) {
        if (targetField == null) {
            return;
        }
        for (FieldLineageSelectModel select : hiveFieldSelectList) {
            if (!Objects.equals(select.getParentId(), parentId) || select.getSelectItems() == null) {
                continue;
            }
            for (FieldLineageSelectItemModel selectItem : select.getSelectItems()) {
                // Items may have no alias; compare from the non-null side.
                if (!targetField.equals(selectItem.getAlias())) {
                    continue;
                }
                String process = selectItem.getProcess();
                if (process != null && process.length() > fieldProcess.length()) {
                    fieldProcess = process;
                }
                for (String field : selectItem.getFieldNames()) {
                    if (select.getFromTable() == null) {
                        findFieldSource(field, select.getId());
                    } else {
                        FieldNameWithProcessModel source = new FieldNameWithProcessModel();
                        source.setDbName(select.getFromTable().getDbName());
                        source.setTableName(select.getFromTable().getTableName());
                        source.setFieldName(field);
                        source.setProcess(fieldProcess);
                        sourceFields.add(source);
                    }
                }
            }
        }
    }

    /**
     * Computes the field lineage of the visited statement. Call after the tree has been
     * visited; may be called repeatedly.
     *
     * @return one entry per target field, each with the table columns it derives from
     */
    public List<FieldLineageModel> getHiveFieldLineage() {
        transSelectToList();
        List<FieldLineageModel> lineage = new ArrayList<>();
        for (FieldNameModel targetField : getTargetFields()) {
            FieldLineageModel fieldLineage = new FieldLineageModel();
            fieldLineage.setTargetField(targetField);
            sourceFields = new HashSet<>();
            fieldProcess = "";
            findFieldSource(targetField.getFieldName(), null);
            fieldLineage.setSourceFields(sourceFields);
            lineage.add(fieldLineage);
        }
        return lineage;
    }

    /**
     * @return the intermediate per-source result of the parse, by source key
     */
    public HashMap<String, FieldLineageSelectModel> getHiveFieldSelects() {
        return hiveFieldSelects;
    }
}
package cn.ganjiacheng.hive;

import cn.ganjiacheng.antlr.HiveSqlBaseVisitor;
import cn.ganjiacheng.antlr.HiveSqlParser;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.RuleContext;
import org.codehaus.plexus.util.StringUtils;

import java.util.*;
import java.util.concurrent.atomic.AtomicReference;

/**
 * Parse-tree visitor that pretty-prints a Hive SELECT statement.
 *
 * <p>The output is built by placeholder substitution: every select is first represented
 * by a {@code {SELECT<start index>}} marker, which is replaced by the formatted text once
 * that select is visited. Nested sub-selects leave a marker of their own in the text of
 * their parent and are filled in when the walk reaches them.
 *
 * <p>Expressions are not re-formatted: their text is cut verbatim from the source SQL.
 * NOTE: WITH (CTE) clauses and LATERAL VIEW are not formatted; the disabled
 * implementations were removed from this file and remain in version control.
 *
 * @author again
 * @Date 2021/3/10 8:31 PM
 */
public class HiveSqlFormatterParser extends HiveSqlBaseVisitor<Object> {

    /** Number of spaces per indentation level. */
    private static final int INDENT_WIDTH = 4;

    private final String sourceSQL;

    private String formattedSQL = "";

    private boolean firstSelect = true;

    /** Indentation level of each select, keyed by its start index in the source. */
    private final Map<Integer, Integer> ceng = new HashMap<>();

    /**
     * @param sql the exact SQL text the parse tree was built from
     */
    public HiveSqlFormatterParser(String sql) {
        this.sourceSQL = sql;
    }

    /**
     * Cuts the text covered by the context out of the source SQL, so the formatter does
     * not have to descend into expressions (function calls, arithmetic, ...). Falls back
     * to {@code getText()} when the context has no usable token range.
     */
    private String subSourceSql(ParserRuleContext parserRuleContext) {
        if (parserRuleContext.getStart() == null || parserRuleContext.getStop() == null) {
            return parserRuleContext.getText();
        }
        int begin = parserRuleContext.getStart().getStartIndex();
        int end = parserRuleContext.getStop().getStopIndex() + 1;
        if (begin < 0 || end < begin || end > sourceSQL.length()) {
            return parserRuleContext.getText();
        }
        return sourceSQL.substring(begin, end);
    }

    /**
     * Indents {@code s} by {@code n} levels (four spaces each).
     * Built by hand rather than with {@code String.format("%<width>s")}, which throws
     * for a zero width (empty text at level 0).
     */
    private String addKongFormat(String s, Integer n) {
        int pad = Math.max(0, n) * INDENT_WIDTH;
        StringBuilder padded = new StringBuilder(pad + s.length());
        for (int i = 0; i < pad; i++) {
            padded.append(' ');
        }
        return padded.append(s).toString();
    }

    private static String placeholder(int startIndex) {
        return "{SELECT" + startIndex + "}";
    }

    /**
     * Flattens a chain of conditions. The tree has the shape
     * {@code (previous conditions) operator (last condition)}, so only the left child is
     * expanded recursively.
     *
     * @param conditions receives the condition texts, left to right
     * @param operators  receives the logical operators between them
     */
    private void makeWhereExpr(HiveSqlParser.Bool_exprContext boolExpr,
                               List<String> conditions, List<String> operators) {
        if (boolExpr.children.size() == 3 && boolExpr.bool_expr().size() == 2) {
            makeWhereExpr(boolExpr.bool_expr(0), conditions, operators);
            conditions.add(subSourceSql(boolExpr.bool_expr(1)));
            operators.add(boolExpr.bool_expr_logical_operator().getText());
        } else {
            conditions.add(subSourceSql(boolExpr));
        }
    }

    /** One condition per line, each followed by the operator that joins it to the next. */
    private String formatConditions(HiveSqlParser.Bool_exprContext root, int level) {
        List<String> conditions = new ArrayList<>();
        List<String> operators = new ArrayList<>();
        makeWhereExpr(root, conditions, operators);
        List<String> lines = new ArrayList<>();
        for (int i = 0; i < conditions.size(); i++) {
            String line = i < operators.size()
                    ? conditions.get(i) + " " + operators.get(i)
                    : conditions.get(i);
            lines.add(addKongFormat(line, level));
        }
        return String.join("\n", lines);
    }

    /**
     * Formats the table part of a FROM or JOIN: either a table name (with alias) or a
     * parenthesised sub-select, for which a placeholder is emitted one level deeper.
     */
    private String getFromTableClause(HiveSqlParser.From_table_clauseContext tableClause, Integer kong) {
        StringBuilder out = new StringBuilder();
        ParserRuleContext nameClause = tableClause.from_table_name_clause();
        HiveSqlParser.From_subselect_clauseContext subSelect = tableClause.from_subselect_clause();
        if (nameClause != null) {
            out.append(addKongFormat(subSourceSql(nameClause), kong + 1));
        }
        if (subSelect != null) {
            if (subSelect.T_OPEN_P() != null) {
                out.append(addKongFormat(subSelect.T_OPEN_P().getText(), kong));
            }
            if (subSelect.select_stmt() != null) {
                int nestedStart = subSelect.select_stmt().getStart().getStartIndex();
                out.append("\n").append(placeholder(nestedStart));
                ceng.put(nestedStart, kong + 1);
            }
            if (subSelect.T_CLOSE_P() != null) {
                out.append("\n").append(addKongFormat(subSelect.T_CLOSE_P().getText(), kong));
            }
            if (subSelect.from_alias_clause() != null) {
                out.append(" ").append(subSourceSql(subSelect.from_alias_clause()));
            }
        }
        if (nameClause == null && subSelect == null) {
            // Any other kind of table clause is kept verbatim instead of being dropped.
            out.append(addKongFormat(subSourceSql(tableClause), kong + 1));
        }
        return out.toString();
    }

    /**
     * Formats one select item. A searched CASE expression is spread over several lines
     * (one per WHEN branch); anything else is emitted verbatim on one line.
     */
    private String getSelectItem(HiveSqlParser.Select_list_itemContext selectItem, Integer kong) {
        HiveSqlParser.Expr_case_searchedContext searchedCase = Optional.ofNullable(selectItem.expr())
                .map(HiveSqlParser.ExprContext::expr_case)
                .map(HiveSqlParser.Expr_caseContext::expr_case_searched)
                .orElse(null);
        if (searchedCase == null) {
            return addKongFormat(subSourceSql(selectItem), kong + 1);
        }

        StringBuilder out = new StringBuilder();
        if (searchedCase.T_CASE() != null) {
            out.append(addKongFormat(searchedCase.T_CASE().getText() + "\n", kong + 1));
        }
        List<String> branches = new ArrayList<>();
        for (int i = 0; i < searchedCase.T_WHEN().size(); i++) {
            branches.add(addKongFormat(searchedCase.T_WHEN(i).getText() + " "
                    + subSourceSql(searchedCase.bool_expr(i)) + " "
                    + searchedCase.T_THEN(i).getText() + " "
                    + subSourceSql(searchedCase.expr(i)), kong + 2));
        }
        out.append(String.join("\n", branches));
        if (searchedCase.T_ELSE() != null) {
            // Source text, not getText(): getText() drops the whitespace between tokens.
            HiveSqlParser.ExprContext elseExpr = searchedCase.expr(searchedCase.expr().size() - 1);
            out.append("\n").append(addKongFormat(
                    searchedCase.T_ELSE().getText() + " " + subSourceSql(elseExpr), kong + 2));
        }
        if (searchedCase.T_END() != null) {
            out.append("\n").append(addKongFormat(searchedCase.T_END().getText(), kong + 1));
        }
        if (selectItem.select_list_alias() != null) {
            out.append("\n").append(addKongFormat(subSourceSql(selectItem.select_list_alias()), kong + 1));
        }
        return out.toString();
    }

    /**
     * Formats one JOIN: either {@code <join type> <table> ON <condition>} or the comma form.
     */
    private String getFromJoin(HiveSqlParser.From_join_clauseContext ctx, int kong) {
        StringBuilder out = new StringBuilder();
        if (ctx.from_join_type_clause() != null) {
            out.append("\n").append(addKongFormat(subSourceSql(ctx.from_join_type_clause()), kong))
                    .append("\n").append(getFromTableClause(ctx.from_table_clause(), kong));
            if (ctx.T_ON() != null && ctx.bool_expr() != null) {
                out.append("\n").append(addKongFormat(ctx.T_ON().getText(), kong))
                        .append(" ").append(subSourceSql(ctx.bool_expr()));
            }
        }
        if (ctx.T_COMMA() != null) {
            out.append(ctx.T_COMMA().getText()).append("\n")
                    .append(getFromTableClause(ctx.from_table_clause(), kong));
        }
        return out.toString();
    }

    /**
     * Formats one sub-select (SELECT ... FROM ... WHERE ... GROUP BY ... HAVING ...
     * ORDER BY ... options) at the given indentation level.
     */
    private String formatSubselect(HiveSqlParser.Subselect_stmtContext subSelect, int kong) {
        StringBuilder out = new StringBuilder();
        // select
        out.append(addKongFormat(subSelect.T_SELECT().getText(), kong)).append("\n");
        HiveSqlParser.Select_listContext selectList = subSelect.select_list();
        if (selectList != null) {
            if (selectList.select_list_set() != null) {
                out.append(addKongFormat(subSourceSql(selectList.select_list_set()) + "\n", kong + 1));
            }
            List<String> items = new ArrayList<>();
            for (HiveSqlParser.Select_list_itemContext item : selectList.select_list_item()) {
                items.add(getSelectItem(item, kong));
            }
            out.append(String.join(",\n", items)).append("\n");
        }
        // from, table, joins
        HiveSqlParser.From_clauseContext from = subSelect.from_clause();
        if (from != null) {
            if (from.T_FROM() != null) {
                out.append(addKongFormat(from.T_FROM().getText() + "\n", kong));
            }
            if (from.from_table_clause() != null) {
                out.append(getFromTableClause(from.from_table_clause(), kong));
            }
            for (HiveSqlParser.From_join_clauseContext join : from.from_join_clause()) {
                out.append(getFromJoin(join, kong));
            }
        }
        // where
        HiveSqlParser.Where_clauseContext where = subSelect.where_clause();
        if (where != null) {
            if (where.T_WHERE() != null) {
                out.append("\n").append(addKongFormat(where.T_WHERE().getText() + "\n", kong));
            }
            if (where.bool_expr() != null) {
                out.append(formatConditions(where.bool_expr(), kong + 1));
            }
        }
        // group by
        HiveSqlParser.Group_by_clauseContext groupBy = subSelect.group_by_clause();
        if (groupBy != null) {
            out.append("\n").append(addKongFormat(
                    groupBy.T_GROUP().getText() + " " + groupBy.T_BY().getText(), kong));
            List<String> keys = new ArrayList<>();
            for (HiveSqlParser.ExprContext expr : groupBy.expr()) {
                keys.add(subSourceSql(expr));
            }
            out.append("\n").append(addKongFormat(String.join(", ", keys), kong + 1));
        }
        // having
        ParserRuleContext having = subSelect.having_clause();
        if (having != null) {
            out.append("\n").append(addKongFormat(subSourceSql(having), kong));
        }
        // order by
        HiveSqlParser.Order_by_clauseContext orderBy = subSelect.order_by_clause();
        if (orderBy != null) {
            out.append("\n").append(addKongFormat(
                    orderBy.T_ORDER().getText() + " " + orderBy.T_BY().getText(), kong));
            // Everything after BY is kept verbatim so each sort key keeps its own
            // ASC/DESC; collecting the directions separately would re-attach them to
            // the wrong key.
            String sortKeys = sourceSQL.substring(
                    orderBy.T_BY().getSymbol().getStopIndex() + 1,
                    orderBy.getStop().getStopIndex() + 1).trim();
            out.append("\n").append(addKongFormat(sortKeys, kong + 1));
        }
        // trailing select options
        ParserRuleContext options = subSelect.select_options();
        if (options != null) {
            out.append("\n").append(addKongFormat(subSourceSql(options), kong));
        }
        return out.toString();
    }

    /**
     * Replaces this select's placeholder by one placeholder per member of the full
     * select (separated by the set operators), then replaces each of those by the
     * formatted sub-select.
     */
    @Override
    public Object visitSelect_stmt(HiveSqlParser.Select_stmtContext ctx) {
        List<HiveSqlParser.Fullselect_stmt_itemContext> items = ctx.fullselect_stmt().fullselect_stmt_item();
        // The start index in the source string is the unique id of a select.
        int selectStart = ctx.getStart().getStartIndex();
        String selectMarker = placeholder(selectStart);
        if (firstSelect && !formattedSQL.contains(selectMarker)) {
            // Outermost select: seed the output with its placeholder.
            formattedSQL += selectMarker;
            firstSelect = false;
        }

        int level = ceng.getOrDefault(selectStart, 0);
        StringBuilder skeleton = new StringBuilder();
        for (int i = 0; i < items.size(); i++) {
            HiveSqlParser.Subselect_stmtContext subSelect = items.get(i).subselect_stmt();
            if (subSelect != null) {
                int subStart = subSelect.getStart().getStartIndex();
                skeleton.append(placeholder(subStart));
                ceng.put(subStart, level);
            } else {
                // Not a plain sub-select (e.g. a parenthesised full select): keep verbatim.
                skeleton.append(addKongFormat(subSourceSql(items.get(i)), level));
            }
            if (i < items.size() - 1) {
                HiveSqlParser.Fullselect_set_clauseContext setClause = ctx.fullselect_stmt().fullselect_set_clause(i);
                skeleton.append("\n").append(addKongFormat(subSourceSql(setClause), level)).append("\n");
            }
        }
        formattedSQL = formattedSQL.replace(selectMarker, skeleton.toString());

        for (HiveSqlParser.Fullselect_stmt_itemContext item : items) {
            HiveSqlParser.Subselect_stmtContext subSelect = item.subselect_stmt();
            if (subSelect == null) {
                continue;
            }
            int subStart = subSelect.getStart().getStartIndex();
            formattedSQL = formattedSQL.replace(
                    placeholder(subStart), formatSubselect(subSelect, ceng.getOrDefault(subStart, 0)));
        }
        return super.visitSelect_stmt(ctx);
    }

    /**
     * @return the formatted SQL; empty until a select statement has been visited
     */
    public String getFormattedSQL() {
        return formattedSQL;
    }
}
"\n" + ParserRuleContext.get(0).getText() : "").ifPresent(tmpSelect::append); 332 | // select 参数 333 | Optional.of(subSelect).map(HiveSqlParser.Subselect_stmtContext::select_options) 334 | .map(ParserRuleContext -> "\n" + addKongFormat(subSourceSql(ParserRuleContext), kong)) 335 | .ifPresent(tmpSelect::append); 336 | formattedSQL = formattedSQL.replace(String.format("{SELECT%s}", thisIndex), tmpSelect.toString()); 337 | } 338 | return super.visitSelect_stmt(ctx); 339 | } 340 | 341 | public String getFormattedSQL() { 342 | return formattedSQL; 343 | } 344 | } 345 | -------------------------------------------------------------------------------- /src/main/java/cn/ganjiacheng/hive/HiveSqlMetadataParser.java: -------------------------------------------------------------------------------- 1 | package cn.ganjiacheng.hive; 2 | 3 | import cn.ganjiacheng.antlr.HiveSqlBaseVisitor; 4 | import cn.ganjiacheng.antlr.HiveSqlParser; 5 | import cn.ganjiacheng.model.metadata.FieldMetadataModel; 6 | import cn.ganjiacheng.model.metadata.TableMetadataModel; 7 | import org.antlr.v4.runtime.ParserRuleContext; 8 | import org.antlr.v4.runtime.RuleContext; 9 | import org.antlr.v4.runtime.tree.ParseTree; 10 | 11 | import java.util.ArrayList; 12 | import java.util.List; 13 | import java.util.Optional; 14 | 15 | /** 16 | * @ClassName HiveSqlMetadataParser 17 | * @description: 18 | * @author: again 19 | * @Date: 2021/3/10 7:41 下午 20 | */ 21 | public class HiveSqlMetadataParser extends HiveSqlBaseVisitor { 22 | 23 | private final TableMetadataModel tableMetadata = new TableMetadataModel(); 24 | 25 | private final String sourceSQL; 26 | 27 | /** 28 | * 保存原始sql 29 | */ 30 | public HiveSqlMetadataParser(String sql) { 31 | this.sourceSQL = sql; 32 | } 33 | 34 | /** 35 | * 截取原始sql 36 | * @param parserRuleContext 37 | * @return 38 | */ 39 | private String subSourceSql(ParserRuleContext parserRuleContext) { 40 | return sourceSQL.substring( 41 | parserRuleContext.getStart().getStartIndex(), 42 | 
parserRuleContext.getStop().getStopIndex() + 1); 43 | } 44 | 45 | /** 46 | * 处理备注中的引号 47 | */ 48 | private String dealComment(String comment) { 49 | if(comment != null && comment.length() >= 2 50 | && comment.startsWith("'") && comment.endsWith("'")){ 51 | comment = comment.substring(1, comment.length()-1); 52 | } 53 | return comment; 54 | } 55 | 56 | /** 57 | * 处理表名字段名中的`` 58 | * @param name 59 | * @return 60 | */ 61 | private String dealNameMark(String name) { 62 | if(name.startsWith("`") && name.endsWith("`")) { 63 | return name.substring(1, name.length()-1); 64 | }else { 65 | return name; 66 | } 67 | } 68 | 69 | /** 70 | * 获取到字段信息 71 | * @param ctx 72 | */ 73 | private void setTableField(HiveSqlParser.Create_table_stmtContext ctx) { 74 | List itemContexts = 75 | ctx.create_table_definition().create_table_columns().create_table_columns_item(); 76 | List fields = new ArrayList<>(); 77 | itemContexts.forEach(item -> { 78 | FieldMetadataModel field = new FieldMetadataModel(); 79 | field.setFieldName(Optional.of(item) 80 | .map(HiveSqlParser.Create_table_columns_itemContext::column_name) 81 | .map(RuleContext::getText) 82 | .map(this::dealNameMark) 83 | .orElse(null)); 84 | String type = Optional.of(item) 85 | .map(HiveSqlParser.Create_table_columns_itemContext::dtype) 86 | .map(RuleContext::getText) 87 | .orElse(null); 88 | String typeLen = Optional.of(item) 89 | .map(HiveSqlParser.Create_table_columns_itemContext::dtype_len) 90 | .map(RuleContext::getText) 91 | .orElse(""); 92 | field.setDataType(type != null ? 
type + typeLen : null); 93 | field.setFieldComment(Optional.of(item) 94 | .map(HiveSqlParser.Create_table_columns_itemContext::column_comment) 95 | .map(RuleContext::getText) 96 | .map(this::dealComment) 97 | .orElse(null)); 98 | fields.add(field); 99 | }); 100 | tableMetadata.setFields(fields); 101 | } 102 | 103 | /** 104 | * 获取表其他属性信息 105 | * @param ctx 106 | */ 107 | private void setTableOption(HiveSqlParser.Create_table_stmtContext ctx) { 108 | // HiveSqlParser.Create_table_options_hive_itemContext tableOption = 109 | // ctx.create_table_definition().create_table_options().create_table_options_hive_item(); 110 | // tableMetadata.setTableComment(Optional.ofNullable(tableOption) 111 | // .map(HiveSqlParser.Create_table_options_hive_itemContext::string) 112 | // .map(RuleContext::getText) 113 | // .map(this::dealComment) 114 | // .orElse(null)); 115 | // tableMetadata.setPartition(Optional.ofNullable(tableOption) 116 | // .map(HiveSqlParser.Create_table_options_hive_itemContext::create_table_hive_partitioned_by_clause) 117 | // .map(this::subSourceSql) 118 | // .orElse(null)); 119 | // tableMetadata.setRowFormat(Optional.ofNullable(tableOption) 120 | // .map(HiveSqlParser.Create_table_options_hive_itemContext::create_table_hive_row_format) 121 | // .map(this::subSourceSql) 122 | // .orElse(null)); 123 | // tableMetadata.setStore(Optional.ofNullable(tableOption) 124 | // .map(HiveSqlParser.Create_table_options_hive_itemContext::create_table_hive_stored) 125 | // .map(this::subSourceSql) 126 | // .orElse(null)); 127 | // tableMetadata.setLocation(Optional.ofNullable(tableOption) 128 | // .map(HiveSqlParser.Create_table_options_hive_itemContext::create_table_hive_location) 129 | // .map(this::subSourceSql) 130 | // .orElse(null)); 131 | // tableMetadata.setProperties(Optional.ofNullable(tableOption) 132 | // .map(HiveSqlParser.Create_table_options_hive_itemContext::create_table_hive_tblproperties) 133 | // .map(this::subSourceSql) 134 | // .orElse(null)); 135 | } 136 
| 137 | /** 138 | * 获取到表相关信息 139 | * @param ctx 140 | * @return 141 | */ 142 | @Override 143 | public Object visitCreate_table_stmt(HiveSqlParser.Create_table_stmtContext ctx) { 144 | List tbNameTree = ctx.table_name().ident().children; 145 | if(tbNameTree.size() == 3 && tbNameTree.get(1).getText().equals(".")) { 146 | tableMetadata.setDbName(tbNameTree.get(0).getText()); 147 | tableMetadata.setTableName(dealNameMark(tbNameTree.get(2).getText())); 148 | }else{ 149 | tableMetadata.setTableName(dealNameMark(tbNameTree.get(0).getText())); 150 | } 151 | // tableMetadata.setTableType(Optional.of(ctx) 152 | // .map(HiveSqlParser.Create_table_stmtContext::T_EXTERNAl) 153 | // .map(ParseTree::getText) 154 | // .orElse(null)); 155 | setTableField(ctx); 156 | setTableOption(ctx); 157 | return super.visitCreate_table_stmt(ctx); 158 | } 159 | 160 | /** 161 | * 获取全部创表信息 162 | */ 163 | public TableMetadataModel getTableMetadata() { 164 | return this.tableMetadata; 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /src/main/java/cn/ganjiacheng/hive/HiveSqlTableLineageParser.java: -------------------------------------------------------------------------------- 1 | package cn.ganjiacheng.hive; 2 | 3 | import cn.ganjiacheng.antlr.HiveSqlBaseVisitor; 4 | import cn.ganjiacheng.antlr.HiveSqlParser; 5 | import cn.ganjiacheng.model.lineage.TableLineageModel; 6 | import cn.ganjiacheng.model.lineage.TableNameModel; 7 | import org.antlr.v4.runtime.RuleContext; 8 | 9 | import java.util.HashSet; 10 | import java.util.Optional; 11 | 12 | /** 13 | * @ClassName HiveTableLineage 14 | * @description: 15 | * @author: again 16 | * @Date: 2021/3/10 8:47 下午 17 | */ 18 | public class HiveSqlTableLineageParser extends HiveSqlBaseVisitor { 19 | 20 | private TableNameModel outputTable; 21 | private final HashSet inputTables = new HashSet<>(); 22 | 23 | /** 24 | * visitInsert获取insert的table_name节点,作为目标输出表 25 | * @param ctx 26 | * @return 27 | */ 28 | 
@Override 29 | public Object visitInsert_stmt(HiveSqlParser.Insert_stmtContext ctx) { 30 | outputTable = Optional.ofNullable(ctx) 31 | .map(HiveSqlParser.Insert_stmtContext::table_name) 32 | .map(RuleContext::getText) 33 | .map(TableNameModel::parseTableName) 34 | .orElse(null); 35 | return super.visitInsert_stmt(ctx); 36 | } 37 | 38 | /** 39 | * 获取from真实表,加到来源表的Set里 40 | * @param ctx 41 | * @return 42 | */ 43 | @Override 44 | public Object visitFrom_table_clause(HiveSqlParser.From_table_clauseContext ctx) { 45 | Optional.ofNullable(ctx) 46 | .map(HiveSqlParser.From_table_clauseContext::from_table_name_clause) 47 | .map(RuleContext::getText) 48 | .map(TableNameModel::parseTableName) 49 | .map(inputTables::add); 50 | return super.visitFrom_table_clause(ctx); 51 | } 52 | 53 | public TableLineageModel getTableLineage() { 54 | TableLineageModel tableLineageModel = new TableLineageModel(); 55 | tableLineageModel.setOutputTable(outputTable); 56 | tableLineageModel.setInputTables(inputTables); 57 | return tableLineageModel; 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/main/java/cn/ganjiacheng/hive/HiveSqlTypeParser.java: -------------------------------------------------------------------------------- 1 | package cn.ganjiacheng.hive; 2 | 3 | import cn.ganjiacheng.antlr.HiveSqlBaseVisitor; 4 | import cn.ganjiacheng.antlr.HiveSqlParser; 5 | import cn.ganjiacheng.enums.SqlTypeEnum; 6 | 7 | /** 8 | * @ClassName HiveSqlType 9 | * @description: 10 | * @author: again 11 | * @Date: 2021/3/10 4:29 下午 12 | */ 13 | public class HiveSqlTypeParser extends HiveSqlBaseVisitor { 14 | 15 | private SqlTypeEnum sqlType = null; 16 | 17 | private void initSqlTypeEnum(SqlTypeEnum type) { 18 | if(sqlType == null) { 19 | sqlType = type; 20 | } 21 | } 22 | 23 | @Override 24 | public Object visitCreate_table_stmt(HiveSqlParser.Create_table_stmtContext ctx) { 25 | initSqlTypeEnum(SqlTypeEnum.CREATE); 26 | return 
super.visitCreate_table_stmt(ctx); 27 | } 28 | 29 | @Override 30 | public Object visitInsert_stmt(HiveSqlParser.Insert_stmtContext ctx) { 31 | initSqlTypeEnum(SqlTypeEnum.INSERT); 32 | return super.visitInsert_stmt(ctx); 33 | } 34 | 35 | @Override 36 | public Object visitSelect_stmt(HiveSqlParser.Select_stmtContext ctx) { 37 | initSqlTypeEnum(SqlTypeEnum.SELECT); 38 | return super.visitSelect_stmt(ctx); 39 | } 40 | 41 | @Override 42 | public Object visitUpdate_stmt(HiveSqlParser.Update_stmtContext ctx) { 43 | initSqlTypeEnum(SqlTypeEnum.UPDATE); 44 | return super.visitUpdate_stmt(ctx); 45 | } 46 | 47 | @Override 48 | public Object visitDelete_stmt(HiveSqlParser.Delete_stmtContext ctx) { 49 | initSqlTypeEnum(SqlTypeEnum.DELETE); 50 | return super.visitDelete_stmt(ctx); 51 | } 52 | 53 | public SqlTypeEnum getSqlType() { 54 | return sqlType; 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/cn/ganjiacheng/hive/MyHiveSqlParser.java: -------------------------------------------------------------------------------- 1 | package cn.ganjiacheng.hive; 2 | 3 | import cn.ganjiacheng.SqlParserAbstract; 4 | import cn.ganjiacheng.antlr.HiveSqlLexer; 5 | import cn.ganjiacheng.antlr.HiveSqlParser; 6 | import cn.ganjiacheng.enums.SqlTypeEnum; 7 | import cn.ganjiacheng.model.lineage.FieldLineageModel; 8 | import cn.ganjiacheng.model.lineage.TableLineageModel; 9 | import cn.ganjiacheng.model.metadata.TableMetadataModel; 10 | import org.antlr.v4.runtime.CharStream; 11 | import org.antlr.v4.runtime.CharStreams; 12 | import org.antlr.v4.runtime.CommonTokenStream; 13 | import org.antlr.v4.runtime.tree.ParseTree; 14 | 15 | import java.util.List; 16 | 17 | /** 18 | * @ClassName HiveSqlParser 19 | * @description: 20 | * @author: again 21 | * @Date: 2021/3/10 4:21 下午 22 | */ 23 | public class MyHiveSqlParser extends SqlParserAbstract { 24 | 25 | private ParseTree getParseTree(String sql) { 26 | CharStream input = 
CharStreams.fromString(sql); 27 | HiveSqlLexer lexer = new HiveSqlLexer(input); 28 | CommonTokenStream tokenStream = new CommonTokenStream(lexer); 29 | HiveSqlParser parser = new HiveSqlParser(tokenStream); 30 | return parser.program(); 31 | } 32 | 33 | @Override 34 | public SqlTypeEnum parseSqlType(String sql) { 35 | HiveSqlTypeParser visitor = new HiveSqlTypeParser(); 36 | visitor.visit(getParseTree(sql)); 37 | return visitor.getSqlType(); 38 | } 39 | 40 | @Override 41 | public TableMetadataModel parseSqlMetadata(String sql) { 42 | HiveSqlMetadataParser visitor = new HiveSqlMetadataParser(sql); 43 | visitor.visit(getParseTree(sql)); 44 | return visitor.getTableMetadata(); 45 | } 46 | 47 | @Override 48 | public String parseSqlFormatter(String sql) { 49 | HiveSqlFormatterParser visitor = new HiveSqlFormatterParser(sql); 50 | visitor.visit(getParseTree(sql)); 51 | return visitor.getFormattedSQL(); 52 | } 53 | 54 | @Override 55 | public TableLineageModel parseSqlTableLineage(String sql) { 56 | HiveSqlTableLineageParser visitor = new HiveSqlTableLineageParser(); 57 | visitor.visit(getParseTree(sql)); 58 | return visitor.getTableLineage(); 59 | } 60 | 61 | @Override 62 | public List parseSqlFieldLineage(String sql) { 63 | HiveSqlFieldLineageParser visitor = new HiveSqlFieldLineageParser(sql); 64 | visitor.visit(getParseTree(sql)); 65 | return visitor.getHiveFieldLineage(); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/main/java/cn/ganjiacheng/model/lineage/FieldLineageModel.java: -------------------------------------------------------------------------------- 1 | package cn.ganjiacheng.model.lineage; 2 | 3 | import java.util.HashSet; 4 | 5 | /** 6 | * @ClassName HiveFieldLineageModel 7 | * @description: 8 | * @author: again 9 | * @Date: 2021/3/10 8:52 下午 10 | */ 11 | public class FieldLineageModel { 12 | /** 13 | * 目标字段 14 | */ 15 | private FieldNameModel targetField; 16 | 17 | /** 18 | * 来源字段列表 19 | */ 20 | 
/**
 * Mutable holder for one select item used by the field-lineage analysis. All properties
 * start out as {@code null} and are plain get/set values with no validation.
 */
public class FieldLineageSelectItemModel {

    /**
     * Field names attached to this item. The element type is not visible in this excerpt -
     * presumably a field-name model; TODO confirm.
     */
    private Set fieldNames;

    /** Alias of the item; presumably the output column name - TODO confirm. */
    private String alias;

    /** Processing text of the item; presumably the producing expression - TODO confirm. */
    private String process;

    /** @return the field-name set exactly as stored, possibly {@code null} */
    public Set getFieldNames() {
        return this.fieldNames;
    }

    /** @param fieldNames the set to store; kept by reference, not copied */
    public void setFieldNames(Set fieldNames) {
        this.fieldNames = fieldNames;
    }

    /** @return the alias, possibly {@code null} */
    public String getAlias() {
        return this.alias;
    }

    /** @param alias the alias to store */
    public void setAlias(String alias) {
        this.alias = alias;
    }

    /** @return the processing text, possibly {@code null} */
    public String getProcess() {
        return this.process;
    }

    /** @param process the processing text to store */
    public void setProcess(String process) {
        this.process = process;
    }
}
*/ 15 | String id; 16 | 17 | /** 18 | * 父id,第一层select为null 19 | */ 20 | String parentId; 21 | 22 | /** 23 | * 来源表,来源子select则为null 24 | */ 25 | TableNameModel fromTable; 26 | 27 | /** 28 | * 表别名 29 | */ 30 | String tableAlias; 31 | 32 | /** 33 | * select字段 34 | */ 35 | List selectItems; 36 | 37 | public String getId() { 38 | return id; 39 | } 40 | 41 | public void setId(String id) { 42 | this.id = id; 43 | } 44 | 45 | public String getParentId() { 46 | return parentId; 47 | } 48 | 49 | public void setParentId(String parentId) { 50 | this.parentId = parentId; 51 | } 52 | 53 | public TableNameModel getFromTable() { 54 | return fromTable; 55 | } 56 | 57 | public void setFromTable(TableNameModel fromTable) { 58 | this.fromTable = fromTable; 59 | } 60 | 61 | public String getTableAlias() { 62 | return tableAlias; 63 | } 64 | 65 | public void setTableAlias(String tableAlias) { 66 | this.tableAlias = tableAlias; 67 | } 68 | 69 | public List getSelectItems() { 70 | return selectItems; 71 | } 72 | 73 | public void setSelectItems(List selectItems) { 74 | this.selectItems = selectItems; 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/main/java/cn/ganjiacheng/model/lineage/FieldNameModel.java: -------------------------------------------------------------------------------- 1 | package cn.ganjiacheng.model.lineage; 2 | 3 | import java.util.Objects; 4 | 5 | /** 6 | * @ClassName FieldNameModel 7 | * @description: 8 | * @author: again 9 | * @Date: 2021/3/10 8:50 下午 10 | */ 11 | public class FieldNameModel { 12 | private String dbName; 13 | private String tableName; 14 | private String fieldName; 15 | 16 | public String getDbName() { 17 | return dbName; 18 | } 19 | 20 | public void setDbName(String dbName) { 21 | this.dbName = dbName; 22 | } 23 | 24 | public String getTableName() { 25 | return tableName; 26 | } 27 | 28 | public void setTableName(String tableName) { 29 | this.tableName = tableName; 30 | } 31 | 32 | public 
String getFieldName() { 33 | return fieldName; 34 | } 35 | 36 | public void setFieldName(String fieldName) { 37 | this.fieldName = fieldName; 38 | } 39 | 40 | @Override 41 | public boolean equals(Object o) { 42 | if (this == o) { 43 | return true; 44 | } 45 | if (o == null || getClass() != o.getClass()) { 46 | return false; 47 | } 48 | FieldNameModel that = (FieldNameModel) o; 49 | return Objects.equals(dbName, that.dbName) && 50 | Objects.equals(tableName, that.tableName) && 51 | Objects.equals(fieldName, that.fieldName); 52 | } 53 | 54 | @Override 55 | public int hashCode() { 56 | return Objects.hash(dbName, tableName, fieldName); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/cn/ganjiacheng/model/lineage/FieldNameWithProcessModel.java: -------------------------------------------------------------------------------- 1 | package cn.ganjiacheng.model.lineage; 2 | 3 | import java.util.Objects; 4 | 5 | /** 6 | * @ClassName FieldNameWithProcessModel 7 | * @description: 8 | * @author: again 9 | * @Date: 2021/3/10 8:51 下午 10 | */ 11 | public class FieldNameWithProcessModel { 12 | private String dbName; 13 | private String tableName; 14 | private String fieldName; 15 | private String process; 16 | 17 | public String getDbName() { 18 | return dbName; 19 | } 20 | 21 | public void setDbName(String dbName) { 22 | this.dbName = dbName; 23 | } 24 | 25 | public String getTableName() { 26 | return tableName; 27 | } 28 | 29 | public void setTableName(String tableName) { 30 | this.tableName = tableName; 31 | } 32 | 33 | public String getFieldName() { 34 | return fieldName; 35 | } 36 | 37 | public void setFieldName(String fieldName) { 38 | this.fieldName = fieldName; 39 | } 40 | 41 | public String getProcess() { 42 | return process; 43 | } 44 | 45 | public void setProcess(String process) { 46 | this.process = process; 47 | } 48 | 49 | @Override 50 | public boolean equals(Object o) { 51 | if (this == o) { 52 | 
/**
 * Identifies a table by an optional database name and a table name. Two instances are equal
 * when both parts are equal, with {@code null} parts compared as equal to each other.
 */
public class TableNameModel {
    private String dbName;
    private String tableName;

    /**
     * Strips one pair of enclosing backticks from an identifier.
     *
     * <p>The identifier is unwrapped only when it is at least two characters long, so a lone
     * backtick is returned unchanged instead of failing in {@code substring(1, 0)}.
     *
     * @param name identifier text; may be {@code null}
     * @return the identifier without its enclosing backticks, or {@code name} itself when it
     *         is {@code null}, shorter than two characters, or not backtick-quoted
     */
    public static String dealNameMark(String name) {
        if (name == null || name.length() < 2) {
            return name;
        }
        if (name.startsWith("`") && name.endsWith("`")) {
            return name.substring(1, name.length() - 1);
        }
        return name;
    }

    /**
     * Parses {@code table} or {@code db.table} text into a model.
     *
     * <p>Each part has its enclosing backticks removed, so quoted and unquoted spellings of
     * the same name produce equal models. Text that splits into more than two dot-separated
     * parts yields a model with both names left {@code null}.
     *
     * @param tableName table name text, optionally prefixed with a database name
     * @return a new model; never {@code null}
     * @throws NullPointerException if {@code tableName} is {@code null}
     */
    public static TableNameModel parseTableName(String tableName) {
        TableNameModel tableNameModel = new TableNameModel();
        String[] splitTable = tableName.split("\\.");
        if (splitTable.length == 2) {
            tableNameModel.setDbName(dealNameMark(splitTable[0]));
            tableNameModel.setTableName(dealNameMark(splitTable[1]));
        } else if (splitTable.length == 1) {
            tableNameModel.setTableName(dealNameMark(splitTable[0]));
        }
        return tableNameModel;
    }

    /** @return the database name, possibly {@code null} */
    public String getDbName() {
        return dbName;
    }

    /** @param dbName the database name to store */
    public void setDbName(String dbName) {
        this.dbName = dbName;
    }

    /** @return the table name, possibly {@code null} */
    public String getTableName() {
        return tableName;
    }

    /** @param tableName the table name to store */
    public void setTableName(String tableName) {
        this.tableName = tableName;
    }

    /** Compares both name parts; only an object of exactly this class can be equal. */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }

        TableNameModel that = (TableNameModel) o;

        if (dbName != null ? !dbName.equals(that.dbName) : that.dbName != null) {
            return false;
        }
        return tableName != null ? tableName.equals(that.tableName) : that.tableName == null;
    }

    /** Hash over the same two parts that {@link #equals(Object)} compares. */
    @Override
    public int hashCode() {
        int result = dbName != null ? dbName.hashCode() : 0;
        result = 31 * result + (tableName != null ? tableName.hashCode() : 0);
        return result;
    }
}
| 37 | /** 38 | * 行格式 39 | */ 40 | private String rowFormat; 41 | 42 | /** 43 | * 存储格式 44 | */ 45 | private String store; 46 | 47 | /** 48 | * 存储位置 49 | */ 50 | private String location; 51 | 52 | /** 53 | * 属性(压缩格式) 54 | */ 55 | private String properties; 56 | 57 | /** 58 | * 字段 59 | */ 60 | private List fields; 61 | 62 | public String getDbName() { 63 | return dbName; 64 | } 65 | 66 | public void setDbName(String dbName) { 67 | this.dbName = dbName; 68 | } 69 | 70 | public String getTableName() { 71 | return tableName; 72 | } 73 | 74 | public void setTableName(String tableName) { 75 | this.tableName = tableName; 76 | } 77 | 78 | public String getTableType() { 79 | return tableType; 80 | } 81 | 82 | public void setTableType(String tableType) { 83 | this.tableType = tableType; 84 | } 85 | 86 | public String getTableComment() { 87 | return tableComment; 88 | } 89 | 90 | public void setTableComment(String tableComment) { 91 | this.tableComment = tableComment; 92 | } 93 | 94 | public String getPartition() { 95 | return partition; 96 | } 97 | 98 | public void setPartition(String partition) { 99 | this.partition = partition; 100 | } 101 | 102 | public String getRowFormat() { 103 | return rowFormat; 104 | } 105 | 106 | public void setRowFormat(String rowFormat) { 107 | this.rowFormat = rowFormat; 108 | } 109 | 110 | public String getStore() { 111 | return store; 112 | } 113 | 114 | public void setStore(String store) { 115 | this.store = store; 116 | } 117 | 118 | public String getLocation() { 119 | return location; 120 | } 121 | 122 | public void setLocation(String location) { 123 | this.location = location; 124 | } 125 | 126 | public String getProperties() { 127 | return properties; 128 | } 129 | 130 | public void setProperties(String properties) { 131 | this.properties = properties; 132 | } 133 | 134 | public List getFields() { 135 | return fields; 136 | } 137 | 138 | public void setFields(List fields) { 139 | this.fields = fields; 140 | } 141 | } 142 | 
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/mysql/MysqlSqlParser.java:
--------------------------------------------------------------------------------
package cn.ganjiacheng.mysql;

import cn.ganjiacheng.SqlParserAbstract;
import cn.ganjiacheng.antlr.MySqlLexer;
import cn.ganjiacheng.antlr.MySqlParser;
import cn.ganjiacheng.enums.SqlTypeEnum;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.tree.ParseTree;

import java.util.Locale;
import java.util.Objects;

/**
 * @ClassName MysqlSqlParser
 * @description: MySQL implementation of {@link SqlParserAbstract}. Parses a statement with the
 *               generated {@link MySqlLexer}/{@link MySqlParser} and classifies it with
 *               {@link MysqlSqlTypeParser}.
 * @author: again
 * @Date: 2021/3/10 4:26 PM
 */
public class MysqlSqlParser extends SqlParserAbstract {

    /**
     * Builds the parse tree for one SQL text, starting at the grammar's {@code root} rule.
     *
     * @param sql SQL text to parse; must not be null
     * @return the parse tree produced by {@code MySqlParser.root()}
     * @throws NullPointerException if {@code sql} is null
     */
    private ParseTree getParseTree(String sql) {
        Objects.requireNonNull(sql, "sql must not be null");
        // Upper-case with Locale.ROOT so the result does not depend on the JVM default locale
        // (e.g. under a Turkish locale "insert" would become "İNSERT").
        // NOTE(review): presumably the lexer grammar only matches upper-case keywords — confirm
        // against MySqlLexer.g4.
        CharStream input = CharStreams.fromString(sql.toUpperCase(Locale.ROOT));
        MySqlLexer lexer = new MySqlLexer(input);
        CommonTokenStream tokens = new CommonTokenStream(lexer);
        MySqlParser parser = new MySqlParser(tokens);
        return parser.root();
    }

    /**
     * Determines the statement type of the given SQL text.
     *
     * @param sql SQL text to classify; must not be null
     * @return the type recorded by {@link MysqlSqlTypeParser}, or {@code null} if none of its
     *         handlers was triggered
     */
    @Override
    public SqlTypeEnum parseSqlType(String sql) {
        MysqlSqlTypeParser visitor = new MysqlSqlTypeParser();
        visitor.visit(getParseTree(sql));
        return visitor.getSqlType();
    }
}

--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/mysql/MysqlSqlTypeParser.java:
--------------------------------------------------------------------------------
package cn.ganjiacheng.mysql;

import cn.ganjiacheng.antlr.MySqlParserBaseVisitor;
import cn.ganjiacheng.antlr.MySqlParser;
import cn.ganjiacheng.enums.SqlTypeEnum;

/**
 * @ClassName MysqlSqlTypeParser
 * @description: Parse-tree visitor that records the {@link SqlTypeEnum} of the first recognised
 *               statement node it enters (SELECT, UPDATE, INSERT, CREATE or DELETE).
 * @author: again
 * @Date: 2021/3/10 5:09 PM
 */
public class MysqlSqlTypeParser extends MySqlParserBaseVisitor<Object> {

    /** Statement type recorded so far; stays {@code null} until a handler below fires. */
    private SqlTypeEnum sqlType = null;

    /**
     * Records {@code type} only if nothing has been recorded yet. Every handler calls this
     * before descending into its children, so a statement nested inside an already recognised
     * statement cannot overwrite the outer type.
     *
     * @param type statement type of the node currently being visited
     */
    private void initSqlTypeEnum(SqlTypeEnum type) {
        if (sqlType == null) {
            sqlType = type;
        }
    }

    @Override
    public Object visitSimpleSelect(MySqlParser.SimpleSelectContext ctx) {
        initSqlTypeEnum(SqlTypeEnum.SELECT);
        return super.visitSimpleSelect(ctx);
    }

    @Override
    public Object visitUpdateStatement(MySqlParser.UpdateStatementContext ctx) {
        initSqlTypeEnum(SqlTypeEnum.UPDATE);
        return super.visitUpdateStatement(ctx);
    }

    @Override
    public Object visitInsertStatement(MySqlParser.InsertStatementContext ctx) {
        initSqlTypeEnum(SqlTypeEnum.INSERT);
        return super.visitInsertStatement(ctx);
    }

    @Override
    public Object visitColumnCreateTable(MySqlParser.ColumnCreateTableContext ctx) {
        initSqlTypeEnum(SqlTypeEnum.CREATE);
        return super.visitColumnCreateTable(ctx);
    }

    @Override
    public Object visitSingleDeleteStatement(MySqlParser.SingleDeleteStatementContext ctx) {
        initSqlTypeEnum(SqlTypeEnum.DELETE);
        return super.visitSingleDeleteStatement(ctx);
    }

    /**
     * @return the recorded statement type, or {@code null} if no handler above was triggered
     */
    public SqlTypeEnum getSqlType() {
        return sqlType;
    }

}

--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/presto/MyPresoSqlParser.java:
--------------------------------------------------------------------------------
package cn.ganjiacheng.presto;

import cn.ganjiacheng.SqlParserAbstract;
import cn.ganjiacheng.antlr.PrestoSqlLexer;
import cn.ganjiacheng.antlr.PrestoSqlParser;
import cn.ganjiacheng.enums.SqlTypeEnum;
import cn.ganjiacheng.mysql.MysqlSqlTypeParser;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.tree.ParseTree;

import java.util.Locale;
import java.util.Objects;

/**
 * @ClassName MyPresoSqlParser
 * @description: Presto implementation of {@link SqlParserAbstract}. Parses a statement with the
 *               generated {@link PrestoSqlLexer}/{@link PrestoSqlParser} and classifies it with
 *               {@link PrestoSqlTypeParser}.
 * @author: again
 * @Date: 2021/3/11 7:59 PM
 */
public class MyPresoSqlParser extends SqlParserAbstract {

    /**
     * Builds the parse tree for one SQL text, starting at the grammar's {@code statement} rule.
     *
     * @param sql SQL text to parse; must not be null
     * @return the parse tree produced by {@code PrestoSqlParser.statement()}
     * @throws NullPointerException if {@code sql} is null
     */
    private ParseTree getParseTree(String sql) {
        Objects.requireNonNull(sql, "sql must not be null");
        // Locale.ROOT keeps the upper-casing independent of the JVM default locale.
        // NOTE(review): presumably the grammar only matches upper-case keywords — confirm
        // against PrestoSql.g4.
        CharStream input = CharStreams.fromString(sql.toUpperCase(Locale.ROOT));
        PrestoSqlLexer lexer = new PrestoSqlLexer(input);
        CommonTokenStream tokens = new CommonTokenStream(lexer);
        PrestoSqlParser parser = new PrestoSqlParser(tokens);
        return parser.statement();
    }

    /**
     * Determines the statement type of the given SQL text.
     *
     * @param sql SQL text to classify; must not be null
     * @return the type recorded by {@link PrestoSqlTypeParser}, or {@code null} if none of its
     *         handlers was triggered
     */
    @Override
    public SqlTypeEnum parseSqlType(String sql) {
        PrestoSqlTypeParser visitor = new PrestoSqlTypeParser();
        visitor.visit(getParseTree(sql));
        return visitor.getSqlType();
    }

}

--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/presto/PrestoSqlTypeParser.java:
--------------------------------------------------------------------------------
package cn.ganjiacheng.presto;

import cn.ganjiacheng.antlr.PrestoSqlBaseVisitor;
import cn.ganjiacheng.antlr.PrestoSqlParser;
import cn.ganjiacheng.enums.SqlTypeEnum;

/**
 * @ClassName PrestoSqlTypeParser
 * @description: Parse-tree visitor that records the {@link SqlTypeEnum} of the first recognised
 *               statement node it enters (CREATE, INSERT, SELECT or DELETE).
 * @author: again
 * @Date: 2021/3/11 7:59 PM
 */
public class PrestoSqlTypeParser extends PrestoSqlBaseVisitor<Object> {

    /** Statement type recorded so far; stays {@code null} until a handler below fires. */
    private SqlTypeEnum sqlType = null;

    /**
     * Records {@code type} only if nothing has been recorded yet. Every handler calls this
     * before descending into its children, so a statement nested inside an already recognised
     * statement cannot overwrite the outer type.
     *
     * @param type statement type of the node currently being visited
     */
    private void initSqlTypeEnum(SqlTypeEnum type) {
        if (sqlType == null) {
            sqlType = type;
        }
    }

    /**
     * @return the recorded statement type, or {@code null} if no handler below was triggered
     */
    public SqlTypeEnum getSqlType() {
        return sqlType;
    }

    @Override
    public Object visitCreateTable(PrestoSqlParser.CreateTableContext ctx) {
        initSqlTypeEnum(SqlTypeEnum.CREATE);
        return super.visitCreateTable(ctx);
    }

    @Override
    public Object visitInsertInto(PrestoSqlParser.InsertIntoContext ctx) {
        initSqlTypeEnum(SqlTypeEnum.INSERT);
        return super.visitInsertInto(ctx);
    }

    @Override
    public Object visitSelectSingle(PrestoSqlParser.SelectSingleContext ctx) {
        initSqlTypeEnum(SqlTypeEnum.SELECT);
        return super.visitSelectSingle(ctx);
    }

    @Override
    public Object visitDelete(PrestoSqlParser.DeleteContext ctx) {
        initSqlTypeEnum(SqlTypeEnum.DELETE);
        return super.visitDelete(ctx);
    }
}

--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/spark/MySparkSqlParser.java:
--------------------------------------------------------------------------------
package cn.ganjiacheng.spark;

import cn.ganjiacheng.SqlParserAbstract;
import cn.ganjiacheng.antlr.PrestoSqlLexer;
import cn.ganjiacheng.antlr.PrestoSqlParser;
import cn.ganjiacheng.antlr.SparkSqlLexer;
import cn.ganjiacheng.antlr.SparkSqlParser;
import cn.ganjiacheng.enums.SqlTypeEnum;
import cn.ganjiacheng.presto.PrestoSqlTypeParser;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.tree.ParseTree;

import java.util.Locale;
import java.util.Objects;

/**
 * @ClassName MySparkSqlParser
 * @description: Spark SQL implementation of {@link SqlParserAbstract}. Parses a statement with
 *               the generated {@link SparkSqlLexer}/{@link SparkSqlParser} and classifies it
 *               with {@link SparkSqlTypeParser}.
 * @author: again
 * @Date: 2021/3/11 8:08 PM
 */
public class MySparkSqlParser extends SqlParserAbstract {

    /**
     * Builds the parse tree for one SQL text, starting at the grammar's {@code statement} rule.
     *
     * @param sql SQL text to parse; must not be null
     * @return the parse tree produced by {@code SparkSqlParser.statement()}
     * @throws NullPointerException if {@code sql} is null
     */
    private ParseTree getParseTree(String sql) {
        Objects.requireNonNull(sql, "sql must not be null");
        // Locale.ROOT keeps the upper-casing independent of the JVM default locale.
        // NOTE(review): presumably the grammar only matches upper-case keywords — confirm
        // against SparkSql.g4.
        CharStream input = CharStreams.fromString(sql.toUpperCase(Locale.ROOT));
        SparkSqlLexer lexer = new SparkSqlLexer(input);
        CommonTokenStream tokens = new CommonTokenStream(lexer);
        SparkSqlParser parser = new SparkSqlParser(tokens);
        return parser.statement();
    }

    /**
     * Determines the statement type of the given SQL text.
     *
     * @param sql SQL text to classify; must not be null
     * @return the type recorded by {@link SparkSqlTypeParser}, or {@code null} if none of its
     *         handlers was triggered
     */
    @Override
    public SqlTypeEnum parseSqlType(String sql) {
        SparkSqlTypeParser visitor = new SparkSqlTypeParser();
        visitor.visit(getParseTree(sql));
        return visitor.getSqlType();
    }

}

--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/spark/SparkSqlTypeParser.java:
--------------------------------------------------------------------------------
package cn.ganjiacheng.spark;

import cn.ganjiacheng.antlr.SparkSqlBaseVisitor;
import cn.ganjiacheng.antlr.SparkSqlParser;
import cn.ganjiacheng.enums.SqlTypeEnum;

/**
 * @ClassName SparkSqlTypeParser
 * @description: Parse-tree visitor that records the {@link SqlTypeEnum} of the first recognised
 *               statement node it enters (INSERT, SELECT, DELETE, UPDATE or CREATE).
 * @author: again
 * @Date: 2021/3/11 8:08 PM
 */
public class SparkSqlTypeParser extends SparkSqlBaseVisitor<Object> {

    /** Statement type recorded so far; stays {@code null} until a handler below fires. */
    private SqlTypeEnum sqlType = null;

    /**
     * Records {@code type} only if nothing has been recorded yet. Every handler calls this
     * before descending into its children, so a statement nested inside an already recognised
     * statement cannot overwrite the outer type.
     *
     * @param type statement type of the node currently being visited
     */
    private void initSqlTypeEnum(SqlTypeEnum type) {
        if (sqlType == null) {
            sqlType = type;
        }
    }

    /**
     * @return the recorded statement type, or {@code null} if no handler below was triggered
     */
    public SqlTypeEnum getSqlType() {
        return sqlType;
    }

    @Override
    public Object visitSingleInsertQuery(SparkSqlParser.SingleInsertQueryContext ctx) {
        initSqlTypeEnum(SqlTypeEnum.INSERT);
        return super.visitSingleInsertQuery(ctx);
    }

    @Override
    public Object visitSelectClause(SparkSqlParser.SelectClauseContext ctx) {
        initSqlTypeEnum(SqlTypeEnum.SELECT);
        return super.visitSelectClause(ctx);
    }

    @Override
    public Object visitDeleteFromTable(SparkSqlParser.DeleteFromTableContext ctx) {
        initSqlTypeEnum(SqlTypeEnum.DELETE);
        return super.visitDeleteFromTable(ctx);
    }

    @Override
    public Object visitUpdateTable(SparkSqlParser.UpdateTableContext ctx) {
        initSqlTypeEnum(SqlTypeEnum.UPDATE);
        return super.visitUpdateTable(ctx);
    }

    @Override
    public Object visitCreateTable(SparkSqlParser.CreateTableContext ctx) {
        initSqlTypeEnum(SqlTypeEnum.CREATE);
        return super.visitCreateTable(ctx);
    }
}

--------------------------------------------------------------------------------
/src/test/java/cn/ganjiacheng/AppTest.java:
--------------------------------------------------------------------------------
package cn.ganjiacheng;

import static org.junit.Assert.assertTrue;

import org.junit.Test;

/**
 * Placeholder unit test; it asserts a constant and does not exercise any parser class.
 */
public class AppTest {

    /**
     * Always passes.
     */
    @Test
    public void shouldAnswerWithTrue() {
        assertTrue(true);
    }
}

--------------------------------------------------------------------------------