├── .gitignore
├── LICENSE
├── README.md
├── pom.xml
├── sql-parser.iml
└── src
├── main
└── java
│ └── cn
│ └── ganjiacheng
│ ├── Application.java
│ ├── SqlParserAbstract.java
│ ├── SqlParserFactory.java
│ ├── SqlParserService.java
│ ├── antlr
│ ├── HiveSql.g4
│ ├── MySqlLexer.g4
│ ├── MySqlParser.g4
│ ├── PrestoSql.g4
│ └── SparkSql.g4
│ ├── enums
│ ├── SqlEngineEnum.java
│ └── SqlTypeEnum.java
│ ├── hive
│ ├── HiveSqlFieldLineageParser.java
│ ├── HiveSqlFormatterParser.java
│ ├── HiveSqlMetadataParser.java
│ ├── HiveSqlTableLineageParser.java
│ ├── HiveSqlTypeParser.java
│ └── MyHiveSqlParser.java
│ ├── model
│ ├── lineage
│ │ ├── FieldLineageModel.java
│ │ ├── FieldLineageSelectItemModel.java
│ │ ├── FieldLineageSelectModel.java
│ │ ├── FieldNameModel.java
│ │ ├── FieldNameWithProcessModel.java
│ │ ├── TableLineageModel.java
│ │ └── TableNameModel.java
│ └── metadata
│ │ ├── FieldMetadataModel.java
│ │ └── TableMetadataModel.java
│ ├── mysql
│ ├── MysqlSqlParser.java
│ └── MysqlSqlTypeParser.java
│ ├── presto
│ ├── MyPresoSqlParser.java
│ └── PrestoSqlTypeParser.java
│ └── spark
│ ├── MySparkSqlParser.java
│ └── SparkSqlTypeParser.java
└── test
└── java
└── cn
└── ganjiacheng
└── AppTest.java
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by .ignore support plugin (hsz.mobi)
2 | ### Java template
3 | # Compiled class file
4 | *.class
5 |
6 | # Log file
7 | *.log
8 |
9 | # BlueJ files
10 | *.ctxt
11 |
12 | # Mobile Tools for Java (J2ME)
13 | .mtj.tmp/
14 |
15 | # Package Files #
16 | *.jar
17 | *.war
18 | *.nar
19 | *.ear
20 | *.zip
21 | *.tar.gz
22 | *.rar
23 |
24 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
25 | hs_err_pid*
26 |
27 | ### Maven template
28 | target/
29 | pom.xml.tag
30 | pom.xml.releaseBackup
31 | pom.xml.versionsBackup
32 | pom.xml.next
33 | release.properties
34 | dependency-reduced-pom.xml
35 | buildNumber.properties
36 | .mvn/timing.properties
37 | # https://github.com/takari/maven-wrapper#usage-without-binary-jar
38 | .mvn/wrapper/maven-wrapper.jar
39 |
40 | .idea
41 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # sql-parser
2 | ## 【学习使用】【仅做参考】
3 | 基于antlr4的sql解析,实现格式化、元数据、血缘等自定义解析,包括hive
4 |
5 | ## 说明文章
6 |
7 | [基于antlr4实现HQL的解析[元数据]](http://ganjiacheng.cn/article/2020/article_6_%E5%9F%BA%E4%BA%8Eantlr4%E5%AE%9E%E7%8E%B0HQL%E7%9A%84%E8%A7%A3%E6%9E%90-%E5%85%83%E6%95%B0%E6%8D%AE/)
8 |
9 | [基于antlr4实现hiveSQL的解析[表血缘和字段血缘]](http://ganjiacheng.cn/article/2020/article_14_%E5%9F%BA%E4%BA%8Eantlr4%E5%AE%9E%E7%8E%B0HQL%E7%9A%84%E8%A7%A3%E6%9E%90-%E8%A1%A8%E8%A1%80%E7%BC%98%E5%92%8C%E5%AD%97%E6%AE%B5%E8%A1%80%E7%BC%98/)
10 |
11 | [基于antlr4实现HQL的解析[格式化]](http://ganjiacheng.cn/article/2020/article_12_%E5%9F%BA%E4%BA%8Eantlr4%E5%AE%9E%E7%8E%B0HQL%E7%9A%84%E8%A7%A3%E6%9E%90-%E6%A0%BC%E5%BC%8F%E5%8C%96/)
12 |
13 | ## 使用说明
14 |
15 | 下载安装[antlr4](https://www.antlr.org/index.html)
16 |
17 | idea装antlr4插件调试 cn.ganjiacheng.antlr.xxx.g4 中的规则
18 |
19 | 拉下来项目代码,运行Application
20 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
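1 | <?xml version="1.0" encoding="UTF-8"?>
2 |
3 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |   xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |   <modelVersion>4.0.0</modelVersion>
6 |
7 |   <groupId>cn.ganjiacheng</groupId>
8 |   <artifactId>sql-parser</artifactId>
9 |   <version>1.0-SNAPSHOT</version>
10 |
11 |   <name>sql-parser</name>
12 |
13 |   <url>http://www.example.com</url>
14 |
15 |   <properties>
16 |     <antlr.version>4.9.1</antlr.version>
17 |   </properties>
18 |
19 |   <dependencies>
20 |     <dependency>
21 |       <groupId>junit</groupId>
22 |       <artifactId>junit</artifactId>
23 |       <version>4.11</version>
24 |       <scope>test</scope>
25 |     </dependency>
26 |     <dependency>
27 |       <groupId>com.alibaba</groupId>
28 |       <artifactId>fastjson</artifactId>
29 |       <version>1.2.75</version>
30 |     </dependency>
31 |     <dependency>
32 |       <groupId>org.slf4j</groupId>
33 |       <artifactId>slf4j-api</artifactId>
34 |       <version>1.7.30</version>
35 |     </dependency>
36 |     <dependency>
37 |       <groupId>org.slf4j</groupId>
38 |       <artifactId>slf4j-simple</artifactId>
39 |       <version>1.7.30</version>
40 |     </dependency>
41 |     <dependency>
42 |       <groupId>org.antlr</groupId>
43 |       <artifactId>antlr4-runtime</artifactId>
44 |       <version>${antlr.version}</version>
45 |     </dependency>
46 |     <dependency>
47 |       <groupId>org.antlr</groupId>
48 |       <artifactId>antlr4-maven-plugin</artifactId>
49 |       <version>${antlr.version}</version>
50 |     </dependency>
51 |   </dependencies>
52 |
53 |   <build>
54 |     <plugins>
55 |       <plugin>
56 |         <groupId>org.antlr</groupId>
57 |         <artifactId>antlr4-maven-plugin</artifactId>
58 |         <version>${antlr.version}</version>
59 |         <configuration>
60 |           <sourceDirectory>src/main/java</sourceDirectory>
61 |           <arguments>
62 |             <argument>-visitor</argument>
63 |             <argument>-listener</argument>
64 |           </arguments>
65 |         </configuration>
66 |         <executions>
67 |           <execution>
68 |             <goals>
69 |               <goal>antlr4</goal>
70 |             </goals>
71 |           </execution>
72 |         </executions>
73 |       </plugin>
74 |       <plugin>
75 |         <groupId>org.apache.maven.plugins</groupId>
76 |         <artifactId>maven-compiler-plugin</artifactId>
77 |         <configuration>
78 |           <source>8</source>
79 |           <target>8</target>
80 |         </configuration>
81 |       </plugin>
82 |     </plugins>
83 |   </build>
84 | </project>
85 |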
--------------------------------------------------------------------------------
/sql-parser.iml:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/Application.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng;
2 |
3 | import cn.ganjiacheng.enums.SqlEngineEnum;
4 | import com.alibaba.fastjson.JSON;
5 | import com.alibaba.fastjson.serializer.SerializerFeature;
6 |
7 | /**
8 |  * Command-line demo: obtains the Hive parser from {@link SqlParserFactory}, runs each
9 |  * {@link SqlParserService} operation on a sample statement and prints the result to stdout.
10 |  *
11 |  * @author again
12 |  */
13 | public class Application {
14 |
15 |     /** Renders a parse result as pretty-printed JSON, keeping null-valued fields. */
16 |     private static String toPrettyJson(Object parsed) {
17 |         return JSON.toJSONString(parsed, SerializerFeature.WriteMapNullValue, SerializerFeature.PrettyFormat);
18 |     }
19 |
20 |     /** Prints the separator that closes every demo section. */
21 |     private static void endSection() {
22 |         System.out.println("\n");
23 |     }
24 |
25 |     public static void main(String[] args) {
26 |         final SqlParserService hiveParser = SqlParserFactory.getParser(SqlEngineEnum.HIVE);
27 |         // Sample with nested sub-queries and a join; a minimal alternative is "SELECT F1 FROM TAB1".
28 |         final String lineageSql = "INSERT INTO TABLE db_test.table_result SELECT t1.id, t2.name FROM ( SELECT id1 + id2 AS id FROM db_test.table1 ) t1 LEFT JOIN ( SELECT id, name FROM ( SELECT id, sourcename AS name FROM db_test.table2 ) ) t2 ON t1.id=t2.id";
29 |
30 |         // Each title is printed before its parse call runs, so output order matches execution order.
31 |         System.out.println("sql类型");
32 |         System.out.println(hiveParser.parseSqlType(lineageSql));
33 |         endSection();
34 |
35 |         System.out.println("sql格式化");
36 |         System.out.println(hiveParser.parseSqlFormatter(lineageSql));
37 |         endSection();
38 |
39 |         System.out.println("表血缘");
40 |         System.out.println(toPrettyJson(hiveParser.parseSqlTableLineage(lineageSql)));
41 |         endSection();
42 |
43 |         System.out.println("字段血源");
44 |         System.out.println(toPrettyJson(hiveParser.parseSqlFieldLineage(lineageSql)));
45 |         endSection();
46 |
47 |         // Metadata extraction works on a CREATE TABLE statement rather than the INSERT above.
48 |         final String ddlSql = "CREATE TABLE db1.table1 (id number comment 'id', name string comment '姓名', age number)";
49 |         System.out.println("元数据");
50 |         System.out.println(toPrettyJson(hiveParser.parseSqlMetadata(ddlSql)));
51 |         endSection();
52 |     }
53 | }
54 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/SqlParserAbstract.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng;
2 |
3 | import cn.ganjiacheng.enums.SqlTypeEnum;
4 | import cn.ganjiacheng.model.lineage.FieldLineageModel;
5 | import cn.ganjiacheng.model.lineage.TableLineageModel;
6 | import cn.ganjiacheng.model.metadata.TableMetadataModel;
7 | import org.slf4j.Logger;
8 | import org.slf4j.LoggerFactory;
9 |
10 | import java.util.List;
11 |
12 | /**
13 |  * Skeleton {@link SqlParserService} in which every operation is a "not supported" stub:
14 |  * each method logs an error naming the operation and returns {@code null}.
15 |  * Presumably the engine-specific parsers extend this class and override only the
16 |  * operations they implement (the subclasses are not visible here; confirm).
17 |  *
18 |  * @author again
19 |  */
20 | public abstract class SqlParserAbstract implements SqlParserService {
21 |
22 |     public static final Logger logger = LoggerFactory.getLogger(SqlParserAbstract.class);
23 |
24 |     /**
25 |      * Records that an operation is not implemented. It only logs and never throws, so the
26 |      * calling stub goes on to return {@code null}.
27 |      *
28 |      * @param msg name of the unsupported operation
29 |      */
30 |     private void notSupport(String msg) {
31 |         logger.error("not support {}", msg);
32 |     }
33 |
34 |     /**
35 |      * Default stub: logs that the operation is unsupported.
36 |      *
37 |      * @return always {@code null}
38 |      */
39 |     @Override
40 |     public SqlTypeEnum parseSqlType(String sql) {
41 |         notSupport("parseSqlType");
42 |         return null;
43 |     }
44 |
45 |     /**
46 |      * Default stub: logs that the operation is unsupported.
47 |      *
48 |      * @return always {@code null}
49 |      */
50 |     @Override
51 |     public TableMetadataModel parseSqlMetadata(String sql) {
52 |         notSupport("parseSqlMetadata");
53 |         return null;
54 |     }
55 |
56 |     /**
57 |      * Default stub: logs that the operation is unsupported.
58 |      *
59 |      * @return always {@code null}
60 |      */
61 |     @Override
62 |     public String parseSqlFormatter(String sql) {
63 |         notSupport("parseSqlFormatter");
64 |         return null;
65 |     }
66 |
67 |     /**
68 |      * Default stub: logs that the operation is unsupported.
69 |      *
70 |      * @return always {@code null}
71 |      */
72 |     @Override
73 |     public TableLineageModel parseSqlTableLineage(String sql) {
74 |         notSupport("parseSqlTableLineage");
75 |         return null;
76 |     }
77 |
78 |     /**
79 |      * Default stub: logs that the operation is unsupported.
80 |      *
81 |      * @return always {@code null}
82 |      */
83 |     @Override
84 |     public List<FieldLineageModel> parseSqlFieldLineage(String sql) {
85 |         notSupport("parseSqlFieldLineage");
86 |         return null;
87 |     }
88 | }
89 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/SqlParserFactory.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng;
2 |
3 | import cn.ganjiacheng.enums.SqlEngineEnum;
4 | import cn.ganjiacheng.hive.MyHiveSqlParser;
5 | import cn.ganjiacheng.mysql.MysqlSqlParser;
6 | import cn.ganjiacheng.presto.MyPresoSqlParser;
7 | import cn.ganjiacheng.spark.MySparkSqlParser;
8 |
9 | /**
10 |  * Static factory that maps a {@link SqlEngineEnum} value to a new {@link SqlParserService}.
11 |  *
12 |  * @author again
13 |  */
14 | public class SqlParserFactory {
15 |
16 |     /**
17 |      * Creates a new parser for the given SQL engine.
18 |      *
19 |      * @param sqlEngineEnum engine whose dialect should be parsed
20 |      * @return a new parser instance for HIVE, MYSQL, PRESTO or SPARK
21 |      * @throws IllegalArgumentException if the engine is {@code null} or has no parser;
22 |      *         this is a {@link RuntimeException}, so existing handlers keep working
23 |      */
24 |     public static SqlParserService getParser(SqlEngineEnum sqlEngineEnum) {
25 |         // Constant-first equals() keeps a null argument from raising a NullPointerException.
26 |         if (SqlEngineEnum.HIVE.equals(sqlEngineEnum)) {
27 |             return new MyHiveSqlParser();
28 |         }
29 |         if (SqlEngineEnum.MYSQL.equals(sqlEngineEnum)) {
30 |             return new MysqlSqlParser();
31 |         }
32 |         if (SqlEngineEnum.PRESTO.equals(sqlEngineEnum)) {
33 |             return new MyPresoSqlParser();
34 |         }
35 |         if (SqlEngineEnum.SPARK.equals(sqlEngineEnum)) {
36 |             return new MySparkSqlParser();
37 |         }
38 |         // Name the offending value so the failure can be diagnosed from the message alone.
39 |         throw new IllegalArgumentException("unsupported sql engine: " + sqlEngineEnum);
40 |     }
41 | }
42 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/SqlParserService.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng;
2 |
3 | import cn.ganjiacheng.enums.SqlTypeEnum;
4 | import cn.ganjiacheng.model.lineage.FieldLineageModel;
5 | import cn.ganjiacheng.model.lineage.TableLineageModel;
6 | import cn.ganjiacheng.model.metadata.TableMetadataModel;
7 |
8 | import java.util.List;
9 |
10 | /**
11 |  * SQL parsing interface: the operations a parser for one SQL engine exposes.
12 |  *
13 |  * @author again
14 |  */
15 | public interface SqlParserService {
16 |     /**
17 |      * Determines the type of the given SQL statement.
18 |      *
19 |      * @param sql SQL text to inspect
20 |      * @return the statement type
21 |      */
22 |     SqlTypeEnum parseSqlType(String sql);
23 |
24 |     /**
25 |      * Extracts table metadata from a CREATE TABLE statement.
26 |      *
27 |      * @param sql CREATE TABLE statement
28 |      * @return metadata of the table being created
29 |      */
30 |     TableMetadataModel parseSqlMetadata(String sql);
31 |
32 |     /**
33 |      * Formats the given SQL statement.
34 |      *
35 |      * @param sql SQL text to format
36 |      * @return the formatted SQL text
37 |      */
38 |     String parseSqlFormatter(String sql);
39 |
40 |     /**
41 |      * Extracts the table-level lineage of the given SQL statement.
42 |      *
43 |      * @param sql SQL text to analyse
44 |      * @return table lineage of the statement
45 |      */
46 |     TableLineageModel parseSqlTableLineage(String sql);
47 |
48 |     /**
49 |      * Extracts the field-level lineage of the given SQL statement.
50 |      *
51 |      * @param sql SQL text to analyse
52 |      * @return field lineage entries of the statement
53 |      */
54 |     List<FieldLineageModel> parseSqlFieldLineage(String sql);
55 | }
56 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/antlr/PrestoSql.g4:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | */
14 | // Combined grammar declaration; the name must match the file name PrestoSql.g4.
15 | grammar PrestoSql;
16 | // Declares the DELIMITER token type. NOTE(review): no rule in this part of the file produces it; presumably emitted outside the lexer rules - confirm.
17 | tokens {
18 | DELIMITER
19 | }
20 | // Entry rule: exactly one statement followed by end of input.
21 | singleStatement
22 | : statement EOF
23 | ;
24 | // Entry rule: exactly one expression followed by end of input.
25 | standaloneExpression
26 | : expression EOF
27 | ;
28 | // Entry rule: exactly one routine body followed by end of input.
29 | standaloneRoutineBody
30 | : routineBody EOF
31 | ;
32 | // Any single statement: a query or one of the DDL, DML, grant/role, SHOW, session, transaction and prepare alternatives; the '#' label names each alternative (several alternatives share a label).
33 | statement
34 | : query #statementDefault
35 | | USE schema=identifier #use
36 | | USE catalog=identifier '.' schema=identifier #use
37 | | CREATE SCHEMA (IF NOT EXISTS)? qualifiedName
38 | (WITH properties)? #createSchema
39 | | DROP SCHEMA (IF EXISTS)? qualifiedName (CASCADE | RESTRICT)? #dropSchema
40 | | ALTER SCHEMA qualifiedName RENAME TO identifier #renameSchema
41 | | CREATE TABLE (IF NOT EXISTS)? qualifiedName columnAliases?
42 | (COMMENT string)?
43 | (WITH properties)? AS (query | '('query')')
44 | (WITH (NO)? DATA)? #createTableAsSelect
45 | | CREATE TABLE (IF NOT EXISTS)? qualifiedName
46 | '(' tableElement (',' tableElement)* ')'
47 | (COMMENT string)?
48 | (WITH properties)? #createTable
49 | | DROP TABLE (IF EXISTS)? qualifiedName #dropTable
50 | | INSERT INTO qualifiedName columnAliases? query #insertInto
51 | | DELETE FROM qualifiedName (WHERE booleanExpression)? #delete
52 | | ALTER TABLE (IF EXISTS)? from=qualifiedName
53 | RENAME TO to=qualifiedName #renameTable
54 | | ALTER TABLE (IF EXISTS)? tableName=qualifiedName
55 | RENAME COLUMN (IF EXISTS)? from=identifier TO to=identifier #renameColumn
56 | | ALTER TABLE (IF EXISTS)? tableName=qualifiedName
57 | DROP COLUMN (IF EXISTS)? column=qualifiedName #dropColumn
58 | | ALTER TABLE (IF EXISTS)? tableName=qualifiedName
59 | ADD COLUMN (IF NOT EXISTS)? column=columnDefinition #addColumn
60 | | ANALYZE qualifiedName (WITH properties)? #analyze
61 | | CREATE (OR REPLACE)? VIEW qualifiedName
62 | (SECURITY (DEFINER | INVOKER))? AS query #createView
63 | | DROP VIEW (IF EXISTS)? qualifiedName #dropView
64 | | CREATE MATERIALIZED VIEW (IF NOT EXISTS)? qualifiedName
65 | (COMMENT string)?
66 | (WITH properties)? AS (query | '('query')') #createMaterializedView
67 | | CREATE (OR REPLACE)? TEMPORARY? FUNCTION functionName=qualifiedName
68 | '(' (sqlParameterDeclaration (',' sqlParameterDeclaration)*)? ')'
69 | RETURNS returnType=type
70 | (COMMENT string)?
71 | routineCharacteristics routineBody #createFunction
72 | | ALTER FUNCTION qualifiedName types?
73 | alterRoutineCharacteristics #alterFunction
74 | | DROP TEMPORARY? FUNCTION (IF EXISTS)? qualifiedName types? #dropFunction
75 | | CALL qualifiedName '(' (callArgument (',' callArgument)*)? ')' #call
76 | | CREATE ROLE name=identifier
77 | (WITH ADMIN grantor)? #createRole
78 | | DROP ROLE name=identifier #dropRole
79 | | GRANT
80 | roles
81 | TO principal (',' principal)*
82 | (WITH ADMIN OPTION)?
83 | (GRANTED BY grantor)? #grantRoles
84 | | REVOKE
85 | (ADMIN OPTION FOR)?
86 | roles
87 | FROM principal (',' principal)*
88 | (GRANTED BY grantor)? #revokeRoles
89 | | SET ROLE (ALL | NONE | role=identifier) #setRole
90 | | GRANT
91 | (privilege (',' privilege)* | ALL PRIVILEGES)
92 | ON TABLE? qualifiedName TO grantee=principal
93 | (WITH GRANT OPTION)? #grant
94 | | REVOKE
95 | (GRANT OPTION FOR)?
96 | (privilege (',' privilege)* | ALL PRIVILEGES)
97 | ON TABLE? qualifiedName FROM grantee=principal #revoke
98 | | SHOW GRANTS
99 | (ON TABLE? qualifiedName)? #showGrants
100 | | EXPLAIN ANALYZE? VERBOSE?
101 | ('(' explainOption (',' explainOption)* ')')? statement #explain
102 | | SHOW CREATE TABLE qualifiedName #showCreateTable
103 | | SHOW CREATE VIEW qualifiedName #showCreateView
104 | | SHOW CREATE FUNCTION qualifiedName types? #showCreateFunction
105 | | SHOW TABLES ((FROM | IN) qualifiedName)?
106 | (LIKE pattern=string (ESCAPE escape=string)?)? #showTables
107 | | SHOW SCHEMAS ((FROM | IN) identifier)?
108 | (LIKE pattern=string (ESCAPE escape=string)?)? #showSchemas
109 | | SHOW CATALOGS (LIKE pattern=string)? #showCatalogs
110 | | SHOW COLUMNS (FROM | IN) qualifiedName #showColumns
111 | | SHOW STATS FOR qualifiedName #showStats
112 | | SHOW STATS FOR '(' querySpecification ')' #showStatsForQuery
113 | | SHOW CURRENT? ROLES ((FROM | IN) identifier)? #showRoles
114 | | SHOW ROLE GRANTS ((FROM | IN) identifier)? #showRoleGrants
115 | | DESCRIBE qualifiedName #showColumns
116 | | DESC qualifiedName #showColumns
117 | | SHOW FUNCTIONS
118 | (LIKE pattern=string (ESCAPE escape=string)?)? #showFunctions
119 | | SHOW SESSION #showSession
120 | | SET SESSION qualifiedName EQ expression #setSession
121 | | RESET SESSION qualifiedName #resetSession
122 | | START TRANSACTION (transactionMode (',' transactionMode)*)? #startTransaction
123 | | COMMIT WORK? #commit
124 | | ROLLBACK WORK? #rollback
125 | | PREPARE identifier FROM statement #prepare
126 | | DEALLOCATE PREPARE identifier #deallocate
127 | | EXECUTE identifier (USING expression (',' expression)*)? #execute
128 | | DESCRIBE INPUT identifier #describeInput
129 | | DESCRIBE OUTPUT identifier #describeOutput
130 | ;
131 | // A query: an optional WITH clause followed by the query body.
132 | query
133 | : with? queryNoWith
134 | ;
135 | // WITH clause: optional RECURSIVE keyword and one or more comma-separated named queries.
136 | with
137 | : WITH RECURSIVE? namedQuery (',' namedQuery)*
138 | ;
139 | // One element of a CREATE TABLE column list: a column definition or a LIKE clause.
140 | tableElement
141 | : columnDefinition
142 | | likeClause
143 | ;
144 | // Column definition: name and type, then optional NOT NULL, COMMENT string and WITH properties.
145 | columnDefinition
146 | : identifier type (NOT NULL)? (COMMENT string)? (WITH properties)?
147 | ;
148 | // LIKE <table> with an optional INCLUDING/EXCLUDING PROPERTIES option (captured as optionType).
149 | likeClause
150 | : LIKE qualifiedName (optionType=(INCLUDING | EXCLUDING) PROPERTIES)?
151 | ;
152 | // Parenthesized, comma-separated list of one or more properties.
153 | properties
154 | : '(' property (',' property)* ')'
155 | ;
156 | // Single property assignment: name EQ expression.
157 | property
158 | : identifier EQ expression
159 | ;
160 | // Function parameter declaration: parameter name followed by its type.
161 | sqlParameterDeclaration
162 | : identifier type
163 | ;
164 | // Zero or more routine characteristics (used by CREATE FUNCTION in the statement rule).
165 | routineCharacteristics
166 | : routineCharacteristic*
167 | ;
168 | // One routine characteristic: LANGUAGE clause, determinism marker, or null-call clause.
169 | routineCharacteristic
170 | : LANGUAGE language
171 | | determinism
172 | | nullCallClause
173 | ;
174 | // Zero or more characteristics allowed in ALTER FUNCTION.
175 | alterRoutineCharacteristics
176 | : alterRoutineCharacteristic*
177 | ;
178 | // The only alterable characteristic here is the null-call clause.
179 | alterRoutineCharacteristic
180 | : nullCallClause
181 | ;
182 | // Routine body: either a RETURN statement or an EXTERNAL body reference.
183 | routineBody
184 | : returnStatement
185 | | externalBodyReference
186 | ;
187 | // RETURN followed by a single expression.
188 | returnStatement
189 | : RETURN expression
190 | ;
191 | // EXTERNAL with an optional NAME <externalRoutineName>.
192 | externalBodyReference
193 | : EXTERNAL (NAME externalRoutineName)?
194 | ;
195 | // Routine language: the SQL keyword or any identifier.
196 | language
197 | : SQL
198 | | identifier
199 | ;
200 | // Determinism marker: DETERMINISTIC or NOT DETERMINISTIC.
201 | determinism
202 | : DETERMINISTIC
203 | | NOT DETERMINISTIC;
204 | // Null-call clause: RETURNS NULL ON NULL INPUT or CALLED ON NULL INPUT.
205 | nullCallClause
206 | : RETURNS NULL ON NULL INPUT
207 | | CALLED ON NULL INPUT
208 | ;
209 | // Name of an external routine: a single identifier.
210 | externalRoutineName
211 | : identifier
212 | ;
213 | // Query body without WITH: a query term, optional ORDER BY list, optional LIMIT (integer or ALL, captured as limit).
214 | queryNoWith:
215 | queryTerm
216 | (ORDER BY sortItem (',' sortItem)*)?
217 | (LIMIT limit=(INTEGER_VALUE | ALL))?
218 | ;
219 | // Set operations over query primaries; INTERSECT is listed before UNION/EXCEPT, which in an ANTLR 4 left-recursive rule gives it the higher precedence.
220 | queryTerm
221 | : queryPrimary #queryTermDefault
222 | | left=queryTerm operator=INTERSECT setQuantifier? right=queryTerm #setOperation
223 | | left=queryTerm operator=(UNION | EXCEPT) setQuantifier? right=queryTerm #setOperation
224 | ;
225 | // Basic query forms: SELECT specification, TABLE <name>, VALUES list, or a parenthesized query body.
226 | queryPrimary
227 | : querySpecification #queryPrimaryDefault
228 | | TABLE qualifiedName #table
229 | | VALUES expression (',' expression)* #inlineTable
230 | | '(' queryNoWith ')' #subquery
231 | ;
232 | // ORDER BY item: expression, optional ASC/DESC (ordering), optional NULLS FIRST/LAST (nullOrdering).
233 | sortItem
234 | : expression ordering=(ASC | DESC)? (NULLS nullOrdering=(FIRST | LAST))?
235 | ;
236 | // SELECT block: select list, then optional FROM, WHERE, GROUP BY and HAVING clauses in that order.
237 | querySpecification
238 | : SELECT setQuantifier? selectItem (',' selectItem)*
239 | (FROM relation (',' relation)*)?
240 | (WHERE where=booleanExpression)?
241 | (GROUP BY groupBy)?
242 | (HAVING having=booleanExpression)?
243 | ;
244 | // GROUP BY body: optional DISTINCT/ALL, then one or more comma-separated grouping elements.
245 | groupBy
246 | : setQuantifier? groupingElement (',' groupingElement)*
247 | ;
248 | // Grouping element: a single grouping set, ROLLUP(...), CUBE(...), or GROUPING SETS(...).
249 | groupingElement
250 | : groupingSet #singleGroupingSet
251 | | ROLLUP '(' (expression (',' expression)*)? ')' #rollup
252 | | CUBE '(' (expression (',' expression)*)? ')' #cube
253 | | GROUPING SETS '(' groupingSet (',' groupingSet)* ')' #multipleGroupingSets
254 | ;
255 | // Grouping set: a parenthesized (possibly empty) expression list, or one bare expression.
256 | groupingSet
257 | : '(' (expression (',' expression)*)? ')'
258 | | expression
259 | ;
260 | // One WITH entry: name, optional column aliases, AS, and a parenthesized query.
261 | namedQuery
262 | : name=identifier (columnAliases)? AS '(' query ')'
263 | ;
264 | // Set quantifier keyword: DISTINCT or ALL.
265 | setQuantifier
266 | : DISTINCT
267 | | ALL
268 | ;
269 | // Select-list item: expression with optional alias, qualified "name.*", or bare "*".
270 | selectItem
271 | : expression (AS? identifier)? #selectSingle
272 | | qualifiedName '.' ASTERISK #selectAll
273 | | ASTERISK #selectAll
274 | ;
275 | // FROM-clause relation: left-recursive CROSS / typed / NATURAL joins over sampled relations; only the typed join takes a full relation on the right plus join criteria.
276 | relation
277 | : left=relation
278 | ( CROSS JOIN right=sampledRelation
279 | | joinType JOIN rightRelation=relation joinCriteria
280 | | NATURAL joinType JOIN right=sampledRelation
281 | ) #joinRelation
282 | | sampledRelation #relationDefault
283 | ;
284 | // Join kind: optional INNER (so this rule can match nothing), or LEFT/RIGHT/FULL with optional OUTER.
285 | joinType
286 | : INNER?
287 | | LEFT OUTER?
288 | | RIGHT OUTER?
289 | | FULL OUTER?
290 | ;
291 | // Join condition: ON <boolean expression> or USING (<column list>).
292 | joinCriteria
293 | : ON booleanExpression
294 | | USING '(' identifier (',' identifier)* ')'
295 | ;
296 | // Aliased relation with an optional TABLESAMPLE <type> (<percentage expression>) suffix.
297 | sampledRelation
298 | : aliasedRelation (
299 | TABLESAMPLE sampleType '(' percentage=expression ')'
300 | )?
301 | ;
302 | // Sampling method keyword: BERNOULLI or SYSTEM.
303 | sampleType
304 | : BERNOULLI
305 | | SYSTEM
306 | ;
307 | // Relation primary with an optional alias (AS is optional) and optional column aliases.
308 | aliasedRelation
309 | : relationPrimary (AS? identifier columnAliases?)?
310 | ;
311 | // Parenthesized, comma-separated list of one or more column alias identifiers.
312 | columnAliases
313 | : '(' identifier (',' identifier)* ')'
314 | ;
315 | // Primary relation: table name, parenthesized query, UNNEST(...), LATERAL (query), or parenthesized relation.
316 | relationPrimary
317 | : qualifiedName #tableName
318 | | '(' query ')' #subqueryRelation
319 | | UNNEST '(' expression (',' expression)* ')' (WITH ORDINALITY)? #unnest
320 | | LATERAL '(' query ')' #lateral
321 | | '(' relation ')' #parenthesizedRelation
322 | ;
323 | // General expression; simply delegates to booleanExpression.
324 | expression
325 | : booleanExpression
326 | ;
327 |
328 | booleanExpression // A value with an optional trailing predicate, or a NOT / AND / OR combination of such expressions.
329 | : valueExpression predicate[$valueExpression.ctx]? #predicated // passes the parsed value's context to predicate as its argument
330 | | NOT booleanExpression #logicalNot
331 | | left=booleanExpression operator=AND right=booleanExpression #logicalBinary // listed before OR, which ANTLR 4 treats as higher precedence
332 | | left=booleanExpression operator=OR right=booleanExpression #logicalBinary // shares the label with AND; tell them apart via `operator`
333 | ;
334 |
335 | // workaround for https://github.com/antlr/antlr4/issues/780
336 | predicate[ParserRuleContext value] // `value` is the rule argument; booleanExpression passes the context of the preceding valueExpression
337 | : comparisonOperator right=valueExpression #comparison
338 | | comparisonOperator comparisonQuantifier '(' query ')' #quantifiedComparison // comparison against ALL / SOME / ANY of a subquery
339 | | NOT? BETWEEN lower=valueExpression AND upper=valueExpression #between
340 | | NOT? IN '(' expression (',' expression)* ')' #inList // at least one expression is required
341 | | NOT? IN '(' query ')' #inSubquery
342 | | NOT? LIKE pattern=valueExpression (ESCAPE escape=valueExpression)? #like // the ESCAPE clause is optional
343 | | IS NOT? NULL #nullPredicate
344 | | IS NOT? DISTINCT FROM right=valueExpression #distinctFrom
345 | ;
346 |
347 | valueExpression // Non-boolean expression layer; left-recursive, so ANTLR 4 derives operator precedence from alternative order.
348 | : primaryExpression #valueExpressionDefault
349 | | valueExpression AT timeZoneSpecifier #atTimeZone
350 | | operator=(MINUS | PLUS) valueExpression #arithmeticUnary // prefix sign
351 | | left=valueExpression operator=(ASTERISK | SLASH | PERCENT) right=valueExpression #arithmeticBinary // listed before the additive form, so it binds tighter
352 | | left=valueExpression operator=(PLUS | MINUS) right=valueExpression #arithmeticBinary // same label as above; inspect `operator` to distinguish
353 | | left=valueExpression CONCAT right=valueExpression #concatenation
354 | ;
355 |
356 | primaryExpression
357 | : NULL #nullLiteral
358 | | interval #intervalLiteral
359 | | identifier string #typeConstructor
360 | | DOUBLE_PRECISION string #typeConstructor
361 | | number #numericLiteral
362 | | booleanValue #booleanLiteral
363 | | string #stringLiteral
364 | | BINARY_LITERAL #binaryLiteral
365 | | '?' #parameter
366 | | POSITION '(' valueExpression IN valueExpression ')' #position
367 | | '(' expression (',' expression)+ ')' #rowConstructor
368 | | ROW '(' expression (',' expression)* ')' #rowConstructor
369 | | qualifiedName '(' ASTERISK ')' filter? over? #functionCall
370 | | qualifiedName '(' (setQuantifier? expression (',' expression)*)?
371 | (ORDER BY sortItem (',' sortItem)*)? ')' filter? (nullTreatment? over)? #functionCall
372 | | identifier '->' expression #lambda
373 | | '(' (identifier (',' identifier)*)? ')' '->' expression #lambda
374 | | '(' query ')' #subqueryExpression
375 | // This is an extension to ANSI SQL, which considers EXISTS to be a <predicate>
376 | | EXISTS '(' query ')' #exists
377 | | CASE valueExpression whenClause+ (ELSE elseExpression=expression)? END #simpleCase
378 | | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase
379 | | CAST '(' expression AS type ')' #cast
380 | | TRY_CAST '(' expression AS type ')' #cast
381 | | ARRAY '[' (expression (',' expression)*)? ']' #arrayConstructor
382 | | value=primaryExpression '[' index=valueExpression ']' #subscript
383 | | identifier #columnReference
384 | | base=primaryExpression '.' fieldName=identifier #dereference
385 | | name=CURRENT_DATE #specialDateTimeFunction
386 | | name=CURRENT_TIME ('(' precision=INTEGER_VALUE ')')? #specialDateTimeFunction
387 | | name=CURRENT_TIMESTAMP ('(' precision=INTEGER_VALUE ')')? #specialDateTimeFunction
388 | | name=LOCALTIME ('(' precision=INTEGER_VALUE ')')? #specialDateTimeFunction
389 | | name=LOCALTIMESTAMP ('(' precision=INTEGER_VALUE ')')? #specialDateTimeFunction
390 | | name=CURRENT_USER #currentUser
391 | | SUBSTRING '(' valueExpression FROM valueExpression (FOR valueExpression)? ')' #substring
392 | | NORMALIZE '(' valueExpression (',' normalForm)? ')' #normalize
393 | | EXTRACT '(' identifier FROM valueExpression ')' #extract
394 | | '(' expression ')' #parenthesizedExpression
395 | | GROUPING '(' (qualifiedName (',' qualifiedName)*)? ')' #groupingOperation
396 | ;
397 |
398 | string
399 | : STRING #basicStringLiteral
400 | | UNICODE_STRING (UESCAPE STRING)? #unicodeStringLiteral
401 | ;
402 |
403 | nullTreatment
404 | : IGNORE NULLS
405 | | RESPECT NULLS
406 | ;
407 |
408 | timeZoneSpecifier
409 | : TIME ZONE interval #timeZoneInterval
410 | | TIME ZONE string #timeZoneString
411 | ;
412 |
413 | comparisonOperator
414 | : EQ | NEQ | LT | LTE | GT | GTE
415 | ;
416 |
417 | comparisonQuantifier
418 | : ALL | SOME | ANY
419 | ;
420 |
421 | booleanValue
422 | : TRUE | FALSE
423 | ;
424 |
425 | interval
426 | : INTERVAL sign=(PLUS | MINUS)? string from=intervalField (TO to=intervalField)?
427 | ;
428 |
429 | intervalField
430 | : YEAR | MONTH | DAY | HOUR | MINUTE | SECOND
431 | ;
432 |
433 | normalForm
434 | : NFD | NFC | NFKD | NFKC
435 | ;
436 |
437 | types
438 | : '(' (type (',' type)*)? ')'
439 | ;
440 |
441 | type
442 | : type ARRAY
443 | | ARRAY '<' type '>'
444 | | MAP '<' type ',' type '>'
445 | | ROW '(' identifier type (',' identifier type)* ')'
446 | | baseType ('(' typeParameter (',' typeParameter)* ')')?
447 | | INTERVAL from=intervalField TO to=intervalField
448 | ;
449 |
450 | typeParameter
451 | : INTEGER_VALUE | type
452 | ;
453 |
454 | baseType
455 | : TIME_WITH_TIME_ZONE
456 | | TIMESTAMP_WITH_TIME_ZONE
457 | | DOUBLE_PRECISION
458 | | qualifiedName
459 | ;
460 |
461 | whenClause
462 | : WHEN condition=expression THEN result=expression
463 | ;
464 |
465 | filter
466 | : FILTER '(' WHERE booleanExpression ')'
467 | ;
468 |
469 | over
470 | : OVER '('
471 | (PARTITION BY partition+=expression (',' partition+=expression)*)?
472 | (ORDER BY sortItem (',' sortItem)*)?
473 | windowFrame?
474 | ')'
475 | ;
476 |
477 | windowFrame
478 | : frameType=RANGE start=frameBound
479 | | frameType=ROWS start=frameBound
480 | | frameType=RANGE BETWEEN start=frameBound AND end=frameBound
481 | | frameType=ROWS BETWEEN start=frameBound AND end=frameBound
482 | ;
483 |
484 | frameBound
485 | : UNBOUNDED boundType=PRECEDING #unboundedFrame
486 | | UNBOUNDED boundType=FOLLOWING #unboundedFrame
487 | | CURRENT ROW #currentRowBound
488 | | expression boundType=(PRECEDING | FOLLOWING) #boundedFrame // expression should be unsignedLiteral
489 | ;
490 |
491 |
492 | explainOption
493 | : FORMAT value=(TEXT | GRAPHVIZ | JSON) #explainFormat
494 | | TYPE value=(LOGICAL | DISTRIBUTED | VALIDATE | IO) #explainType
495 | ;
496 |
497 | transactionMode
498 | : ISOLATION LEVEL levelOfIsolation #isolationLevel
499 | | READ accessMode=(ONLY | WRITE) #transactionAccessMode
500 | ;
501 |
502 | levelOfIsolation
503 | : READ UNCOMMITTED #readUncommitted
504 | | READ COMMITTED #readCommitted
505 | | REPEATABLE READ #repeatableRead
506 | | SERIALIZABLE #serializable
507 | ;
508 |
509 | callArgument
510 | : expression #positionalArgument
511 | | identifier '=>' expression #namedArgument
512 | ;
513 |
514 | privilege
515 | : SELECT | DELETE | INSERT | identifier
516 | ;
517 |
518 | qualifiedName
519 | : identifier ('.' identifier)*
520 | ;
521 |
522 | grantor
523 | : CURRENT_USER #currentUserGrantor
524 | | CURRENT_ROLE #currentRoleGrantor
525 | | principal #specifiedPrincipal
526 | ;
527 |
528 | principal
529 | : USER identifier #userPrincipal
530 | | ROLE identifier #rolePrincipal
531 | | identifier #unspecifiedPrincipal
532 | ;
533 |
534 | roles
535 | : identifier (',' identifier)*
536 | ;
537 |
538 | identifier
539 | : IDENTIFIER #unquotedIdentifier
540 | | QUOTED_IDENTIFIER #quotedIdentifier
541 | | nonReserved #unquotedIdentifier
542 | | BACKQUOTED_IDENTIFIER #backQuotedIdentifier
543 | | DIGIT_IDENTIFIER #digitIdentifier
544 | ;
545 |
546 | number
547 | : DECIMAL_VALUE #decimalLiteral
548 | | DOUBLE_VALUE #doubleLiteral
549 | | INTEGER_VALUE #integerLiteral
550 | ;
551 |
552 | nonReserved
553 | // IMPORTANT: this rule must only contain tokens. Nested rules are not supported. See SqlParser.exitNonReserved
554 | : ADD | ADMIN | ALL | ANALYZE | ANY | ARRAY | ASC | AT
555 | | BERNOULLI
556 | | CALL | CALLED | CASCADE | CATALOGS | COLUMN | COLUMNS | COMMENT | COMMIT | COMMITTED | CURRENT | CURRENT_ROLE
557 | | DATA | DATE | DAY | DEFINER | DESC | DETERMINISTIC | DISTRIBUTED
558 | | EXCLUDING | EXPLAIN | EXTERNAL
559 | | FILTER | FIRST | FOLLOWING | FORMAT | FUNCTION | FUNCTIONS
560 | | GRANT | GRANTED | GRANTS | GRAPHVIZ
561 | | HOUR
562 | | IF | IGNORE | INCLUDING | INPUT | INTERVAL | INVOKER | IO | ISOLATION
563 | | JSON
564 | | LANGUAGE | LAST | LATERAL | LEVEL | LIMIT | LOGICAL
565 | | MAP | MATERIALIZED | MINUTE | MONTH
566 | | NAME | NFC | NFD | NFKC | NFKD | NO | NONE | NULLIF | NULLS
567 | | ONLY | OPTION | ORDINALITY | OUTPUT | OVER
568 | | PARTITION | PARTITIONS | POSITION | PRECEDING | PRIVILEGES | PROPERTIES
569 | | RANGE | READ | RENAME | REPEATABLE | REPLACE | RESET | RESPECT | RESTRICT | RETURN | RETURNS | REVOKE | ROLE | ROLES | ROLLBACK | ROW | ROWS
570 | | SCHEMA | SCHEMAS | SECOND | SECURITY | SERIALIZABLE | SESSION | SET | SETS | SQL
571 | | SHOW | SOME | START | STATS | SUBSTRING | SYSTEM
572 | | TABLES | TABLESAMPLE | TEMPORARY | TEXT | TIME | TIMESTAMP | TO | TRANSACTION | TRY_CAST | TYPE
573 | | UNBOUNDED | UNCOMMITTED | USE | USER
574 | | VALIDATE | VERBOSE | VIEW
575 | | WORK | WRITE
576 | | YEAR
577 | | ZONE
578 | ;
579 |
580 | ADD: 'ADD';
581 | ADMIN: 'ADMIN';
582 | ALL: 'ALL';
583 | ALTER: 'ALTER';
584 | ANALYZE: 'ANALYZE';
585 | AND: 'AND';
586 | ANY: 'ANY';
587 | ARRAY: 'ARRAY';
588 | AS: 'AS';
589 | ASC: 'ASC';
590 | AT: 'AT';
591 | BERNOULLI: 'BERNOULLI';
592 | BETWEEN: 'BETWEEN';
593 | BY: 'BY';
594 | CALL: 'CALL';
595 | CALLED: 'CALLED';
596 | CASCADE: 'CASCADE';
597 | CASE: 'CASE';
598 | CAST: 'CAST';
599 | CATALOGS: 'CATALOGS';
600 | COLUMN: 'COLUMN';
601 | COLUMNS: 'COLUMNS';
602 | COMMENT: 'COMMENT';
603 | COMMIT: 'COMMIT';
604 | COMMITTED: 'COMMITTED';
605 | CONSTRAINT: 'CONSTRAINT';
606 | CREATE: 'CREATE';
607 | CROSS: 'CROSS';
608 | CUBE: 'CUBE';
609 | CURRENT: 'CURRENT';
610 | CURRENT_DATE: 'CURRENT_DATE';
611 | CURRENT_ROLE: 'CURRENT_ROLE';
612 | CURRENT_TIME: 'CURRENT_TIME';
613 | CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP';
614 | CURRENT_USER: 'CURRENT_USER';
615 | DATA: 'DATA';
616 | DATE: 'DATE';
617 | DAY: 'DAY';
618 | DEALLOCATE: 'DEALLOCATE';
619 | DEFINER: 'DEFINER';
620 | DELETE: 'DELETE';
621 | DESC: 'DESC';
622 | DESCRIBE: 'DESCRIBE';
623 | DETERMINISTIC: 'DETERMINISTIC';
624 | DISTINCT: 'DISTINCT';
625 | DISTRIBUTED: 'DISTRIBUTED';
626 | DROP: 'DROP';
627 | ELSE: 'ELSE';
628 | END: 'END';
629 | ESCAPE: 'ESCAPE';
630 | EXCEPT: 'EXCEPT';
631 | EXCLUDING: 'EXCLUDING';
632 | EXECUTE: 'EXECUTE';
633 | EXISTS: 'EXISTS';
634 | EXPLAIN: 'EXPLAIN';
635 | EXTRACT: 'EXTRACT';
636 | EXTERNAL: 'EXTERNAL';
637 | FALSE: 'FALSE';
638 | FILTER: 'FILTER';
639 | FIRST: 'FIRST';
640 | FOLLOWING: 'FOLLOWING';
641 | FOR: 'FOR';
642 | FORMAT: 'FORMAT';
643 | FROM: 'FROM';
644 | FULL: 'FULL';
645 | FUNCTION: 'FUNCTION';
646 | FUNCTIONS: 'FUNCTIONS';
647 | GRANT: 'GRANT';
648 | GRANTED: 'GRANTED';
649 | GRANTS: 'GRANTS';
650 | GRAPHVIZ: 'GRAPHVIZ';
651 | GROUP: 'GROUP';
652 | GROUPING: 'GROUPING';
653 | HAVING: 'HAVING';
654 | HOUR: 'HOUR';
655 | IF: 'IF';
656 | IGNORE: 'IGNORE';
657 | IN: 'IN';
658 | INCLUDING: 'INCLUDING';
659 | INNER: 'INNER';
660 | INPUT: 'INPUT';
661 | INSERT: 'INSERT';
662 | INTERSECT: 'INTERSECT';
663 | INTERVAL: 'INTERVAL';
664 | INTO: 'INTO';
665 | INVOKER: 'INVOKER';
666 | IO: 'IO';
667 | IS: 'IS';
668 | ISOLATION: 'ISOLATION';
669 | JSON: 'JSON';
670 | JOIN: 'JOIN';
671 | LANGUAGE: 'LANGUAGE';
672 | LAST: 'LAST';
673 | LATERAL: 'LATERAL';
674 | LEFT: 'LEFT';
675 | LEVEL: 'LEVEL';
676 | LIKE: 'LIKE';
677 | LIMIT: 'LIMIT';
678 | LOCALTIME: 'LOCALTIME';
679 | LOCALTIMESTAMP: 'LOCALTIMESTAMP';
680 | LOGICAL: 'LOGICAL';
681 | MAP: 'MAP';
682 | MATERIALIZED: 'MATERIALIZED';
683 | MINUTE: 'MINUTE';
684 | MONTH: 'MONTH';
685 | NAME: 'NAME';
686 | NATURAL: 'NATURAL';
687 | NFC : 'NFC';
688 | NFD : 'NFD';
689 | NFKC : 'NFKC';
690 | NFKD : 'NFKD';
691 | NO: 'NO';
692 | NONE: 'NONE';
693 | NORMALIZE: 'NORMALIZE';
694 | NOT: 'NOT';
695 | NULL: 'NULL';
696 | NULLIF: 'NULLIF';
697 | NULLS: 'NULLS';
698 | ON: 'ON';
699 | ONLY: 'ONLY';
700 | OPTION: 'OPTION';
701 | OR: 'OR';
702 | ORDER: 'ORDER';
703 | ORDINALITY: 'ORDINALITY';
704 | OUTER: 'OUTER';
705 | OUTPUT: 'OUTPUT';
706 | OVER: 'OVER';
707 | PARTITION: 'PARTITION';
708 | PARTITIONS: 'PARTITIONS';
709 | POSITION: 'POSITION';
710 | PRECEDING: 'PRECEDING';
711 | PREPARE: 'PREPARE';
712 | PRIVILEGES: 'PRIVILEGES';
713 | PROPERTIES: 'PROPERTIES';
714 | RANGE: 'RANGE';
715 | READ: 'READ';
716 | RECURSIVE: 'RECURSIVE';
717 | RENAME: 'RENAME';
718 | REPEATABLE: 'REPEATABLE';
719 | REPLACE: 'REPLACE';
720 | RESET: 'RESET';
721 | RESPECT: 'RESPECT';
722 | RESTRICT: 'RESTRICT';
723 | RETURN: 'RETURN';
724 | RETURNS: 'RETURNS';
725 | REVOKE: 'REVOKE';
726 | RIGHT: 'RIGHT';
727 | ROLE: 'ROLE';
728 | ROLES: 'ROLES';
729 | ROLLBACK: 'ROLLBACK';
730 | ROLLUP: 'ROLLUP';
731 | ROW: 'ROW';
732 | ROWS: 'ROWS';
733 | SCHEMA: 'SCHEMA';
734 | SCHEMAS: 'SCHEMAS';
735 | SECOND: 'SECOND';
736 | SECURITY: 'SECURITY';
737 | SELECT: 'SELECT';
738 | SERIALIZABLE: 'SERIALIZABLE';
739 | SESSION: 'SESSION';
740 | SET: 'SET';
741 | SETS: 'SETS';
742 | SHOW: 'SHOW';
743 | SOME: 'SOME';
744 | SQL: 'SQL';
745 | START: 'START';
746 | STATS: 'STATS';
747 | SUBSTRING: 'SUBSTRING';
748 | SYSTEM: 'SYSTEM';
749 | TABLE: 'TABLE';
750 | TABLES: 'TABLES';
751 | TABLESAMPLE: 'TABLESAMPLE';
752 | TEMPORARY: 'TEMPORARY';
753 | TEXT: 'TEXT';
754 | THEN: 'THEN';
755 | TIME: 'TIME';
756 | TIMESTAMP: 'TIMESTAMP';
757 | TO: 'TO';
758 | TRANSACTION: 'TRANSACTION';
759 | TRUE: 'TRUE';
760 | TRY_CAST: 'TRY_CAST';
761 | TYPE: 'TYPE';
762 | UESCAPE: 'UESCAPE';
763 | UNBOUNDED: 'UNBOUNDED';
764 | UNCOMMITTED: 'UNCOMMITTED';
765 | UNION: 'UNION';
766 | UNNEST: 'UNNEST';
767 | USE: 'USE';
768 | USER: 'USER';
769 | USING: 'USING';
770 | VALIDATE: 'VALIDATE';
771 | VALUES: 'VALUES';
772 | VERBOSE: 'VERBOSE';
773 | VIEW: 'VIEW';
774 | WHEN: 'WHEN';
775 | WHERE: 'WHERE';
776 | WITH: 'WITH';
777 | WORK: 'WORK';
778 | WRITE: 'WRITE';
779 | YEAR: 'YEAR';
780 | ZONE: 'ZONE';
781 |
782 | EQ : '=';
783 | NEQ : '<>' | '!=';
784 | LT : '<';
785 | LTE : '<=';
786 | GT : '>';
787 | GTE : '>=';
788 |
789 | PLUS: '+';
790 | MINUS: '-';
791 | ASTERISK: '*';
792 | SLASH: '/';
793 | PERCENT: '%';
794 | CONCAT: '||';
795 |
796 | STRING // Single-quoted literal.
797 | : '\'' ( ~'\'' | '\'\'' )* '\'' // body is any non-quote character or a doubled single quote
798 | ;
799 |
800 | UNICODE_STRING
801 | : 'U&\'' ( ~'\'' | '\'\'' )* '\''
802 | ;
803 |
804 | // Note: we allow any character inside the binary literal and validate
805 | // that it is a correct literal when the AST is being constructed. This
806 | // allows us to provide more meaningful error messages to the user.
807 | BINARY_LITERAL
808 | : 'X\'' (~'\'')* '\''
809 | ;
810 |
811 | INTEGER_VALUE
812 | : DIGIT+
813 | ;
814 |
815 | DECIMAL_VALUE
816 | : DIGIT+ '.' DIGIT*
817 | | '.' DIGIT+
818 | ;
819 |
820 | DOUBLE_VALUE
821 | : DIGIT+ ('.' DIGIT*)? EXPONENT
822 | | '.' DIGIT+ EXPONENT
823 | ;
824 |
825 | IDENTIFIER // Unquoted identifier token.
826 | : (LETTER | '_') (LETTER | DIGIT | '_' | '@' | ':')* // first char is a letter or '_'; later chars may also be digits, '@' or ':'
827 | ;
828 |
829 | DIGIT_IDENTIFIER // Identifier-like token that begins with a digit.
830 | : DIGIT (LETTER | DIGIT | '_' | '@' | ':')+ // needs at least one character after the leading digit
831 | ;
832 |
833 | QUOTED_IDENTIFIER
834 | : '"' ( ~'"' | '""' )* '"'
835 | ;
836 |
837 | BACKQUOTED_IDENTIFIER
838 | : '`' ( ~'`' | '``' )* '`'
839 | ;
840 |
841 | TIME_WITH_TIME_ZONE // One token covering the whole four-word phrase.
842 | : 'TIME' WS 'WITH' WS 'TIME' WS 'ZONE' // each WS reference requires at least one whitespace character between the words
843 | ;
844 |
845 | TIMESTAMP_WITH_TIME_ZONE // One token covering the whole four-word phrase.
846 | : 'TIMESTAMP' WS 'WITH' WS 'TIME' WS 'ZONE' // each WS reference requires at least one whitespace character between the words
847 | ;
848 |
849 | DOUBLE_PRECISION // One token covering the two-word phrase.
850 | : 'DOUBLE' WS 'PRECISION' // the WS reference requires at least one whitespace character between the words
851 | ;
852 |
853 | fragment EXPONENT
854 | : 'E' [+-]? DIGIT+
855 | ;
856 |
857 | fragment DIGIT
858 | : [0-9]
859 | ;
860 |
861 | fragment LETTER // Upper-case ASCII only; presumably the input is upper-cased before lexing -- TODO confirm against the lexer setup.
862 | : [A-Z]
863 | ;
864 |
865 | SIMPLE_COMMENT // Line comment introduced by '--', sent to the hidden channel.
866 | : '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN) // the line terminator is optional, so a comment may end at end of input
867 | ;
868 |
869 | BRACKETED_COMMENT
870 | : '/*' .*? '*/' -> channel(HIDDEN)
871 | ;
872 |
873 | WS
874 | : [ \r\n\t]+ -> channel(HIDDEN)
875 | ;
876 |
877 | // Catch-all for anything we can't recognize.
878 | // We use this to be able to ignore and recover all the text
879 | // when splitting statements with DelimiterLexer
880 | UNRECOGNIZED
881 | : .
882 | ;
883 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/antlr/SparkSql.g4:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
13 | *
14 | * This file is an adaptation of Presto's presto-parser/src/main/antlr4/com/facebook/presto/sql/parser/SqlBase.g4 grammar.
15 | */
16 |
17 | grammar SparkSql;
18 |
19 | @parser::members {
20 | /**
21 | * When false, INTERSECT is given the greater precedence over the other set
22 | * operations (UNION, EXCEPT and MINUS) as per the SQL standard.
23 | */
24 | public boolean legacy_setops_precedence_enabled = false;
25 | /**
26 | * When false, a literal with an exponent would be converted into
27 | * double type rather than decimal type.
28 | */
29 | public boolean legacy_exponent_literal_as_decimal_enabled = false;
30 | /**
31 | * When true, the behavior of keywords follows ANSI SQL standard.
32 | */
33 | public boolean SQL_standard_keyword_behavior = false;
34 | }
35 |
36 | @lexer::members {
37 | /**
38 | * Decides whether the decimal token just matched (a token containing a dot) may be accepted.
39 | * The token is accepted only when the next input character is none of: an upper-case ASCII
40 | * letter, an ASCII digit, or an underscore.
41 | *
42 | * Examples:
43 | * In "2.3", the prefix "2." is rejected because the digit '3' follows it.
44 | * In "2.3_", the prefix "2.3" is rejected because '_' follows it.
45 | * In "2.3W", the prefix "2.3" is rejected because 'W' follows it.
46 | * In "12.0D 34.E2+0.12 ", 12.0D is accepted because a space follows it, and 34.E2 is
47 | * accepted because '+' follows it.
48 | */
49 | public boolean isValidDecimal() {
50 | final int following = _input.LA(1);
51 | final boolean isUpperLetter = following >= 'A' && following <= 'Z';
52 | final boolean isDigit = following >= '0' && following <= '9';
53 | final boolean isUnderscore = following == '_';
54 | // Only A-Z is tested for letters; presumably the input stream is upper-cased
55 | // before lexing -- TODO confirm against the code that builds the lexer.
56 | return !(isUpperLetter || isDigit || isUnderscore);
57 | }
58 |
59 | /**
60 | * Invoked once '/*' has been seen and the lexer is about to treat it as the opening
61 | * of a bracketed comment. A '+' right after it means the text must be left for the
62 | * hint syntax handled later, so it must not be swallowed as a comment.
63 | *
64 | * Returns true exactly when the next input character is '+'.
65 | */
66 | public boolean isHint() {
67 | // Peek, without consuming, at the character that follows the current position.
68 | final int following = _input.LA(1);
69 | // The comparison already yields the boolean the caller needs,
70 | // so no explicit branching is required.
71 | final boolean startsHint = following == '+';
72 | return startsHint;
73 | }
74 | }
75 |
76 | singleStatement // Entry rule for exactly one statement.
77 | : statement ';'* EOF // any number of trailing semicolons, then the input must end
78 | ;
79 |
80 | singleExpression
81 | : namedExpression EOF
82 | ;
83 |
84 | singleTableIdentifier
85 | : tableIdentifier EOF
86 | ;
87 |
88 | singleMultipartIdentifier
89 | : multipartIdentifier EOF
90 | ;
91 |
92 | singleFunctionIdentifier
93 | : functionIdentifier EOF
94 | ;
95 |
96 | singleDataType
97 | : dataType EOF
98 | ;
99 |
100 | singleTableSchema
101 | : colTypeList EOF
102 | ;
103 |
104 | statement
105 | : query #statementDefault
106 | | ctes? dmlStatementNoWith #dmlStatement
107 | | USE NAMESPACE? multipartIdentifier #use
108 | | CREATE namespace (IF NOT EXISTS)? multipartIdentifier
109 | (commentSpec |
110 | locationSpec |
111 | (WITH (DBPROPERTIES | PROPERTIES) tablePropertyList))* #createNamespace
112 | | ALTER namespace multipartIdentifier
113 | SET (DBPROPERTIES | PROPERTIES) tablePropertyList #setNamespaceProperties
114 | | ALTER namespace multipartIdentifier
115 | SET locationSpec #setNamespaceLocation
116 | | DROP namespace (IF EXISTS)? multipartIdentifier
117 | (RESTRICT | CASCADE)? #dropNamespace
118 | | SHOW (DATABASES | NAMESPACES) ((FROM | IN) multipartIdentifier)?
119 | (LIKE? pattern=STRING)? #showNamespaces
120 | | createTableHeader ('(' colTypeList ')')? tableProvider?
121 | createTableClauses
122 | (AS? query)? #createTable
123 | | CREATE TABLE (IF NOT EXISTS)? target=tableIdentifier
124 | LIKE source=tableIdentifier
125 | (tableProvider |
126 | rowFormat |
127 | createFileFormat |
128 | locationSpec |
129 | (TBLPROPERTIES tableProps=tablePropertyList))* #createTableLike
130 | | replaceTableHeader ('(' colTypeList ')')? tableProvider?
131 | createTableClauses
132 | (AS? query)? #replaceTable
133 | | ANALYZE TABLE multipartIdentifier partitionSpec? COMPUTE STATISTICS
134 | (identifier | FOR COLUMNS identifierSeq | FOR ALL COLUMNS)? #analyze
135 | | ANALYZE TABLES ((FROM | IN) multipartIdentifier)? COMPUTE STATISTICS
136 | (identifier)? #analyzeTables
137 | | ALTER TABLE multipartIdentifier
138 | ADD (COLUMN | COLUMNS)
139 | columns=qualifiedColTypeWithPositionList #addTableColumns
140 | | ALTER TABLE multipartIdentifier
141 | ADD (COLUMN | COLUMNS)
142 | '(' columns=qualifiedColTypeWithPositionList ')' #addTableColumns
143 | | ALTER TABLE table=multipartIdentifier
144 | RENAME COLUMN
145 | from=multipartIdentifier TO to=errorCapturingIdentifier #renameTableColumn
146 | | ALTER TABLE multipartIdentifier
147 | DROP (COLUMN | COLUMNS)
148 | '(' columns=multipartIdentifierList ')' #dropTableColumns
149 | | ALTER TABLE multipartIdentifier
150 | DROP (COLUMN | COLUMNS) columns=multipartIdentifierList #dropTableColumns
151 | | ALTER (TABLE | VIEW) from=multipartIdentifier
152 | RENAME TO to=multipartIdentifier #renameTable
153 | | ALTER (TABLE | VIEW) multipartIdentifier
154 | SET TBLPROPERTIES tablePropertyList #setTableProperties
155 | | ALTER (TABLE | VIEW) multipartIdentifier
156 | UNSET TBLPROPERTIES (IF EXISTS)? tablePropertyList #unsetTableProperties
157 | | ALTER TABLE table=multipartIdentifier
158 | (ALTER | CHANGE) COLUMN? column=multipartIdentifier
159 | alterColumnAction? #alterTableAlterColumn
160 | | ALTER TABLE table=multipartIdentifier partitionSpec?
161 | CHANGE COLUMN?
162 | colName=multipartIdentifier colType colPosition? #hiveChangeColumn
163 | | ALTER TABLE table=multipartIdentifier partitionSpec?
164 | REPLACE COLUMNS
165 | '(' columns=qualifiedColTypeWithPositionList ')' #hiveReplaceColumns
166 | | ALTER TABLE multipartIdentifier (partitionSpec)?
167 | SET SERDE STRING (WITH SERDEPROPERTIES tablePropertyList)? #setTableSerDe
168 | | ALTER TABLE multipartIdentifier (partitionSpec)?
169 | SET SERDEPROPERTIES tablePropertyList #setTableSerDe
170 | | ALTER (TABLE | VIEW) multipartIdentifier ADD (IF NOT EXISTS)?
171 | partitionSpecLocation+ #addTablePartition
172 | | ALTER TABLE multipartIdentifier
173 | from=partitionSpec RENAME TO to=partitionSpec #renameTablePartition
174 | | ALTER (TABLE | VIEW) multipartIdentifier
175 | DROP (IF EXISTS)? partitionSpec (',' partitionSpec)* PURGE? #dropTablePartitions
176 | | ALTER TABLE multipartIdentifier
177 | (partitionSpec)? SET locationSpec #setTableLocation
178 | | ALTER TABLE multipartIdentifier RECOVER PARTITIONS #recoverPartitions
179 | | DROP TABLE (IF EXISTS)? multipartIdentifier PURGE? #dropTable
180 | | DROP VIEW (IF EXISTS)? multipartIdentifier #dropView
181 | | CREATE (OR REPLACE)? (GLOBAL? TEMPORARY)?
182 | VIEW (IF NOT EXISTS)? multipartIdentifier
183 | identifierCommentList?
184 | (commentSpec |
185 | (PARTITIONED ON identifierList) |
186 | (TBLPROPERTIES tablePropertyList))*
187 | AS query #createView
188 | | CREATE (OR REPLACE)? GLOBAL? TEMPORARY VIEW
189 | tableIdentifier ('(' colTypeList ')')? tableProvider
190 | (OPTIONS tablePropertyList)? #createTempViewUsing
191 | | ALTER VIEW multipartIdentifier AS? query #alterViewQuery
192 | | CREATE (OR REPLACE)? TEMPORARY? FUNCTION (IF NOT EXISTS)?
193 | multipartIdentifier AS className=STRING
194 | (USING resource (',' resource)*)? #createFunction
195 | | DROP TEMPORARY? FUNCTION (IF EXISTS)? multipartIdentifier #dropFunction
196 | | EXPLAIN (LOGICAL | FORMATTED | EXTENDED | CODEGEN | COST)?
197 | statement #explain
198 | | SHOW TABLES ((FROM | IN) multipartIdentifier)?
199 | (LIKE? pattern=STRING)? #showTables
200 | | SHOW TABLE EXTENDED ((FROM | IN) ns=multipartIdentifier)?
201 | LIKE pattern=STRING partitionSpec? #showTableExtended
202 | | SHOW TBLPROPERTIES table=multipartIdentifier
203 | ('(' key=tablePropertyKey ')')? #showTblProperties
204 | | SHOW COLUMNS (FROM | IN) table=multipartIdentifier
205 | ((FROM | IN) ns=multipartIdentifier)? #showColumns
206 | | SHOW VIEWS ((FROM | IN) multipartIdentifier)?
207 | (LIKE? pattern=STRING)? #showViews
208 | | SHOW PARTITIONS multipartIdentifier partitionSpec? #showPartitions
209 | | SHOW identifier? FUNCTIONS
210 | (LIKE? (multipartIdentifier | pattern=STRING))? #showFunctions
211 | | SHOW CREATE TABLE multipartIdentifier (AS SERDE)? #showCreateTable
212 | | SHOW CURRENT NAMESPACE #showCurrentNamespace
213 | | (DESC | DESCRIBE) FUNCTION EXTENDED? describeFuncName #describeFunction
214 | | (DESC | DESCRIBE) namespace EXTENDED?
215 | multipartIdentifier #describeNamespace
216 | | (DESC | DESCRIBE) TABLE? option=(EXTENDED | FORMATTED)?
217 | multipartIdentifier partitionSpec? describeColName? #describeRelation
218 | | (DESC | DESCRIBE) QUERY? query #describeQuery
219 | | COMMENT ON namespace multipartIdentifier IS
220 | comment=(STRING | NULL) #commentNamespace
221 | | COMMENT ON TABLE multipartIdentifier IS comment=(STRING | NULL) #commentTable
222 | | REFRESH TABLE multipartIdentifier #refreshTable
223 | | REFRESH FUNCTION multipartIdentifier #refreshFunction
224 | | REFRESH (STRING | .*?) #refreshResource
225 | | CACHE LAZY? TABLE multipartIdentifier
226 | (OPTIONS options=tablePropertyList)? (AS? query)? #cacheTable
227 | | UNCACHE TABLE (IF EXISTS)? multipartIdentifier #uncacheTable
228 | | CLEAR CACHE #clearCache
229 | | LOAD DATA LOCAL? INPATH path=STRING OVERWRITE? INTO TABLE
230 | multipartIdentifier partitionSpec? #loadData
231 | | TRUNCATE TABLE multipartIdentifier partitionSpec? #truncateTable
232 | | MSCK REPAIR TABLE multipartIdentifier
233 | (option=(ADD|DROP|SYNC) PARTITIONS)? #repairTable
234 | | op=(ADD | LIST) identifier (STRING | .*?) #manageResource
235 | | SET ROLE .*? #failNativeCommand
236 | | SET TIME ZONE interval #setTimeZone
237 | | SET TIME ZONE timezone=(STRING | LOCAL) #setTimeZone
238 | | SET TIME ZONE .*? #setTimeZone
239 | | SET configKey EQ configValue #setQuotedConfiguration
240 | | SET configKey (EQ .*?)? #setQuotedConfiguration
241 | | SET .*? EQ configValue #setQuotedConfiguration
242 | | SET .*? #setConfiguration
243 | | RESET configKey #resetQuotedConfiguration
244 | | RESET .*? #resetConfiguration
245 | | unsupportedHiveNativeCommands .*? #failNativeCommand
246 | ;
247 |
248 | configKey
249 | : quotedIdentifier
250 | ;
251 |
252 | configValue
253 | : quotedIdentifier
254 | ;
255 |
256 | unsupportedHiveNativeCommands
257 | : kw1=CREATE kw2=ROLE
258 | | kw1=DROP kw2=ROLE
259 | | kw1=GRANT kw2=ROLE?
260 | | kw1=REVOKE kw2=ROLE?
261 | | kw1=SHOW kw2=GRANT
262 | | kw1=SHOW kw2=ROLE kw3=GRANT?
263 | | kw1=SHOW kw2=PRINCIPALS
264 | | kw1=SHOW kw2=ROLES
265 | | kw1=SHOW kw2=CURRENT kw3=ROLES
266 | | kw1=EXPORT kw2=TABLE
267 | | kw1=IMPORT kw2=TABLE
268 | | kw1=SHOW kw2=COMPACTIONS
269 | | kw1=SHOW kw2=CREATE kw3=TABLE
270 | | kw1=SHOW kw2=TRANSACTIONS
271 | | kw1=SHOW kw2=INDEXES
272 | | kw1=SHOW kw2=LOCKS
273 | | kw1=CREATE kw2=INDEX
274 | | kw1=DROP kw2=INDEX
275 | | kw1=ALTER kw2=INDEX
276 | | kw1=LOCK kw2=TABLE
277 | | kw1=LOCK kw2=DATABASE
278 | | kw1=UNLOCK kw2=TABLE
279 | | kw1=UNLOCK kw2=DATABASE
280 | | kw1=CREATE kw2=TEMPORARY kw3=MACRO
281 | | kw1=DROP kw2=TEMPORARY kw3=MACRO
282 | | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=CLUSTERED
283 | | kw1=ALTER kw2=TABLE tableIdentifier kw3=CLUSTERED kw4=BY
284 | | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=SORTED
285 | | kw1=ALTER kw2=TABLE tableIdentifier kw3=SKEWED kw4=BY
286 | | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=SKEWED
287 | | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=STORED kw5=AS kw6=DIRECTORIES
288 | | kw1=ALTER kw2=TABLE tableIdentifier kw3=SET kw4=SKEWED kw5=LOCATION
289 | | kw1=ALTER kw2=TABLE tableIdentifier kw3=EXCHANGE kw4=PARTITION
290 | | kw1=ALTER kw2=TABLE tableIdentifier kw3=ARCHIVE kw4=PARTITION
291 | | kw1=ALTER kw2=TABLE tableIdentifier kw3=UNARCHIVE kw4=PARTITION
292 | | kw1=ALTER kw2=TABLE tableIdentifier kw3=TOUCH
293 | | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=COMPACT
294 | | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=CONCATENATE
295 | | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=SET kw4=FILEFORMAT
296 | | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=REPLACE kw4=COLUMNS
297 | | kw1=START kw2=TRANSACTION
298 | | kw1=COMMIT
299 | | kw1=ROLLBACK
300 | | kw1=DFS
301 | ;
302 |
303 | createTableHeader
304 | : CREATE TEMPORARY? EXTERNAL? TABLE (IF NOT EXISTS)? multipartIdentifier
305 | ;
306 |
307 | replaceTableHeader
308 | : (CREATE OR)? REPLACE TABLE multipartIdentifier
309 | ;
310 |
311 | bucketSpec
312 | : CLUSTERED BY identifierList
313 | (SORTED BY orderedIdentifierList)?
314 | INTO INTEGER_VALUE BUCKETS
315 | ;
316 |
317 | skewSpec
318 | : SKEWED BY identifierList
319 | ON (constantList | nestedConstantList)
320 | (STORED AS DIRECTORIES)?
321 | ;
322 |
323 | locationSpec
324 | : LOCATION STRING
325 | ;
326 |
327 | commentSpec
328 | : COMMENT STRING
329 | ;
330 |
331 | query
332 | : ctes? queryTerm queryOrganization
333 | ;
334 |
335 | insertInto
336 | : INSERT OVERWRITE TABLE? multipartIdentifier (partitionSpec (IF NOT EXISTS)?)? identifierList? #insertOverwriteTable
337 | | INSERT INTO TABLE? multipartIdentifier partitionSpec? (IF NOT EXISTS)? identifierList? #insertIntoTable
338 | | INSERT OVERWRITE LOCAL? DIRECTORY path=STRING rowFormat? createFileFormat? #insertOverwriteHiveDir
339 | | INSERT OVERWRITE LOCAL? DIRECTORY (path=STRING)? tableProvider (OPTIONS options=tablePropertyList)? #insertOverwriteDir
340 | ;
341 |
342 | partitionSpecLocation
343 | : partitionSpec locationSpec?
344 | ;
345 |
346 | partitionSpec
347 | : PARTITION '(' partitionVal (',' partitionVal)* ')'
348 | ;
349 |
350 | partitionVal // One entry inside PARTITION ( ... ).
351 | : identifier (EQ constant)? // the "= constant" part is optional, so a bare column name is accepted
352 | ;
353 |
354 | namespace
355 | : NAMESPACE
356 | | DATABASE
357 | | SCHEMA
358 | ;
359 |
360 | describeFuncName
361 | : qualifiedName
362 | | STRING
363 | | comparisonOperator
364 | | arithmeticOperator
365 | | predicateOperator
366 | ;
367 |
368 | describeColName
369 | : nameParts+=identifier ('.' nameParts+=identifier)*
370 | ;
371 |
372 | ctes
373 | : WITH namedQuery (',' namedQuery)*
374 | ;
375 |
376 | namedQuery // One named, parenthesized query of a WITH clause.
377 | : name=errorCapturingIdentifier (columnAliases=identifierList)? AS? '(' query ')' // both the column alias list and the AS keyword are optional
378 | ;
379 |
380 | tableProvider
381 | : USING multipartIdentifier
382 | ;
383 |
384 | createTableClauses // Zero or more table clauses, accepted in any order.
385 | :((OPTIONS options=tablePropertyList) |
386 | (PARTITIONED BY partitioning=partitionFieldList) |
387 | skewSpec |
388 | bucketSpec |
389 | rowFormat |
390 | createFileFormat |
391 | locationSpec |
392 | commentSpec |
393 | (TBLPROPERTIES tableProps=tablePropertyList))* // the trailing '*' means the grammar itself does not reject a repeated clause
394 | ;
395 |
396 | tablePropertyList
397 | : '(' tableProperty (',' tableProperty)* ')'
398 | ;
399 |
400 | tableProperty // One key, optionally followed by a value, inside a property list.
401 | : key=tablePropertyKey (EQ? value=tablePropertyValue)? // the value is optional, and so is the '=' in front of it
402 | ;
403 |
404 | tablePropertyKey
405 | : identifier ('.' identifier)*
406 | | STRING
407 | ;
408 |
409 | tablePropertyValue
410 | : INTEGER_VALUE
411 | | DECIMAL_VALUE
412 | | booleanValue
413 | | STRING
414 | ;
415 |
416 | constantList
417 | : '(' constant (',' constant)* ')'
418 | ;
419 |
420 | nestedConstantList
421 | : '(' constantList (',' constantList)* ')'
422 | ;
423 |
424 | createFileFormat  // STORED AS <fileFormat> or STORED BY <storageHandler>
425 | : STORED AS fileFormat
426 | | STORED BY storageHandler
427 | ;
428 |
429 | fileFormat  // explicit INPUTFORMAT/OUTPUTFORMAT string pair, or a single identifier naming the format
430 | : INPUTFORMAT inFmt=STRING OUTPUTFORMAT outFmt=STRING #tableFileFormat
431 | | identifier #genericFileFormat
432 | ;
433 |
434 | storageHandler  // a STRING, optionally followed by WITH SERDEPROPERTIES ( ... )
435 | : STRING (WITH SERDEPROPERTIES tablePropertyList)?
436 | ;
437 |
438 | resource  // an identifier immediately followed by a STRING literal
439 | : identifier STRING
440 | ;
441 |
442 | dmlStatementNoWith  // single insert, FROM-first multi insert, DELETE, UPDATE or MERGE; tableAlias may match nothing
443 | : insertInto queryTerm queryOrganization #singleInsertQuery
444 | | fromClause multiInsertQueryBody+ #multiInsertQuery
445 | | DELETE FROM multipartIdentifier tableAlias whereClause? #deleteFromTable
446 | | UPDATE multipartIdentifier tableAlias setClause whereClause? #updateTable
447 | | MERGE INTO target=multipartIdentifier targetAlias=tableAlias
448 | USING (source=multipartIdentifier |
449 | '(' sourceQuery=query')') sourceAlias=tableAlias
450 | ON mergeCondition=booleanExpression
451 | matchedClause*
452 | notMatchedClause* #mergeIntoTable
453 | ;
454 |
455 | queryOrganization  // every part is optional (the rule can match nothing) but the order below is fixed
456 | : (ORDER BY order+=sortItem (',' order+=sortItem)*)?
457 | (CLUSTER BY clusterBy+=expression (',' clusterBy+=expression)*)?
458 | (DISTRIBUTE BY distributeBy+=expression (',' distributeBy+=expression)*)?
459 | (SORT BY sort+=sortItem (',' sort+=sortItem)*)?
460 | windowClause?
461 | (LIMIT (ALL | limit=expression))?
462 | ;
463 |
464 | multiInsertQueryBody  // one insert target followed by the FROM-less statement body that feeds it
465 | : insertInto fromStatementBody
466 | ;
467 |
468 | queryTerm  // left-recursive set operations; the predicates (flag declared outside this section) select one of two precedence schemes
469 | : queryPrimary #queryTermDefault
470 | | left=queryTerm {legacy_setops_precedence_enabled}?  // legacy: all four operators share a single alternative
471 | operator=(INTERSECT | UNION | EXCEPT | SETMINUS) setQuantifier? right=queryTerm #setOperation
472 | | left=queryTerm {!legacy_setops_precedence_enabled}?  // non-legacy: INTERSECT is listed before the others, so it binds tighter
473 | operator=INTERSECT setQuantifier? right=queryTerm #setOperation
474 | | left=queryTerm {!legacy_setops_precedence_enabled}?
475 | operator=(UNION | EXCEPT | SETMINUS) setQuantifier? right=queryTerm #setOperation
476 | ;
477 |
478 | queryPrimary  // operand of a set operation: SELECT-style spec, FROM-first statement, TABLE name, inline VALUES, or ( query )
479 | : querySpecification #queryPrimaryDefault
480 | | fromStatement #fromStmt
481 | | TABLE multipartIdentifier #table
482 | | inlineTable #inlineTableDefault1
483 | | '(' query ')' #subquery
484 | ;
485 |
486 | sortItem  // expression [ASC | DESC] [NULLS LAST | NULLS FIRST]
487 | : expression ordering=(ASC | DESC)? (NULLS nullOrder=(LAST | FIRST))?
488 | ;
489 |
490 | fromStatement  // FROM-first form: one fromClause shared by one or more statement bodies
491 | : fromClause fromStatementBody+
492 | ;
493 |
494 | fromStatementBody  // body that follows a leading FROM: a transform form or a select form, each ending in queryOrganization
495 | : transformClause
496 | whereClause?
497 | queryOrganization
498 | | selectClause
499 | lateralView*
500 | whereClause?
501 | aggregationClause?
502 | havingClause?
503 | windowClause?
504 | queryOrganization
505 | ;
506 |
507 | querySpecification  // transform or select form with an optional FROM; unlike fromStatementBody, no queryOrganization here
508 | : transformClause
509 | fromClause?
510 | whereClause? #transformQuerySpecification
511 | | selectClause
512 | fromClause?
513 | lateralView*
514 | whereClause?
515 | aggregationClause?
516 | havingClause?
517 | windowClause? #regularQuerySpecification
518 | ;
519 |
520 | transformClause  // SELECT TRANSFORM(...) / MAP ... / REDUCE ... USING '<script>' with optional row formats and output schema
521 | : (SELECT kind=TRANSFORM '(' namedExpressionSeq ')'
522 | | kind=MAP namedExpressionSeq
523 | | kind=REDUCE namedExpressionSeq)
524 | inRowFormat=rowFormat?
525 | (RECORDWRITER recordWriter=STRING)?
526 | USING script=STRING  // the only mandatory part after the input expressions
527 | (AS (identifierSeq | colTypeList | ('(' (identifierSeq | colTypeList) ')')))?  // output columns, parenthesized or not
528 | outRowFormat=rowFormat?
529 | (RECORDREADER recordReader=STRING)?
530 | ;
531 |
532 | selectClause  // SELECT, any number of hints, optional DISTINCT/ALL, then the select list
533 | : SELECT (hints+=hint)* setQuantifier? namedExpressionSeq
534 | ;
535 |
536 | setClause  // SET followed by one or more assignments
537 | : SET assignmentList
538 | ;
539 |
540 | matchedClause  // WHEN MATCHED [AND condition] THEN matchedAction
541 | : WHEN MATCHED (AND matchedCond=booleanExpression)? THEN matchedAction
542 | ;
543 | notMatchedClause  // WHEN NOT MATCHED [AND condition] THEN notMatchedAction
544 | : WHEN NOT MATCHED (AND notMatchedCond=booleanExpression)? THEN notMatchedAction
545 | ;
546 |
547 | matchedAction  // DELETE, UPDATE SET *, or UPDATE SET with explicit assignments
548 | : DELETE
549 | | UPDATE SET ASTERISK
550 | | UPDATE SET assignmentList
551 | ;
552 |
553 | notMatchedAction  // INSERT *, or INSERT (columns) VALUES (expressions); list lengths are not checked by the grammar
554 | : INSERT ASTERISK
555 | | INSERT '(' columns=multipartIdentifierList ')'
556 | VALUES '(' expression (',' expression)* ')'
557 | ;
558 |
559 | assignmentList  // one or more comma-separated assignments
560 | : assignment (',' assignment)*
561 | ;
562 |
563 | assignment  // target = expression; EQ is the token for both '=' and '=='
564 | : key=multipartIdentifier EQ value=expression
565 | ;
566 |
567 | whereClause  // WHERE followed by a boolean expression
568 | : WHERE booleanExpression
569 | ;
570 |
571 | havingClause  // HAVING followed by a boolean expression
572 | : HAVING booleanExpression
573 | ;
574 |
575 | hint  // hint comment block: one or more hint statements; the comma between statements is optional
576 | : '/*+' hintStatements+=hintStatement (','? hintStatements+=hintStatement)* '*/'
577 | ;
578 |
579 | hintStatement  // a bare hint name, or a name with a non-empty parenthesized parameter list
580 | : hintName=identifier
581 | | hintName=identifier '(' parameters+=primaryExpression (',' parameters+=primaryExpression)* ')'
582 | ;
583 |
584 | fromClause  // FROM with comma-separated relations, then any lateral views and at most one PIVOT
585 | : FROM relation (',' relation)* lateralView* pivotClause?
586 | ;
587 |
588 | aggregationClause  // GROUP BY exprs [WITH ROLLUP | WITH CUBE | GROUPING SETS (...)], or GROUP BY GROUPING SETS (...) alone
589 | : GROUP BY groupingExpressions+=expression (',' groupingExpressions+=expression)* (
590 | WITH kind=ROLLUP
591 | | WITH kind=CUBE
592 | | kind=GROUPING SETS '(' groupingSet (',' groupingSet)* ')')?
593 | | GROUP BY kind=GROUPING SETS '(' groupingSet (',' groupingSet)* ')'
594 | ;
595 |
596 | groupingSet  // a parenthesized expression list (which may be empty) or a single bare expression
597 | : '(' (expression (',' expression)*)? ')'
598 | | expression
599 | ;
600 |
601 | pivotClause  // PIVOT ( aggregates FOR pivotColumn IN ( value [, value ...] ) )
602 | : PIVOT '(' aggregates=namedExpressionSeq FOR pivotColumn IN '(' pivotValues+=pivotValue (',' pivotValues+=pivotValue)* ')' ')'
603 | ;
604 |
605 | pivotColumn  // a single identifier or a parenthesized identifier list; both forms fill the identifiers list
606 | : identifiers+=identifier
607 | | '(' identifiers+=identifier (',' identifiers+=identifier)* ')'
608 | ;
609 |
610 | pivotValue  // expression with an optional alias; AS before the alias is optional
611 | : expression (AS? identifier)?
612 | ;
613 |
614 | lateralView  // LATERAL VIEW [OUTER] func(args) tblName [[AS] col, ...]; the argument list may be empty
615 | : LATERAL VIEW (OUTER)? qualifiedName '(' (expression (',' expression)*)? ')' tblName=identifier (AS? colName+=identifier (',' colName+=identifier)*)?
616 | ;
617 |
618 | setQuantifier  // DISTINCT or ALL
619 | : DISTINCT
620 | | ALL
621 | ;
622 |
623 | relation  // a primary relation followed by any number of joins
624 | : relationPrimary joinRelation*
625 | ;
626 |
627 | joinRelation  // [joinType] JOIN right [ON ... | USING ...], or NATURAL [joinType] JOIN right (no criteria allowed)
628 | : (joinType) JOIN right=relationPrimary joinCriteria?
629 | | NATURAL joinType JOIN right=relationPrimary
630 | ;
631 |
632 | joinType  // join kind keywords preceding JOIN
633 | : INNER?  // may match nothing, so a bare JOIN takes this alternative
634 | | CROSS
635 | | LEFT OUTER?
636 | | LEFT? SEMI
637 | | RIGHT OUTER?
638 | | FULL OUTER?
639 | | LEFT? ANTI
640 | ;
641 |
642 | joinCriteria  // ON <boolean expression> or USING ( identifier, ... )
643 | : ON booleanExpression
644 | | USING identifierList
645 | ;
646 |
647 | sample  // TABLESAMPLE ( ... ); the grammar accepts empty parentheses because sampleMethod is optional
648 | : TABLESAMPLE '(' sampleMethod? ')'
649 | ;
650 |
651 | sampleMethod  // sampling by percentage, row count, bucket, or a bare expression
652 | : negativeSign=MINUS? percentage=(INTEGER_VALUE | DECIMAL_VALUE) PERCENTLIT #sampleByPercentile
653 | | expression ROWS #sampleByRows
654 | | sampleType=BUCKET numerator=INTEGER_VALUE OUT OF denominator=INTEGER_VALUE
655 | (ON (identifier | qualifiedName '(' ')'))? #sampleByBucket
656 | | bytes=expression #sampleByBytes
657 | ;
658 |
659 | identifierList  // identifierSeq wrapped in parentheses
660 | : '(' identifierSeq ')'
661 | ;
662 |
663 | identifierSeq  // one or more comma-separated identifiers, collected in ident
664 | : ident+=errorCapturingIdentifier (',' ident+=errorCapturingIdentifier)*
665 | ;
666 |
667 | orderedIdentifierList  // parenthesized, comma-separated list of at least one orderedIdentifier
668 | : '(' orderedIdentifier (',' orderedIdentifier)* ')'
669 | ;
670 |
671 | orderedIdentifier  // identifier with an optional ASC or DESC
672 | : ident=errorCapturingIdentifier ordering=(ASC | DESC)?
673 | ;
674 |
675 | identifierCommentList  // parenthesized, comma-separated list of at least one identifierComment
676 | : '(' identifierComment (',' identifierComment)* ')'
677 | ;
678 |
679 | identifierComment  // identifier with an optional commentSpec
680 | : identifier commentSpec?
681 | ;
682 |
683 | relationPrimary  // table name, ( query ), ( relation ), inline VALUES, or table-valued function call
684 | : multipartIdentifier sample? tableAlias #tableName
685 | | '(' query ')' sample? tableAlias #aliasedQuery
686 | | '(' relation ')' sample? tableAlias #aliasedRelation
687 | | inlineTable #inlineTableDefault2
688 | | functionTable #tableValuedFunction
689 | ;
690 |
691 | inlineTable  // VALUES expr [, expr ...] followed by a (possibly empty) tableAlias
692 | : VALUES expression (',' expression)* tableAlias
693 | ;
694 |
695 | functionTable  // funcName ( [args] ) followed by a (possibly empty) tableAlias
696 | : funcName=functionName '(' (expression (',' expression)*)? ')' tableAlias
697 | ;
698 |
699 | tableAlias  // entirely optional (matches nothing); uses strictIdentifier, not identifier, for the alias name
700 | : (AS? strictIdentifier identifierList?)?
701 | ;
702 |
703 | rowFormat  // ROW FORMAT SERDE '<name>' [...], or ROW FORMAT DELIMITED with optional delimiter clauses in fixed order
704 | : ROW FORMAT SERDE name=STRING (WITH SERDEPROPERTIES props=tablePropertyList)? #rowFormatSerde
705 | | ROW FORMAT DELIMITED
706 | (FIELDS TERMINATED BY fieldsTerminatedBy=STRING (ESCAPED BY escapedBy=STRING)?)?  // ESCAPED BY is only allowed after FIELDS TERMINATED BY
707 | (COLLECTION ITEMS TERMINATED BY collectionItemsTerminatedBy=STRING)?
708 | (MAP KEYS TERMINATED BY keysTerminatedBy=STRING)?
709 | (LINES TERMINATED BY linesSeparatedBy=STRING)?
710 | (NULL DEFINED AS nullDefinedAs=STRING)? #rowFormatDelimited
711 | ;
712 |
713 | multipartIdentifierList  // one or more comma-separated multipartIdentifier entries (no surrounding parentheses)
714 | : multipartIdentifier (',' multipartIdentifier)*
715 | ;
716 |
717 | multipartIdentifier  // dot-separated name with any number of parts, collected in parts
718 | : parts+=errorCapturingIdentifier ('.' parts+=errorCapturingIdentifier)*
719 | ;
720 |
721 | tableIdentifier  // [db.]table - at most two parts
722 | : (db=errorCapturingIdentifier '.')? table=errorCapturingIdentifier
723 | ;
724 |
725 | functionIdentifier  // [db.]function - at most two parts
726 | : (db=errorCapturingIdentifier '.')? function=errorCapturingIdentifier
727 | ;
728 |
729 | namedExpression  // expression with an optional alias: a single name or a parenthesized identifier list; AS is optional
730 | : expression (AS? (name=errorCapturingIdentifier | identifierList))?
731 | ;
732 |
733 | namedExpressionSeq  // one or more comma-separated namedExpression entries
734 | : namedExpression (',' namedExpression)*
735 | ;
736 |
737 | partitionFieldList  // parenthesized, comma-separated list of at least one partitionField, collected in fields
738 | : '(' fields+=partitionField (',' fields+=partitionField)* ')'
739 | ;
740 |
741 | partitionField  // either a transform (name or function-style call) or a typed column definition
742 | : transform #partitionTransform
743 | | colType #partitionColumn
744 | ;
745 |
746 | transform  // a plain qualified name, or transformName ( arg [, arg ...] ) with at least one argument
747 | : qualifiedName #identityTransform
748 | | transformName=identifier
749 | '(' argument+=transformArgument (',' argument+=transformArgument)* ')' #applyTransform
750 | ;
751 |
752 | transformArgument  // a qualified name or a constant
753 | : qualifiedName
754 | | constant
755 | ;
756 |
757 | expression  // top of the expression hierarchy; simply delegates to booleanExpression
758 | : booleanExpression
759 | ;
760 |
761 | booleanExpression  // left-recursive; AND is listed before OR, so AND binds tighter
762 | : NOT booleanExpression #logicalNot
763 | | EXISTS '(' query ')' #exists
764 | | valueExpression predicate? #predicated
765 | | left=booleanExpression operator=AND right=booleanExpression #logicalBinary
766 | | left=booleanExpression operator=OR right=booleanExpression #logicalBinary
767 | ;
768 |
769 | predicate  // suffix applied to a valueExpression; kind records which predicate keyword matched
770 | : NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression
771 | | NOT? kind=IN '(' expression (',' expression)* ')'
772 | | NOT? kind=IN '(' query ')'
773 | | NOT? kind=RLIKE pattern=valueExpression
774 | | NOT? kind=LIKE quantifier=(ANY | SOME | ALL) ('('')' | '(' expression (',' expression)* ')')  // quantified LIKE; an empty list is accepted
775 | | NOT? kind=LIKE pattern=valueExpression (ESCAPE escapeChar=STRING)?
776 | | IS NOT? kind=NULL
777 | | IS NOT? kind=(TRUE | FALSE | UNKNOWN)
778 | | IS NOT? kind=DISTINCT FROM right=valueExpression
779 | ;
780 |
781 | valueExpression  // left-recursive arithmetic/comparison; earlier alternatives bind tighter than later ones
782 | : primaryExpression #valueExpressionDefault
783 | | operator=(MINUS | PLUS | TILDE) valueExpression #arithmeticUnary
784 | | left=valueExpression operator=(ASTERISK | SLASH | PERCENT | DIV) right=valueExpression #arithmeticBinary
785 | | left=valueExpression operator=(PLUS | MINUS | CONCAT_PIPE) right=valueExpression #arithmeticBinary
786 | | left=valueExpression operator=AMPERSAND right=valueExpression #arithmeticBinary
787 | | left=valueExpression operator=HAT right=valueExpression #arithmeticBinary
788 | | left=valueExpression operator=PIPE right=valueExpression #arithmeticBinary
789 | | left=valueExpression comparisonOperator right=valueExpression #comparison
790 | ;
791 |
792 | primaryExpression  // atoms of the expression grammar; keyword-led special forms are listed ahead of the generic functionCall
793 | : name=(CURRENT_DATE | CURRENT_TIMESTAMP) #currentDatetime
794 | | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase
795 | | CASE value=expression whenClause+ (ELSE elseExpression=expression)? END #simpleCase
796 | | CAST '(' expression AS dataType ')' #cast
797 | | STRUCT '(' (argument+=namedExpression (',' argument+=namedExpression)*)? ')' #struct
798 | | FIRST '(' expression (IGNORE NULLS)? ')' #first
799 | | LAST '(' expression (IGNORE NULLS)? ')' #last
800 | | POSITION '(' substr=valueExpression IN str=valueExpression ')' #position
801 | | constant #constantDefault
802 | | ASTERISK #star
803 | | qualifiedName '.' ASTERISK #star
804 | | '(' namedExpression (',' namedExpression)+ ')' #rowConstructor  // needs at least two items; one item is parenthesizedExpression
805 | | '(' query ')' #subqueryExpression
806 | | functionName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')'
807 | (FILTER '(' WHERE where=booleanExpression ')')?
808 | (nullsOption=(IGNORE | RESPECT) NULLS)? ( OVER windowSpec)? #functionCall
809 | | identifier '->' expression #lambda
810 | | '(' identifier (',' identifier)+ ')' '->' expression #lambda  // multi-parameter lambda needs at least two parameters
811 | | value=primaryExpression '[' index=valueExpression ']' #subscript
812 | | identifier #columnReference
813 | | base=primaryExpression '.' fieldName=identifier #dereference
814 | | '(' expression ')' #parenthesizedExpression
815 | | EXTRACT '(' field=identifier FROM source=valueExpression ')' #extract
816 | | (SUBSTR | SUBSTRING) '(' str=valueExpression (FROM | ',') pos=valueExpression
817 | ((FOR | ',') len=valueExpression)? ')' #substring
818 | | TRIM '(' trimOption=(BOTH | LEADING | TRAILING)? (trimStr=valueExpression)?
819 | FROM srcStr=valueExpression ')' #trim
820 | | OVERLAY '(' input=valueExpression PLACING replace=valueExpression
821 | FROM position=valueExpression (FOR length=valueExpression)? ')' #overlay
822 | ;
823 |
824 | constant  // literal values
825 | : NULL #nullLiteral
826 | | interval #intervalLiteral
827 | | identifier STRING #typeConstructor  // an identifier directly followed by a string literal
828 | | number #numericLiteral
829 | | booleanValue #booleanLiteral
830 | | STRING+ #stringLiteral  // one or more adjacent string tokens form a single literal node
831 | ;
832 |
833 | comparisonOperator  // any one of the comparison operator tokens
834 | : EQ | NEQ | NEQJ | LT | LTE | GT | GTE | NSEQ
835 | ;
836 |
837 | arithmeticOperator  // any one of the arithmetic, bitwise or concatenation operator tokens
838 | : PLUS | MINUS | ASTERISK | SLASH | PERCENT | DIV | TILDE | AMPERSAND | PIPE | CONCAT_PIPE | HAT
839 | ;
840 |
841 | predicateOperator  // any one of the keywords OR, AND, IN, NOT
842 | : OR | AND | IN | NOT
843 | ;
844 |
845 | booleanValue  // TRUE or FALSE
846 | : TRUE | FALSE
847 | ;
848 |
849 | interval  // INTERVAL with an optional body, so a bare INTERVAL keyword also parses as this rule
850 | : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)?
851 | ;
852 |
853 | errorCapturingMultiUnitsInterval  // multi-unit interval that also accepts a trailing unit-to-unit part (presumably so it can be reported; verify in the visitor)
854 | : multiUnitsInterval unitToUnitInterval?
855 | ;
856 |
857 | multiUnitsInterval  // one or more (value, unit) pairs; each unit is an identifier collected in unit
858 | : (intervalValue unit+=identifier)+
859 | ;
860 |
861 | errorCapturingUnitToUnitInterval  // unit-to-unit interval; any trailing interval is captured under error1/error2
862 | : body=unitToUnitInterval (error1=multiUnitsInterval | error2=unitToUnitInterval)?
863 | ;
864 |
865 | unitToUnitInterval  // value <from-unit> TO <to-unit>; both units are plain identifiers
866 | : value=intervalValue from=identifier TO to=identifier
867 | ;
868 |
869 | intervalValue  // optionally signed integer/decimal, or a STRING (no sign allowed before the string form)
870 | : (PLUS | MINUS)? (INTEGER_VALUE | DECIMAL_VALUE)
871 | | STRING
872 | ;
873 |
874 | colPosition  // FIRST, or AFTER <column>; the matched keyword is stored in position
875 | : position=FIRST | position=AFTER afterCol=errorCapturingIdentifier
876 | ;
877 |
878 | dataType  // ARRAY<...>, MAP<...,...>, STRUCT<...>, or a named type with optional integer parameters
879 | : complex=ARRAY '<' dataType '>' #complexDataType
880 | | complex=MAP '<' dataType ',' dataType '>' #complexDataType
881 | | complex=STRUCT ('<' complexColTypeList? '>' | NEQ) #complexDataType  // NEQ covers an empty STRUCT<> because '<>' lexes as one NEQ token
882 | | identifier ('(' INTEGER_VALUE (',' INTEGER_VALUE)* ')')? #primitiveDataType
883 | ;
884 |
885 | qualifiedColTypeWithPositionList  // one or more comma-separated qualifiedColTypeWithPosition entries
886 | : qualifiedColTypeWithPosition (',' qualifiedColTypeWithPosition)*
887 | ;
888 |
889 | qualifiedColTypeWithPosition  // multipart column name, type, then optional NOT NULL, comment and position
890 | : name=multipartIdentifier dataType (NOT NULL)? commentSpec? colPosition?
891 | ;
892 |
893 | colTypeList  // one or more comma-separated colType entries (no surrounding parentheses)
894 | : colType (',' colType)*
895 | ;
896 |
897 | colType  // column name, type, then optional NOT NULL and comment
898 | : colName=errorCapturingIdentifier dataType (NOT NULL)? commentSpec?
899 | ;
900 |
901 | complexColTypeList  // one or more comma-separated complexColType entries
902 | : complexColType (',' complexColType)*
903 | ;
904 |
905 | complexColType  // struct field: name ':' type, then optional NOT NULL and comment
906 | : identifier ':' dataType (NOT NULL)? commentSpec?
907 | ;
908 |
909 | whenClause  // WHEN condition THEN result, used by both CASE forms in primaryExpression
910 | : WHEN condition=expression THEN result=expression
911 | ;
912 |
913 | windowClause  // WINDOW followed by one or more comma-separated named windows
914 | : WINDOW namedWindow (',' namedWindow)*
915 | ;
916 |
917 | namedWindow  // name AS windowSpec; AS is mandatory here
918 | : name=errorCapturingIdentifier AS windowSpec
919 | ;
920 |
921 | windowSpec  // a reference to a named window (bare or parenthesized) or an inline window definition
922 | : name=errorCapturingIdentifier #windowRef
923 | | '('name=errorCapturingIdentifier')' #windowRef
924 | | '('
925 | ( CLUSTER BY partition+=expression (',' partition+=expression)*  // CLUSTER BY excludes the partition/order pair below
926 | | ((PARTITION | DISTRIBUTE) BY partition+=expression (',' partition+=expression)*)?
927 | ((ORDER | SORT) BY sortItem (',' sortItem)*)?)
928 | windowFrame?
929 | ')' #windowDef
930 | ;
931 |
932 | windowFrame  // RANGE or ROWS frame, given either a single start bound or BETWEEN start AND end
933 | : frameType=RANGE start=frameBound
934 | | frameType=ROWS start=frameBound
935 | | frameType=RANGE BETWEEN start=frameBound AND end=frameBound
936 | | frameType=ROWS BETWEEN start=frameBound AND end=frameBound
937 | ;
938 |
939 | frameBound  // UNBOUNDED PRECEDING/FOLLOWING, CURRENT ROW, or <expression> PRECEDING/FOLLOWING
940 | : UNBOUNDED boundType=(PRECEDING | FOLLOWING)
941 | | boundType=CURRENT ROW
942 | | expression boundType=(PRECEDING | FOLLOWING)
943 | ;
944 |
945 | qualifiedNameList  // one or more comma-separated qualified names
946 | : qualifiedName (',' qualifiedName)*
947 | ;
948 |
949 | functionName  // a qualified name, or one of the keywords FILTER, LEFT, RIGHT used as a function name
950 | : qualifiedName
951 | | FILTER
952 | | LEFT
953 | | RIGHT
954 | ;
955 |
956 | qualifiedName  // dot-separated identifiers; uses identifier, not errorCapturingIdentifier
957 | : identifier ('.' identifier)*
958 | ;
959 |
960 | // this rule is used for explicitly capturing wrong identifiers such as test-table, which should actually be `test-table`
961 | // replace identifier with errorCapturingIdentifier where the immediate follow symbol is not an expression, otherwise
962 | // valid expressions such as "a-b" can be recognized as an identifier
963 | errorCapturingIdentifier  // identifier plus an extra part that is either empty or a run of "- identifier"
964 | : identifier errorCapturingIdentifierExtra
965 | ;
966 |
967 | // extra left-factoring grammar
968 | errorCapturingIdentifierExtra  // the label tells a caller which case matched
969 | : (MINUS identifier)+ #errorIdent  // one or more "-name" suffixes after the identifier
970 | | #realIdent  // empty alternative: nothing follows the identifier
971 | ;
972 |
973 | identifier  // strictIdentifier, plus the strictNonReserved keywords when SQL_standard_keyword_behavior is false
974 | : strictIdentifier
975 | | {!SQL_standard_keyword_behavior}? strictNonReserved
976 | ;
977 |
978 | strictIdentifier  // IDENTIFIER token, back-quoted identifier, or a keyword from the list chosen by SQL_standard_keyword_behavior
979 | : IDENTIFIER #unquotedIdentifier
980 | | quotedIdentifier #quotedIdentifierAlternative
981 | | {SQL_standard_keyword_behavior}? ansiNonReserved #unquotedIdentifier
982 | | {!SQL_standard_keyword_behavior}? nonReserved #unquotedIdentifier
983 | ;
984 |
985 | quotedIdentifier  // a single BACKQUOTED_IDENTIFIER token
986 | : BACKQUOTED_IDENTIFIER
987 | ;
988 |
989 | number  // numeric literal with optional leading MINUS; the predicate flag (declared outside this section) picks the label for exponent/decimal tokens
990 | : {!legacy_exponent_literal_as_decimal_enabled}? MINUS? EXPONENT_VALUE #exponentLiteral
991 | | {!legacy_exponent_literal_as_decimal_enabled}? MINUS? DECIMAL_VALUE #decimalLiteral
992 | | {legacy_exponent_literal_as_decimal_enabled}? MINUS? (EXPONENT_VALUE | DECIMAL_VALUE) #legacyDecimalLiteral
993 | | MINUS? INTEGER_VALUE #integerLiteral
994 | | MINUS? BIGINT_LITERAL #bigIntLiteral
995 | | MINUS? SMALLINT_LITERAL #smallIntLiteral
996 | | MINUS? TINYINT_LITERAL #tinyIntLiteral
997 | | MINUS? DOUBLE_LITERAL #doubleLiteral
998 | | MINUS? FLOAT_LITERAL #floatLiteral
999 | | MINUS? BIGDECIMAL_LITERAL #bigDecimalLiteral
1000 | ;
1001 |
1002 | alterColumnAction  // TYPE <dataType>, a comment, a position, or SET/DROP NOT NULL
1003 | : TYPE dataType
1004 | | commentSpec
1005 | | colPosition
1006 | | setOrDrop=(SET | DROP) NOT NULL
1007 | ;
1008 |
1009 | // When `SQL_standard_keyword_behavior=true`, there are 2 kinds of keywords in Spark SQL.
1010 | // - Reserved keywords:
1011 | // Keywords that are reserved and can't be used as identifiers for table, view, column,
1012 | // function, alias, etc.
1013 | // - Non-reserved keywords:
1014 | // Keywords that have a special meaning only in particular contexts and can be used as
1015 | // identifiers in other contexts. For example, `EXPLAIN SELECT ...` is a command, but EXPLAIN
1016 | // can be used as identifiers in other places.
1017 | // You can find the full keywords list by searching "Start of the keywords list" in this file.
1018 | // The non-reserved keywords are listed below. Keywords not in this list are reserved keywords.
1019 | ansiNonReserved  // keywords strictIdentifier accepts as identifiers when SQL_standard_keyword_behavior is true
1020 | //--ANSI-NON-RESERVED-START
1021 | : ADD
1022 | | AFTER
1023 | | ALTER
1024 | | ANALYZE
1025 | | ANTI
1026 | | ARCHIVE
1027 | | ARRAY
1028 | | ASC
1029 | | AT
1030 | | BETWEEN
1031 | | BUCKET
1032 | | BUCKETS
1033 | | BY
1034 | | CACHE
1035 | | CASCADE
1036 | | CHANGE
1037 | | CLEAR
1038 | | CLUSTER
1039 | | CLUSTERED
1040 | | CODEGEN
1041 | | COLLECTION
1042 | | COLUMNS
1043 | | COMMENT
1044 | | COMMIT
1045 | | COMPACT
1046 | | COMPACTIONS
1047 | | COMPUTE
1048 | | CONCATENATE
1049 | | COST
1050 | | CUBE
1051 | | CURRENT
1052 | | DATA
1053 | | DATABASE
1054 | | DATABASES
1055 | | DBPROPERTIES
1056 | | DEFINED
1057 | | DELETE
1058 | | DELIMITED
1059 | | DESC
1060 | | DESCRIBE
1061 | | DFS
1062 | | DIRECTORIES
1063 | | DIRECTORY
1064 | | DISTRIBUTE
1065 | | DIV
1066 | | DROP
1067 | | ESCAPED
1068 | | EXCHANGE
1069 | | EXISTS
1070 | | EXPLAIN
1071 | | EXPORT
1072 | | EXTENDED
1073 | | EXTERNAL
1074 | | EXTRACT
1075 | | FIELDS
1076 | | FILEFORMAT
1077 | | FIRST
1078 | | FOLLOWING
1079 | | FORMAT
1080 | | FORMATTED
1081 | | FUNCTION
1082 | | FUNCTIONS
1083 | | GLOBAL
1084 | | GROUPING
1085 | | IF
1086 | | IGNORE
1087 | | IMPORT
1088 | | INDEX
1089 | | INDEXES
1090 | | INPATH
1091 | | INPUTFORMAT
1092 | | INSERT
1093 | | INTERVAL
1094 | | ITEMS
1095 | | KEYS
1096 | | LAST
1097 | | LATERAL
1098 | | LAZY
1099 | | LIKE
1100 | | LIMIT
1101 | | LINES
1102 | | LIST
1103 | | LOAD
1104 | | LOCAL
1105 | | LOCATION
1106 | | LOCK
1107 | | LOCKS
1108 | | LOGICAL
1109 | | MACRO
1110 | | MAP
1111 | | MATCHED
1112 | | MERGE
1113 | | MSCK
1114 | | NAMESPACE
1115 | | NAMESPACES
1116 | | NO
1117 | | NULLS
1118 | | OF
1119 | | OPTION
1120 | | OPTIONS
1121 | | OUT
1122 | | OUTPUTFORMAT
1123 | | OVER
1124 | | OVERLAY
1125 | | OVERWRITE
1126 | | PARTITION
1127 | | PARTITIONED
1128 | | PARTITIONS
1129 | | PERCENTLIT
1130 | | PIVOT
1131 | | PLACING
1132 | | POSITION
1133 | | PRECEDING
1134 | | PRINCIPALS
1135 | | PROPERTIES
1136 | | PURGE
1137 | | QUERY
1138 | | RANGE
1139 | | RECORDREADER
1140 | | RECORDWRITER
1141 | | RECOVER
1142 | | REDUCE
1143 | | REFRESH
1144 | | RENAME
1145 | | REPAIR
1146 | | REPLACE
1147 | | RESET
1148 | | RESPECT
1149 | | RESTRICT
1150 | | REVOKE
1151 | | RLIKE
1152 | | ROLE
1153 | | ROLES
1154 | | ROLLBACK
1155 | | ROLLUP
1156 | | ROW
1157 | | ROWS
1158 | | SCHEMA
1159 | | SEMI
1160 | | SEPARATED
1161 | | SERDE
1162 | | SERDEPROPERTIES
1163 | | SET
1164 | | SETMINUS
1165 | | SETS
1166 | | SHOW
1167 | | SKEWED
1168 | | SORT
1169 | | SORTED
1170 | | START
1171 | | STATISTICS
1172 | | STORED
1173 | | STRATIFY
1174 | | STRUCT
1175 | | SUBSTR
1176 | | SUBSTRING
1177 | | SYNC
1178 | | TABLES
1179 | | TABLESAMPLE
1180 | | TBLPROPERTIES
1181 | | TEMPORARY
1182 | | TERMINATED
1183 | | TOUCH
1184 | | TRANSACTION
1185 | | TRANSACTIONS
1186 | | TRANSFORM
1187 | | TRIM
1188 | | TRUE
1189 | | TRUNCATE
1190 | | TYPE
1191 | | UNARCHIVE
1192 | | UNBOUNDED
1193 | | UNCACHE
1194 | | UNLOCK
1195 | | UNSET
1196 | | UPDATE
1197 | | USE
1198 | | VALUES
1199 | | VIEW
1200 | | VIEWS
1201 | | WINDOW
1202 | | ZONE
1203 | //--ANSI-NON-RESERVED-END
1204 | ;
1205 |
1206 | // When `SQL_standard_keyword_behavior=false`, there are 2 kinds of keywords in Spark SQL.
1207 | // - Non-reserved keywords:
1208 | // Same definition as the one when `SQL_standard_keyword_behavior=true`.
1209 | // - Strict-non-reserved keywords:
1210 | // A strict version of non-reserved keywords, which cannot be used as a table alias.
1211 | // You can find the full keywords list by searching "Start of the keywords list" in this file.
1212 | // The strict-non-reserved keywords are listed in `strictNonReserved`.
1213 | // The non-reserved keywords are listed in `nonReserved`.
1214 | // These 2 together contain all the keywords.
1215 | strictNonReserved  // reachable only through identifier (not strictIdentifier), so tableAlias never accepts these
1216 | : ANTI
1217 | | CROSS
1218 | | EXCEPT
1219 | | FULL
1220 | | INNER
1221 | | INTERSECT
1222 | | JOIN
1223 | | LEFT
1224 | | NATURAL
1225 | | ON
1226 | | RIGHT
1227 | | SEMI
1228 | | SETMINUS
1229 | | UNION
1230 | | USING
1231 | ;
1232 |
1233 | nonReserved  // keywords strictIdentifier accepts as identifiers when SQL_standard_keyword_behavior is false
1234 | //--DEFAULT-NON-RESERVED-START
1235 | : ADD
1236 | | AFTER
1237 | | ALL
1238 | | ALTER
1239 | | ANALYZE
1240 | | AND
1241 | | ANY
1242 | | ARCHIVE
1243 | | ARRAY
1244 | | AS
1245 | | ASC
1246 | | AT
1247 | | AUTHORIZATION
1248 | | BETWEEN
1249 | | BOTH
1250 | | BUCKET
1251 | | BUCKETS
1252 | | BY
1253 | | CACHE
1254 | | CASCADE
1255 | | CASE
1256 | | CAST
1257 | | CHANGE
1258 | | CHECK
1259 | | CLEAR
1260 | | CLUSTER
1261 | | CLUSTERED
1262 | | CODEGEN
1263 | | COLLATE
1264 | | COLLECTION
1265 | | COLUMN
1266 | | COLUMNS
1267 | | COMMENT
1268 | | COMMIT
1269 | | COMPACT
1270 | | COMPACTIONS
1271 | | COMPUTE
1272 | | CONCATENATE
1273 | | CONSTRAINT
1274 | | COST
1275 | | CREATE
1276 | | CUBE
1277 | | CURRENT
1278 | | CURRENT_DATE
1279 | | CURRENT_TIME
1280 | | CURRENT_TIMESTAMP
1281 | | CURRENT_USER
1282 | | DATA
1283 | | DATABASE
1284 | | DATABASES
1285 | | DBPROPERTIES
1286 | | DEFINED
1287 | | DELETE
1288 | | DELIMITED
1289 | | DESC
1290 | | DESCRIBE
1291 | | DFS
1292 | | DIRECTORIES
1293 | | DIRECTORY
1294 | | DISTINCT
1295 | | DISTRIBUTE
1296 | | DIV
1297 | | DROP
1298 | | ELSE
1299 | | END
1300 | | ESCAPE
1301 | | ESCAPED
1302 | | EXCHANGE
1303 | | EXISTS
1304 | | EXPLAIN
1305 | | EXPORT
1306 | | EXTENDED
1307 | | EXTERNAL
1308 | | EXTRACT
1309 | | FALSE
1310 | | FETCH
1311 | | FILTER
1312 | | FIELDS
1313 | | FILEFORMAT
1314 | | FIRST
1315 | | FOLLOWING
1316 | | FOR
1317 | | FOREIGN
1318 | | FORMAT
1319 | | FORMATTED
1320 | | FROM
1321 | | FUNCTION
1322 | | FUNCTIONS
1323 | | GLOBAL
1324 | | GRANT
1325 | | GROUP
1326 | | GROUPING
1327 | | HAVING
1328 | | IF
1329 | | IGNORE
1330 | | IMPORT
1331 | | IN
1332 | | INDEX
1333 | | INDEXES
1334 | | INPATH
1335 | | INPUTFORMAT
1336 | | INSERT
1337 | | INTERVAL
1338 | | INTO
1339 | | IS
1340 | | ITEMS
1341 | | KEYS
1342 | | LAST
1343 | | LATERAL
1344 | | LAZY
1345 | | LEADING
1346 | | LIKE
1347 | | LIMIT
1348 | | LINES
1349 | | LIST
1350 | | LOAD
1351 | | LOCAL
1352 | | LOCATION
1353 | | LOCK
1354 | | LOCKS
1355 | | LOGICAL
1356 | | MACRO
1357 | | MAP
1358 | | MATCHED
1359 | | MERGE
1360 | | MSCK
1361 | | NAMESPACE
1362 | | NAMESPACES
1363 | | NO
1364 | | NOT
1365 | | NULL
1366 | | NULLS
1367 | | OF
1368 | | ONLY
1369 | | OPTION
1370 | | OPTIONS
1371 | | OR
1372 | | ORDER
1373 | | OUT
1374 | | OUTER
1375 | | OUTPUTFORMAT
1376 | | OVER
1377 | | OVERLAPS
1378 | | OVERLAY
1379 | | OVERWRITE
1380 | | PARTITION
1381 | | PARTITIONED
1382 | | PARTITIONS
1383 | | PERCENTLIT
1384 | | PIVOT
1385 | | PLACING
1386 | | POSITION
1387 | | PRECEDING
1388 | | PRIMARY
1389 | | PRINCIPALS
1390 | | PROPERTIES
1391 | | PURGE
1392 | | QUERY
1393 | | RANGE
1394 | | RECORDREADER
1395 | | RECORDWRITER
1396 | | RECOVER
1397 | | REDUCE
1398 | | REFERENCES
1399 | | REFRESH
1400 | | RENAME
1401 | | REPAIR
1402 | | REPLACE
1403 | | RESET
1404 | | RESPECT
1405 | | RESTRICT
1406 | | REVOKE
1407 | | RLIKE
1408 | | ROLE
1409 | | ROLES
1410 | | ROLLBACK
1411 | | ROLLUP
1412 | | ROW
1413 | | ROWS
1414 | | SCHEMA
1415 | | SELECT
1416 | | SEPARATED
1417 | | SERDE
1418 | | SERDEPROPERTIES
1419 | | SESSION_USER
1420 | | SET
1421 | | SETS
1422 | | SHOW
1423 | | SKEWED
1424 | | SOME
1425 | | SORT
1426 | | SORTED
1427 | | START
1428 | | STATISTICS
1429 | | STORED
1430 | | STRATIFY
1431 | | STRUCT
1432 | | SUBSTR
1433 | | SUBSTRING
1434 | | SYNC
1435 | | TABLE
1436 | | TABLES
1437 | | TABLESAMPLE
1438 | | TBLPROPERTIES
1439 | | TEMPORARY
1440 | | TERMINATED
1441 | | THEN
1442 | | TIME
1443 | | TO
1444 | | TOUCH
1445 | | TRAILING
1446 | | TRANSACTION
1447 | | TRANSACTIONS
1448 | | TRANSFORM
1449 | | TRIM
1450 | | TRUE
1451 | | TRUNCATE
1452 | | TYPE
1453 | | UNARCHIVE
1454 | | UNBOUNDED
1455 | | UNCACHE
1456 | | UNIQUE
1457 | | UNKNOWN
1458 | | UNLOCK
1459 | | UNSET
1460 | | UPDATE
1461 | | USE
1462 | | USER
1463 | | VALUES
1464 | | VIEW
1465 | | VIEWS
1466 | | WHEN
1467 | | WHERE
1468 | | WINDOW
1469 | | WITH
1470 | | ZONE
1471 | //--DEFAULT-NON-RESERVED-END
1472 | ;
1473 |
1474 | // NOTE: If you add a new token in the list below, you should update the list of keywords
1475 | // and reserved tag in `docs/sql-ref-ansi-compliance.md#sql-keywords`.
1476 |
1477 | //============================
1478 | // Start of the keywords list
1479 | //============================
1480 | //--SPARK-KEYWORD-LIST-START
1481 | ADD: 'ADD';
1482 | AFTER: 'AFTER';
1483 | ALL: 'ALL';
1484 | ALTER: 'ALTER';
1485 | ANALYZE: 'ANALYZE';
1486 | AND: 'AND';
1487 | ANTI: 'ANTI';
1488 | ANY: 'ANY';
1489 | ARCHIVE: 'ARCHIVE';
1490 | ARRAY: 'ARRAY';
1491 | AS: 'AS';
1492 | ASC: 'ASC';
1493 | AT: 'AT';
1494 | AUTHORIZATION: 'AUTHORIZATION';
1495 | BETWEEN: 'BETWEEN';
1496 | BOTH: 'BOTH';
1497 | BUCKET: 'BUCKET';
1498 | BUCKETS: 'BUCKETS';
1499 | BY: 'BY';
1500 | CACHE: 'CACHE';
1501 | CASCADE: 'CASCADE';
1502 | CASE: 'CASE';
1503 | CAST: 'CAST';
1504 | CHANGE: 'CHANGE';
1505 | CHECK: 'CHECK';
1506 | CLEAR: 'CLEAR';
1507 | CLUSTER: 'CLUSTER';
1508 | CLUSTERED: 'CLUSTERED';
1509 | CODEGEN: 'CODEGEN';
1510 | COLLATE: 'COLLATE';
1511 | COLLECTION: 'COLLECTION';
1512 | COLUMN: 'COLUMN';
1513 | COLUMNS: 'COLUMNS';
1514 | COMMENT: 'COMMENT';
1515 | COMMIT: 'COMMIT';
1516 | COMPACT: 'COMPACT';
1517 | COMPACTIONS: 'COMPACTIONS';
1518 | COMPUTE: 'COMPUTE';
1519 | CONCATENATE: 'CONCATENATE';
1520 | CONSTRAINT: 'CONSTRAINT';
1521 | COST: 'COST';
1522 | CREATE: 'CREATE';
1523 | CROSS: 'CROSS';
1524 | CUBE: 'CUBE';
1525 | CURRENT: 'CURRENT';
1526 | CURRENT_DATE: 'CURRENT_DATE';
1527 | CURRENT_TIME: 'CURRENT_TIME';
1528 | CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP';
1529 | CURRENT_USER: 'CURRENT_USER';
1530 | DATA: 'DATA';
1531 | DATABASE: 'DATABASE';
1532 | DATABASES: 'DATABASES' | 'SCHEMAS';  // SCHEMAS is lexed as the same DATABASES token
1533 | DBPROPERTIES: 'DBPROPERTIES';
1534 | DEFINED: 'DEFINED';
1535 | DELETE: 'DELETE';
1536 | DELIMITED: 'DELIMITED';
1537 | DESC: 'DESC';
1538 | DESCRIBE: 'DESCRIBE';
1539 | DFS: 'DFS';
1540 | DIRECTORIES: 'DIRECTORIES';
1541 | DIRECTORY: 'DIRECTORY';
1542 | DISTINCT: 'DISTINCT';
1543 | DISTRIBUTE: 'DISTRIBUTE';
1544 | DIV: 'DIV';
1545 | DROP: 'DROP';
1546 | ELSE: 'ELSE';
1547 | END: 'END';
1548 | ESCAPE: 'ESCAPE';
1549 | ESCAPED: 'ESCAPED';
1550 | EXCEPT: 'EXCEPT';
1551 | EXCHANGE: 'EXCHANGE';
1552 | EXISTS: 'EXISTS';
1553 | EXPLAIN: 'EXPLAIN';
1554 | EXPORT: 'EXPORT';
1555 | EXTENDED: 'EXTENDED';
1556 | EXTERNAL: 'EXTERNAL';
1557 | EXTRACT: 'EXTRACT';
1558 | FALSE: 'FALSE';
1559 | FETCH: 'FETCH';
1560 | FIELDS: 'FIELDS';
1561 | FILTER: 'FILTER';
1562 | FILEFORMAT: 'FILEFORMAT';
1563 | FIRST: 'FIRST';
1564 | FOLLOWING: 'FOLLOWING';
1565 | FOR: 'FOR';
1566 | FOREIGN: 'FOREIGN';
1567 | FORMAT: 'FORMAT';
1568 | FORMATTED: 'FORMATTED';
1569 | FROM: 'FROM';
1570 | FULL: 'FULL';
1571 | FUNCTION: 'FUNCTION';
1572 | FUNCTIONS: 'FUNCTIONS';
1573 | GLOBAL: 'GLOBAL';
1574 | GRANT: 'GRANT';
1575 | GROUP: 'GROUP';
1576 | GROUPING: 'GROUPING';
1577 | HAVING: 'HAVING';
1578 | IF: 'IF';
1579 | IGNORE: 'IGNORE';
1580 | IMPORT: 'IMPORT';
1581 | IN: 'IN';
1582 | INDEX: 'INDEX';
1583 | INDEXES: 'INDEXES';
1584 | INNER: 'INNER';
1585 | INPATH: 'INPATH';
1586 | INPUTFORMAT: 'INPUTFORMAT';
1587 | INSERT: 'INSERT';
1588 | INTERSECT: 'INTERSECT';
1589 | INTERVAL: 'INTERVAL';
1590 | INTO: 'INTO';
1591 | IS: 'IS';
1592 | ITEMS: 'ITEMS';
1593 | JOIN: 'JOIN';
1594 | KEYS: 'KEYS';
1595 | LAST: 'LAST';
1596 | LATERAL: 'LATERAL';
1597 | LAZY: 'LAZY';
1598 | LEADING: 'LEADING';
1599 | LEFT: 'LEFT';
1600 | LIKE: 'LIKE';
1601 | LIMIT: 'LIMIT';
1602 | LINES: 'LINES';
1603 | LIST: 'LIST';
1604 | LOAD: 'LOAD';
1605 | LOCAL: 'LOCAL';
1606 | LOCATION: 'LOCATION';
1607 | LOCK: 'LOCK';
1608 | LOCKS: 'LOCKS';
1609 | LOGICAL: 'LOGICAL';
1610 | MACRO: 'MACRO';
1611 | MAP: 'MAP';
1612 | MATCHED: 'MATCHED';
1613 | MERGE: 'MERGE';
1614 | MSCK: 'MSCK';
1615 | NAMESPACE: 'NAMESPACE';
1616 | NAMESPACES: 'NAMESPACES';
1617 | NATURAL: 'NATURAL';
1618 | NO: 'NO';
1619 | NOT: 'NOT' | '!';  // '!' is lexed as the same NOT token
1620 | NULL: 'NULL';
1621 | NULLS: 'NULLS';
1622 | OF: 'OF';
1623 | ON: 'ON';
1624 | ONLY: 'ONLY';
1625 | OPTION: 'OPTION';
1626 | OPTIONS: 'OPTIONS';
1627 | OR: 'OR';
1628 | ORDER: 'ORDER';
1629 | OUT: 'OUT';
1630 | OUTER: 'OUTER';
1631 | OUTPUTFORMAT: 'OUTPUTFORMAT';
1632 | OVER: 'OVER';
1633 | OVERLAPS: 'OVERLAPS';
1634 | OVERLAY: 'OVERLAY';
1635 | OVERWRITE: 'OVERWRITE';
1636 | PARTITION: 'PARTITION';
1637 | PARTITIONED: 'PARTITIONED';
1638 | PARTITIONS: 'PARTITIONS';
1639 | PERCENTLIT: 'PERCENT';  // token name differs from its text; the parser rules also reference a separate PERCENT token
1640 | PIVOT: 'PIVOT';
1641 | PLACING: 'PLACING';
1642 | POSITION: 'POSITION';
1643 | PRECEDING: 'PRECEDING';
1644 | PRIMARY: 'PRIMARY';
1645 | PRINCIPALS: 'PRINCIPALS';
1646 | PROPERTIES: 'PROPERTIES';
1647 | PURGE: 'PURGE';
1648 | QUERY: 'QUERY';
1649 | RANGE: 'RANGE';
1650 | RECORDREADER: 'RECORDREADER';
1651 | RECORDWRITER: 'RECORDWRITER';
1652 | RECOVER: 'RECOVER';
1653 | REDUCE: 'REDUCE';
1654 | REFERENCES: 'REFERENCES';
1655 | REFRESH: 'REFRESH';
1656 | RENAME: 'RENAME';
1657 | REPAIR: 'REPAIR';
1658 | REPLACE: 'REPLACE';
1659 | RESET: 'RESET';
1660 | RESPECT: 'RESPECT';
1661 | RESTRICT: 'RESTRICT';
1662 | REVOKE: 'REVOKE';
1663 | RIGHT: 'RIGHT';
1664 | RLIKE: 'RLIKE' | 'REGEXP';  // REGEXP is lexed as the same RLIKE token
1665 | ROLE: 'ROLE';
1666 | ROLES: 'ROLES';
1667 | ROLLBACK: 'ROLLBACK';
1668 | ROLLUP: 'ROLLUP';
1669 | ROW: 'ROW';
1670 | ROWS: 'ROWS';
1671 | SCHEMA: 'SCHEMA';
1672 | SELECT: 'SELECT';
1673 | SEMI: 'SEMI';
1674 | SEPARATED: 'SEPARATED';
1675 | SERDE: 'SERDE';
1676 | SERDEPROPERTIES: 'SERDEPROPERTIES';
1677 | SESSION_USER: 'SESSION_USER';
1678 | SET: 'SET';
1679 | SETMINUS: 'MINUS';  // the word MINUS (set operator in queryTerm); distinct from the MINUS token used in arithmetic rules
1680 | SETS: 'SETS';
1681 | SHOW: 'SHOW';
1682 | SKEWED: 'SKEWED';
1683 | SOME: 'SOME';
1684 | SORT: 'SORT';
1685 | SORTED: 'SORTED';
1686 | START: 'START';
1687 | STATISTICS: 'STATISTICS';
1688 | STORED: 'STORED';
1689 | STRATIFY: 'STRATIFY';
1690 | STRUCT: 'STRUCT';
1691 | SUBSTR: 'SUBSTR';
1692 | SUBSTRING: 'SUBSTRING';
1693 | SYNC: 'SYNC';
1694 | TABLE: 'TABLE';
1695 | TABLES: 'TABLES';
1696 | TABLESAMPLE: 'TABLESAMPLE';
1697 | TBLPROPERTIES: 'TBLPROPERTIES';
1698 | TEMPORARY: 'TEMPORARY' | 'TEMP';  // TEMP is lexed as the same TEMPORARY token
1699 | TERMINATED: 'TERMINATED';
1700 | THEN: 'THEN';
1701 | TIME: 'TIME';
1702 | TO: 'TO';
1703 | TOUCH: 'TOUCH';
1704 | TRAILING: 'TRAILING';
1705 | TRANSACTION: 'TRANSACTION';
1706 | TRANSACTIONS: 'TRANSACTIONS';
1707 | TRANSFORM: 'TRANSFORM';
1708 | TRIM: 'TRIM';
1709 | TRUE: 'TRUE';
1710 | TRUNCATE: 'TRUNCATE';
1711 | TYPE: 'TYPE';
1712 | UNARCHIVE: 'UNARCHIVE';
1713 | UNBOUNDED: 'UNBOUNDED';
1714 | UNCACHE: 'UNCACHE';
1715 | UNION: 'UNION';
1716 | UNIQUE: 'UNIQUE';
1717 | UNKNOWN: 'UNKNOWN';
1718 | UNLOCK: 'UNLOCK';
1719 | UNSET: 'UNSET';
1720 | UPDATE: 'UPDATE';
1721 | USE: 'USE';
1722 | USER: 'USER';
1723 | USING: 'USING';
1724 | VALUES: 'VALUES';
1725 | VIEW: 'VIEW';
1726 | VIEWS: 'VIEWS';
1727 | WHEN: 'WHEN';
1728 | WHERE: 'WHERE';
1729 | WINDOW: 'WINDOW';
1730 | WITH: 'WITH';
1731 | ZONE: 'ZONE';
1732 | //--SPARK-KEYWORD-LIST-END
1733 | //============================
1734 | // End of the keywords list
1735 | //============================
1736 |
1737 | EQ : '=' | '=='; // both spellings produce the same EQ token
1738 | NSEQ: '<=>';
1739 | NEQ : '<>';
1740 | NEQJ: '!='; // kept as a token of its own, distinct from NEQ ('<>')
1741 | LT : '<';
1742 | LTE : '<=' | '!>'; // '!>' is accepted as an alternative spelling
1743 | GT : '>';
1744 | GTE : '>=' | '!<'; // '!<' is accepted as an alternative spelling
1745 | 
1746 | PLUS: '+';
1747 | MINUS: '-'; // the keyword MINUS is a different token, SETMINUS
1748 | ASTERISK: '*';
1749 | SLASH: '/';
1750 | PERCENT: '%'; // the keyword PERCENT is a different token, PERCENTLIT
1751 | TILDE: '~';
1752 | AMPERSAND: '&';
1753 | PIPE: '|';
1754 | CONCAT_PIPE: '||'; // the lexer prefers the longest match, so '||' is not split into two PIPE tokens
1755 | HAT: '^';
1756 |
1757 | STRING // single- or double-quoted text; a backslash escapes the character that follows it
1758 | : '\'' ( ~('\''|'\\') | ('\\' .) )* '\''
1759 | | '"' ( ~('"'|'\\') | ('\\' .) )* '"'
1760 | ;
1761 | 
1762 | BIGINT_LITERAL // digits followed by the suffix 'L'
1763 | : DIGIT+ 'L'
1764 | ;
1765 | 
1766 | SMALLINT_LITERAL // digits followed by the suffix 'S'
1767 | : DIGIT+ 'S'
1768 | ;
1769 | 
1770 | TINYINT_LITERAL // digits followed by the suffix 'Y'
1771 | : DIGIT+ 'Y'
1772 | ;
1773 | 
1774 | INTEGER_VALUE // plain digits without a suffix
1775 | : DIGIT+
1776 | ;
1777 | 
1778 | EXPONENT_VALUE // number with an exponent part; the decimal form must also pass isValidDecimal()
1779 | : DIGIT+ EXPONENT
1780 | | DECIMAL_DIGITS EXPONENT {isValidDecimal()}?
1781 | ;
1782 | 
1783 | DECIMAL_VALUE // digits with a decimal point, accepted only when isValidDecimal() holds (predicate is defined outside this section)
1784 | : DECIMAL_DIGITS {isValidDecimal()}?
1785 | ;
1786 | 
1787 | FLOAT_LITERAL // integer or decimal digits, optional exponent, suffix 'F'
1788 | : DIGIT+ EXPONENT? 'F'
1789 | | DECIMAL_DIGITS EXPONENT? 'F' {isValidDecimal()}?
1790 | ;
1791 | 
1792 | DOUBLE_LITERAL // same shape as FLOAT_LITERAL with the suffix 'D'
1793 | : DIGIT+ EXPONENT? 'D'
1794 | | DECIMAL_DIGITS EXPONENT? 'D' {isValidDecimal()}?
1795 | ;
1796 | 
1797 | BIGDECIMAL_LITERAL // same shape as FLOAT_LITERAL with the suffix 'BD'
1798 | : DIGIT+ EXPONENT? 'BD'
1799 | | DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}?
1800 | ;
1801 |
1802 | IDENTIFIER // unquoted name: one or more letters, digits or underscores
1803 | : (LETTER | DIGIT | '_')+
1804 | ;
1805 | 
1806 | BACKQUOTED_IDENTIFIER // back-quoted name; a doubled back-quote (``) is allowed inside
1807 | : '`' ( ~'`' | '``' )* '`'
1808 | ;
1809 | 
1810 | fragment DECIMAL_DIGITS // shapes "1.", "1.5" and ".5"
1811 | : DIGIT+ '.' DIGIT*
1812 | | '.' DIGIT+
1813 | ;
1814 | 
1815 | fragment EXPONENT // 'E', an optional sign, then digits
1816 | : 'E' [+-]? DIGIT+
1817 | ;
1818 | 
1819 | fragment DIGIT
1820 | : [0-9]
1821 | ;
1822 | 
1823 | fragment LETTER // upper-case only; NOTE(review): presumably the input is upper-cased before lexing - confirm
1824 | : [A-Z]
1825 | ;
1826 |
1827 | SIMPLE_COMMENT // "--" up to the end of the line; a backslash-newline pair continues the comment; routed to the hidden channel
1828 | : '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN)
1829 | ;
1830 | 
1831 | BRACKETED_COMMENT // "/* ... */", may nest; the rule does not apply when isHint() holds (predicate is defined outside this section)
1832 | : '/*' {!isHint()}? (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN)
1833 | ;
1834 | 
1835 | WS // spaces, tabs and line breaks are routed to the hidden channel
1836 | : [ \r\n\t]+ -> channel(HIDDEN)
1837 | ;
1838 | 
1839 | // Catch-all for anything we can't recognize.
1840 | // We use this to be able to ignore and recover all the text
1841 | // when splitting statements with DelimiterLexer
1842 | UNRECOGNIZED // any single character that no other rule matched
1843 | : .
1844 | ;
1845 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/enums/SqlEngineEnum.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng.enums;
2 |
3 | /**
4 | * @ClassName SqlEngineEnum
5 | * @description: SQL engine (database) types
6 | * @author: again
7 | * @Date: 2021/3/10 4:08 PM
8 | */
9 | public enum SqlEngineEnum {
10 | MYSQL, HIVE, SPARK, PRESTO;
11 | }
12 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/enums/SqlTypeEnum.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng.enums;
2 |
3 | /**
4 | * @ClassName SqlTypeEnum
5 | * @description: SQL statement type enumeration
6 | * @author: again
7 | * @Date: 2021/3/10 4:08 PM
8 | */
9 | public enum SqlTypeEnum {
10 | SELECT, CREATE, INSERT, UPDATE, DELETE;
11 | }
12 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/hive/HiveSqlFieldLineageParser.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng.hive;
2 |
3 | import cn.ganjiacheng.antlr.HiveSqlBaseVisitor;
4 | import cn.ganjiacheng.antlr.HiveSqlParser;
5 | import cn.ganjiacheng.model.lineage.*;
6 | import org.antlr.v4.runtime.ParserRuleContext;
7 | import org.antlr.v4.runtime.RuleContext;
8 | import org.antlr.v4.runtime.tree.ParseTree;
9 | import org.codehaus.plexus.util.StringUtils;
10 |
11 | import java.util.*;
12 | import java.util.stream.Collectors;
13 |
14 | /**
15 |  * @ClassName HiveSqlFieldLineageParser
16 |  * @description: Field-level lineage extraction for Hive SQL. While visiting the parse tree it
17 |  * records, for every (sub)select, the table or sub-query it reads from and the columns used by
18 |  * each select item; {@link #getHiveFieldLineage()} then traces the columns of the outermost
19 |  * select back to table columns. Visitor state accumulates and is never reset.
20 |  * @author: again
21 |  * @Date: 2021/3/10 8:54 PM
22 |  */
23 | public class HiveSqlFieldLineageParser extends HiveSqlBaseVisitor<Object> {
24 | 
25 |     /** Target table of the visited INSERT statement; null when none was seen. */
26 |     private TableNameModel outputTable;
27 | 
28 |     /** Select sources keyed by "{start index}_{alias}", kept in registration order. */
29 |     private final HashMap<String, FieldLineageSelectModel> hiveFieldSelects = new LinkedHashMap<>();
30 | 
31 |     /** Start index of a nested select item -> key of the select source that wraps it. */
32 |     private final Map<Integer, String> selectParentKeyMap = new HashMap<>();
33 | 
34 |     /** Start index (as text) of the sub-select currently being visited. */
35 |     private String thisSelectId;
36 | 
37 |     /** Original SQL text; select-item expressions are cut out of it verbatim. */
38 |     private final String sourceSQL;
39 | 
40 |     /** Select item currently being collected. */
41 |     private FieldLineageSelectItemModel selectItemModel;
42 |     /** Select items collected for the sub-select currently being visited. */
43 |     private List<FieldLineageSelectItemModel> selectFields = new ArrayList<>();
44 |     /** True from the first select item until the FROM clause of that select is reached. */
45 |     private boolean startSelectItem = false;
46 | 
47 |     /** Flat copy of {@link #hiveFieldSelects}, rebuilt on every lineage request. */
48 |     private final List<FieldLineageSelectModel> hiveFieldSelectList = new ArrayList<>();
49 |     /** Source columns collected for the target column currently being resolved. */
50 |     private HashSet<FieldNameWithProcessModel> sourceFields;
51 |     /** Longest select-item expression met while resolving the current target column. */
52 |     private String fieldProcess = "";
53 | 
54 |     public HiveSqlFieldLineageParser(String sql) {
55 |         this.sourceSQL = sql;
56 |     }
57 | 
58 |     /**
59 |      * Returns the source text covered by the given node, including the original whitespace.
60 |      * Falls back to {@code getText()} when the node has no usable token range.
61 |      */
62 |     private String subSourceSql(ParserRuleContext parserRuleContext) {
63 |         if (parserRuleContext.getStart() == null || parserRuleContext.getStop() == null) {
64 |             return parserRuleContext.getText();
65 |         }
66 |         int from = parserRuleContext.getStart().getStartIndex();
67 |         int to = parserRuleContext.getStop().getStopIndex() + 1;
68 |         return from < to ? sourceSQL.substring(from, to) : parserRuleContext.getText();
69 |     }
70 | 
71 |     /**
72 |      * INSERT statement: remembers the target table.
73 |      */
74 |     @Override
75 |     public Object visitInsert_stmt(HiveSqlParser.Insert_stmtContext ctx) {
76 |         outputTable = Optional.ofNullable(ctx)
77 |                 .map(HiveSqlParser.Insert_stmtContext::table_name)
78 |                 .map(RuleContext::getText)
79 |                 .map(TableNameModel::parseTableName)
80 |                 .orElse(null);
81 |         return super.visitInsert_stmt(ctx);
82 |     }
83 | 
84 |     /**
85 |      * Collects the identifiers used inside the select item being visited;
86 |      * purely numeric atoms are skipped.
87 |      */
88 |     @Override
89 |     public Object visitExpr(HiveSqlParser.ExprContext ctx) {
90 |         if (startSelectItem) {
91 |             Optional.ofNullable(ctx)
92 |                     .map(HiveSqlParser.ExprContext::expr_atom)
93 |                     .map(HiveSqlParser.Expr_atomContext::ident)
94 |                     .map(ParseTree::getText)
95 |                     .ifPresent(name -> {
96 |                         if (!StringUtils.isNumeric(name)) {
97 |                             selectItemModel.getFieldNames().add(TableNameModel.dealNameMark(name));
98 |                         }
99 |                     });
100 |         }
101 |         return super.visitExpr(ctx);
102 |     }
103 | 
104 |     /**
105 |      * Select item: starts a fresh item model, stores the item's expression text and alias,
106 |      * then visits the children so that visitExpr can fill in the referenced fields.
107 |      */
108 |     @Override
109 |     public Object visitSelect_list_item(HiveSqlParser.Select_list_itemContext ctx) {
110 |         startSelectItem = true;
111 |         selectItemModel = new FieldLineageSelectItemModel();
112 |         selectItemModel.setFieldNames(new HashSet<>());
113 |         Optional.ofNullable(ctx)
114 |                 .map(HiveSqlParser.Select_list_itemContext::expr)
115 |                 .map(this::subSourceSql)
116 |                 .ifPresent(selectItemModel::setProcess);
117 |         Optional.ofNullable(ctx)
118 |                 .map(HiveSqlParser.Select_list_itemContext::select_list_alias)
119 |                 .map(HiveSqlParser.Select_list_aliasContext::ident)
120 |                 .map(RuleContext::getText)
121 |                 .ifPresent(selectItemModel::setAlias);
122 |         Object visit = super.visitSelect_list_item(ctx);
123 |         selectFields.add(selectItemModel);
124 |         return visit;
125 |     }
126 | 
127 |     /**
128 |      * FROM clause: reached after all select items of the current select were visited.
129 |      * Assigns every collected field to the select source it belongs to ("alias.field" goes
130 |      * to the source with that alias) and stores the resulting items on the registered sources.
131 |      */
132 |     @Override
133 |     public Object visitFrom_clause(HiveSqlParser.From_clauseContext ctx) {
134 |         startSelectItem = false;
135 |         String selectPrefix = thisSelectId + "_";
136 |         Map<String, List<FieldLineageSelectItemModel>> itemsBySource = new HashMap<>();
137 |         for (FieldLineageSelectItemModel item : selectFields) {
138 |             Map<String, HashSet<String>> fieldsBySource = new HashMap<>();
139 |             for (String field : item.getFieldNames()) {
140 |                 String[] parts = field.split("\\.");
141 |                 if (parts.length == 2) {
142 |                     fieldsBySource.computeIfAbsent(selectPrefix + parts[0], k -> new HashSet<>()).add(parts[1]);
143 |                 } else if (parts.length == 1) {
144 |                     // Unqualified field: attribute it to every source of this select that wraps
145 |                     // a sub-query, or to the alias-less source when there is none.
146 |                     boolean matched = false;
147 |                     for (String parentKey : selectParentKeyMap.values()) {
148 |                         if (parentKey.startsWith(selectPrefix)) {
149 |                             fieldsBySource.computeIfAbsent(parentKey, k -> new HashSet<>()).add(parts[0]);
150 |                             matched = true;
151 |                         }
152 |                     }
153 |                     if (!matched) {
154 |                         fieldsBySource.computeIfAbsent(selectPrefix, k -> new HashSet<>()).add(parts[0]);
155 |                     }
156 |                 }
157 |             }
158 |             for (Map.Entry<String, HashSet<String>> entry : fieldsBySource.entrySet()) {
159 |                 FieldLineageSelectItemModel sourceItem = new FieldLineageSelectItemModel();
160 |                 sourceItem.setFieldNames(entry.getValue());
161 |                 sourceItem.setProcess(item.getProcess());
162 |                 // Without an explicit alias a single-field item is named after that field.
163 |                 String alias = item.getAlias();
164 |                 if (alias == null && entry.getValue().size() == 1) {
165 |                     alias = entry.getValue().iterator().next();
166 |                 }
167 |                 sourceItem.setAlias(alias);
168 |                 itemsBySource.computeIfAbsent(entry.getKey(), k -> new ArrayList<>()).add(sourceItem);
169 |             }
170 |         }
171 |         itemsBySource.forEach((key, items) -> {
172 |             FieldLineageSelectModel select = hiveFieldSelects.get(key);
173 |             if (select != null) {
174 |                 select.setSelectItems(items);
175 |             }
176 |         });
177 |         return super.visitFrom_clause(ctx);
178 |     }
179 | 
180 |     /**
181 |      * Before descending into a select: registers the sources (FROM table or sub-query plus
182 |      * every JOIN) of each select item and remembers parent/child links to nested selects.
183 |      */
184 |     @Override
185 |     public Object visitSelect_stmt(HiveSqlParser.Select_stmtContext ctx) {
186 |         List<HiveSqlParser.Fullselect_stmt_itemContext> selectItems = Optional.ofNullable(ctx.fullselect_stmt())
187 |                 .map(HiveSqlParser.Fullselect_stmtContext::fullselect_stmt_item)
188 |                 .orElse(Collections.emptyList());
189 |         for (HiveSqlParser.Fullselect_stmt_itemContext selectItem : selectItems) {
190 |             Integer selectStart = selectItem.getStart().getStartIndex();
191 |             HiveSqlParser.From_clauseContext fromClause = Optional.ofNullable(selectItem.subselect_stmt())
192 |                     .map(HiveSqlParser.Subselect_stmtContext::from_clause)
193 |                     .orElse(null);
194 |             // The main source is registered even without a FROM clause (key "{start}_").
195 |             registerSelectSource(selectStart, fromClause == null ? null : fromClause.from_table_clause());
196 |             if (fromClause != null) {
197 |                 for (HiveSqlParser.From_join_clauseContext join : fromClause.from_join_clause()) {
198 |                     registerSelectSource(selectStart, join.from_table_clause());
199 |                 }
200 |             }
201 |         }
202 |         return super.visitSelect_stmt(ctx);
203 |     }
204 | 
205 |     /**
206 |      * Registers one source of the select starting at selectStart under the key
207 |      * "{selectStart}_{alias}" and links the select items of a wrapped sub-query to it.
208 |      *
209 |      * @param selectStart start index of the select item that owns the source
210 |      * @param fromTable   FROM/JOIN table clause; may be null when the select has no FROM
211 |      */
212 |     private void registerSelectSource(Integer selectStart, HiveSqlParser.From_table_clauseContext fromTable) {
213 |         FieldLineageSelectModel source = new FieldLineageSelectModel();
214 |         Optional<HiveSqlParser.From_table_name_clauseContext> tableClause = Optional.ofNullable(fromTable)
215 |                 .map(HiveSqlParser.From_table_clauseContext::from_table_name_clause);
216 |         tableClause.map(HiveSqlParser.From_table_name_clauseContext::table_name)
217 |                 .map(RuleContext::getText)
218 |                 .map(TableNameModel::parseTableName)
219 |                 .ifPresent(source::setFromTable);
220 |         tableClause.map(HiveSqlParser.From_table_name_clauseContext::from_alias_clause)
221 |                 .map(HiveSqlParser.From_alias_clauseContext::ident)
222 |                 .map(RuleContext::getText)
223 |                 .ifPresent(source::setTableAlias);
224 | 
225 |         Optional<HiveSqlParser.From_subselect_clauseContext> subQuery = Optional.ofNullable(fromTable)
226 |                 .map(HiveSqlParser.From_table_clauseContext::from_subselect_clause);
227 |         // Take the alias identifier only, as for table aliases: getText() on the whole alias
228 |         // clause concatenates all of its tokens, so an AS keyword would end up in the alias.
229 |         subQuery.map(HiveSqlParser.From_subselect_clauseContext::from_alias_clause)
230 |                 .map(HiveSqlParser.From_alias_clauseContext::ident)
231 |                 .map(RuleContext::getText)
232 |                 .ifPresent(source::setTableAlias);
233 | 
234 |         String alias = source.getTableAlias();
235 |         String key = String.format("%s_%s", selectStart, alias == null ? "" : alias);
236 |         source.setId(key);
237 |         source.setParentId(selectParentKeyMap.get(selectStart));
238 |         source.setSelectItems(new ArrayList<>());
239 |         hiveFieldSelects.put(key, source);
240 | 
241 |         subQuery.map(HiveSqlParser.From_subselect_clauseContext::select_stmt)
242 |                 .map(HiveSqlParser.Select_stmtContext::fullselect_stmt)
243 |                 .map(HiveSqlParser.Fullselect_stmtContext::fullselect_stmt_item)
244 |                 .ifPresent(children -> children.forEach(
245 |                         child -> selectParentKeyMap.put(child.getStart().getStartIndex(), key)));
246 |     }
247 | 
248 |     /**
249 |      * Before descending into a sub-select: resets the per-select state.
250 |      */
251 |     @Override
252 |     public Object visitSubselect_stmt(HiveSqlParser.Subselect_stmtContext ctx) {
253 |         thisSelectId = String.valueOf(ctx.getStart().getStartIndex());
254 |         selectFields = new ArrayList<>();
255 |         return super.visitSubselect_stmt(ctx);
256 |     }
257 | 
258 |     /**
259 |      * Copies the registered select sources into the flat list used for resolving.
260 |      * The list is rebuilt each time so that repeated lineage requests do not duplicate entries.
261 |      */
262 |     private void transSelectToList() {
263 |         hiveFieldSelectList.clear();
264 |         hiveFieldSelectList.addAll(hiveFieldSelects.values());
265 |     }
266 | 
267 |     /**
268 |      * Returns the target fields: the distinct aliases of the select items that belong to the
269 |      * outermost selects (sources without a parent), qualified with the INSERT target table
270 |      * when there is one. Items without a usable name are skipped.
271 |      */
272 |     private List<FieldNameModel> getTargetFields() {
273 |         List<String> aliases = hiveFieldSelectList.stream()
274 |                 .filter(select -> select.getParentId() == null)
275 |                 .map(FieldLineageSelectModel::getSelectItems)
276 |                 .filter(Objects::nonNull)
277 |                 .flatMap(Collection::stream)
278 |                 .map(FieldLineageSelectItemModel::getAlias)
279 |                 .filter(Objects::nonNull)
280 |                 .distinct()
281 |                 .collect(Collectors.toList());
282 |         List<FieldNameModel> targets = new ArrayList<>();
283 |         for (String alias : aliases) {
284 |             FieldNameModel target = new FieldNameModel();
285 |             if (outputTable != null) {
286 |                 target.setDbName(outputTable.getDbName());
287 |                 target.setTableName(outputTable.getTableName());
288 |             }
289 |             target.setFieldName(alias);
290 |             targets.add(target);
291 |         }
292 |         return targets;
293 |     }
294 | 
295 |     /**
296 |      * Resolves targetField from the outside in: among the sources whose parent is parentId,
297 |      * every select item named targetField either contributes table columns (the source reads
298 |      * a table) or is resolved further inside the wrapped sub-query.
299 |      */
300 |     private void findFieldSource(String targetField, String parentId) {
301 |         if (targetField == null) {
302 |             return;
303 |         }
304 |         for (FieldLineageSelectModel select : hiveFieldSelectList) {
305 |             if (!Objects.equals(select.getParentId(), parentId) || select.getSelectItems() == null) {
306 |                 continue;
307 |             }
308 |             for (FieldLineageSelectItemModel selectItem : select.getSelectItems()) {
309 |                 // Unnamed items (null alias) can never match; comparing this way avoids an NPE.
310 |                 if (!targetField.equals(selectItem.getAlias())) {
311 |                     continue;
312 |                 }
313 |                 String process = selectItem.getProcess();
314 |                 if (process != null && process.length() > fieldProcess.length()) {
315 |                     fieldProcess = process;
316 |                 }
317 |                 for (String field : selectItem.getFieldNames()) {
318 |                     if (select.getFromTable() == null) {
319 |                         findFieldSource(field, select.getId());
320 |                     } else {
321 |                         FieldNameWithProcessModel sourceField = new FieldNameWithProcessModel();
322 |                         sourceField.setDbName(select.getFromTable().getDbName());
323 |                         sourceField.setTableName(select.getFromTable().getTableName());
324 |                         sourceField.setFieldName(field);
325 |                         sourceField.setProcess(fieldProcess);
326 |                         sourceFields.add(sourceField);
327 |                     }
328 |                 }
329 |             }
330 |         }
331 |     }
332 | 
333 |     /**
334 |      * Returns the field lineage: one entry per target field with the table columns it is
335 |      * derived from. Call after the statement has been visited.
336 |      */
337 |     public List<FieldLineageModel> getHiveFieldLineage() {
338 |         transSelectToList();
339 |         List<FieldLineageModel> lineage = new ArrayList<>();
340 |         for (FieldNameModel targetField : getTargetFields()) {
341 |             FieldLineageModel fieldLineageModel = new FieldLineageModel();
342 |             fieldLineageModel.setTargetField(targetField);
343 |             sourceFields = new HashSet<>();
344 |             fieldProcess = "";
345 |             findFieldSource(targetField.getFieldName(), null);
346 |             fieldLineageModel.setSourceFields(sourceFields);
347 |             lineage.add(fieldLineageModel);
348 |         }
349 |         return lineage;
350 |     }
351 | 
352 |     /**
353 |      * Returns the select sources collected while visiting, keyed by source id.
354 |      */
355 |     public HashMap<String, FieldLineageSelectModel> getHiveFieldSelects() {
356 |         return hiveFieldSelects;
357 |     }
358 | }
359 | 
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/hive/HiveSqlFormatterParser.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng.hive;
2 |
3 | import cn.ganjiacheng.antlr.HiveSqlBaseVisitor;
4 | import cn.ganjiacheng.antlr.HiveSqlParser;
5 | import org.antlr.v4.runtime.ParserRuleContext;
6 | import org.antlr.v4.runtime.RuleContext;
7 | import org.codehaus.plexus.util.StringUtils;
8 |
9 | import java.util.*;
10 | import java.util.concurrent.atomic.AtomicReference;
11 |
12 | /**
13 | * @ClassName HiveSqlFormatter
14 | * @description: sql格式化
15 | * @author: again
16 | * @Date: 2021/3/10 8:31 下午
17 | */
18 | public class HiveSqlFormatterParser extends HiveSqlBaseVisitor {
19 | private final String sourceSQL;
20 |
21 | private String formattedSQL = "";
22 |
23 | private boolean firstSelect = true;
24 |
25 | public HiveSqlFormatterParser(String sql) { // sql: the statement text to format
26 | this.sourceSQL = sql; // kept so that fragments can later be cut out of it verbatim
27 | }
28 |
29 | // Cuts the text of a parse-tree node straight out of the source SQL instead of descending further; e.g. a select item wrapped in calculations and function calls is taken verbatim.
30 | private String subSourceSql(ParserRuleContext parserRuleContext) {
31 | return sourceSQL.substring(
32 | parserRuleContext.getStart().getStartIndex(),
33 | parserRuleContext.getStop().getStopIndex() + 1); // +1: the stop index is inclusive, substring's end is exclusive
34 | }
35 |
36 | // Indents s by n levels (4 spaces each) via right-aligned formatting; s is returned unchanged for n <= 0, so an empty string at level 0 no longer builds the invalid format "%0s".
37 | private String addKongFormat(String s, Integer n) {
38 |     return n > 0 ? String.format("%" + (s.length() + n * 4) + "s", s) : s;
39 | }
40 |
41 | // Indent level of each select, keyed by the start index of that select in the source SQL
42 | private final Map<Integer, Integer> ceng = new HashMap<>();
43 | 
44 | private List<String> boolExpr = new ArrayList<>();
45 | private List<String> boolExprOperator = new ArrayList<>();
46 |
47 | // Multiple WHERE conditions form a tree -> (preceding conditions) operator (last condition); the left subtree is expanded recursively, operand texts go to boolExpr and operators to boolExprOperator.
48 | private void makeWhereExpr(HiveSqlParser.Bool_exprContext bool_exprContext) {
49 | if (bool_exprContext.children.size() == 3 && bool_exprContext.bool_expr().size() == 2) { // binary node: two operands around a logical operator
50 | makeWhereExpr(bool_exprContext.bool_expr(0)); // left operand first, so conditions keep their source order
51 | boolExpr.add(sourceSQL.substring(bool_exprContext.bool_expr(1).getStart().getStartIndex(),
52 | bool_exprContext.bool_expr(1).getStop().getStopIndex() + 1));
53 | boolExprOperator.add(bool_exprContext.bool_expr_logical_operator().getText());
54 | } else { // anything else is kept as one condition, copied verbatim
55 | boolExpr.add(sourceSQL.substring(bool_exprContext.getStart().getStartIndex(),
56 | bool_exprContext.getStop().getStopIndex() + 1));
57 | }
58 | }
59 |
60 | // FROM source: formats either a plain table reference or a parenthesised sub-select (left as a {SELECTn} placeholder) followed by its alias.
61 | private String getFromTableClause(HiveSqlParser.From_table_clauseContext from_clauseContext, Integer kong) {
62 | StringBuilder tmpSelect = new StringBuilder();
63 | Optional.of(from_clauseContext)
64 | .map(HiveSqlParser.From_table_clauseContext::from_table_name_clause)
65 | .map(RuleContext -> addKongFormat(subSourceSql(RuleContext), kong + 1))
66 | .ifPresent(tmpSelect::append);
67 | // FROM sub-select: opening parenthesis
68 | Optional.of(from_clauseContext)
69 | .map(HiveSqlParser.From_table_clauseContext::from_subselect_clause)
70 | .map(HiveSqlParser.From_subselect_clauseContext::T_OPEN_P)
71 | .map(ParseTree -> addKongFormat(ParseTree.getText(), kong))
72 | .ifPresent(tmpSelect::append);
73 | // Leave a placeholder for the sub-select and record its indent level (one deeper than this one)
74 | Integer fromSubIndex = Optional.of(from_clauseContext)
75 | .map(HiveSqlParser.From_table_clauseContext::from_subselect_clause)
76 | .map(HiveSqlParser.From_subselect_clauseContext::select_stmt)
77 | .map(ParserRuleContext -> ParserRuleContext.getStart().getStartIndex()).orElse(null);
78 | if (fromSubIndex != null) {
79 | tmpSelect.append("\n{SELECT").append(fromSubIndex).append("}");
80 | ceng.put(fromSubIndex, kong + 1);
81 | }
82 | // closing parenthesis
83 | Optional.of(from_clauseContext)
84 | .map(HiveSqlParser.From_table_clauseContext::from_subselect_clause)
85 | .map(HiveSqlParser.From_subselect_clauseContext::T_CLOSE_P)
86 | .map(ParseTree -> "\n" + addKongFormat(ParseTree.getText(), kong))
87 | .ifPresent(tmpSelect::append);
88 | // alias of the sub-select
89 | Optional.of(from_clauseContext)
90 | .map(HiveSqlParser.From_table_clauseContext::from_subselect_clause)
91 | .map(HiveSqlParser.From_subselect_clauseContext::from_alias_clause)
92 | .map(ParserRuleContext -> " " + subSourceSql(ParserRuleContext)).ifPresent(tmpSelect::append);
93 | return tmpSelect.toString();
94 | }
95 |
96 | // Formats one select item at indent level kong + 1. A searched CASE is laid out with CASE, every WHEN ... THEN ..., ELSE, END and the alias on separate lines; any other item is copied verbatim from the source SQL.
97 | private String getSelectItem(HiveSqlParser.Select_list_itemContext selectItem, Integer kong) {
98 |     AtomicReference<String> itemRes = new AtomicReference<>("");
99 |     boolean isCase = Optional.of(selectItem)
100 |             .map(HiveSqlParser.Select_list_itemContext::expr)
101 |             .map(HiveSqlParser.ExprContext::expr_case)
102 |             .map(HiveSqlParser.Expr_caseContext::expr_case_searched)
103 |             .map(caseCtx -> {
104 |                 StringBuilder formatted = new StringBuilder();
105 |                 List<String> branches = new ArrayList<>();
106 |                 Optional.of(caseCtx)
107 |                         .map(HiveSqlParser.Expr_case_searchedContext::T_CASE)
108 |                         .map(node -> addKongFormat(node.getText() + "\n", kong + 1))
109 |                         .ifPresent(formatted::append);
110 |                 Optional.of(caseCtx)
111 |                         .map(HiveSqlParser.Expr_case_searchedContext::T_WHEN)
112 |                         .ifPresent(whenNodes -> {
113 |                             for (int i = 0; i < whenNodes.size(); i++) {
114 |                                 branches.add(addKongFormat(caseCtx.T_WHEN(i).getText() + " " +
115 |                                         subSourceSql(caseCtx.bool_expr(i)) + " " +
116 |                                         caseCtx.T_THEN(i).getText() + " " +
117 |                                         subSourceSql(caseCtx.expr(i)), kong + 2));
118 |                             }
119 |                             formatted.append(StringUtils.join(branches.toArray(), "\n"));
120 |                         });
121 |                 Optional.of(caseCtx).map(HiveSqlParser.Expr_case_searchedContext::T_ELSE)
122 |                         .map(node -> "\n" + addKongFormat(node.getText() + // the ELSE value is the last expr; cut it from the source so its whitespace survives
123 |                                 " " + subSourceSql(caseCtx.expr(caseCtx.expr().size() - 1)), kong + 2))
124 |                         .ifPresent(formatted::append);
125 |                 Optional.of(caseCtx)
126 |                         .map(HiveSqlParser.Expr_case_searchedContext::T_END)
127 |                         .map(node -> "\n" + addKongFormat(node.getText(), kong + 1))
128 |                         .ifPresent(formatted::append);
129 |                 Optional.of(selectItem)
130 |                         .map(HiveSqlParser.Select_list_itemContext::select_list_alias)
131 |                         .map(aliasCtx -> "\n" + addKongFormat(subSourceSql(aliasCtx), kong + 1))
132 |                         .ifPresent(formatted::append);
133 |                 itemRes.set(formatted.toString());
134 |                 return true;
135 |             }).orElse(false);
136 |     if (!isCase) {
137 |         // Not a searched CASE: keep the item exactly as written in the source SQL.
138 |         itemRes.set(addKongFormat(subSourceSql(selectItem), kong + 1));
139 |     }
140 |     return itemRes.get();
141 | }
142 |
143 | // private String getLateralView(HiveSqlParser.Lateral_clause_itemContext ctx, int kong) {
144 | // StringBuilder lateralView = new StringBuilder();
145 | // Optional.of(ctx)
146 | // .map(HiveSqlParser.Lateral_clause_itemContext::T_LATERAL)
147 | // .map(lateralView::append);
148 | // lateralView.append(" ");
149 | // Optional.of(ctx)
150 | // .map(HiveSqlParser.Lateral_clause_itemContext::T_VIEW)
151 | // .map(lateralView::append);
152 | // lateralView.append("\n");
153 | // Optional.of(ctx)
154 | // .map(HiveSqlParser.Lateral_clause_itemContext::expr)
155 | // .map(ParserRuleContext -> addKongFormat(subSourceSql(ParserRuleContext), kong+1))
156 | // .map(lateralView::append);
157 | // lateralView.append(" ");
158 | // Optional.of(ctx)
159 | // .map(HiveSqlParser.Lateral_clause_itemContext::ident)
160 | // .map(RuleContext::getText)
161 | // .map(lateralView::append);
162 | // Optional.of(ctx)
163 | // .map(HiveSqlParser.Lateral_clause_itemContext::T_AS)
164 | // .map(ParserRuleContext -> "\n" + ParserRuleContext.getText())
165 | // .map(lateralView::append);
166 | // Optional.of(ctx)
167 | // .map(HiveSqlParser.Lateral_clause_itemContext::lateral_clause_alias)
168 | // .map(ParserRuleContext -> " " + subSourceSql(ParserRuleContext))
169 | // .map(lateralView::append);
170 | // return lateralView.toString();
171 | // }
172 |
173 | private String getFromJoin(HiveSqlParser.From_join_clauseContext ctx, int kong) { // formats one join: either "<join type> <source> ON <condition>" or ", <source>"
174 | StringBuilder fromjoinSQL = new StringBuilder();
175 | Optional.of(ctx)
176 | .map(HiveSqlParser.From_join_clauseContext::from_join_type_clause)
177 | .map(from_join_type_clauseContext -> "\n" + addKongFormat(subSourceSql(ctx.from_join_type_clause()), kong) + "\n" // explicit join: type, source and ON condition on separate lines
178 | + getFromTableClause(ctx.from_table_clause(), kong) + "\n"
179 | + addKongFormat(ctx.T_ON().getText(), kong) + " "
180 | + subSourceSql(ctx.bool_expr()))
181 | .map(fromjoinSQL::append);
182 | Optional.of(ctx)
183 | .map(HiveSqlParser.From_join_clauseContext::T_COMMA) // comma-separated extra FROM source
184 | .map(ParserRuleContext -> ParserRuleContext.getText() + "\n"
185 | + getFromTableClause(ctx.from_table_clause(), kong)
186 | ).map(fromjoinSQL::append);
187 | return fromjoinSQL.toString();
188 | }
189 |
190 | // Works by substitution: each {SELECTn} placeholder is replaced with the formatted text of that select
191 | @Override
192 | public Object visitSelect_stmt(HiveSqlParser.Select_stmtContext ctx) {
193 | int selectSize = ctx.fullselect_stmt().fullselect_stmt_item().size();
194 | // 第一次进入添加位置标记(节点在原字符串startIndex作为唯一标志)
195 | int gstartIndex = ctx.getStart().getStartIndex();
196 | String thisSelect = String.format("{SELECT%s}", gstartIndex);
197 | if (!formattedSQL.contains(thisSelect) && firstSelect) {
198 | formattedSQL += thisSelect;
199 | firstSelect = false;
200 | }
201 | StringBuilder fullSelect = new StringBuilder();
202 | // with语句
203 | // Optional.of(ctx)
204 | // .map(HiveSqlParser.Select_stmtContext::cte_select_stmt)
205 | // .map(HiveSqlParser.Cte_select_stmtContext::T_WITH)
206 | // .map(ParserRuleContext -> ParserRuleContext.getText() + " ")
207 | // .map(fullSelect::append);
208 | // Optional.of(ctx)
209 | // .map(HiveSqlParser.Select_stmtContext::cte_select_stmt)
210 | // .map(HiveSqlParser.Cte_select_stmtContext::cte_select_stmt_item)
211 | // .map(cte_select_stmt_itemContexts -> StringUtils.join(cte_select_stmt_itemContexts.stream().map(
212 | // item -> item.ident().getText() + " " +
213 | // item.T_AS().getText() + "\n" +
214 | // item.T_OPEN_P().getText() + "" +
215 | // String.format("\n{SELECT%s}", item.select_stmt().getStart().getStartIndex()) + "\n" +
216 | // item.T_CLOSE_P().getText()
217 | // ).toArray(), ",\n")).map(fullSelect::append);
218 | // 遍历子select添加位置标记
219 | Integer gkong = ceng.get(gstartIndex) == null ? 0 : ceng.get(gstartIndex);
220 | for (int i = 0; i < selectSize; i++) {
221 | Integer startIndex = ctx.fullselect_stmt().fullselect_stmt_item(i).subselect_stmt().getStart().getStartIndex();
222 | fullSelect.append(String.format("{SELECT%s}", startIndex));
223 | ceng.put(startIndex, gkong);
224 | if (i < selectSize - 1) {
225 | HiveSqlParser.Fullselect_set_clauseContext clauseContext = ctx.fullselect_stmt().fullselect_set_clause(i);
226 | fullSelect.append("\n").append(addKongFormat(subSourceSql(clauseContext), gkong)).append("\n");
227 | }
228 | }
229 | // 格式化每个子select并替换标记
230 | formattedSQL = formattedSQL.replace(String.format("{SELECT%s}", ctx.getStart().getStartIndex()), fullSelect.toString());
231 | for (int i = 0; i < selectSize; i++) {
232 | HiveSqlParser.Subselect_stmtContext subSelect = ctx.fullselect_stmt().fullselect_stmt_item(i).subselect_stmt();
233 | Integer thisIndex = subSelect.getStart().getStartIndex();
234 | Integer kong = ceng.get(thisIndex) == null ? 0 : ceng.get(thisIndex);
235 | StringBuilder tmpSelect = new StringBuilder();
236 | // select
237 | tmpSelect.append(addKongFormat(
238 | subSelect.T_SELECT().getText(),
239 | kong)).append("\n");
240 | Optional.of(subSelect).map(HiveSqlParser.Subselect_stmtContext::select_list)
241 | .map(HiveSqlParser.Select_listContext::select_list_set)
242 | .map(select_list_setContext -> addKongFormat(subSourceSql(select_list_setContext) + "\n", kong + 1))
243 | .map(tmpSelect::append);
244 | Optional.of(subSelect).map(HiveSqlParser.Subselect_stmtContext::select_list)
245 | .map(HiveSqlParser.Select_listContext::select_list_item)
246 | .map(select_list_itemContexts ->
247 | StringUtils.join(select_list_itemContexts.stream().map(
248 | item -> getSelectItem(item, kong)).toArray(), ",\n") + "\n").map(tmpSelect::append);
249 | // from
250 | Optional.of(subSelect)
251 | .map(HiveSqlParser.Subselect_stmtContext::from_clause)
252 | .map(HiveSqlParser.From_clauseContext::T_FROM)
253 | .map(ParserRuleContext -> addKongFormat(ParserRuleContext.getText() + "\n", kong))
254 | .map(tmpSelect::append);
255 | // from 表
256 | Optional.of(subSelect)
257 | .map(HiveSqlParser.Subselect_stmtContext::from_clause)
258 | .map(HiveSqlParser.From_clauseContext::from_table_clause)
259 | .map(from_table_clauseContext -> getFromTableClause(from_table_clauseContext, kong))
260 | .map(tmpSelect::append);
261 | // join语句
262 | Optional.of(subSelect)
263 | .map(HiveSqlParser.Subselect_stmtContext::from_clause)
264 | .map(HiveSqlParser.From_clauseContext::from_join_clause)
265 | .map(from_join_clauseContexts -> from_join_clauseContexts.size() > 0 ?
266 | StringUtils.join(from_join_clauseContexts.stream().map(
267 | item -> getFromJoin(item, kong)).toArray(), "") : "")
268 | .ifPresent(tmpSelect::append);
269 | // Optional.of(subSelect)
270 | // .map(HiveSqlParser.Subselect_stmtContext::lateral_clause)
271 | // .map(HiveSqlParser.Lateral_clauseContext::lateral_clause_item)
272 | // .map(lateral_clause_itemContexts -> "\n" + StringUtils.join(lateral_clause_itemContexts.stream().map(
273 | // item -> getLateralView(item, kong)).toArray(), "\n"))
274 | // .map(tmpSelect::append);
275 | // where
276 | Optional.of(subSelect)
277 | .map(HiveSqlParser.Subselect_stmtContext::where_clause)
278 | .map(HiveSqlParser.Where_clauseContext::T_WHERE)
279 | .map(ParseTree -> "\n" + addKongFormat(ParseTree.getText() + "\n", kong))
280 | .ifPresent(tmpSelect::append);
281 | // where条件
282 | Optional.of(subSelect)
283 | .map(HiveSqlParser.Subselect_stmtContext::where_clause)
284 | .ifPresent(ParserRuleContext -> {
285 | makeWhereExpr(ParserRuleContext.bool_expr());
286 | List result = new ArrayList<>();
287 | for (int t = 0; t < boolExpr.size(); t++) {
288 | if (t == boolExpr.size() - 1) {
289 | result.add(boolExpr.get(t));
290 | } else {
291 | result.add(boolExpr.get(t) + " " + boolExprOperator.get(t));
292 | }
293 | }
294 | boolExpr = new ArrayList<>();
295 | boolExprOperator = new ArrayList<>();
296 | tmpSelect.append(StringUtils.join(result.stream().map(item -> addKongFormat(item, kong + 1)).toArray(), "\n"));
297 | });
298 | // group by
299 | Optional.of(subSelect)
300 | .map(HiveSqlParser.Subselect_stmtContext::group_by_clause)
301 | .map(ParserRuleContext -> "\n" + addKongFormat(ParserRuleContext.T_GROUP().getText() + " " + ParserRuleContext.T_BY().getText(), kong))
302 | .ifPresent(tmpSelect::append);
303 | // group by 字段
304 | Optional.of(subSelect)
305 | .map(HiveSqlParser.Subselect_stmtContext::group_by_clause)
306 | .map(HiveSqlParser.Group_by_clauseContext::expr)
307 | .map(exprContexts -> "\n" + addKongFormat(StringUtils.join(exprContexts.stream().map(this::subSourceSql).toArray(), ", "), kong + 1))
308 | .ifPresent(tmpSelect::append);
309 | // having 语句
310 | Optional.of(subSelect)
311 | .map(HiveSqlParser.Subselect_stmtContext::having_clause)
312 | .map(having_clauseContext -> "\n" + addKongFormat(subSourceSql(having_clauseContext), kong))
313 | .ifPresent(tmpSelect::append);
314 | // order by
315 | Optional.of(subSelect)
316 | .map(HiveSqlParser.Subselect_stmtContext::order_by_clause)
317 | .map(ParserRuleContext -> "\n" + addKongFormat(ParserRuleContext.T_ORDER() + " " + ParserRuleContext.T_BY(), kong))
318 | .ifPresent(tmpSelect::append);
319 | // order by 字段
320 | Optional.of(subSelect)
321 | .map(HiveSqlParser.Subselect_stmtContext::order_by_clause)
322 | .map(ParserRuleContext -> "\n" + addKongFormat(StringUtils.join(ParserRuleContext.expr().stream().map(
323 | RuleContext::getText).toArray(), ", "), kong + 1))
324 | .ifPresent((tmpSelect::append));
325 | // order by 参数
326 | Optional.of(subSelect).map(HiveSqlParser.Subselect_stmtContext::order_by_clause)
327 | .map(HiveSqlParser.Order_by_clauseContext::T_ASC)
328 | .map(ParserRuleContext -> ParserRuleContext.size() > 0 ? "\n" + ParserRuleContext.get(0).getText() : "").ifPresent(tmpSelect::append);
329 | Optional.of(subSelect).map(HiveSqlParser.Subselect_stmtContext::order_by_clause)
330 | .map(HiveSqlParser.Order_by_clauseContext::T_DESC)
331 | .map(ParserRuleContext -> ParserRuleContext.size() > 0 ? "\n" + ParserRuleContext.get(0).getText() : "").ifPresent(tmpSelect::append);
332 | // select 参数
333 | Optional.of(subSelect).map(HiveSqlParser.Subselect_stmtContext::select_options)
334 | .map(ParserRuleContext -> "\n" + addKongFormat(subSourceSql(ParserRuleContext), kong))
335 | .ifPresent(tmpSelect::append);
336 | formattedSQL = formattedSQL.replace(String.format("{SELECT%s}", thisIndex), tmpSelect.toString());
337 | }
338 | return super.visitSelect_stmt(ctx);
339 | }
340 |
341 | public String getFormattedSQL() {
342 | return formattedSQL;
343 | }
344 | }
345 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/hive/HiveSqlMetadataParser.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng.hive;
2 |
3 | import cn.ganjiacheng.antlr.HiveSqlBaseVisitor;
4 | import cn.ganjiacheng.antlr.HiveSqlParser;
5 | import cn.ganjiacheng.model.metadata.FieldMetadataModel;
6 | import cn.ganjiacheng.model.metadata.TableMetadataModel;
7 | import org.antlr.v4.runtime.ParserRuleContext;
8 | import org.antlr.v4.runtime.RuleContext;
9 | import org.antlr.v4.runtime.tree.ParseTree;
10 |
11 | import java.util.ArrayList;
12 | import java.util.List;
13 | import java.util.Optional;
14 |
15 | /**
16 | * @ClassName HiveSqlMetadataParser
17 | * @description:
18 | * @author: again
19 | * @Date: 2021/3/10 7:41 下午
20 | */
21 | public class HiveSqlMetadataParser extends HiveSqlBaseVisitor {
22 |
23 | private final TableMetadataModel tableMetadata = new TableMetadataModel();
24 |
25 | private final String sourceSQL;
26 |
27 | /**
28 | * 保存原始sql
29 | */
30 | public HiveSqlMetadataParser(String sql) {
31 | this.sourceSQL = sql;
32 | }
33 |
34 | /**
35 | * 截取原始sql
36 | * @param parserRuleContext
37 | * @return
38 | */
39 | private String subSourceSql(ParserRuleContext parserRuleContext) {
40 | return sourceSQL.substring(
41 | parserRuleContext.getStart().getStartIndex(),
42 | parserRuleContext.getStop().getStopIndex() + 1);
43 | }
44 |
45 | /**
46 | * 处理备注中的引号
47 | */
48 | private String dealComment(String comment) {
49 | if(comment != null && comment.length() >= 2
50 | && comment.startsWith("'") && comment.endsWith("'")){
51 | comment = comment.substring(1, comment.length()-1);
52 | }
53 | return comment;
54 | }
55 |
56 | /**
57 | * 处理表名字段名中的``
58 | * @param name
59 | * @return
60 | */
61 | private String dealNameMark(String name) {
62 | if(name.startsWith("`") && name.endsWith("`")) {
63 | return name.substring(1, name.length()-1);
64 | }else {
65 | return name;
66 | }
67 | }
68 |
69 | /**
70 | * 获取到字段信息
71 | * @param ctx
72 | */
73 | private void setTableField(HiveSqlParser.Create_table_stmtContext ctx) {
74 | List itemContexts =
75 | ctx.create_table_definition().create_table_columns().create_table_columns_item();
76 | List fields = new ArrayList<>();
77 | itemContexts.forEach(item -> {
78 | FieldMetadataModel field = new FieldMetadataModel();
79 | field.setFieldName(Optional.of(item)
80 | .map(HiveSqlParser.Create_table_columns_itemContext::column_name)
81 | .map(RuleContext::getText)
82 | .map(this::dealNameMark)
83 | .orElse(null));
84 | String type = Optional.of(item)
85 | .map(HiveSqlParser.Create_table_columns_itemContext::dtype)
86 | .map(RuleContext::getText)
87 | .orElse(null);
88 | String typeLen = Optional.of(item)
89 | .map(HiveSqlParser.Create_table_columns_itemContext::dtype_len)
90 | .map(RuleContext::getText)
91 | .orElse("");
92 | field.setDataType(type != null ? type + typeLen : null);
93 | field.setFieldComment(Optional.of(item)
94 | .map(HiveSqlParser.Create_table_columns_itemContext::column_comment)
95 | .map(RuleContext::getText)
96 | .map(this::dealComment)
97 | .orElse(null));
98 | fields.add(field);
99 | });
100 | tableMetadata.setFields(fields);
101 | }
102 |
103 | /**
104 | * 获取表其他属性信息
105 | * @param ctx
106 | */
107 | private void setTableOption(HiveSqlParser.Create_table_stmtContext ctx) {
108 | // HiveSqlParser.Create_table_options_hive_itemContext tableOption =
109 | // ctx.create_table_definition().create_table_options().create_table_options_hive_item();
110 | // tableMetadata.setTableComment(Optional.ofNullable(tableOption)
111 | // .map(HiveSqlParser.Create_table_options_hive_itemContext::string)
112 | // .map(RuleContext::getText)
113 | // .map(this::dealComment)
114 | // .orElse(null));
115 | // tableMetadata.setPartition(Optional.ofNullable(tableOption)
116 | // .map(HiveSqlParser.Create_table_options_hive_itemContext::create_table_hive_partitioned_by_clause)
117 | // .map(this::subSourceSql)
118 | // .orElse(null));
119 | // tableMetadata.setRowFormat(Optional.ofNullable(tableOption)
120 | // .map(HiveSqlParser.Create_table_options_hive_itemContext::create_table_hive_row_format)
121 | // .map(this::subSourceSql)
122 | // .orElse(null));
123 | // tableMetadata.setStore(Optional.ofNullable(tableOption)
124 | // .map(HiveSqlParser.Create_table_options_hive_itemContext::create_table_hive_stored)
125 | // .map(this::subSourceSql)
126 | // .orElse(null));
127 | // tableMetadata.setLocation(Optional.ofNullable(tableOption)
128 | // .map(HiveSqlParser.Create_table_options_hive_itemContext::create_table_hive_location)
129 | // .map(this::subSourceSql)
130 | // .orElse(null));
131 | // tableMetadata.setProperties(Optional.ofNullable(tableOption)
132 | // .map(HiveSqlParser.Create_table_options_hive_itemContext::create_table_hive_tblproperties)
133 | // .map(this::subSourceSql)
134 | // .orElse(null));
135 | }
136 |
137 | /**
138 | * 获取到表相关信息
139 | * @param ctx
140 | * @return
141 | */
142 | @Override
143 | public Object visitCreate_table_stmt(HiveSqlParser.Create_table_stmtContext ctx) {
144 | List tbNameTree = ctx.table_name().ident().children;
145 | if(tbNameTree.size() == 3 && tbNameTree.get(1).getText().equals(".")) {
146 | tableMetadata.setDbName(tbNameTree.get(0).getText());
147 | tableMetadata.setTableName(dealNameMark(tbNameTree.get(2).getText()));
148 | }else{
149 | tableMetadata.setTableName(dealNameMark(tbNameTree.get(0).getText()));
150 | }
151 | // tableMetadata.setTableType(Optional.of(ctx)
152 | // .map(HiveSqlParser.Create_table_stmtContext::T_EXTERNAl)
153 | // .map(ParseTree::getText)
154 | // .orElse(null));
155 | setTableField(ctx);
156 | setTableOption(ctx);
157 | return super.visitCreate_table_stmt(ctx);
158 | }
159 |
160 | /**
161 | * 获取全部创表信息
162 | */
163 | public TableMetadataModel getTableMetadata() {
164 | return this.tableMetadata;
165 | }
166 | }
167 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/hive/HiveSqlTableLineageParser.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng.hive;
2 |
3 | import cn.ganjiacheng.antlr.HiveSqlBaseVisitor;
4 | import cn.ganjiacheng.antlr.HiveSqlParser;
5 | import cn.ganjiacheng.model.lineage.TableLineageModel;
6 | import cn.ganjiacheng.model.lineage.TableNameModel;
7 | import org.antlr.v4.runtime.RuleContext;
8 |
9 | import java.util.HashSet;
10 | import java.util.Optional;
11 |
12 | /**
13 | * @ClassName HiveTableLineage
14 | * @description:
15 | * @author: again
16 | * @Date: 2021/3/10 8:47 下午
17 | */
18 | public class HiveSqlTableLineageParser extends HiveSqlBaseVisitor {
19 |
20 | private TableNameModel outputTable;
21 | private final HashSet inputTables = new HashSet<>();
22 |
23 | /**
24 | * visitInsert获取insert的table_name节点,作为目标输出表
25 | * @param ctx
26 | * @return
27 | */
28 | @Override
29 | public Object visitInsert_stmt(HiveSqlParser.Insert_stmtContext ctx) {
30 | outputTable = Optional.ofNullable(ctx)
31 | .map(HiveSqlParser.Insert_stmtContext::table_name)
32 | .map(RuleContext::getText)
33 | .map(TableNameModel::parseTableName)
34 | .orElse(null);
35 | return super.visitInsert_stmt(ctx);
36 | }
37 |
38 | /**
39 | * 获取from真实表,加到来源表的Set里
40 | * @param ctx
41 | * @return
42 | */
43 | @Override
44 | public Object visitFrom_table_clause(HiveSqlParser.From_table_clauseContext ctx) {
45 | Optional.ofNullable(ctx)
46 | .map(HiveSqlParser.From_table_clauseContext::from_table_name_clause)
47 | .map(RuleContext::getText)
48 | .map(TableNameModel::parseTableName)
49 | .map(inputTables::add);
50 | return super.visitFrom_table_clause(ctx);
51 | }
52 |
53 | public TableLineageModel getTableLineage() {
54 | TableLineageModel tableLineageModel = new TableLineageModel();
55 | tableLineageModel.setOutputTable(outputTable);
56 | tableLineageModel.setInputTables(inputTables);
57 | return tableLineageModel;
58 | }
59 | }
60 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/hive/HiveSqlTypeParser.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng.hive;
2 |
3 | import cn.ganjiacheng.antlr.HiveSqlBaseVisitor;
4 | import cn.ganjiacheng.antlr.HiveSqlParser;
5 | import cn.ganjiacheng.enums.SqlTypeEnum;
6 |
7 | /**
8 | * @ClassName HiveSqlType
9 | * @description:
10 | * @author: again
11 | * @Date: 2021/3/10 4:29 下午
12 | */
13 | public class HiveSqlTypeParser extends HiveSqlBaseVisitor {
14 |
15 | private SqlTypeEnum sqlType = null;
16 |
17 | private void initSqlTypeEnum(SqlTypeEnum type) {
18 | if(sqlType == null) {
19 | sqlType = type;
20 | }
21 | }
22 |
23 | @Override
24 | public Object visitCreate_table_stmt(HiveSqlParser.Create_table_stmtContext ctx) {
25 | initSqlTypeEnum(SqlTypeEnum.CREATE);
26 | return super.visitCreate_table_stmt(ctx);
27 | }
28 |
29 | @Override
30 | public Object visitInsert_stmt(HiveSqlParser.Insert_stmtContext ctx) {
31 | initSqlTypeEnum(SqlTypeEnum.INSERT);
32 | return super.visitInsert_stmt(ctx);
33 | }
34 |
35 | @Override
36 | public Object visitSelect_stmt(HiveSqlParser.Select_stmtContext ctx) {
37 | initSqlTypeEnum(SqlTypeEnum.SELECT);
38 | return super.visitSelect_stmt(ctx);
39 | }
40 |
41 | @Override
42 | public Object visitUpdate_stmt(HiveSqlParser.Update_stmtContext ctx) {
43 | initSqlTypeEnum(SqlTypeEnum.UPDATE);
44 | return super.visitUpdate_stmt(ctx);
45 | }
46 |
47 | @Override
48 | public Object visitDelete_stmt(HiveSqlParser.Delete_stmtContext ctx) {
49 | initSqlTypeEnum(SqlTypeEnum.DELETE);
50 | return super.visitDelete_stmt(ctx);
51 | }
52 |
53 | public SqlTypeEnum getSqlType() {
54 | return sqlType;
55 | }
56 |
57 | }
58 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/hive/MyHiveSqlParser.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng.hive;
2 |
3 | import cn.ganjiacheng.SqlParserAbstract;
4 | import cn.ganjiacheng.antlr.HiveSqlLexer;
5 | import cn.ganjiacheng.antlr.HiveSqlParser;
6 | import cn.ganjiacheng.enums.SqlTypeEnum;
7 | import cn.ganjiacheng.model.lineage.FieldLineageModel;
8 | import cn.ganjiacheng.model.lineage.TableLineageModel;
9 | import cn.ganjiacheng.model.metadata.TableMetadataModel;
10 | import org.antlr.v4.runtime.CharStream;
11 | import org.antlr.v4.runtime.CharStreams;
12 | import org.antlr.v4.runtime.CommonTokenStream;
13 | import org.antlr.v4.runtime.tree.ParseTree;
14 |
15 | import java.util.List;
16 |
17 | /**
18 | * @ClassName HiveSqlParser
19 | * @description:
20 | * @author: again
21 | * @Date: 2021/3/10 4:21 下午
22 | */
23 | public class MyHiveSqlParser extends SqlParserAbstract {
24 |
25 | private ParseTree getParseTree(String sql) {
26 | CharStream input = CharStreams.fromString(sql);
27 | HiveSqlLexer lexer = new HiveSqlLexer(input);
28 | CommonTokenStream tokenStream = new CommonTokenStream(lexer);
29 | HiveSqlParser parser = new HiveSqlParser(tokenStream);
30 | return parser.program();
31 | }
32 |
33 | @Override
34 | public SqlTypeEnum parseSqlType(String sql) {
35 | HiveSqlTypeParser visitor = new HiveSqlTypeParser();
36 | visitor.visit(getParseTree(sql));
37 | return visitor.getSqlType();
38 | }
39 |
40 | @Override
41 | public TableMetadataModel parseSqlMetadata(String sql) {
42 | HiveSqlMetadataParser visitor = new HiveSqlMetadataParser(sql);
43 | visitor.visit(getParseTree(sql));
44 | return visitor.getTableMetadata();
45 | }
46 |
47 | @Override
48 | public String parseSqlFormatter(String sql) {
49 | HiveSqlFormatterParser visitor = new HiveSqlFormatterParser(sql);
50 | visitor.visit(getParseTree(sql));
51 | return visitor.getFormattedSQL();
52 | }
53 |
54 | @Override
55 | public TableLineageModel parseSqlTableLineage(String sql) {
56 | HiveSqlTableLineageParser visitor = new HiveSqlTableLineageParser();
57 | visitor.visit(getParseTree(sql));
58 | return visitor.getTableLineage();
59 | }
60 |
61 | @Override
62 | public List parseSqlFieldLineage(String sql) {
63 | HiveSqlFieldLineageParser visitor = new HiveSqlFieldLineageParser(sql);
64 | visitor.visit(getParseTree(sql));
65 | return visitor.getHiveFieldLineage();
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/model/lineage/FieldLineageModel.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng.model.lineage;
2 |
3 | import java.util.HashSet;
4 |
5 | /**
6 | * @ClassName HiveFieldLineageModel
7 | * @description:
8 | * @author: again
9 | * @Date: 2021/3/10 8:52 下午
10 | */
11 | public class FieldLineageModel {
12 | /**
13 | * 目标字段
14 | */
15 | private FieldNameModel targetField;
16 |
17 | /**
18 | * 来源字段列表
19 | */
20 | private HashSet sourceFields;
21 |
22 | public FieldNameModel getTargetField() {
23 | return targetField;
24 | }
25 |
26 | public void setTargetField(FieldNameModel targetField) {
27 | this.targetField = targetField;
28 | }
29 |
30 | public HashSet getSourceFields() {
31 | return sourceFields;
32 | }
33 |
34 | public void setSourceFields(HashSet sourceFields) {
35 | this.sourceFields = sourceFields;
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/model/lineage/FieldLineageSelectItemModel.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng.model.lineage;
2 |
3 | import java.util.Set;
4 |
/**
 * One item of a SELECT list as used by field lineage: the field names it
 * references, its alias and its processing expression.
 *
 * @author again
 * @since 2021/3/10
 */
public class FieldLineageSelectItemModel {

    /** Field names of this item. NOTE(review): element type is not visible here; confirm against callers. */
    private Set fieldNames;

    /** Alias of this item. */
    private String alias;

    /** Processing text of this item. */
    private String process;

    /** @return the stored field-name set, or {@code null} if never set */
    public Set getFieldNames() {
        return this.fieldNames;
    }

    /** @param fieldNames the field-name set to store (kept by reference) */
    public void setFieldNames(Set fieldNames) {
        this.fieldNames = fieldNames;
    }

    /** @return the alias, or {@code null} if never set */
    public String getAlias() {
        return this.alias;
    }

    /** @param alias the alias to store */
    public void setAlias(String alias) {
        this.alias = alias;
    }

    /** @return the processing text, or {@code null} if never set */
    public String getProcess() {
        return this.process;
    }

    /** @param process the processing text to store */
    public void setProcess(String process) {
        this.process = process;
    }
}
40 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/model/lineage/FieldLineageSelectModel.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng.model.lineage;
2 |
3 | import java.util.List;
4 |
5 | /**
6 | * @ClassName HiveFieldLineageSelectModel
7 | * @description:
8 | * @author: again
9 | * @Date: 2021/3/10 8:52 下午
10 | */
11 | public class FieldLineageSelectModel {
12 | /**
13 | * index
14 | */
15 | String id;
16 |
17 | /**
18 | * 父id,第一层select为null
19 | */
20 | String parentId;
21 |
22 | /**
23 | * 来源表,来源子select则为null
24 | */
25 | TableNameModel fromTable;
26 |
27 | /**
28 | * 表别名
29 | */
30 | String tableAlias;
31 |
32 | /**
33 | * select字段
34 | */
35 | List selectItems;
36 |
37 | public String getId() {
38 | return id;
39 | }
40 |
41 | public void setId(String id) {
42 | this.id = id;
43 | }
44 |
45 | public String getParentId() {
46 | return parentId;
47 | }
48 |
49 | public void setParentId(String parentId) {
50 | this.parentId = parentId;
51 | }
52 |
53 | public TableNameModel getFromTable() {
54 | return fromTable;
55 | }
56 |
57 | public void setFromTable(TableNameModel fromTable) {
58 | this.fromTable = fromTable;
59 | }
60 |
61 | public String getTableAlias() {
62 | return tableAlias;
63 | }
64 |
65 | public void setTableAlias(String tableAlias) {
66 | this.tableAlias = tableAlias;
67 | }
68 |
69 | public List getSelectItems() {
70 | return selectItems;
71 | }
72 |
73 | public void setSelectItems(List selectItems) {
74 | this.selectItems = selectItems;
75 | }
76 | }
77 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/model/lineage/FieldNameModel.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng.model.lineage;
2 |
3 | import java.util.Objects;
4 |
5 | /**
6 | * @ClassName FieldNameModel
7 | * @description:
8 | * @author: again
9 | * @Date: 2021/3/10 8:50 下午
10 | */
11 | public class FieldNameModel {
12 | private String dbName;
13 | private String tableName;
14 | private String fieldName;
15 |
16 | public String getDbName() {
17 | return dbName;
18 | }
19 |
20 | public void setDbName(String dbName) {
21 | this.dbName = dbName;
22 | }
23 |
24 | public String getTableName() {
25 | return tableName;
26 | }
27 |
28 | public void setTableName(String tableName) {
29 | this.tableName = tableName;
30 | }
31 |
32 | public String getFieldName() {
33 | return fieldName;
34 | }
35 |
36 | public void setFieldName(String fieldName) {
37 | this.fieldName = fieldName;
38 | }
39 |
40 | @Override
41 | public boolean equals(Object o) {
42 | if (this == o) {
43 | return true;
44 | }
45 | if (o == null || getClass() != o.getClass()) {
46 | return false;
47 | }
48 | FieldNameModel that = (FieldNameModel) o;
49 | return Objects.equals(dbName, that.dbName) &&
50 | Objects.equals(tableName, that.tableName) &&
51 | Objects.equals(fieldName, that.fieldName);
52 | }
53 |
54 | @Override
55 | public int hashCode() {
56 | return Objects.hash(dbName, tableName, fieldName);
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/model/lineage/FieldNameWithProcessModel.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng.model.lineage;
2 |
3 | import java.util.Objects;
4 |
5 | /**
6 | * @ClassName FieldNameWithProcessModel
7 | * @description:
8 | * @author: again
9 | * @Date: 2021/3/10 8:51 下午
10 | */
11 | public class FieldNameWithProcessModel {
12 | private String dbName;
13 | private String tableName;
14 | private String fieldName;
15 | private String process;
16 |
17 | public String getDbName() {
18 | return dbName;
19 | }
20 |
21 | public void setDbName(String dbName) {
22 | this.dbName = dbName;
23 | }
24 |
25 | public String getTableName() {
26 | return tableName;
27 | }
28 |
29 | public void setTableName(String tableName) {
30 | this.tableName = tableName;
31 | }
32 |
33 | public String getFieldName() {
34 | return fieldName;
35 | }
36 |
37 | public void setFieldName(String fieldName) {
38 | this.fieldName = fieldName;
39 | }
40 |
41 | public String getProcess() {
42 | return process;
43 | }
44 |
45 | public void setProcess(String process) {
46 | this.process = process;
47 | }
48 |
49 | @Override
50 | public boolean equals(Object o) {
51 | if (this == o) {
52 | return true;
53 | }
54 | if (o == null || getClass() != o.getClass()) {
55 | return false;
56 | }
57 | FieldNameWithProcessModel that = (FieldNameWithProcessModel) o;
58 | return Objects.equals(dbName, that.dbName) &&
59 | Objects.equals(tableName, that.tableName) &&
60 | Objects.equals(fieldName, that.fieldName) &&
61 | Objects.equals(process, that.process);
62 | }
63 |
64 | @Override
65 | public int hashCode() {
66 | return Objects.hash(dbName, tableName, fieldName, process);
67 | }
68 | }
69 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/model/lineage/TableLineageModel.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng.model.lineage;
2 |
3 | import java.util.HashSet;
4 |
5 | /**
6 | * @ClassName HiveTableLineageModel
7 | * @description:
8 | * @author: again
9 | * @Date: 2021/3/10 8:45 下午
10 | */
11 | public class TableLineageModel {
12 |
13 | private TableNameModel outputTable;
14 |
15 | /**
16 | * 输入的表名列表
17 | */
18 | private HashSet inputTables;
19 |
20 | public TableNameModel getOutputTable() {
21 | return outputTable;
22 | }
23 |
24 | public void setOutputTable(TableNameModel outputTable) {
25 | this.outputTable = outputTable;
26 | }
27 |
28 | public HashSet getInputTables() {
29 | return inputTables;
30 | }
31 |
32 | public void setInputTables(HashSet inputTables) {
33 | this.inputTables = inputTables;
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/model/lineage/TableNameModel.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng.model.lineage;
2 |
/**
 * Table name split into an optional database part and a table part. Two
 * instances are equal when both parts are equal.
 *
 * @author again
 * @since 2021/3/10
 */
public class TableNameModel {
    private String dbName;
    private String tableName;

    /**
     * Removes one pair of surrounding backticks from an identifier.
     *
     * @param name raw identifier text, may be {@code null}
     * @return the identifier without its enclosing backticks; the input itself
     *         when it is {@code null}, shorter than two characters or not quoted
     */
    public static String dealNameMark(String name) {
        // The length guard matters: a lone "`" both starts and ends with a
        // backtick, and substring(1, 0) would throw.
        if (name != null && name.length() >= 2
                && name.startsWith("`") && name.endsWith("`")) {
            return name.substring(1, name.length() - 1);
        }
        return name;
    }

    /**
     * Parses {@code table} or {@code db.table} into a model. Each part is
     * unquoted with {@link #dealNameMark(String)}, so {@code `db`.`t`} and
     * {@code db.t} give equal models.
     *
     * @param tableName raw table reference, must not be {@code null}
     * @return a new model; both parts stay {@code null} when the text does not
     *         split into one or two dot-separated parts
     */
    public static TableNameModel parseTableName(String tableName) {
        TableNameModel tableNameModel = new TableNameModel();
        String[] splitTable = tableName.split("\\.");
        if (splitTable.length == 2) {
            tableNameModel.setDbName(dealNameMark(splitTable[0]));
            tableNameModel.setTableName(dealNameMark(splitTable[1]));
        } else if (splitTable.length == 1) {
            tableNameModel.setTableName(dealNameMark(splitTable[0]));
        }
        return tableNameModel;
    }

    /** @return the database name, or {@code null} */
    public String getDbName() {
        return dbName;
    }

    /** @param dbName the database name to store */
    public void setDbName(String dbName) {
        this.dbName = dbName;
    }

    /** @return the table name, or {@code null} */
    public String getTableName() {
        return tableName;
    }

    /** @param tableName the table name to store */
    public void setTableName(String tableName) {
        this.tableName = tableName;
    }

    /** Compares database and table name; only instances of exactly this class can be equal. */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }

        TableNameModel that = (TableNameModel) o;

        if (dbName != null ? !dbName.equals(that.dbName) : that.dbName != null) {
            return false;
        }
        return tableName != null ? tableName.equals(that.tableName) : that.tableName == null;
    }

    /** Hash over the same two parts that {@link #equals(Object)} compares. */
    @Override
    public int hashCode() {
        int result = dbName != null ? dbName.hashCode() : 0;
        result = 31 * result + (tableName != null ? tableName.hashCode() : 0);
        return result;
    }
}
69 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/model/metadata/FieldMetadataModel.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng.model.metadata;
2 |
/**
 * Metadata of one table column: name, data type and comment.
 *
 * @author again
 * @since 2021/3/10
 */
public class FieldMetadataModel {

    /** Field name. */
    private String fieldName;

    /** Data type. */
    private String dataType;

    /** Field comment. */
    private String fieldComment;

    /** @return the field name, or {@code null} if never set */
    public String getFieldName() {
        return this.fieldName;
    }

    /** @param fieldName the field name to store */
    public void setFieldName(String fieldName) {
        this.fieldName = fieldName;
    }

    /** @return the data type, or {@code null} if never set */
    public String getDataType() {
        return this.dataType;
    }

    /** @param dataType the data type to store */
    public void setDataType(String dataType) {
        this.dataType = dataType;
    }

    /** @return the field comment, or {@code null} if never set */
    public String getFieldComment() {
        return this.fieldComment;
    }

    /** @param fieldComment the field comment to store */
    public void setFieldComment(String fieldComment) {
        this.fieldComment = fieldComment;
    }
}
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/model/metadata/TableMetadataModel.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng.model.metadata;
2 |
3 | import java.util.List;
4 |
5 | /**
6 | * @ClassName HiveTableMetadata
7 | * @description:
8 | * @author: again
9 | * @Date: 2021/3/10 7:53 下午
10 | */
11 | public class TableMetadataModel {
12 | /**
13 | * 库名
14 | */
15 | private String dbName;
16 |
17 | /**
18 | * 表名
19 | */
20 | private String tableName;
21 |
22 | /**
23 | * 表类型
24 | */
25 | private String tableType;
26 |
27 | /**
28 | * 备注
29 | */
30 | private String tableComment;
31 |
32 | /**
33 | * 分区
34 | */
35 | private String partition;
36 |
37 | /**
38 | * 行格式
39 | */
40 | private String rowFormat;
41 |
42 | /**
43 | * 存储格式
44 | */
45 | private String store;
46 |
47 | /**
48 | * 存储位置
49 | */
50 | private String location;
51 |
52 | /**
53 | * 属性(压缩格式)
54 | */
55 | private String properties;
56 |
57 | /**
58 | * 字段
59 | */
60 | private List fields;
61 |
62 | public String getDbName() {
63 | return dbName;
64 | }
65 |
66 | public void setDbName(String dbName) {
67 | this.dbName = dbName;
68 | }
69 |
70 | public String getTableName() {
71 | return tableName;
72 | }
73 |
74 | public void setTableName(String tableName) {
75 | this.tableName = tableName;
76 | }
77 |
78 | public String getTableType() {
79 | return tableType;
80 | }
81 |
82 | public void setTableType(String tableType) {
83 | this.tableType = tableType;
84 | }
85 |
86 | public String getTableComment() {
87 | return tableComment;
88 | }
89 |
90 | public void setTableComment(String tableComment) {
91 | this.tableComment = tableComment;
92 | }
93 |
94 | public String getPartition() {
95 | return partition;
96 | }
97 |
98 | public void setPartition(String partition) {
99 | this.partition = partition;
100 | }
101 |
102 | public String getRowFormat() {
103 | return rowFormat;
104 | }
105 |
106 | public void setRowFormat(String rowFormat) {
107 | this.rowFormat = rowFormat;
108 | }
109 |
110 | public String getStore() {
111 | return store;
112 | }
113 |
114 | public void setStore(String store) {
115 | this.store = store;
116 | }
117 |
118 | public String getLocation() {
119 | return location;
120 | }
121 |
122 | public void setLocation(String location) {
123 | this.location = location;
124 | }
125 |
126 | public String getProperties() {
127 | return properties;
128 | }
129 |
130 | public void setProperties(String properties) {
131 | this.properties = properties;
132 | }
133 |
134 | public List getFields() {
135 | return fields;
136 | }
137 |
138 | public void setFields(List fields) {
139 | this.fields = fields;
140 | }
141 | }
142 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/mysql/MysqlSqlParser.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng.mysql;
2 |
3 | import cn.ganjiacheng.SqlParserAbstract;
4 | import cn.ganjiacheng.antlr.MySqlLexer;
5 | import cn.ganjiacheng.antlr.MySqlParser;
6 | import cn.ganjiacheng.enums.SqlTypeEnum;
7 | import org.antlr.v4.runtime.CharStream;
8 | import org.antlr.v4.runtime.CharStreams;
9 | import org.antlr.v4.runtime.CommonTokenStream;
10 | import org.antlr.v4.runtime.tree.ParseTree;
11 |
12 | /**
13 | * @ClassName MysqlSqlParser
14 | * @description:
15 | * @author: again
16 | * @Date: 2021/3/10 4:26 下午
17 | */
18 | public class MysqlSqlParser extends SqlParserAbstract {
19 |
20 | private ParseTree getParseTree(String sql) {
21 | sql = sql.toUpperCase();
22 | CharStream input = CharStreams.fromString(sql);
23 | MySqlLexer mySqlLexer = new MySqlLexer(input);
24 | CommonTokenStream tokens = new CommonTokenStream(mySqlLexer);
25 | MySqlParser parser = new MySqlParser(tokens);
26 | return parser.root();
27 | }
28 |
29 | @Override
30 | public SqlTypeEnum parseSqlType(String sql) {
31 | MysqlSqlTypeParser visitor = new MysqlSqlTypeParser();
32 | visitor.visit(getParseTree(sql));
33 | return visitor.getSqlType();
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/mysql/MysqlSqlTypeParser.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng.mysql;
2 |
3 | import cn.ganjiacheng.antlr.MySqlParserBaseVisitor;
4 | import cn.ganjiacheng.antlr.MySqlParser;
5 | import cn.ganjiacheng.enums.SqlTypeEnum;
6 |
7 | /**
8 | * @ClassName MysqlSqlTypeParser
9 | * @description:
10 | * @author: again
11 | * @Date: 2021/3/10 5:09 下午
12 | */
13 | public class MysqlSqlTypeParser extends MySqlParserBaseVisitor {
14 |
15 | private SqlTypeEnum sqlType = null;
16 |
17 | private void initSqlTypeEnum(SqlTypeEnum type) {
18 | if(sqlType == null) {
19 | sqlType = type;
20 | }
21 | }
22 |
23 | @Override
24 | public Object visitSimpleSelect(MySqlParser.SimpleSelectContext ctx) {
25 | initSqlTypeEnum(SqlTypeEnum.SELECT);
26 | return super.visitSimpleSelect(ctx);
27 | }
28 |
29 | @Override
30 | public Object visitUpdateStatement(MySqlParser.UpdateStatementContext ctx) {
31 | initSqlTypeEnum(SqlTypeEnum.UPDATE);
32 | return super.visitUpdateStatement(ctx);
33 | }
34 |
35 | @Override
36 | public Object visitInsertStatement(MySqlParser.InsertStatementContext ctx) {
37 | initSqlTypeEnum(SqlTypeEnum.INSERT);
38 | return super.visitInsertStatement(ctx);
39 | }
40 |
41 | @Override
42 | public Object visitColumnCreateTable(MySqlParser.ColumnCreateTableContext ctx) {
43 | initSqlTypeEnum(SqlTypeEnum.CREATE);
44 | return super.visitColumnCreateTable(ctx);
45 | }
46 |
47 | @Override
48 | public Object visitSingleDeleteStatement(MySqlParser.SingleDeleteStatementContext ctx) {
49 | initSqlTypeEnum(SqlTypeEnum.DELETE);
50 | return super.visitSingleDeleteStatement(ctx);
51 | }
52 |
53 | public SqlTypeEnum getSqlType() {
54 | return sqlType;
55 | }
56 |
57 | }
58 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/presto/MyPresoSqlParser.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng.presto;
2 |
3 | import cn.ganjiacheng.SqlParserAbstract;
4 | import cn.ganjiacheng.antlr.PrestoSqlLexer;
5 | import cn.ganjiacheng.antlr.PrestoSqlParser;
6 | import cn.ganjiacheng.enums.SqlTypeEnum;
7 | import cn.ganjiacheng.mysql.MysqlSqlTypeParser;
8 | import org.antlr.v4.runtime.CharStream;
9 | import org.antlr.v4.runtime.CharStreams;
10 | import org.antlr.v4.runtime.CommonTokenStream;
11 | import org.antlr.v4.runtime.tree.ParseTree;
12 |
13 | /**
14 | * @ClassName MyPresoSqlParser
15 | * @description:
16 | * @author: again
17 | * @Date: 2021/3/11 7:59 下午
18 | */
19 | public class MyPresoSqlParser extends SqlParserAbstract {
20 |
21 | private ParseTree getParseTree(String sql) {
22 | sql = sql.toUpperCase();
23 | CharStream input = CharStreams.fromString(sql);
24 | PrestoSqlLexer mySqlLexer = new PrestoSqlLexer(input);
25 | CommonTokenStream tokens = new CommonTokenStream(mySqlLexer);
26 | PrestoSqlParser parser = new PrestoSqlParser(tokens);
27 | return parser.statement();
28 | }
29 |
30 | @Override
31 | public SqlTypeEnum parseSqlType(String sql) {
32 | PrestoSqlTypeParser visitor = new PrestoSqlTypeParser();
33 | visitor.visit(getParseTree(sql));
34 | return visitor.getSqlType();
35 | }
36 |
37 | }
38 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/presto/PrestoSqlTypeParser.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng.presto;
2 |
3 | import cn.ganjiacheng.antlr.PrestoSqlBaseVisitor;
4 | import cn.ganjiacheng.antlr.PrestoSqlParser;
5 | import cn.ganjiacheng.enums.SqlTypeEnum;
6 |
7 | /**
8 | * @ClassName PrestoSqlTypeParser
9 | * @description:
10 | * @author: again
11 | * @Date: 2021/3/11 7:59 下午
12 | */
13 | public class PrestoSqlTypeParser extends PrestoSqlBaseVisitor {
14 |
15 | private SqlTypeEnum sqlType = null;
16 |
17 | private void initSqlTypeEnum(SqlTypeEnum type) {
18 | if(sqlType == null) {
19 | sqlType = type;
20 | }
21 | }
22 |
23 | public SqlTypeEnum getSqlType() {
24 | return sqlType;
25 | }
26 |
27 | @Override
28 | public Object visitCreateTable(PrestoSqlParser.CreateTableContext ctx) {
29 | initSqlTypeEnum(SqlTypeEnum.CREATE);
30 | return super.visitCreateTable(ctx);
31 | }
32 |
33 | @Override
34 | public Object visitInsertInto(PrestoSqlParser.InsertIntoContext ctx) {
35 | initSqlTypeEnum(SqlTypeEnum.INSERT);
36 | return super.visitInsertInto(ctx);
37 | }
38 |
39 | @Override
40 | public Object visitSelectSingle(PrestoSqlParser.SelectSingleContext ctx) {
41 | initSqlTypeEnum(SqlTypeEnum.SELECT);
42 | return super.visitSelectSingle(ctx);
43 | }
44 |
45 | @Override
46 | public Object visitDelete(PrestoSqlParser.DeleteContext ctx) {
47 | initSqlTypeEnum(SqlTypeEnum.DELETE);
48 | return super.visitDelete(ctx);
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/spark/MySparkSqlParser.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng.spark;
2 |
3 | import cn.ganjiacheng.SqlParserAbstract;
4 | import cn.ganjiacheng.antlr.PrestoSqlLexer;
5 | import cn.ganjiacheng.antlr.PrestoSqlParser;
6 | import cn.ganjiacheng.antlr.SparkSqlLexer;
7 | import cn.ganjiacheng.antlr.SparkSqlParser;
8 | import cn.ganjiacheng.enums.SqlTypeEnum;
9 | import cn.ganjiacheng.presto.PrestoSqlTypeParser;
10 | import org.antlr.v4.runtime.CharStream;
11 | import org.antlr.v4.runtime.CharStreams;
12 | import org.antlr.v4.runtime.CommonTokenStream;
13 | import org.antlr.v4.runtime.tree.ParseTree;
14 |
15 | /**
16 | * @ClassName MySparkSqlParser
17 | * @description:
18 | * @author: again
19 | * @Date: 2021/3/11 8:08 下午
20 | */
21 | public class MySparkSqlParser extends SqlParserAbstract {
22 |
23 | private ParseTree getParseTree(String sql) {
24 | sql = sql.toUpperCase();
25 | CharStream input = CharStreams.fromString(sql);
26 | SparkSqlLexer mySqlLexer = new SparkSqlLexer(input);
27 | CommonTokenStream tokens = new CommonTokenStream(mySqlLexer);
28 | SparkSqlParser parser = new SparkSqlParser(tokens);
29 | return parser.statement();
30 | }
31 |
32 | @Override
33 | public SqlTypeEnum parseSqlType(String sql) {
34 | SparkSqlTypeParser visitor = new SparkSqlTypeParser();
35 | visitor.visit(getParseTree(sql));
36 | return visitor.getSqlType();
37 | }
38 |
39 | }
40 |
--------------------------------------------------------------------------------
/src/main/java/cn/ganjiacheng/spark/SparkSqlTypeParser.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng.spark;
2 |
3 | import cn.ganjiacheng.antlr.SparkSqlBaseVisitor;
4 | import cn.ganjiacheng.antlr.SparkSqlParser;
5 | import cn.ganjiacheng.enums.SqlTypeEnum;
6 |
7 | /**
8 | * @ClassName SparkSqlTypeParser
9 | * @description:
10 | * @author: again
11 | * @Date: 2021/3/11 8:08 下午
12 | */
13 | public class SparkSqlTypeParser extends SparkSqlBaseVisitor {
14 |
15 | private SqlTypeEnum sqlType = null;
16 |
17 | private void initSqlTypeEnum(SqlTypeEnum type) {
18 | if(sqlType == null) {
19 | sqlType = type;
20 | }
21 | }
22 |
23 | public SqlTypeEnum getSqlType() {
24 | return sqlType;
25 | }
26 |
27 | @Override
28 | public Object visitSingleInsertQuery(SparkSqlParser.SingleInsertQueryContext ctx) {
29 | initSqlTypeEnum(SqlTypeEnum.INSERT);
30 | return super.visitSingleInsertQuery(ctx);
31 | }
32 |
33 | @Override
34 | public Object visitSelectClause(SparkSqlParser.SelectClauseContext ctx) {
35 | initSqlTypeEnum(SqlTypeEnum.SELECT);
36 | return super.visitSelectClause(ctx);
37 | }
38 |
39 | @Override
40 | public Object visitDeleteFromTable(SparkSqlParser.DeleteFromTableContext ctx) {
41 | initSqlTypeEnum(SqlTypeEnum.DELETE);
42 | return super.visitDeleteFromTable(ctx);
43 | }
44 |
45 | @Override
46 | public Object visitUpdateTable(SparkSqlParser.UpdateTableContext ctx) {
47 | initSqlTypeEnum(SqlTypeEnum.UPDATE);
48 | return super.visitUpdateTable(ctx);
49 | }
50 |
51 | @Override
52 | public Object visitCreateTable(SparkSqlParser.CreateTableContext ctx) {
53 | initSqlTypeEnum(SqlTypeEnum.CREATE);
54 | return super.visitCreateTable(ctx);
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/src/test/java/cn/ganjiacheng/AppTest.java:
--------------------------------------------------------------------------------
1 | package cn.ganjiacheng;
2 |
3 | import static org.junit.Assert.assertTrue;
4 |
5 | import org.junit.Test;
6 |
7 | /**
8 | * Unit test for simple App.
9 | */
10 | public class AppTest
11 | {
12 | /**
13 | * Rigorous Test :-)
14 | */
15 | @Test
16 | public void shouldAnswerWithTrue()
17 | {
18 | assertTrue( true );
19 | }
20 | }
21 |
--------------------------------------------------------------------------------